Browse Source

ICUCollationKeywordFieldMapper use SortedSetDocValuesField (#26267)

Switch ICUCollationKeywordFieldMapper from using SortedDocValuesField to SortedSetDocValuesField
so we can support fields with multiple values.
Matt Weber 8 years ago
parent
commit
e89d9400c9

+ 10 - 1
plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java

@@ -25,11 +25,13 @@ import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.util.ULocale;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.Settings;
@@ -50,6 +52,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.function.BiFunction;
 import java.util.function.LongSupplier;
 
 public class ICUCollationKeywordFieldMapper extends FieldMapper {
@@ -563,6 +566,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
     private final String variableTop;
     private final boolean hiraganaQuaternaryMode;
     private final Collator collator;
+    private final BiFunction<String, BytesRef, Field> getDVField;
 
     protected ICUCollationKeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                              Settings indexSettings, MultiFields multiFields, CopyTo copyTo, String rules, String language,
@@ -584,6 +588,11 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
         this.variableTop = variableTop;
         this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
         this.collator = collator;
+        if (indexCreatedVersion.onOrAfter(Version.V_5_6_0)) {
+            getDVField = SortedSetDocValuesField::new;
+        } else {
+            getDVField = SortedDocValuesField::new;
+        }
     }
 
     @Override
@@ -740,7 +749,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
         }
 
         if (fieldType().hasDocValues()) {
-            fields.add(new SortedDocValuesField(fieldType().name(), binaryValue));
+            fields.add(getDVField.apply(fieldType().name(), binaryValue));
         }
     }
 }

+ 60 - 0
plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperIT.java

@@ -35,6 +35,8 @@ import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.elasticsearch.search.sort.SortBuilders;
+import org.elasticsearch.search.sort.SortMode;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.test.ESIntegTestCase;
 
@@ -94,6 +96,64 @@ public class ICUCollationKeywordFieldMapperIT extends ESIntegTestCase {
         assertOrderedSearchHits(response, "2", "1");
     }
 
+    public void testMultipleValues() throws Exception {
+        String index = "foo";
+        String type = "mytype";
+
+        String[] equilavent = {"a", "C", "a", "B"};
+
+        XContentBuilder builder = jsonBuilder()
+            .startObject().startObject("properties")
+            .startObject("collate")
+            .field("type", "icu_collation_keyword")
+            .field("language", "en")
+            .endObject()
+            .endObject().endObject();
+
+        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
+
+        // everything should be indexed fine, no exceptions
+        indexRandom(true,
+            client().prepareIndex(index, type, "1").setSource("{\"collate\":[\"" + equilavent[0] + "\", \""
+                + equilavent[1] + "\"]}", XContentType.JSON),
+            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[2] + "\"}", XContentType.JSON)
+        );
+
+        // using sort mode = max, values B and C will be used for the sort
+        SearchRequest request = new SearchRequest()
+            .indices(index)
+            .types(type)
+            .source(new SearchSourceBuilder()
+                .fetchSource(false)
+                .query(QueryBuilders.termQuery("collate", "a"))
+                // if mode max we use c and b as sort values, if max we use "a" for both
+                .sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MAX).order(SortOrder.DESC))
+                .sort("_uid", SortOrder.DESC) // will be ignored
+            );
+
+        SearchResponse response = client().search(request).actionGet();
+        assertNoFailures(response);
+        assertHitCount(response, 2L);
+        assertOrderedSearchHits(response, "1", "2");
+
+        // same thing, using different sort mode that will use a for both docs
+        request = new SearchRequest()
+            .indices(index)
+            .types(type)
+            .source(new SearchSourceBuilder()
+                .fetchSource(false)
+                .query(QueryBuilders.termQuery("collate", "a"))
+                // if mode max we use c and b as sort values, if max we use "a" for both
+                .sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MIN).order(SortOrder.DESC))
+                .sort("_uid", SortOrder.DESC) // will NOT be ignored and will determine order
+            );
+
+        response = client().search(request).actionGet();
+        assertNoFailures(response);
+        assertHitCount(response, 2L);
+        assertOrderedSearchHits(response, "2", "1");
+    }
+
     /*
     * Test usage of the decomposition option for unicode normalization.
     */

+ 111 - 2
plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java

@@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.IndexService;
@@ -96,6 +98,51 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
         assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
         assertEquals(DocValuesType.NONE, fieldType.docValuesType());
 
+        assertEquals(expected, fields[1].binaryValue());
+        fieldType = fields[1].fieldType();
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
+        assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
+    }
+
+    public void testBackCompat() throws Exception {
+        indexService = createIndex("oldindex", Settings.builder().put("index.version.created", Version.V_5_5_0).build());
+        parser = indexService.mapperService().documentMapperParser();
+
+        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("_all").field("enabled", false).endObject()
+            .startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
+            .endObject().endObject().string();
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        ParsedDocument doc = mapper.parse(SourceToParse.source("oldindex", "type", "1", XContentFactory.jsonBuilder()
+                .startObject()
+                .field("field", "1234")
+                .endObject()
+                .bytes(),
+            XContentType.JSON));
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(2, fields.length);
+
+        Collator collator = Collator.getInstance();
+        RawCollationKey key = collator.getRawCollationKey("1234", null);
+        BytesRef expected = new BytesRef(key.bytes, 0, key.size);
+
+        assertEquals(expected, fields[0].binaryValue());
+        IndexableFieldType fieldType = fields[0].fieldType();
+        assertThat(fieldType.omitNorms(), equalTo(true));
+        assertFalse(fieldType.tokenized());
+        assertFalse(fieldType.stored());
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
+        assertThat(fieldType.storeTermVectors(), equalTo(false));
+        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
+        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
+
         assertEquals(expected, fields[1].binaryValue());
         fieldType = fields[1].fieldType();
         assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
@@ -194,7 +241,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
         IndexableField[] fields = doc.rootDoc().getFields("field");
         assertEquals(1, fields.length);
         assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions());
-        assertEquals(DocValuesType.SORTED, fields[0].fieldType().docValuesType());
+        assertEquals(DocValuesType.SORTED_SET, fields[0].fieldType().docValuesType());
     }
 
     public void testDisableDocValues() throws IOException {
@@ -219,6 +266,68 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
         assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
     }
 
+    public void testMultipleValues() throws IOException {
+        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
+            .endObject().endObject().string();
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
+                .startObject()
+                .field("field", Arrays.asList("1234", "5678"))
+                .endObject()
+                .bytes(),
+            XContentType.JSON));
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(4, fields.length);
+
+        Collator collator = Collator.getInstance();
+        RawCollationKey key = collator.getRawCollationKey("1234", null);
+        BytesRef expected = new BytesRef(key.bytes, 0, key.size);
+
+        assertEquals(expected, fields[0].binaryValue());
+        IndexableFieldType fieldType = fields[0].fieldType();
+        assertThat(fieldType.omitNorms(), equalTo(true));
+        assertFalse(fieldType.tokenized());
+        assertFalse(fieldType.stored());
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
+        assertThat(fieldType.storeTermVectors(), equalTo(false));
+        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
+        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
+
+        assertEquals(expected, fields[1].binaryValue());
+        fieldType = fields[1].fieldType();
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
+        assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
+
+        collator = Collator.getInstance();
+        key = collator.getRawCollationKey("5678", null);
+        expected = new BytesRef(key.bytes, 0, key.size);
+
+        assertEquals(expected, fields[2].binaryValue());
+        fieldType = fields[2].fieldType();
+        assertThat(fieldType.omitNorms(), equalTo(true));
+        assertFalse(fieldType.tokenized());
+        assertFalse(fieldType.stored());
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
+        assertThat(fieldType.storeTermVectors(), equalTo(false));
+        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
+        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
+
+        assertEquals(expected, fields[3].binaryValue());
+        fieldType = fields[3].fieldType();
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
+        assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
+    }
+
     public void testIndexOptions() throws IOException {
         String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
             .startObject("properties").startObject("field").field("type", FIELD_TYPE)
@@ -316,7 +425,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
         assertEquals(expected, fields[1].binaryValue());
         fieldType = fields[1].fieldType();
         assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
-        assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
+        assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
     }
 
     public void testUpdateCollator() throws IOException {