Browse Source

Add positive_score_impact to rank_features type (#69994)

The rank_features field type lacks the positive_score_impact parameter
that the rank_feature type has. This commit adds it.

Closes #68619
Mayya Sharipova 4 years ago
parent
commit
1de0b616eb

+ 34 - 3
docs/reference/mapping/types/rank-features.asciidoc

@@ -20,6 +20,10 @@ PUT my-index-000001
     "properties": {
       "topics": {
         "type": "rank_features" <1>
+      },
+      "negative_reviews" : {
+        "type": "rank_features",
+        "positive_score_impact": false <2>
       }
     }
   }
@@ -27,9 +31,13 @@ PUT my-index-000001
 
 PUT my-index-000001/_doc/1
 {
-  "topics": { <2>
+  "topics": { <3>
     "politics": 20,
     "economics": 50.8
+  },
+  "negative_reviews": {
+    "1star": 10,
+    "2star": 100
   }
 }
 
@@ -38,21 +46,38 @@ PUT my-index-000001/_doc/2
   "topics": {
     "politics": 5.2,
     "sports": 80.1
+  },
+  "negative_reviews": {
+    "1star": 1,
+    "2star": 10
   }
 }
 
 GET my-index-000001/_search
 {
-  "query": {
+  "query": { <4>
     "rank_feature": {
       "field": "topics.politics"
     }
   }
 }
+
+GET my-index-000001/_search
+{
+  "query": { <5>
+    "rank_feature": {
+      "field": "negative_reviews.1star"
+    }
+  }
+}
 --------------------------------------------------
 
 <1> Rank features fields must use the `rank_features` field type
-<2> Rank features fields must be a hash with string keys and strictly positive numeric values
+<2> Rank features that correlate negatively with the score must declare it by setting `positive_score_impact` to `false`
+<3> Rank features fields must be a hash with string keys and strictly positive numeric values
+<4> This query ranks documents by how much they are about the "politics" topic.
+<5> This query ranks documents inversely to the number of "1star" reviews they received.
+
 
 NOTE: `rank_features` fields only support single-valued features and strictly
 positive values. Multi-valued fields and zero or negative values will be rejected.
@@ -63,3 +88,9 @@ only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> queries.
 NOTE: `rank_features` fields only preserve 9 significant bits for the
 precision, which translates to a relative error of about 0.4%.
 
+Rank features that correlate negatively with the score should set
+`positive_score_impact` to `false` (defaults to `true`). The
+<<query-dsl-rank-feature-query,`rank_feature`>> query uses this setting to modify
+the scoring formula so that the score decreases as the value of the feature
+increases.
+

+ 24 - 6
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/RankFeaturesFieldMapper.java

@@ -17,7 +17,6 @@ import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.search.lookup.SearchLookup;
 
 import java.io.IOException;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Supplier;
@@ -30,8 +29,14 @@ public class RankFeaturesFieldMapper extends FieldMapper {
 
     public static final String CONTENT_TYPE = "rank_features";
 
+    private static RankFeaturesFieldType ft(FieldMapper in) {
+        return ((RankFeaturesFieldMapper)in).fieldType();
+    }
+
     public static class Builder extends FieldMapper.Builder {
 
+        private final Parameter<Boolean> positiveScoreImpact
+            = Parameter.boolParam("positive_score_impact", false, m -> ft(m).positiveScoreImpact, true);
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();
 
         public Builder(String name) {
@@ -40,14 +45,14 @@ public class RankFeaturesFieldMapper extends FieldMapper {
 
         @Override
         protected List<Parameter<?>> getParameters() {
-            return Collections.singletonList(meta);
+            return List.of(positiveScoreImpact, meta);
         }
 
         @Override
         public RankFeaturesFieldMapper build(ContentPath contentPath) {
             return new RankFeaturesFieldMapper(
-                    name, new RankFeaturesFieldType(buildFullName(contentPath), meta.getValue()),
-                    multiFieldsBuilder.build(this, contentPath), copyTo.build());
+                    name, new RankFeaturesFieldType(buildFullName(contentPath), meta.getValue(), positiveScoreImpact.getValue()),
+                    multiFieldsBuilder.build(this, contentPath), copyTo.build(), positiveScoreImpact.getValue());
         }
     }
 
@@ -55,8 +60,11 @@ public class RankFeaturesFieldMapper extends FieldMapper {
 
     public static final class RankFeaturesFieldType extends MappedFieldType {
 
-        public RankFeaturesFieldType(String name, Map<String, String> meta) {
+        private final boolean positiveScoreImpact;
+
+        public RankFeaturesFieldType(String name, Map<String, String> meta, boolean positiveScoreImpact) {
             super(name, false, false, false, TextSearchInfo.NONE, meta);
+            this.positiveScoreImpact = positiveScoreImpact;
         }
 
         @Override
@@ -64,6 +72,10 @@ public class RankFeaturesFieldMapper extends FieldMapper {
             return CONTENT_TYPE;
         }
 
+        public boolean positiveScoreImpact() {
+            return positiveScoreImpact;
+        }
+
         @Override
         public Query existsQuery(SearchExecutionContext context) {
             throw new IllegalArgumentException("[rank_features] fields do not support [exists] queries");
@@ -85,9 +97,12 @@ public class RankFeaturesFieldMapper extends FieldMapper {
         }
     }
 
+    private final boolean positiveScoreImpact;
+
     private RankFeaturesFieldMapper(String simpleName, MappedFieldType mappedFieldType,
-                                    MultiFields multiFields, CopyTo copyTo) {
+            MultiFields multiFields, CopyTo copyTo, boolean positiveScoreImpact) {
         super(simpleName, mappedFieldType, Lucene.KEYWORD_ANALYZER, multiFields, copyTo);
+        this.positiveScoreImpact = positiveScoreImpact;
     }
 
     @Override
@@ -124,6 +139,9 @@ public class RankFeaturesFieldMapper extends FieldMapper {
                     throw new IllegalArgumentException("[rank_features] fields do not support indexing multiple values for the same " +
                             "rank feature [" + key + "] in the same document");
                 }
+                if (positiveScoreImpact == false) {
+                    value = 1 / value;
+                }
                 context.doc().addWithKey(key, new FeatureField(name(), feature, value));
             } else {
                 throw new IllegalArgumentException("[rank_features] fields take hashes that map a feature to a strictly positive " +

+ 29 - 2
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/RankFeaturesFieldMapperTests.java

@@ -45,8 +45,8 @@ public class RankFeaturesFieldMapperTests extends MapperTestCase {
     }
 
     @Override
-    protected void registerParameters(ParameterChecker checker) {
-        // no parameters to configure
+    protected void registerParameters(ParameterChecker checker) throws IOException {
+        checker.registerConflictCheck("positive_score_impact", b -> b.field("positive_score_impact", false));
     }
 
     @Override
@@ -80,6 +80,33 @@ public class RankFeaturesFieldMapperTests extends MapperTestCase {
         assertTrue(freq1 < freq2);
     }
 
+    public void testNegativeScoreImpact() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(
+            fieldMapping(b -> b.field("type", "rank_features").field("positive_score_impact", false))
+        );
+
+        ParsedDocument doc1 = mapper.parse(source(this::writeField));
+
+        IndexableField[] fields = doc1.rootDoc().getFields("field");
+        assertEquals(2, fields.length);
+        assertThat(fields[0], Matchers.instanceOf(FeatureField.class));
+        FeatureField featureField1 = null;
+        FeatureField featureField2 = null;
+        for (IndexableField field : fields) {
+            if (field.stringValue().equals("ten")) {
+                featureField1 = (FeatureField)field;
+            } else if (field.stringValue().equals("twenty")) {
+                featureField2 = (FeatureField)field;
+            } else {
+                throw new UnsupportedOperationException();
+            }
+        }
+
+        int freq1 = RankFeatureFieldMapperTests.getFrequency(featureField1.tokenStream(null, null));
+        int freq2 = RankFeatureFieldMapperTests.getFrequency(featureField2.tokenStream(null, null));
+        assertTrue(freq1 > freq2);
+    }
+
     public void testRejectMultiValuedFields() throws MapperParsingException, IOException {
         DocumentMapper mapper = createDocumentMapper(mapping(b -> {
             b.startObject("field").field("type", "rank_features").endObject();

+ 1 - 1
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/RankFeaturesFieldTypeTests.java

@@ -13,7 +13,7 @@ import java.util.Collections;
 public class RankFeaturesFieldTypeTests extends FieldTypeTestCase {
 
     public void testIsNotAggregatable() {
-        MappedFieldType fieldType = new RankFeaturesFieldMapper.RankFeaturesFieldType("field", Collections.emptyMap());
+        MappedFieldType fieldType = new RankFeaturesFieldMapper.RankFeaturesFieldType("field", Collections.emptyMap(), true);
         assertFalse(fieldType.isAggregatable());
     }
 }

+ 42 - 0
modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/rank_features/10_basic.yml

@@ -9,6 +9,10 @@ setup:
               properties:
                 tags:
                    type: rank_features
+                negative_reviews:
+                   type: rank_features
+                   positive_score_impact: false
+
 
   - do:
       index:
@@ -18,6 +22,9 @@ setup:
           tags:
             foo: 3
             bar: 5
+          negative_reviews:
+            1star: 10
+            2star: 1
 
   - do:
       index:
@@ -27,6 +34,9 @@ setup:
           tags:
             bar: 6
             quux: 10
+          negative_reviews:
+            1star: 1
+            2star: 10
 
   - do:
       indices.refresh: {}
@@ -122,3 +132,35 @@ setup:
       hits.hits.1._id: "1"
   - match:
       hits.hits.1._score: 5.0
+
+
+---
+"Linear negative impact":
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            rank_feature:
+              field: negative_reviews.1star
+              linear: {}
+
+  - match:
+      hits.hits.0._id: "2"
+  - match:
+      hits.hits.1._id: "1"
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            rank_feature:
+              field: negative_reviews.2star
+              linear: {}
+
+  - match:
+      hits.hits.0._id: "1"
+  - match:
+      hits.hits.1._id: "2"