1
0
Эх сурвалжийг харах

Fix rank_features parsing for dots in feature name (#93756)

Currently, parsing a rank_features field where the key of the feature contains a
dot leads to a hard to understand Json parsing errors because we interpret the 
dot in the feature name to represent a start of a new object.
This change allows parsing those dots but throws a more legible IAE exception 
in case we encounter a dot in a feature name. We shouldn't allow dots in feature
names because it can create ambiguity around mappings with more than one 
'rank_features' fields that contain dots in their name and names partially overlap.
Christoph Büscher 2 жил өмнө
parent
commit
a3f4f0bb21

+ 34 - 23
modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeaturesFieldMapper.java

@@ -155,33 +155,44 @@ public class RankFeaturesFieldMapper extends FieldMapper {
         }
 
         String feature = null;
-        for (Token token = context.parser().nextToken(); token != Token.END_OBJECT; token = context.parser().nextToken()) {
-            if (token == Token.FIELD_NAME) {
-                feature = context.parser().currentName();
-            } else if (token == Token.VALUE_NULL) {
-                // ignore feature, this is consistent with numeric fields
-            } else if (token == Token.VALUE_NUMBER || token == Token.VALUE_STRING) {
-                final String key = name() + "." + feature;
-                float value = context.parser().floatValue(true);
-                if (context.doc().getByKey(key) != null) {
+        try {
+            // make sure that we don't expand dots in field names while parsing
+            context.path().setWithinLeafObject(true);
+            for (Token token = context.parser().nextToken(); token != Token.END_OBJECT; token = context.parser().nextToken()) {
+                if (token == Token.FIELD_NAME) {
+                    feature = context.parser().currentName();
+                    if (feature.contains(".")) {
+                        throw new IllegalArgumentException(
+                            "[rank_features] fields do not support dots in feature names but found [" + feature + "]"
+                        );
+                    }
+                } else if (token == Token.VALUE_NULL) {
+                    // ignore feature, this is consistent with numeric fields
+                } else if (token == Token.VALUE_NUMBER || token == Token.VALUE_STRING) {
+                    final String key = name() + "." + feature;
+                    float value = context.parser().floatValue(true);
+                    if (context.doc().getByKey(key) != null) {
+                        throw new IllegalArgumentException(
+                            "[rank_features] fields do not support indexing multiple values for the same "
+                                + "rank feature ["
+                                + key
+                                + "] in the same document"
+                        );
+                    }
+                    if (positiveScoreImpact == false) {
+                        value = 1 / value;
+                    }
+                    context.doc().addWithKey(key, new FeatureField(name(), feature, value));
+                } else {
                     throw new IllegalArgumentException(
-                        "[rank_features] fields do not support indexing multiple values for the same "
-                            + "rank feature ["
-                            + key
-                            + "] in the same document"
+                        "[rank_features] fields take hashes that map a feature to a strictly positive "
+                            + "float, but got unexpected token "
+                            + token
                     );
                 }
-                if (positiveScoreImpact == false) {
-                    value = 1 / value;
-                }
-                context.doc().addWithKey(key, new FeatureField(name(), feature, value));
-            } else {
-                throw new IllegalArgumentException(
-                    "[rank_features] fields take hashes that map a feature to a strictly positive "
-                        + "float, but got unexpected token "
-                        + token
-                );
             }
+        } finally {
+            context.path().setWithinLeafObject(false);
         }
     }
 

+ 10 - 0
modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/RankFeaturesFieldMapperTests.java

@@ -126,6 +126,16 @@ public class RankFeaturesFieldMapperTests extends MapperTestCase {
         assertTrue(freq1 > freq2);
     }
 
+    public void testDotinFieldname() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
+        MapperParsingException ex = expectThrows(
+            MapperParsingException.class,
+            () -> mapper.parse(source(b -> b.field("field", Map.of("politi.cs", 10, "sports", 20))))
+        );
+        assertThat(ex.getCause().getMessage(), containsString("do not support dots in feature names"));
+        assertThat(ex.getCause().getMessage(), containsString("politi.cs"));
+    }
+
     public void testRejectMultiValuedFields() throws MapperParsingException, IOException {
         DocumentMapper mapper = createDocumentMapper(mapping(b -> {
             b.startObject("field").field("type", "rank_features").endObject();

+ 17 - 0
modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/rank_features/10_basic.yml

@@ -14,6 +14,7 @@ setup:
                    positive_score_impact: false
 
 
+
   - do:
       index:
         index: test
@@ -164,3 +165,19 @@ setup:
       hits.hits.0._id: "1"
   - match:
       hits.hits.1._id: "2"
+
+---
+"Dot in feature name":
+
+  - do:
+      catch: /\[rank_features\] fields do not support dots in feature names but found \[qu\.ux\]/
+      index:
+        index: test
+        id: "3"
+        body:
+          tags:
+            bar: 6
+            qu.ux: 10
+          negative_reviews:
+            1star: 1
+            2star: 10