
fix bug in term vector api, payloads were not handled correctly when some were missing

The array holding the payloads (TermVectorFields.payloads) is reused for each token. If the
previous token had payloads but the current token did not, the payloads of the previous
token were returned, because they were never invalidated. For example, if a field contained
only two tokens, each occurring once, the first with a payload and the second without,
then the payload of the first token was also returned for the second.

closes #3873
Britta Weber committed 12 years ago
commit 719d1e0318
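
The core of the problem is a per-position buffer that is reused across tokens and only written when a payload is present, so a token without a payload silently keeps the bytes of an earlier one. Below is a minimal, self-contained sketch of that pattern and of the idea behind the fix (always reset the reused entry and treat a zero length as "no payload" when rendering the response). The Payload holder class and the sample data are illustrative stand-ins, not the actual Lucene/Elasticsearch types:

import java.util.Arrays;

public class ReusedPayloadBufferSketch {

    // Simplified stand-in for the reused BytesRef entries in TermVectorFields.payloads.
    static final class Payload {
        byte[] bytes = new byte[0];
        int length = 0;
    }

    // One reusable slot per position, mirroring the reused payloads[] array.
    static final Payload[] slots = { new Payload() };

    // Buggy variant: the slot is only touched when a payload exists,
    // so a token without a payload keeps the previous token's bytes.
    static void readBuggy(byte[] payloadOrNull) {
        if (payloadOrNull != null && payloadOrNull.length > 0) {
            slots[0].bytes = Arrays.copyOf(payloadOrNull, payloadOrNull.length);
            slots[0].length = payloadOrNull.length;
        }
        // else: slot still holds the previous token's payload -> stale data
    }

    // Fixed variant: the slot's length is always overwritten,
    // so length 0 unambiguously means "no payload".
    static void readFixed(byte[] payloadOrNull) {
        int len = (payloadOrNull == null) ? 0 : payloadOrNull.length;
        slots[0].bytes = (len == 0) ? new byte[0] : Arrays.copyOf(payloadOrNull, len);
        slots[0].length = len;
    }

    public static void main(String[] args) {
        byte[] first = { 1, 2, 3 };   // first token: has a payload
        byte[] second = null;         // second token: no payload

        readBuggy(first);
        readBuggy(second);
        System.out.println("buggy: length for second token = " + slots[0].length); // 3 (stale)

        slots[0] = new Payload();
        readFixed(first);
        readFixed(second);
        System.out.println("fixed: length for second token = " + slots[0].length); // 0
    }
}

This mirrors the change below: the read side now always fills in length and offset of the reused entry, and the response builder checks for a positive length instead of null before emitting a payload field.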

+ 7 - 10
src/main/java/org/elasticsearch/action/termvector/TermVectorFields.java

@@ -27,7 +27,6 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.common.hppc.HppcMaps;
 import org.elasticsearch.common.io.stream.BytesStreamInput;
 
 import java.io.IOException;
@@ -246,16 +245,14 @@ public final class TermVectorFields extends Fields {
                             }
                             if (hasPayloads) {
                                 int payloadLength = input.readVInt();
-                                if (payloadLength > 0) {
-                                    if (payloads[i] == null) {
-                                        payloads[i] = new BytesRef(payloadLength);
-                                    } else {
-                                        payloads[i].grow(payloadLength);
-                                    }
-                                    input.readBytes(payloads[i].bytes, 0, payloadLength);
-                                    payloads[i].length = payloadLength;
-                                    payloads[i].offset = 0;
+                                if (payloads[i] == null) {
+                                    payloads[i] = new BytesRef(payloadLength);
+                                } else {
+                                    payloads[i].grow(payloadLength);
                                 }
+                                input.readBytes(payloads[i].bytes, 0, payloadLength);
+                                payloads[i].length = payloadLength;
+                                payloads[i].offset = 0;
                             }
                         }
                     }

+ 1 - 1
src/main/java/org/elasticsearch/action/termvector/TermVectorResponse.java

@@ -246,7 +246,7 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
                 builder.field(FieldStrings.START_OFFSET, currentStartOffset[i]);
                 builder.field(FieldStrings.END_OFFSET, currentEndOffset[i]);
             }
-            if (curTerms.hasPayloads() && (currentPayloads[i] != null)) {
+            if (curTerms.hasPayloads() && (currentPayloads[i].length() > 0)) {
                 builder.field(FieldStrings.PAYLOAD, currentPayloads[i]);
             }
             builder.endObject();

+ 0 - 1
src/main/java/org/elasticsearch/action/termvector/TermVectorWriter.java

@@ -149,7 +149,6 @@ final class TermVectorWriter {
     }
 
     private void writePayload(BytesRef payload) throws IOException {
-        assert (payload != null);
         if (payload != null) {
             output.writeVInt(payload.length);
             output.writeBytes(payload.bytes, payload.offset, payload.length);