@@ -866,6 +866,10 @@ public final class KeywordFieldMapper extends FieldMapper {
         if (value == null) {
             return;
         }
+        // if the field is disabled (not indexed, not stored, no doc values), skip indexing
+        if ((fieldType.indexOptions() == IndexOptions.NONE) && (fieldType.stored() == false) && (fieldType().hasDocValues() == false)) {
+            return;
+        }

         if (value.length() > fieldType().ignoreAbove()) {
             context.addIgnoredField(name());
@@ -885,35 +889,31 @@ public final class KeywordFieldMapper extends FieldMapper {
             context.getDimensions().addString(fieldType().name(), binaryValue);
         }

-        if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored() || fieldType().hasDocValues()) {
-            // If the UTF8 encoding of the field value is bigger than the max length 32766, Lucene fill fail the indexing request and, to
-            // roll back the changes, will mark the (possibly partially indexed) document as deleted. This results in deletes, even in an
-            // append-only workload, which in turn leads to slower merges, as these will potentially have to fall back to MergeStrategy.DOC
-            // instead of MergeStrategy.BULK. To avoid this, we do a preflight check here before indexing the document into Lucene.
-            if (binaryValue.length > MAX_TERM_LENGTH) {
-                byte[] prefix = new byte[30];
-                System.arraycopy(binaryValue.bytes, binaryValue.offset, prefix, 0, 30);
-                String msg = "Document contains at least one immense term in field=\""
-                    + fieldType().name()
-                    + "\" (whose "
-                    + "UTF8 encoding is longer than the max length "
-                    + MAX_TERM_LENGTH
-                    + "), all of which were "
-                    + "skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense "
-                    + "term is: '"
-                    + Arrays.toString(prefix)
-                    + "...'";
-                throw new IllegalArgumentException(msg);
-            }
+        // If the UTF8 encoding of the field value is bigger than the max length 32766, Lucene will fail the indexing request and, to
+        // roll back the changes, will mark the (possibly partially indexed) document as deleted. This results in deletes, even in an
+        // append-only workload, which in turn leads to slower merges, as these will potentially have to fall back to MergeStrategy.DOC
+        // instead of MergeStrategy.BULK. To avoid this, we do a preflight check here before indexing the document into Lucene.
+        if (binaryValue.length > MAX_TERM_LENGTH) {
+            byte[] prefix = new byte[30];
+            System.arraycopy(binaryValue.bytes, binaryValue.offset, prefix, 0, 30);
+            String msg = "Document contains at least one immense term in field=\""
+                + fieldType().name()
+                + "\" (whose "
+                + "UTF8 encoding is longer than the max length "
+                + MAX_TERM_LENGTH
+                + "), all of which were "
+                + "skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense "
+                + "term is: '"
+                + Arrays.toString(prefix)
+                + "...'";
+            throw new IllegalArgumentException(msg);
+        }

-            if (fieldType.indexOptions() != IndexOptions.NONE || fieldType().hasDocValues() || fieldType.stored()) {
-                Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
-                context.doc().add(field);
+        Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
+        context.doc().add(field);

-                if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
-                    context.addToFieldNames(fieldType().name());
-                }
-            }
+        if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
+            context.addToFieldNames(fieldType().name());
         }
     }
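
To make the new short-circuit easy to reason about in isolation, here is a minimal, self-contained sketch of its semantics. The FieldCapabilities record and isDisabled() helper below are hypothetical stand-ins for the three fieldType checks in the guard (indexOptions(), stored(), hasDocValues()); they are not Elasticsearch API.

public class DisabledFieldSketch {

    // Hypothetical stand-in for the capabilities the guard consults.
    record FieldCapabilities(boolean indexed, boolean stored, boolean hasDocValues) {

        // Mirrors the added guard: a keyword value is skipped only when the field
        // is neither searchable (indexed), nor fetchable (stored), nor usable for
        // sorting/aggregations (doc values).
        boolean isDisabled() {
            return indexed == false && stored == false && hasDocValues == false;
        }
    }

    public static void main(String[] args) {
        System.out.println(new FieldCapabilities(false, false, false).isDisabled()); // true: nothing to do
        System.out.println(new FieldCapabilities(false, false, true).isDisabled());  // false: doc values still need the value
    }
}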
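
The preflight check that the second hunk hoists out of the guard is also easy to reproduce outside the mapper. The sketch below only illustrates the idea; the preflight() helper and class name are made up, and the 32766-byte limit is Lucene's IndexWriter.MAX_TERM_LENGTH, inlined to keep the example dependency-free.

import java.nio.charset.StandardCharsets;

public class ImmenseTermPreflightSketch {

    // Lucene rejects terms whose UTF-8 encoding exceeds 32766 bytes
    // (IndexWriter.MAX_TERM_LENGTH); hard-coded here for self-containment.
    static final int MAX_TERM_LENGTH = 32766;

    static void preflight(String field, String value) {
        byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
        if (utf8.length > MAX_TERM_LENGTH) {
            // Failing before the document reaches the IndexWriter avoids the
            // rollback-as-delete behavior described in the comment above.
            throw new IllegalArgumentException(
                "immense term in field=\"" + field + "\": " + utf8.length + " bytes > " + MAX_TERM_LENGTH
            );
        }
    }

    public static void main(String[] args) {
        preflight("tag", "short value");       // fine
        preflight("tag", "x".repeat(40_000));  // throws: 40000 bytes > 32766
    }
}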