
token_count type : add an option to count tokens (fix #23227) (#24175)

Add the option "enable_position_increments", which defaults to true.
If it is set to false, the indexed value is the number of tokens
rather than the sum of position increments.
Fabien Baligand authored 8 years ago, commit 4a45579506
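
Not part of the commit: a minimal standalone sketch of the behaviour the new option toggles, assuming only Lucene is on the classpath. It reuses the counting loop introduced below in countPositions(); the class name TokenCountOptionDemo and the field name "body" are illustrative. EnglishAnalyzer removes stop words such as "the", so the two modes diverge as soon as the text contains one.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

import java.io.IOException;

public class TokenCountOptionDemo {

    // Same counting loop as the patched countPositions() in this commit.
    static int count(Analyzer analyzer, String text, boolean enablePositionIncrements) throws IOException {
        try (TokenStream stream = analyzer.tokenStream("body", text)) {
            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
            int count = 0;
            stream.reset();
            while (stream.incrementToken()) {
                int increment = posIncr.getPositionIncrement();
                count += enablePositionIncrements ? increment : Math.min(1, increment);
            }
            stream.end();
            if (enablePositionIncrements) {
                count += posIncr.getPositionIncrement(); // trailing increment, e.g. from trailing stop words
            }
            return count;
        }
    }

    public static void main(String[] args) throws IOException {
        Analyzer english = new EnglishAnalyzer(); // removes stop words such as "the"
        String text = "the quick brown fox";
        System.out.println(count(english, text, true));  // 4: the gap left by "the" is still counted
        System.out.println(count(english, text, false)); // 3: only the surviving tokens are counted
    }
}

The same divergence shows up in the integration test changed by this commit: for the "bulk2" document the standard-analyzed sub-field counts 5 while the english-analyzed sub-field with the option disabled counts 4.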

+ 43 - 10
core/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java

@@ -22,10 +22,8 @@ package org.elasticsearch.index.mapper;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
-import org.elasticsearch.Version;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
@@ -36,6 +34,7 @@ import java.util.List;
 import java.util.Map;
 
 import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeIntegerValue;
+import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeBooleanValue;
 import static org.elasticsearch.index.mapper.TypeParsers.parseField;
 
 /**
@@ -47,10 +46,12 @@ public class TokenCountFieldMapper extends FieldMapper {
 
     public static class Defaults {
         public static final MappedFieldType FIELD_TYPE = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        public static final boolean DEFAULT_POSITION_INCREMENTS = true;
     }
 
     public static class Builder extends FieldMapper.Builder<Builder, TokenCountFieldMapper> {
         private NamedAnalyzer analyzer;
+        private boolean enablePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
 
         public Builder(String name) {
             super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
@@ -66,18 +67,26 @@ public class TokenCountFieldMapper extends FieldMapper {
             return analyzer;
         }
 
+        public Builder enablePositionIncrements(boolean enablePositionIncrements) {
+            this.enablePositionIncrements = enablePositionIncrements;
+            return this;
+        }
+
+        public boolean enablePositionIncrements() {
+            return enablePositionIncrements;
+        }
+
         @Override
         public TokenCountFieldMapper build(BuilderContext context) {
             setupFieldType(context);
             return new TokenCountFieldMapper(name, fieldType, defaultFieldType,
-                    context.indexSettings(), analyzer, multiFieldsBuilder.build(this, context), copyTo);
+                    context.indexSettings(), analyzer, enablePositionIncrements, multiFieldsBuilder.build(this, context), copyTo);
         }
     }
 
     public static class TypeParser implements Mapper.TypeParser {
         @Override
-        @SuppressWarnings("unchecked")
-        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
+        public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
             TokenCountFieldMapper.Builder builder = new TokenCountFieldMapper.Builder(name);
             for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
                 Map.Entry<String, Object> entry = iterator.next();
@@ -93,6 +102,9 @@ public class TokenCountFieldMapper extends FieldMapper {
                     }
                     builder.analyzer(analyzer);
                     iterator.remove();
+                } else if (propName.equals("enable_position_increments")) {
+                    builder.enablePositionIncrements(nodeBooleanValue(propNode));
+                    iterator.remove();
                 }
             }
             parseField(builder, name, node, parserContext);
@@ -104,11 +116,13 @@ public class TokenCountFieldMapper extends FieldMapper {
     }
 
     private NamedAnalyzer analyzer;
+    private boolean enablePositionIncrements;
 
     protected TokenCountFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
-            Settings indexSettings, NamedAnalyzer analyzer, MultiFields multiFields, CopyTo copyTo) {
+            Settings indexSettings, NamedAnalyzer analyzer, boolean enablePositionIncrements, MultiFields multiFields, CopyTo copyTo) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         this.analyzer = analyzer;
+        this.enablePositionIncrements = enablePositionIncrements;
     }
 
     @Override
@@ -124,7 +138,7 @@ public class TokenCountFieldMapper extends FieldMapper {
         if (value == null) {
             tokenCount = (Integer) fieldType().nullValue();
         } else {
-            tokenCount = countPositions(analyzer, name(), value);
+            tokenCount = countPositions(analyzer, name(), value, enablePositionIncrements);
         }
 
         boolean indexed = fieldType().indexOptions() != IndexOptions.NONE;
@@ -138,19 +152,26 @@ public class TokenCountFieldMapper extends FieldMapper {
      * @param analyzer analyzer to create token stream
      * @param fieldName field name to pass to analyzer
      * @param fieldValue field value to pass to analyzer
+     * @param enablePositionIncrements whether position increments should be counted
      * @return number of position increments in a token stream
      * @throws IOException if tokenStream throws it
      */
-    static int countPositions(Analyzer analyzer, String fieldName, String fieldValue) throws IOException {
+    static int countPositions(Analyzer analyzer, String fieldName, String fieldValue, boolean enablePositionIncrements) throws IOException {
         try (TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldValue)) {
             int count = 0;
             PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
             tokenStream.reset();
             while (tokenStream.incrementToken()) {
-                count += position.getPositionIncrement();
+                if (enablePositionIncrements) {
+                    count += position.getPositionIncrement();
+                } else {
+                    count += Math.min(1, position.getPositionIncrement());
+                }
             }
             tokenStream.end();
-            count += position.getPositionIncrement();
+            if (enablePositionIncrements) {
+                count += position.getPositionIncrement();
+            }
             return count;
         }
     }
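
To make the two branches above concrete, here is the same arithmetic reduced to plain Java (a sketch, not part of the commit). The increment sequence {0, 1, 2} with a trailing increment of 4 mirrors the canned token stream built in the unit tests further down, which assert exactly these two results.

public class CountPositionsArithmetic {
    public static void main(String[] args) {
        // Per-token position increments: a stacked token (0), a normal token (1)
        // and a token preceded by a one-position hole (2), plus a trailing
        // increment of 4 reported after the last token.
        int[] increments = {0, 1, 2};
        int trailingIncrement = 4;

        int withIncrements = 0;    // enable_position_increments = true (default)
        int withoutIncrements = 0; // enable_position_increments = false

        for (int increment : increments) {
            withIncrements += increment;                 // 0 + 1 + 2 = 3
            withoutIncrements += Math.min(1, increment); // 0 + 1 + 1 = 2
        }
        // Only the default mode adds the increment reported by TokenStream#end().
        withIncrements += trailingIncrement;             // 3 + 4 = 7

        System.out.println(withIncrements);    // 7, asserted by testCountPositionsWithIncrements
        System.out.println(withoutIncrements); // 2, asserted by testCountPositionsWithoutIncrements
    }
}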
@@ -163,6 +184,14 @@ public class TokenCountFieldMapper extends FieldMapper {
         return analyzer.name();
     }
 
+    /**
+     * Indicates if position increments are counted.
+     * @return <code>true</code> if position increments are counted
+     */
+    public boolean enablePositionIncrements() {
+        return enablePositionIncrements;
+    }
+
     @Override
     protected String contentType() {
         return CONTENT_TYPE;
@@ -172,12 +201,16 @@ public class TokenCountFieldMapper extends FieldMapper {
     protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
         super.doMerge(mergeWith, updateAllTypes);
         this.analyzer = ((TokenCountFieldMapper) mergeWith).analyzer;
+        this.enablePositionIncrements = ((TokenCountFieldMapper) mergeWith).enablePositionIncrements;
     }
 
     @Override
     protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
         super.doXContentBody(builder, includeDefaults, params);
         builder.field("analyzer", analyzer());
+        if (includeDefaults || enablePositionIncrements() != Defaults.DEFAULT_POSITION_INCREMENTS) {
+            builder.field("enable_position_increments", enablePositionIncrements());
+        }
     }
 
 }

+ 24 - 11
core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java

@@ -131,6 +131,12 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
                                     .field("analyzer", "standard")
                                     .field("doc_values", true)
                                 .endObject()
+                                .startObject("token_count_without_position_increments")
+                                    .field("type", "token_count")
+                                    .field("analyzer", "english")
+                                    .field("enable_position_increments", false)
+                                    .field("store", true)
+                                .endObject()
                             .endObject()
                         .endObject()
                     .endObject()
@@ -169,6 +175,7 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
     private SearchRequestBuilder prepareSearch() {
         SearchRequestBuilder request = client().prepareSearch("test").setTypes("test");
         request.addStoredField("foo.token_count");
+        request.addStoredField("foo.token_count_without_position_increments");
         if (loadCountedFields) {
             request.addStoredField("foo");
         }
@@ -186,32 +193,38 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
         for (SearchHit hit : result.getHits()) {
             String id = hit.getId();
             if (id.equals("single")) {
-                assertSearchHit(hit, 4);
+                assertSearchHit(hit, new int[]{4}, new int[]{4});
             } else if (id.equals("bulk1")) {
-                assertSearchHit(hit, 3);
+                assertSearchHit(hit, new int[]{3}, new int[]{3});
             } else if (id.equals("bulk2")) {
-                assertSearchHit(hit, 5);
+                assertSearchHit(hit, new int[]{5}, new int[]{4});
             } else if (id.equals("multi")) {
-                assertSearchHit(hit, 2, 7);
+                assertSearchHit(hit, new int[]{2, 7}, new int[]{2, 7});
             } else if (id.equals("multibulk1")) {
-                assertSearchHit(hit, 1, 8);
+                assertSearchHit(hit, new int[]{1, 8}, new int[]{1, 8});
             } else if (id.equals("multibulk2")) {
-                assertSearchHit(hit, 6, 10);
+                assertSearchHit(hit, new int[]{6, 10}, new int[]{3, 9});
             } else {
                 throw new ElasticsearchException("Unexpected response!");
             }
         }
     }
 
-    private void assertSearchHit(SearchHit hit, int... termCounts) {
+    private void assertSearchHit(SearchHit hit, int[] standardTermCounts, int[] englishTermCounts) {
         assertThat(hit.field("foo.token_count"), not(nullValue()));
-        assertThat(hit.field("foo.token_count").getValues().size(), equalTo(termCounts.length));
-        for (int i = 0; i < termCounts.length; i++) {
-            assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(termCounts[i]));
+        assertThat(hit.field("foo.token_count").getValues().size(), equalTo(standardTermCounts.length));
+        for (int i = 0; i < standardTermCounts.length; i++) {
+            assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
+        }
+
+        assertThat(hit.field("foo.token_count_without_position_increments"), not(nullValue()));
+        assertThat(hit.field("foo.token_count_without_position_increments").getValues().size(), equalTo(englishTermCounts.length));
+        for (int i = 0; i < englishTermCounts.length; i++) {
+            assertThat((Integer) hit.field("foo.token_count_without_position_increments").getValues().get(i), equalTo(englishTermCounts[i]));
         }
 
         if (loadCountedFields && storeCountedFields) {
-            assertThat(hit.field("foo").getValues().size(), equalTo(termCounts.length));
+            assertThat(hit.field("foo").getValues().size(), equalTo(standardTermCounts.length));
         }
     }
 }

+ 35 - 17
core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperTests.java

@@ -24,23 +24,18 @@ import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.elasticsearch.Version;
-import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.compress.CompressedXContent;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESSingleNodeTestCase;
 import org.elasticsearch.test.InternalSettingsPlugin;
-import org.elasticsearch.test.VersionUtils;
 
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 
-import static com.carrotsearch.randomizedtesting.RandomizedTest.getRandom;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 
@@ -80,26 +75,49 @@ public class TokenCountFieldMapperTests extends ESSingleNodeTestCase {
         assertThat(((TokenCountFieldMapper) stage2.mappers().smartNameFieldMapper("tc")).analyzer(), equalTo("standard"));
     }
 
-    public void testCountPositions() throws IOException {
-        // We're looking to make sure that we:
-        Token t1 = new Token();      // Don't count tokens without an increment
+    /**
+     * When position increments are counted, we're looking to make sure that we:
+     * - don't count tokens without an increment
+     * - count normal tokens with one increment
+     * - count funny tokens with more than one increment
+     * - count the final token increments on the rare token streams that have them
+     */
+    public void testCountPositionsWithIncrements() throws IOException {
+        Analyzer analyzer = createMockAnalyzer();
+        assertThat(TokenCountFieldMapper.countPositions(analyzer, "", "", true), equalTo(7));
+    }
+
+    /**
+     * When position increments are not counted (only positions are counted), we're looking to make sure that we:
+     * - don't count tokens without an increment
+     * - count normal tokens with one increment
+     * - count funny tokens with more than one increment as only one
+     * - don't count the final token increments on the rare token streams that have them
+     */
+    public void testCountPositionsWithoutIncrements() throws IOException {
+        Analyzer analyzer = createMockAnalyzer();
+        assertThat(TokenCountFieldMapper.countPositions(analyzer, "", "", false), equalTo(2));
+    }
+
+    private Analyzer createMockAnalyzer() {
+        Token t1 = new Token();      // Token without an increment
         t1.setPositionIncrement(0);
         Token t2 = new Token();
-        t2.setPositionIncrement(1);  // Count normal tokens with one increment
+        t2.setPositionIncrement(1);  // Normal token with one increment
         Token t3 = new Token();
-        t2.setPositionIncrement(2);  // Count funny tokens with more than one increment
-        int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
+        t3.setPositionIncrement(2);  // Funny token with more than one increment
+        int finalTokenIncrement = 4; // Final token increment
         Token[] tokens = new Token[] {t1, t2, t3};
         Collections.shuffle(Arrays.asList(tokens), random());
         final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
         // TODO: we have no CannedAnalyzer?
         Analyzer analyzer = new Analyzer() {
-                @Override
-                public TokenStreamComponents createComponents(String fieldName) {
-                    return new TokenStreamComponents(new MockTokenizer(), tokenStream);
-                }
-            };
-        assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
+            @Override
+            public TokenStreamComponents createComponents(String fieldName) {
+                return new TokenStreamComponents(new MockTokenizer(), tokenStream);
+            }
+        };
+        return analyzer;
     }
 
     @Override

+ 6 - 6
docs/reference/mapping/types/token-count.asciidoc

@@ -48,12 +48,6 @@ GET my_index/_search
 <2> The `name.length` field is a `token_count` <<multi-fields,multi-field>> which will index the number of tokens in the `name` field.
 <3> This query matches only the document containing `Rachel Alice Williams`, as it contains three tokens.
 
-[NOTE]
-===================================================================
-Technically the `token_count` type sums position increments rather than
-counting tokens. This means that even if the analyzer filters out stop
-words they are included in the count.
-===================================================================
 
 [[token-count-params]]
 ==== Parameters for `token_count` fields
@@ -68,6 +62,12 @@ The following parameters are accepted by `token_count` fields:
     value. Required. For best performance, use an analyzer without token
     filters.
 
+`enable_position_increments`::
+
+    Indicates if position increments should be counted.
+    Set to `false` if you don't want to count tokens removed by analyzer filters
+    (like <<analysis-stop-tokenfilter,`stop`>>). Defaults to `true`.
+
 <<mapping-boost,`boost`>>::
 
     Mapping field-level query time boosting. Accepts a floating point number, defaults
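
Not part of the commit: a sketch of how the documented parameter could be set from Java, using the same XContentBuilder helper the integration test above relies on; the field names ("name", "length") and the choice of the built-in english analyzer are illustrative.

import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

import java.io.IOException;

public class TokenCountMappingSketch {
    public static XContentBuilder mapping() throws IOException {
        return XContentFactory.jsonBuilder()
            .startObject()
                .startObject("properties")
                    .startObject("name")
                        .field("type", "text")
                        .startObject("fields")
                            .startObject("length")
                                .field("type", "token_count")
                                .field("analyzer", "english")
                                // Count only the tokens that survive the analyzer's
                                // stop filter, not the positions they would occupy.
                                .field("enable_position_increments", false)
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();
    }
}

Disabling the option only changes the result when the analyzer drops tokens (stop words) or stacks several at the same position (synonyms); with an analyzer that does neither, both modes return the same count.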