瀏覽代碼

Add limits for ngram and shingle settings (#27211)

* Add limits for ngram and shingle settings (#27211)

Create index-level settings:
max_ngram_diff - maximum allowed difference between max_gram and min_gram in
NGramTokenFilter/NGramTokenizer. Default is 1.
max_shingle_diff - maximum allowed difference between max_shingle_size and
 min_shingle_size in ShingleTokenFilter. Default is 3.

Throw an IllegalArgumentException when
trying to create NGramTokenFilter, NGramTokenizer, ShingleTokenFilter
where difference between max_size and min_size exceeds the settings value.

Closes #25887
Mayya Sharipova 8 年之前
父節點
當前提交
148376c2c5
共有 17 個文件被更改,包括 193 次插入和 1 次刪除
  1. 3 0
      core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
  2. 40 0
      core/src/main/java/org/elasticsearch/index/IndexSettings.java
  3. 14 0
      core/src/main/java/org/elasticsearch/index/analysis/NGramTokenizerFactory.java
  4. 16 0
      core/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java
  5. 22 0
      core/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java
  6. 2 0
      core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java
  7. 3 0
      core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java
  8. 15 0
      core/src/test/resources/org/elasticsearch/index/analysis/shingle_analysis2.json
  9. 3 0
      docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc
  10. 2 0
      docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc
  11. 3 0
      docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc
  12. 10 0
      docs/reference/index-modules.asciidoc
  13. 15 0
      modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java
  14. 2 0
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java
  15. 27 1
      modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
  16. 15 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml
  17. 1 0
      modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml

+ 3 - 0
core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

@@ -114,6 +114,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
         IndexSettings.MAX_INNER_RESULT_WINDOW_SETTING,
         IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING,
         IndexSettings.MAX_SCRIPT_FIELDS_SETTING,
+        IndexSettings.MAX_NGRAM_DIFF_SETTING,
+        IndexSettings.MAX_SHINGLE_DIFF_SETTING,
         IndexSettings.MAX_RESCORE_WINDOW_SETTING,
         IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
         IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
@@ -150,6 +152,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
         EngineConfig.INDEX_CODEC_SETTING,
         EngineConfig.INDEX_OPTIMIZE_AUTO_GENERATED_IDS,
         IndexMetaData.SETTING_WAIT_FOR_ACTIVE_SHARDS,
+
         // validate that built-in similarities don't get redefined
         Setting.groupSetting("index.similarity.", (s) -> {
             Map<String, Settings> groups = s.getAsGroups();

+ 40 - 0
core/src/main/java/org/elasticsearch/index/IndexSettings.java

@@ -107,6 +107,26 @@ public final class IndexSettings {
     public static final Setting<Integer> MAX_SCRIPT_FIELDS_SETTING =
         Setting.intSetting("index.max_script_fields", 32, 0, Property.Dynamic, Property.IndexScope);
 
+    /**
+     * Index setting describing for NGramTokenizer and NGramTokenFilter
+     * the maximum difference between
+     * max_gram (maximum length of characters in a gram) and
+     * min_gram (minimum length of characters in a gram).
+     * The default value is 1 as this is the default difference in NGramTokenizer,
+     * and is defensive as it prevents generating too many index terms.
+     */
+    public static final Setting<Integer> MAX_NGRAM_DIFF_SETTING =
+        Setting.intSetting("index.max_ngram_diff", 1, 0, Property.Dynamic, Property.IndexScope);
+
+    /**
+     * Index setting describing for ShingleTokenFilter
+     * the maximum difference between
+     * max_shingle_size and min_shingle_size.
+     * The default value of 3 is defensive as it prevents generating too many tokens.
+     */
+    public static final Setting<Integer> MAX_SHINGLE_DIFF_SETTING =
+        Setting.intSetting("index.max_shingle_diff", 3, 0, Property.Dynamic, Property.IndexScope);
+
     /**
      * Index setting describing the maximum value of allowed `docvalue_fields`that can be retrieved
      * per search request. The default maximum of 100 is defensive for the reason that retrieving
@@ -239,6 +259,8 @@ public final class IndexSettings {
     private volatile int maxRescoreWindow;
     private volatile int maxDocvalueFields;
     private volatile int maxScriptFields;
+    private volatile int maxNgramDiff;
+    private volatile int maxShingleDiff;
     private volatile boolean TTLPurgeDisabled;
     /**
      * The maximum number of refresh listeners allows on this shard.
@@ -342,6 +364,8 @@ public final class IndexSettings {
         maxRescoreWindow = scopedSettings.get(MAX_RESCORE_WINDOW_SETTING);
         maxDocvalueFields = scopedSettings.get(MAX_DOCVALUE_FIELDS_SEARCH_SETTING);
         maxScriptFields = scopedSettings.get(MAX_SCRIPT_FIELDS_SETTING);
+        maxNgramDiff = scopedSettings.get(MAX_NGRAM_DIFF_SETTING);
+        maxShingleDiff = scopedSettings.get(MAX_SHINGLE_DIFF_SETTING);
         TTLPurgeDisabled = scopedSettings.get(INDEX_TTL_DISABLE_PURGE_SETTING);
         maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
         maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
@@ -373,6 +397,8 @@ public final class IndexSettings {
         scopedSettings.addSettingsUpdateConsumer(MAX_RESCORE_WINDOW_SETTING, this::setMaxRescoreWindow);
         scopedSettings.addSettingsUpdateConsumer(MAX_DOCVALUE_FIELDS_SEARCH_SETTING, this::setMaxDocvalueFields);
         scopedSettings.addSettingsUpdateConsumer(MAX_SCRIPT_FIELDS_SETTING, this::setMaxScriptFields);
+        scopedSettings.addSettingsUpdateConsumer(MAX_NGRAM_DIFF_SETTING, this::setMaxNgramDiff);
+        scopedSettings.addSettingsUpdateConsumer(MAX_SHINGLE_DIFF_SETTING, this::setMaxShingleDiff);
         scopedSettings.addSettingsUpdateConsumer(INDEX_WARMER_ENABLED_SETTING, this::setEnableWarmer);
         scopedSettings.addSettingsUpdateConsumer(INDEX_GC_DELETES_SETTING, this::setGCDeletes);
         scopedSettings.addSettingsUpdateConsumer(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING, this::setTranslogFlushThresholdSize);
@@ -641,6 +667,20 @@ public final class IndexSettings {
         this.maxDocvalueFields = maxDocvalueFields;
     }
 
+    /**
+     * Returns the maximum allowed difference between max and min length of ngram
+     */
+    public int getMaxNgramDiff() { return this.maxNgramDiff; }
+
+    private void setMaxNgramDiff(int maxNgramDiff) { this.maxNgramDiff = maxNgramDiff; }
+
+    /**
+     * Returns the maximum allowed difference between max and min shingle_size
+     */
+    public int getMaxShingleDiff() { return this.maxShingleDiff; }
+
+    private void setMaxShingleDiff(int maxShingleDiff) { this.maxShingleDiff = maxShingleDiff; }
+
     /**
      * Returns the maximum number of allowed script_fields to retrieve in a search request
      */

+ 14 - 0
core/src/main/java/org/elasticsearch/index/analysis/NGramTokenizerFactory.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -84,8 +85,21 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
 
     public NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
+        int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
         this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
+        int ngramDiff = maxGram - minGram;
+        if (ngramDiff > maxAllowedNgramDiff) {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1)) {
+                throw new IllegalArgumentException(
+                    "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+                        + maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
+                        + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.");
+            } else {
+                deprecationLogger.deprecated("Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
+                    + "expected difference must be less than or equal to: [" + maxAllowedNgramDiff + "]");
+            }
+        }
         this.matcher = parseTokenChars(settings.getAsList("token_chars"));
     }
 

+ 16 - 0
core/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java

@@ -22,6 +22,7 @@ package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -32,9 +33,24 @@ public class ShingleTokenFilterFactory extends AbstractTokenFilterFactory {
 
     public ShingleTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
+        int maxAllowedShingleDiff = indexSettings.getMaxShingleDiff();
         Integer maxShingleSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
         Integer minShingleSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
         Boolean outputUnigrams = settings.getAsBoolean("output_unigrams", true);
+
+        int shingleDiff = maxShingleSize - minShingleSize + (outputUnigrams ? 1 : 0);
+        if (shingleDiff > maxAllowedShingleDiff) {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1)) {
+                throw new IllegalArgumentException(
+                    "In Shingle TokenFilter the difference between max_shingle_size and min_shingle_size (and +1 if outputting unigrams)"
+                        + " must be less than or equal to: [" + maxAllowedShingleDiff + "] but was [" + shingleDiff + "]. This limit"
+                        + " can be set by changing the [" + IndexSettings.MAX_SHINGLE_DIFF_SETTING.getKey() + "] index level setting.");
+            } else {
+                deprecationLogger.deprecated("Deprecated big difference between maxShingleSize and minShingleSize in Shingle TokenFilter,"
+                    + "expected difference must be less than or equal to: [" + maxAllowedShingleDiff + "]");
+            }
+        }
+
         Boolean outputUnigramsIfNoShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false);
         String tokenSeparator = settings.get("token_separator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
         String fillerToken = settings.get("filler_token", ShingleFilter.DEFAULT_FILLER_TOKEN);

+ 22 - 0
core/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java

@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 
@@ -102,4 +103,25 @@ public class ShingleTokenFilterFactoryTests extends ESTokenStreamTestCase {
             assertFalse(stream.hasAttribute(DisableGraphAttribute.class));
         }
     }
+
+    /*
+    * test that throws an error when trying to get a ShingleTokenFilter where difference between max_shingle_size and min_shingle_size
+    * is greater than the allowed value of max_shingle_diff
+     */
+    public void testMaxShingleDiffException() throws Exception{
+        String RESOURCE2 = "/org/elasticsearch/index/analysis/shingle_analysis2.json";
+        int maxAllowedShingleDiff = 3;
+        int shingleDiff = 8;
+        try {
+            ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE2);
+            analysis.tokenFilter.get("shingle");
+            fail();
+        } catch (IllegalArgumentException ex) {
+            assertEquals(
+                "In Shingle TokenFilter the difference between max_shingle_size and min_shingle_size (and +1 if outputting unigrams)"
+                    + " must be less than or equal to: [" + maxAllowedShingleDiff + "] but was [" + shingleDiff + "]. This limit"
+                    + " can be set by changing the [" + IndexSettings.MAX_SHINGLE_DIFF_SETTING.getKey() + "] index level setting.",
+                ex.getMessage());
+        }
+    }
 }

+ 2 - 0
core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java

@@ -30,6 +30,7 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.query.MatchQueryBuilder;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder;
@@ -1802,6 +1803,7 @@ public class SearchQueryIT extends ESIntegTestCase {
     public void testNGramCopyField() {
         CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
                 .put(indexSettings())
+                .put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 9)
                 .put("index.analysis.analyzer.my_ngram_analyzer.type", "custom")
                 .put("index.analysis.analyzer.my_ngram_analyzer.tokenizer", "my_ngram_tokenizer")
                 .put("index.analysis.tokenizer.my_ngram_tokenizer.type", "nGram")

+ 3 - 0
core/src/test/java/org/elasticsearch/search/suggest/SuggestSearchIT.java

@@ -28,6 +28,7 @@ import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.plugins.ScriptPlugin;
 import org.elasticsearch.script.ScriptContext;
@@ -683,6 +684,7 @@ public class SuggestSearchIT extends ESIntegTestCase {
     public void testShardFailures() throws IOException, InterruptedException {
         CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
                 .put(indexSettings())
+                .put(IndexSettings.MAX_SHINGLE_DIFF_SETTING.getKey(), 4)
                 .put("index.analysis.analyzer.suggest.tokenizer", "standard")
                 .putList("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler")
                 .put("index.analysis.filter.shingler.type", "shingle")
@@ -743,6 +745,7 @@ public class SuggestSearchIT extends ESIntegTestCase {
                 endObject();
         assertAcked(prepareCreate("test").setSettings(Settings.builder()
                 .put(indexSettings())
+                .put(IndexSettings.MAX_SHINGLE_DIFF_SETTING.getKey(), 4)
                 .put("index.analysis.analyzer.suggest.tokenizer", "standard")
                 .putList("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler")
                 .put("index.analysis.filter.shingler.type", "shingle")

+ 15 - 0
core/src/test/resources/org/elasticsearch/index/analysis/shingle_analysis2.json

@@ -0,0 +1,15 @@
+{
+    "index":{
+        "analysis":{
+            "filter":{
+                "shingle_filler":{
+                    "type":"shingle",
+                    "max_shingle_size" : 10,
+                    "min_shingle_size" : 2,
+                    "output_unigrams" : false,
+                    "filler_token" : "FILLER"
+                }
+            }            
+        }
+    }
+}

+ 3 - 0
docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc

@@ -13,3 +13,6 @@ type:
 |`max_gram` |Defaults to `2`.
 |============================
 
+The index level setting `index.max_ngram_diff` controls the maximum allowed
+difference between `max_gram` and `min_gram`.
+

+ 2 - 0
docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc

@@ -38,3 +38,5 @@ used if the position increment is greater than one when a `stop` filter is used
 together with the `shingle` filter. Defaults to `"_"`
 |=======================================================================
 
+The index level setting `index.max_shingle_diff` controls the maximum allowed
+difference between `max_shingle_size` and `min_shingle_size`.

+ 3 - 0
docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc

@@ -198,6 +198,9 @@ value.  The smaller the length, the more documents will match but the lower
 the quality of the matches.  The longer the length, the more specific the
 matches.  A tri-gram (length `3`) is a good place to start.
 
+The index level setting `index.max_ngram_diff` controls the maximum allowed
+difference between `max_gram` and `min_gram`.
+
 [float]
 === Example configuration
 

+ 10 - 0
docs/reference/index-modules.asciidoc

@@ -144,6 +144,16 @@ specific index module:
     The maximum number of `script_fields` that are allowed in a query.
     Defaults to `32`.
 
+`index.max_ngram_diff`::
+
+    The maximum allowed difference between min_gram and max_gram for NGramTokenizer and NGramTokenFilter.
+    Defaults to `1`.
+
+`index.max_shingle_diff`::
+
+    The maximum allowed difference between max_shingle_size and min_shingle_size for ShingleTokenFilter.
+    Defaults to `3`.
+
 `index.blocks.read_only`::
 
     Set to `true` to make the index and index metadata read only, `false` to

+ 15 - 0
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java

@@ -25,6 +25,8 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.Version;
+
 
 
 public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -36,8 +38,21 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
 
     NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
+        int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
         this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
+        int ngramDiff = maxGram - minGram;
+        if (ngramDiff > maxAllowedNgramDiff) {
+            if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1)) {
+                throw new IllegalArgumentException(
+                    "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+                        + maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
+                        + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.");
+            } else {
+                deprecationLogger.deprecated("Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
+                    + "expected difference must be less than or equal to: [" + maxAllowedNgramDiff + "]");
+            }
+        }
     }
 
     @Override

+ 2 - 0
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java

@@ -21,6 +21,7 @@ package org.elasticsearch.analysis.common;
 
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.query.Operator;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
@@ -66,6 +67,7 @@ public class HighlighterWithAnalyzersTests extends ESIntegTestCase {
                         .endObject())
                 .setSettings(Settings.builder()
                         .put(indexSettings())
+                        .put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 19)
                         .put("analysis.tokenizer.autocomplete.max_gram", 20)
                         .put("analysis.tokenizer.autocomplete.min_gram", 1)
                         .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")

+ 27 - 1
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java

@@ -76,7 +76,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
     public void testNoTokenChars() throws IOException {
         final Index index = new Index("test", "_na_");
         final String name = "ngr";
-        final Settings indexSettings = newAnalysisSettingsBuilder().build();
+        final Settings indexSettings = newAnalysisSettingsBuilder().put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 2).build();
+
         final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
             .putList("token_chars", new String[0]).build();
         Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
@@ -152,6 +153,31 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
     }
 
 
+    /*
+    * test that throws an error when trying to get a NGramTokenizer where difference between max_gram and min_gram
+    * is greater than the allowed value of max_ngram_diff
+     */
+    public void testMaxNGramDiffException() throws Exception{
+        final Index index = new Index("test", "_na_");
+        final String name = "ngr";
+        final Settings indexSettings = newAnalysisSettingsBuilder().build();
+        IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
+
+        int maxAllowedNgramDiff = indexProperties.getMaxNgramDiff();
+        int ngramDiff = maxAllowedNgramDiff + 1;
+        int min_gram = 2;
+        int max_gram = min_gram + ngramDiff;
+
+        final Settings settings = newAnalysisSettingsBuilder().put("min_gram", min_gram).put("max_gram", max_gram).build();
+        IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
+            new NGramTokenizerFactory(indexProperties, null, name, settings).create());
+        assertEquals(
+            "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: ["
+                + maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
+                + IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey() + "] index level setting.",
+            ex.getMessage());
+    }
+
     private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException {
         Field[] declaredFields = Version.class.getFields();
         List<Field> versionFields = new ArrayList<>();

+ 15 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml

@@ -27,6 +27,21 @@
     - match:  { detail.tokenizer.tokens.2.token: od }
 
 ---
+"nGram_exception":
+    - skip:
+        version: " - 6.99.99"
+        reason: only starting from version 7.x this throws an error
+    - do:
+        catch: /The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to[:] \[1\] but was \[2\]\. This limit can be set by changing the \[index.max_ngram_diff\] index level setting\./
+        indices.analyze:
+          body:
+            text: good
+            explain: true
+            tokenizer:
+              type: nGram
+              min_gram: 2
+              max_gram: 4
+---
 "simple_pattern":
     - do:
         indices.analyze:

+ 1 - 0
modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml

@@ -6,6 +6,7 @@
           settings:
             number_of_shards: 1
             number_of_replicas: 0
+            index.max_ngram_diff: 19
             analysis:
               tokenizer:
                 my_ngramt: