
document and test custom analyzer position offset gap

Matt Weber · 10 years ago
commit 63c4a214db

+ 4 - 0
docs/reference/analysis/analyzers/custom-analyzer.asciidoc

@@ -18,6 +18,9 @@ filters.
 
 |`char_filter` |An optional list of logical / registered name of char
 filters.
+
+|`position_offset_gap` |An optional number of positions to increment
+between each value of a multi-valued field that uses this analyzer.
 |=======================================================================
 
 Here is an example:
@@ -32,6 +35,7 @@ index :
                 tokenizer : myTokenizer1
                 filter : [myTokenFilter1, myTokenFilter2]
                 char_filter : [my_html]
+                position_offset_gap : 256
         tokenizer :
             myTokenizer1 :
                 type : standard
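For context: the position increment gap is what Lucene inserts between the last token of one value and the first token of the next when a field has multiple values, so phrase and slop queries cannot match across value boundaries. The following is a minimal, self-contained sketch of that behavior. It is not part of this commit and is written against a recent Lucene (8+), so class names and signatures differ from the 1.x-era code this change targets; the class name PositionGapDemo is illustrative.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class PositionGapDemo {
    public static void main(String[] args) throws Exception {
        // Whitespace analyzer that reports a 256-position gap between
        // consecutive values of a multi-valued field, mirroring the
        // position_offset_gap : 256 setting documented above.
        Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                return new TokenStreamComponents(new WhitespaceTokenizer());
            }

            @Override
            public int getPositionIncrementGap(String fieldName) {
                return 256;
            }
        };

        Directory dir = new ByteBuffersDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            // Two values for one field: "john" lands at position 0 and
            // "smith" at position 257 (0 + gap of 256 + increment of 1).
            doc.add(new TextField("name", "john", Field.Store.NO));
            doc.add(new TextField("name", "smith", Field.Store.NO));
            writer.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // An exact phrase needs adjacent positions, so it cannot
            // match across the 256-position value boundary.
            PhraseQuery phrase = new PhraseQuery("name", "john", "smith");
            System.out.println(searcher.search(phrase, 10).totalHits); // 0 hits
        }
    }
}

With the default gap of 0 the phrase would match, since "smith" would immediately follow "john" at position 1.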

+ 6 - 0
src/test/java/org/elasticsearch/index/analysis/AnalysisModuleTests.java

@@ -152,6 +152,12 @@ public class AnalysisModuleTests extends ElasticsearchTestCase {
 //        html = (HtmlStripCharFilterFactory) custom2.charFilters()[1];
 //        assertThat(html.readAheadLimit(), equalTo(1024));
 
+        // verify position offset gap
+        analyzer = analysisService.analyzer("custom6").analyzer();
+        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
+        CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
+        assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));
+
         // verify character mapping
         analyzer = analysisService.analyzer("custom5").analyzer();
         assertThat(analyzer, instanceOf(CustomAnalyzer.class));
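A note on the assertion just added: `getPositionIncrementGap(String fieldName)` comes from Lucene's `Analyzer` base class, and the index writer consults it between consecutive values of the same field. The custom analyzer applies its one configured gap regardless of the field name, which is why the test can pass the arbitrary string "any_string". For contrast, a minimal sketch of Lucene's default (not from this commit; written against a recent Lucene, with the class name DefaultGapDemo made up for illustration):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;

public class DefaultGapDemo {
    public static void main(String[] args) {
        // Analyzer's base implementation returns 0 for every field name:
        // values of a multi-valued field are indexed back to back unless
        // an analyzer overrides the gap, as custom6 now does with 256.
        System.out.println(new WhitespaceAnalyzer().getPositionIncrementGap("any_string")); // 0
    }
}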

+ 4 - 0
src/test/java/org/elasticsearch/index/analysis/test1.json

@@ -66,6 +66,10 @@
                     "tokenizer":"standard",
                     "char_filter":["my_mapping"]
                 },
+                "custom6":{
+                    "tokenizer":"standard",
+                    "position_offset_gap": 256
+                },
                 "czechAnalyzerWithStemmer":{
                     "tokenizer":"standard",
                     "filter":["standard", "lowercase", "stop", "czech_stem"]

+ 3 - 2
src/test/java/org/elasticsearch/index/analysis/test1.yml

@@ -49,7 +49,8 @@ index :
         tokenizer : standard
         char_filter : [my_mapping]
       custom6 :
-        type : standard
+        tokenizer : standard
+        position_offset_gap : 256
       custom7 :
         type : standard
         version: 3.6
@@ -58,4 +59,4 @@ index :
         filter : [standard, lowercase, stop, czech_stem]
       decompoundingAnalyzer :
         tokenizer : standard
-        filter : [dict_dec]
+        filter : [dict_dec]