
New plugin - Annotated_text field type (#30364)

New plugin for the annotated_text field type.
Largely a copy of the `text` field type, but adds the ability to include markdown-like syntax in the text.
The “AnnotatedText” class parses text+markup and converts it into plain text and AnnotationTokens.
The annotation token values are injected unchanged alongside the regular text tokens to provide a
form of additional indexed overlay useful in positional searches and highlighting.
Annotated_text fields do not support fielddata as we want to phase this out.
Also includes a new "annotated" highlighter type that retains annotations and merges in search
hits as additional annotation markup.
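For example, indexing "Investors in [Apple](Apple+Inc.) rejoiced." adds the single token
"Apple Inc." to the index at the same position as the text token "apple".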

Closes #29467
markharwood 7 years ago
parent commit 2fa09f062e
18 changed files with 2523 additions and 30 deletions
  1. docs/plugins/mapper-annotated-text.asciidoc (+328 -0)
  2. docs/plugins/mapper.asciidoc (+8 -0)
  3. docs/reference/cat/plugins.asciidoc (+1 -0)
  4. docs/reference/mapping/types.asciidoc (+1 -0)
  5. plugins/mapper-annotated-text/build.gradle (+23 -0)
  6. plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java (+776 -0)
  7. plugins/mapper-annotated-text/src/main/java/org/elasticsearch/plugin/mapper/AnnotatedTextPlugin.java (+44 -0)
  8. plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java (+201 -0)
  9. plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java (+64 -0)
  10. plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextClientYamlTestSuiteIT.java (+39 -0)
  11. plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java (+681 -0)
  12. plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextParsingTests.java (+73 -0)
  13. plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java (+185 -0)
  14. plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml (+44 -0)
  15. qa/vagrant/src/test/resources/packaging/tests/module_and_plugin_test_cases.bash (+8 -0)
  16. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java (+16 -3)
  17. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java (+1 -2)
  18. server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java (+30 -25)

+ 328 - 0
docs/plugins/mapper-annotated-text.asciidoc

@@ -0,0 +1,328 @@
+[[mapper-annotated-text]]
+=== Mapper Annotated Text Plugin
+
+experimental[]
+
+The mapper-annotated-text plugin provides the ability to index text that is a
+combination of free-text and special markup that is typically used to identify
+items of interest such as people or organisations (see NER or Named Entity Recognition
+tools). 
+
+
+The Elasticsearch markup allows one or more additional tokens to be injected, unchanged, into the token
+stream at the same position as the underlying text it annotates.
+
+:plugin_name: mapper-annotated-text
+include::install_remove.asciidoc[]
+
+[[mapper-annotated-text-usage]]
+==== Using the `annotated-text` field
+
+The `annotated-text` field tokenizes text content as per the more common `text` field (see
+"limitations" below) but also injects any marked-up annotation tokens directly into
+the search index:
+
+[source,js]
+--------------------------
+PUT my_index
+{
+  "mappings": {
+    "_doc": {
+      "properties": {
+        "my_field": {
+          "type": "annotated_text"
+        }
+      }
+    }
+  }
+}
+--------------------------
+// CONSOLE
+
+Such a mapping would allow marked-up text, e.g. Wikipedia articles, to be indexed as both text
+and structured tokens. The annotations use a markdown-like syntax with URL encoding of
+one or more values separated by the `&` symbol.
+
+
+We can use the `_analyze` API to test how an example annotation would be stored as tokens
+in the search index:
+
+
+[source,js]
+--------------------------
+GET my_index/_analyze
+{
+  "field": "my_field", 
+  "text":"Investors in [Apple](Apple+Inc.) rejoiced."
+}
+--------------------------
+// NOTCONSOLE
+
+Response:
+
+[source,js]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "investors",
+      "start_offset": 0,
+      "end_offset": 9,
+      "type": "<ALPHANUM>",
+      "position": 0
+    },
+    {
+      "token": "in",
+      "start_offset": 10,
+      "end_offset": 12,
+      "type": "<ALPHANUM>",
+      "position": 1
+    },
+    {
+      "token": "Apple Inc.", <1> 
+      "start_offset": 13,
+      "end_offset": 18,
+      "type": "annotation",
+      "position": 2
+    },
+    {
+      "token": "apple",
+      "start_offset": 13,
+      "end_offset": 18,
+      "type": "<ALPHANUM>",
+      "position": 2
+    },
+    {
+      "token": "rejoiced",
+      "start_offset": 19,
+      "end_offset": 27,
+      "type": "<ALPHANUM>",
+      "position": 3
+    }
+  ]
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+<1> Note the whole annotation token `Apple Inc.` is placed, unchanged as a single token in
+the token stream and at the same position (position 2) as the text token (`apple`) it annotates.
+
+
+We can now perform searches for annotations using regular `term` queries that don't tokenize
+the provided search values. Annotations are a more precise way of matching, as can be seen
+in this example where a search for `Beck` will not match `Jeff Beck`:
+
+[source,js]
+--------------------------
+# Example documents
+PUT my_index/_doc/1
+{
+  "my_field": "[Beck](Beck) announced a new tour"<2>
+}
+
+PUT my_index/_doc/2
+{
+  "my_field": "[Jeff Beck](Jeff+Beck&Guitarist) plays a strat"<1>
+}
+
+# Example search
+GET my_index/_search
+{
+  "query": {
+    "term": {
+        "my_field": "Beck" <3>
+    }
+  }
+}
+--------------------------
+// CONSOLE
+
+<1> As well as tokenising the plain text into single words e.g. `beck`, here we
+inject the single token value `Beck` at the same position as `beck` in the token stream.
+<2> Note annotations can inject multiple tokens at the same position - here we inject both
+the very specific value `Jeff Beck` and the broader term `Guitarist`. This enables
+broader positional queries e.g. finding mentions of a `Guitarist` near to `strat` (see the
+sketch below).
+<3> A benefit of searching with these carefully defined annotation tokens is that a query for
+`Beck` will not match document 2, which contains the tokens `jeff`, `beck` and `Jeff Beck`.
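+
+As a sketch of such a positional query (not part of the documented examples; the field and
+values come from the documents above), a `span_near` query can combine an exact-match
+`span_term` on an annotation token with one on a regular text token. `span_term` does not
+analyze its value, so it matches the injected `Guitarist` token exactly:
+
+[source,js]
+--------------------------
+GET my_index/_search
+{
+  "query": {
+    "span_near": {
+      "clauses": [
+        { "span_term": { "my_field": "Guitarist" } },
+        { "span_term": { "my_field": "strat" } }
+      ],
+      "slop": 5,
+      "in_order": false
+    }
+  }
+}
+--------------------------
+// NOTCONSOLE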
+
+WARNING: Any use of `=` signs in annotation values e.g. `[Prince](person=Prince)` will
+cause the document to be rejected with a parse failure. In future we hope to have a use for
+the equals signs so we will actively reject documents that contain this today.
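+
+For example, a hypothetical request like this would be rejected:
+
+[source,js]
+--------------------------
+PUT my_index/_doc/3
+{
+  "my_field": "[Prince](person=Prince) performed"
+}
+--------------------------
+// NOTCONSOLE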
+
+
+[[mapper-annotated-text-tips]]
+==== Data modelling tips
+===== Use structured and unstructured fields
+
+Annotations are normally a way of weaving structured information into unstructured text for
+higher-precision search.
+
+`Entity resolution` is a form of document enrichment undertaken by specialist software or people 
+where references to entities in a document are disambiguated by attaching a canonical ID.
+The ID is used to resolve any number of aliases or distinguish between people with the
+same name. The hyperlinks connecting Wikipedia's articles are a good example of resolved 
+entity IDs woven into text. 
+
+These IDs can be embedded as annotations in an annotated_text field but it often makes 
+sense to include them in dedicated structured fields to support discovery via aggregations:
+
+[source,js]
+--------------------------
+PUT my_index
+{
+  "mappings": {
+    "_doc": {
+      "properties": {
+        "my_unstructured_text_field": {
+          "type": "annotated_text"
+        },
+        "my_structured_people_field": {
+          "type": "text",
+          "fields": {
+          	"keyword" :{
+          	  "type": "keyword"
+          	}
+          }
+        }
+      }
+    }
+  }
+}
+--------------------------
+// CONSOLE
+
+Applications would then typically provide content and discover it as follows:
+
+[source,js]
+--------------------------
+# Example documents
+PUT my_index/_doc/1
+{
+  "my_unstructured_text_field": "[Shay](%40kimchy) created elasticsearch",
+  "my_twitter_handles": ["@kimchy"] <1>
+}
+
+GET my_index/_search
+{
+  "query": {
+    "query_string": {
+        "query": "elasticsearch OR logstash OR kibana",<2>
+        "default_field": "my_unstructured_text_field"
+    }
+  },
+  "aggregations": {
+  	"top_people" :{
+  	    "significant_terms" : { <3>
+	       "field" : "my_twitter_handles.keyword"
+  	    }
+  	}
+  }
+}
+--------------------------
+// CONSOLE
+
+<1> Note the `my_twitter_handles` field contains a list of the annotation values
+also used in the unstructured text (note the annotated_text syntax requires URL
+escaping, hence `%40kimchy` for `@kimchy`).
+By repeating the annotation values in a structured field this application has ensured that
+the tokens discovered in the structured field can be used for search and highlighting
+in the unstructured field.
+<2> In this example we search for documents that talk about components of the elastic stack.
+<3> We use the `my_twitter_handles` field here to discover people who are significantly
+associated with the elastic stack.
+
+===== Avoiding over-matching annotations
+By design, the regular text tokens and the annotation tokens co-exist in the same indexed 
+field but in rare cases this can lead to some over-matching.
+
+The value of an annotation often denotes a _named entity_ (a person, place or company).
+The tokens for these named entities are inserted untokenized, and differ from typical text 
+tokens because they are normally:
+
+* Mixed case e.g. `Madonna`
+* Multiple words e.g. `Jeff Beck`
+* Can have punctuation or numbers e.g. `Apple Inc.` or `@kimchy`
+
+This means, for the most part, a search for a named entity in the annotated text field will
+not have any false positives e.g. when selecting `Apple Inc.` from an aggregation result 
+you can drill down to highlight uses in the text without "over matching" on any text tokens 
+like the word `apple` in this context:
+
+    the apple was very juicy
+    
+However, a problem arises if your named entity happens to be a single term and lower-case e.g. the 
+company `elastic`. In this case, a search on the annotated text field for the token `elastic`
+may match a text document such as this:
+
+    he fired an elastic band
+
+To avoid such false matches users should consider prefixing annotation values to ensure
+they don't clash with text tokens e.g.
+
+    [elastic](Company_elastic) released version 7.0 of the elastic stack today
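+
+A regular `term` query can then target the prefixed value without matching the text token
+`elastic` (a sketch reusing the hypothetical `Company_elastic` value above):
+
+[source,js]
+--------------------------
+GET my_index/_search
+{
+  "query": {
+    "term": {
+      "my_field": "Company_elastic"
+    }
+  }
+}
+--------------------------
+// NOTCONSOLE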
+
+
+
+
+[[mapper-annotated-text-highlighter]]
+==== Using the `annotated` highlighter
+
+The `annotated-text` plugin includes a custom highlighter designed to mark up search hits
+in a way which is respectful of the original markup:
+
+[source,js]
+--------------------------
+# Example documents
+PUT my_index/_doc/1
+{
+  "my_field": "The cat sat on the [mat](sku3578)"
+}
+
+GET my_index/_search
+{
+  "query": {
+    "query_string": {
+        "query": "cats" 
+    }
+  },
+  "highlight": {
+    "fields": {
+      "my_field": {
+        "type": "annotated", <1>
+        "require_field_match": false
+      }
+    }
+  }
+}
+--------------------------
+// CONSOLE
+<1> The `annotated` highlighter type is designed for use with annotated_text fields
+
+The annotated highlighter is based on the `unified` highlighter and supports the same
+settings but does not use the `pre_tags` or `post_tags` parameters. Rather than using
+html-like markup such as `<em>cat</em>`, the annotated highlighter uses the same
+markdown-like syntax used for annotations and injects a key=value annotation where `_hit_term`
+is the key and the matched search term is the value e.g.
+
+    The [cat](_hit_term=cat) sat on the [mat](sku3578)
+
+The annotated highlighter tries to be respectful of any existing markup in the original 
+text:
+
+* If the search term matches exactly the location of an existing annotation then the 
+`_hit_term` key is merged into the url-like syntax used in the `(...)` part of the
+existing annotation. 
+* However, if the search term overlaps the span of an existing annotation it would break
+the markup formatting, so the original annotation is removed in favour of a new annotation
+with just the search hit information in the results.
+* Any non-overlapping annotations in the original text are preserved in highlighter
+selections.
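+
+For example, given the document indexed in the request above, a search for `mat` should
+produce a fragment like this (an illustration based on the merge rule above, not captured
+output):
+
+    The cat sat on the [mat](_hit_term=mat&sku3578)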
+
+
+[[mapper-annotated-text-limitations]]
+==== Limitations
+
+The annotated_text field type supports the same mapping settings as the `text` field type
+but with the following exceptions:
+
+* No support for `fielddata` or `fielddata_frequency_filter`
+* No support for `index_prefixes` or `index_phrases` indexing

+ 8 - 0
docs/plugins/mapper.asciidoc

@@ -19,5 +19,13 @@ indexes the size in bytes of the original
 The mapper-murmur3 plugin allows hashes to be computed at index-time and stored
 in the index for later use with the `cardinality` aggregation.
 
+<<mapper-annotated-text>>::
+
+The annotated text plugin provides the ability to index text that is a
+combination of free-text and special markup that is typically used to identify
+items of interest such as people or organisations (see NER or Named Entity Recognition
+tools).
+
 include::mapper-size.asciidoc[]
 include::mapper-murmur3.asciidoc[]
+include::mapper-annotated-text.asciidoc[]

+ 1 - 0
docs/reference/cat/plugins.asciidoc

@@ -28,6 +28,7 @@ U7321H6 discovery-gce           {version} The Google Compute Engine (GCE) Discov
 U7321H6 ingest-attachment       {version} Ingest processor that uses Apache Tika to extract contents
 U7321H6 ingest-geoip            {version} Ingest processor that looks up geo data based on IP addresses using the Maxmind geo database
 U7321H6 ingest-user-agent       {version} Ingest processor that extracts information from a user agent
+U7321H6 mapper-annotated-text   {version} The Mapper Annotated_text plugin adds support for text fields with markup used to inject annotation tokens into the index.
 U7321H6 mapper-murmur3          {version} The Mapper Murmur3 plugin allows to compute hashes of a field's values at index-time and to store them in the index.
 U7321H6 mapper-size             {version} The Mapper Size plugin allows document to record their uncompressed size at index time.
 U7321H6 store-smb               {version} The Store SMB plugin adds support for SMB stores.

+ 1 - 0
docs/reference/mapping/types.asciidoc

@@ -35,6 +35,7 @@ string::        <<text,`text`>> and <<keyword,`keyword`>>
                     `completion` to provide auto-complete suggestions
 <<token-count>>::   `token_count` to count the number of tokens in a string
 {plugins}/mapper-murmur3.html[`mapper-murmur3`]:: `murmur3` to compute hashes of values at index-time and store them in the index
+{plugins}/mapper-annotated-text.html[`mapper-annotated-text`]:: `annotated-text` to index text containing special markup (typically used for identifying named entities)
 
 <<percolator>>::    Accepts queries from the query-dsl
 

+ 23 - 0
plugins/mapper-annotated-text/build.gradle

@@ -0,0 +1,23 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+esplugin {
+  description 'The Mapper Annotated_text plugin adds support for text fields with markup used to inject annotation tokens into the index.'
+  classname 'org.elasticsearch.plugin.mapper.AnnotatedTextPlugin'
+}

+ 776 - 0
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

@@ -0,0 +1,776 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.annotatedtext;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.NormsFieldExistsQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.analysis.AnalyzerScope;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.index.mapper.StringFieldType;
+import org.elasticsearch.index.mapper.TextFieldMapper;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
+import org.elasticsearch.index.query.QueryShardContext;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UncheckedIOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.elasticsearch.index.mapper.TypeParsers.parseTextField;
+
+/** A {@link FieldMapper} for full-text fields with annotation markup e.g.
+ * 
+ *    "New mayor is [John Smith](type=person&amp;value=John%20Smith) "
+ * 
+ * A special Analyzer wraps the default choice of analyzer in order
+ * to strip the text field of annotation markup and inject the related
+ * entity annotation tokens as supplementary tokens at the relevant points
+ * in the token stream.
+ * This code is largely a copy of TextFieldMapper which is less than ideal - 
+ * my attempts to subclass TextFieldMapper failed but we can revisit this.
+ **/
+public class AnnotatedTextFieldMapper extends FieldMapper {
+
+    public static final String CONTENT_TYPE = "annotated_text";
+    private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
+
+    public static class Defaults {
+        public static final MappedFieldType FIELD_TYPE = new AnnotatedTextFieldType();
+        static {
+            FIELD_TYPE.freeze();
+        }
+    }
+
+    public static class Builder extends FieldMapper.Builder<Builder, AnnotatedTextFieldMapper> {
+
+        private int positionIncrementGap = POSITION_INCREMENT_GAP_USE_ANALYZER;
+        
+        public Builder(String name) {
+            super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
+            builder = this;
+        }
+
+        @Override
+        public AnnotatedTextFieldType fieldType() {
+            return (AnnotatedTextFieldType) super.fieldType();
+        }
+
+        public Builder positionIncrementGap(int positionIncrementGap) {
+            if (positionIncrementGap < 0) {
+                throw new MapperParsingException("[positions_increment_gap] must be positive, got " + positionIncrementGap);
+            }
+            this.positionIncrementGap = positionIncrementGap;
+            return this;
+        }
+        
+        @Override
+        public Builder docValues(boolean docValues) {
+            if (docValues) {
+                throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields do not support doc values");
+            }
+            return super.docValues(docValues);
+        }
+
+        @Override
+        public AnnotatedTextFieldMapper build(BuilderContext context) {
+            if (fieldType().indexOptions() == IndexOptions.NONE ) {
+                throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields must be indexed");
+            }
+            if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
+                if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+                    throw new IllegalArgumentException("Cannot set position_increment_gap on field ["
+                        + name + "] without positions enabled");
+                }
+                fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionIncrementGap));
+                fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap));
+                fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap));
+            } else {
+                //Using the analyzer's default BUT need to do the same thing AnalysisRegistry.processAnalyzerFactory 
+                // does to splice in new default of posIncGap=100 by wrapping the analyzer                
+                if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
+                    int overrideInc = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
+                    fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), overrideInc));
+                    fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), overrideInc));
+                    fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(),overrideInc));
+                }
+            }
+            setupFieldType(context);
+            return new AnnotatedTextFieldMapper(
+                    name, fieldType(), defaultFieldType, positionIncrementGap,
+                    context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
+        }
+    }
+
+    public static class TypeParser implements Mapper.TypeParser {
+        @Override
+        public Mapper.Builder<AnnotatedTextFieldMapper.Builder, AnnotatedTextFieldMapper> parse(
+                String fieldName, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
+            AnnotatedTextFieldMapper.Builder builder = new AnnotatedTextFieldMapper.Builder(fieldName);
+            
+            builder.fieldType().setIndexAnalyzer(parserContext.getIndexAnalyzers().getDefaultIndexAnalyzer());
+            builder.fieldType().setSearchAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchAnalyzer());
+            builder.fieldType().setSearchQuoteAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer());
+            parseTextField(builder, fieldName, node, parserContext);
+            for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
+                Map.Entry<String, Object> entry = iterator.next();
+                String propName = entry.getKey();
+                Object propNode = entry.getValue();
+                if (propName.equals("position_increment_gap")) {
+                    int newPositionIncrementGap = XContentMapValues.nodeIntegerValue(propNode, -1);
+                    builder.positionIncrementGap(newPositionIncrementGap);
+                    iterator.remove();
+                }
+            }
+            return builder;
+        }
+    }
+
+    
+    /**
+     * Parses markdown-like syntax into plain text and AnnotationTokens with offsets for
+     * annotations found in texts
+     */
+    public static final class AnnotatedText {
+        public final String textPlusMarkup;
+        public final String textMinusMarkup;
+        List<AnnotationToken> annotations;
+        
+        // Format is markdown-like syntax for URLs eg:
+        //   "New mayor is [John Smith](type=person&value=John%20Smith) "
+        static Pattern markdownPattern = Pattern.compile("\\[([^\\]\\[]*)\\]\\(([^\\)\\(]*)\\)");  
+        
+        public static AnnotatedText parse(String textPlusMarkup) {
+            List<AnnotationToken> annotations = new ArrayList<>();
+            Matcher m = markdownPattern.matcher(textPlusMarkup);
+            int lastPos = 0;
+            StringBuilder sb = new StringBuilder();
+            while (m.find()) {
+                if (m.start() > lastPos) {
+                    sb.append(textPlusMarkup.substring(lastPos, m.start()));
+                }
+
+                int startOffset = sb.length();
+                int endOffset = sb.length() + m.group(1).length();
+                sb.append(m.group(1));
+                lastPos = m.end();
+
+                String[] pairs = m.group(2).split("&");
+                String value = null;
+                for (String pair : pairs) {
+                    String[] kv = pair.split("=");
+                    try {
+                        if (kv.length == 2) {
+                            throw new ElasticsearchParseException("key=value pairs are not supported in annotations");
+                        }
+                        if (kv.length == 1) {
+                            // Check the "=" sign wasn't in the pair string
+                            if (kv[0].length() == pair.length()) {
+                                // untyped value
+                                value = URLDecoder.decode(kv[0], "UTF-8");
+                            }
+                        }
+                        if (value != null && value.length() > 0) {
+                            annotations.add(new AnnotationToken(startOffset, endOffset, value));
+                        }
+                    } catch (UnsupportedEncodingException uee) {
+                        throw new ElasticsearchParseException("Unsupported encoding parsing annotated text", uee);
+                    }
+                }
+            }
+            if (lastPos < textPlusMarkup.length()) {
+                sb.append(textPlusMarkup.substring(lastPos));
+            }
+            return new AnnotatedText(sb.toString(), textPlusMarkup, annotations);
+        }
+
+        protected AnnotatedText(String textMinusMarkup, String textPlusMarkup, List<AnnotationToken> annotations) {
+            this.textMinusMarkup = textMinusMarkup;
+            this.textPlusMarkup = textPlusMarkup;
+            this.annotations = annotations;    
+        }
+        
+        public static final class AnnotationToken {
+            public final int offset;
+            public final int endOffset;
+            
+            public final String value;
+            public AnnotationToken(int offset, int endOffset, String value) {
+                this.offset = offset;
+                this.endOffset = endOffset;
+                this.value = value;
+            }
+            @Override
+            public String toString() {
+               return value +" ("+offset+" - "+endOffset+")";
+            }
+            
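+            // True if [start, end] overlaps [offset, endOffset], whether partially or by containment.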
+            public boolean intersects(int start, int end) {
+                return (start <= offset && end >= offset) || (start <= endOffset && end >= endOffset)
+                        || (start >= offset && end <= endOffset);
+            }
+            
+            @Override
+            public int hashCode() {
+                final int prime = 31;
+                int result = 1;
+                result = prime * result + endOffset;
+                result = prime * result + offset;
+                result = prime * result + Objects.hashCode(value);
+                return result;
+            }
+            
+            @Override
+            public boolean equals(Object obj) {
+                if (this == obj)
+                    return true;
+                if (obj == null)
+                    return false;
+                if (getClass() != obj.getClass())
+                    return false;
+                AnnotationToken other = (AnnotationToken) obj;
+                return Objects.equals(endOffset, other.endOffset) && Objects.equals(offset, other.offset)
+                        && Objects.equals(value, other.value);
+            }
+            
+        }
+        
+        @Override
+        public String toString() {
+           StringBuilder sb = new StringBuilder();
+           sb.append(textMinusMarkup);
+           sb.append("\n");
+           annotations.forEach(a -> {sb.append(a); sb.append("\n");});
+           return sb.toString();
+        }
+
+        public int numAnnotations() {
+            return annotations.size();
+        }
+
+        public AnnotationToken getAnnotation(int index) {
+            return annotations.get(index);
+        }   
+    }
+    
+    // A utility class for use with highlighters where the content being highlighted 
+    // needs plain text format for highlighting but marked-up format for token discovery.
+    // The class takes markedup format field values and returns plain text versions.
+    // When asked to tokenize plain-text versions by the highlighter it tokenizes the
+    // original markup form in order to inject annotations.
+    public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
+        private Analyzer delegate;
+        private AnnotatedText[] annotations;
+        public AnnotatedHighlighterAnalyzer(Analyzer delegate){
+            super(delegate.getReuseStrategy());
+            this.delegate = delegate;
+        }
+
+        public void init(String[] markedUpFieldValues) {
+            this.annotations = new AnnotatedText[markedUpFieldValues.length];
+            for (int i = 0; i < markedUpFieldValues.length; i++) {
+                annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
+            }
+        }
+        
+        public String[] getPlainTextValuesForHighlighter() {
+            String[] result = new String[annotations.length];
+            for (int i = 0; i < annotations.length; i++) {
+                result[i] = annotations[i].textMinusMarkup;
+            }
+            return result;
+        }
+        
+        public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
+            List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
+            int fieldValueOffset = 0;
+            for (AnnotatedText fieldValueAnnotations : this.annotations) {
+                //This is called from a highlighter where all of the field values are concatenated
+                // so each annotation offset will need to be adjusted so that it takes into account
+                // the previous values AND the MULTIVAL delimiter
+                for (AnnotationToken token : fieldValueAnnotations.annotations) {
+                    if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
+                        intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
+                                token.endOffset + fieldValueOffset, token.value));
+                    }
+                }
+                //add 1 for the fieldvalue separator character
+                fieldValueOffset += fieldValueAnnotations.textMinusMarkup.length() + 1;
+            }
+            return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+        }
+        
+        @Override
+        public Analyzer getWrappedAnalyzer(String fieldName) {
+          return delegate;
+        }   
+        
+        @Override
+        protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+            if(components instanceof AnnotatedHighlighterTokenStreamComponents){
+                // already wrapped.
+                return components;
+            }
+            AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
+            return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, this.annotations);
+        }        
+    }
+    private static final class AnnotatedHighlighterTokenStreamComponents extends TokenStreamComponents{
+
+        private AnnotationsInjector annotationsInjector;
+        private AnnotatedText[] annotations;
+        int readerNum = 0;
+
+        AnnotatedHighlighterTokenStreamComponents(Tokenizer source, AnnotationsInjector annotationsFilter,
+                AnnotatedText[] annotations) {
+            super(source, annotationsFilter);
+            this.annotationsInjector = annotationsFilter;
+            this.annotations = annotations;            
+        }
+
+        @Override
+        protected void setReader(Reader reader) {
+            String plainText = readToString(reader);
+            AnnotatedText at = this.annotations[readerNum++];
+            assert at.textMinusMarkup.equals(plainText);
+            // This code is reliant on the behaviour of highlighter logic - it 
+            // takes plain text multi-value fields and then calls the same analyzer 
+            // for each field value in turn. This class has cached the annotations
+            // associated with each plain-text value and are arranged in the same order
+            annotationsInjector.setAnnotations(at);
+            super.setReader(new StringReader(at.textMinusMarkup));  
+        }
+               
+    }    
+    
+    
+    public static final class AnnotationAnalyzerWrapper extends AnalyzerWrapper {
+        
+
+        private final Analyzer delegate;
+
+        public AnnotationAnalyzerWrapper (Analyzer delegate) {
+          super(delegate.getReuseStrategy());
+          this.delegate = delegate;
+        }
+
+        /**
+         * Wraps {@link StandardAnalyzer}. 
+         */
+        public AnnotationAnalyzerWrapper() {
+          this(new StandardAnalyzer());
+        }
+        
+
+        @Override
+        public Analyzer getWrappedAnalyzer(String fieldName) {
+          return delegate;
+        }     
+
+        @Override
+        protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+            if(components instanceof AnnotatedTokenStreamComponents){
+                // already wrapped.
+                return components;
+            }
+            AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
+            return new AnnotatedTokenStreamComponents(components.getTokenizer(), injector);
+        }
+      }
+    
+    
+    //This Analyzer is not "wrappable" because of a limitation in Lucene https://issues.apache.org/jira/browse/LUCENE-8352    
+    private static final class AnnotatedTokenStreamComponents extends TokenStreamComponents{
+        private AnnotationsInjector annotationsInjector;
+
+        AnnotatedTokenStreamComponents(Tokenizer source, AnnotationsInjector annotationsInjector) {
+            super(source, annotationsInjector);
+            this.annotationsInjector = annotationsInjector;
+        }
+
+        @Override
+        protected void setReader(Reader reader) {
+            // Sneaky code to change the content downstream components will parse.
+            // Replace the marked-up content Reader with a plain text Reader and prime the 
+            // annotations injector with the AnnotatedTokens that need to be injected 
+            // as plain-text parsing progresses.
+            AnnotatedText annotations = AnnotatedText.parse(readToString(reader));
+            annotationsInjector.setAnnotations(annotations);
+            super.setReader(new StringReader(annotations.textMinusMarkup));
+        }
+    }
+    
+    static String readToString(Reader reader) {       
+        char[] arr = new char[8 * 1024];
+        StringBuilder buffer = new StringBuilder();
+        int numCharsRead;
+        try {
+            while ((numCharsRead = reader.read(arr, 0, arr.length)) != -1) {
+                buffer.append(arr, 0, numCharsRead);
+            }
+            reader.close();
+            return buffer.toString();            
+        } catch (IOException e) {
+            throw new UncheckedIOException("IO Error reading field content", e);
+        }
+    }         
+
+    
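+    /**
+     * A TokenFilter that injects each annotation value as an extra token,
+     * anchored at the first text token the annotation spans: the first
+     * annotation at an offset reuses that token's position increment, any
+     * further annotations at the same offset are stacked with a position
+     * increment of zero, and the position length covers all spanned tokens.
+     */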
+    public static final class AnnotationsInjector extends TokenFilter {
+        
+        private AnnotatedText annotatedText;
+        AnnotatedText.AnnotationToken nextAnnotationForInjection = null;
+        private int currentAnnotationIndex = 0;
+        List<State> pendingStates = new ArrayList<>();
+        int pendingStatePos = 0;
+        boolean inputExhausted = false;
+
+        private final OffsetAttribute textOffsetAtt = addAttribute(OffsetAttribute.class);
+        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+        private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+        private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+        private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+        public AnnotationsInjector(TokenStream in) {
+          super(in);
+        }
+
+        public void setAnnotations(AnnotatedText annotatedText) {
+          this.annotatedText = annotatedText;
+          currentAnnotationIndex = 0;
+          if (annotatedText != null && annotatedText.numAnnotations() > 0) {
+              nextAnnotationForInjection = annotatedText.getAnnotation(0);
+          } else {
+              nextAnnotationForInjection = null;
+          }
+        }
+        
+        
+
+        @Override
+        public void reset() throws IOException {
+            pendingStates.clear();
+            pendingStatePos = 0;
+            inputExhausted = false;
+            super.reset();
+        }
+        
+        // Abstracts if we are pulling from some pre-cached buffer of
+        // text tokens or directly from the wrapped TokenStream
+        private boolean internalNextToken() throws IOException {
+            if (pendingStatePos < pendingStates.size()) {
+                restoreState(pendingStates.get(pendingStatePos));
+                pendingStatePos++;
+                if (pendingStatePos >= pendingStates.size()) {
+                    pendingStatePos = 0;
+                    pendingStates.clear();
+                }
+                return true;
+            }
+            if (inputExhausted) {
+                return false;
+            }
+            return input.incrementToken();
+        }
+
+        @Override
+        public boolean incrementToken() throws IOException {
+            if (internalNextToken()) {
+                if (nextAnnotationForInjection != null) {
+                    // If we are at the right point to inject an annotation....
+                    if (textOffsetAtt.startOffset() >= nextAnnotationForInjection.offset) {
+                        int firstSpannedTextPosInc = posAtt.getPositionIncrement();
+                        int annotationPosLen = 1;
+
+                        // Capture the text token's state for later replay - but
+                        // with a zero pos increment so is same as annotation
+                        // that is injected before it
+                        posAtt.setPositionIncrement(0);
+                        pendingStates.add(captureState());
+
+                        while (textOffsetAtt.endOffset() <= nextAnnotationForInjection.endOffset) {
+                            // Buffer up all the other tokens spanned by this annotation to determine length.
+                            if (input.incrementToken()) {
+                                if (textOffsetAtt.endOffset() <= nextAnnotationForInjection.endOffset
+                                        && textOffsetAtt.startOffset() < nextAnnotationForInjection.endOffset) {
+                                    annotationPosLen += posAtt.getPositionIncrement();
+                                }
+                                pendingStates.add(captureState());
+                            } else {
+                                inputExhausted = true;
+                                break;
+                            }
+                        }
+                        emitAnnotation(firstSpannedTextPosInc, annotationPosLen);
+                        return true;
+                    }
+                }
+                return true;
+            } else {
+                inputExhausted = true;
+                return false;
+            }
+        }
+        private void setType(AnnotationToken token) {
+            //Default annotation type - in future AnnotationTokens may contain custom type info
+            typeAtt.setType("annotation");
+        }
+
+        private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) throws IOException {
+            // Set the annotation's attributes
+            posLenAtt.setPositionLength(annotationPosLen);
+            textOffsetAtt.setOffset(nextAnnotationForInjection.offset, nextAnnotationForInjection.endOffset);
+            setType(nextAnnotationForInjection);
+            
+            // We may have multiple annotations at this location - stack them up
+            final int annotationOffset = nextAnnotationForInjection.offset;
+            final AnnotatedText.AnnotationToken firstAnnotationAtThisPos = nextAnnotationForInjection;
+            while (nextAnnotationForInjection != null && nextAnnotationForInjection.offset == annotationOffset) {
+
+                
+                setType(nextAnnotationForInjection);
+                termAtt.resizeBuffer(nextAnnotationForInjection.value.length());
+                termAtt.copyBuffer(nextAnnotationForInjection.value.toCharArray(), 0, nextAnnotationForInjection.value.length());
+                
+                if (nextAnnotationForInjection == firstAnnotationAtThisPos) {
+                    posAtt.setPositionIncrement(firstSpannedTextPosInc);
+                    //Put at the head of the queue of tokens to be emitted
+                    pendingStates.add(0, captureState());                
+                } else {
+                    posAtt.setPositionIncrement(0);                    
+                    //Put after the head of the queue of tokens to be emitted
+                    pendingStates.add(1, captureState());                
+                }
+                
+                
+                // Advance to the next annotation (or null) to prevent re-injection of this one.
+                currentAnnotationIndex++;
+                if (currentAnnotationIndex < annotatedText.numAnnotations()) {
+                    nextAnnotationForInjection = annotatedText.getAnnotation(currentAnnotationIndex);
+                } else {
+                    nextAnnotationForInjection = null;
+                }
+            }
+            // Now pop the first of many potential buffered tokens:
+            internalNextToken();
+        }
+
+      }
+  
+
+    public static final class AnnotatedTextFieldType extends StringFieldType {
+
+        public AnnotatedTextFieldType() {
+            setTokenized(true);
+        }
+
+        protected AnnotatedTextFieldType(AnnotatedTextFieldType ref) {
+            super(ref);
+        }
+        
+        @Override
+        public void setIndexAnalyzer(NamedAnalyzer delegate) {
+            if(delegate.analyzer() instanceof AnnotationAnalyzerWrapper){
+                // Already wrapped the Analyzer with an AnnotationAnalyzer
+                super.setIndexAnalyzer(delegate);
+            } else {
+                // Wrap the analyzer with an AnnotationAnalyzer that will inject required annotations
+                super.setIndexAnalyzer(new NamedAnalyzer(delegate.name(), AnalyzerScope.INDEX,
+                    new AnnotationAnalyzerWrapper(delegate.analyzer())));
+            }
+        }
+
+        public AnnotatedTextFieldType clone() {
+            return new AnnotatedTextFieldType(this);
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            if (omitNorms()) {
+                return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+            } else {
+                return new NormsFieldExistsQuery(name());
+            }
+        }
+        
+        @Override
+        public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
+            PhraseQuery.Builder builder = new PhraseQuery.Builder();
+            builder.setSlop(slop);
+
+            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
+            int position = -1;
+
+            stream.reset();
+            while (stream.incrementToken()) {
+                if (enablePosIncrements) {
+                    position += posIncrAtt.getPositionIncrement();
+                }
+                else {
+                    position += 1;
+                }
+                builder.add(new Term(field, termAtt.getBytesRef()), position);
+            }
+
+            return builder.build();
+        }
+        
+        @Override
+        public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+
+            MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
+            mpqb.setSlop(slop);
+
+            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+
+            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
+            int position = -1;
+
+            List<Term> multiTerms = new ArrayList<>();
+            stream.reset();
+            while (stream.incrementToken()) {
+                int positionIncrement = posIncrAtt.getPositionIncrement();
+
+                if (positionIncrement > 0 && multiTerms.size() > 0) {
+                    if (enablePositionIncrements) {
+                        mpqb.add(multiTerms.toArray(new Term[0]), position);
+                    } else {
+                        mpqb.add(multiTerms.toArray(new Term[0]));
+                    }
+                    multiTerms.clear();
+                }
+                position += positionIncrement;
+                multiTerms.add(new Term(field, termAtt.getBytesRef()));
+            }
+
+            if (enablePositionIncrements) {
+                mpqb.add(multiTerms.toArray(new Term[0]), position);
+            } else {
+                mpqb.add(multiTerms.toArray(new Term[0]));
+            }
+            return mpqb.build();
+        }        
+    }
+    
+    private int positionIncrementGap;
+    protected AnnotatedTextFieldMapper(String simpleName, AnnotatedTextFieldType fieldType, MappedFieldType defaultFieldType,
+                                int positionIncrementGap, 
+                                Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+        super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
+        assert fieldType.tokenized();
+        assert fieldType.hasDocValues() == false;
+        this.positionIncrementGap = positionIncrementGap;
+    }
+
+    @Override
+    protected AnnotatedTextFieldMapper clone() {
+        return (AnnotatedTextFieldMapper) super.clone();
+    }
+
+    public int getPositionIncrementGap() {
+        return this.positionIncrementGap;
+    }
+
+    @Override
+    protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
+        final String value;
+        if (context.externalValueSet()) {
+            value = context.externalValue().toString();
+        } else {
+            value = context.parser().textOrNull();
+        }
+
+        if (value == null) {
+            return;
+        }
+
+        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
+            Field field = new Field(fieldType().name(), value, fieldType());
+            fields.add(field);
+            if (fieldType().omitNorms()) {
+                createFieldNamesField(context, fields);
+            }
+        }
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+    @Override
+    public AnnotatedTextFieldType fieldType() {
+        return (AnnotatedTextFieldType) super.fieldType();
+    }
+
+    @Override
+    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
+        super.doXContentBody(builder, includeDefaults, params);
+        doXContentAnalyzers(builder, includeDefaults);
+
+        if (includeDefaults || positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
+            builder.field("position_increment_gap", positionIncrementGap);
+        }        
+    }
+}

+ 44 - 0
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/plugin/mapper/AnnotatedTextPlugin.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.plugin.mapper;
+
+import java.util.Collections;
+import java.util.Map;
+
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper;
+import org.elasticsearch.plugins.MapperPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.plugins.SearchPlugin;
+import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedTextHighlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
+
+public class AnnotatedTextPlugin extends Plugin implements MapperPlugin, SearchPlugin {
+
+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        return Collections.singletonMap(AnnotatedTextFieldMapper.CONTENT_TYPE, new AnnotatedTextFieldMapper.TypeParser());
+    }
+    
+    @Override
+    public Map<String, Highlighter> getHighlighters() {
+        return Collections.singletonMap(AnnotatedTextHighlighter.NAME, new AnnotatedTextHighlighter());   
+    }
+}

+ 201 - 0
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java

@@ -0,0 +1,201 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.uhighlight.Passage;
+import org.apache.lucene.search.uhighlight.PassageFormatter;
+import org.apache.lucene.search.uhighlight.Snippet;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Custom passage formatter that :
+ * 1) marks up search hits in markdown-like syntax for URLs ({@link Snippet})
+ * 2) injects any annotations from the original text that don't conflict with search hit highlighting
+ */
+public class AnnotatedPassageFormatter extends PassageFormatter {
+
+
+    public static final String SEARCH_HIT_TYPE = "_hit_term";
+    private final Encoder encoder;
+    private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
+
+    public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
+        this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
+        this.encoder = encoder;
+    }
+
+    static class MarkupPassage {
+        List<Markup> markups = new ArrayList<>();
+        int lastMarkupEnd = -1;
+
+        public void addUnlessOverlapping(Markup newMarkup) {
+
+            // Fast exit: the new markup starts after everything seen so far.
+            if (newMarkup.start > lastMarkupEnd) {
+                markups.add(newMarkup);
+                lastMarkupEnd = newMarkup.end;
+                return;
+            }
+
+            // Check to see if this new markup overlaps with any prior markup.
+            int index = 0;
+            for (Markup existingMarkup : markups) {
+                if (existingMarkup.samePosition(newMarkup)) {
+                    existingMarkup.merge(newMarkup);
+                    return;
+                }
+                if (existingMarkup.overlaps(newMarkup)) {
+                    // Existing markup wins - we throw away the new markup that would span this position.
+                    return;
+                }
+                // The markup list is in start-offset order, so we can insert at this position and shift others right.
+                if (existingMarkup.isAfter(newMarkup)) {
+                    markups.add(index, newMarkup);
+                    return;
+                }
+                index++;
+            }
+            markups.add(newMarkup);
+            lastMarkupEnd = newMarkup.end;
+        }
+    }
+
+    static class Markup {
+        int start;
+        int end;
+        String metadata;
+
+        Markup(int start, int end, String metadata) {
+            this.start = start;
+            this.end = end;
+            this.metadata = metadata;
+        }
+        boolean isAfter(Markup other) {
+            return start > other.end;
+        }
+        void merge(Markup newMarkup) {
+            // Metadata uses the key1=value&key2=value&... syntax used for URLs.
+            assert samePosition(newMarkup);
+            metadata += "&" + newMarkup.metadata;
+        }
+        boolean samePosition(Markup other) {
+            return this.start == other.start && this.end == other.end;
+        }
+        boolean overlaps(Markup other) {
+            return (start <= other.start && end >= other.start)
+                    || (start <= other.end && end >= other.end)
+                    || (start >= other.start && end <= other.end);
+        }
+        @Override
+        public String toString() {
+            return "Markup [start=" + start + ", end=" + end + ", metadata=" + metadata + "]";
+        }
+    }
+
+    // Merge original annotations and search hits into a single set of markups for each passage.
+    static MarkupPassage mergeAnnotations(AnnotationToken[] annotations, Passage passage) {
+        try {
+            MarkupPassage markupPassage = new MarkupPassage();
+
+            // Add search hits first - they take precedence over any other markup.
+            for (int i = 0; i < passage.getNumMatches(); i++) {
+                int start = passage.getMatchStarts()[i];
+                int end = passage.getMatchEnds()[i];
+                String searchTerm = passage.getMatchTerms()[i].utf8ToString();
+                Markup markup = new Markup(start, end, SEARCH_HIT_TYPE + "=" + URLEncoder.encode(searchTerm, StandardCharsets.UTF_8.name()));
+                markupPassage.addUnlessOverlapping(markup);
+            }
+
+            // Now add the original text's annotations - ignoring any that conflict with the search-hit markup.
+            for (AnnotationToken token : annotations) {
+                int start = token.offset;
+                int end = token.endOffset;
+                if (start >= passage.getStartOffset() && end <= passage.getEndOffset()) {
+                    String escapedValue = URLEncoder.encode(token.value, StandardCharsets.UTF_8.name());
+                    Markup markup = new Markup(start, end, escapedValue);
+                    markupPassage.addUnlessOverlapping(markup);
+                }
+            }
+            return markupPassage;
+
+        } catch (UnsupportedEncodingException e) {
+            // We should always have UTF-8 support.
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public Snippet[] format(Passage[] passages, String content) {
+        Snippet[] snippets = new Snippet[passages.length];
+
+        int pos;
+        int j = 0;
+        for (Passage passage : passages) {
+            AnnotationToken[] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(),
+                    passage.getEndOffset());
+            MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
+
+            StringBuilder sb = new StringBuilder();
+            pos = passage.getStartOffset();
+            for (Markup markup : mergedMarkup.markups) {
+                int start = markup.start;
+                int end = markup.end;
+                // It's possible to have overlapping terms.
+                if (start > pos) {
+                    append(sb, content, pos, start);
+                }
+                if (end > pos) {
+                    sb.append("[");
+                    append(sb, content, Math.max(pos, start), end);
+                    sb.append("](");
+                    sb.append(markup.metadata);
+                    sb.append(")");
+                    pos = end;
+                }
+            }
+            // It's possible a "term" from the analyzer could span a sentence boundary.
+            append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
+            // We remove the paragraph separator if present at the end of the snippet (we used it as a separator between values).
+            if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
+                sb.deleteCharAt(sb.length() - 1);
+            } else if (sb.charAt(sb.length() - 1) == HighlightUtils.NULL_SEPARATOR) {
+                sb.deleteCharAt(sb.length() - 1);
+            }
+            // ... and we trim the snippets too.
+            snippets[j++] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0);
+        }
+        return snippets;
+    }
+
+    private void append(StringBuilder dest, String content, int start, int end) {
+        dest.append(encoder.encodeText(content.substring(start, end)));
+    }
+}

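The precedence rule implemented by MarkupPassage above is worth seeing end to end: search-hit
markup is added first, an annotation at exactly the same offsets is merged into it with "&",
and any other overlapping annotation is discarded. Below is a minimal standalone sketch of
that rule; SimpleMarkup and MarkupMergeDemo are hypothetical stand-ins for illustration, not
plugin classes.

    import java.util.ArrayList;
    import java.util.List;

    public class MarkupMergeDemo {
        static class SimpleMarkup {
            final int start, end;
            String metadata;
            SimpleMarkup(int start, int end, String metadata) {
                this.start = start;
                this.end = end;
                this.metadata = metadata;
            }
        }

        // Mirrors MarkupPassage.addUnlessOverlapping: same position merges, overlap loses.
        static void add(List<SimpleMarkup> markups, SimpleMarkup candidate) {
            for (SimpleMarkup existing : markups) {
                if (existing.start == candidate.start && existing.end == candidate.end) {
                    existing.metadata += "&" + candidate.metadata; // merge at identical positions
                    return;
                }
                if (candidate.start < existing.end && candidate.end > existing.start) {
                    return; // earlier (search-hit) markup wins; drop the newcomer
                }
            }
            markups.add(candidate);
        }

        public static void main(String[] args) {
            List<SimpleMarkup> markups = new ArrayList<>();
            // Plain text: "He paid Stormy Daniels hush money"
            add(markups, new SimpleMarkup(8, 14, "_hit_term=stormy"));   // search hit on "Stormy"
            add(markups, new SimpleMarkup(8, 22, "Stephanie+Clifford")); // overlapping annotation: dropped
            add(markups, new SimpleMarkup(8, 14, "Payee"));              // same position: merged
            markups.forEach(m -> System.out.println(m.start + ".." + m.end + " -> " + m.metadata));
            // Prints: 8..14 -> _hit_term=stormy&Payee
        }
    }
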
+ 64 - 0
plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java

@@ -0,0 +1,64 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.uhighlight.PassageFormatter;
+import org.elasticsearch.index.mapper.DocumentMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+public class AnnotatedTextHighlighter extends UnifiedHighlighter {
+
+    public static final String NAME = "annotated";
+
+    AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;
+
+    @Override
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
+        annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
+        return annotatedHighlighterAnalyzer;
+    }
+
+    // Convert the marked-up values held on-disk to plain-text versions for highlighting
+    @Override
+    protected List<Object> loadFieldValues(MappedFieldType fieldType, Field field, SearchContext context, HitContext hitContext)
+            throws IOException {
+        List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
+        String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
+        annotatedHighlighterAnalyzer.init(fieldValuesAsString);
+        return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());
+    }
+
+    @Override
+    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
+        return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);
+    }
+
+}

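Note the division of labour in AnnotatedTextHighlighter: the values stored on disk still
contain annotation markup, so loadFieldValues() converts them to plain text before the
unified highlighter computes passage offsets, and AnnotatedPassageFormatter re-injects
markup afterwards. The sketch below shows the idea of that plain-text conversion using a
hypothetical regex-based stripper; the plugin's real parser is more careful, but the
input/output relationship is the same.

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class StripMarkupDemo {
        // Matches [text](value) spans - a simplification of the plugin's grammar.
        private static final Pattern MARKUP = Pattern.compile("\\[([^\\]]*)\\]\\(([^)]*)\\)");

        static String toPlainText(String annotated) {
            Matcher m = MARKUP.matcher(annotated);
            StringBuffer sb = new StringBuffer();
            while (m.find()) {
                // Keep only the visible text; the annotation value is discarded here.
                m.appendReplacement(sb, Matcher.quoteReplacement(m.group(1)));
            }
            m.appendTail(sb);
            return sb.toString();
        }

        public static void main(String[] args) {
            System.out.println(toPlainText("The [quick brown fox](entity_3789) is brown."));
            // Prints: The quick brown fox is brown.
        }
    }
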
+ 39 - 0
plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextClientYamlTestSuiteIT.java

@@ -0,0 +1,39 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.annotatedtext;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
+import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
+
+public class AnnotatedTextClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
+
+    public AnnotatedTextClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws Exception {
+        return createParameters();
+    }
+}
+

+ 681 - 0
plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java

@@ -0,0 +1,681 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.annotatedtext;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.termvectors.TermVectorsRequest;
+import org.elasticsearch.action.termvectors.TermVectorsResponse;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.lucene.uid.Versions;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.VersionType;
+import org.elasticsearch.index.engine.Engine;
+import org.elasticsearch.index.mapper.DocumentMapper;
+import org.elasticsearch.index.mapper.DocumentMapperParser;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.MapperService.MergeReason;
+import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.index.mapper.TextFieldMapper;
+import org.elasticsearch.index.shard.IndexShard;
+import org.elasticsearch.index.termvectors.TermVectorsService;
+import org.elasticsearch.indices.IndicesService;
+import org.elasticsearch.plugin.mapper.AnnotatedTextPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.notNullValue;
+
+public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
+
+    IndexService indexService;
+    DocumentMapperParser parser;
+
+    @Before
+    public void setup() {
+        Settings settings = Settings.builder()
+            .put("index.analysis.filter.mySynonyms.type", "synonym")
+            .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
+            .put("index.analysis.analyzer.synonym.tokenizer", "standard")
+            .put("index.analysis.analyzer.synonym.filter", "mySynonyms")
+            // Stop filter remains in server as it is part of lucene-core
+            .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
+            .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
+            .build();
+        indexService = createIndex("test", settings);
+        parser = indexService.mapperService().documentMapperParser();
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        List<Class<? extends Plugin>> classpathPlugins = new ArrayList<>();
+        classpathPlugins.add(AnnotatedTextPlugin.class);
+        return classpathPlugins;
+    }
+
+    protected String getFieldType() {
+        return "annotated_text";
+    }
+
+    public void testAnnotationInjection() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = indexService.mapperService().merge("type",
+                new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);        
+
+        // Use example of typed and untyped annotations
+        String annotatedText = "He paid [Stormy Daniels](Stephanie+Clifford&Payee) hush money";
+        SourceToParse sourceToParse = SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("field", annotatedText)
+                        .endObject()),
+            XContentType.JSON);
+        ParsedDocument doc = mapper.parse(sourceToParse);
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+
+        assertEquals(annotatedText, fields[0].stringValue());
+
+        IndexShard shard = indexService.getShard(0);
+        shard.applyIndexOperationOnPrimary(Versions.MATCH_ANY, VersionType.INTERNAL,
+            sourceToParse, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false);
+        shard.refresh("test");
+        try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
+            LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
+            TermsEnum terms = leaf.terms("field").iterator();
+            
+            assertTrue(terms.seekExact(new BytesRef("stormy")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(2, postings.nextPosition());   
+            
+            assertTrue(terms.seekExact(new BytesRef("Stephanie Clifford")));
+            postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(2, postings.nextPosition());
+
+            assertTrue(terms.seekExact(new BytesRef("Payee")));
+            postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(2, postings.nextPosition());
+
+            assertTrue(terms.seekExact(new BytesRef("hush")));
+            postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(4, postings.nextPosition());
+        }
+    }
+    
+    public void testToleranceForBadAnnotationMarkup() throws IOException {
+        
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = indexService.mapperService().merge("type",
+                new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);        
+
+        String annotatedText = "foo [bar](MissingEndBracket baz";
+        SourceToParse sourceToParse = SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("field", annotatedText)
+                        .endObject()),
+            XContentType.JSON);
+        ParsedDocument doc = mapper.parse(sourceToParse);
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+
+        assertEquals(annotatedText, fields[0].stringValue());
+
+        IndexShard shard = indexService.getShard(0);
+        shard.applyIndexOperationOnPrimary(Versions.MATCH_ANY, VersionType.INTERNAL,
+            sourceToParse, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false);
+        shard.refresh("test");
+        try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
+            LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
+            TermsEnum terms = leaf.terms("field").iterator();
+            
+            assertTrue(terms.seekExact(new BytesRef("foo")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(0, postings.nextPosition());   
+            
+            assertTrue(terms.seekExact(new BytesRef("bar")));
+            postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(1, postings.nextPosition());
+
+            assertFalse(terms.seekExact(new BytesRef("MissingEndBracket")));
+            // Bad markup means value is treated as plain text and fed through tokenisation
+            assertTrue(terms.seekExact(new BytesRef("missingendbracket")));
+            
+        }
+    }  
+    
+    public void testAgainstTermVectorsAPI() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("tvfield").field("type", getFieldType())
+                .field("term_vector", "with_positions_offsets_payloads")
+                .endObject().endObject()
+                .endObject().endObject());        
+        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);          
+        
+
+        int max = between(3, 10);
+        BulkRequestBuilder bulk = client().prepareBulk();
+        for (int i = 0; i < max; i++) {
+            bulk.add(client().prepareIndex("test", "type", Integer.toString(i))
+                    .setSource("tvfield", "the quick [brown](Color) fox jumped over the lazy dog"));
+        }
+        bulk.get();
+
+        TermVectorsRequest request = new TermVectorsRequest("test", "type", "0").termStatistics(true);
+        
+        IndicesService indicesService = getInstanceFromNode(IndicesService.class);
+        IndexService test = indicesService.indexService(resolveIndex("test"));
+        IndexShard shard = test.getShardOrNull(0);
+        assertThat(shard, notNullValue());
+        TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request);        
+        assertEquals(1, response.getFields().size());   
+
+        Terms terms = response.getFields().terms("tvfield");
+        TermsEnum iterator = terms.iterator();
+        BytesRef term;
+        Set<String> foundTerms = new HashSet<>();
+        while ((term = iterator.next()) != null) {
+            foundTerms.add(term.utf8ToString());
+        }        
+        //Check we have both text and annotation tokens
+        assertTrue(foundTerms.contains("brown"));
+        assertTrue(foundTerms.contains("Color"));
+        assertTrue(foundTerms.contains("fox"));
+        
+    }    
+        
+    // ===== Code below copied from TextFieldMapperTests ========
+
+    public void testDefaults() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("field", "1234")
+                        .endObject()),
+                XContentType.JSON));
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+
+        assertEquals("1234", fields[0].stringValue());
+        IndexableFieldType fieldType = fields[0].fieldType();
+        assertThat(fieldType.omitNorms(), equalTo(false));
+        assertTrue(fieldType.tokenized());
+        assertFalse(fieldType.stored());
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS));
+        assertThat(fieldType.storeTermVectors(), equalTo(false));
+        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
+        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
+        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
+    }
+
+    public void testEnableStore() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field").field("type", getFieldType()).field("store", true).endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("field", "1234")
+                        .endObject()),
+                XContentType.JSON));
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+        assertTrue(fields[0].fieldType().stored());
+    }
+
+    public void testDisableNorms() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field")
+                    .field("type", getFieldType())
+                    .field("norms", false)
+                .endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("field", "1234")
+                        .endObject()),
+                XContentType.JSON));
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+        assertTrue(fields[0].fieldType().omitNorms());
+    }
+
+    public void testIndexOptions() throws IOException {
+        Map<String, IndexOptions> supportedOptions = new HashMap<>();
+        supportedOptions.put("docs", IndexOptions.DOCS);
+        supportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS);
+        supportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+        supportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+
+        XContentBuilder mappingBuilder = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties");
+        for (String option : supportedOptions.keySet()) {
+            mappingBuilder.startObject(option).field("type", getFieldType()).field("index_options", option).endObject();
+        }
+        String mapping = Strings.toString(mappingBuilder.endObject().endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        XContentBuilder jsonDoc = XContentFactory.jsonBuilder().startObject();
+        for (String option : supportedOptions.keySet()) {
+            jsonDoc.field(option, "1234");
+        }
+        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference.bytes(jsonDoc.endObject()),
+                XContentType.JSON));
+
+        for (Map.Entry<String, IndexOptions> entry : supportedOptions.entrySet()) {
+            String field = entry.getKey();
+            IndexOptions options = entry.getValue();
+            IndexableField[] fields = doc.rootDoc().getFields(field);
+            assertEquals(1, fields.length);
+            assertEquals(options, fields[0].fieldType().indexOptions());
+        }
+    }
+
+    public void testDefaultPositionIncrementGap() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = indexService.mapperService().merge("type",
+                new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        SourceToParse sourceToParse = SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .array("field", new String[] {"a", "b"})
+                        .endObject()),
+            XContentType.JSON);
+        ParsedDocument doc = mapper.parse(sourceToParse);
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(2, fields.length);
+
+        assertEquals("a", fields[0].stringValue());
+        assertEquals("b", fields[1].stringValue());
+
+        IndexShard shard = indexService.getShard(0);
+        shard.applyIndexOperationOnPrimary(Versions.MATCH_ANY, VersionType.INTERNAL,
+            sourceToParse, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false);
+        shard.refresh("test");
+        try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
+            LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
+            TermsEnum terms = leaf.terms("field").iterator();
+            assertTrue(terms.seekExact(new BytesRef("b")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, postings.nextPosition());
+        }
+    }
+
+    public void testPositionIncrementGap() throws IOException {
+        final int positionIncrementGap = randomIntBetween(1, 1000);
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field")
+                    .field("type", getFieldType())
+                    .field("position_increment_gap", positionIncrementGap)
+                .endObject().endObject()
+                .endObject().endObject());
+
+        DocumentMapper mapper = indexService.mapperService().merge("type",
+                new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
+
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        SourceToParse sourceToParse = SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .array("field", new String[]{"a", "b"})
+                        .endObject()),
+            XContentType.JSON);
+        ParsedDocument doc = mapper.parse(sourceToParse);
+
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(2, fields.length);
+
+        assertEquals("a", fields[0].stringValue());
+        assertEquals("b", fields[1].stringValue());
+
+        IndexShard shard = indexService.getShard(0);
+        shard.applyIndexOperationOnPrimary(Versions.MATCH_ANY, VersionType.INTERNAL,
+            sourceToParse, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false);
+        shard.refresh("test");
+        try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
+            LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
+            TermsEnum terms = leaf.terms("field").iterator();
+            assertTrue(terms.seekExact(new BytesRef("b")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(positionIncrementGap + 1, postings.nextPosition());
+        }
+    }
+
+    public void testSearchAnalyzerSerialization() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", getFieldType())
+                        .field("analyzer", "standard")
+                        .field("search_analyzer", "keyword")
+                    .endObject()
+                .endObject().endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        // special case: default index analyzer
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", getFieldType())
+                        .field("analyzer", "default")
+                        .field("search_analyzer", "keyword")
+                    .endObject()
+                .endObject().endObject().endObject());
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", getFieldType())
+            .field("analyzer", "keyword")
+            .endObject()
+            .endObject().endObject().endObject());
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        // special case: default search analyzer
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", getFieldType())
+            .field("analyzer", "keyword")
+            .field("search_analyzer", "default")
+            .endObject()
+            .endObject().endObject().endObject());
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", getFieldType())
+            .field("analyzer", "keyword")
+            .endObject()
+            .endObject().endObject().endObject());
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+
+        XContentBuilder builder = XContentFactory.jsonBuilder();
+        builder.startObject();
+        mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
+        builder.endObject();
+
+        String mappingString = Strings.toString(builder);
+        assertTrue(mappingString.contains("analyzer"));
+        assertTrue(mappingString.contains("search_analyzer"));
+        assertTrue(mappingString.contains("search_quote_analyzer"));
+    }
+
+    public void testSearchQuoteAnalyzerSerialization() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", getFieldType())
+                        .field("analyzer", "standard")
+                        .field("search_analyzer", "standard")
+                        .field("search_quote_analyzer", "keyword")
+                    .endObject()
+                .endObject().endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        // special case: default index/search analyzer
+        mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", getFieldType())
+                        .field("analyzer", "default")
+                        .field("search_analyzer", "default")
+                        .field("search_quote_analyzer", "keyword")
+                    .endObject()
+                .endObject().endObject().endObject());
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+    }
+
+    public void testTermVectors() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties")
+                .startObject("field1")
+                    .field("type", getFieldType())
+                    .field("term_vector", "no")
+                .endObject()
+                .startObject("field2")
+                    .field("type", getFieldType())
+                    .field("term_vector", "yes")
+                .endObject()
+                .startObject("field3")
+                    .field("type", getFieldType())
+                    .field("term_vector", "with_offsets")
+                .endObject()
+                .startObject("field4")
+                    .field("type", getFieldType())
+                    .field("term_vector", "with_positions")
+                .endObject()
+                .startObject("field5")
+                    .field("type", getFieldType())
+                    .field("term_vector", "with_positions_offsets")
+                .endObject()
+                .startObject("field6")
+                    .field("type", getFieldType())
+                    .field("term_vector", "with_positions_offsets_payloads")
+                .endObject()
+                .endObject()
+                .endObject().endObject());
+
+        DocumentMapper defaultMapper = parser.parse("type", new CompressedXContent(mapping));
+
+        ParsedDocument doc = defaultMapper.parse(SourceToParse.source("test", "type", "1", BytesReference
+                .bytes(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("field1", "1234")
+                        .field("field2", "1234")
+                        .field("field3", "1234")
+                        .field("field4", "1234")
+                        .field("field5", "1234")
+                        .field("field6", "1234")
+                        .endObject()),
+                XContentType.JSON));
+
+        assertThat(doc.rootDoc().getField("field1").fieldType().storeTermVectors(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field1").fieldType().storeTermVectorOffsets(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field1").fieldType().storeTermVectorPositions(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field1").fieldType().storeTermVectorPayloads(), equalTo(false));
+
+        assertThat(doc.rootDoc().getField("field2").fieldType().storeTermVectors(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field2").fieldType().storeTermVectorOffsets(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field2").fieldType().storeTermVectorPositions(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field2").fieldType().storeTermVectorPayloads(), equalTo(false));
+
+        assertThat(doc.rootDoc().getField("field3").fieldType().storeTermVectors(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field3").fieldType().storeTermVectorOffsets(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field3").fieldType().storeTermVectorPositions(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field3").fieldType().storeTermVectorPayloads(), equalTo(false));
+
+        assertThat(doc.rootDoc().getField("field4").fieldType().storeTermVectors(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field4").fieldType().storeTermVectorOffsets(), equalTo(false));
+        assertThat(doc.rootDoc().getField("field4").fieldType().storeTermVectorPositions(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field4").fieldType().storeTermVectorPayloads(), equalTo(false));
+
+        assertThat(doc.rootDoc().getField("field5").fieldType().storeTermVectors(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field5").fieldType().storeTermVectorOffsets(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field5").fieldType().storeTermVectorPositions(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field5").fieldType().storeTermVectorPayloads(), equalTo(false));
+
+        assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectors(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorOffsets(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorPositions(), equalTo(true));
+        assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorPayloads(), equalTo(true));
+    }
+   
+    public void testNullConfigValuesFail() throws MapperParsingException, IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+                .startObject("type")
+                    .startObject("properties")
+                        .startObject("field")
+                            .field("type", getFieldType())
+                            .field("analyzer", (String) null)
+                        .endObject()
+                    .endObject()
+                .endObject().endObject());
+
+        Exception e = expectThrows(MapperParsingException.class, () -> parser.parse("type", new CompressedXContent(mapping)));
+        assertEquals("[analyzer] must not have a [null] value", e.getMessage());
+    }
+
+    public void testNotIndexedField() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties").startObject("field")
+            .field("type", getFieldType())
+            .field("index", false)
+            .endObject().endObject().endObject().endObject());
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> parser.parse("type", new CompressedXContent(mapping)));
+        assertEquals("[annotated_text] fields must be indexed", e.getMessage());
+    }
+
+    public void testAnalyzedFieldPositionIncrementWithoutPositions() throws IOException {
+        for (String indexOptions : Arrays.asList("docs", "freqs")) {
+            String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startObject("properties").startObject("field")
+                .field("type", getFieldType())
+                .field("index_options", indexOptions)
+                .field("position_increment_gap", 10)
+                .endObject().endObject().endObject().endObject());
+
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                () -> parser.parse("type", new CompressedXContent(mapping)));
+            assertEquals("Cannot set position_increment_gap on field [field] without positions enabled", e.getMessage());
+        }
+    }
+
+    public void testEmptyName() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+                .startObject("type")
+                    .startObject("properties")
+                        .startObject("")
+                            .field("type", getFieldType())
+                        .endObject()
+                    .endObject()
+                .endObject().endObject());
+
+        // Empty name not allowed in index created after 5.0
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> parser.parse("type", new CompressedXContent(mapping))
+        );
+        assertThat(e.getMessage(), containsString("name cannot be empty string"));
+    }
+}

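The two position-increment-gap tests above both assert `gap + 1` rather than `gap` for the
first token of the second value, and the arithmetic is easy to miss: with values ["a", "b"],
token "a" sits at position 0, the gap is applied between values, and "b" then advances by its
own increment of 1. A quick worked sketch; the gap value of 100 is an assumption here, taken
from TextFieldMapper.Defaults.POSITION_INCREMENT_GAP.

    public class PositionGapDemo {
        public static void main(String[] args) {
            int gap = 100;                    // assumed default position increment gap
            int lastPositionFirstValue = 0;   // "a" is the only token of the first value
            // The gap is applied between values, then "b" advances by its own increment of 1.
            int firstPositionSecondValue = lastPositionFirstValue + gap + 1;
            System.out.println(firstPositionSecondValue); // 101, i.e. gap + 1 as asserted above
        }
    }
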
+ 73 - 0
plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextParsingTests.java

@@ -0,0 +1,73 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.annotatedtext;
+
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.List;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class AnnotatedTextParsingTests extends ESTestCase {
+    
+    private void checkParsing(String markup, String expectedPlainText, AnnotationToken... expectedTokens) {
+        AnnotatedText at = AnnotatedText.parse(markup);
+        assertEquals(expectedPlainText, at.textMinusMarkup);
+        List<AnnotationToken> actualAnnotations = at.annotations;
+        assertEquals(expectedTokens.length, actualAnnotations.size());
+        for (int i = 0; i < expectedTokens.length; i++) {
+            assertEquals(expectedTokens[i], actualAnnotations.get(i));
+        }
+    }
+
+    public void testSingleValueMarkup() {
+        checkParsing("foo [bar](Y)", "foo bar", new AnnotationToken(4, 7, "Y"));
+    }
+
+    public void testMultiValueMarkup() {
+        checkParsing("foo [bar](Y&B)", "foo bar", new AnnotationToken(4, 7, "Y"),
+                new AnnotationToken(4, 7, "B"));
+    }
+
+    public void testBlankTextAnnotation() {
+        checkParsing("It sounded like this:[](theSoundOfOneHandClapping)", "It sounded like this:",
+                new AnnotationToken(21, 21, "theSoundOfOneHandClapping"));
+    }
+
+    public void testMissingBracket() {
+        checkParsing("[foo](MissingEndBracket bar",
+                "[foo](MissingEndBracket bar", new AnnotationToken[0]);
+    }
+
+    public void testAnnotationWithType() {
+        Exception expectedException = expectThrows(ElasticsearchParseException.class,
+                () -> checkParsing("foo [bar](type=foo) baz", "foo bar baz", new AnnotationToken(4, 7, "noType")));
+        assertThat(expectedException.getMessage(), equalTo("key=value pairs are not supported in annotations"));
+    }
+
+    public void testMissingValue() {
+        checkParsing("[foo]() bar", "foo bar", new AnnotationToken[0]);
+    }
+}

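Taken together, these tests pin down the markup grammar: `[text](value1&value2)` produces one
AnnotationToken per `&`-separated value, all sharing the annotated text's plain-text offsets;
malformed markup falls back to being treated as plain text; and `key=value` pairs are rejected.
A small sketch of driving the parser directly, assuming the plugin classes are on the classpath
and accessible (the fields are read this way by the plugin's own same-package tests):

    import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
    import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;

    public class ParseDemo {
        public static void main(String[] args) {
            AnnotatedText at = AnnotatedText.parse("foo [bar](Y&B) baz");
            System.out.println(at.textMinusMarkup); // "foo bar baz"
            for (AnnotationToken token : at.annotations) {
                // Two tokens, both spanning offsets 4..7 ("bar"): values "Y" and "B".
                System.out.println(token.offset + ".." + token.endOffset + " " + token.value);
            }
        }
    }
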
+ 185 - 0
plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java

@@ -0,0 +1,185 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
+import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
+import org.apache.lucene.search.uhighlight.PassageFormatter;
+import org.apache.lucene.search.uhighlight.Snippet;
+import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
+import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
+import org.elasticsearch.test.ESTestCase;
+
+import java.net.URLEncoder;
+import java.text.BreakIterator;
+import java.util.Locale;
+
+import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
+import static org.hamcrest.CoreMatchers.equalTo;
+
+public class AnnotatedTextHighlighterTests extends ESTestCase {
+
+    private void assertHighlightOneDoc(String fieldName, String[] markedUpInputs,
+            Query query, Locale locale, BreakIterator breakIterator,
+            int noMatchSize, String[] expectedPassages) throws Exception {
+
+        // Annotated fields wrap the usual analyzer with one that injects extra tokens
+        Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
+        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
+        hiliteAnalyzer.init(markedUpInputs);
+        PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer, new DefaultEncoder());
+        String[] plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
+
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
+        iwc.setMergePolicy(newTieredMergePolicy(random()));
+        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+        FieldType ft = new FieldType(TextField.TYPE_STORED);
+        if (randomBoolean()) {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+        } else {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+        }
+        ft.freeze();
+        Document doc = new Document();
+        for (String input : markedUpInputs) {
+            Field field = new Field(fieldName, "", ft);
+            field.setStringValue(input);
+            doc.add(field);
+        }
+        iw.addDocument(doc);
+        DirectoryReader reader = iw.getReader();
+        IndexSearcher searcher = newSearcher(reader);
+        iw.close();
+        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
+        assertThat(topDocs.totalHits.value, equalTo(1L));
+        String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
+        
+        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
+                passageFormatter, locale,
+                breakIterator, rawValue, noMatchSize);
+        highlighter.setFieldMatcher((name) -> "text".equals(name));
+        final Snippet[] snippets =
+            highlighter.highlightField("text", query, topDocs.scoreDocs[0].doc, expectedPassages.length);
+        assertEquals(expectedPassages.length, snippets.length);
+        for (int i = 0; i < snippets.length; i++) {
+            assertEquals(expectedPassages[i], snippets[i].getText());
+        }
+        reader.close();
+        dir.close();
+    }
+    
+
+    public void testAnnotatedTextStructuredMatch() throws Exception {
+        // Check that a structured token, e.g. a URL, can be highlighted in a query
+        // on marked-up content using an "annotated_text" type field.
+        String url = "https://en.wikipedia.org/wiki/Key_Word_in_Context";
+        String encodedUrl = URLEncoder.encode(url, "UTF-8");
+        String annotatedWord = "[highlighting](" + encodedUrl + ")";
+        String highlightedAnnotatedWord = "[highlighting](" + AnnotatedPassageFormatter.SEARCH_HIT_TYPE + "=" + encodedUrl + "&"
+                + encodedUrl + ")";
+        final String[] markedUpInputs = { "This is a test. Just a test1 " + annotatedWord + " from [annotated](bar) highlighter.",
+                "This is the second " + annotatedWord + " value to perform highlighting on a longer text that gets scored lower." };
+
+        String[] expectedPassages = {
+                "This is a test. Just a test1 " + highlightedAnnotatedWord + " from [annotated](bar) highlighter.",
+                "This is the second " + highlightedAnnotatedWord + " value to perform highlighting on a"
+                        + " longer text that gets scored lower." };
+        Query query = new TermQuery(new Term("text", url));
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
+    }
+
+    public void testAnnotatedTextOverlapsWithUnstructuredSearchTerms() throws Exception {
+        final String[] markedUpInputs = { "[Donald Trump](Donald+Trump) visited Singapore",
+                "Donald duck is a [Disney](Disney+Inc) invention" };
+
+        String[] expectedPassages = { "[Donald](_hit_term=donald) Trump visited Singapore",
+                "[Donald](_hit_term=donald) duck is a [Disney](Disney+Inc) invention" };
+        Query query = new TermQuery(new Term("text", "donald"));
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
+    }
+
+    public void testAnnotatedTextMultiFieldWithBreakIterator() throws Exception {
+        final String[] markedUpInputs = { "[Donald Trump](Donald+Trump) visited Singapore. Kim shook hands with Donald",
+                "Donald duck is a [Disney](Disney+Inc) invention" };
+        String[] expectedPassages = { "[Donald](_hit_term=donald) Trump visited Singapore",
+                "Kim shook hands with [Donald](_hit_term=donald)",
+                "[Donald](_hit_term=donald) duck is a [Disney](Disney+Inc) invention" };
+        Query query = new TermQuery(new Term("text", "donald"));
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        breakIterator = new SplittingBreakIterator(breakIterator, '.');
+        assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
+    }
+    
+    public void testAnnotatedTextSingleFieldWithBreakIterator() throws Exception {
+        final String[] markedUpInputs = { "[Donald Trump](Donald+Trump) visited Singapore. Kim shook hands with Donald"};
+        String[] expectedPassages = { "[Donald](_hit_term=donald) Trump visited Singapore",
+                "Kim shook hands with [Donald](_hit_term=donald)"};
+        Query query = new TermQuery(new Term("text", "donald"));
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        breakIterator = new SplittingBreakIterator(breakIterator, '.');
+        assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
+    }
+
+    public void testAnnotatedTextSingleFieldWithPhraseQuery() throws Exception {
+        final String[] markedUpInputs = { "[Donald Trump](Donald+Trump) visited Singapore", 
+                "Donald Jr was with Melania Trump"};
+        String[] expectedPassages = { "[Donald](_hit_term=donald) [Trump](_hit_term=trump) visited Singapore"};
+        Query query = new PhraseQuery("text", "donald", "trump");
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
+    }
+
+    public void testBadAnnotation() throws Exception {
+        final String[] markedUpInputs = { "Missing bracket for [Donald Trump](Donald+Trump visited Singapore"};
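+        // The malformed annotation is left as plain text; terms outside it
+        // still highlight normally.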
+        String[] expectedPassages = { "Missing bracket for [Donald Trump](Donald+Trump visited [Singapore](_hit_term=singapore)"};
+        Query query = new TermQuery(new Term("text", "singapore"));
+        BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+        assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
+    }
+
+}

+ 44 - 0
plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml

@@ -0,0 +1,44 @@
+# Integration tests for Mapper Annotated_text components
+#
+
+---
+"annotated highlighter on annotated text":
+  - skip:
+      version: " - 6.99.99"
+      reason: Annotated text type introduced in 7.0.0-alpha1
+
+  - do:
+      indices.create:
+        index: annotated
+        body:
+          settings:
+            number_of_shards: "1"
+            number_of_replicas: "0"
+          mappings:
+            doc:
+              properties:
+                text:
+                  type: annotated_text
+                entityID:
+                  type: keyword
+
+  - do:
+      index:
+        index: annotated
+        type: doc
+        body:
+            "text": "The [quick brown fox](entity_3789) is brown."
+            "entityID": "entity_3789"
+        refresh: true
+
+  - do:
+      search:
+        body: { "query" : {"term" : { "entityID" : "entity_3789" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }
+
+  - match: {hits.hits.0.highlight.text.0: "The [quick brown fox](_hit_term=entity_3789&entity_3789) is brown."}
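+  # '&' joins the injected search-hit annotation (_hit_term=<value>) with the
+  # document's original annotation value.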
+
+  - do:
+      search:
+        body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }
+
+  - match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}

+ 8 - 0
qa/vagrant/src/test/resources/packaging/tests/module_and_plugin_test_cases.bash

@@ -266,6 +266,10 @@ fi
     install_and_check_plugin mapper murmur3
 }
 
+@test "[$GROUP] install annotated-text mapper plugin" {
+    install_and_check_plugin mapper annotated-text
+}
+
 @test "[$GROUP] check reindex module" {
     check_module reindex
 }
@@ -380,6 +384,10 @@ fi
     remove_plugin mapper-murmur3
 }
 
+@test "[$GROUP] remove annotated-text mapper plugin" {
+    remove_plugin mapper-annotated-text
+}
+
 @test "[$GROUP] remove size mapper plugin" {
     remove_plugin mapper-size
 }

+ 16 - 3
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java

@@ -18,10 +18,13 @@
  */
 package org.elasticsearch.search.fetch.subphase.highlight;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
 import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
+import org.elasticsearch.index.mapper.DocumentMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.fetch.FetchSubPhase;
 import org.elasticsearch.search.internal.SearchContext;
@@ -70,8 +73,18 @@ public final class HighlightUtils {
         return textsToHighlight;
     }
 
-    static class Encoders {
-        static final Encoder DEFAULT = new DefaultEncoder();
-        static final Encoder HTML = new SimpleHTMLEncoder();
+    public static class Encoders {
+        public static final Encoder DEFAULT = new DefaultEncoder();
+        public static final Encoder HTML = new SimpleHTMLEncoder();
     }
+
+    static Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
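+        // A keyword field may define a normalizer rather than an analyzer;
+        // prefer it so values are re-analyzed exactly as they were indexed.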
+        if (type instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) type;
+            if (keywordFieldType.normalizer() != null) {
+                return keywordFieldType.normalizer();
+            }
+        }
+        return docMapper.mappers().indexAnalyzer();
+    }
 }
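
The `getAnalyzer` helper moved here from `UnifiedHighlighter` so that `PlainHighlighter` and the unified highlighter family can share it. A minimal usage sketch (the `context`, `hitContext` and `fieldType` variables are illustrative, not part of this change):

    // Resolve the analyzer used to re-analyze field values at highlight time.
    // Keyword fields with a normalizer get that normalizer, so highlighted
    // terms line up with the indexed (normalized) form.
    DocumentMapper docMapper = context.mapperService().documentMapper(hitContext.hit().getType());
    Analyzer analyzer = HighlightUtils.getAnalyzer(docMapper, fieldType);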

+ 1 - 2
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java

@@ -49,7 +49,6 @@ import java.util.List;
 import java.util.Map;
 
 import static org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter.convertFieldValue;
-import static org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter.getAnalyzer;
 
 public class PlainHighlighter implements Highlighter {
     private static final String CACHE_KEY = "highlight-plain";
@@ -102,7 +101,7 @@ public class PlainHighlighter implements Highlighter {
         int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
         ArrayList<TextFragment> fragsList = new ArrayList<>();
         List<Object> textsToHighlight;
-        Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
+        Analyzer analyzer = HighlightUtils.getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
         final int maxAnalyzedOffset = context.indexShard().indexSettings().getHighlightMaxAnalyzedOffset();
 
         try {

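The `UnifiedHighlighter` change below turns private helpers into `protected` extension points (`getPassageFormatter`, `getAnalyzer`, `loadFieldValues`, `getBreakIterator`, and friends), which is what allows the plugin's `AnnotatedTextHighlighter` to subclass it rather than copy it. A minimal sketch of the kind of subclass this enables; the class name and tags are hypothetical, while the plugin itself returns its `AnnotatedPassageFormatter` from this hook:

    import org.apache.lucene.search.highlight.Encoder;
    import org.apache.lucene.search.uhighlight.CustomPassageFormatter;
    import org.apache.lucene.search.uhighlight.PassageFormatter;

    // Assumes the same package as UnifiedHighlighter / SearchContextHighlight.
    public class MarkTagHighlighter extends UnifiedHighlighter {
        @Override
        protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
            // Hypothetical override: always wrap hits in <mark> tags instead
            // of the per-request pre/post tags.
            return new CustomPassageFormatter("<mark>", "</mark>", encoder);
        }
    }
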
+ 30 - 25
server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

@@ -26,6 +26,7 @@ import org.apache.lucene.search.uhighlight.BoundedBreakIteratorScanner;
 import org.apache.lucene.search.uhighlight.CustomPassageFormatter;
 import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
 import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
+import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter.OffsetSource;
 import org.apache.lucene.util.BytesRef;
@@ -34,7 +35,6 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
-import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.FetchSubPhase;
@@ -54,7 +54,7 @@ public class UnifiedHighlighter implements Highlighter {
     public boolean canHighlight(MappedFieldType fieldType) {
         return true;
     }
 
     @Override
     public HighlightField highlight(HighlighterContext highlighterContext) {
         MappedFieldType fieldType = highlighterContext.fieldType;
@@ -62,23 +62,18 @@ public class UnifiedHighlighter implements Highlighter {
         SearchContext context = highlighterContext.context;
         FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
         Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
-        CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
-            field.fieldOptions().postTags()[0], encoder);
         final int maxAnalyzedOffset = context.indexShard().indexSettings().getHighlightMaxAnalyzedOffset();
 
         List<Snippet> snippets = new ArrayList<>();
         int numberOfFragments;
         try {
 
-            final Analyzer analyzer =
-                getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
-            List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldType, context, hitContext);
-            fieldValues = fieldValues.stream()
-                .map((s) -> convertFieldValue(fieldType, s))
-                .collect(Collectors.toList());
+            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
+            List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
             if (fieldValues.size() == 0) {
                 return null;
             }
+            final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
             final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
             final CustomUnifiedHighlighter highlighter;
             final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@@ -145,7 +140,27 @@ public class UnifiedHighlighter implements Highlighter {
         return null;
     }
 
-    private BreakIterator getBreakIterator(SearchContextHighlight.Field field) {
+    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
+        return new CustomPassageFormatter(field.fieldOptions().preTags()[0],
+            field.fieldOptions().postTags()[0], encoder);
+    }
+
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
+        return HighlightUtils.getAnalyzer(docMapper, type);
+    }
+
+    protected List<Object> loadFieldValues(MappedFieldType fieldType, SearchContextHighlight.Field field, SearchContext context,
+            FetchSubPhase.HitContext hitContext) throws IOException {
+        List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldType, context, hitContext);
+        fieldValues = fieldValues.stream()
+            .map((s) -> convertFieldValue(fieldType, s))
+            .collect(Collectors.toList());
+        return fieldValues;
+    }
+
+    protected BreakIterator getBreakIterator(SearchContextHighlight.Field field) {
         final SearchContextHighlight.FieldOptions fieldOptions = field.fieldOptions();
         final Locale locale =
             fieldOptions.boundaryScannerLocale() != null ? fieldOptions.boundaryScannerLocale() :
@@ -168,7 +183,7 @@ public class UnifiedHighlighter implements Highlighter {
         }
     }
 
-    private static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
+    protected static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
 
         //We need to filter the snippets as due to no_match_size we could have
         //either highlighted snippets or non highlighted ones and we don't want to mix those up
@@ -203,17 +218,7 @@ public class UnifiedHighlighter implements Highlighter {
         return filteredSnippets;
     }
 
-    static Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
-        if (type instanceof KeywordFieldMapper.KeywordFieldType) {
-            KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) type;
-            if (keywordFieldType.normalizer() != null) {
-                return  keywordFieldType.normalizer();
-            }
-        }
-        return docMapper.mappers().indexAnalyzer();
-    }
-
-    static String convertFieldValue(MappedFieldType type, Object value) {
+    protected static String convertFieldValue(MappedFieldType type, Object value) {
         if (value instanceof BytesRef) {
             return type.valueForDisplay(value).toString();
         } else {
@@ -221,14 +226,14 @@ public class UnifiedHighlighter implements Highlighter {
         }
     }
 
-    private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
+    protected static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
         //postings highlighter accepts all values in a single string, as offsets etc. need to match with content
         //loaded from stored fields, we merge all values using a proper separator
         String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator));
         return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1));
     }
 
-    private OffsetSource getOffsetSource(MappedFieldType fieldType) {
+    protected OffsetSource getOffsetSource(MappedFieldType fieldType) {
         if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
             return fieldType.storeTermVectors() ? OffsetSource.POSTINGS_WITH_TERM_VECTORS : OffsetSource.POSTINGS;
         }