Răsfoiți Sursa

Document 5.0 mapping changes.

Adrien Grand 9 ani în urmă
părinte
comite
b42f66c8ac
47 a modificat fișierele cu 430 adăugiri și 527 ștergeri
  1. 1 1
      docs/reference/aggregations/bucket/nested-aggregation.asciidoc
  2. 3 3
      docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc
  3. 1 1
      docs/reference/analysis/analyzers/keyword-analyzer.asciidoc
  4. 2 2
      docs/reference/docs/termvectors.asciidoc
  5. 3 3
      docs/reference/docs/update-by-query.asciidoc
  6. 1 1
      docs/reference/index-modules/similarity.asciidoc
  7. 1 2
      docs/reference/indices/aliases.asciidoc
  8. 1 1
      docs/reference/indices/create-index.asciidoc
  9. 9 9
      docs/reference/indices/get-field-mapping.asciidoc
  10. 11 13
      docs/reference/indices/put-mapping.asciidoc
  11. 7 8
      docs/reference/mapping.asciidoc
  12. 3 4
      docs/reference/mapping/dynamic/default-mapping.asciidoc
  13. 2 2
      docs/reference/mapping/dynamic/field-mapping.asciidoc
  14. 6 7
      docs/reference/mapping/dynamic/templates.asciidoc
  15. 7 7
      docs/reference/mapping/fields/all-field.asciidoc
  16. 2 4
      docs/reference/mapping/fields/parent-field.asciidoc
  17. 3 3
      docs/reference/mapping/params/analyzer.asciidoc
  18. 3 3
      docs/reference/mapping/params/boost.asciidoc
  19. 3 3
      docs/reference/mapping/params/copy-to.asciidoc
  20. 2 4
      docs/reference/mapping/params/doc-values.asciidoc
  21. 1 1
      docs/reference/mapping/params/dynamic.asciidoc
  22. 1 2
      docs/reference/mapping/params/enabled.asciidoc
  23. 11 122
      docs/reference/mapping/params/fielddata.asciidoc
  24. 2 8
      docs/reference/mapping/params/ignore-above.asciidoc
  25. 7 7
      docs/reference/mapping/params/include-in-all.asciidoc
  26. 1 1
      docs/reference/mapping/params/index-options.asciidoc
  27. 2 44
      docs/reference/mapping/params/index.asciidoc
  28. 8 9
      docs/reference/mapping/params/multi-fields.asciidoc
  29. 4 34
      docs/reference/mapping/params/norms.asciidoc
  30. 2 5
      docs/reference/mapping/params/null-value.asciidoc
  31. 1 1
      docs/reference/mapping/params/position-increment-gap.asciidoc
  32. 2 2
      docs/reference/mapping/params/properties.asciidoc
  33. 1 1
      docs/reference/mapping/params/search-analyzer.asciidoc
  34. 4 4
      docs/reference/mapping/params/similarity.asciidoc
  35. 2 2
      docs/reference/mapping/params/store.asciidoc
  36. 1 1
      docs/reference/mapping/params/term-vector.asciidoc
  37. 8 4
      docs/reference/mapping/types.asciidoc
  38. 1 1
      docs/reference/mapping/types/binary.asciidoc
  39. 111 0
      docs/reference/mapping/types/keyword.asciidoc
  40. 3 4
      docs/reference/mapping/types/object.asciidoc
  41. 1 176
      docs/reference/mapping/types/string.asciidoc
  42. 139 0
      docs/reference/mapping/types/text.asciidoc
  43. 1 1
      docs/reference/mapping/types/token-count.asciidoc
  44. 30 0
      docs/reference/migration/migrate_5_0/mapping.asciidoc
  45. 1 1
      docs/reference/query-dsl/exists-query.asciidoc
  46. 5 5
      docs/reference/query-dsl/mlt-query.asciidoc
  47. 9 10
      docs/reference/query-dsl/term-query.asciidoc

+ 1 - 1
docs/reference/aggregations/bucket/nested-aggregation.asciidoc

@@ -16,7 +16,7 @@ price for the product. The mapping could look like:
             "resellers" : { <1>
                 "type" : "nested",
                 "properties" : {
-                    "name" : { "type" : "string" },
+                    "name" : { "type" : "text" },
                     "price" : { "type" : "double" }
                 }
             }

+ 3 - 3
docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc

@@ -22,12 +22,12 @@ the issue documents as nested documents. The mapping could look like:
 
     "issue" : {
         "properties" : {
-            "tags" : { "type" : "string" }
+            "tags" : { "type" : "text" }
             "comments" : { <1>
                 "type" : "nested"
                 "properties" : {
-                    "username" : { "type" : "string", "index" : "not_analyzed" },
-                    "comment" : { "type" : "string" }
+                    "username" : { "type" : "keyword" },
+                    "comment" : { "type" : "text" }
                 }
             }
         }

+ 1 - 1
docs/reference/analysis/analyzers/keyword-analyzer.asciidoc

@@ -4,4 +4,4 @@
 An analyzer of type `keyword` that "tokenizes" an entire stream as a
 single token. This is useful for data like zip codes, ids and so on.
 Note, when using mapping definitions, it might make more sense to simply
-mark the field as `not_analyzed`.
+map the field as a <<keyword,`keyword`>>.

+ 2 - 2
docs/reference/docs/termvectors.asciidoc

@@ -136,13 +136,13 @@ curl -s -XPUT 'http://localhost:9200/twitter/' -d '{
     "tweet": {
       "properties": {
         "text": {
-          "type": "string",
+          "type": "text",
           "term_vector": "with_positions_offsets_payloads",
           "store" : true,
           "analyzer" : "fulltext_analyzer"
          },
          "fullname": {
-          "type": "string",
+          "type": "text",
           "term_vector": "with_positions_offsets_payloads",
           "analyzer" : "fulltext_analyzer"
         }

+ 3 - 3
docs/reference/docs/update-by-query.asciidoc

@@ -281,7 +281,7 @@ PUT test
     "test": {
       "dynamic": false,   <1>
       "properties": {
-        "text": {"type": "string"}
+        "text": {"type": "text"}
       }
     }
   }
@@ -300,8 +300,8 @@ POST test/test?refresh
 PUT test/_mapping/test   <2>
 {
   "properties": {
-    "text": {"type": "string"},
-    "flag": {"type": "string", "analyzer": "keyword"}
+    "text": {"type": "text"},
+    "flag": {"type": "text", "analyzer": "keyword"}
   }
 }
 --------------------------------------------------

+ 1 - 1
docs/reference/index-modules/similarity.asciidoc

@@ -39,7 +39,7 @@ Here we configure the DFRSimilarity so it can be referenced as
 {
   "book" : {
     "properties" : {
-      "title" : { "type" : "string", "similarity" : "my_similarity" }
+      "title" : { "type" : "text", "similarity" : "my_similarity" }
     }
 }
 --------------------------------------------------

+ 1 - 2
docs/reference/indices/aliases.asciidoc

@@ -116,8 +116,7 @@ curl -XPUT 'http://localhost:9200/test1' -d '{
     "type1": {
       "properties": {
         "user" : {
-          "type": "string",
-          "index": "not_analyzed"
+          "type": "keyword"
         }
       }
     }

+ 1 - 1
docs/reference/indices/create-index.asciidoc

@@ -78,7 +78,7 @@ curl -XPOST localhost:9200/test -d '{
     "mappings" : {
         "type1" : {
             "properties" : {
-                "field1" : { "type" : "string", "index" : "not_analyzed" }
+                "field1" : { "type" : "text" }
             }
         }
     }

+ 9 - 9
docs/reference/indices/get-field-mapping.asciidoc

@@ -22,7 +22,7 @@ For which the response is (assuming `text` is a default string field):
          "text": {
             "full_name": "text",
             "mapping": {
-               "text": { "type": "string" }
+               "text": { "type": "text" }
             }
          }
       }
@@ -73,13 +73,13 @@ For example, consider the following mapping:
  {
      "article": {
          "properties": {
-             "id": { "type": "string" },
-             "title":  { "type": "string"},
-             "abstract": { "type": "string"},
+             "id": { "type": "text" },
+             "title":  { "type": "text"},
+             "abstract": { "type": "text"},
              "author": {
                  "properties": {
-                     "id": { "type": "string" },
-                     "name": { "type": "string" }
+                     "id": { "type": "text" },
+                     "name": { "type": "text" }
                  }
              }
          }
@@ -105,19 +105,19 @@ returns:
          "abstract": {
             "full_name": "abstract",
             "mapping": {
-               "abstract": { "type": "string" }
+               "abstract": { "type": "text" }
             }
          },
          "author.id": {
             "full_name": "author.id",
             "mapping": {
-               "id": { "type": "string" }
+               "id": { "type": "text" }
             }
          },
          "name": {
             "full_name": "author.name",
             "mapping": {
-               "name": { "type": "string" }
+               "name": { "type": "text" }
             }
          }
       }

+ 11 - 13
docs/reference/indices/put-mapping.asciidoc

@@ -12,7 +12,7 @@ PUT twitter <1>
     "tweet": {
       "properties": {
         "message": {
-          "type": "string"
+          "type": "text"
         }
       }
     }
@@ -23,7 +23,7 @@ PUT twitter/_mapping/user <2>
 {
   "properties": {
     "name": {
-      "type": "string"
+      "type": "text"
     }
   }
 }
@@ -32,7 +32,7 @@ PUT twitter/_mapping/tweet <3>
 {
   "properties": {
     "user_name": {
-      "type": "string"
+      "type": "text"
     }
   }
 }
@@ -86,13 +86,12 @@ PUT my_index <1>
         "name": {
           "properties": {
             "first": {
-              "type": "string"
+              "type": "text"
             }
           }
         },
         "user_id": {
-          "type": "string",
-          "index": "not_analyzed"
+          "type": "keyword"
         }
       }
     }
@@ -105,13 +104,12 @@ PUT my_index/_mapping/user
     "name": {
       "properties": {
         "last": { <2>
-          "type": "string"
+          "type": "text"
         }
       }
     },
     "user_id": {
-      "type": "string",
-      "index": "not_analyzed",
+      "type": "keyword",
       "ignore_above": 100 <3>
     }
   }
@@ -149,7 +147,7 @@ PUT my_index
     "type_one": {
       "properties": {
         "text": { <1>
-          "type": "string",
+          "type": "text",
           "analyzer": "standard"
         }
       }
@@ -157,7 +155,7 @@ PUT my_index
     "type_two": {
       "properties": {
         "text": { <1>
-          "type": "string",
+          "type": "text",
           "analyzer": "standard"
         }
       }
@@ -169,7 +167,7 @@ PUT my_index/_mapping/type_one <2>
 {
   "properties": {
     "text": {
-      "type": "string",
+      "type": "text",
       "analyzer": "standard",
       "search_analyzer": "whitespace"
     }
@@ -180,7 +178,7 @@ PUT my_index/_mapping/type_one?update_all_types <3>
 {
   "properties": {
     "text": {
-      "type": "string",
+      "type": "text",
       "analyzer": "standard",
       "search_analyzer": "whitespace"
     }

+ 7 - 8
docs/reference/mapping.asciidoc

@@ -46,7 +46,7 @@ Fields with the same name in different mapping types in the same index
 
 Each field has a data `type` which can be:
 
-* a simple type like <<string,`string`>>, <<date,`date`>>, <<number,`long`>>,
+* a simple type like <<text,`text`>>, <<keyword,`keyword`>>, <<date,`date`>>, <<number,`long`>>,
   <<number,`double`>>, <<boolean,`boolean`>> or <<ip,`ip`>>.
 * a type which supports the hierarchical nature of JSON such as
   <<object,`object`>> or <<nested,`nested`>>.
@@ -55,7 +55,7 @@ Each field has a data `type` which can be:
 
 It is often useful to index the same field in different ways for different
 purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
-an `analyzed` field for full-text search, and as a `not_analyzed` field for
+a `text` field for full-text search, and as a `keyword` field for
 sorting or aggregations.  Alternatively, you could index a string field with
 the <<analysis-standard-analyzer,`standard` analyzer>>, the
 <<english-analyzer,`english`>> analyzer, and the
@@ -134,18 +134,17 @@ PUT my_index <1>
     "user": { <2>
       "_all":       { "enabled": false  }, <3>
       "properties": { <4>
-        "title":    { "type": "string"  }, <5>
-        "name":     { "type": "string"  }, <5>
+        "title":    { "type": "text"  }, <5>
+        "name":     { "type": "text"  }, <5>
         "age":      { "type": "integer" }  <5>
       }
     },
     "blogpost": { <2>
       "properties": { <4>
-        "title":    { "type": "string"  }, <5>
-        "body":     { "type": "string"  }, <5>
+        "title":    { "type": "text"  }, <5>
+        "body":     { "type": "text"  }, <5>
         "user_id":  {
-          "type":   "string", <5>
-          "index":  "not_analyzed"
+          "type":   "keyword" <5>
         },
         "created":  {
           "type":   "date", <5>

+ 3 - 4
docs/reference/mapping/dynamic/default-mapping.asciidoc

@@ -56,11 +56,10 @@ PUT _template/logging
           "strings": { <4>
             "match_mapping_type": "string",
             "mapping": {
-              "type": "string",
+              "type": "text",
               "fields": {
                 "raw": {
-                  "type":  "string",
-                  "index": "not_analyzed",
+                  "type":  "keyword",
                   "ignore_above": 256
                 }
               }
@@ -79,4 +78,4 @@ PUT logs-2015.10.01/event/1
 <1> The `logging` template will match any indices beginning with `logs-`.
 <2> Matching indices will be created with a single primary shard.
 <3> The `_all` field will be disabled by default for new type mappings.
-<4> String fields will be created with an `analyzed` main field, and a `not_analyzed` `.raw` field.
+<4> String fields will be created with a `text` main field, and a `keyword` `.raw` field.

+ 2 - 2
docs/reference/mapping/dynamic/field-mapping.asciidoc

@@ -22,7 +22,7 @@ string::                            Either a <<date,`date`>> field
                                         (if the value passes <<date-detection,date detection>>),
                                     a <<number,`double`>> or <<number,`long`>> field
                                         (if the value passes <<numeric-detection,numeric detection>>)
-                                    or an <<mapping-index,`analyzed`>> <<string,`string`>> field.
+                                    or a <<text,`text`>> field.
 
 These are the only <<mapping-types,field datatypes>> that are dynamically
 detected.  All other datatypes must be mapped explicitly.
@@ -81,7 +81,7 @@ PUT my_index/my_type/1 <1>
 --------------------------------------------------
 // AUTOSENSE
 
-<1> The `create_date` field has been added as a <<string,`string`>> field.
+<1> The `create_date` field has been added as a <<text,`text`>> field.
 
 ===== Customising detected date formats
 

+ 6 - 7
docs/reference/mapping/dynamic/templates.asciidoc

@@ -52,7 +52,7 @@ can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
 `string`.  It also accepts `*` to match all datatypes.
 
 For example, if we wanted to map all integer fields as `integer` instead of
-`long`, and all `string` fields as both `analyzed` and `not_analyzed`, we
+`long`, and all `string` fields as both `text` and `keyword`, we
 could use the following template:
 
 [source,js]
@@ -74,11 +74,10 @@ PUT my_index
           "strings": {
             "match_mapping_type": "string",
             "mapping": {
-              "type": "string",
+              "type": "text",
               "fields": {
                 "raw": {
-                  "type":  "string",
-                  "index": "not_analyzed",
+                  "type":  "keyword",
                   "ignore_above": 256
                 }
               }
@@ -99,7 +98,7 @@ PUT my_index/my_type/1
 --------------------------------------------------
 // AUTOSENSE
 <1> The `my_integer` field is mapped as an `integer`.
-<2> The `my_string` field is mapped as an analyzed `string`, with a `not_analyzed` <<multi-fields,multi field>>.
+<2> The `my_string` field is mapped as a `text`, with a `keyword` <<multi-fields,multi field>>.
 
 
 [[match-unmatch]]
@@ -180,7 +179,7 @@ PUT my_index
             "path_match":   "name.*",
             "path_unmatch": "*.middle",
             "mapping": {
-              "type":       "string",
+              "type":       "text",
               "copy_to":    "full_name"
             }
           }
@@ -221,7 +220,7 @@ PUT my_index
             "match_mapping_type": "string",
             "match": "*",
             "mapping": {
-              "type": "string",
+              "type": "text",
               "analyzer": "{name}"
             }
           }

+ 7 - 7
docs/reference/mapping/fields/all-field.asciidoc

@@ -45,7 +45,7 @@ from each field as a string. It does not combine the _terms_ from each field.
 
 =============================================================================
 
-The `_all` field is just a <<string,`string`>> field, and accepts the same
+The `_all` field is just a <<text,`text`>> field, and accepts the same
 parameters that  other string fields accept, including `analyzer`,
 `term_vectors`, `index_options`, and `store`.
 
@@ -136,7 +136,7 @@ PUT my_index
       },
       "properties": {
         "content": {
-          "type": "string"
+          "type": "text"
         }
       }
     }
@@ -172,11 +172,11 @@ PUT myindex
     "mytype": {
       "properties": {
         "title": { <1>
-          "type": "string",
+          "type": "text",
           "boost": 2
         },
         "content": { <1>
-          "type": "string"
+          "type": "text"
         }
       }
     }
@@ -210,15 +210,15 @@ PUT myindex
     "mytype": {
       "properties": {
         "first_name": {
-          "type":    "string",
+          "type":    "text",
           "copy_to": "full_name" <1>
         },
         "last_name": {
-          "type":    "string",
+          "type":    "text",
           "copy_to": "full_name" <1>
         },
         "full_name": {
-          "type":    "string"
+          "type":    "text"
         }
       }
     }

+ 2 - 4
docs/reference/mapping/fields/parent-field.asciidoc

@@ -127,7 +127,7 @@ global ordinals for the `_parent` field.
 Global ordinals, by default, are built lazily: the first parent-child query or
 aggregation after a refresh will trigger building of global ordinals. This can
 introduce a significant latency spike for your users. You can use
-<<fielddata-loading,eager_global_ordinals>> to shift the cost of building global
+<<global-ordinals,eager_global_ordinals>> to shift the cost of building global
 ordinals from query time to refresh time, by mapping the `_parent` field as follows:
 
 [source,js]
@@ -139,9 +139,7 @@ PUT my_index
     "my_child": {
       "_parent": {
         "type": "my_parent",
-        "fielddata": {
-          "loading": "eager_global_ordinals"
-        }
+        "eager_global_ordinals": true
       }
     }
   }

+ 3 - 3
docs/reference/mapping/params/analyzer.asciidoc

@@ -47,10 +47,10 @@ PUT my_index
     "my_type": {
       "properties": {
         "text": { <1>
-          "type": "string",
+          "type": "text",
           "fields": {
             "english": { <2>
-              "type":     "string",
+              "type":     "text",
               "analyzer": "english"
             }
           }
@@ -124,7 +124,7 @@ PUT /my_index
       "my_type":{
          "properties":{
             "title": {
-               "type":"string",
+               "type":"text",
                "analyzer":"my_analyzer", <3>
                "search_analyzer":"my_stop_analyzer", <4>
                "search_quote_analyzer":"my_analyzer" <5>

+ 3 - 3
docs/reference/mapping/params/boost.asciidoc

@@ -12,11 +12,11 @@ PUT my_index
     "my_type": {
       "properties": {
         "title": {
-          "type": "string",
+          "type": "text",
           "boost": 2 <1>
         },
         "content": {
-          "type": "string"
+          "type": "text"
         }
       }
     }
@@ -83,4 +83,4 @@ We advise against using index time boosting for the following reasons:
   byte.  This reduces the resolution of the field length normalization factor
   which can lead to lower quality relevance calculations.
 
-==================================================
+==================================================

+ 3 - 3
docs/reference/mapping/params/copy-to.asciidoc

@@ -15,15 +15,15 @@ PUT /my_index
     "my_type": {
       "properties": {
         "first_name": {
-          "type": "string",
+          "type": "text",
           "copy_to": "full_name" <1>
         },
         "last_name": {
-          "type": "string",
+          "type": "text",
           "copy_to": "full_name" <1>
         },
         "full_name": {
-          "type": "string"
+          "type": "text"
         }
       }
     }

+ 2 - 4
docs/reference/mapping/params/doc-values.asciidoc

@@ -29,12 +29,10 @@ PUT my_index
     "my_type": {
       "properties": {
         "status_code": { <1>
-          "type":       "string",
-          "index":      "not_analyzed"
+          "type":       "keyword"
         },
         "session_id": { <2>
-          "type":       "string",
-          "index":      "not_analyzed",
+          "type":       "keyword",
           "doc_values": false
         }
       }

+ 1 - 1
docs/reference/mapping/params/dynamic.asciidoc

@@ -67,7 +67,7 @@ PUT my_index
         "user": { <2>
           "properties": {
             "name": {
-              "type": "string"
+              "type": "text"
             },
             "social_networks": { <3>
               "dynamic": true,

+ 1 - 2
docs/reference/mapping/params/enabled.asciidoc

@@ -21,8 +21,7 @@ PUT my_index
     "session": {
       "properties": {
         "user_id": {
-          "type":  "string",
-          "index": "not_analyzed"
+          "type":  "keyword"
         },
         "last_updated": {
           "type": "date"

+ 11 - 122
docs/reference/mapping/params/fielddata.asciidoc

@@ -12,28 +12,28 @@ documents, we need to be able to look up the document and find the terms that
 it has in a field.
 
 Most fields can use index-time, on-disk <<doc-values,`doc_values`>> to support
-this type of data access pattern, but `analyzed` string fields do not support
-`doc_values`.
+this type of data access pattern, but `text` fields do not support `doc_values`.
 
-Instead, `analyzed` strings use a query-time data structure called
+Instead, `text` fields use a query-time data structure called
 `fielddata`.  This data structure is built on demand the first time that a
 field is used for aggregations, sorting, or is accessed in a script.  It is built
 by reading the entire inverted index for each segment from disk, inverting the
 term ↔︎ document relationship, and storing the result in memory, in the
 JVM heap.
 
-Loading fielddata is an expensive process so, once it has been loaded, it
-remains in memory for the lifetime of the segment.
+Loading fielddata is an expensive process so it is disabled by default. Also,
+when enabled, once it has been loaded, it remains in memory for the lifetime of
+the segment.
 
 [WARNING]
 .Fielddata can fill up your heap space
 ==============================================================================
 Fielddata can consume a lot of heap space, especially when loading high
-cardinality `analyzed` string fields.  Most of the time, it doesn't make sense
-to sort or aggregate on `analyzed` string fields (with the notable exception
+cardinality `text` fields.  Most of the time, it doesn't make sense
+to sort or aggregate on `text` fields (with the notable exception
 of the
 <<search-aggregations-bucket-significantterms-aggregation,`significant_terms`>>
-aggregation).  Always think about whether a `not_analyzed` field (which can
+aggregation).  Always think about whether a <<keyword,`keyword`>> field (which can
 use `doc_values`) would be  a better fit for your use case.
 ==============================================================================
 
@@ -42,71 +42,6 @@ same name in the same index.  Its value can be updated on existing fields
 using the <<indices-put-mapping,PUT mapping API>>.
 
 
-[[fielddata-format]]
-==== `fielddata.format`
-
-For `analyzed` string fields, the fielddata `format` controls whether
-fielddata should be enabled or not.  It accepts: `disabled` and `paged_bytes`
-(enabled, which is the default).  To disable fielddata loading, you can use
-the following mapping:
-
-[source,js]
---------------------------------------------------
-PUT my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "text": {
-          "type": "string",
-          "fielddata": {
-            "format": "disabled" <1>
-          }
-        }
-      }
-    }
-  }
-}
---------------------------------------------------
-// AUTOSENSE
-<1> The `text` field cannot be used for sorting, aggregations, or in scripts.
-
-.Fielddata and other datatypes
-[NOTE]
-==================================================
-
-Historically, other field datatypes also used fielddata, but this has been replaced
-by index-time, disk-based <<doc-values,`doc_values`>>.
-
-==================================================
-
-
-[[fielddata-loading]]
-==== `fielddata.loading`
-
-This per-field setting controls when fielddata is loaded into memory. It
-accepts three options:
-
-[horizontal]
-`lazy`::
-
-    Fielddata is only loaded into memory when it is needed. (default)
-
-`eager`::
-
-    Fielddata is loaded into memory before a new search segment becomes
-    visible to search.  This can reduce the latency that a user may experience
-    if their search request has to trigger lazy loading from a big segment.
-
-`eager_global_ordinals`::
-
-    Loading fielddata into memory is only part of the work that is required.
-    After loading the fielddata for each segment, Elasticsearch builds the
-    <<global-ordinals>> data structure to make a list of all unique terms
-    across all the segments in a shard.  By default, global ordinals are built
-    lazily.  If the field has a very high cardinality, global ordinals may
-    take some time to build, in which case you can use eager loading instead.
-
 [[global-ordinals]]
 .Global ordinals
 *****************************************
@@ -141,15 +76,10 @@ can move the loading time from the first search request, to the refresh itself.
 *****************************************
 
 [[field-data-filtering]]
-==== `fielddata.filter`
+==== `fielddata_frequency_filter`
 
 Fielddata filtering can be used to reduce the number of terms loaded into
-memory, and thus reduce memory usage. Terms can be filtered by _frequency_ or
-by _regular expression_, or a combination of the two:
-
-Filtering by frequency::
-+
---
+memory, and thus reduce memory usage. Terms can be filtered by _frequency_:
 
 The frequency filter allows you to only load terms whose term frequency falls
 between a `min` and `max` value, which can be expressed as an absolute
@@ -169,7 +99,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "tag": {
-          "type": "string",
+          "type": "text",
           "fielddata": {
             "filter": {
               "frequency": {
@@ -186,44 +116,3 @@ PUT my_index
 }
 --------------------------------------------------
 // AUTOSENSE
---
-
-Filtering by regex::
-+
---
-Terms can also be filtered by regular expression - only values which
-match the regular expression are loaded. Note: the regular expression is
-applied to each term in the field, not to the whole field value. For
-instance, to only load hashtags from a tweet, we can use a regular
-expression which matches terms beginning with `#`:
-
-[source,js]
---------------------------------------------------
-PUT my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "tweet": {
-          "type": "string",
-          "analyzer": "whitespace",
-          "fielddata": {
-            "filter": {
-              "regex": {
-                "pattern": "^#.*"
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
---------------------------------------------------
-// AUTOSENSE
---
-
-These filters can be updated on an existing field mapping and will take
-effect the next time the fielddata for a segment is loaded. Use the
-<<indices-clearcache,Clear Cache>> API
-to reload the fielddata using the new filters.

+ 2 - 8
docs/reference/mapping/params/ignore-above.asciidoc

@@ -1,12 +1,7 @@
 [[ignore-above]]
 === `ignore_above`
 
-Strings longer than the `ignore_above` setting will not be processed by the
-<<analyzer,analyzer>> and will not be indexed. This is mainly useful for
-<<mapping-index,`not_analyzed`>> string fields, which are typically used for
-filtering, aggregations, and sorting.  These are structured fields and it
-doesn't usually make sense to allow very long terms to be indexed in these
-fields.
+Strings longer than the `ignore_above` setting will not be indexed or stored.
 
 [source,js]
 --------------------------------------------------
@@ -16,8 +11,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "message": {
-          "type": "string",
-          "index": "not_analyzed",
+          "type": "keyword",
           "ignore_above": 20 <1>
         }
       }

+ 7 - 7
docs/reference/mapping/params/include-in-all.asciidoc

@@ -14,10 +14,10 @@ PUT my_index
     "my_type": {
       "properties": {
         "title": { <1>
-          "type": "string"
+          "type": "text"
         }
         "content": { <1>
-          "type": "string"
+          "type": "text"
         },
         "date": { <2>
           "type": "date",
@@ -50,18 +50,18 @@ PUT my_index
     "my_type": {
       "include_in_all": false, <1>
       "properties": {
-        "title":          { "type": "string" },
+        "title":          { "type": "text" },
         "author": {
           "include_in_all": true, <2>
           "properties": {
-            "first_name": { "type": "string" },
-            "last_name":  { "type": "string" }
+            "first_name": { "type": "text" },
+            "last_name":  { "type": "text" }
           }
         },
         "editor": {
           "properties": {
-            "first_name": { "type": "string" }, <3>
-            "last_name":  { "type": "string", "include_in_all": true } <3>
+            "first_name": { "type": "text" }, <3>
+            "last_name":  { "type": "text", "include_in_all": true } <3>
           }
         }
       }

+ 1 - 1
docs/reference/mapping/params/index-options.asciidoc

@@ -39,7 +39,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "text": {
-          "type": "string",
+          "type": "text",
           "index_options": "offsets"
         }
       }

+ 2 - 44
docs/reference/mapping/params/index.asciidoc

@@ -1,48 +1,6 @@
 [[mapping-index]]
 === `index`
 
-The `index` option controls how field values are indexed and, thus, how they
-are searchable.  It accepts three values:
+The `index` option controls whether field values are indexed. It accepts `true`
+or `false`. Fields that are not indexed are not queryable.
 
-[horizontal]
-`no`::
-
-    Do not add this field value to the index. With this setting, the field
-    will not be queryable.
-
-`not_analyzed`::
-
-    Add the field value to the index unchanged, as a single term.  This is the
-    default for all fields that support this option except for
-    <<string,`string`>> fields.  `not_analyzed` fields are usually used with
-    <<term-level-queries,term-level queries>> for structured search.
-
-`analyzed`::
-
-    This option applies only to `string` fields, for which it is the default.
-    The string field value is first <<analysis,analyzed>> to convert the
-    string into terms (e.g. a list of individual words), which are then
-    indexed.  At search time, the query string is passed through
-    (<<search-analyzer,usually>>) the same analyzer to generate terms
-    in the same format as those in the index.  It is this process that enables
-    <<full-text-queries,full text search>>.
-
-For example, you can create a `not_analyzed` string field with the following:
-
-[source,js]
---------------------------------------------------
-PUT /my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "status_code": {
-          "type": "string",
-          "index": "not_analyzed"
-        }
-      }
-    }
-  }
-}
---------------------------------------------------
-// AUTOSENSE

+ 8 - 9
docs/reference/mapping/params/multi-fields.asciidoc

@@ -3,8 +3,8 @@
 
 It is often useful to index the same field in different ways for different
 purposes.  This is the purpose of _multi-fields_. For instance, a `string`
-field could be <<mapping-index,indexed>> as an `analyzed` field for full-text
-search, and as a `not_analyzed` field for sorting or aggregations:
+field could be mapped as a `text` field for full-text
+search, and as a `keyword` field for sorting or aggregations:
 
 [source,js]
 --------------------------------------------------
@@ -14,11 +14,10 @@ PUT /my_index
     "my_type": {
       "properties": {
         "city": {
-          "type": "string",
+          "type": "text",
           "fields": {
             "raw": { <1>
-              "type":  "string",
-              "index": "not_analyzed"
+              "type":  "keyword"
             }
           }
         }
@@ -57,8 +56,8 @@ GET /my_index/_search
 }
 --------------------------------------------------
 // AUTOSENSE
-<1> The `city.raw` field is a `not_analyzed` version of the `city` field.
-<2> The analyzed `city` field can be used for full text search.
+<1> The `city.raw` field is a `keyword` version of the `city` field.
+<2> The `city` field can be used for full text search.
 <3> The `city.raw` field can be used for sorting and aggregations
 
 NOTE: Multi-fields do not change the original `_source` field.
@@ -83,10 +82,10 @@ PUT my_index
     "my_type": {
       "properties": {
         "text": { <1>
-          "type": "string",
+          "type": "text",
           "fields": {
             "english": { <2>
-              "type":     "string",
+              "type":     "text",
               "analyzer": "english"
             }
           }

+ 4 - 34
docs/reference/mapping/params/norms.asciidoc

@@ -4,14 +4,14 @@
 Norms store various normalization factors that are later used at query time
 in order to compute the score of a document relative to a query.
 
-Although useful for scoring, norms also require quite a lot of memory
+Although useful for scoring, norms also require quite a lot of disk space
 (typically in the order of one byte per document per field in your index, even
 for documents that don't have this specific field). As a consequence, if you
 don't need scoring on a specific field, you should disable norms on that
 field. In  particular, this is the case for fields that are used solely for
 filtering or aggregations.
 
-TIP: The `norms.enabled` setting must have the same setting for fields of the
+TIP: The `norms` setting must have the same value for fields of the
 same name in the same index.  Norms can be disabled on existing fields using
 the <<indices-put-mapping,PUT mapping API>>.
 
@@ -24,10 +24,8 @@ PUT my_index/_mapping/my_type
 {
   "properties": {
     "title": {
-      "type": "string",
-      "norms": {
-        "enabled": false
-      }
+      "type": "text",
+      "norms": false
     }
   }
 }
@@ -41,31 +39,3 @@ results since some documents won't have norms anymore while other documents
 might still have norms.
 
 
-==== Lazy loading of norms
-
-Norms can be loaded into memory eagerly (`eager`), whenever a new segment
-comes online, or they can loaded lazily (`lazy`, default), only when the field
-is queried.
-
-Eager loading can be configured as follows:
-
-[source,js]
-------------
-PUT my_index/_mapping/my_type
-{
-  "properties": {
-    "title": {
-      "type": "string",
-      "norms": {
-        "loading": "eager"
-      }
-    }
-  }
-}
-------------
-// AUTOSENSE
-
-TIP: The `norms.loading` setting must have the same setting for fields of the
-same name in the same index.  Its value can be updated on existing fields
-using the <<indices-put-mapping,PUT mapping API>>.
-

+ 2 - 5
docs/reference/mapping/params/null-value.asciidoc

@@ -16,8 +16,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "status_code": {
-          "type":       "string",
-          "index":      "not_analyzed",
+          "type":       "keyword",
           "null_value": "NULL" <1>
         }
       }
@@ -50,6 +49,4 @@ GET my_index/_search
 <3> A query for `NULL` returns document 1, but not document 2.
 
 IMPORTANT: The `null_value` needs to be the same datatype as the field.  For
-instance, a `long` field cannot have a string `null_value`.  String fields
-which are `analyzed` will also pass the `null_value` through the configured
-analyzer.
+instance, a `long` field cannot have a string `null_value`.

+ 1 - 1
docs/reference/mapping/params/position-increment-gap.asciidoc

@@ -57,7 +57,7 @@ PUT my_index
     "groups": {
       "properties": {
         "names": {
-          "type": "string",
+          "type": "text",
           "position_increment_gap": 0 <1>
         }
       }

+ 2 - 2
docs/reference/mapping/params/properties.asciidoc

@@ -23,14 +23,14 @@ PUT my_index
         "manager": { <2>
           "properties": {
             "age":  { "type": "integer" },
-            "name": { "type": "string"  }
+            "name": { "type": "text"  }
           }
         },
         "employees": { <3>
           "type": "nested",
           "properties": {
             "age":  { "type": "integer" },
-            "name": { "type": "string"  }
+            "name": { "type": "text"  }
           }
         }
       }

+ 1 - 1
docs/reference/mapping/params/search-analyzer.asciidoc

@@ -41,7 +41,7 @@ PUT /my_index
     "my_type": {
       "properties": {
         "text": {
-          "type": "string",
+          "type": "text",
           "analyzer": "autocomplete", <2>
           "search_analyzer": "standard" <2>
         }

+ 4 - 4
docs/reference/mapping/params/similarity.asciidoc

@@ -5,8 +5,8 @@ Elasticsearch allows you to configure a scoring algorithm or _similarity_ per
 field. The `similarity` setting provides a simple way of choosing a similarity
 algorithm other than the default TF/IDF, such as `BM25`.
 
-Similarities are mostly useful for <<string,`string`>> fields, especially
-`analyzed` string fields, but can also apply to other field types.
+Similarities are mostly useful for <<text,`text`>> fields, but can also apply
+to other field types.
 
 Custom similarities can be configured by tuning the parameters of the built-in
 similarities. For more details about these expert options, see the
@@ -37,10 +37,10 @@ PUT my_index
     "my_type": {
       "properties": {
         "default_field": { <1>
-          "type": "string"
+          "type": "text"
         },
         "bm25_field": {
-          "type": "string",
+          "type": "text",
           "similarity": "BM25" <2>
         }
       }

+ 2 - 2
docs/reference/mapping/params/store.asciidoc

@@ -24,7 +24,7 @@ PUT /my_index
     "my_type": {
       "properties": {
         "title": {
-          "type": "string",
+          "type": "text",
           "store": true <1>
         },
         "date": {
@@ -32,7 +32,7 @@ PUT /my_index
           "store": true <1>
         },
         "content": {
-          "type": "string"
+          "type": "text"
         }
       }
     }

+ 1 - 1
docs/reference/mapping/params/term-vector.asciidoc

@@ -35,7 +35,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "text": {
-          "type":        "string",
+          "type":        "text",
           "term_vector": "with_positions_offsets"
         }
       }

+ 8 - 4
docs/reference/mapping/types.asciidoc

@@ -7,7 +7,7 @@ document:
 [float]
 === Core datatypes
 
-<<string>>::    `string`
+string::        <<text,`text`>> and <<keyword,`keyword`>>
 <<number>>::    `long`, `integer`, `short`, `byte`, `double`, `float`
 <<date>>::      `date`
 <<boolean>>::   `boolean`
@@ -45,9 +45,9 @@ Attachment datatype::
 === Multi-fields
 
 It is often useful to index the same field in different ways for different
-purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
-an `analyzed` field for full-text search, and as a `not_analyzed` field for
-sorting or aggregations.  Alternatively, you could index a string field with
+purposes. For instance, a `string` field could be mapped as
+a `text` field for full-text search, and as a `keyword` field for
+sorting or aggregations.  Alternatively, you could index a text field with
 the <<analysis-standard-analyzer,`standard` analyzer>>, the
 <<english-analyzer,`english`>> analyzer, and the
 <<french-analyzer,`french` analyzer>>.
@@ -69,6 +69,8 @@ include::types/geo-shape.asciidoc[]
 
 include::types/ip.asciidoc[]
 
+include::types/keyword.asciidoc[]
+
 include::types/nested.asciidoc[]
 
 include::types/numeric.asciidoc[]
@@ -77,6 +79,8 @@ include::types/object.asciidoc[]
 
 include::types/string.asciidoc[]
 
+include::types/text.asciidoc[]
+
 include::types/token-count.asciidoc[]
 
 

+ 1 - 1
docs/reference/mapping/types/binary.asciidoc

@@ -13,7 +13,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "name": {
-          "type": "string"
+          "type": "text"
         },
         "blob": {
           "type": "binary"

+ 111 - 0
docs/reference/mapping/types/keyword.asciidoc

@@ -0,0 +1,111 @@
+[[keyword]]
+=== Keyword datatype
+
+A field to index structured content such as email addresses, hostnames, status
+codes, zip codes or tags.
+
+They are typically used for filtering (_Find me all blog posts where
+++status++ is ++published++_), for sorting, and for aggregations. Keyword
+fields are only searchable by their exact value.
+
+If you need to index full text content such as email bodies or product
+descriptions, it is likely that you should rather use a <<text,`text`>> field.
+
+Below is an example of a mapping for a keyword field:
+
+[source,js]
+--------------------------------
+PUT my_index
+{
+  "mappings": {
+    "my_type": {
+      "properties": {
+        "tags": {
+          "type":  "keyword"
+        }
+      }
+    }
+  }
+}
+--------------------------------
+// AUTOSENSE
+
+[[keyword-params]]
+==== Parameters for keyword fields
+
+The following parameters are accepted by `keyword` fields:
+
+[horizontal]
+
+<<mapping-boost,`boost`>>::
+
+    Mapping field-level query time boosting. Accepts a floating point number, defaults
+    to `1.0`.
+
+<<doc-values,`doc_values`>>::
+
+    Should the field be stored on disk in a column-stride fashion, so that it
+    can later be used for sorting, aggregations, or scripting? Accepts `true`
+    (default) or `false`.
+
+<<global-ordinals,`eager_global_ordinals`>>::
+
+    Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false`
+    (default). Enabling this is a good idea on fields that are frequently used for
+    terms aggregations.
+
+<<multi-fields,`fields`>>::
+
+    Multi-fields allow the same string value to be indexed in multiple ways for
+    different purposes, such as one field for search and a multi-field for
+    sorting and aggregations.
+
+<<ignore-above,`ignore_above`>>::
+
+    Do not index any string longer than this value.  Defaults to
+    `2147483647` so that all values would be accepted.
+
+<<include-in-all,`include_in_all`>>::
+
+    Whether or not the field value should be included in the
+    <<mapping-all-field,`_all`>> field? Accepts `true` or `false`.  Defaults
+    to `false` if <<mapping-index,`index`>> is set to `false`, or if a parent
+    <<object,`object`>> field sets `include_in_all` to `false`.
+    Otherwise defaults to `true`.
+
+<<mapping-index,`index`>>::
+
+    Should the field be searchable? Accepts `true` (default) or `false`.
+
+<<index-options,`index_options`>>::
+
+    What information should be stored in the index, for scoring purposes.
+    Defaults to `docs` but can also be set to `freqs` to take term frequency into account
+    when computing scores.
+
+<<norms,`norms`>>::
+
+    Whether field-length should be taken into account when scoring queries.
+    Accepts `true` or `false` (default).
+
+<<null-value,`null_value`>>::
+
+    Accepts a string value which is substituted for any explicit `null`
+    values.  Defaults to `null`, which means the field is treated as missing.
+
+<<mapping-store,`store`>>::
+
+    Whether the field value should be stored and retrievable separately from
+    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
+    (default).
+
+<<search-analyzer,`search_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time on
+    <<mapping-index,`analyzed`>> fields. Defaults to the `analyzer` setting.
+
+<<similarity,`similarity`>>::
+
+    Which scoring algorithm or _similarity_ should be used. Defaults
+    to `classic`, which uses TF/IDF.
+

+ 3 - 4
docs/reference/mapping/types/object.asciidoc

@@ -46,16 +46,15 @@ PUT my_index
     "my_type": { <1>
       "properties": {
         "region": {
-          "type": "string",
-          "index": "not_analyzed"
+          "type": "keyword"
         },
         "manager": { <2>
           "properties": {
             "age":  { "type": "integer" },
             "name": { <3>
               "properties": {
-                "first": { "type": "string" },
-                "last":  { "type": "string" }
+                "first": { "type": "text" },
+                "last":  { "type": "text" }
               }
             }
           }

+ 1 - 176
docs/reference/mapping/types/string.asciidoc

@@ -1,179 +1,4 @@
 [[string]]
 === String datatype
 
-Fields of type `string` accept text values.  Strings may be sub-divided into:
-
-Full text::
-+
---
-
-Full text values, like the body of an email, are typically used for text based
-relevance searches, such as: _Find the most relevant documents that match a
-query for "quick brown fox"_.
-
-These fields are `analyzed`, that is they are passed through an
-<<analysis,analyzer>> to convert the string into a list of individual terms
-before being indexed. The analysis process allows Elasticsearch to search for
-individual words _within_  each full text field.  Full text fields are not
-used for sorting and seldom used for aggregations (although the
-<<search-aggregations-bucket-significantterms-aggregation,significant terms aggregation>> is a notable exception).
-
---
-
-Keywords::
-
-Keywords are exact values like email addresses, hostnames, status codes, or
-tags.  They are typically used for filtering (_Find me all blog posts where
-++status++ is ++published++_), for sorting, and for aggregations. Keyword
-fields are `not_analyzed`.  Instead, the exact string value is added to the
-index as a single term.
-
-Below is an example of a mapping for a full text (`analyzed`) and a keyword
-(`not_analyzed`) string field:
-
-[source,js]
---------------------------------
-PUT my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "full_name": { <1>
-          "type":  "string"
-        },
-        "status": {
-          "type":  "string", <2>
-          "index": "not_analyzed"
-        }
-      }
-    }
-  }
-}
---------------------------------
-// AUTOSENSE
-<1> The `full_name` field is an `analyzed` full text field -- `index:analyzed` is the default.
-<2> The `status` field is a `not_analyzed` keyword field.
-
-Sometimes it is useful to have both a full text (`analyzed`) and a keyword
-(`not_analyzed`) version of the same field: one for full text search and the
-other for aggregations and sorting. This can be achieved with
-<<multi-fields,multi-fields>>.
-
-
-[[string-params]]
-==== Parameters for string fields
-
-The following parameters are accepted by `string` fields:
-
-[horizontal]
-
-<<analyzer,`analyzer`>>::
-
-    The <<analysis,analyzer>> which should be used for
-    <<mapping-index,`analyzed`>> string fields, both at index-time and at
-    search-time (unless overridden by the  <<search-analyzer,`search_analyzer`>>).
-    Defaults to the default index analyzer, or the
-    <<analysis-standard-analyzer,`standard` analyzer>>.
-
-<<mapping-boost,`boost`>>::
-
-    Mapping field-level query time boosting. Accepts a floating point number, defaults
-    to `1.0`.
-
-<<doc-values,`doc_values`>>::
-
-    Should the field be stored on disk in a column-stride fashion, so that it
-    can later be used for sorting, aggregations, or scripting? Accepts `true`
-    or `false`. Defaults to `true` for `not_analyzed` fields. Analyzed fields
-    do not support doc values.
-
-<<fielddata,`fielddata`>>::
-
-    Can the field use in-memory fielddata for sorting, aggregations,
-    or scripting? Accepts `disabled` or `paged_bytes` (default).
-    Not analyzed fields will use <<doc-values,doc values>> in preference
-    to fielddata.
-
-<<multi-fields,`fields`>>::
-
-    Multi-fields allow the same string value to be indexed in multiple ways for
-    different purposes, such as one field for search and a multi-field for
-    sorting and aggregations, or the same string value analyzed by different
-    analyzers.
-
-<<ignore-above,`ignore_above`>>::
-
-    Do not index or analyze any string longer than this value.  Defaults to `0` (disabled).
-
-<<include-in-all,`include_in_all`>>::
-
-    Whether or not the field value should be included in the
-    <<mapping-all-field,`_all`>> field? Accepts `true` or `false`.  Defaults
-    to `false` if <<mapping-index,`index`>> is set to `no`, or if a parent
-    <<object,`object`>> field sets `include_in_all` to `false`.
-    Otherwise defaults to `true`.
-
-<<mapping-index,`index`>>::
-
-    Should the field be searchable? Accepts `analyzed` (default, treat as full-text field),
-    `not_analyzed` (treat as keyword field) and `no`.
-
-<<index-options,`index_options`>>::
-
-    What information should be stored in the index, for search and highlighting purposes.
-    Defaults to `positions` for <<mapping-index,`analyzed`>> fields, and to `docs` for
-    `not_analyzed` fields.
-
-
-<<norms,`norms`>>::
-+
---
-
-Whether field-length should be taken into account when scoring queries.
-Defaults depend on the <<mapping-index,`index`>> setting:
-
-* `analyzed` fields default to `{ "enabled": true, "loading": "lazy" }`.
-* `not_analyzed` fields default to `{ "enabled": false }`.
---
-
-<<null-value,`null_value`>>::
-
-    Accepts a string value which is substituted for any explicit `null`
-    values.  Defaults to `null`, which means the field is treated as missing.
-    If the field is `analyzed`, the `null_value` will also be analyzed.
-
-<<position-increment-gap,`position_increment_gap`>>::
-
-    The number of fake term positions which should be inserted between
-    each element of an array of strings. Defaults to 0.
-    The number of fake term position which should be inserted between each
-    element of an array of strings. Defaults to the position_increment_gap
-    configured on the analyzer which defaults to 100. 100 was chosen because it
-    prevents phrase queries with reasonably large slops (less than 100) from
-    matching terms across field values.
-
-<<mapping-store,`store`>>::
-
-    Whether the field value should be stored and retrievable separately from
-    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
-    (default).
-
-<<search-analyzer,`search_analyzer`>>::
-
-    The <<analyzer,`analyzer`>> that should be used at search time on
-    <<mapping-index,`analyzed`>> fields. Defaults to the `analyzer` setting.
-	
-<<search-quote-analyzer,`search_quote_analyzer`>>::
-
-    The <<analyzer,`analyzer`>> that should be used at search time when a
-    phrase is encountered. Defaults to the `search_analyzer` setting.
-
-<<similarity,`similarity`>>::
-
-    Which scoring algorithm or _similarity_ should be used. Defaults
-    to `classic`, which uses TF/IDF.
-
-<<term-vector,`term_vector`>>::
-
-    Whether term vectors should be stored for an <<mapping-index,`analyzed`>>
-    field. Defaults to `no`.
+NOTE: The `string` field has been removed in favor of the `text` and `keyword` fields.

+ 139 - 0
docs/reference/mapping/types/text.asciidoc

@@ -0,0 +1,139 @@
+[[text]]
+=== Text datatype
+
+A field to index full-text values, such as the body of an email or the
+description of a product. These fields are `analyzed`, that is they are passed through an
+<<analysis,analyzer>> to convert the string into a list of individual terms
+before being indexed. The analysis process allows Elasticsearch to search for
+individual words _within_  each full text field.  Text fields are not
+used for sorting and seldom used for aggregations (although the
+<<search-aggregations-bucket-significantterms-aggregation,significant terms aggregation>> 
+is a notable exception).
+
+If you need to index structured content such as email addresses, hostnames, status
+codes, or tags, it is likely that you should rather use a <<keyword,`keyword`>> field.
+
+Below is an example of a mapping for a text field:
+
+[source,js]
+--------------------------------
+PUT my_index
+{
+  "mappings": {
+    "my_type": {
+      "properties": {
+        "full_name": {
+          "type":  "text"
+        }
+      }
+    }
+  }
+}
+--------------------------------
+// AUTOSENSE
+
+Sometimes it is useful to have both a full text (`text`) and a keyword
+(`keyword`) version of the same field: one for full text search and the
+other for aggregations and sorting. This can be achieved with
+<<multi-fields,multi-fields>>.
+
+[[text-params]]
+==== Parameters for text fields
+
+The following parameters are accepted by `text` fields:
+
+[horizontal]
+
+<<analyzer,`analyzer`>>::
+
+    The <<analysis,analyzer>> which should be used for
+    <<mapping-index,`analyzed`>> string fields, both at index-time and at
+    search-time (unless overridden by the  <<search-analyzer,`search_analyzer`>>).
+    Defaults to the default index analyzer, or the
+    <<analysis-standard-analyzer,`standard` analyzer>>.
+
+<<mapping-boost,`boost`>>::
+
+    Mapping field-level query time boosting. Accepts a floating point number, defaults
+    to `1.0`.
+
+<<global-ordinals,`eager_global_ordinals`>>::
+
+    Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false`
+    (default). Enabling this is a good idea on fields that are frequently used for
+    (significant) terms aggregations.
+
+<<fielddata,`fielddata`>>::
+
+    Can the field use in-memory fielddata for sorting, aggregations,
+    or scripting? Accepts `true` or `false` (default).
+
+<<field-data-filtering,`fielddata_frequency_filter`>>::
+
+    Expert settings which allow you to decide which values to load in memory when `fielddata`
+    is enabled. By default all values are loaded.
+
+<<multi-fields,`fields`>>::
+
+    Multi-fields allow the same string value to be indexed in multiple ways for
+    different purposes, such as one field for search and a multi-field for
+    sorting and aggregations, or the same string value analyzed by different
+    analyzers.
+
+<<include-in-all,`include_in_all`>>::
+
+    Whether or not the field value should be included in the
+    <<mapping-all-field,`_all`>> field? Accepts `true` or `false`.  Defaults
+    to `false` if <<mapping-index,`index`>> is set to `false`, or if a parent
+    <<object,`object`>> field sets `include_in_all` to `false`.
+    Otherwise defaults to `true`.
+
+<<mapping-index,`index`>>::
+
+    Should the field be searchable? Accepts `true` (default) or `false`.
+
+<<index-options,`index_options`>>::
+
+    What information should be stored in the index, for search and highlighting purposes.
+    Defaults to `positions`.
+
+<<norms,`norms`>>::
+
+    Whether field-length should be taken into account when scoring queries.
+    Accepts `true` (default) or `false`.
+
+<<position-increment-gap,`position_increment_gap`>>::
+
+    The number of fake term positions which should be inserted between each
+    element of an array of strings. Defaults to the `position_increment_gap`
+    configured on the analyzer, which defaults to `100`. `100` was chosen because it
+    prevents phrase queries with reasonably large slops (less than 100) from
+    matching terms across field values.
+
+<<mapping-store,`store`>>::
+
+    Whether the field value should be stored and retrievable separately from
+    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
+    (default).
+
+<<search-analyzer,`search_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time on
+    <<mapping-index,`analyzed`>> fields. Defaults to the `analyzer` setting.
+
+<<search-quote-analyzer,`search_quote_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time when a
+    phrase is encountered. Defaults to the `search_analyzer` setting.
+
+<<similarity,`similarity`>>::
+
+    Which scoring algorithm or _similarity_ should be used. Defaults
+    to `classic`, which uses TF/IDF.
+
+<<term-vector,`term_vector`>>::
+
+    Whether term vectors should be stored for an <<mapping-index,`analyzed`>>
+    field. Defaults to `no`.

+ 1 - 1
docs/reference/mapping/types/token-count.asciidoc

@@ -15,7 +15,7 @@ PUT my_index
     "my_type": {
       "properties": {
         "name": { <1>
-          "type": "string",
+          "type": "text",
           "fields": {
             "length": { <2>
               "type":     "token_count",

+ 30 - 0
docs/reference/migration/migrate_5_0/mapping.asciidoc

@@ -16,6 +16,26 @@ values.  For backwards compatibility purposes, during the 5.x series:
   with `string` fields are no longer possible with `text`/`keyword` fields
   such as enabling `term_vectors` on a not-analyzed `keyword` field.
 
+==== Default string mappings
+
+String mappings now have the following default mappings:
+
+[source,json]
+---------------
+{
+  "type": "text",
+  "fields": {
+    "keyword": {
+      "type": "keyword",
+      "ignore_above": 256
+    }
+  }
+}
+---------------
+
+This allows you to perform full-text search on the original field and to sort
+and run aggregations on the `keyword` sub-field.
+
 ==== `index` property
 
 On all field datatypes (except for the deprecated `string` field), the `index`
@@ -35,12 +55,22 @@ now defaults to using `float` instead of `double`. The reasoning is that
 floats should be more than enough for most cases but would decrease storage
 requirements significantly.
 
+==== `norms`
+
+`norms` now takes a boolean instead of an object. This boolean is the replacement
+for `norms.enabled`. There is no replacement for `norms.loading` since eager
+loading of norms is not useful anymore now that norms are disk-based.
+
 ==== `fielddata.format`
 
 Setting `fielddata.format: doc_values` in the mappings used to implicitly
 enable doc-values on a field. This no longer works: the only way to enable or
 disable doc-values is by using the `doc_values` property of mappings.
 
+==== `fielddata.frequency.regex`
+
+Regex filters are not supported anymore and will be dropped on upgrade.
+
 ==== Source-transform removed
 
 The source `transform` feature has been removed. Instead, use an ingest pipeline

+ 1 - 1
docs/reference/query-dsl/exists-query.asciidoc

@@ -47,7 +47,7 @@ instance, if the `user` field were mapped as follows:
 [source,js]
 --------------------------------------------------
   "user": {
-    "type": "string",
+    "type": "text",
     "null_value": "_null_"
   }
 --------------------------------------------------

+ 5 - 5
docs/reference/query-dsl/mlt-query.asciidoc

@@ -116,18 +116,18 @@ curl -s -XPUT 'http://localhost:9200/imdb/' -d '{
     "movies": {
       "properties": {
         "title": {
-          "type": "string",
+          "type": "text",
           "term_vector": "yes"
          },
          "description": {
-          "type": "string"
+          "type": "text"
         },
         "tags": {
-          "type": "string",
+          "type": "text",
           "fields" : {
             "raw": {
-              "type" : "string",
-              "index" : "not_analyzed",
+              "type" : "text",
+              "analyzer": "keyword",
               "term_vector" : "yes"
             }
           }

+ 9 - 10
docs/reference/query-dsl/term-query.asciidoc

@@ -49,13 +49,13 @@ GET /_search
 .Why doesn't the `term` query match my document?
 **************************************************
 
-String fields can be `analyzed` (treated as full text, like the body of an
-email), or `not_analyzed` (treated as exact values, like an email address or a
-zip code).  Exact values (like numbers, dates, and `not_analyzed` strings) have
+String fields can be of type `text` (treated as full text, like the body of an
+email), or `keyword` (treated as exact values, like an email address or a
+zip code).  Exact values (like numbers, dates, and keywords) have
 the exact value specified in the field added to the inverted index in order
 to make them searchable.
 
-By default, however, `string` fields are `analyzed`. This means that their
+However, `text` fields are `analyzed`. This means that their
 values are first passed through an <<analysis,analyzer>> to produce a list of
 terms, which are then added to the inverted index.
 
@@ -70,7 +70,7 @@ within a big block of full text.
 
 The `term` query looks for the *exact* term in the field's inverted index --
 it doesn't know anything about the field's analyzer.  This makes it useful for
-looking up values in `not_analyzed` string fields, or in numeric or date
+looking up values in keyword fields, or in numeric or date
 fields.  When querying full text fields, use the
 <<query-dsl-match-query,`match` query>> instead, which understands how the field
 has been analyzed.
@@ -86,11 +86,10 @@ PUT my_index
     "my_type": {
       "properties": {
         "full_text": {
-          "type":  "string" <1>
+          "type":  "text" <1>
         },
         "exact_value": {
-          "type":  "string",
-          "index": "not_analyzed" <2>
+          "type":  "keyword" <2>
         }
       }
     }
@@ -105,8 +104,8 @@ PUT my_index/my_type/1
 --------------------------------------------------
 // AUTOSENSE
 
-<1> The `full_text` field is `analyzed` by default.
-<2> The `exact_value` field is set to be `not_analyzed`.
+<1> The `full_text` field is of type `text` and will be analyzed.
+<2> The `exact_value` field is of type `keyword` and will NOT be analyzed.
 <3> The `full_text` inverted index will contain the terms: [`quick`, `foxes`].
 <4> The `exact_value` inverted index will contain the exact term: [`Quick Foxes!`].