Browse Source

Consolify term and phrase suggester docs

This includes a working example of reverse filters to support
correcting prefix errors.
Nik Everett 9 years ago
parent
commit
3c0288ee98

+ 7 - 0
docs/build.gradle

@@ -63,6 +63,13 @@ buildRestTests.docs = fileTree(projectDir) {
 
 Closure setupTwitter = { String name, int count ->
   buildRestTests.setups[name] = '''
+  - do:
+      indices.create:
+        index: twitter
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 1
   - do:
       bulk:
         index: twitter

+ 5 - 5
docs/reference/indices/flush.asciidoc

@@ -153,18 +153,18 @@ sync-flushed:
 --------------------------------------------------
 {
    "_shards": {
-      "total": 10,
-      "successful": 10,
+      "total": 2,
+      "successful": 2,
       "failed": 0
    },
    "twitter": {
-      "total": 10,
-      "successful": 10,
+      "total": 2,
+      "successful": 2,
       "failed": 0
    }
 }
 --------------------------------------------------
-// TESTRESPONSE[s/"successful": 10/"successful": 5/]
+// TESTRESPONSE[s/"successful": 2/"successful": 1/]
 
 Here is what it looks like when one shard group failed due to pending operations:
 

+ 56 - 181
docs/reference/search/suggesters.asciidoc

@@ -10,15 +10,25 @@ The suggest request part is either defined alongside the query part in a
 
 [source,js]
 --------------------------------------------------
-curl -s -XPOST 'localhost:9200/_search' -d '{
+POST twitter/_search
+{
   "query" : {
-    ...
+    "match": {
+      "message": "tring out Elasticsearch"
+    }
   },
   "suggest" : {
-    ...
+    "my-suggestion" : {
+      "text" : "tring out Elasticsearch",
+      "term" : {
+        "field" : "message"
+      }
+    }
   }
-}' 
+}
 --------------------------------------------------
+// CONSOLE
+// TEST[setup:twitter]
 
 Suggest requests executed against the `_suggest` endpoint should omit
 the surrounding `suggest` element which is only used if the suggest
@@ -26,15 +36,18 @@ request is part of a search.
 
 [source,js]
 --------------------------------------------------
-curl -XPOST 'localhost:9200/_suggest' -d '{
+POST _suggest
+{
   "my-suggestion" : {
-    "text" : "the amsterdma meetpu",
+    "text" : "tring out Elasticsearch",
     "term" : {
-      "field" : "body"
+      "field" : "message"
     }
   }
-}'
+}
 --------------------------------------------------
+// CONSOLE
+// TEST[setup:twitter]
 
 Several suggestions can be specified per request. Each suggestion is
 identified with an arbitrary name. In the example below two suggestions
@@ -43,21 +56,24 @@ the `term` suggester, but have a different `text`.
 
 [source,js]
 --------------------------------------------------
-"suggest" : {
+POST _suggest
+{
   "my-suggest-1" : {
-    "text" : "the amsterdma meetpu",
+    "text" : "tring out Elasticsearch",
     "term" : {
-      "field" : "body"
+      "field" : "message"
     }
   },
   "my-suggest-2" : {
-    "text" : "the rottredam meetpu",
+    "text" : "kmichy",
     "term" : {
-      "field" : "title"
+      "field" : "user"
     }
   }
 }
 --------------------------------------------------
+// CONSOLE
+// TEST[setup:twitter]
 
 The below suggest response example includes the suggestion response for
 `my-suggest-1` and `my-suggest-2`. Each suggestion part contains
@@ -68,44 +84,35 @@ in the suggest text and if found an arbitrary number of options.
 [source,js]
 --------------------------------------------------
 {
-  ...
-  "suggest": {
-    "my-suggest-1": [
-      {
-        "text" : "amsterdma",
-        "offset": 4,
-        "length": 9,
-        "options": [
-           ...
-        ]
-      },     
-      ...       
-    ],
-    "my-suggest-2" : [
-      ... 
-    ]
-  }
-  ...
+  "_shards": ...
+  "my-suggest-1": [ {
+    "text": "tring",
+    "offset": 0,
+    "length": 5,
+    "options": [ {"text": "trying", "score": 0.8, "freq": 1 } ]
+  }, {
+    "text": "out",
+    "offset": 6,
+    "length": 3,
+    "options": []
+  }, {
+    "text": "elasticsearch",
+    "offset": 10,
+    "length": 13,
+    "options": []
+  } ],
+  "my-suggest-2": ...
 }
 --------------------------------------------------
+// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
+// TESTRESPONSE[s/"my-suggest-2": \.\.\./"my-suggest-2": "$body.my-suggest-2"/]
+
 
 Each options array contains an option object that includes the
 suggested text, its document frequency and score compared to the suggest
 entry text. The meaning of the score depends on the used suggester. The
 term suggester's score is based on the edit distance.
 
-[source,js]
---------------------------------------------------
-"options": [
-  {
-    "text": "amsterdam",
-    "freq": 77,
-    "score": 0.8888889
-  },
-  ...
-]  
---------------------------------------------------
-
 [float]
 [[global-suggest]]
 === Global suggest text
@@ -116,157 +123,27 @@ and applies to the `my-suggest-1` and `my-suggest-2` suggestions.
 
 [source,js]
 --------------------------------------------------
-"suggest" : {
-  "text" : "the amsterdma meetpu",
+POST _suggest
+{
+  "text" : "tring out Elasticsearch",
   "my-suggest-1" : {
     "term" : {
-      "field" : "title"
+      "field" : "message"
     }
   },
   "my-suggest-2" : {
     "term" : {
-      "field" : "body"
+      "field" : "user"
     }
   }
 }
 --------------------------------------------------
+// CONSOLE
 
 The suggest text in the above example can also be specified as a
 suggestion-specific option. Suggest text specified at the suggestion
 level overrides the suggest text at the global level.
 
-[float]
-=== Other suggest example
-
-In the below example we request suggestions for the following suggest
-text: `devloping distibutd saerch engies` on the `title` field with a
-maximum of 3 suggestions per term inside the suggest text. Note that in
-this example we set `size` to `0`. This isn't required, but a
-nice optimization. The suggestions are gathered in the `query` phase and
-in the case that we only care about suggestions (so no hits) we don't
-need to execute the `fetch` phase.
-
-[source,js]
---------------------------------------------------
-curl -s -XPOST 'localhost:9200/_search' -d '{
-  "size": 0,
-  "suggest" : {
-    "my-title-suggestions-1" : {
-      "text" : "devloping distibutd saerch engies",
-      "term" : {
-        "size" : 3,
-        "field" : "title"  
-      }
-    }
-  }
-}'
---------------------------------------------------
-
-The above request could yield the response as stated in the code example
-below. As you can see if we take the first suggested options of each
-suggestion entry we get `developing distributed search engines` as
-result.
-
-[source,js]
---------------------------------------------------
-{
-  ...
-  "suggest": {
-    "my-title-suggestions-1": [
-      {
-        "text": "devloping",
-        "offset": 0,
-        "length": 9,
-        "options": [
-          {
-            "text": "developing",
-            "freq": 77,
-            "score": 0.8888889
-          },
-          {
-            "text": "deloping",
-            "freq": 1,
-            "score": 0.875
-          },
-          {
-            "text": "deploying",
-            "freq": 2,
-            "score": 0.7777778
-          }
-        ]
-      },
-      {
-        "text": "distibutd",
-        "offset": 10,
-        "length": 9,
-        "options": [
-          {
-            "text": "distributed",
-            "freq": 217,
-            "score": 0.7777778
-          },
-          {
-            "text": "disributed",
-            "freq": 1,
-            "score": 0.7777778
-          },
-          {
-            "text": "distribute",
-            "freq": 1,
-            "score": 0.7777778
-          }
-        ]
-      },
-      {
-        "text": "saerch",
-        "offset": 20,
-        "length": 6,
-        "options": [
-          {
-            "text": "search",
-            "freq": 1038,
-            "score": 0.8333333
-          },
-          {
-            "text": "smerch",
-            "freq": 3,
-            "score": 0.8333333
-          },
-          {
-            "text": "serch",
-            "freq": 2,
-            "score": 0.8
-          }
-        ]
-      },
-      {
-        "text": "engies",
-        "offset": 27,
-        "length": 6,
-        "options": [
-          {
-            "text": "engines",
-            "freq": 568,
-            "score": 0.8333333
-          },
-          {
-            "text": "engles",
-            "freq": 3,
-            "score": 0.8333333
-          },
-          {
-            "text": "eggies",
-            "freq": 1,
-            "score": 0.8333333
-          }
-        ]
-      }
-    ]
-  }
-  ...
-}
---------------------------------------------------
-
 include::suggesters/term-suggest.asciidoc[]
 
 include::suggesters/phrase-suggest.asciidoc[]
@@ -274,5 +151,3 @@ include::suggesters/phrase-suggest.asciidoc[]
 include::suggesters/completion-suggest.asciidoc[]
 
 include::suggesters/context-suggest.asciidoc[]
-
-

+ 137 - 99
docs/reference/search/suggesters/phrase-suggest.asciidoc

@@ -17,36 +17,94 @@ co-occurrence and frequencies.
 
 ==== API Example
 
-The `phrase` request is defined along side the query part in the json
-request:
+In general the `phrase` suggester requires special mapping up front to work.
+The `phrase` suggester examples on this page need the following mapping to
+work. The `reverse` analyzer is used only in the last example.
 
 [source,js]
 --------------------------------------------------
-curl -XPOST 'localhost:9200/_search' -d '{
-  "suggest" : {
-    "text" : "Xor the Got-Jewel",
-    "simple_phrase" : {
-      "phrase" : {
-        "analyzer" : "body",
-        "field" : "bigram",
-        "size" : 1,
-        "real_word_error_likelihood" : 0.95,
-        "max_errors" : 0.5,
-        "gram_size" : 2,
-        "direct_generator" : [ {
-          "field" : "body",
-          "suggest_mode" : "always",
-          "min_word_length" : 1
-        } ],
-        "highlight": {
-          "pre_tag": "<em>",
-          "post_tag": "</em>"
+PUT test
+{
+  "settings": {
+    "index": {
+      "number_of_shards": 1,
+      "analysis": {
+        "analyzer": {
+          "trigram": {
+            "type": "custom",
+            "tokenizer": "standard",
+            "filter": ["standard", "shingle"]
+          },
+          "reverse": {
+            "type": "custom",
+            "tokenizer": "standard",
+            "filter": ["standard", "reverse"]
+          }
+        },
+        "filter": {
+          "shingle": {
+            "type": "shingle",
+            "min_shingle_size": 2,
+            "max_shingle_size": 3
+          }
         }
       }
     }
+  },
+  "mappings": {
+    "test": {
+      "properties": {
+        "title": {
+          "type": "text",
+          "fields": {
+            "trigram": {
+              "type": "text",
+              "analyzer": "trigram"
+            },
+            "reverse": {
+              "type": "text",
+              "analyzer": "reverse"
+            }
+          }
+        }
+      }
+    }
+  }
+}
+POST test/test
+{"title": "noble warriors"}
+POST test/test
+{"title": "nobel prize"}
+POST _refresh
+--------------------------------------------------
+// TESTSETUP
+
+Once you have the analyzers and mappings set up you can use the `phrase`
+suggester in the same spot you'd use the `term` suggester:
+
+[source,js]
+--------------------------------------------------
+POST _suggest?pretty
+{
+  "text": "noble prize",
+  "simple_phrase": {
+    "phrase": {
+      "field": "title.trigram",
+      "size": 1,
+      "gram_size": 3,
+      "direct_generator": [ {
+        "field": "title.trigram",
+        "suggest_mode": "always"
+      } ],
+      "highlight": {
+        "pre_tag": "<em>",
+        "post_tag": "</em>"
+      }
+    }
   }
-}'
+}
 --------------------------------------------------
+// CONSOLE
 
 The response contains suggestions scored by the most likely spell
 correction first. In this case we received the expected correction
@@ -57,37 +115,23 @@ can contain misspellings (See parameter descriptions below).
 
 [source,js]
 --------------------------------------------------
-  {
-  "took" : 5,
-  "timed_out" : false,
-  "_shards" : {
-    "total" : 5,
-    "successful" : 5,
-    "failed" : 0
-  },
-  "hits" : {
-    "total" : 2938,
-    "max_score" : 0.0,
-    "hits" : [ ]
-  },
-  "suggest" : {
-    "simple_phrase" : [ {
-      "text" : "Xor the Got-Jewel",
+{
+  "_shards": ...
+  "simple_phrase" : [
+    {
+      "text" : "noble prize",
       "offset" : 0,
-      "length" : 17,
+      "length" : 11,
       "options" : [ {
-        "text" : "xorr the god jewel",
-        "highlighted": "<em>xorr</em> the <em>god</em> jewel",
-        "score" : 0.17877324
-      }, {
-        "text" : "xor the god jewel",
-        "highlighted": "xor the <em>god</em> jewel",
-        "score" : 0.14231323
-      } ]
-    } ]
-  }
+        "text" : "nobel prize",
+        "highlighted": "<em>nobel</em> prize",
+        "score" : 0.40765354
+      }]
+    }
+  ]
 }
 --------------------------------------------------
+// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
 
 ==== Basic Phrase suggest API parameters
 
@@ -178,34 +222,34 @@ can contain misspellings (See parameter descriptions below).
 
 [source,js]
 --------------------------------------------------
-curl -XPOST 'localhost:9200/_search' -d {
-   "suggest" : {
-     "text" : "Xor the Got-Jewel",
-     "simple_phrase" : {
-       "phrase" : {
-         "field" :  "bigram",
-         "size" :   1,
-         "direct_generator" : [ {
-           "field" :            "body",
-           "suggest_mode" :     "always",
-           "min_word_length" :  1
-         } ],
-         "collate": {
-           "query": { <1>
-             "inline" : {
-               "match": {
-                   "{{field_name}}" : "{{suggestion}}" <2>
-               }
-             }
-           },
-           "params": {"field_name" : "title"}, <3>
-           "prune": true <4>
-         }
-       }
-     }
-   }
- }
+POST _suggest
+{
+  "text" : "noble prize",
+  "simple_phrase" : {
+    "phrase" : {
+      "field" :  "title.trigram",
+      "size" :   1,
+      "direct_generator" : [ {
+        "field" :            "title.trigram",
+        "suggest_mode" :     "always",
+        "min_word_length" :  1
+      } ],
+      "collate": {
+        "query": { <1>
+          "inline" : {
+            "match": {
+              "{{field_name}}" : "{{suggestion}}" <2>
+            }
+          }
+        },
+        "params": {"field_name" : "title"}, <3>
+        "prune": true <4>
+      }
+    }
+  }
+}
 --------------------------------------------------
+// CONSOLE
 <1> This query will be run once for every suggestion.
 <2> The `{{suggestion}}` variable will be replaced by the text
     of each suggestion.
@@ -342,33 +386,27 @@ accept ordinary analyzer names.
 
 [source,js]
 --------------------------------------------------
-curl -s -XPOST 'localhost:9200/_search' -d {
- "suggest" : {
-    "text" : "Xor the Got-Jewel",
-    "simple_phrase" : {
-      "phrase" : {
-        "analyzer" : "body",
-        "field" : "bigram",
-        "size" : 4,
-        "real_word_error_likelihood" : 0.95,
-        "confidence" : 2.0,
-        "gram_size" : 2,
-        "direct_generator" : [ {
-          "field" : "body",
-          "suggest_mode" : "always",
-          "min_word_length" : 1
-        }, {
-          "field" : "reverse",
-          "suggest_mode" : "always",
-          "min_word_length" : 1,
-          "pre_filter" : "reverse",
-          "post_filter" : "reverse"
-        } ]
-      }
+POST _suggest
+{
+  "text" : "obel prize",
+  "simple_phrase" : {
+    "phrase" : {
+      "field" : "title.trigram",
+      "size" : 1,
+      "direct_generator" : [ {
+        "field" : "title.trigram",
+        "suggest_mode" : "always"
+      }, {
+        "field" : "title.reverse",
+        "suggest_mode" : "always",
+        "pre_filter" : "reverse",
+        "post_filter" : "reverse"
+      } ]
     }
   }
 }
 --------------------------------------------------
+// CONSOLE
 
 `pre_filter` and `post_filter` can also be used to inject synonyms after
 candidates are generated. For instance for the query `captain usq` we

+ 4 - 4
docs/reference/search/uri-request.asciidoc

@@ -21,19 +21,19 @@ And here is a sample response:
     "timed_out": false,
     "took": 62,
     "_shards":{
-        "total" : 5,
-        "successful" : 5,
+        "total" : 1,
+        "successful" : 1,
         "failed" : 0
     },
     "hits":{
         "total" : 1,
-        "max_score": 0.2876821,
+        "max_score": 1.3862944,
         "hits" : [
             {
                 "_index" : "twitter",
                 "_type" : "tweet",
                 "_id" : "0",
-                "_score": 0.2876821,
+                "_score": 1.3862944,
                 "_source" : {
                     "user" : "kimchy",
                     "date" : "2009-11-15T14:12:12",