7 years ago · cef7bd2079
--- a/docs/reference/mapping/types/percolator.asciidoc
+++ b/docs/reference/mapping/types/percolator.asciidoc
@@ -423,6 +423,286 @@ This results in a response like this:
 
				 --------------------------------------------------
			
 
				 // TESTRESPONSE[s/"took": 6,/"took": "$body.took",/]
			
 
				 
			
 
				+[float]
			
 
				+==== Optimizing wildcard queries.
			
 
				+
			
 
				+Wildcard queries are more expensive than other queries for the percolator,
			
 
				+especially if the wildcard expressions are large.
			
 
				+
			
 
				+In the case of `wildcard` queries with prefix wildcard expressions or just the `prefix` query,
			
 
				+the `edge_ngram` token filter can be used to replace these queries with a regular `term`
			
 
				+query on a field where the `edge_ngram` token filter is configured.
			
 
				+
			
 
				+Creating an index with custom analysis settings:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT my_queries1
			
 
				+{
			
 
				+  "settings": {
			
 
				+    "analysis": {
			
 
				+      "analyzer": {
			
 
				+        "wildcard_prefix": { <1>
			
 
				+          "type": "custom",
			
 
				+          "tokenizer": "standard",
			
 
				+          "filter": [
			
 
				+            "standard",
			
 
				+            "lowercase",
			
 
				+            "wildcard_edge_ngram"
			
 
				+          ]
			
 
				+        }
			
 
				+      },
			
 
				+      "filter": {
			
 
				+        "wildcard_edge_ngram": { <2>
			
 
				+          "type": "edge_ngram",
			
 
				+          "min_gram": 1,
			
 
				+          "max_gram": 32
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  },
			
 
				+  "mappings": {
			
 
				+    "query": {
			
 
				+      "properties": {
			
 
				+        "query": {
			
 
				+          "type": "percolator"
			
 
				+        },
			
 
				+        "my_field": {
			
 
				+          "type": "text",
			
 
				+          "fields": {
			
 
				+            "prefix": { <3>
			
 
				+              "type": "text",
			
 
				+              "analyzer": "wildcard_prefix",
			
 
				+              "search_analyzer": "standard"
			
 
				+            }
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+// TEST[continued]
			
 
				+
			
 
				+<1> The analyzer that generates the prefix tokens to be used at index time only.
			
 
				+<2> Increase the `min_gram` and decrease `max_gram` settings based on your prefix search needs.
			
 
				+<3> This multifield should be used to do the prefix search
			
 
				+    with a `term` or `match` query instead of a `prefix` or `wildcard` query.
			
 
				+
			
 
				+
			
 
				+Then instead of indexing the following query:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "query": {
			
 
				+    "wildcard": {
			
 
				+      "my_field": "abc*"
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// NOTCONSOLE
			
 
				+
			
 
				+this query below should be indexed:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT /my_queries1/query/1?refresh
			
 
				+{
			
 
				+  "query": {
			
 
				+    "term": {
			
 
				+      "my_field.prefix": "abc"
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+// TEST[continued]
			
 
				+
			
 
				+This way the percolator can handle the second query more efficiently than the first query.
			
 
				+
			
 
				+The following search request will match with the previously indexed
			
 
				+percolator query:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+GET /my_queries1/_search
			
 
				+{
			
 
				+  "query": {
			
 
				+    "percolate": {
			
 
				+      "field": "query",
			
 
				+      "document": {
			
 
				+        "my_field": "abcd"
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+// TEST[continued]
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "took": 6,
			
 
				+  "timed_out": false,
			
 
				+  "_shards": {
			
 
				+    "total": 5,
			
 
				+    "successful": 5,
			
 
				+    "skipped": 0,
			
 
				+    "failed": 0
			
 
				+  },
			
 
				+  "hits": {
			
 
				+    "total": 1,
			
 
				+    "max_score": 0.41501677,
			
 
				+    "hits": [
			
 
				+      {
			
 
				+        "_index": "my_queries1",
			
 
				+        "_type": "query",
			
 
				+        "_id": "1",
			
 
				+        "_score": 0.41501677,
			
 
				+        "_source": {
			
 
				+          "query": {
			
 
				+            "term": {
			
 
				+              "my_field.prefix": "abc"
			
 
				+            }
			
 
				+          }
			
 
				+        },
			
 
				+        "fields": {
			
 
				+          "_percolator_document_slot": [
			
 
				+            0
			
 
				+          ]
			
 
				+        }
			
 
				+      }
			
 
				+    ]
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// TESTRESPONSE[s/"took": 6,/"took": "$body.took",/]
			
 
				+
			
 
				+The same technique can also be used to speed up suffix
			
 
				+wildcard searches, by using the `reverse` token filter
			
 
				+before the `edge_ngram` token filter.
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT my_queries2
			
 
				+{
			
 
				+  "settings": {
			
 
				+    "analysis": {
			
 
				+      "analyzer": {
			
 
				+        "wildcard_suffix": {
			
 
				+          "type": "custom",
			
 
				+          "tokenizer": "standard",
			
 
				+          "filter": [
			
 
				+            "standard",
			
 
				+            "lowercase",
			
 
				+            "reverse",
			
 
				+            "wildcard_edge_ngram"
			
 
				+          ]
			
 
				+        },
			
 
				+        "wildcard_suffix_search_time": {
			
 
				+          "type": "custom",
			
 
				+          "tokenizer": "standard",
			
 
				+          "filter": [
			
 
				+            "standard",
			
 
				+            "lowercase",
			
 
				+            "reverse"
			
 
				+          ]
			
 
				+        }
			
 
				+      },
			
 
				+      "filter": {
			
 
				+        "wildcard_edge_ngram": {
			
 
				+          "type": "edge_ngram",
			
 
				+          "min_gram": 1,
			
 
				+          "max_gram": 32
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  },
			
 
				+  "mappings": {
			
 
				+    "query": {
			
 
				+      "properties": {
			
 
				+        "query": {
			
 
				+          "type": "percolator"
			
 
				+        },
			
 
				+        "my_field": {
			
 
				+          "type": "text",
			
 
				+          "fields": {
			
 
				+            "suffix": {
			
 
				+              "type": "text",
			
 
				+              "analyzer": "wildcard_suffix",
			
 
				+              "search_analyzer": "wildcard_suffix_search_time" <1>
			
 
				+            }
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+// TEST[continued]
			
 
				+
			
 
				+<1> A custom analyzer is needed at search time too, because otherwise
			
 
				+    the query terms are not being reversed and would not match
			
 
				+    with the reversed suffix tokens.
			
 
				+
			
 
				+Then instead of indexing the following query:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+{
			
 
				+  "query": {
			
 
				+    "wildcard": {
			
 
				+      "my_field": "*xyz"
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// NOTCONSOLE
			
 
				+
			
 
				+the following query should be indexed:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+PUT /my_queries2/query/2?refresh
			
 
				+{
			
 
				+  "query": {
			
 
				+    "match": { <1>
			
 
				+      "my_field.suffix": "xyz"
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+// TEST[continued]
			
 
				+
			
 
				+<1> The `match` query should be used instead of the `term` query,
			
 
				+    because text analysis needs to reverse the query terms.
			
 
				+
			
 
				+The following search request will match with the previously indexed
			
 
				+percolator query:
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+GET /my_queries2/_search
			
 
				+{
			
 
				+  "query": {
			
 
				+    "percolate": {
			
 
				+      "field": "query",
			
 
				+      "document": {
			
 
				+        "my_field": "wxyz"
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// CONSOLE
			
 
				+// TEST[continued]
			
 
				+
			
 
				 [float]
			
 
				 ==== Dedicated Percolator Index