|
@@ -423,6 +423,286 @@ This results in a response like this:
|
|
|
--------------------------------------------------
|
|
|
// TESTRESPONSE[s/"took": 6,/"took": "$body.took",/]
|
|
|
|
|
|
+[float]
|
|
|
+==== Optimizing wildcard queries
|
|
|
+
|
|
|
+Wildcard queries are more expensive than other queries for the percolator,
|
|
|
+especially if the wildcard expressions are large.
|
|
|
+
|
|
|
+In the case of `wildcard` queries with prefix wildcard expressions or just the `prefix` query,
|
|
|
+the `edge_ngram` token filter can be used to replace these queries with a regular `term`
|
|
|
+query on a field where the `edge_ngram` token filter is configured.
|
|
|
+
|
|
|
+Creating an index with custom analysis settings:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+PUT my_queries1
|
|
|
+{
|
|
|
+ "settings": {
|
|
|
+ "analysis": {
|
|
|
+ "analyzer": {
|
|
|
+ "wildcard_prefix": { <1>
|
|
|
+ "type": "custom",
|
|
|
+ "tokenizer": "standard",
|
|
|
+ "filter": [
|
|
|
+ "standard",
|
|
|
+ "lowercase",
|
|
|
+ "wildcard_edge_ngram"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "filter": {
|
|
|
+ "wildcard_edge_ngram": { <2>
|
|
|
+ "type": "edge_ngram",
|
|
|
+ "min_gram": 1,
|
|
|
+ "max_gram": 32
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "mappings": {
|
|
|
+ "query": {
|
|
|
+ "properties": {
|
|
|
+ "query": {
|
|
|
+ "type": "percolator"
|
|
|
+ },
|
|
|
+ "my_field": {
|
|
|
+ "type": "text",
|
|
|
+ "fields": {
|
|
|
+ "prefix": { <3>
|
|
|
+ "type": "text",
|
|
|
+ "analyzer": "wildcard_prefix",
|
|
|
+ "search_analyzer": "standard"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+<1> The analyzer that generates the prefix tokens to be used at index time only.
|
|
|
+<2> Increase the `min_gram` and decrease `max_gram` settings based on your prefix search needs.
|
|
|
+<3> This multifield should be used to do the prefix search
|
|
|
+ with a `term` or `match` query instead of a `prefix` or `wildcard` query.
|
|
|
+
|
|
|
+
|
|
|
+Then instead of indexing the following query:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "wildcard": {
|
|
|
+ "my_field": "abc*"
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// NOTCONSOLE
|
|
|
+
|
|
|
+this query below should be indexed:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+PUT /my_queries1/query/1?refresh
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "term": {
|
|
|
+ "my_field.prefix": "abc"
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+This way the percolator can handle the second query more efficiently than the first query.
|
|
|
+
|
|
|
+The following search request will match with the previously indexed
|
|
|
+percolator query:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+GET /my_queries1/_search
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "percolate": {
|
|
|
+ "field": "query",
|
|
|
+ "document": {
|
|
|
+ "my_field": "abcd"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "took": 6,
|
|
|
+ "timed_out": false,
|
|
|
+ "_shards": {
|
|
|
+ "total": 5,
|
|
|
+ "successful": 5,
|
|
|
+ "skipped": 0,
|
|
|
+ "failed": 0
|
|
|
+ },
|
|
|
+ "hits": {
|
|
|
+ "total": 1,
|
|
|
+ "max_score": 0.41501677,
|
|
|
+ "hits": [
|
|
|
+ {
|
|
|
+ "_index": "my_queries1",
|
|
|
+ "_type": "query",
|
|
|
+ "_id": "1",
|
|
|
+ "_score": 0.41501677,
|
|
|
+ "_source": {
|
|
|
+ "query": {
|
|
|
+ "term": {
|
|
|
+ "my_field.prefix": "abc"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "fields": {
|
|
|
+ "_percolator_document_slot": [
|
|
|
+ 0
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// TESTRESPONSE[s/"took": 6,/"took": "$body.took",/]
|
|
|
+
|
|
|
+The same technique can also be used to speed up suffix
|
|
|
+wildcard searches, by using the `reverse` token filter
|
|
|
+before the `edge_ngram` token filter.
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+PUT my_queries2
|
|
|
+{
|
|
|
+ "settings": {
|
|
|
+ "analysis": {
|
|
|
+ "analyzer": {
|
|
|
+ "wildcard_suffix": {
|
|
|
+ "type": "custom",
|
|
|
+ "tokenizer": "standard",
|
|
|
+ "filter": [
|
|
|
+ "standard",
|
|
|
+ "lowercase",
|
|
|
+ "reverse",
|
|
|
+ "wildcard_edge_ngram"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "wildcard_suffix_search_time": {
|
|
|
+ "type": "custom",
|
|
|
+ "tokenizer": "standard",
|
|
|
+ "filter": [
|
|
|
+ "standard",
|
|
|
+ "lowercase",
|
|
|
+ "reverse"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "filter": {
|
|
|
+ "wildcard_edge_ngram": {
|
|
|
+ "type": "edge_ngram",
|
|
|
+ "min_gram": 1,
|
|
|
+ "max_gram": 32
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "mappings": {
|
|
|
+ "query": {
|
|
|
+ "properties": {
|
|
|
+ "query": {
|
|
|
+ "type": "percolator"
|
|
|
+ },
|
|
|
+ "my_field": {
|
|
|
+ "type": "text",
|
|
|
+ "fields": {
|
|
|
+ "suffix": {
|
|
|
+ "type": "text",
|
|
|
+ "analyzer": "wildcard_suffix",
|
|
|
+ "search_analyzer": "wildcard_suffix_search_time" <1>
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+<1> A custom analyzer is needed at search time too, because otherwise
|
|
|
+ the query terms would not be reversed and would not match
|
|
|
+ with the reversed suffix tokens.
|
|
|
+
|
|
|
+Then instead of indexing the following query:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "wildcard": {
|
|
|
+ "my_field": "*xyz"
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// NOTCONSOLE
|
|
|
+
|
|
|
+the following query should be indexed:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+PUT /my_queries2/query/2?refresh
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "match": { <1>
|
|
|
+ "my_field.suffix": "xyz"
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+<1> The `match` query should be used instead of the `term` query,
|
|
|
+ because text analysis needs to reverse the query terms.
|
|
|
+
|
|
|
+The following search request will match with the previously indexed
|
|
|
+percolator query:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+GET /my_queries2/_search
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "percolate": {
|
|
|
+ "field": "query",
|
|
|
+ "document": {
|
|
|
+ "my_field": "wxyz"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
[float]
|
|
|
==== Dedicated Percolator Index
|
|
|
|