FuzzyLikeThisQueryParser.java 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. /*
  2. * Licensed to Elasticsearch under one or more contributor
  3. * license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright
  5. * ownership. Elasticsearch licenses this file to you under
  6. * the Apache License, Version 2.0 (the "License"); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. package org.elasticsearch.index.query;
  20. import com.google.common.collect.Lists;
  21. import org.apache.lucene.analysis.Analyzer;
  22. import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
  23. import org.apache.lucene.search.Query;
  24. import org.elasticsearch.ElasticsearchIllegalArgumentException;
  25. import org.elasticsearch.common.ParseField;
  26. import org.elasticsearch.common.inject.Inject;
  27. import org.elasticsearch.common.unit.Fuzziness;
  28. import org.elasticsearch.common.xcontent.XContentParser;
  29. import org.elasticsearch.index.analysis.Analysis;
  30. import java.io.IOException;
  31. import java.util.Iterator;
  32. import java.util.List;
  33. /**
  34. * <pre>
  35. * {
  36. * fuzzy_like_this : {
  37. * maxNumTerms : 12,
  38. * boost : 1.1,
  39. * fields : ["field1", "field2"]
  40. * likeText : "..."
  41. * }
  42. * }
  43. * </pre>
  44. */
  45. public class FuzzyLikeThisQueryParser implements QueryParser {
  46. public static final String NAME = "flt";
  47. private static final ParseField FUZZINESS = Fuzziness.FIELD.withDeprecation("min_similarity");
  48. @Inject
  49. public FuzzyLikeThisQueryParser() {
  50. }
  51. @Override
  52. public String[] names() {
  53. return new String[]{NAME, "fuzzy_like_this", "fuzzyLikeThis"};
  54. }
  55. @Override
  56. public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
  57. XContentParser parser = parseContext.parser();
  58. int maxNumTerms = 25;
  59. float boost = 1.0f;
  60. List<String> fields = null;
  61. String likeText = null;
  62. Fuzziness fuzziness = Fuzziness.TWO;
  63. int prefixLength = 0;
  64. boolean ignoreTF = false;
  65. Analyzer analyzer = null;
  66. boolean failOnUnsupportedField = true;
  67. String queryName = null;
  68. XContentParser.Token token;
  69. String currentFieldName = null;
  70. while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
  71. if (token == XContentParser.Token.FIELD_NAME) {
  72. currentFieldName = parser.currentName();
  73. } else if (token.isValue()) {
  74. if ("like_text".equals(currentFieldName) || "likeText".equals(currentFieldName)) {
  75. likeText = parser.text();
  76. } else if ("max_query_terms".equals(currentFieldName) || "maxQueryTerms".equals(currentFieldName)) {
  77. maxNumTerms = parser.intValue();
  78. } else if ("boost".equals(currentFieldName)) {
  79. boost = parser.floatValue();
  80. } else if ("ignore_tf".equals(currentFieldName) || "ignoreTF".equals(currentFieldName)) {
  81. ignoreTF = parser.booleanValue();
  82. } else if (FUZZINESS.match(currentFieldName, parseContext.parseFlags())) {
  83. fuzziness = Fuzziness.parse(parser);
  84. } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) {
  85. prefixLength = parser.intValue();
  86. } else if ("analyzer".equals(currentFieldName)) {
  87. analyzer = parseContext.analysisService().analyzer(parser.text());
  88. } else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
  89. failOnUnsupportedField = parser.booleanValue();
  90. } else if ("_name".equals(currentFieldName)) {
  91. queryName = parser.text();
  92. } else {
  93. throw new QueryParsingException(parseContext.index(), "[flt] query does not support [" + currentFieldName + "]");
  94. }
  95. } else if (token == XContentParser.Token.START_ARRAY) {
  96. if ("fields".equals(currentFieldName)) {
  97. fields = Lists.newLinkedList();
  98. while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
  99. fields.add(parseContext.indexName(parser.text()));
  100. }
  101. } else {
  102. throw new QueryParsingException(parseContext.index(), "[flt] query does not support [" + currentFieldName + "]");
  103. }
  104. }
  105. }
  106. if (likeText == null) {
  107. throw new QueryParsingException(parseContext.index(), "fuzzy_like_this requires 'like_text' to be specified");
  108. }
  109. if (analyzer == null) {
  110. analyzer = parseContext.mapperService().searchAnalyzer();
  111. }
  112. FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
  113. if (fields == null) {
  114. fields = Lists.newArrayList(parseContext.defaultField());
  115. } else if (fields.isEmpty()) {
  116. throw new QueryParsingException(parseContext.index(), "fuzzy_like_this requires 'fields' to be non-empty");
  117. }
  118. for (Iterator<String> it = fields.iterator(); it.hasNext(); ) {
  119. final String fieldName = it.next();
  120. if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
  121. if (failOnUnsupportedField) {
  122. throw new ElasticsearchIllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
  123. } else {
  124. it.remove();
  125. }
  126. }
  127. }
  128. if (fields.isEmpty()) {
  129. return null;
  130. }
  131. float minSimilarity = fuzziness.asFloat();
  132. if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity) {
  133. throw new ElasticsearchIllegalArgumentException("fractional edit distances are not allowed");
  134. }
  135. if (minSimilarity < 0.0f) {
  136. throw new ElasticsearchIllegalArgumentException("minimumSimilarity cannot be less than 0");
  137. }
  138. for (String field : fields) {
  139. query.addTerms(likeText, field, minSimilarity, prefixLength);
  140. }
  141. query.setBoost(boost);
  142. query.setIgnoreTF(ignoreTF);
  143. if (queryName != null) {
  144. parseContext.addNamedQuery(queryName, query);
  145. }
  146. return query;
  147. }
  148. }