1
0

MoreLikeThisFieldQueryParser.java 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /*
  2. * Licensed to Elasticsearch under one or more contributor
  3. * license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright
  5. * ownership. Elasticsearch licenses this file to you under
  6. * the Apache License, Version 2.0 (the "License"); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. package org.elasticsearch.index.query;
  20. import com.google.common.collect.Sets;
  21. import org.apache.lucene.analysis.Analyzer;
  22. import org.apache.lucene.search.Query;
  23. import org.elasticsearch.ElasticsearchIllegalArgumentException;
  24. import org.elasticsearch.common.Strings;
  25. import org.elasticsearch.common.inject.Inject;
  26. import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
  27. import org.elasticsearch.common.xcontent.XContentParser;
  28. import org.elasticsearch.index.analysis.Analysis;
  29. import org.elasticsearch.index.mapper.MapperService;
  30. import java.io.IOException;
  31. import java.util.Set;
  32. import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameQuery;
  33. /**
  34. *
  35. */
  36. public class MoreLikeThisFieldQueryParser implements QueryParser {
  37. public static final String NAME = "mlt_field";
  38. @Inject
  39. public MoreLikeThisFieldQueryParser() {
  40. }
  41. @Override
  42. public String[] names() {
  43. return new String[]{NAME, "more_like_this_field", Strings.toCamelCase(NAME), "moreLikeThisField"};
  44. }
  45. @Override
  46. public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
  47. XContentParser parser = parseContext.parser();
  48. XContentParser.Token token = parser.nextToken();
  49. assert token == XContentParser.Token.FIELD_NAME;
  50. String fieldName = parser.currentName();
  51. // now, we move after the field name, which starts the object
  52. token = parser.nextToken();
  53. assert token == XContentParser.Token.START_OBJECT;
  54. MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
  55. mltQuery.setSimilarity(parseContext.searchSimilarity());
  56. Analyzer analyzer = null;
  57. boolean failOnUnsupportedField = true;
  58. String queryName = null;
  59. String currentFieldName = null;
  60. while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
  61. if (token == XContentParser.Token.FIELD_NAME) {
  62. currentFieldName = parser.currentName();
  63. } else if (token.isValue()) {
  64. if (MoreLikeThisQueryParser.Fields.LIKE_TEXT.match(currentFieldName,parseContext.parseFlags()) ) {
  65. mltQuery.setLikeText(parser.text());
  66. } else if (MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.match(currentFieldName,parseContext.parseFlags()) ) {
  67. mltQuery.setMinTermFrequency(parser.intValue());
  68. } else if (MoreLikeThisQueryParser.Fields.MAX_QUERY_TERMS.match(currentFieldName,parseContext.parseFlags())) {
  69. mltQuery.setMaxQueryTerms(parser.intValue());
  70. } else if (MoreLikeThisQueryParser.Fields.MIN_DOC_FREQ.match(currentFieldName,parseContext.parseFlags())) {
  71. mltQuery.setMinDocFreq(parser.intValue());
  72. } else if (MoreLikeThisQueryParser.Fields.MAX_DOC_FREQ.match(currentFieldName,parseContext.parseFlags())) {
  73. mltQuery.setMaxDocFreq(parser.intValue());
  74. } else if (MoreLikeThisQueryParser.Fields.MIN_WORD_LENGTH.match(currentFieldName,parseContext.parseFlags())) {
  75. mltQuery.setMinWordLen(parser.intValue());
  76. } else if (MoreLikeThisQueryParser.Fields.MAX_WORD_LENGTH.match(currentFieldName,parseContext.parseFlags())) {
  77. mltQuery.setMaxWordLen(parser.intValue());
  78. } else if (MoreLikeThisQueryParser.Fields.BOOST_TERMS.match(currentFieldName,parseContext.parseFlags())) {
  79. float boostFactor = parser.floatValue();
  80. if (boostFactor != 0) {
  81. mltQuery.setBoostTerms(true);
  82. mltQuery.setBoostTermsFactor(boostFactor);
  83. }
  84. } else if (MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.match(currentFieldName,parseContext.parseFlags())) {
  85. mltQuery.setMinimumShouldMatch(parser.text());
  86. } else if (MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.match(currentFieldName,parseContext.parseFlags())) {
  87. mltQuery.setMinimumShouldMatch(Math.round(parser.floatValue() * 100) + "%");
  88. } else if ("analyzer".equals(currentFieldName)) {
  89. analyzer = parseContext.analysisService().analyzer(parser.text());
  90. } else if ("boost".equals(currentFieldName)) {
  91. mltQuery.setBoost(parser.floatValue());
  92. } else if (MoreLikeThisQueryParser.Fields.FAIL_ON_UNSUPPORTED_FIELD.match(currentFieldName,parseContext.parseFlags())) {
  93. failOnUnsupportedField = parser.booleanValue();
  94. } else if ("_name".equals(currentFieldName)) {
  95. queryName = parser.text();
  96. } else {
  97. throw new QueryParsingException(parseContext.index(), "[mlt_field] query does not support [" + currentFieldName + "]");
  98. }
  99. } else if (token == XContentParser.Token.START_ARRAY) {
  100. if (MoreLikeThisQueryParser.Fields.STOP_WORDS.match(currentFieldName,parseContext.parseFlags())) {
  101. Set<String> stopWords = Sets.newHashSet();
  102. while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
  103. stopWords.add(parser.text());
  104. }
  105. mltQuery.setStopWords(stopWords);
  106. } else {
  107. throw new QueryParsingException(parseContext.index(), "[mlt_field] query does not support [" + currentFieldName + "]");
  108. }
  109. }
  110. }
  111. if (mltQuery.getLikeText() == null) {
  112. throw new QueryParsingException(parseContext.index(), "more_like_this_field requires 'like_text' to be specified");
  113. }
  114. // move to the next end object, to close the field name
  115. token = parser.nextToken();
  116. assert token == XContentParser.Token.END_OBJECT;
  117. MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
  118. if (smartNameFieldMappers != null) {
  119. if (smartNameFieldMappers.hasMapper()) {
  120. fieldName = smartNameFieldMappers.mapper().names().indexName();
  121. }
  122. if (analyzer == null) {
  123. analyzer = smartNameFieldMappers.searchAnalyzer();
  124. }
  125. }
  126. if (analyzer == null) {
  127. analyzer = parseContext.mapperService().searchAnalyzer();
  128. }
  129. if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
  130. if (failOnUnsupportedField) {
  131. throw new ElasticsearchIllegalArgumentException("more_like_this_field doesn't support binary/numeric fields: [" + fieldName + "]");
  132. } else {
  133. return null;
  134. }
  135. }
  136. mltQuery.setAnalyzer(analyzer);
  137. mltQuery.setMoreLikeFields(new String[]{fieldName});
  138. Query query = wrapSmartNameQuery(mltQuery, smartNameFieldMappers, parseContext);
  139. if (queryName != null) {
  140. parseContext.addNamedQuery(queryName, query);
  141. }
  142. return query;
  143. }
  144. }