BlendedTermQueryTest.java 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*
  2. * Licensed to Elasticsearch under one or more contributor
  3. * license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright
  5. * ownership. Elasticsearch licenses this file to you under
  6. * the Apache License, Version 2.0 (the "License"); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. package org.apache.lucene.queries;
  20. import org.apache.lucene.analysis.MockAnalyzer;
  21. import org.apache.lucene.document.Document;
  22. import org.apache.lucene.document.Field;
  23. import org.apache.lucene.document.FieldType;
  24. import org.apache.lucene.document.TextField;
  25. import org.apache.lucene.index.DirectoryReader;
  26. import org.apache.lucene.index.FieldInfo;
  27. import org.apache.lucene.index.IndexWriter;
  28. import org.apache.lucene.index.Term;
  29. import org.apache.lucene.search.*;
  30. import org.apache.lucene.search.similarities.BM25Similarity;
  31. import org.apache.lucene.search.similarities.DefaultSimilarity;
  32. import org.apache.lucene.search.similarities.Similarity;
  33. import org.apache.lucene.store.Directory;
  34. import org.apache.lucene.util._TestUtil;
  35. import org.elasticsearch.test.ElasticsearchLuceneTestCase;
  36. import org.junit.Test;
  37. import java.io.IOException;
  38. import java.util.*;
  39. import static org.hamcrest.Matchers.containsInAnyOrder;
  40. import static org.hamcrest.Matchers.equalTo;
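/**
 * Tests for {@link BlendedTermQuery}: ranking compared with plain boolean and dismax queries,
 * query equality, and term extraction.
 */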
  43. public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
  44. @Test
  45. public void testBooleanQuery() throws IOException {
  46. Directory dir = newDirectory();
  47. IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  48. String[] firstNames = new String[]{
  49. "simon", "paul"
  50. };
  51. String[] surNames = new String[]{
  52. "willnauer", "simon"
  53. };
  54. for (int i = 0; i < surNames.length; i++) {
  55. Document d = new Document();
  56. d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
  57. d.add(new TextField("firstname", firstNames[i], Field.Store.NO));
  58. d.add(new TextField("surname", surNames[i], Field.Store.NO));
  59. w.addDocument(d);
  60. }
  61. int iters = scaledRandomIntBetween(25, 100);
  62. for (int j = 0; j < iters; j++) {
  63. Document d = new Document();
  64. d.add(new TextField("id", Integer.toString(firstNames.length + j), Field.Store.YES));
  65. d.add(new TextField("firstname", rarely() ? "some_other_name" :
  66. "simon the sorcerer", Field.Store.NO)); // make sure length-norm is the tie-breaker
  67. d.add(new TextField("surname", "bogus", Field.Store.NO));
  68. w.addDocument(d);
  69. }
  70. w.commit();
  71. DirectoryReader reader = DirectoryReader.open(w, true);
  72. IndexSearcher searcher = setSimilarity(newSearcher(reader));
  73. {
  74. Term[] terms = new Term[]{new Term("firstname", "simon"), new Term("surname", "simon")};
  75. BlendedTermQuery query = BlendedTermQuery.booleanBlendedQuery(terms, true);
  76. TopDocs search = searcher.search(query, 3);
  77. ScoreDoc[] scoreDocs = search.scoreDocs;
  78. assertEquals(3, scoreDocs.length);
  79. assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
  80. }
  81. {
  82. BooleanQuery query = new BooleanQuery(false);
  83. query.add(new TermQuery(new Term("firstname", "simon")), BooleanClause.Occur.SHOULD);
  84. query.add(new TermQuery(new Term("surname", "simon")), BooleanClause.Occur.SHOULD);
  85. TopDocs search = searcher.search(query, 1);
  86. ScoreDoc[] scoreDocs = search.scoreDocs;
  87. assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue());
  88. }
  89. reader.close();
  90. w.close();
  91. dir.close();
  92. }
  93. @Test
  94. public void testDismaxQuery() throws IOException {
  95. Directory dir = newDirectory();
  96. IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  97. String[] username = new String[]{
  98. "foo fighters", "some cool fan", "cover band"};
  99. String[] song = new String[]{
  100. "generator", "foo fighers - generator", "foo fighters generator"
  101. };
  102. FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  103. ft.setIndexOptions(random().nextBoolean() ? FieldInfo.IndexOptions.DOCS_ONLY : FieldInfo.IndexOptions.DOCS_AND_FREQS);
  104. ft.setOmitNorms(random().nextBoolean());
  105. ft.freeze();
  106. FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED);
  107. ft1.setIndexOptions(random().nextBoolean() ? FieldInfo.IndexOptions.DOCS_ONLY : FieldInfo.IndexOptions.DOCS_AND_FREQS);
  108. ft1.setOmitNorms(random().nextBoolean());
  109. ft1.freeze();
  110. for (int i = 0; i < username.length; i++) {
  111. Document d = new Document();
  112. d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
  113. d.add(new Field("username", username[i], ft));
  114. d.add(new Field("song", song[i], ft));
  115. w.addDocument(d);
  116. }
  117. int iters = scaledRandomIntBetween(25, 100);
  118. for (int j = 0; j < iters; j++) {
  119. Document d = new Document();
  120. d.add(new TextField("id", Integer.toString(username.length + j), Field.Store.YES));
  121. d.add(new Field("username", "foo fighters", ft1));
  122. d.add(new Field("song", "some bogus text to bump up IDF", ft1));
  123. w.addDocument(d);
  124. }
  125. w.commit();
  126. DirectoryReader reader = DirectoryReader.open(w, true);
  127. IndexSearcher searcher = setSimilarity(newSearcher(reader));
  128. {
  129. String[] fields = new String[]{"username", "song"};
  130. BooleanQuery query = new BooleanQuery(false);
  131. query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f), BooleanClause.Occur.SHOULD);
  132. query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fighters"), 0.1f), BooleanClause.Occur.SHOULD);
  133. query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "generator"), 0.1f), BooleanClause.Occur.SHOULD);
  134. TopDocs search = searcher.search(query, 10);
  135. ScoreDoc[] scoreDocs = search.scoreDocs;
  136. assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
  137. }
  138. {
  139. BooleanQuery query = new BooleanQuery(false);
  140. DisjunctionMaxQuery uname = new DisjunctionMaxQuery(0.0f);
  141. uname.add(new TermQuery(new Term("username", "foo")));
  142. uname.add(new TermQuery(new Term("song", "foo")));
  143. DisjunctionMaxQuery s = new DisjunctionMaxQuery(0.0f);
  144. s.add(new TermQuery(new Term("username", "fighers")));
  145. s.add(new TermQuery(new Term("song", "fighers")));
  146. DisjunctionMaxQuery gen = new DisjunctionMaxQuery(0f);
  147. gen.add(new TermQuery(new Term("username", "generator")));
  148. gen.add(new TermQuery(new Term("song", "generator")));
  149. query.add(uname, BooleanClause.Occur.SHOULD);
  150. query.add(s, BooleanClause.Occur.SHOULD);
  151. query.add(gen, BooleanClause.Occur.SHOULD);
  152. TopDocs search = searcher.search(query, 4);
  153. ScoreDoc[] scoreDocs = search.scoreDocs;
  154. assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue());
  155. }
  156. reader.close();
  157. w.close();
  158. dir.close();
  159. }
  160. @Test
  161. public void testBasics() {
  162. final int iters = scaledRandomIntBetween(5, 25);
  163. for (int j = 0; j < iters; j++) {
  164. String[] fields = new String[1 + random().nextInt(10)];
  165. for (int i = 0; i < fields.length; i++) {
  166. fields[i] = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
  167. }
  168. String term = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
  169. Term[] terms = toTerms(fields, term);
  170. boolean disableCoord = random().nextBoolean();
  171. boolean useBoolean = random().nextBoolean();
  172. float tieBreaker = random().nextFloat();
  173. BlendedTermQuery query = useBoolean ? BlendedTermQuery.booleanBlendedQuery(terms, disableCoord) : BlendedTermQuery.dismaxBlendedQuery(terms, tieBreaker);
  174. QueryUtils.check(query);
  175. terms = toTerms(fields, term);
  176. BlendedTermQuery query2 = useBoolean ? BlendedTermQuery.booleanBlendedQuery(terms, disableCoord) : BlendedTermQuery.dismaxBlendedQuery(terms, tieBreaker);
  177. assertEquals(query, query2);
  178. }
  179. }
  180. public Term[] toTerms(String[] fields, String term) {
  181. Term[] terms = new Term[fields.length];
  182. List<String> fieldsList = Arrays.asList(fields);
  183. Collections.shuffle(fieldsList, random());
  184. fields = fieldsList.toArray(new String[0]);
  185. for (int i = 0; i < fields.length; i++) {
  186. terms[i] = new Term(fields[i], term);
  187. }
  188. return terms;
  189. }
  190. public IndexSearcher setSimilarity(IndexSearcher searcher) {
  191. Similarity similarity = random().nextBoolean() ? new BM25Similarity() : new DefaultSimilarity();
  192. searcher.setSimilarity(similarity);
  193. return searcher;
  194. }
  195. @Test
  196. public void testExtractTerms() {
  197. Set<Term> terms = new HashSet<>();
  198. int num = scaledRandomIntBetween(1, 10);
  199. for (int i = 0; i < num; i++) {
  200. terms.add(new Term(_TestUtil.randomRealisticUnicodeString(random(), 1, 10), _TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
  201. }
  202. BlendedTermQuery blendedTermQuery = random().nextBoolean() ? BlendedTermQuery.dismaxBlendedQuery(terms.toArray(new Term[0]), random().nextFloat()) :
  203. BlendedTermQuery.booleanBlendedQuery(terms.toArray(new Term[0]), random().nextBoolean());
  204. Set<Term> extracted = new HashSet<>();
  205. blendedTermQuery.extractTerms(extracted);
  206. assertThat(extracted.size(), equalTo(terms.size()));
  207. assertThat(extracted, containsInAnyOrder(terms.toArray(new Term[0])));
  208. }
  209. }