RankDocsQueryBuilderTests.java 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. /*
  2. * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
  3. * or more contributor license agreements. Licensed under the "Elastic License
  4. * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
  5. * Public License v 1"; you may not use this file except in compliance with, at
  6. * your election, the "Elastic License 2.0", the "GNU Affero General Public
  7. * License v3.0 only", or the "Server Side Public License, v 1".
  8. */
  9. package org.elasticsearch.index.query;
  10. import org.apache.lucene.document.Document;
  11. import org.apache.lucene.document.NumericDocValuesField;
  12. import org.apache.lucene.index.DirectoryReader;
  13. import org.apache.lucene.index.IndexReader;
  14. import org.apache.lucene.index.IndexWriter;
  15. import org.apache.lucene.index.IndexWriterConfig;
  16. import org.apache.lucene.index.NoMergePolicy;
  17. import org.apache.lucene.search.IndexSearcher;
  18. import org.apache.lucene.search.Query;
  19. import org.apache.lucene.search.ScoreDoc;
  20. import org.apache.lucene.search.TopScoreDocCollectorManager;
  21. import org.apache.lucene.store.Directory;
  22. import org.apache.lucene.tests.index.RandomIndexWriter;
  23. import org.elasticsearch.search.rank.RankDoc;
  24. import org.elasticsearch.search.retriever.rankdoc.RankDocsQuery;
  25. import org.elasticsearch.test.AbstractQueryTestCase;
  26. import java.io.IOException;
  27. import java.util.Arrays;
  28. import java.util.Random;
  29. import static org.hamcrest.Matchers.equalTo;
  30. import static org.hamcrest.Matchers.lessThanOrEqualTo;
  31. public class RankDocsQueryBuilderTests extends AbstractQueryTestCase<RankDocsQueryBuilder> {
  32. private RankDoc[] generateRandomRankDocs() {
  33. int totalDocs = randomIntBetween(0, 10);
  34. RankDoc[] rankDocs = new RankDoc[totalDocs];
  35. int currentDoc = 0;
  36. for (int i = 0; i < totalDocs; i++) {
  37. RankDoc rankDoc = new RankDoc(currentDoc, randomFloat(), randomIntBetween(0, 2));
  38. rankDocs[i] = rankDoc;
  39. currentDoc += randomIntBetween(0, 100);
  40. }
  41. return rankDocs;
  42. }
  43. @Override
  44. protected RankDocsQueryBuilder doCreateTestQueryBuilder() {
  45. RankDoc[] rankDocs = generateRandomRankDocs();
  46. return new RankDocsQueryBuilder(rankDocs, null, false);
  47. }
  48. @Override
  49. protected void doAssertLuceneQuery(RankDocsQueryBuilder queryBuilder, Query query, SearchExecutionContext context) throws IOException {
  50. assertTrue(query instanceof RankDocsQuery);
  51. RankDocsQuery rankDocsQuery = (RankDocsQuery) query;
  52. assertArrayEquals(queryBuilder.rankDocs(), rankDocsQuery.rankDocs());
  53. }
  54. /**
  55. * Overridden to ensure that {@link SearchExecutionContext} has a non-null {@link IndexReader}
  56. */
  57. @Override
  58. public void testToQuery() throws IOException {
  59. try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
  60. iw.addDocument(new Document());
  61. try (IndexReader reader = iw.getReader()) {
  62. SearchExecutionContext context = createSearchExecutionContext(newSearcher(reader));
  63. RankDocsQueryBuilder queryBuilder = createTestQueryBuilder();
  64. Query query = queryBuilder.doToQuery(context);
  65. assertTrue(query instanceof RankDocsQuery);
  66. RankDocsQuery rankDocsQuery = (RankDocsQuery) query;
  67. int shardIndex = context.getShardRequestIndex();
  68. int expectedDocs = (int) Arrays.stream(queryBuilder.rankDocs()).filter(x -> x.shardIndex == shardIndex).count();
  69. assertEquals(expectedDocs, rankDocsQuery.rankDocs().length);
  70. }
  71. }
  72. }
  73. /**
  74. * Overridden to ensure that {@link SearchExecutionContext} has a non-null {@link IndexReader}
  75. */
  76. @Override
  77. public void testCacheability() throws IOException {
  78. try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
  79. iw.addDocument(new Document());
  80. try (IndexReader reader = iw.getReader()) {
  81. SearchExecutionContext context = createSearchExecutionContext(newSearcher(reader));
  82. RankDocsQueryBuilder queryBuilder = createTestQueryBuilder();
  83. QueryBuilder rewriteQuery = rewriteQuery(queryBuilder, new SearchExecutionContext(context));
  84. assertNotNull(rewriteQuery.toQuery(context));
  85. assertTrue("query should be cacheable: " + queryBuilder.toString(), context.isCacheable());
  86. }
  87. }
  88. }
  89. /**
  90. * Overridden to ensure that {@link SearchExecutionContext} has a non-null {@link IndexReader}
  91. */
  92. @Override
  93. public void testMustRewrite() throws IOException {
  94. try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
  95. iw.addDocument(new Document());
  96. try (IndexReader reader = iw.getReader()) {
  97. SearchExecutionContext context = createSearchExecutionContext(newSearcher(reader));
  98. context.setAllowUnmappedFields(true);
  99. RankDocsQueryBuilder queryBuilder = createTestQueryBuilder();
  100. queryBuilder.toQuery(context);
  101. }
  102. }
  103. }
  104. public void testRankDocsQueryEarlyTerminate() throws IOException {
  105. try (Directory directory = newDirectory()) {
  106. IndexWriterConfig config = new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
  107. try (IndexWriter iw = new IndexWriter(directory, config)) {
  108. int seg = atLeast(5);
  109. int numDocs = atLeast(20);
  110. for (int i = 0; i < seg; i++) {
  111. for (int j = 0; j < numDocs; j++) {
  112. Document doc = new Document();
  113. doc.add(new NumericDocValuesField("active", 1));
  114. iw.addDocument(doc);
  115. }
  116. if (frequently()) {
  117. iw.flush();
  118. }
  119. }
  120. }
  121. try (IndexReader reader = DirectoryReader.open(directory)) {
  122. int topSize = randomIntBetween(1, reader.maxDoc() / 5);
  123. RankDoc[] rankDocs = new RankDoc[topSize];
  124. int index = 0;
  125. for (int r : randomSample(random(), reader.maxDoc(), topSize)) {
  126. rankDocs[index++] = new RankDoc(r, randomFloat(), randomIntBetween(0, 5));
  127. }
  128. Arrays.sort(rankDocs);
  129. for (int i = 0; i < rankDocs.length; i++) {
  130. rankDocs[i].rank = i;
  131. }
  132. IndexSearcher searcher = new IndexSearcher(reader);
  133. for (int totalHitsThreshold = 0; totalHitsThreshold < reader.maxDoc(); totalHitsThreshold += randomIntBetween(1, 10)) {
  134. // Tests that the query matches only the {@link RankDoc} when the hit threshold is reached.
  135. RankDocsQuery q = new RankDocsQuery(
  136. reader,
  137. rankDocs,
  138. new Query[] { NumericDocValuesField.newSlowExactQuery("active", 1) },
  139. new String[1],
  140. false
  141. );
  142. var topDocsManager = new TopScoreDocCollectorManager(topSize, null, totalHitsThreshold);
  143. var col = searcher.search(q, topDocsManager);
  144. // depending on the doc-ids of the RankDocs (i.e. the actual docs to have score) we could visit them last,
  145. // so worst case is we could end up collecting up to 1 + max(topSize , totalHitsThreshold) + rankDocs.length documents
  146. // as we could have already filled the priority queue with non-optimal docs
  147. assertThat(
  148. col.totalHits.value,
  149. lessThanOrEqualTo((long) (1 + Math.max(topSize, totalHitsThreshold) + rankDocs.length))
  150. );
  151. assertEqualTopDocs(col.scoreDocs, rankDocs);
  152. }
  153. {
  154. // Return all docs (rank + tail)
  155. RankDocsQuery q = new RankDocsQuery(
  156. reader,
  157. rankDocs,
  158. new Query[] { NumericDocValuesField.newSlowExactQuery("active", 1) },
  159. new String[1],
  160. false
  161. );
  162. var topDocsManager = new TopScoreDocCollectorManager(topSize, null, Integer.MAX_VALUE);
  163. var col = searcher.search(q, topDocsManager);
  164. assertThat(col.totalHits.value, equalTo((long) reader.maxDoc()));
  165. assertEqualTopDocs(col.scoreDocs, rankDocs);
  166. }
  167. {
  168. // Only rank docs
  169. RankDocsQuery q = new RankDocsQuery(
  170. reader,
  171. rankDocs,
  172. new Query[] { NumericDocValuesField.newSlowExactQuery("active", 1) },
  173. new String[1],
  174. true
  175. );
  176. var topDocsManager = new TopScoreDocCollectorManager(topSize, null, Integer.MAX_VALUE);
  177. var col = searcher.search(q, topDocsManager);
  178. assertThat(col.totalHits.value, equalTo((long) topSize));
  179. assertEqualTopDocs(col.scoreDocs, rankDocs);
  180. }
  181. {
  182. // A single rank doc in the last segment
  183. RankDoc[] singleRankDoc = new RankDoc[1];
  184. singleRankDoc[0] = rankDocs[rankDocs.length - 1];
  185. RankDocsQuery q = new RankDocsQuery(
  186. reader,
  187. singleRankDoc,
  188. new Query[] { NumericDocValuesField.newSlowExactQuery("active", 1) },
  189. new String[1],
  190. false
  191. );
  192. var topDocsManager = new TopScoreDocCollectorManager(1, null, 0);
  193. var col = searcher.search(q, topDocsManager);
  194. assertThat(col.totalHits.value, lessThanOrEqualTo((long) (2 + rankDocs.length)));
  195. assertEqualTopDocs(col.scoreDocs, singleRankDoc);
  196. }
  197. }
  198. }
  199. }
  200. private static int[] randomSample(Random rand, int n, int k) {
  201. int[] reservoir = new int[k];
  202. for (int i = 0; i < k; i++) {
  203. reservoir[i] = i;
  204. }
  205. for (int i = k; i < n; i++) {
  206. int j = rand.nextInt(i + 1);
  207. if (j < k) {
  208. reservoir[j] = i;
  209. }
  210. }
  211. return reservoir;
  212. }
  213. private static void assertEqualTopDocs(ScoreDoc[] scoreDocs, RankDoc[] rankDocs) {
  214. for (int i = 0; i < scoreDocs.length; i++) {
  215. assertEquals(rankDocs[i].doc, scoreDocs[i].doc);
  216. assertEquals(rankDocs[i].score, scoreDocs[i].score, 0f);
  217. assertEquals(-1, scoreDocs[i].shardIndex);
  218. }
  219. }
  220. @Override
  221. public void testFromXContent() throws IOException {
  222. // no-op since RankDocsQueryBuilder is an internal only API
  223. }
  224. @Override
  225. public void testUnknownField() throws IOException {
  226. // no-op since RankDocsQueryBuilder is agnostic to unknown fields and an internal only API
  227. }
  228. @Override
  229. public void testValidOutput() throws IOException {
  230. // no-op since RankDocsQueryBuilder is an internal only API
  231. }
  232. public void shouldThrowForNegativeScores() throws IOException {
  233. try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) {
  234. iw.addDocument(new Document());
  235. try (IndexReader reader = iw.getReader()) {
  236. SearchExecutionContext context = createSearchExecutionContext(newSearcher(reader));
  237. RankDocsQueryBuilder queryBuilder = new RankDocsQueryBuilder(new RankDoc[] { new RankDoc(0, -1.0f, 0) }, null, false);
  238. IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> queryBuilder.doToQuery(context));
  239. assertEquals("RankDoc scores must be positive values. Missing a normalization step?", ex.getMessage());
  240. }
  241. }
  242. }
  243. }