fts2n.test 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. # 2007 April 26
  2. #
  3. # The author disclaims copyright to this source code.
  4. #
  5. #*************************************************************************
  6. # This file implements tests for prefix-searching in the fts2
  7. # component of the SQLite library.
  8. #
  9. # $Id: fts2n.test,v 1.2 2007/12/13 21:54:11 drh Exp $
  10. #
  # Locate the test directory relative to this script and load the shared
  # test harness from it.
  set testdir [file dirname $argv0]
  source [file join $testdir tester.tcl]

  # These tests need the fts2 module.  If the build does NOT have fts2
  # available (SQLITE_ENABLE_FTS2 was not defined), end the test run here
  # and skip the remainder of the file.
  ifcapable !fts2 { finish_test ; return }
  # Sample document used to seed the index with a sizeable set of terms.
  # The lines of the literal are kept flush-left so that no indentation
  # ends up inside the string.
  set text {
Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
potenti. Cum sociis natoque penatibus et magnis dis parturient
montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
suscipit nec, consequat quis, risus.
}

  # Table t1: row 1 holds the sample document (bound from $text), row 2 a
  # short second document whose word "lovely" shares the "lo" prefix with
  # "lorem" in row 1.
  db eval {CREATE VIRTUAL TABLE t1 USING fts2(c);}
  db eval {INSERT INTO t1(rowid, c) VALUES(1, $text);}
  db eval {INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');}
  # A complete term finds the row containing it.
  do_test fts2n-1.1 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'}
  } {1}

  # A trailing '*' turns the term into a prefix search.
  do_test fts2n-1.2 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'}
  } {1}

  # A prefix search also matches a term equal to the prefix itself.
  do_test fts2n-1.3 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'}
  } {1}

  # Without the '*', a partial term must not behave like a prefix.
  do_test fts2n-1.4 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lore'}
  } {}

  # One prefix can match different terms in different rows.
  do_test fts2n-1.5 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'}
  } {1 2}

  # Same again, where a single row contains several matching terms.
  do_test fts2n-1.6 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'l*'}
  } {1 2}

  # A longer prefix that only occurs in the second row.
  do_test fts2n-1.7 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'}
  } {2}

  # A '*' that is not attached to the end of a term is dropped, so 'lo'
  # is searched for as an exact term and nothing matches.
  do_test fts2n-1.8 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'}
  } {}

  # A '*' on its own is dropped as well; the query matches no rows.
  do_test fts2n-1.9 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH '*'}
  } {}
  # Phrase queries.  The SQL is brace-quoted so the double quotes that
  # delimit the phrase need no backslash escapes.

  # The last term of a phrase may be a prefix...
  do_test fts2n-1.10 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"lovely r*"'}
  } {2}

  # ...but without the '*' the partial term "r" has to match exactly.
  do_test fts2n-1.11 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"lovely r"'}
  } {}

  # A phrase built entirely from prefix terms.
  do_test fts2n-1.12 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"a* l*"'}
  } {1 2}

  # Prefix terms followed by an exact term within the same phrase.
  do_test fts2n-1.13 {
    execsql {SELECT rowid FROM t1 WHERE t1 MATCH '"a* l* row"'}
  } {2}
  # Test across updates (and, by implication, deletes).
  #
  # ntext is a version of $text with every "lorem"/"Lorem" removed.  The
  # replacement argument is written {} because in Tcl '' is not an empty
  # string: it is two literal apostrophe characters, which would be
  # inserted into the text wherever "lorem" used to be.
  regsub -all {[Ll]orem} $text {} ntext

  # Table t2 starts out identical to t1; row 1 is then overwritten with
  # the lorem-free text so the index has to forget the old terms.
  db eval {
    CREATE VIRTUAL TABLE t2 USING fts2(c);
    INSERT INTO t2(rowid, c) VALUES(1, $text);
    INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
    UPDATE t2 SET c = $ntext WHERE rowid = 1;
  }
  # After the update, "lorem" is no longer found as an exact term.
  do_test fts2n-2.1 {
    execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'}
  } {}

  # Nor is it found through one of its prefixes.
  do_test fts2n-2.2 {
    execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'}
  } {}

  # "lo*" now only finds "lovely" in the untouched second row.
  do_test fts2n-2.3 {
    execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'}
  } {2}

  # Other terms starting with "l" are still found in the updated row.
  do_test fts2n-2.4 {
    execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'l*'}
  } {1 2}

  # A prefix that only ever occurred in the second row.
  do_test fts2n-2.5 {
    execsql {SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'}
  } {2}
  # Build a big document with lots of unique terms, intended to produce a
  # segment with multiple levels in its tree.  Starting from $text, each
  # pass appends a copy of the document so far in which every word has
  # the pass's letter tacked onto its end, so the document doubles in
  # size five times.
  set bigtext $text
  foreach suffix {a b c d e} {
    regsub -all {[A-Za-z]+} $bigtext "&${suffix}" tagged
    append bigtext $tagged
  }

  # Fill t3 with the two usual rows followed by 100 copies of $bigtext
  # (rowids 3..102), all inside a single transaction.  $ret accumulates
  # the hit count expected for each row: 6 for row 1, 1 for row 2, and
  # 6*(2^5)==192 for every $bigtext row.  (offsets() returns 4 elements
  # per hit, which is how the counts are derived when they are checked.)
  set ret [list 6 1]
  db eval {
    BEGIN;
    CREATE VIRTUAL TABLE t3 USING fts2(c);
    INSERT INTO t3(rowid, c) VALUES(1, $text);
    INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
  }
  for {set i 0} {$i < 100} {incr i} {
    lappend ret 192
    db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
  }
  db eval {COMMIT;}
  # Check the number of hits reported for every matching row.  offsets()
  # yields four list elements per hit, so the hit count for a row is the
  # length of its offsets() list divided by four.
  do_test fts2n-3.1 {
    set hits {}
    db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
      lappend hits [expr {[llength $o] / 4}]
    }
    set hits
  } $ret

  # TODO(shess) It would be useful to test a couple of edge cases, but I
  # don't know if we have the precision to manage it from here at this
  # time.  Prefix hits can cross leaves, which the code above _should_
  # hit by virtue of size.  There are two variations on this.  If the
  # tree is 2 levels high, the code will find the leaf-node extent
  # directly, but if it's higher, the code will have to follow two
  # separate interior branches down the tree.  Both should be tested.

  finish_test