fts3matchinfo.test 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. # 2010 November 02
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. # This file implements regression tests for the FTS3 module. The focus
  12. # of this file is tables created with the "matchinfo=fts3" option.
  13. #
  # Derive the test directory from the path of the running script and
  # source tester.tcl from it.
  set testdir [file dirname $argv0]
  source $testdir/tester.tcl

  # If SQLITE_ENABLE_FTS3 is not defined, omit this file.
  ifcapable !fts3 { finish_test ; return }

  # Global prefix for test identifiers; tests below pass only the numeric
  # component (e.g. "1.1") as their name.
  set testprefix fts3matchinfo

  # NOTE(review): presumably selects the legacy (non-parenthesised) FTS3
  # query syntax for the MATCH expressions in this file - confirm against
  # the FTS3 test hooks.
  set sqlite_fts3_enable_parentheses 0
  # mit BLOB
  #
  # Decode BLOB as a packed array of 32-bit integers stored in the byte
  # order of the host platform, and return the integers as a Tcl list.
  # An empty blob decodes to an empty list.
  #
  proc mit {blob} {
    # Pick the [binary scan] specifier matching the host byte order:
    # "i*" reads little-endian words, "I*" reads big-endian words.
    switch -exact -- $::tcl_platform(byteOrder) {
      littleEndian { set spec i* }
      bigEndian    { set spec I* }
    }
    binary scan $blob $spec words
    return $words
  }
  # Make the Tcl proc [mit] callable from SQL as mit().
  db func mit mit

  # Create an FTS4 table with matchinfo=fts3 and list the tables that now
  # exist in the schema.
  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE t1 USING fts4(matchinfo=fts3);
    SELECT name FROM sqlite_master WHERE type = 'table';
  } {t1 t1_content t1_segments t1_segdir t1_stat}

  # Populate t1, then decode the default matchinfo() blob for each of the
  # two rows that match 'I'.
  do_execsql_test 1.1 {
    INSERT INTO t1(content) VALUES('I wandered lonely as a cloud');
    INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,');
    INSERT INTO t1(content) VALUES('When all at once I saw a crowd,');
    INSERT INTO t1(content) VALUES('A host, of golden daffodils,');
    SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I';
  } {{1 1 1 2 2} {1 1 1 2 2}}

  # Now create an FTS4 table that does not specify matchinfo=fts3.
  # The expected matchinfo() output is the same as for t1 in test 1.1.
  #
  do_execsql_test 1.2 {
    CREATE VIRTUAL TABLE t2 USING fts4;
    INSERT INTO t2 SELECT * FROM t1;
    SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I';
  } {{1 1 1 2 2} {1 1 1 2 2}}

  # Test some syntax-error handling.
  #
  # 2.0: misspelled option value ("fs3").
  do_catchsql_test 2.0 {
    CREATE VIRTUAL TABLE x1 USING fts4(matchinfo=fs3);
  } {1 {unrecognized matchinfo: fs3}}
  # 2.1: misspelled option name ("mtchinfo") on an fts4 table.
  do_catchsql_test 2.1 {
    CREATE VIRTUAL TABLE x2 USING fts4(mtchinfo=fts3);
  } {1 {unrecognized parameter: mtchinfo=fts3}}
  # 2.2: well-formed option name with an unsupported value ("fts5").
  do_catchsql_test 2.2 {
    CREATE VIRTUAL TABLE x2 USING fts4(matchinfo=fts5);
  } {1 {unrecognized matchinfo: fts5}}

  # Check that with fts3, the "=" character is permitted in column definitions.
  # Here "mtchinfo=fts3" declares a column that is addressed as "mtchinfo".
  #
  do_execsql_test 3.1 {
    CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
    INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
    SELECT mtchinfo FROM t3;
  } {{Beside the lake, beneath the trees}}

  # Query an FTS4 table that has never had a row inserted, first with a
  # single term and then with several. No expected-result argument is
  # passed to these tests.
  do_execsql_test 3.2 {
    CREATE VIRTUAL TABLE xx USING FTS4;
  }
  do_execsql_test 3.3 {
    SELECT * FROM xx WHERE xx MATCH 'abc';
  }
  do_execsql_test 3.4 {
    SELECT * FROM xx WHERE xx MATCH 'a b c';
  }
  #--------------------------------------------------------------------------
  # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
  #
  # The first argument - $tn - is a test identifier. This may be either a
  # full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix
  # is set, just the numeric component (i.e. "1.1").
  #
  # The second argument is the name of an FTSX table. The third is the
  # full text of a WHERE/MATCH expression to query the table for
  # (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a
  # key-value list (serialized array) with matchinfo() format specifiers
  # as keys, and the results of executing the statement:
  #
  #   SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr
  #
  # For example:
  #
  #   CREATE VIRTUAL TABLE t1 USING fts4;
  #   INSERT INTO t1 VALUES('abc');
  #   INSERT INTO t1 VALUES('def');
  #   INSERT INTO t1 VALUES('abc abc');
  #
  #   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
  #     n {3 3}
  #     p {1 1}
  #     c {1 1}
  #     x {{1 3 2} {2 3 2}}
  #   }
  #
  # If the $results list contains keys mapped to "-" instead of a matchinfo()
  # result, then this command computes the expected results based on other
  # mappings to test the matchinfo() function. For example, the command above
  # could be changed to:
  #
  #   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
  #     n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}}
  #     pcx -
  #   }
  #
  # And this command would compute the expected results for matchinfo(t1, 'pcx')
  # based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and
  # matchinfo(t1, 'x') in order to test 'pcx'.
  #
  # Every character of a "-" format must itself appear in $results with an
  # explicit value; otherwise an error naming the missing character is
  # raised as soon as a row has to be derived.
  #
  # Tests run: one named "$tn.<format>" per format (explicit or derived),
  # followed by one named "$tn.multi" that selects all formats in a single
  # statement. Each query wraps matchinfo() in the SQL function mit().
  #
  proc do_matchinfo_test {tn tbl expr results} {
    # Record every format whose expected result was supplied explicitly.
    # The "-" placeholder is compared with "eq" so that the comparison is
    # always a string comparison.
    foreach {fmt res} $results {
      if {$res eq "-"} continue
      set resarray($fmt) $res
    }

    # The expected number of rows is the length of the longest explicit
    # result list.
    set nRow 0
    foreach {fmt res} [array get resarray] {
      if {[llength $res]>$nRow} { set nRow [llength $res] }
    }

    # Construct expected results for any formats for which the caller
    # supplied result is "-". Each derived row is the concatenation of the
    # same row of every single-character component, in format order.
    #
    foreach {fmt res} $results {
      if {$res ne "-"} continue
      set res [list]
      for {set iRow 0} {$iRow<$nRow} {incr iRow} {
        set rowres [list]
        foreach c [split $fmt ""] {
          if {![info exists resarray($c)]} {
            error "do_matchinfo_test $tn: cannot derive '$fmt':\
                   no explicit result supplied for '$c'"
          }
          set rowres [concat $rowres [lindex $resarray($c) $iRow]]
        }
        lappend res $rowres
      }
      set resarray($fmt) $res
    }

    # Take a single snapshot of the format/result pairs so that the
    # individual tests, the column list and the expected rows of the
    # combined test are all built from the same ordering.
    set pairs [array get resarray]

    # Test each matchinfo() request individually.
    #
    foreach {fmt res} $pairs {
      set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr"
      do_execsql_test $tn.$fmt $sql [normalize2 $res]
    }

    # Test them all executed together (multiple invocations of matchinfo()).
    # The expected output is row-major: for each row, one value per format.
    #
    set exprlist [list]
    foreach {fmt res} $pairs {
      lappend exprlist "mit(matchinfo($tbl, '$fmt'))"
    }
    set allres [list]
    for {set iRow 0} {$iRow<$nRow} {incr iRow} {
      foreach {fmt res} $pairs {
        lappend allres [lindex $res $iRow]
      }
    }
    set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr"
    do_execsql_test $tn.multi $sql [normalize2 $allres]
  }
  # normalize2 LIST_OF_LISTS
  #
  # Return a copy of LIST_OF_LISTS in which every element has been rebuilt
  # as a list, so that runs of spaces and newlines inside an element
  # collapse to the canonical single-space list form.
  #
  proc normalize2 {list_of_lists} {
    set canonical {}
    foreach row $list_of_lists {
      # Expanding the row and re-listing it discards its original spacing.
      set rebuilt [list {*}$row]
      lappend canonical $rebuilt
    }
    return $canonical
  }
  # 4.1.*: two-column table holding two rows that are mirror images of
  # each other ('a b c d e' / 'f g h i j' swapped between columns x and y).
  do_execsql_test 4.1.0 {
    CREATE VIRTUAL TABLE t4 USING fts4(x, y);
    INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
    INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
  }

  # Three-term query. The formats mapped to "-" are derived by
  # do_matchinfo_test from the single-character results listed first.
  do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
    p {3 3}
    c {2 2}
    x {
      {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1}
      {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1}
    }
    n {2 2}
    l {{5 5} {5 5}}
    a {{5 5} {5 5}}
    s {{3 0} {0 3}}
    xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
    xpxsscplax -
  }

  # Single phrase query against the same table.
  do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
    p {1 1}
    c {2 2}
    x {
      {0 1 1 1 1 1}
      {1 1 1 0 1 1}
    }
    n {2 2}
    l {{5 5} {5 5}}
    a {{5 5} {5 5}}
    s {{0 1} {1 0}}
    xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
    sxsxs -
  }

  # Spot checks of the 's' format for various term/phrase combinations.
  do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'}     { s {{2 0} {0 2}} }
  do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
  do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
  do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'}     { s {{1 0} {0 1}} }

  # 'x' output when the query contains a term ('abcd') that appears in no
  # row, combined with OR and with the "-" (exclude) operator.
  do_matchinfo_test 4.1.7 t4 {t4 MATCH 'f OR abcd'} {
    x {
      {0 1 1 1 1 1 0 0 0 0 0 0}
      {1 1 1 0 1 1 0 0 0 0 0 0}
    }
  }
  do_matchinfo_test 4.1.8 t4 {t4 MATCH 'f -abcd'} {
    x {
      {0 1 1 1 1 1}
      {1 1 1 0 1 1}
    }
  }

  # 4.2.*: single-column table with repeated tokens within each row.
  do_execsql_test 4.2.0 {
    CREATE VIRTUAL TABLE t5 USING fts4;
    INSERT INTO t5 VALUES('a a a a a');
    INSERT INTO t5 VALUES('a b a b a');
    INSERT INTO t5 VALUES('c b c b c');
    INSERT INTO t5 VALUES('x x x x x');
  }
  do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} {
    x {{5 8 2 5 8 2} {3 8 2 3 8 2}}
    s {2 1}
  }
  do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'}         { s {2} }
  do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'}       { s {3} }
  do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
  do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
  do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1} }

  # 4.3.*: add one row containing 50000 copies of the token 'b', then
  # repeat the 4.2 queries.
  do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";

  # It used to be that the second 'a' token would be deferred. That doesn't
  # work any longer.
  if 0 {
    do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} {
      x {{5 8 2 5 5 5} {3 8 2 3 5 5}}
      s {2 1}
    }
  }
  do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'}         { s {2} }
  do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'}       { s {3} }
  do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
  do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
  do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1 1} }

  # 4.4.*: run the 'optimize' command on the index and, where the
  # fts4_deferred capability is present, overwrite every segment block
  # larger than 10000 bytes with zeroes.
  # NOTE(review): presumably the queries below can then only pass if the
  # zeroed block is never read (i.e. the large 'b' doclist is deferred) -
  # confirm against the FTS4 deferred-token documentation.
  do_execsql_test 4.4.0.1 { INSERT INTO t5(t5) VALUES('optimize') }
  ifcapable fts4_deferred {
    do_execsql_test 4.4.0.2 {
      UPDATE t5_segments
      SET block = zeroblob(length(block))
      WHERE length(block)>10000;
    }
  }

  # This first query used to be named 4.4.2 as well, colliding with the
  # identically named test two lines below. It keeps its position as the
  # first query run after the update above, under a unique identifier.
  do_matchinfo_test 4.4.0.3 t5 {t5 MATCH 'a b'}       { s {2} }
  do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
  do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
  do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'}       { s {3} }
  do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
  do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }

  # 4.5.*: three-column table with each query term in a different column.
  do_execsql_test 4.5.0 {
    CREATE VIRTUAL TABLE t6 USING fts4(a, b, c);
    INSERT INTO t6 VALUES('a', 'b', 'c');
  }
  do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'} { s {{1 1 1}} }
  266. #-------------------------------------------------------------------------
  267. # Check the following restrictions:
  268. #
  269. # + Matchinfo flags 'a', 'l' and 'n' can only be used with fts4, not fts3.
  270. # + Matchinfo flag 'l' cannot be used with matchinfo=fts3.
  271. #
  # t7 is a plain fts3 table; t8 is an fts4 table created with
  # matchinfo=fts3. Both hold the same single row.
  do_execsql_test 5.1 {
    CREATE VIRTUAL TABLE t7 USING fts3(a, b);
    INSERT INTO t7 VALUES('u v w', 'x y z');
    CREATE VIRTUAL TABLE t8 USING fts4(a, b, matchinfo=fts3);
    INSERT INTO t8 VALUES('u v w', 'x y z');
  }

  # 5.2.*: each of the flags 'a', 'l' and 'n' is rejected on the fts3 table.
  do_catchsql_test 5.2.1 {
    SELECT matchinfo(t7, 'a') FROM t7 WHERE t7 MATCH 'x y'
  } {1 {unrecognized matchinfo request: a}}
  do_catchsql_test 5.2.2 {
    SELECT matchinfo(t7, 'l') FROM t7 WHERE t7 MATCH 'x y'
  } {1 {unrecognized matchinfo request: l}}
  do_catchsql_test 5.2.3 {
    SELECT matchinfo(t7, 'n') FROM t7 WHERE t7 MATCH 'x y'
  } {1 {unrecognized matchinfo request: n}}

  # 5.3.*: the 'l' flag is rejected on the fts4 table created with
  # matchinfo=fts3.
  do_catchsql_test 5.3.1 {
    SELECT matchinfo(t8, 'l') FROM t8 WHERE t8 MATCH 'x y'
  } {1 {unrecognized matchinfo request: l}}
  290. #-------------------------------------------------------------------------
  291. # Test that the offsets() function handles corruption in the %_content
  292. # table correctly.
  293. #
  # Baseline: the inserted text contains the token 'to' twice, and
  # offsets() reports two hits.
  do_execsql_test 6.1 {
    CREATE VIRTUAL TABLE t9 USING fts4;
    INSERT INTO t9 VALUES(
      'this record is used to try to dectect corruption'
    );
    SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
  } {{0 0 20 2 0 0 27 2}}

  # Replace the stored text directly in the t9_content shadow table with a
  # truncated copy that ends after the first 'to'. The same offsets() query
  # is then expected to fail with a "malformed" error instead of returning
  # a result.
  do_catchsql_test 6.2 {
    UPDATE t9_content SET c0content = 'this record is used to';
    SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
  } {1 {database disk image is malformed}}
  305. #-------------------------------------------------------------------------
  306. # Test the outcome of matchinfo() when used within a query that does not
  307. # use the full-text index (i.e. lookup by rowid or full-table scan).
  308. #
  do_execsql_test 7.1 {
    CREATE VIRTUAL TABLE t10 USING fts4;
    INSERT INTO t10 VALUES('first record');
    INSERT INTO t10 VALUES('second record');
  }

  # Full-table scan (no WHERE clause): matchinfo() is expected to return a
  # zero-length blob for each row.
  do_execsql_test 7.2 {
    SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10;
  } {blob 0 blob 0}

  # Lookup by docid: again a zero-length blob.
  do_execsql_test 7.3 {
    SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10 WHERE docid=1;
  } {blob 0}

  # For comparison, a MATCH query: each row yields a 20-byte blob.
  do_execsql_test 7.4 {
    SELECT typeof(matchinfo(t10)), length(matchinfo(t10))
    FROM t10 WHERE t10 MATCH 'record'
  } {blob 20 blob 20}
  #-------------------------------------------------------------------------
  # Test a special case - matchinfo('nxa') with many zero length documents.
  # Special because "x" internally uses a statement used by both "n" and "a".
  # This was causing a problem at one point in the obscure case where the
  # total number of bytes of data stored in an fts3 table was less than
  # the number of rows, i.e. when the following query returns true:
  #
  #   SELECT sum(length(content)) < count(*) FROM fts4table;
  #
  # Create t11, issue the 'nodesize=24' special command, and insert four
  # non-empty single-token documents.
  do_execsql_test 8.1 {
    CREATE VIRTUAL TABLE t11 USING fts4;
    INSERT INTO t11(t11) VALUES('nodesize=24');
    INSERT INTO t11 VALUES('quitealongstringoftext');
    INSERT INTO t11 VALUES('anotherquitealongstringoftext');
    INSERT INTO t11 VALUES('athirdlongstringoftext');
    INSERT INTO t11 VALUES('andonemoreforgoodluck');
  }

  # Add 200 zero-length documents so that the table holds far more rows
  # than bytes of content, then run the 'optimize' command.
  do_test 8.2 {
    for {set i 0} {$i < 200} {incr i} {
      execsql { INSERT INTO t11 VALUES('') }
    }
    execsql { INSERT INTO t11(t11) VALUES('optimize') }
  } {}

  # Three of the four non-empty documents start with 'a', so the prefix
  # query returns three rows.
  do_execsql_test 8.3 {
    SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  } {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}}

  # Corruption related tests.
  # Each pair overwrites the value column of t11_stat (there is no WHERE
  # clause, so every row of that table is affected) with a different
  # value - a two-byte blob, a one-byte blob, then NULL - and expects the
  # 'nxa' query to report a malformed database.
  do_execsql_test 8.4.1.1 { UPDATE t11_stat SET value = X'0000'; }
  do_catchsql_test 8.5.1.2 {
    SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  } {1 {database disk image is malformed}}
  do_execsql_test 8.4.2.1 { UPDATE t11_stat SET value = X'00'; }
  do_catchsql_test 8.5.2.2 {
    SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  } {1 {database disk image is malformed}}
  do_execsql_test 8.4.3.1 { UPDATE t11_stat SET value = NULL; }
  do_catchsql_test 8.5.3.2 {
    SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  } {1 {database disk image is malformed}}
  363. #-------------------------------------------------------------------------
  # matchinfo 'x' output for a query that combines NEAR with OR. A row is
  # added before each query, so the expected output grows by one row per
  # test.
  #
  # These tests were previously numbered 8.1-8.3, colliding with the t11
  # tests of the same names earlier in the file. They are numbered 9.x here
  # so that every test identifier in the file is unique; the snippet() test
  # that followed moves from 9.1 to 10.1 accordingly.
  do_execsql_test 9.1 {
    CREATE VIRTUAL TABLE t12 USING fts4;
    INSERT INTO t12 VALUES('a b c d');
    SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'a NEAR/1 d OR a';
  } {{0 0 0 0 0 0 1 1 1}}
  do_execsql_test 9.2 {
    INSERT INTO t12 VALUES('a d c d');
    SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'a NEAR/1 d OR a';
  } {
    {0 1 1 0 1 1 1 2 2} {1 1 1 1 1 1 1 2 2}
  }
  do_execsql_test 9.3 {
    INSERT INTO t12 VALUES('a d d a');
    SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'a NEAR/1 d OR a';
  } {
    {0 3 2 0 3 2 1 4 3} {1 3 2 1 3 2 1 4 3} {2 3 2 2 3 2 2 4 3}
  }

  # snippet() called with an empty ellipsis string, column -1 and a token
  # count of 1: only the highlighted match itself is expected for each row.
  do_execsql_test 10.1 {
    CREATE VIRTUAL TABLE ft2 USING fts4;
    INSERT INTO ft2 VALUES('a b c d e');
    INSERT INTO ft2 VALUES('f a b c d');
    SELECT snippet(ft2, '[', ']', '', -1, 1) FROM ft2 WHERE ft2 MATCH 'c';
  } {{[c]} {[c]}}

  finish_test