enc2.test 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. # 2002 May 24
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  # This file implements regression tests for SQLite library. The focus of
  # this file is testing the SQLite routines used for converting between the
  # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
  # UTF-16be).
  15. #
  16. # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
  # Load the test harness that lives in the same directory as this script.
  set testdir [file dirname $argv0]
  source $testdir/tester.tcl

  # Every test in this file needs UTF-16 support. When the build lacks it,
  # finish immediately and skip the rest of the script.
  ifcapable !utf16 {
    finish_test
    return
  }

  # The rough organisation of tests in this file is:
  #
  #   enc2-1.*:  Simple tests with a UTF-8 db.
  #   enc2-2.*:  Simple tests with a UTF-16LE db.
  #   enc2-3.*:  Simple tests with a UTF-16BE db.
  #   enc2-4.*:  Test that attached databases must have the same text
  #              encoding as the main database.
  #   enc2-5.*:  Test the behavior of the library when a collation sequence
  #              is not available for the most desirable text encoding.
  #   enc2-6.*:  Similar test for user functions.
  #   enc2-7.*:  Test that the VerifyCookie opcode protects against assuming
  #              the wrong text encoding for the database.
  #   enc2-8.*:  Test sqlite3_complete16()
  #   enc2-9.*:  Test that the encoding of an empty database may still be
  #              set after the (empty) schema has been initialized.
  #   enc2-10.*: Ticket #1987 - encoding changes are disallowed once the
  #              encoding has been set.
  #

  # Close the "db" handle that exists at this point (presumably opened by
  # tester.tcl - TODO confirm); the tests below open their own handles.
  db close
  # Return the UTF-8 representation of the supplied UTF-16 string $str.
  #
  #   str - UTF-16 encoded bytes, optionally followed by a two-byte
  #         0x00 0x00 terminator.
  #
  # The result is whatever TCL's "unicode" encoding decodes the bytes to,
  # after a trailing pair of zero bytes (if present) has been removed.
  proc utf8 {str} {
    # Expand the input into a list of signed 8-bit integers so that the
    # final two bytes can be examined.
    binary scan $str c* bytes

    # If $str ends in two 0x00 0x00 bytes, knock these off before
    # converting to UTF-8 using TCL.
    if {[lrange $bytes end-1 end] eq {0 0}} {
      set str [binary format c* [lrange $bytes 0 end-2]]
    }

    return [encoding convertfrom unicode $str]
  }
  #
  # SQL used to populate 'test.db'. The tests in run_test_script are run
  # three times, and each time the file 'test.db' contains a database with
  # the following contents:
  #
  set dbcontents {
    CREATE TABLE t1(a PRIMARY KEY, b, c);
    INSERT INTO t1 VALUES('one', 'I', 1);
  }
  # This proc tests that we can open and manipulate the test.db
  # database, and that it is possible to retrieve values in
  # various text encodings.
  #
  #   t   - Prefix for the test names used here ($t.1 through $t.10).
  #   enc - Value that "PRAGMA encoding" is expected to report ($t.10).
  #
  # The database handle "db" is opened by test $t.1 and is left open when
  # the proc returns.
  #
  proc run_test_script {t enc} {

    # Open the database and pull out a (the) row.
    do_test $t.1 {
      sqlite3 db test.db
      set DB [sqlite3_connection_pointer db]
      execsql {SELECT * FROM t1}
    } [list one I 1]

    # Insert one more row.
    do_test $t.2 {
      execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
      execsql {SELECT * FROM t1}
    } [list one I 1 two II 2]

    # Insert several rows with a single execsql call.
    do_test $t.3 {
      execsql {
        INSERT INTO t1 VALUES('three','III',3);
        INSERT INTO t1 VALUES('four','IV',4);
        INSERT INTO t1 VALUES('five','V',5);
      }
      execsql {SELECT * FROM t1}
    } [list one I 1 two II 2 three III 3 four IV 4 five V 5]

    # Use the index (column a is the PRIMARY KEY).
    do_test $t.4 {
      execsql {
        SELECT * FROM t1 WHERE a = 'one';
      }
    } [list one I 1]
    do_test $t.5 {
      execsql {
        SELECT * FROM t1 WHERE a = 'four';
      }
    } [list four IV 4]
    ifcapable subquery {
      do_test $t.6 {
        execsql {
          SELECT * FROM t1 WHERE a IN ('one', 'two');
        }
      } [list one I 1 two II 2]
    }

    # Now check that we can retrieve data in both UTF-16 and UTF-8.
    # The first row of the statement is read as UTF-8 text ...
    do_test $t.7 {
      set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
      sqlite3_step $STMT
      sqlite3_column_text $STMT 0
    } [list four]

    # ... and the second row as UTF-16, converted back with [utf8].
    do_test $t.8 {
      sqlite3_step $STMT
      utf8 [sqlite3_column_text16 $STMT 0]
    } [list five]

    do_test $t.9 {
      sqlite3_finalize $STMT
    } SQLITE_OK

    ifcapable vacuum {
      execsql {VACUUM}
    }

    # After all of the above the database must still report $enc.
    do_test $t.10 {
      db eval {PRAGMA encoding}
    } $enc
  }
  # The three unicode encodings understood by SQLite.
  set encodings {UTF-8 UTF-16le UTF-16be}
  set sqlite_os_trace 0

  # For each encoding: create a fresh test.db using that encoding, load
  # $dbcontents into it, check that later "PRAGMA encoding=..." statements
  # leave the reported encoding unchanged, then run run_test_script against
  # the file. The runs are named enc2-1, enc2-2 and enc2-3.
  set i 1
  foreach enc $encodings {
    forcedelete test.db
    sqlite3 db test.db
    db eval [format {PRAGMA encoding = "%s"} $enc]
    execsql $dbcontents

    # The encoding requested above is the one reported.
    do_test enc2-$i.0.1 {
      db eval {PRAGMA encoding}
    } $enc

    # Each of the following tries to switch encoding after the database
    # has content; the reported encoding must remain $enc.
    do_test enc2-$i.0.2 {
      db eval {PRAGMA encoding=UTF8}
      db eval {PRAGMA encoding}
    } $enc
    do_test enc2-$i.0.3 {
      db eval {PRAGMA encoding=UTF16le}
      db eval {PRAGMA encoding}
    } $enc
    do_test enc2-$i.0.4 {
      db eval {PRAGMA encoding=UTF16be}
      db eval {PRAGMA encoding}
    } $enc

    # run_test_script reopens test.db itself and leaves "db" open.
    db close
    run_test_script enc2-$i $enc
    db close
    incr i
  }
  # Test that it is an error to try to attach a database with a different
  # encoding to the main database.
  ifcapable attach {
    # Main database: test.db, created as UTF-8.
    do_test enc2-4.1 {
      forcedelete test.db
      sqlite3 db test.db
      db eval {PRAGMA encoding = 'UTF-8'}
      db eval {CREATE TABLE abc(a, b, c);}
    } {}

    # Second database: test2.db, created as UTF-16 through its own handle.
    do_test enc2-4.2 {
      forcedelete test2.db
      sqlite3 db2 test2.db
      db2 eval {PRAGMA encoding = 'UTF-16'}
      db2 eval {CREATE TABLE abc(a, b, c);}
    } {}

    # Attaching the UTF-16 file to the UTF-8 connection must fail.
    do_test enc2-4.3 {
      catchsql {
        ATTACH 'test2.db' as aux;
      }
    } [list 1 {attached databases must use the same text encoding as main database}]

    db2 close
    db close
  }
  # The following tests - enc2-5.* - test that SQLite selects the correct
  # collation sequence when more than one is available.
  #
  # ::values            - Defines the ordering imposed by test_collate: a
  #                       string sorts by its position in this list.
  # ::test_collate_enc  - Encoding name passed to the most recent call of
  #                       test_collate ("INVALID" until the first call).
  set ::values [list one two three four five]
  set ::test_collate_enc INVALID

  # Comparison callback used by the enc2-5.* tests.
  #
  #   enc - Name of the encoding variant being invoked; recorded in
  #         ::test_collate_enc so tests can check which variant ran.
  #   lhs - Left-hand string.
  #   rhs - Right-hand string.
  #
  # Returns the difference between the positions of $lhs and $rhs in
  # ::values (negative, zero or positive). A string that is not in
  # ::values has position -1.
  proc test_collate {enc lhs rhs} {
    set ::test_collate_enc $enc
    set l [lsearch -exact $::values $lhs]
    set r [lsearch -exact $::values $rhs]
    # Brace the expression so it is compiled and substituted only once.
    set res [expr {$l - $r}]
    # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
    return $res
  }
  # enc2-5.0 .. enc2-5.3: collation selection on a database created without
  # an explicit "PRAGMA encoding".
  forcedelete test.db
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]

  # Rows are inserted out of order so that ORDER BY has work to do.
  do_test enc2-5.0 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('one');
      INSERT INTO t5 VALUES('two');
      INSERT INTO t5 VALUES('five');
      INSERT INTO t5 VALUES('three');
      INSERT INTO t5 VALUES('four');
    }
  } {}

  # Judging by the expected results, the three flags given to
  # add_test_collate enable the UTF-8, UTF-16LE and UTF-16BE variants of
  # the collation, in that order. Each test appends the encoding that the
  # collation callback last saw to the sorted rows.
  do_test enc2-5.1 {
    add_test_collate $DB 1 1 1
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
    lappend res $::test_collate_enc
  } [list one two three four five UTF-8]
  do_test enc2-5.2 {
    add_test_collate $DB 0 1 0
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } [list one two three four five UTF-16LE]
  do_test enc2-5.3 {
    add_test_collate $DB 0 0 1
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } [list one two three four five UTF-16BE]

  db close
  # enc2-5.4 .. enc2-5.7: the same collation-selection checks against a
  # database whose encoding is set to UTF-16LE.
  forcedelete test.db
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  execsql {pragma encoding = 'UTF-16LE'}

  do_test enc2-5.4 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('one');
      INSERT INTO t5 VALUES('two');
      INSERT INTO t5 VALUES('five');
      INSERT INTO t5 VALUES('three');
      INSERT INTO t5 VALUES('four');
    }
  } {}

  # All three variants registered: the UTF-16LE one is expected to run.
  do_test enc2-5.5 {
    add_test_collate $DB 1 1 1
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } [list one two three four five UTF-16LE]

  # Without a UTF-16LE variant, UTF-16BE is expected ahead of UTF-8.
  do_test enc2-5.6 {
    add_test_collate $DB 1 0 1
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } [list one two three four five UTF-16BE]

  # Only the UTF-8 variant remains.
  do_test enc2-5.7 {
    add_test_collate $DB 1 0 0
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } [list one two three four five UTF-8]

  db close
  # enc2-5.8 .. enc2-5.14: collation selection against a database whose
  # encoding is set to UTF-16BE, followed by the collation-needed callback.
  forcedelete test.db
  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  execsql {pragma encoding = 'UTF-16BE'}
  do_test enc2-5.8 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('one');
      INSERT INTO t5 VALUES('two');
      INSERT INTO t5 VALUES('five');
      INSERT INTO t5 VALUES('three');
      INSERT INTO t5 VALUES('four');
    }
  } {}

  # Judging by the expected results, the three flags given to
  # add_test_collate enable the UTF-8, UTF-16LE and UTF-16BE variants of
  # the collation, in that order.
  do_test enc2-5.9 {
    add_test_collate $DB 1 1 1
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } {one two three four five UTF-16BE}
  do_test enc2-5.10 {
    add_test_collate $DB 1 1 0
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } {one two three four five UTF-16LE}
  do_test enc2-5.11 {
    add_test_collate $DB 1 0 0
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
    lappend res $::test_collate_enc
  } {one two three four five UTF-8}

  # With every variant switched off the collation is unknown. This test is
  # named enc2-5.12 so that it follows the enc2-5.N scheme used by its
  # neighbours.
  do_test enc2-5.12 {
    add_test_collate $DB 0 0 0
    catchsql {
      SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
    }
  } {1 {no such collation sequence: test_collate}}

  # Also test that a UTF-16 collation factory works: after
  # add_test_collate_needed the same query succeeds, and the callback is
  # expected to have seen UTF-16BE.
  do_test enc2-5.13 {
    add_test_collate_needed $DB
    set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
    lappend res $::test_collate_enc
  } {one two three four five UTF-16BE}

  # The name of the collation that was requested is recorded in
  # ::sqlite_last_needed_collation.
  do_test enc2-5.14 {
    set ::sqlite_last_needed_collation
  } test_collate
  # enc2-5.15 .. enc2-5.17: on a brand-new database,
  # ::sqlite_last_needed_collation is expected to be empty straight after
  # add_test_collate_needed, and to name "test_collate" once a CREATE TABLE
  # has referenced that collation.
  db close
  forcedelete test.db

  do_test enc2-5.15 {
    sqlite3 db test.db
    set ::DB [sqlite3_connection_pointer db]
    add_test_collate_needed $::DB
    set ::sqlite_last_needed_collation
  } {}

  do_test enc2-5.16 {
    execsql {CREATE TABLE t1(a varchar collate test_collate);}
  } {}

  do_test enc2-5.17 {
    set ::sqlite_last_needed_collation
  } [list test_collate]
  # The following tests - enc2-6.* - test that SQLite selects the correct
  # user function when more than one is available.
  #
  # test_function returns its two arguments joined by a single space, so
  # the result shows which encoding variant was invoked:
  #
  #   enc - Name of the encoding variant being invoked.
  #   arg - The argument given to the SQL function.
  proc test_function {enc arg} {
    return [join [list $enc $arg] " "]
  }
  # enc2-6.0 .. enc2-6.3: user-function selection on a UTF-8 database.
  db close
  forcedelete test.db
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  execsql {pragma encoding = 'UTF-8'}

  do_test enc2-6.0 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('one');
    }
  } {}

  # Judging by the expected results, the three flags given to
  # add_test_function enable the UTF-8, UTF-16LE and UTF-16BE variants of
  # the function, in that order. The connection is reopened before each
  # new registration.
  do_test enc2-6.1 {
    add_test_function $DB 1 1 1
    execsql {
      SELECT test_function('sqlite')
    }
  } [list {UTF-8 sqlite}]

  db close
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  do_test enc2-6.2 {
    add_test_function $DB 0 1 0
    execsql {
      SELECT test_function('sqlite')
    }
  } [list {UTF-16LE sqlite}]

  db close
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  do_test enc2-6.3 {
    add_test_function $DB 0 0 1
    execsql {
      SELECT test_function('sqlite')
    }
  } [list {UTF-16BE sqlite}]
  # User-function selection on a UTF-16LE database.
  db close
  forcedelete test.db
  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  execsql {pragma encoding = 'UTF-16LE'}

  # Setup step. It is named enc2-6.3.1 so that it does not collide with
  # the enc2-6.3 test that exercises the UTF-8 database; the numbering of
  # the tests that follow is left unchanged.
  do_test enc2-6.3.1 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('sqlite');
    }
  } {}

  # Judging by the expected results, the three flags given to
  # add_test_function enable the UTF-8, UTF-16LE and UTF-16BE variants of
  # the function, in that order. The connection is reopened before each
  # new registration.
  do_test enc2-6.4 {
    add_test_function $DB 1 1 1
    execsql {
      SELECT test_function('sqlite')
    }
  } {{UTF-16LE sqlite}}
  db close
  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  do_test enc2-6.5 {
    add_test_function $DB 0 1 0
    execsql {
      SELECT test_function('sqlite')
    }
  } {{UTF-16LE sqlite}}
  db close
  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  do_test enc2-6.6 {
    add_test_function $DB 0 0 1
    execsql {
      SELECT test_function('sqlite')
    }
  } {{UTF-16BE sqlite}}
  # enc2-6.7 .. enc2-6.10: user-function selection on a UTF-16BE database.
  db close
  forcedelete test.db
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  execsql {pragma encoding = 'UTF-16BE'}

  do_test enc2-6.7 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('sqlite');
    }
  } {}

  # Judging by the expected results, the three flags given to
  # add_test_function enable the UTF-8, UTF-16LE and UTF-16BE variants of
  # the function, in that order. The connection is reopened before each
  # new registration.
  do_test enc2-6.8 {
    add_test_function $DB 1 1 1
    execsql {
      SELECT test_function('sqlite')
    }
  } [list {UTF-16BE sqlite}]

  db close
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  do_test enc2-6.9 {
    add_test_function $DB 0 1 0
    execsql {
      SELECT test_function('sqlite')
    }
  } [list {UTF-16LE sqlite}]

  db close
  sqlite3 db test.db
  set DB [sqlite3_connection_pointer db]
  do_test enc2-6.10 {
    add_test_function $DB 0 0 1
    execsql {
      SELECT test_function('sqlite')
    }
  } [list {UTF-16BE sqlite}]

  # Leave no database file behind for the tests that follow.
  db close
  forcedelete test.db
  # The following tests - enc2-7.* - function as follows:
  #
  # 1: Open an empty database file assuming UTF-16 encoding.
  # 2: Open the same database with a different handle assuming UTF-8. Create
  #    a table using this handle.
  # 3: Read the sqlite_master table from the first handle.
  # 4: Ensure the first handle recognises the database encoding is UTF-8.
  #
  do_test enc2-7.1 {
    sqlite3 db test.db
    execsql {
      PRAGMA encoding = 'UTF-16';
      SELECT * FROM sqlite_master;
    }
  } {}
  do_test enc2-7.2 {
    set enc [execsql {
      PRAGMA encoding;
    }]
    string range $enc 0 end-2 ;# Chop off the "le" or "be"
  } {UTF-16}
  do_test enc2-7.3 {
    sqlite3 db2 test.db
    execsql {
      PRAGMA encoding = 'UTF-8';
      CREATE TABLE abc(a, b, c);
    } db2
  } {}

  # The fourth column of the expected row is 3 when $AUTOVACUUM is true
  # and 2 otherwise. $AUTOVACUUM is not set in this file (presumably it
  # comes from tester.tcl - TODO confirm). The expression is braced so
  # that it is substituted only once.
  do_test enc2-7.4 {
    execsql {
      SELECT * FROM sqlite_master;
    }
  } "table abc abc [expr {$AUTOVACUUM ? 3 : 2}] {CREATE TABLE abc(a, b, c)}"
  do_test enc2-7.5 {
    execsql {
      PRAGMA encoding;
    }
  } {UTF-8}
  db close
  db2 close
  # Return $utf8 converted with TCL's "unicode" encoding and followed by
  # a two-byte 0x00 0x00 terminator.
  #
  #   utf8 - The text to convert.
  proc utf16 {utf8} {
    return "[encoding convertto unicode $utf8]\x00\x00"
  }
  # enc2-8.*: sqlite3_complete16() applied to UTF-16 text built by [utf16].
  ifcapable complete {
    # A statement ending in a semicolon is reported as complete ...
    do_test enc2-8.1 {
      sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
    } 1

    # ... and a truncated statement is not.
    do_test enc2-8.2 {
      sqlite3_complete16 [utf16 "SELECT * FROM"]
    } 0
  }
  # Test that the encoding of an empty database may still be set after the
  # (empty) schema has been initialized.
  #
  # Every test below issues "sqlite3 db test.db" again without an explicit
  # "db close" in between.
  forcedelete test.db

  # While the database is empty the encoding may be chosen freely.
  do_test enc2-9.1 {
    sqlite3 db test.db
    execsql {
      PRAGMA encoding = 'UTF-8';
      PRAGMA encoding;
    }
  } [list UTF-8]
  do_test enc2-9.2 {
    sqlite3 db test.db
    execsql {
      PRAGMA encoding = 'UTF-16le';
      PRAGMA encoding;
    }
  } [list UTF-16le]

  # Reading sqlite_master first must not prevent the encoding from being
  # set afterwards.
  do_test enc2-9.3 {
    sqlite3 db test.db
    execsql {
      SELECT * FROM sqlite_master;
      PRAGMA encoding = 'UTF-8';
      PRAGMA encoding;
    }
  } [list UTF-8]

  # Creating a table after choosing UTF-16le ...
  do_test enc2-9.4 {
    sqlite3 db test.db
    execsql {
      PRAGMA encoding = 'UTF-16le';
      CREATE TABLE abc(a, b, c);
      PRAGMA encoding;
    }
  } [list UTF-16le]

  # ... after which a request for UTF-8 is expected to have no effect.
  do_test enc2-9.5 {
    sqlite3 db test.db
    execsql {
      PRAGMA encoding = 'UTF-8';
      PRAGMA encoding;
    }
  } [list UTF-16le]
  # Ticket #1987.
  # Disallow encoding changes once the encoding has been set.
  #
  # A second "PRAGMA encoding" is issued between two CREATE TABLE
  # statements; after reopening the file both tables must be listed in
  # sqlite_master.
  #
  do_test enc2-10.1 {
    db close
    forcedelete test.db test.db-journal
    sqlite3 db test.db
    db eval {
      PRAGMA encoding=UTF16;
      CREATE TABLE t1(a);
      PRAGMA encoding=UTF8;
      CREATE TABLE t2(b);
    }

    # Reopen the file and read the schema back.
    db close
    sqlite3 db test.db
    db eval {
      SELECT name FROM sqlite_master
    }
  } [list t1 t2]

  finish_test