enc.test 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # 2002 May 24
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. # This file implements regression tests for SQLite library. The focus of
  12. # this file is testing the SQLite routines used for converting between the
  13. # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
  14. # UTF-16be).
  15. #
  16. # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
  # Locate the directory holding this script and load the shared test
  # harness that lives alongside it.
  set testdir [file dirname $argv0]
  source [file join $testdir tester.tcl]

  # Everything below needs multi-encoding support. If the build does not
  # have the utf16 capability, close out the test run and stop here.
  ifcapable {!utf16} {
    finish_test
    return
  }
  # Run one harness test case that compares two binary strings.
  #
  #   testname - name passed through to do_test for this comparison
  #   got      - the binary string that was actually produced
  #   expect   - the binary string that should have been produced
  #
  # Each string is expanded into a list of signed 8-bit integer values
  # (one per byte), and those lists are what do_test receives: the "got"
  # list as the result of the test script, the "expect" list as the
  # expected result.
  proc do_bincmp_test {testname got expect} {
    binary scan $got    c* actualBytes
    binary scan $expect c* wantedBytes
    do_test $testname [list set dummy $actualBytes] $wantedBytes
  }
  # Reverse the byte order of a UTF-16 encoded string.
  #
  #   utf16 - binary string holding UTF-16 data (two bytes per code unit)
  #
  # Returns a binary string in which the two bytes of every consecutive
  # pair have traded places, which converts between the little-endian and
  # big-endian forms. An empty input yields an empty result. The input is
  # expected to hold an even number of bytes; a trailing unpaired byte is
  # not given any special handling.
  proc swap_byte_order {utf16} {
    binary scan $utf16 c* bytes

    # Start from an empty list so that an empty input (for which the loop
    # body never runs) still leaves a defined value to format below.
    set swapped [list]
    foreach {first second} $bytes {
      lappend swapped $second $first
    }
    return [binary format c* $swapped]
  }
  40. #
  41. # Test that the SQLite routines for converting between UTF encodings
  42. # produce the same results as their TCL counterparts.
  43. #
  44. # $testname is the prefix to be used for the test names.
  45. # $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
  46. #
  47. # The test procedure is:
  48. # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
  49. # SQLite routines produce the same results.
  50. #
  51. # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
  52. # SQLite routines produce the same results.
  53. #
  54. # 3. Use the SQLite routines to convert the native machine order UTF-16
  55. # representation back to the original UTF-8. Check that the result
  56. # matches the original representation.
  57. #
  58. # 4. Add a byte-order mark to each of the UTF-16 representations and
  59. # check that the SQLite routines can convert them back to UTF-8. For
  60. # byte-order mark info, refer to section 3.10 of the unicode standard.
  61. #
  62. # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
  63. # that SQLite can convert them both to native byte order UTF-16
  64. # strings, sans BOM.
  65. #
  66. # Coverage:
  67. #
  68. # sqlite_utf8to16be (step 2)
  69. # sqlite_utf8to16le (step 1)
  70. # sqlite_utf16to8 (steps 3, 4)
  71. # sqlite_utf16to16le (step 5)
  72. # sqlite_utf16to16be (step 5)
  73. #
  proc test_conversion {testname str} {
    # Build the TCL reference encodings once. The byte-order checks below
    # treat the output of "encoding convertto unicode" as native-order
    # UTF-16; a two-byte zero terminator is appended to it, and the
    # opposite byte order is derived by swapping every byte pair.
    set nativeRef [encoding convertto unicode $str]
    append nativeRef "\x00\x00"
    set flippedRef [swap_byte_order $nativeRef]
    if {[string equal $::tcl_platform(byteOrder) littleEndian]} {
      set utf16le $nativeRef
      set utf16be $flippedRef
    } else {
      set utf16le $flippedRef
      set utf16be $nativeRef
    }

    # Step 1: UTF-8 to UTF-16le must match the TCL little-endian reference.
    do_bincmp_test $testname.1 [test_translate $str UTF8 UTF16LE] $utf16le

    # Step 2: UTF-8 to UTF-16be must match the TCL big-endian reference.
    do_bincmp_test $testname.2 [test_translate $str UTF8 UTF16BE] $utf16be

    # Step 3: native-order UTF-16 back to UTF-8 must reproduce the input.
    do_bincmp_test $testname.3 \
        [test_translate $nativeRef UTF16 UTF8] [binarize $str]

    # Step 4: the same round trip, but with a byte-order mark in front of
    # each representation. The little-endian case passes an extra trailing
    # argument (1) to test_translate, exactly as the test has always done.
    set utf16le_bom "\xFF\xFE"
    append utf16le_bom $utf16le
    do_bincmp_test $testname.4.le \
        [test_translate $utf16le_bom UTF16 UTF8 1] [binarize $str]

    set utf16be_bom "\xFE\xFF"
    append utf16be_bom $utf16be
    do_bincmp_test $testname.4.be \
        [test_translate $utf16be_bom UTF16 UTF8] [binarize $str]

    # Step 5: convert the BOM-prefixed strings to explicit-byte-order
    # UTF-16 and compare against the BOM-less references. Each row is:
    # test-name suffix, input, source encoding, target encoding, expected.
    set step5Cases [list \
        le.le $utf16le_bom UTF16LE UTF16LE $utf16le \
        be.be $utf16be_bom UTF16   UTF16BE $utf16be \
        be.le $utf16be_bom UTF16   UTF16LE $utf16le \
        le.be $utf16le_bom UTF16   UTF16BE $utf16be \
    ]
    foreach {suffix input srcEnc dstEnc wanted} $step5Cases {
      do_bincmp_test $testname.5.$suffix \
          [test_translate $input $srcEnc $dstEnc] $wanted
    }
  }
  # Run the translation self-test command before exercising any conversions.
  translate_selftest

  # Conversion cases, executed in the order listed. Each pair is the
  # test-name prefix followed by the string handed to test_conversion.
  foreach {enc_case_name enc_case_text} [list \
      enc-1  "hello world" \
      enc-2  "sqlite" \
      enc-3  "" \
      enc-X  "\u0100" \
      enc-4  "\u1234" \
      enc-5  "\u4321abc" \
      enc-6  "\u4321\u1234" \
      enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
      enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
      enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
      enc-10 [string repeat "\uE000" 100] \
  ] {
    test_conversion $enc_case_name $enc_case_text
  }
  # Do not leave the loop variables behind in the global scope.
  unset enc_case_name enc_case_text
  # Collation callback: order two strings using TCL's own string comparison.
  #
  #   enc    - encoding label supplied by the caller; not consulted here
  #   zLeft  - left-hand string
  #   zRight - right-hand string
  #
  # Returns -1, 0 or 1 as zLeft sorts before, equal to, or after zRight.
  proc test_collate {enc zLeft zRight} {
    set ordering [string compare $zLeft $zRight]
    return $ordering
  }
  # Attach the test collation to the database handle held in $::DB.
  # NOTE(review): the three flags (0 0 1) presumably select which text
  # encodings the collation is registered for -- confirm against the
  # implementation of add_test_collate, which is not part of this file.
  add_test_collate $::DB 0 0 1

  # enc-11.1: create a table whose first column uses the test_collate
  # collation, insert two rows and index them; no output is expected.
  # X'C388' is the UTF-8 encoding of U+00C8. The second value is a long
  # byte string that begins with 0xC0, continues with a run of 0x80 bytes
  # and ends in 0x83 0x88, cast to TEXT.
  do_test enc-11.1 {
    execsql {
      CREATE TABLE ab(a COLLATE test_collate, b);
      INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
      INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
      CREATE INDEX ab_i ON ab(a, b);
    }
  } {}

  # enc-11.2: look up the single character U+00C8 through column a. The
  # expected count of 2 means both rows inserted above must compare equal
  # to it.
  do_test enc-11.2 {
    set cp200 "\u00C8"
    execsql {
      SELECT count(*) FROM ab WHERE a = $::cp200;
    }
  } {2}

  # Signal the harness that this test file is complete.
  finish_test