complete.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /*
  2. ** 2001 September 15
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. *************************************************************************
  12. ** A tokenizer for SQL
  13. **
  14. ** This file contains C code that implements the sqlite3_complete() API.
  15. ** This code used to be part of the tokenizer.c source file. But by
  16. ** separating it out, the code will be automatically omitted from
  17. ** static links that do not use it.
  18. */
  19. #include "sqliteInt.h"
  20. #ifndef SQLITE_OMIT_COMPLETE
  21. /*
  22. ** This is defined in tokenize.c. We just have to import the definition.
  23. */
  24. #ifndef SQLITE_AMALGAMATION
  25. #ifdef SQLITE_ASCII
  26. #define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0)
  27. #endif
  28. #ifdef SQLITE_EBCDIC
  29. extern const char sqlite3IsEbcdicIdChar[];
  30. #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
  31. #endif
  32. #endif /* SQLITE_AMALGAMATION */
  33. /*
  34. ** Token types used by the sqlite3_complete() routine. See the header
  35. ** comments on that procedure for additional information.
  36. */
  37. #define tkSEMI 0
  38. #define tkWS 1
  39. #define tkOTHER 2
  40. #ifndef SQLITE_OMIT_TRIGGER
  41. #define tkEXPLAIN 3
  42. #define tkCREATE 4
  43. #define tkTEMP 5
  44. #define tkTRIGGER 6
  45. #define tkEND 7
  46. #endif
  47. /*
  48. ** Return TRUE if the given SQL string ends in a semicolon.
  49. **
  50. ** Special handling is require for CREATE TRIGGER statements.
  51. ** Whenever the CREATE TRIGGER keywords are seen, the statement
  52. ** must end with ";END;".
  53. **
  54. ** This implementation uses a state machine with 8 states:
  55. **
  56. ** (0) INVALID We have not yet seen a non-whitespace character.
  57. **
  58. ** (1) START At the beginning or end of an SQL statement. This routine
  59. ** returns 1 if it ends in the START state and 0 if it ends
  60. ** in any other state.
  61. **
  62. ** (2) NORMAL We are in the middle of statement which ends with a single
  63. ** semicolon.
  64. **
  65. ** (3) EXPLAIN The keyword EXPLAIN has been seen at the beginning of
  66. ** a statement.
  67. **
  68. ** (4) CREATE The keyword CREATE has been seen at the beginning of a
  69. ** statement, possibly preceeded by EXPLAIN and/or followed by
  70. ** TEMP or TEMPORARY
  71. **
  72. ** (5) TRIGGER We are in the middle of a trigger definition that must be
  73. ** ended by a semicolon, the keyword END, and another semicolon.
  74. **
  75. ** (6) SEMI We've seen the first semicolon in the ";END;" that occurs at
  76. ** the end of a trigger definition.
  77. **
  78. ** (7) END We've seen the ";END" of the ";END;" that occurs at the end
  79. ** of a trigger difinition.
  80. **
  81. ** Transitions between states above are determined by tokens extracted
  82. ** from the input. The following tokens are significant:
  83. **
  84. ** (0) tkSEMI A semicolon.
  85. ** (1) tkWS Whitespace.
  86. ** (2) tkOTHER Any other SQL token.
  87. ** (3) tkEXPLAIN The "explain" keyword.
  88. ** (4) tkCREATE The "create" keyword.
  89. ** (5) tkTEMP The "temp" or "temporary" keyword.
  90. ** (6) tkTRIGGER The "trigger" keyword.
  91. ** (7) tkEND The "end" keyword.
  92. **
  93. ** Whitespace never causes a state transition and is always ignored.
  94. ** This means that a SQL string of all whitespace is invalid.
  95. **
  96. ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed
  97. ** to recognize the end of a trigger can be omitted. All we have to do
  98. ** is look for a semicolon that is not part of an string or comment.
  99. */
  100. int sqlite3_complete(const char *zSql){
  101. u8 state = 0; /* Current state, using numbers defined in header comment */
  102. u8 token; /* Value of the next token */
  103. #ifndef SQLITE_OMIT_TRIGGER
  104. /* A complex statement machine used to detect the end of a CREATE TRIGGER
  105. ** statement. This is the normal case.
  106. */
  107. static const u8 trans[8][8] = {
  108. /* Token: */
  109. /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */
  110. /* 0 INVALID: */ { 1, 0, 2, 3, 4, 2, 2, 2, },
  111. /* 1 START: */ { 1, 1, 2, 3, 4, 2, 2, 2, },
  112. /* 2 NORMAL: */ { 1, 2, 2, 2, 2, 2, 2, 2, },
  113. /* 3 EXPLAIN: */ { 1, 3, 3, 2, 4, 2, 2, 2, },
  114. /* 4 CREATE: */ { 1, 4, 2, 2, 2, 4, 5, 2, },
  115. /* 5 TRIGGER: */ { 6, 5, 5, 5, 5, 5, 5, 5, },
  116. /* 6 SEMI: */ { 6, 6, 5, 5, 5, 5, 5, 7, },
  117. /* 7 END: */ { 1, 7, 5, 5, 5, 5, 5, 5, },
  118. };
  119. #else
  120. /* If triggers are not supported by this compile then the statement machine
  121. ** used to detect the end of a statement is much simplier
  122. */
  123. static const u8 trans[3][3] = {
  124. /* Token: */
  125. /* State: ** SEMI WS OTHER */
  126. /* 0 INVALID: */ { 1, 0, 2, },
  127. /* 1 START: */ { 1, 1, 2, },
  128. /* 2 NORMAL: */ { 1, 2, 2, },
  129. };
  130. #endif /* SQLITE_OMIT_TRIGGER */
  131. while( *zSql ){
  132. switch( *zSql ){
  133. case ';': { /* A semicolon */
  134. token = tkSEMI;
  135. break;
  136. }
  137. case ' ':
  138. case '\r':
  139. case '\t':
  140. case '\n':
  141. case '\f': { /* White space is ignored */
  142. token = tkWS;
  143. break;
  144. }
  145. case '/': { /* C-style comments */
  146. if( zSql[1]!='*' ){
  147. token = tkOTHER;
  148. break;
  149. }
  150. zSql += 2;
  151. while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
  152. if( zSql[0]==0 ) return 0;
  153. zSql++;
  154. token = tkWS;
  155. break;
  156. }
  157. case '-': { /* SQL-style comments from "--" to end of line */
  158. if( zSql[1]!='-' ){
  159. token = tkOTHER;
  160. break;
  161. }
  162. while( *zSql && *zSql!='\n' ){ zSql++; }
  163. if( *zSql==0 ) return state==1;
  164. token = tkWS;
  165. break;
  166. }
  167. case '[': { /* Microsoft-style identifiers in [...] */
  168. zSql++;
  169. while( *zSql && *zSql!=']' ){ zSql++; }
  170. if( *zSql==0 ) return 0;
  171. token = tkOTHER;
  172. break;
  173. }
  174. case '`': /* Grave-accent quoted symbols used by MySQL */
  175. case '"': /* single- and double-quoted strings */
  176. case '\'': {
  177. int c = *zSql;
  178. zSql++;
  179. while( *zSql && *zSql!=c ){ zSql++; }
  180. if( *zSql==0 ) return 0;
  181. token = tkOTHER;
  182. break;
  183. }
  184. default: {
  185. #ifdef SQLITE_EBCDIC
  186. unsigned char c;
  187. #endif
  188. if( IdChar((u8)*zSql) ){
  189. /* Keywords and unquoted identifiers */
  190. int nId;
  191. for(nId=1; IdChar(zSql[nId]); nId++){}
  192. #ifdef SQLITE_OMIT_TRIGGER
  193. token = tkOTHER;
  194. #else
  195. switch( *zSql ){
  196. case 'c': case 'C': {
  197. if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){
  198. token = tkCREATE;
  199. }else{
  200. token = tkOTHER;
  201. }
  202. break;
  203. }
  204. case 't': case 'T': {
  205. if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){
  206. token = tkTRIGGER;
  207. }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){
  208. token = tkTEMP;
  209. }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){
  210. token = tkTEMP;
  211. }else{
  212. token = tkOTHER;
  213. }
  214. break;
  215. }
  216. case 'e': case 'E': {
  217. if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){
  218. token = tkEND;
  219. }else
  220. #ifndef SQLITE_OMIT_EXPLAIN
  221. if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){
  222. token = tkEXPLAIN;
  223. }else
  224. #endif
  225. {
  226. token = tkOTHER;
  227. }
  228. break;
  229. }
  230. default: {
  231. token = tkOTHER;
  232. break;
  233. }
  234. }
  235. #endif /* SQLITE_OMIT_TRIGGER */
  236. zSql += nId-1;
  237. }else{
  238. /* Operators and special symbols */
  239. token = tkOTHER;
  240. }
  241. break;
  242. }
  243. }
  244. state = trans[state][token];
  245. zSql++;
  246. }
  247. return state==1;
  248. }
  249. #ifndef SQLITE_OMIT_UTF16
  250. /*
  251. ** This routine is the same as the sqlite3_complete() routine described
  252. ** above, except that the parameter is required to be UTF-16 encoded, not
  253. ** UTF-8.
  254. */
  255. int sqlite3_complete16(const void *zSql){
  256. sqlite3_value *pVal;
  257. char const *zSql8;
  258. int rc = SQLITE_NOMEM;
  259. #ifndef SQLITE_OMIT_AUTOINIT
  260. rc = sqlite3_initialize();
  261. if( rc ) return rc;
  262. #endif
  263. pVal = sqlite3ValueNew(0);
  264. sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
  265. zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
  266. if( zSql8 ){
  267. rc = sqlite3_complete(zSql8);
  268. }else{
  269. rc = SQLITE_NOMEM;
  270. }
  271. sqlite3ValueFree(pVal);
  272. return sqlite3ApiExit(0, rc);
  273. }
  274. #endif /* SQLITE_OMIT_UTF16 */
  275. #endif /* SQLITE_OMIT_COMPLETE */