fts3_expr.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278
  1. /*
  2. ** 2008 Nov 28
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This module contains code that implements a parser for fts3 query strings
  14. ** (the right-hand argument to the MATCH operator). Because the supported
  15. ** syntax is relatively simple, the whole tokenizer/parser system is
  16. ** hand-coded.
  17. */
  18. #include "fts3Int.h"
  19. #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
  20. /*
  21. ** By default, this module parses the legacy syntax that has been
  22. ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
  23. ** is defined, then it uses the new syntax. The differences between
  24. ** the new and the old syntaxes are:
  25. **
  26. ** a) The new syntax supports parenthesis. The old does not.
  27. **
  28. ** b) The new syntax supports the AND and NOT operators. The old does not.
  29. **
  30. ** c) The old syntax supports the "-" token qualifier. This is not
  31. ** supported by the new syntax (it is replaced by the NOT operator).
  32. **
  33. ** d) When using the old syntax, the OR operator has a greater precedence
  34. ** than an implicit AND. When using the new, both implicit and explicit
  35. ** AND operators have a higher precedence than OR.
  36. **
  37. ** If compiled with SQLITE_TEST defined, then this module exports the
  38. ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
  39. ** to zero causes the module to use the old syntax. If it is set to
  40. ** non-zero the new syntax is activated. This is so both syntaxes can
  41. ** be tested using a single build of testfixture.
  42. **
  43. ** The following describes the syntax supported by the fts3 MATCH
  44. ** operator in a similar format to that used by the lemon parser
  45. ** generator. This module does not actually use lemon; it uses a
  46. ** custom parser.
  47. **
  48. ** query ::= andexpr (OR andexpr)*.
  49. **
  50. ** andexpr ::= notexpr (AND? notexpr)*.
  51. **
  52. ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
  53. ** notexpr ::= LP query RP.
  54. **
  55. ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
  56. **
  57. ** distance_opt ::= .
  58. ** distance_opt ::= / INTEGER.
  59. **
  60. ** phrase ::= TOKEN.
  61. ** phrase ::= COLUMN:TOKEN.
  62. ** phrase ::= "TOKEN TOKEN TOKEN...".
  63. */
  /*
  ** Select the query syntax. Builds with SQLITE_TEST defined get a run-time
  ** variable (initially 0, i.e. legacy syntax) so that both syntaxes can be
  ** exercised from one binary. All other builds get a compile-time constant:
  ** 1 when SQLITE_ENABLE_FTS3_PARENTHESIS is defined, 0 otherwise.
  */
  #if defined(SQLITE_TEST)
  int sqlite3_fts3_enable_parentheses = 0;
  #elif defined(SQLITE_ENABLE_FTS3_PARENTHESIS)
  # define sqlite3_fts3_enable_parentheses 1
  #else
  # define sqlite3_fts3_enable_parentheses 0
  #endif

  /*
  ** Distance assigned to a NEAR operator that has no explicit "/N" suffix.
  */
  #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
  77. #include <string.h>
  78. #include <assert.h>
  79. /*
  80. ** isNot:
  81. ** This variable is used by function getNextNode(). When getNextNode() is
  82. ** called, it sets ParseContext.isNot to true if the 'next node' is a
  83. ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
  84. ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
  85. ** zero.
  86. */
  87. typedef struct ParseContext ParseContext;
  88. struct ParseContext {
  89. sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
  90. int iLangid; /* Language id used with tokenizer */
  91. const char **azCol; /* Array of column names for fts3 table */
  92. int bFts4; /* True to allow FTS4-only syntax */
  93. int nCol; /* Number of entries in azCol[] */
  94. int iDefaultCol; /* Default column to query */
  95. int isNot; /* True if getNextNode() sees a unary - */
  96. sqlite3_context *pCtx; /* Write error message here */
  97. int nNest; /* Number of nested brackets */
  98. };
  /*
  ** Drop-in replacement for the standard isspace() function.
  **
  ** isspace() takes an int, but its behaviour is undefined for values that
  ** do not fit in an unsigned char, which makes it easy to misuse with plain
  ** (possibly signed) char data. This version takes a char and returns 1 for
  ** the six ASCII whitespace characters and 0 for everything else, including
  ** negative values.
  */
  static int fts3isspace(char c){
    switch( c ){
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '\v':
      case '\f':
        return 1;
      default:
        return 0;
    }
  }
  /*
  ** Obtain nByte bytes from sqlite3_malloc() and clear them. Returns a
  ** pointer to the zeroed buffer, or NULL if the allocation failed.
  */
  static void *fts3MallocZero(int nByte){
    void *pNew = sqlite3_malloc(nByte);
    if( pNew==0 ){
      return 0;
    }
    memset(pNew, 0, nByte);
    return pNew;
  }
  123. int sqlite3Fts3OpenTokenizer(
  124. sqlite3_tokenizer *pTokenizer,
  125. int iLangid,
  126. const char *z,
  127. int n,
  128. sqlite3_tokenizer_cursor **ppCsr
  129. ){
  130. sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  131. sqlite3_tokenizer_cursor *pCsr = 0;
  132. int rc;
  133. rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
  134. assert( rc==SQLITE_OK || pCsr==0 );
  135. if( rc==SQLITE_OK ){
  136. pCsr->pTokenizer = pTokenizer;
  137. if( pModule->iVersion>=1 ){
  138. rc = pModule->xLanguageid(pCsr, iLangid);
  139. if( rc!=SQLITE_OK ){
  140. pModule->xClose(pCsr);
  141. pCsr = 0;
  142. }
  143. }
  144. }
  145. *ppCsr = pCsr;
  146. return rc;
  147. }
  148. /*
  149. ** Extract the next token from buffer z (length n) using the tokenizer
  150. ** and other information (column names etc.) in pParse. Create an Fts3Expr
  151. ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
  152. ** single token and set *ppExpr to point to it. If the end of the buffer is
  153. ** reached before a token is found, set *ppExpr to zero. It is the
  154. ** responsibility of the caller to eventually deallocate the allocated
  155. ** Fts3Expr structure (if any) by passing it to sqlite3_free().
  156. **
  157. ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
  158. ** fails.
  159. */
  160. static int getNextToken(
  161. ParseContext *pParse, /* fts3 query parse context */
  162. int iCol, /* Value for Fts3Phrase.iColumn */
  163. const char *z, int n, /* Input string */
  164. Fts3Expr **ppExpr, /* OUT: expression */
  165. int *pnConsumed /* OUT: Number of bytes consumed */
  166. ){
  167. sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  168. sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  169. int rc;
  170. sqlite3_tokenizer_cursor *pCursor;
  171. Fts3Expr *pRet = 0;
  172. int nConsumed = 0;
  173. rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
  174. if( rc==SQLITE_OK ){
  175. const char *zToken;
  176. int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
  177. int nByte; /* total space to allocate */
  178. rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
  179. if( rc==SQLITE_OK ){
  180. nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
  181. pRet = (Fts3Expr *)fts3MallocZero(nByte);
  182. if( !pRet ){
  183. rc = SQLITE_NOMEM;
  184. }else{
  185. pRet->eType = FTSQUERY_PHRASE;
  186. pRet->pPhrase = (Fts3Phrase *)&pRet[1];
  187. pRet->pPhrase->nToken = 1;
  188. pRet->pPhrase->iColumn = iCol;
  189. pRet->pPhrase->aToken[0].n = nToken;
  190. pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
  191. memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
  192. if( iEnd<n && z[iEnd]=='*' ){
  193. pRet->pPhrase->aToken[0].isPrefix = 1;
  194. iEnd++;
  195. }
  196. while( 1 ){
  197. if( !sqlite3_fts3_enable_parentheses
  198. && iStart>0 && z[iStart-1]=='-'
  199. ){
  200. pParse->isNot = 1;
  201. iStart--;
  202. }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
  203. pRet->pPhrase->aToken[0].bFirst = 1;
  204. iStart--;
  205. }else{
  206. break;
  207. }
  208. }
  209. }
  210. nConsumed = iEnd;
  211. }
  212. pModule->xClose(pCursor);
  213. }
  214. *pnConsumed = nConsumed;
  215. *ppExpr = pRet;
  216. return rc;
  217. }
  /*
  ** Resize allocation pOrig to nNew bytes using sqlite3_realloc(). Returns
  ** the resized allocation, or NULL on failure. In the failure case the
  ** original allocation is released, so the caller never has to free pOrig
  ** after this call.
  **
  ** sqlite3_realloc(X,N) with N<=0 behaves like sqlite3_free(X) and returns
  ** NULL. pOrig has then already been released, so it must not be freed a
  ** second time here.
  */
  static void *fts3ReallocOrFree(void *pOrig, int nNew){
    void *pRet = sqlite3_realloc(pOrig, nNew);
    if( !pRet && nNew>0 ){
      /* Genuine out-of-memory: sqlite3_realloc() left pOrig untouched. */
      sqlite3_free(pOrig);
    }
    return pRet;
  }
  229. /*
  230. ** Buffer zInput, length nInput, contains the contents of a quoted string
  231. ** that appeared as part of an fts3 query expression. Neither quote character
  232. ** is included in the buffer. This function attempts to tokenize the entire
  233. ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
  234. ** containing the results.
  235. **
  236. ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
  237. ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
  238. ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
  239. ** to 0.
  240. */
  241. static int getNextString(
  242. ParseContext *pParse, /* fts3 query parse context */
  243. const char *zInput, int nInput, /* Input string */
  244. Fts3Expr **ppExpr /* OUT: expression */
  245. ){
  246. sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  247. sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  248. int rc;
  249. Fts3Expr *p = 0;
  250. sqlite3_tokenizer_cursor *pCursor = 0;
  251. char *zTemp = 0;
  252. int nTemp = 0;
  253. const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
  254. int nToken = 0;
  255. /* The final Fts3Expr data structure, including the Fts3Phrase,
  256. ** Fts3PhraseToken structures token buffers are all stored as a single
  257. ** allocation so that the expression can be freed with a single call to
  258. ** sqlite3_free(). Setting this up requires a two pass approach.
  259. **
  260. ** The first pass, in the block below, uses a tokenizer cursor to iterate
  261. ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
  262. ** to assemble data in two dynamic buffers:
  263. **
  264. ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
  265. ** structure, followed by the array of Fts3PhraseToken
  266. ** structures. This pass only populates the Fts3PhraseToken array.
  267. **
  268. ** Buffer zTemp: Contains copies of all tokens.
  269. **
  270. ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
  271. ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  272. ** structures.
  273. */
  274. rc = sqlite3Fts3OpenTokenizer(
  275. pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
  276. if( rc==SQLITE_OK ){
  277. int ii;
  278. for(ii=0; rc==SQLITE_OK; ii++){
  279. const char *zByte;
  280. int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
  281. rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
  282. if( rc==SQLITE_OK ){
  283. Fts3PhraseToken *pToken;
  284. p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
  285. if( !p ) goto no_mem;
  286. zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
  287. if( !zTemp ) goto no_mem;
  288. assert( nToken==ii );
  289. pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
  290. memset(pToken, 0, sizeof(Fts3PhraseToken));
  291. memcpy(&zTemp[nTemp], zByte, nByte);
  292. nTemp += nByte;
  293. pToken->n = nByte;
  294. pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
  295. pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
  296. nToken = ii+1;
  297. }
  298. }
  299. pModule->xClose(pCursor);
  300. pCursor = 0;
  301. }
  302. if( rc==SQLITE_DONE ){
  303. int jj;
  304. char *zBuf = 0;
  305. p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
  306. if( !p ) goto no_mem;
  307. memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
  308. p->eType = FTSQUERY_PHRASE;
  309. p->pPhrase = (Fts3Phrase *)&p[1];
  310. p->pPhrase->iColumn = pParse->iDefaultCol;
  311. p->pPhrase->nToken = nToken;
  312. zBuf = (char *)&p->pPhrase->aToken[nToken];
  313. if( zTemp ){
  314. memcpy(zBuf, zTemp, nTemp);
  315. sqlite3_free(zTemp);
  316. }else{
  317. assert( nTemp==0 );
  318. }
  319. for(jj=0; jj<p->pPhrase->nToken; jj++){
  320. p->pPhrase->aToken[jj].z = zBuf;
  321. zBuf += p->pPhrase->aToken[jj].n;
  322. }
  323. rc = SQLITE_OK;
  324. }
  325. *ppExpr = p;
  326. return rc;
  327. no_mem:
  328. if( pCursor ){
  329. pModule->xClose(pCursor);
  330. }
  331. sqlite3_free(zTemp);
  332. sqlite3_free(p);
  333. *ppExpr = 0;
  334. return SQLITE_NOMEM;
  335. }
  336. /*
  337. ** Function getNextNode(), which is called by fts3ExprParse(), may itself
  338. ** call fts3ExprParse(). So this forward declaration is required.
  339. */
  340. static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
  341. /*
  342. ** The output variable *ppExpr is populated with an allocated Fts3Expr
  343. ** structure, or set to 0 if the end of the input buffer is reached.
  344. **
  345. ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
  346. ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
  347. ** If SQLITE_ERROR is returned, pContext is populated with an error message.
  348. */
  349. static int getNextNode(
  350. ParseContext *pParse, /* fts3 query parse context */
  351. const char *z, int n, /* Input string */
  352. Fts3Expr **ppExpr, /* OUT: expression */
  353. int *pnConsumed /* OUT: Number of bytes consumed */
  354. ){
  355. static const struct Fts3Keyword {
  356. char *z; /* Keyword text */
  357. unsigned char n; /* Length of the keyword */
  358. unsigned char parenOnly; /* Only valid in paren mode */
  359. unsigned char eType; /* Keyword code */
  360. } aKeyword[] = {
  361. { "OR" , 2, 0, FTSQUERY_OR },
  362. { "AND", 3, 1, FTSQUERY_AND },
  363. { "NOT", 3, 1, FTSQUERY_NOT },
  364. { "NEAR", 4, 0, FTSQUERY_NEAR }
  365. };
  366. int ii;
  367. int iCol;
  368. int iColLen;
  369. int rc;
  370. Fts3Expr *pRet = 0;
  371. const char *zInput = z;
  372. int nInput = n;
  373. pParse->isNot = 0;
  374. /* Skip over any whitespace before checking for a keyword, an open or
  375. ** close bracket, or a quoted string.
  376. */
  377. while( nInput>0 && fts3isspace(*zInput) ){
  378. nInput--;
  379. zInput++;
  380. }
  381. if( nInput==0 ){
  382. return SQLITE_DONE;
  383. }
  384. /* See if we are dealing with a keyword. */
  385. for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
  386. const struct Fts3Keyword *pKey = &aKeyword[ii];
  387. if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
  388. continue;
  389. }
  390. if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
  391. int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
  392. int nKey = pKey->n;
  393. char cNext;
  394. /* If this is a "NEAR" keyword, check for an explicit nearness. */
  395. if( pKey->eType==FTSQUERY_NEAR ){
  396. assert( nKey==4 );
  397. if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
  398. nNear = 0;
  399. for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
  400. nNear = nNear * 10 + (zInput[nKey] - '0');
  401. }
  402. }
  403. }
  404. /* At this point this is probably a keyword. But for that to be true,
  405. ** the next byte must contain either whitespace, an open or close
  406. ** parenthesis, a quote character, or EOF.
  407. */
  408. cNext = zInput[nKey];
  409. if( fts3isspace(cNext)
  410. || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
  411. ){
  412. pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
  413. if( !pRet ){
  414. return SQLITE_NOMEM;
  415. }
  416. pRet->eType = pKey->eType;
  417. pRet->nNear = nNear;
  418. *ppExpr = pRet;
  419. *pnConsumed = (int)((zInput - z) + nKey);
  420. return SQLITE_OK;
  421. }
  422. /* Turns out that wasn't a keyword after all. This happens if the
  423. ** user has supplied a token such as "ORacle". Continue.
  424. */
  425. }
  426. }
  427. /* Check for an open bracket. */
  428. if( sqlite3_fts3_enable_parentheses ){
  429. if( *zInput=='(' ){
  430. int nConsumed;
  431. pParse->nNest++;
  432. rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
  433. if( rc==SQLITE_OK && !*ppExpr ){
  434. rc = SQLITE_DONE;
  435. }
  436. *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
  437. return rc;
  438. }
  439. /* Check for a close bracket. */
  440. if( *zInput==')' ){
  441. pParse->nNest--;
  442. *pnConsumed = (int)((zInput - z) + 1);
  443. return SQLITE_DONE;
  444. }
  445. }
  446. /* See if we are dealing with a quoted phrase. If this is the case, then
  447. ** search for the closing quote and pass the whole string to getNextString()
  448. ** for processing. This is easy to do, as fts3 has no syntax for escaping
  449. ** a quote character embedded in a string.
  450. */
  451. if( *zInput=='"' ){
  452. for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
  453. *pnConsumed = (int)((zInput - z) + ii + 1);
  454. if( ii==nInput ){
  455. return SQLITE_ERROR;
  456. }
  457. return getNextString(pParse, &zInput[1], ii-1, ppExpr);
  458. }
  459. /* If control flows to this point, this must be a regular token, or
  460. ** the end of the input. Read a regular token using the sqlite3_tokenizer
  461. ** interface. Before doing so, figure out if there is an explicit
  462. ** column specifier for the token.
  463. **
  464. ** TODO: Strangely, it is not possible to associate a column specifier
  465. ** with a quoted phrase, only with a single token. Not sure if this was
  466. ** an implementation artifact or an intentional decision when fts3 was
  467. ** first implemented. Whichever it was, this module duplicates the
  468. ** limitation.
  469. */
  470. iCol = pParse->iDefaultCol;
  471. iColLen = 0;
  472. for(ii=0; ii<pParse->nCol; ii++){
  473. const char *zStr = pParse->azCol[ii];
  474. int nStr = (int)strlen(zStr);
  475. if( nInput>nStr && zInput[nStr]==':'
  476. && sqlite3_strnicmp(zStr, zInput, nStr)==0
  477. ){
  478. iCol = ii;
  479. iColLen = (int)((zInput - z) + nStr + 1);
  480. break;
  481. }
  482. }
  483. rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
  484. *pnConsumed += iColLen;
  485. return rc;
  486. }
  487. /*
  488. ** The argument is an Fts3Expr structure for a binary operator (any type
  489. ** except an FTSQUERY_PHRASE). Return an integer value representing the
  490. ** precedence of the operator. Lower values have a higher precedence (i.e.
  491. ** group more tightly). For example, in the C language, the == operator
  492. ** groups more tightly than ||, and would therefore have a higher precedence.
  493. **
  494. ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
  495. ** is defined), the order of the operators in precedence from highest to
  496. ** lowest is:
  497. **
  498. ** NEAR
  499. ** NOT
  500. ** AND (including implicit ANDs)
  501. ** OR
  502. **
  503. ** Note that when using the old query syntax, the OR operator has a higher
  504. ** precedence than the AND operator.
  505. */
  506. static int opPrecedence(Fts3Expr *p){
  507. assert( p->eType!=FTSQUERY_PHRASE );
  508. if( sqlite3_fts3_enable_parentheses ){
  509. return p->eType;
  510. }else if( p->eType==FTSQUERY_NEAR ){
  511. return 1;
  512. }else if( p->eType==FTSQUERY_OR ){
  513. return 2;
  514. }
  515. assert( p->eType==FTSQUERY_AND );
  516. return 3;
  517. }
  518. /*
  519. ** Argument ppHead contains a pointer to the current head of a query
  520. ** expression tree being parsed. pPrev is the expression node most recently
  521. ** inserted into the tree. This function adds pNew, which is always a binary
  522. ** operator node, into the expression tree based on the relative precedence
  523. ** of pNew and the existing nodes of the tree. This may result in the head
  524. ** of the tree changing, in which case *ppHead is set to the new root node.
  525. */
  526. static void insertBinaryOperator(
  527. Fts3Expr **ppHead, /* Pointer to the root node of a tree */
  528. Fts3Expr *pPrev, /* Node most recently inserted into the tree */
  529. Fts3Expr *pNew /* New binary node to insert into expression tree */
  530. ){
  531. Fts3Expr *pSplit = pPrev;
  532. while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
  533. pSplit = pSplit->pParent;
  534. }
  535. if( pSplit->pParent ){
  536. assert( pSplit->pParent->pRight==pSplit );
  537. pSplit->pParent->pRight = pNew;
  538. pNew->pParent = pSplit->pParent;
  539. }else{
  540. *ppHead = pNew;
  541. }
  542. pNew->pLeft = pSplit;
  543. pSplit->pParent = pNew;
  544. }
  545. /*
  546. ** Parse the fts3 query expression found in buffer z, length n. This function
  547. ** returns either when the end of the buffer is reached or an unmatched
  548. ** closing bracket - ')' - is encountered.
  549. **
  550. ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
  551. ** parsed form of the expression and *pnConsumed is set to the number of
  552. ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
  553. ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
  554. */
  555. static int fts3ExprParse(
  556. ParseContext *pParse, /* fts3 query parse context */
  557. const char *z, int n, /* Text of MATCH query */
  558. Fts3Expr **ppExpr, /* OUT: Parsed query structure */
  559. int *pnConsumed /* OUT: Number of bytes consumed */
  560. ){
  561. Fts3Expr *pRet = 0;
  562. Fts3Expr *pPrev = 0;
  563. Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */
  564. int nIn = n;
  565. const char *zIn = z;
  566. int rc = SQLITE_OK;
  567. int isRequirePhrase = 1;
  568. while( rc==SQLITE_OK ){
  569. Fts3Expr *p = 0;
  570. int nByte = 0;
  571. rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
  572. if( rc==SQLITE_OK ){
  573. int isPhrase;
  574. if( !sqlite3_fts3_enable_parentheses
  575. && p->eType==FTSQUERY_PHRASE && pParse->isNot
  576. ){
  577. /* Create an implicit NOT operator. */
  578. Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
  579. if( !pNot ){
  580. sqlite3Fts3ExprFree(p);
  581. rc = SQLITE_NOMEM;
  582. goto exprparse_out;
  583. }
  584. pNot->eType = FTSQUERY_NOT;
  585. pNot->pRight = p;
  586. p->pParent = pNot;
  587. if( pNotBranch ){
  588. pNot->pLeft = pNotBranch;
  589. pNotBranch->pParent = pNot;
  590. }
  591. pNotBranch = pNot;
  592. p = pPrev;
  593. }else{
  594. int eType = p->eType;
  595. isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
  596. /* The isRequirePhrase variable is set to true if a phrase or
  597. ** an expression contained in parenthesis is required. If a
  598. ** binary operator (AND, OR, NOT or NEAR) is encounted when
  599. ** isRequirePhrase is set, this is a syntax error.
  600. */
  601. if( !isPhrase && isRequirePhrase ){
  602. sqlite3Fts3ExprFree(p);
  603. rc = SQLITE_ERROR;
  604. goto exprparse_out;
  605. }
  606. if( isPhrase && !isRequirePhrase ){
  607. /* Insert an implicit AND operator. */
  608. Fts3Expr *pAnd;
  609. assert( pRet && pPrev );
  610. pAnd = fts3MallocZero(sizeof(Fts3Expr));
  611. if( !pAnd ){
  612. sqlite3Fts3ExprFree(p);
  613. rc = SQLITE_NOMEM;
  614. goto exprparse_out;
  615. }
  616. pAnd->eType = FTSQUERY_AND;
  617. insertBinaryOperator(&pRet, pPrev, pAnd);
  618. pPrev = pAnd;
  619. }
  620. /* This test catches attempts to make either operand of a NEAR
  621. ** operator something other than a phrase. For example, either of
  622. ** the following:
  623. **
  624. ** (bracketed expression) NEAR phrase
  625. ** phrase NEAR (bracketed expression)
  626. **
  627. ** Return an error in either case.
  628. */
  629. if( pPrev && (
  630. (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
  631. || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
  632. )){
  633. sqlite3Fts3ExprFree(p);
  634. rc = SQLITE_ERROR;
  635. goto exprparse_out;
  636. }
  637. if( isPhrase ){
  638. if( pRet ){
  639. assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
  640. pPrev->pRight = p;
  641. p->pParent = pPrev;
  642. }else{
  643. pRet = p;
  644. }
  645. }else{
  646. insertBinaryOperator(&pRet, pPrev, p);
  647. }
  648. isRequirePhrase = !isPhrase;
  649. }
  650. assert( nByte>0 );
  651. }
  652. assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
  653. nIn -= nByte;
  654. zIn += nByte;
  655. pPrev = p;
  656. }
  657. if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
  658. rc = SQLITE_ERROR;
  659. }
  660. if( rc==SQLITE_DONE ){
  661. rc = SQLITE_OK;
  662. if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
  663. if( !pRet ){
  664. rc = SQLITE_ERROR;
  665. }else{
  666. Fts3Expr *pIter = pNotBranch;
  667. while( pIter->pLeft ){
  668. pIter = pIter->pLeft;
  669. }
  670. pIter->pLeft = pRet;
  671. pRet->pParent = pIter;
  672. pRet = pNotBranch;
  673. }
  674. }
  675. }
  676. *pnConsumed = n - nIn;
  677. exprparse_out:
  678. if( rc!=SQLITE_OK ){
  679. sqlite3Fts3ExprFree(pRet);
  680. sqlite3Fts3ExprFree(pNotBranch);
  681. pRet = 0;
  682. }
  683. *ppExpr = pRet;
  684. return rc;
  685. }
  686. /*
  687. ** Return SQLITE_ERROR if the maximum depth of the expression tree passed
  688. ** as the only argument is more than nMaxDepth.
  689. */
  690. static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
  691. int rc = SQLITE_OK;
  692. if( p ){
  693. if( nMaxDepth<0 ){
  694. rc = SQLITE_TOOBIG;
  695. }else{
  696. rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
  697. if( rc==SQLITE_OK ){
  698. rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
  699. }
  700. }
  701. }
  702. return rc;
  703. }
  704. /*
  705. ** This function attempts to transform the expression tree at (*pp) to
  706. ** an equivalent but more balanced form. The tree is modified in place.
  707. ** If successful, SQLITE_OK is returned and (*pp) set to point to the
  708. ** new root expression node.
  709. **
  710. ** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
  711. **
  712. ** Otherwise, if an error occurs, an SQLite error code is returned and
  713. ** expression (*pp) freed.
  714. */
  715. static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
  716. int rc = SQLITE_OK; /* Return code */
  717. Fts3Expr *pRoot = *pp; /* Initial root node */
  718. Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */
  719. int eType = pRoot->eType; /* Type of node in this tree */
  720. if( nMaxDepth==0 ){
  721. rc = SQLITE_ERROR;
  722. }
  723. if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
  724. Fts3Expr **apLeaf;
  725. apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
  726. if( 0==apLeaf ){
  727. rc = SQLITE_NOMEM;
  728. }else{
  729. memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
  730. }
  731. if( rc==SQLITE_OK ){
  732. int i;
  733. Fts3Expr *p;
  734. /* Set $p to point to the left-most leaf in the tree of eType nodes. */
  735. for(p=pRoot; p->eType==eType; p=p->pLeft){
  736. assert( p->pParent==0 || p->pParent->pLeft==p );
  737. assert( p->pLeft && p->pRight );
  738. }
  739. /* This loop runs once for each leaf in the tree of eType nodes. */
  740. while( 1 ){
  741. int iLvl;
  742. Fts3Expr *pParent = p->pParent; /* Current parent of p */
  743. assert( pParent==0 || pParent->pLeft==p );
  744. p->pParent = 0;
  745. if( pParent ){
  746. pParent->pLeft = 0;
  747. }else{
  748. pRoot = 0;
  749. }
  750. rc = fts3ExprBalance(&p, nMaxDepth-1);
  751. if( rc!=SQLITE_OK ) break;
  752. for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
  753. if( apLeaf[iLvl]==0 ){
  754. apLeaf[iLvl] = p;
  755. p = 0;
  756. }else{
  757. assert( pFree );
  758. pFree->pLeft = apLeaf[iLvl];
  759. pFree->pRight = p;
  760. pFree->pLeft->pParent = pFree;
  761. pFree->pRight->pParent = pFree;
  762. p = pFree;
  763. pFree = pFree->pParent;
  764. p->pParent = 0;
  765. apLeaf[iLvl] = 0;
  766. }
  767. }
  768. if( p ){
  769. sqlite3Fts3ExprFree(p);
  770. rc = SQLITE_TOOBIG;
  771. break;
  772. }
  773. /* If that was the last leaf node, break out of the loop */
  774. if( pParent==0 ) break;
  775. /* Set $p to point to the next leaf in the tree of eType nodes */
  776. for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
  777. /* Remove pParent from the original tree. */
  778. assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
  779. pParent->pRight->pParent = pParent->pParent;
  780. if( pParent->pParent ){
  781. pParent->pParent->pLeft = pParent->pRight;
  782. }else{
  783. assert( pParent==pRoot );
  784. pRoot = pParent->pRight;
  785. }
  786. /* Link pParent into the free node list. It will be used as an
  787. ** internal node of the new tree. */
  788. pParent->pParent = pFree;
  789. pFree = pParent;
  790. }
  791. if( rc==SQLITE_OK ){
  792. p = 0;
  793. for(i=0; i<nMaxDepth; i++){
  794. if( apLeaf[i] ){
  795. if( p==0 ){
  796. p = apLeaf[i];
  797. p->pParent = 0;
  798. }else{
  799. assert( pFree!=0 );
  800. pFree->pRight = p;
  801. pFree->pLeft = apLeaf[i];
  802. pFree->pLeft->pParent = pFree;
  803. pFree->pRight->pParent = pFree;
  804. p = pFree;
  805. pFree = pFree->pParent;
  806. p->pParent = 0;
  807. }
  808. }
  809. }
  810. pRoot = p;
  811. }else{
  812. /* An error occurred. Delete the contents of the apLeaf[] array
  813. ** and pFree list. Everything else is cleaned up by the call to
  814. ** sqlite3Fts3ExprFree(pRoot) below. */
  815. Fts3Expr *pDel;
  816. for(i=0; i<nMaxDepth; i++){
  817. sqlite3Fts3ExprFree(apLeaf[i]);
  818. }
  819. while( (pDel=pFree)!=0 ){
  820. pFree = pDel->pParent;
  821. sqlite3_free(pDel);
  822. }
  823. }
  824. assert( pFree==0 );
  825. sqlite3_free( apLeaf );
  826. }
  827. }
  828. if( rc!=SQLITE_OK ){
  829. sqlite3Fts3ExprFree(pRoot);
  830. pRoot = 0;
  831. }
  832. *pp = pRoot;
  833. return rc;
  834. }
  835. /*
  836. ** This function is similar to sqlite3Fts3ExprParse(), with the following
  837. ** differences:
  838. **
  839. ** 1. It does not do expression rebalancing.
  840. ** 2. It does not check that the expression does not exceed the
  841. ** maximum allowable depth.
  842. ** 3. Even if it fails, *ppExpr may still be set to point to an
  843. ** expression tree. It should be deleted using sqlite3Fts3ExprFree()
  844. ** in this case.
  845. */
  846. static int fts3ExprParseUnbalanced(
  847. sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
  848. int iLangid, /* Language id for tokenizer */
  849. char **azCol, /* Array of column names for fts3 table */
  850. int bFts4, /* True to allow FTS4-only syntax */
  851. int nCol, /* Number of entries in azCol[] */
  852. int iDefaultCol, /* Default column to query */
  853. const char *z, int n, /* Text of MATCH query */
  854. Fts3Expr **ppExpr /* OUT: Parsed query structure */
  855. ){
  856. int nParsed;
  857. int rc;
  858. ParseContext sParse;
  859. memset(&sParse, 0, sizeof(ParseContext));
  860. sParse.pTokenizer = pTokenizer;
  861. sParse.iLangid = iLangid;
  862. sParse.azCol = (const char **)azCol;
  863. sParse.nCol = nCol;
  864. sParse.iDefaultCol = iDefaultCol;
  865. sParse.bFts4 = bFts4;
  866. if( z==0 ){
  867. *ppExpr = 0;
  868. return SQLITE_OK;
  869. }
  870. if( n<0 ){
  871. n = (int)strlen(z);
  872. }
  873. rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
  874. assert( rc==SQLITE_OK || *ppExpr==0 );
  875. /* Check for mismatched parenthesis */
  876. if( rc==SQLITE_OK && sParse.nNest ){
  877. rc = SQLITE_ERROR;
  878. }
  879. return rc;
  880. }
  881. /*
  882. ** Parameters z and n contain a pointer to and length of a buffer containing
  883. ** an fts3 query expression, respectively. This function attempts to parse the
  884. ** query expression and create a tree of Fts3Expr structures representing the
  885. ** parsed expression. If successful, *ppExpr is set to point to the head
  886. ** of the parsed expression tree and SQLITE_OK is returned. If an error
  887. ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
  888. ** error) is returned and *ppExpr is set to 0.
  889. **
  890. ** If parameter n is a negative number, then z is assumed to point to a
  891. ** nul-terminated string and the length is determined using strlen().
  892. **
  893. ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
  894. ** use to normalize query tokens while parsing the expression. The azCol[]
  895. ** array, which is assumed to contain nCol entries, should contain the names
  896. ** of each column in the target fts3 table, in order from left to right.
  897. ** Column names must be nul-terminated strings.
  898. **
  899. ** The iDefaultCol parameter should be passed the index of the table column
  900. ** that appears on the left-hand-side of the MATCH operator (the default
  901. ** column to match against for tokens for which a column name is not explicitly
  902. ** specified as part of the query string), or -1 if tokens may by default
  903. ** match any table column.
  904. */
  905. int sqlite3Fts3ExprParse(
  906. sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
  907. int iLangid, /* Language id for tokenizer */
  908. char **azCol, /* Array of column names for fts3 table */
  909. int bFts4, /* True to allow FTS4-only syntax */
  910. int nCol, /* Number of entries in azCol[] */
  911. int iDefaultCol, /* Default column to query */
  912. const char *z, int n, /* Text of MATCH query */
  913. Fts3Expr **ppExpr, /* OUT: Parsed query structure */
  914. char **pzErr /* OUT: Error message (sqlite3_malloc) */
  915. ){
  916. int rc = fts3ExprParseUnbalanced(
  917. pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
  918. );
  919. /* Rebalance the expression. And check that its depth does not exceed
  920. ** SQLITE_FTS3_MAX_EXPR_DEPTH. */
  921. if( rc==SQLITE_OK && *ppExpr ){
  922. rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
  923. if( rc==SQLITE_OK ){
  924. rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
  925. }
  926. }
  927. if( rc!=SQLITE_OK ){
  928. sqlite3Fts3ExprFree(*ppExpr);
  929. *ppExpr = 0;
  930. if( rc==SQLITE_TOOBIG ){
  931. *pzErr = sqlite3_mprintf(
  932. "FTS expression tree is too large (maximum depth %d)",
  933. SQLITE_FTS3_MAX_EXPR_DEPTH
  934. );
  935. rc = SQLITE_ERROR;
  936. }else if( rc==SQLITE_ERROR ){
  937. *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z);
  938. }
  939. }
  940. return rc;
  941. }
  942. /*
  943. ** Free a single node of an expression tree.
  944. */
  945. static void fts3FreeExprNode(Fts3Expr *p){
  946. assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
  947. sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
  948. sqlite3_free(p->aMI);
  949. sqlite3_free(p);
  950. }
  951. /*
  952. ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
  953. **
  954. ** This function would be simpler if it recursively called itself. But
  955. ** that would mean passing a sufficiently large expression to ExprParse()
  956. ** could cause a stack overflow.
  957. */
  958. void sqlite3Fts3ExprFree(Fts3Expr *pDel){
  959. Fts3Expr *p;
  960. assert( pDel==0 || pDel->pParent==0 );
  961. for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
  962. assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
  963. }
  964. while( p ){
  965. Fts3Expr *pParent = p->pParent;
  966. fts3FreeExprNode(p);
  967. if( pParent && p==pParent->pLeft && pParent->pRight ){
  968. p = pParent->pRight;
  969. while( p && (p->pLeft || p->pRight) ){
  970. assert( p==p->pParent->pRight || p==p->pParent->pLeft );
  971. p = (p->pLeft ? p->pLeft : p->pRight);
  972. }
  973. }else{
  974. p = pParent;
  975. }
  976. }
  977. }
  978. /****************************************************************************
  979. *****************************************************************************
  980. ** Everything after this point is just test code.
  981. */
  982. #ifdef SQLITE_TEST
  983. #include <stdio.h>
  984. /*
  985. ** Function to query the hash-table of tokenizers (see README.tokenizers).
  986. */
  987. static int queryTestTokenizer(
  988. sqlite3 *db,
  989. const char *zName,
  990. const sqlite3_tokenizer_module **pp
  991. ){
  992. int rc;
  993. sqlite3_stmt *pStmt;
  994. const char zSql[] = "SELECT fts3_tokenizer(?)";
  995. *pp = 0;
  996. rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
  997. if( rc!=SQLITE_OK ){
  998. return rc;
  999. }
  1000. sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  1001. if( SQLITE_ROW==sqlite3_step(pStmt) ){
  1002. if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
  1003. memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
  1004. }
  1005. }
  1006. return sqlite3_finalize(pStmt);
  1007. }
  1008. /*
  1009. ** Return a pointer to a buffer containing a text representation of the
  1010. ** expression passed as the first argument. The buffer is obtained from
  1011. ** sqlite3_malloc(). It is the responsibility of the caller to use
  1012. ** sqlite3_free() to release the memory. If an OOM condition is encountered,
  1013. ** NULL is returned.
  1014. **
  1015. ** If the second argument is not NULL, then its contents are prepended to
  1016. ** the returned expression text and then freed using sqlite3_free().
  1017. */
  1018. static char *exprToString(Fts3Expr *pExpr, char *zBuf){
  1019. if( pExpr==0 ){
  1020. return sqlite3_mprintf("");
  1021. }
  1022. switch( pExpr->eType ){
  1023. case FTSQUERY_PHRASE: {
  1024. Fts3Phrase *pPhrase = pExpr->pPhrase;
  1025. int i;
  1026. zBuf = sqlite3_mprintf(
  1027. "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
  1028. for(i=0; zBuf && i<pPhrase->nToken; i++){
  1029. zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
  1030. pPhrase->aToken[i].n, pPhrase->aToken[i].z,
  1031. (pPhrase->aToken[i].isPrefix?"+":"")
  1032. );
  1033. }
  1034. return zBuf;
  1035. }
  1036. case FTSQUERY_NEAR:
  1037. zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
  1038. break;
  1039. case FTSQUERY_NOT:
  1040. zBuf = sqlite3_mprintf("%zNOT ", zBuf);
  1041. break;
  1042. case FTSQUERY_AND:
  1043. zBuf = sqlite3_mprintf("%zAND ", zBuf);
  1044. break;
  1045. case FTSQUERY_OR:
  1046. zBuf = sqlite3_mprintf("%zOR ", zBuf);
  1047. break;
  1048. }
  1049. if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
  1050. if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
  1051. if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
  1052. if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
  1053. if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
  1054. return zBuf;
  1055. }
  1056. /*
  1057. ** This is the implementation of a scalar SQL function used to test the
  1058. ** expression parser. It should be called as follows:
  1059. **
  1060. ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
  1061. **
  1062. ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
  1063. ** to parse the query expression (see README.tokenizers). The second argument
  1064. ** is the query expression to parse. Each subsequent argument is the name
  1065. ** of a column of the fts3 table that the query expression may refer to.
  1066. ** For example:
  1067. **
  1068. ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
  1069. */
  1070. static void fts3ExprTest(
  1071. sqlite3_context *context,
  1072. int argc,
  1073. sqlite3_value **argv
  1074. ){
  1075. sqlite3_tokenizer_module const *pModule = 0;
  1076. sqlite3_tokenizer *pTokenizer = 0;
  1077. int rc;
  1078. char **azCol = 0;
  1079. const char *zExpr;
  1080. int nExpr;
  1081. int nCol;
  1082. int ii;
  1083. Fts3Expr *pExpr;
  1084. char *zBuf = 0;
  1085. sqlite3 *db = sqlite3_context_db_handle(context);
  1086. if( argc<3 ){
  1087. sqlite3_result_error(context,
  1088. "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
  1089. );
  1090. return;
  1091. }
  1092. rc = queryTestTokenizer(db,
  1093. (const char *)sqlite3_value_text(argv[0]), &pModule);
  1094. if( rc==SQLITE_NOMEM ){
  1095. sqlite3_result_error_nomem(context);
  1096. goto exprtest_out;
  1097. }else if( !pModule ){
  1098. sqlite3_result_error(context, "No such tokenizer module", -1);
  1099. goto exprtest_out;
  1100. }
  1101. rc = pModule->xCreate(0, 0, &pTokenizer);
  1102. assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
  1103. if( rc==SQLITE_NOMEM ){
  1104. sqlite3_result_error_nomem(context);
  1105. goto exprtest_out;
  1106. }
  1107. pTokenizer->pModule = pModule;
  1108. zExpr = (const char *)sqlite3_value_text(argv[1]);
  1109. nExpr = sqlite3_value_bytes(argv[1]);
  1110. nCol = argc-2;
  1111. azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
  1112. if( !azCol ){
  1113. sqlite3_result_error_nomem(context);
  1114. goto exprtest_out;
  1115. }
  1116. for(ii=0; ii<nCol; ii++){
  1117. azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  1118. }
  1119. if( sqlite3_user_data(context) ){
  1120. char *zDummy = 0;
  1121. rc = sqlite3Fts3ExprParse(
  1122. pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
  1123. );
  1124. assert( rc==SQLITE_OK || pExpr==0 );
  1125. sqlite3_free(zDummy);
  1126. }else{
  1127. rc = fts3ExprParseUnbalanced(
  1128. pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
  1129. );
  1130. }
  1131. if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
  1132. sqlite3Fts3ExprFree(pExpr);
  1133. sqlite3_result_error(context, "Error parsing expression", -1);
  1134. }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
  1135. sqlite3_result_error_nomem(context);
  1136. }else{
  1137. sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
  1138. sqlite3_free(zBuf);
  1139. }
  1140. sqlite3Fts3ExprFree(pExpr);
  1141. exprtest_out:
  1142. if( pModule && pTokenizer ){
  1143. rc = pModule->xDestroy(pTokenizer);
  1144. }
  1145. sqlite3_free(azCol);
  1146. }
  1147. /*
  1148. ** Register the query expression parser test function fts3_exprtest()
  1149. ** with database connection db.
  1150. */
  1151. int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
  1152. int rc = sqlite3_create_function(
  1153. db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
  1154. );
  1155. if( rc==SQLITE_OK ){
  1156. rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
  1157. -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
  1158. );
  1159. }
  1160. return rc;
  1161. }
  1162. #endif
  1163. #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */