1
0

fts3view.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874
  1. /*
  2. ** This program is a debugging and analysis utility that displays
  3. ** information about an FTS3 or FTS4 index.
  4. **
  5. ** Link this program against the SQLite3 amalgamation with the
  6. ** SQLITE_ENABLE_FTS4 compile-time option. Then run it as:
  7. **
  8. ** fts3view DATABASE
  9. **
  10. ** to get a list of all FTS3/4 tables in DATABASE, or do
  11. **
  12. ** fts3view DATABASE TABLE COMMAND ....
  13. **
  14. ** to see various aspects of the TABLE table. Type fts3view with no
  15. ** arguments for a list of available COMMANDs.
  16. */
  17. #include <stdio.h>
  18. #include <stdarg.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <ctype.h>
  22. #include "sqlite3.h"
  23. /*
  24. ** Extra command-line arguments:
  25. */
  26. int nExtra;
  27. char **azExtra;
  28. /*
  29. ** Look for a command-line argument.
  30. */
  31. const char *findOption(const char *zName, int hasArg, const char *zDefault){
  32. int i;
  33. const char *zResult = zDefault;
  34. for(i=0; i<nExtra; i++){
  35. const char *z = azExtra[i];
  36. while( z[0]=='-' ) z++;
  37. if( strcmp(z, zName)==0 ){
  38. int j = 1;
  39. if( hasArg==0 || i==nExtra-1 ) j = 0;
  40. zResult = azExtra[i+j];
  41. while( i+j<nExtra ){
  42. azExtra[i] = azExtra[i+j+1];
  43. i++;
  44. }
  45. break;
  46. }
  47. }
  48. return zResult;
  49. }
  50. /*
  51. ** Prepare an SQL query
  52. */
  53. static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
  54. va_list ap;
  55. char *zSql;
  56. sqlite3_stmt *pStmt;
  57. int rc;
  58. va_start(ap, zFormat);
  59. zSql = sqlite3_vmprintf(zFormat, ap);
  60. va_end(ap);
  61. rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
  62. if( rc ){
  63. fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql);
  64. exit(1);
  65. }
  66. sqlite3_free(zSql);
  67. return pStmt;
  68. }
  69. /*
  70. ** Run an SQL statement
  71. */
  72. static int runSql(sqlite3 *db, const char *zFormat, ...){
  73. va_list ap;
  74. char *zSql;
  75. int rc;
  76. va_start(ap, zFormat);
  77. zSql = sqlite3_vmprintf(zFormat, ap);
  78. rc = sqlite3_exec(db, zSql, 0, 0, 0);
  79. va_end(ap);
  80. return rc;
  81. }
  82. /*
  83. ** Show the table schema
  84. */
  85. static void showSchema(sqlite3 *db, const char *zTab){
  86. sqlite3_stmt *pStmt;
  87. pStmt = prepare(db,
  88. "SELECT sql FROM sqlite_master"
  89. " WHERE name LIKE '%q%%'"
  90. " ORDER BY 1",
  91. zTab);
  92. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  93. printf("%s;\n", sqlite3_column_text(pStmt, 0));
  94. }
  95. sqlite3_finalize(pStmt);
  96. pStmt = prepare(db, "PRAGMA page_size");
  97. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  98. printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
  99. }
  100. sqlite3_finalize(pStmt);
  101. pStmt = prepare(db, "PRAGMA journal_mode");
  102. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  103. printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
  104. }
  105. sqlite3_finalize(pStmt);
  106. pStmt = prepare(db, "PRAGMA auto_vacuum");
  107. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  108. const char *zType = "???";
  109. switch( sqlite3_column_int(pStmt, 0) ){
  110. case 0: zType = "OFF"; break;
  111. case 1: zType = "FULL"; break;
  112. case 2: zType = "INCREMENTAL"; break;
  113. }
  114. printf("PRAGMA auto_vacuum=%s;\n", zType);
  115. }
  116. sqlite3_finalize(pStmt);
  117. pStmt = prepare(db, "PRAGMA encoding");
  118. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  119. printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
  120. }
  121. sqlite3_finalize(pStmt);
  122. }
  123. /*
  124. ** Read a 64-bit variable-length integer from memory starting at p[0].
  125. ** Return the number of bytes read, or 0 on error.
  126. ** The value is stored in *v.
  127. */
  128. int getVarint(const unsigned char *p, sqlite_int64 *v){
  129. const unsigned char *q = p;
  130. sqlite_uint64 x = 0, y = 1;
  131. while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
  132. x += y * (*q++ & 0x7f);
  133. y <<= 7;
  134. }
  135. x += y * (*q++);
  136. *v = (sqlite_int64) x;
  137. return (int) (q - (unsigned char *)p);
  138. }
  139. /* Show the content of the %_stat table
  140. */
  141. static void showStat(sqlite3 *db, const char *zTab){
  142. sqlite3_stmt *pStmt;
  143. pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
  144. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  145. printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
  146. switch( sqlite3_column_type(pStmt, 1) ){
  147. case SQLITE_INTEGER: {
  148. printf(" %d\n", sqlite3_column_int(pStmt, 1));
  149. break;
  150. }
  151. case SQLITE_BLOB: {
  152. unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
  153. int len = sqlite3_column_bytes(pStmt, 1);
  154. int i = 0;
  155. sqlite3_int64 v;
  156. while( i<len ){
  157. i += getVarint(x, &v);
  158. printf(" %lld", v);
  159. }
  160. printf("\n");
  161. break;
  162. }
  163. }
  164. }
  165. sqlite3_finalize(pStmt);
  166. }
  167. /*
  168. ** Report on the vocabulary. This creates an fts4aux table with a random
  169. ** name, but deletes it in the end.
  170. */
  171. static void showVocabulary(sqlite3 *db, const char *zTab){
  172. char *zAux;
  173. sqlite3_uint64 r;
  174. sqlite3_stmt *pStmt;
  175. int nDoc = 0;
  176. int nToken = 0;
  177. int nOccurrence = 0;
  178. int nTop;
  179. int n, i;
  180. sqlite3_randomness(sizeof(r), &r);
  181. zAux = sqlite3_mprintf("viewer_%llx", zTab, r);
  182. runSql(db, "BEGIN");
  183. pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
  184. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  185. nDoc = sqlite3_column_int(pStmt, 0);
  186. }
  187. sqlite3_finalize(pStmt);
  188. printf("Number of documents...................... %9d\n", nDoc);
  189. runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
  190. pStmt = prepare(db,
  191. "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
  192. zAux);
  193. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  194. nToken = sqlite3_column_int(pStmt, 0);
  195. nOccurrence = sqlite3_column_int(pStmt, 1);
  196. }
  197. sqlite3_finalize(pStmt);
  198. printf("Total tokens in all documents............ %9d\n", nOccurrence);
  199. printf("Total number of distinct tokens.......... %9d\n", nToken);
  200. if( nToken==0 ) goto end_vocab;
  201. n = 0;
  202. pStmt = prepare(db, "SELECT count(*) FROM %s"
  203. " WHERE col='*' AND occurrences==1", zAux);
  204. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  205. n = sqlite3_column_int(pStmt, 0);
  206. }
  207. sqlite3_finalize(pStmt);
  208. printf("Tokens used exactly once................. %9d %5.2f%%\n",
  209. n, n*100.0/nToken);
  210. n = 0;
  211. pStmt = prepare(db, "SELECT count(*) FROM %s"
  212. " WHERE col='*' AND documents==1", zAux);
  213. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  214. n = sqlite3_column_int(pStmt, 0);
  215. }
  216. sqlite3_finalize(pStmt);
  217. printf("Tokens used in only one document......... %9d %5.2f%%\n",
  218. n, n*100.0/nToken);
  219. if( nDoc>=2000 ){
  220. n = 0;
  221. pStmt = prepare(db, "SELECT count(*) FROM %s"
  222. " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
  223. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  224. n = sqlite3_column_int(pStmt, 0);
  225. }
  226. sqlite3_finalize(pStmt);
  227. printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
  228. n, n*100.0/nToken);
  229. }
  230. if( nDoc>=200 ){
  231. n = 0;
  232. pStmt = prepare(db, "SELECT count(*) FROM %s"
  233. " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
  234. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  235. n = sqlite3_column_int(pStmt, 0);
  236. }
  237. sqlite3_finalize(pStmt);
  238. printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
  239. n, n*100.0/nToken);
  240. }
  241. nTop = atoi(findOption("top", 1, "25"));
  242. printf("The %d most common tokens:\n", nTop);
  243. pStmt = prepare(db,
  244. "SELECT term, documents FROM %s"
  245. " WHERE col='*'"
  246. " ORDER BY documents DESC, term"
  247. " LIMIT %d", zAux, nTop);
  248. i = 0;
  249. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  250. i++;
  251. n = sqlite3_column_int(pStmt, 1);
  252. printf(" %2d. %-30s %9d docs %5.2f%%\n", i,
  253. sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
  254. }
  255. sqlite3_finalize(pStmt);
  256. end_vocab:
  257. runSql(db, "ROLLBACK");
  258. sqlite3_free(zAux);
  259. }
  260. /*
  261. ** Report on the number and sizes of segments
  262. */
  263. static void showSegmentStats(sqlite3 *db, const char *zTab){
  264. sqlite3_stmt *pStmt;
  265. int nSeg = 0;
  266. sqlite3_int64 szSeg = 0, mxSeg = 0;
  267. int nIdx = 0;
  268. sqlite3_int64 szIdx = 0, mxIdx = 0;
  269. int nRoot = 0;
  270. sqlite3_int64 szRoot = 0, mxRoot = 0;
  271. sqlite3_int64 mx;
  272. int nLeaf;
  273. int n;
  274. int pgsz;
  275. int mxLevel;
  276. int i;
  277. pStmt = prepare(db,
  278. "SELECT count(*), sum(length(block)), max(length(block))"
  279. " FROM '%q_segments'",
  280. zTab);
  281. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  282. nSeg = sqlite3_column_int(pStmt, 0);
  283. szSeg = sqlite3_column_int64(pStmt, 1);
  284. mxSeg = sqlite3_column_int64(pStmt, 2);
  285. }
  286. sqlite3_finalize(pStmt);
  287. pStmt = prepare(db,
  288. "SELECT count(*), sum(length(block)), max(length(block))"
  289. " FROM '%q_segments' a JOIN '%q_segdir' b"
  290. " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
  291. zTab, zTab);
  292. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  293. nIdx = sqlite3_column_int(pStmt, 0);
  294. szIdx = sqlite3_column_int64(pStmt, 1);
  295. mxIdx = sqlite3_column_int64(pStmt, 2);
  296. }
  297. sqlite3_finalize(pStmt);
  298. pStmt = prepare(db,
  299. "SELECT count(*), sum(length(root)), max(length(root))"
  300. " FROM '%q_segdir'",
  301. zTab);
  302. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  303. nRoot = sqlite3_column_int(pStmt, 0);
  304. szRoot = sqlite3_column_int64(pStmt, 1);
  305. mxRoot = sqlite3_column_int64(pStmt, 2);
  306. }
  307. sqlite3_finalize(pStmt);
  308. printf("Number of segments....................... %9d\n", nSeg+nRoot);
  309. printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
  310. printf("Number of index segments................. %9d\n", nIdx);
  311. printf("Number of root segments.................. %9d\n", nRoot);
  312. printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
  313. printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
  314. printf("Total size of all index segments......... %9lld\n", szIdx);
  315. printf("Total size of all root segments.......... %9lld\n", szRoot);
  316. if( nSeg>0 ){
  317. printf("Average size of all segments............. %11.1f\n",
  318. (double)(szSeg+szRoot)/(double)(nSeg+nRoot));
  319. printf("Average size of leaf segments............ %11.1f\n",
  320. (double)(szSeg-szIdx)/(double)(nSeg-nIdx));
  321. }
  322. if( nIdx>0 ){
  323. printf("Average size of index segments........... %11.1f\n",
  324. (double)szIdx/(double)nIdx);
  325. }
  326. if( nRoot>0 ){
  327. printf("Average size of root segments............ %11.1f\n",
  328. (double)szRoot/(double)nRoot);
  329. }
  330. mx = mxSeg;
  331. if( mx<mxRoot ) mx = mxRoot;
  332. printf("Maximum segment size..................... %9lld\n", mx);
  333. printf("Maximum index segment size............... %9lld\n", mxIdx);
  334. printf("Maximum root segment size................ %9lld\n", mxRoot);
  335. pStmt = prepare(db, "PRAGMA page_size");
  336. pgsz = 1024;
  337. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  338. pgsz = sqlite3_column_int(pStmt, 0);
  339. }
  340. sqlite3_finalize(pStmt);
  341. printf("Database page size....................... %9d\n", pgsz);
  342. pStmt = prepare(db,
  343. "SELECT count(*)"
  344. " FROM '%q_segments' a JOIN '%q_segdir' b"
  345. " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
  346. " AND length(a.block)>%d",
  347. zTab, zTab, pgsz-45);
  348. n = 0;
  349. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  350. n = sqlite3_column_int(pStmt, 0);
  351. }
  352. sqlite3_finalize(pStmt);
  353. nLeaf = nSeg - nIdx;
  354. printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
  355. pgsz-45, n, n*100.0/nLeaf);
  356. pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
  357. mxLevel = 0;
  358. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  359. mxLevel = sqlite3_column_int(pStmt, 0);
  360. }
  361. sqlite3_finalize(pStmt);
  362. for(i=0; i<=mxLevel; i++){
  363. pStmt = prepare(db,
  364. "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
  365. " count(distinct idx)"
  366. " FROM (SELECT length(a.block) AS len, idx"
  367. " FROM '%q_segments' a JOIN '%q_segdir' b"
  368. " WHERE (a.blockid BETWEEN b.start_block"
  369. " AND b.leaves_end_block)"
  370. " AND (b.level%%1024)==%d)",
  371. pgsz-45, zTab, zTab, i);
  372. if( sqlite3_step(pStmt)==SQLITE_ROW
  373. && (nLeaf = sqlite3_column_int(pStmt, 0))>0
  374. ){
  375. int nIdx = sqlite3_column_int(pStmt, 5);
  376. sqlite3_int64 sz;
  377. printf("For level %d:\n", i);
  378. printf(" Number of indexes...................... %9d\n", nIdx);
  379. printf(" Number of leaf segments................ %9d\n", nLeaf);
  380. if( nIdx>1 ){
  381. printf(" Average leaf segments per index........ %11.1f\n",
  382. (double)nLeaf/(double)nIdx);
  383. }
  384. printf(" Total size of all leaf segments........ %9lld\n",
  385. (sz = sqlite3_column_int64(pStmt, 1)));
  386. printf(" Average size of leaf segments.......... %11.1f\n",
  387. sqlite3_column_double(pStmt, 2));
  388. if( nIdx>1 ){
  389. printf(" Average leaf segment size per index.... %11.1f\n",
  390. (double)sz/(double)nIdx);
  391. }
  392. printf(" Maximum leaf segment size.............. %9lld\n",
  393. sqlite3_column_int64(pStmt, 3));
  394. n = sqlite3_column_int(pStmt, 4);
  395. printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n",
  396. pgsz-45, n, n*100.0/nLeaf);
  397. }
  398. sqlite3_finalize(pStmt);
  399. }
  400. }
  401. /*
  402. ** Print a single "tree" line of the segdir map output.
  403. */
  404. static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
  405. printf(" tree %9lld", iLower);
  406. if( iUpper>iLower ){
  407. printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1);
  408. }
  409. printf("\n");
  410. }
  411. /*
  412. ** Check to see if the block of a %_segments entry is NULL.
  413. */
  414. static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
  415. sqlite3_stmt *pStmt;
  416. int rc = 1;
  417. pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
  418. " WHERE blockid=%lld", zTab, iBlockId);
  419. if( sqlite3_step(pStmt)==SQLITE_ROW ){
  420. rc = sqlite3_column_int(pStmt, 0);
  421. }
  422. sqlite3_finalize(pStmt);
  423. return rc;
  424. }
  425. /*
  426. ** Show a map of segments derived from the %_segdir table.
  427. */
  428. static void showSegdirMap(sqlite3 *db, const char *zTab){
  429. int mxIndex, iIndex;
  430. sqlite3_stmt *pStmt = 0;
  431. sqlite3_stmt *pStmt2 = 0;
  432. int prevLevel;
  433. pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
  434. if( sqlite3_step(pStmt)==SQLITE_ROW ){
  435. mxIndex = sqlite3_column_int(pStmt, 0);
  436. }else{
  437. mxIndex = 0;
  438. }
  439. sqlite3_finalize(pStmt);
  440. printf("Number of inverted indices............... %3d\n", mxIndex+1);
  441. pStmt = prepare(db,
  442. "SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
  443. " FROM '%q_segdir'"
  444. " WHERE level/1024==?"
  445. " ORDER BY level DESC, idx",
  446. zTab);
  447. pStmt2 = prepare(db,
  448. "SELECT blockid FROM '%q_segments'"
  449. " WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
  450. zTab);
  451. for(iIndex=0; iIndex<=mxIndex; iIndex++){
  452. if( mxIndex>0 ){
  453. printf("**************************** Index %d "
  454. "****************************\n", iIndex);
  455. }
  456. sqlite3_bind_int(pStmt, 1, iIndex);
  457. prevLevel = -1;
  458. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  459. int iLevel = sqlite3_column_int(pStmt, 0)%1024;
  460. int iIdx = sqlite3_column_int(pStmt, 1);
  461. sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
  462. sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
  463. sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
  464. char rtag[20];
  465. if( iLevel!=prevLevel ){
  466. printf("level %2d idx %2d", iLevel, iIdx);
  467. prevLevel = iLevel;
  468. }else{
  469. printf(" idx %2d", iIdx);
  470. }
  471. sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
  472. sqlite3_column_int64(pStmt,5));
  473. printf(" root %9s\n", rtag);
  474. if( iLEnd>iStart ){
  475. sqlite3_int64 iLower, iPrev, iX;
  476. if( iLEnd+1<=iEnd ){
  477. sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
  478. sqlite3_bind_int64(pStmt2, 2, iEnd);
  479. iLower = -1;
  480. while( sqlite3_step(pStmt2)==SQLITE_ROW ){
  481. iX = sqlite3_column_int64(pStmt2, 0);
  482. if( iLower<0 ){
  483. iLower = iPrev = iX;
  484. }else if( iX==iPrev+1 ){
  485. iPrev = iX;
  486. }else{
  487. printTreeLine(iLower, iPrev);
  488. iLower = iPrev = iX;
  489. }
  490. }
  491. sqlite3_reset(pStmt2);
  492. if( iLower>=0 ){
  493. if( iLower==iPrev && iLower==iEnd
  494. && isNullSegment(db,zTab,iLower)
  495. ){
  496. printf(" null %9lld\n", iLower);
  497. }else{
  498. printTreeLine(iLower, iPrev);
  499. }
  500. }
  501. }
  502. printf(" leaves %9lld thru %9lld (%lld blocks)\n",
  503. iStart, iLEnd, iLEnd - iStart + 1);
  504. }
  505. }
  506. sqlite3_reset(pStmt);
  507. }
  508. sqlite3_finalize(pStmt);
  509. sqlite3_finalize(pStmt2);
  510. }
  511. /*
  512. ** Decode a single segment block and display the results on stdout.
  513. */
  514. static void decodeSegment(
  515. const unsigned char *aData, /* Content to print */
  516. int nData /* Number of bytes of content */
  517. ){
  518. sqlite3_int64 iChild;
  519. sqlite3_int64 iPrefix;
  520. sqlite3_int64 nTerm;
  521. sqlite3_int64 n;
  522. sqlite3_int64 iDocsz;
  523. int iHeight;
  524. int i = 0;
  525. int cnt = 0;
  526. char zTerm[1000];
  527. i += getVarint(aData, &n);
  528. iHeight = (int)n;
  529. printf("height: %d\n", iHeight);
  530. if( iHeight>0 ){
  531. i += getVarint(aData+i, &iChild);
  532. printf("left-child: %lld\n", iChild);
  533. }
  534. while( i<nData ){
  535. if( (cnt++)>0 ){
  536. i += getVarint(aData+i, &iPrefix);
  537. }else{
  538. iPrefix = 0;
  539. }
  540. i += getVarint(aData+i, &nTerm);
  541. if( iPrefix+nTerm+1 >= sizeof(zTerm) ){
  542. fprintf(stderr, "term to long\n");
  543. exit(1);
  544. }
  545. memcpy(zTerm+iPrefix, aData+i, nTerm);
  546. zTerm[iPrefix+nTerm] = 0;
  547. i += nTerm;
  548. if( iHeight==0 ){
  549. i += getVarint(aData+i, &iDocsz);
  550. printf("term: %-25s doclist %7lld bytes offset %d\n", zTerm, iDocsz, i);
  551. i += iDocsz;
  552. }else{
  553. printf("term: %-25s child %lld\n", zTerm, ++iChild);
  554. }
  555. }
  556. }
  557. /*
  558. ** Print a a blob as hex and ascii.
  559. */
  560. static void printBlob(
  561. const unsigned char *aData, /* Content to print */
  562. int nData /* Number of bytes of content */
  563. ){
  564. int i, j;
  565. const char *zOfstFmt;
  566. const int perLine = 16;
  567. if( (nData&~0xfff)==0 ){
  568. zOfstFmt = " %03x: ";
  569. }else if( (nData&~0xffff)==0 ){
  570. zOfstFmt = " %04x: ";
  571. }else if( (nData&~0xfffff)==0 ){
  572. zOfstFmt = " %05x: ";
  573. }else if( (nData&~0xffffff)==0 ){
  574. zOfstFmt = " %06x: ";
  575. }else{
  576. zOfstFmt = " %08x: ";
  577. }
  578. for(i=0; i<nData; i += perLine){
  579. fprintf(stdout, zOfstFmt, i);
  580. for(j=0; j<perLine; j++){
  581. if( i+j>nData ){
  582. fprintf(stdout, " ");
  583. }else{
  584. fprintf(stdout,"%02x ", aData[i+j]);
  585. }
  586. }
  587. for(j=0; j<perLine; j++){
  588. if( i+j>nData ){
  589. fprintf(stdout, " ");
  590. }else{
  591. fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
  592. }
  593. }
  594. fprintf(stdout,"\n");
  595. }
  596. }
  597. /*
  598. ** Convert text to a 64-bit integer
  599. */
  600. static sqlite3_int64 atoi64(const char *z){
  601. sqlite3_int64 v = 0;
  602. while( z[0]>='0' && z[0]<='9' ){
  603. v = v*10 + z[0] - '0';
  604. z++;
  605. }
  606. return v;
  607. }
  608. /*
  609. ** Return a prepared statement which, when stepped, will return in its
  610. ** first column the blob associated with segment zId. If zId begins with
  611. ** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a
  612. ** %_segment entry.
  613. */
  614. static sqlite3_stmt *prepareToGetSegment(
  615. sqlite3 *db, /* The database */
  616. const char *zTab, /* The FTS3/4 table name */
  617. const char *zId /* ID of the segment to open */
  618. ){
  619. sqlite3_stmt *pStmt;
  620. if( zId[0]=='r' ){
  621. pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
  622. zTab, atoi64(zId+1));
  623. }else{
  624. pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
  625. zTab, atoi64(zId));
  626. }
  627. return pStmt;
  628. }
  629. /*
  630. ** Print the content of a segment or of the root of a segdir. The segment
  631. ** or root is identified by azExtra[0]. If the first character of azExtra[0]
  632. ** is 'r' then the remainder is the integer rowid of the %_segdir entry.
  633. ** If the first character of azExtra[0] is not 'r' then, then all of
  634. ** azExtra[0] is an integer which is the block number.
  635. **
  636. ** If the --raw option is present in azExtra, then a hex dump is provided.
  637. ** Otherwise a decoding is shown.
  638. */
  639. static void showSegment(sqlite3 *db, const char *zTab){
  640. const unsigned char *aData;
  641. int nData;
  642. sqlite3_stmt *pStmt;
  643. pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
  644. if( sqlite3_step(pStmt)!=SQLITE_ROW ){
  645. sqlite3_finalize(pStmt);
  646. return;
  647. }
  648. nData = sqlite3_column_bytes(pStmt, 0);
  649. aData = sqlite3_column_blob(pStmt, 0);
  650. printf("Segment %s of size %d bytes:\n", azExtra[0], nData);
  651. if( findOption("raw", 0, 0)!=0 ){
  652. printBlob(aData, nData);
  653. }else{
  654. decodeSegment(aData, nData);
  655. }
  656. sqlite3_finalize(pStmt);
  657. }
  658. /*
  659. ** Decode a single doclist and display the results on stdout.
  660. */
  661. static void decodeDoclist(
  662. const unsigned char *aData, /* Content to print */
  663. int nData /* Number of bytes of content */
  664. ){
  665. sqlite3_int64 iPrevDocid = 0;
  666. sqlite3_int64 iDocid;
  667. sqlite3_int64 iPos;
  668. sqlite3_int64 iPrevPos = 0;
  669. sqlite3_int64 iCol;
  670. int i = 0;
  671. while( i<nData ){
  672. i += getVarint(aData+i, &iDocid);
  673. printf("docid %lld col0", iDocid+iPrevDocid);
  674. iPrevDocid += iDocid;
  675. iPrevPos = 0;
  676. while( 1 ){
  677. i += getVarint(aData+i, &iPos);
  678. if( iPos==1 ){
  679. i += getVarint(aData+i, &iCol);
  680. printf(" col%lld", iCol);
  681. iPrevPos = 0;
  682. }else if( iPos==0 ){
  683. printf("\n");
  684. break;
  685. }else{
  686. iPrevPos += iPos - 2;
  687. printf(" %lld", iPrevPos);
  688. }
  689. }
  690. }
  691. }
  692. /*
  693. ** Print the content of a doclist. The segment or segdir-root is
  694. ** identified by azExtra[0]. If the first character of azExtra[0]
  695. ** is 'r' then the remainder is the integer rowid of the %_segdir entry.
  696. ** If the first character of azExtra[0] is not 'r' then, then all of
  697. ** azExtra[0] is an integer which is the block number. The offset
  698. ** into the segment is identified by azExtra[1]. The size of the doclist
  699. ** is azExtra[2].
  700. **
  701. ** If the --raw option is present in azExtra, then a hex dump is provided.
  702. ** Otherwise a decoding is shown.
  703. */
  704. static void showDoclist(sqlite3 *db, const char *zTab){
  705. const unsigned char *aData;
  706. sqlite3_int64 offset, nData;
  707. sqlite3_stmt *pStmt;
  708. offset = atoi64(azExtra[1]);
  709. nData = atoi64(azExtra[2]);
  710. pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
  711. if( sqlite3_step(pStmt)!=SQLITE_ROW ){
  712. sqlite3_finalize(pStmt);
  713. return;
  714. }
  715. aData = sqlite3_column_blob(pStmt, 0);
  716. printf("Doclist at %s offset %lld of size %lld bytes:\n",
  717. azExtra[0], offset, nData);
  718. if( findOption("raw", 0, 0)!=0 ){
  719. printBlob(aData+offset, nData);
  720. }else{
  721. decodeDoclist(aData+offset, nData);
  722. }
  723. sqlite3_finalize(pStmt);
  724. }
  725. /*
  726. ** Show the top N largest segments
  727. */
  728. static void listBigSegments(sqlite3 *db, const char *zTab){
  729. int nTop, i;
  730. sqlite3_stmt *pStmt;
  731. sqlite3_int64 sz;
  732. sqlite3_int64 id;
  733. nTop = atoi(findOption("top", 1, "25"));
  734. printf("The %d largest segments:\n", nTop);
  735. pStmt = prepare(db,
  736. "SELECT blockid, length(block) AS len FROM '%q_segments'"
  737. " ORDER BY 2 DESC, 1"
  738. " LIMIT %d", zTab, nTop);
  739. i = 0;
  740. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  741. i++;
  742. id = sqlite3_column_int64(pStmt, 0);
  743. sz = sqlite3_column_int64(pStmt, 1);
  744. printf(" %2d. %9lld size %lld\n", i, id, sz);
  745. }
  746. sqlite3_finalize(pStmt);
  747. }
  748. static void usage(const char *argv0){
  749. fprintf(stderr, "Usage: %s DATABASE\n"
  750. " or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);
  751. fprintf(stderr,
  752. "ARGS:\n"
  753. " big-segments [--top N] show the largest segments\n"
  754. " doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
  755. " schema FTS table schema\n"
  756. " segdir directory of segments\n"
  757. " segment BLOCKID [--raw] content of a segment\n"
  758. " segment-stats info on segment sizes\n"
  759. " stat the %%_stat table\n"
  760. " vocabulary [--top N] document vocabulary\n"
  761. );
  762. exit(1);
  763. }
  764. int main(int argc, char **argv){
  765. sqlite3 *db;
  766. int rc;
  767. const char *zTab;
  768. const char *zCmd;
  769. if( argc<2 ) usage(argv[0]);
  770. rc = sqlite3_open(argv[1], &db);
  771. if( rc ){
  772. fprintf(stderr, "Cannot open %s\n", argv[1]);
  773. exit(1);
  774. }
  775. if( argc==2 ){
  776. sqlite3_stmt *pStmt;
  777. int cnt = 0;
  778. pStmt = prepare(db, "SELECT b.sql"
  779. " FROM sqlite_master a, sqlite_master b"
  780. " WHERE a.name GLOB '*_segdir'"
  781. " AND b.name=substr(a.name,1,length(a.name)-7)"
  782. " ORDER BY 1");
  783. while( sqlite3_step(pStmt)==SQLITE_ROW ){
  784. cnt++;
  785. printf("%s;\n", sqlite3_column_text(pStmt, 0));
  786. }
  787. sqlite3_finalize(pStmt);
  788. if( cnt==0 ){
  789. printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
  790. }
  791. return 0;
  792. }
  793. if( argc<4 ) usage(argv[0]);
  794. zTab = argv[2];
  795. zCmd = argv[3];
  796. nExtra = argc-4;
  797. azExtra = argv+4;
  798. if( strcmp(zCmd,"big-segments")==0 ){
  799. listBigSegments(db, zTab);
  800. }else if( strcmp(zCmd,"doclist")==0 ){
  801. if( argc<7 ) usage(argv[0]);
  802. showDoclist(db, zTab);
  803. }else if( strcmp(zCmd,"schema")==0 ){
  804. showSchema(db, zTab);
  805. }else if( strcmp(zCmd,"segdir")==0 ){
  806. showSegdirMap(db, zTab);
  807. }else if( strcmp(zCmd,"segment")==0 ){
  808. if( argc<5 ) usage(argv[0]);
  809. showSegment(db, zTab);
  810. }else if( strcmp(zCmd,"segment-stats")==0 ){
  811. showSegmentStats(db, zTab);
  812. }else if( strcmp(zCmd,"stat")==0 ){
  813. showStat(db, zTab);
  814. }else if( strcmp(zCmd,"vocabulary")==0 ){
  815. showVocabulary(db, zTab);
  816. }else{
  817. usage(argv[0]);
  818. }
  819. return 0;
  820. }