fts3_write.c 179 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239524052415242524352445245524652475248524952505251525252535254525552565257525852595260526152625263526452655266526752685269527052715272527352745275527652775278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419
  1. /*
  2. ** 2009 Oct 23
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This file is part of the SQLite FTS3 extension module. Specifically,
  14. ** this file contains code to insert, update and delete rows from FTS3
  15. ** tables. It also contains code to merge FTS3 b-tree segments. Some
  16. ** of the sub-routines used to merge segments are also used by the query
  17. ** code in fts3.c.
  18. */
  19. #include "fts3Int.h"
  20. #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
  21. #include <string.h>
  22. #include <assert.h>
  23. #include <stdlib.h>
  24. #define FTS_MAX_APPENDABLE_HEIGHT 16
  25. /*
  26. ** When full-text index nodes are loaded from disk, the buffer that they
  27. ** are loaded into has the following number of bytes of padding at the end
  28. ** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer
  29. ** of 920 bytes is allocated for it.
  30. **
  31. ** This means that if we have a pointer into a buffer containing node data,
  32. ** it is always safe to read up to two varints from it without risking an
  33. ** overread, even if the node data is corrupted.
  34. */
  35. #define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)
  36. /*
  37. ** Under certain circumstances, b-tree nodes (doclists) can be loaded into
  38. ** memory incrementally instead of all at once. This can be a big performance
  39. ** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext()
  40. ** method before retrieving all query results (as may happen, for example,
  41. ** if a query has a LIMIT clause).
  42. **
  43. ** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD
  44. ** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes.
  45. ** The code is written so that the hard lower-limit for each of these values
  46. ** is 1. Clearly such small values would be inefficient, but can be useful
  47. ** for testing purposes.
  48. **
  49. ** If this module is built with SQLITE_TEST defined, these constants may
  50. ** be overridden at runtime for testing purposes. File fts3_test.c contains
  51. ** a Tcl interface to read and write the values.
  52. */
  53. #ifdef SQLITE_TEST
  54. int test_fts3_node_chunksize = (4*1024);
  55. int test_fts3_node_chunk_threshold = (4*1024)*4;
  56. # define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize
  57. # define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
  58. #else
  59. # define FTS3_NODE_CHUNKSIZE (4*1024)
  60. # define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
  61. #endif
  62. /*
  63. ** The two values that may be meaningfully bound to the :1 parameter in
  64. ** statements SQL_REPLACE_STAT and SQL_SELECT_STAT.
  65. */
  66. #define FTS_STAT_DOCTOTAL 0
  67. #define FTS_STAT_INCRMERGEHINT 1
  68. #define FTS_STAT_AUTOINCRMERGE 2
  69. /*
  70. ** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic
  71. ** and incremental merge operation that takes place. This is used for
  72. ** debugging FTS only, it should not usually be turned on in production
  73. ** systems.
  74. */
  75. #ifdef FTS3_LOG_MERGES
  76. static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
  77. sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel);
  78. }
  79. #else
  80. #define fts3LogMerge(x, y)
  81. #endif
  82. typedef struct PendingList PendingList;
  83. typedef struct SegmentNode SegmentNode;
  84. typedef struct SegmentWriter SegmentWriter;
  85. /*
  86. ** An instance of the following data structure is used to build doclists
  87. ** incrementally. See function fts3PendingListAppend() for details.
  88. */
  89. struct PendingList {
  90. int nData;
  91. char *aData;
  92. int nSpace;
  93. sqlite3_int64 iLastDocid;
  94. sqlite3_int64 iLastCol;
  95. sqlite3_int64 iLastPos;
  96. };
  97. /*
  98. ** Each cursor has a (possibly empty) linked list of the following objects.
  99. */
  100. struct Fts3DeferredToken {
  101. Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */
  102. int iCol; /* Column token must occur in */
  103. Fts3DeferredToken *pNext; /* Next in list of deferred tokens */
  104. PendingList *pList; /* Doclist is assembled here */
  105. };
  106. /*
  107. ** An instance of this structure is used to iterate through the terms on
  108. ** a contiguous set of segment b-tree leaf nodes. Although the details of
  109. ** this structure are only manipulated by code in this file, opaque handles
  110. ** of type Fts3SegReader* are also used by code in fts3.c to iterate through
  111. ** terms when querying the full-text index. See functions:
  112. **
  113. ** sqlite3Fts3SegReaderNew()
  114. ** sqlite3Fts3SegReaderFree()
  115. ** sqlite3Fts3SegReaderIterate()
  116. **
  117. ** Methods used to manipulate Fts3SegReader structures:
  118. **
  119. ** fts3SegReaderNext()
  120. ** fts3SegReaderFirstDocid()
  121. ** fts3SegReaderNextDocid()
  122. */
  123. struct Fts3SegReader {
  124. int iIdx; /* Index within level, or 0x7FFFFFFF for PT */
  125. u8 bLookup; /* True for a lookup only */
  126. u8 rootOnly; /* True for a root-only reader */
  127. sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */
  128. sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */
  129. sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */
  130. sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */
  131. char *aNode; /* Pointer to node data (or NULL) */
  132. int nNode; /* Size of buffer at aNode (or 0) */
  133. int nPopulate; /* If >0, bytes of buffer aNode[] loaded */
  134. sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */
  135. Fts3HashElem **ppNextElem;
  136. /* Variables set by fts3SegReaderNext(). These may be read directly
  137. ** by the caller. They are valid from the time SegmentReaderNew() returns
  138. ** until SegmentReaderNext() returns something other than SQLITE_OK
  139. ** (i.e. SQLITE_DONE).
  140. */
  141. int nTerm; /* Number of bytes in current term */
  142. char *zTerm; /* Pointer to current term */
  143. int nTermAlloc; /* Allocated size of zTerm buffer */
  144. char *aDoclist; /* Pointer to doclist of current entry */
  145. int nDoclist; /* Size of doclist in current entry */
  146. /* The following variables are used by fts3SegReaderNextDocid() to iterate
  147. ** through the current doclist (aDoclist/nDoclist).
  148. */
  149. char *pOffsetList;
  150. int nOffsetList; /* For descending pending seg-readers only */
  151. sqlite3_int64 iDocid;
  152. };
  153. #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
  154. #define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0)
  155. /*
  156. ** An instance of this structure is used to create a segment b-tree in the
  157. ** database. The internal details of this type are only accessed by the
  158. ** following functions:
  159. **
  160. ** fts3SegWriterAdd()
  161. ** fts3SegWriterFlush()
  162. ** fts3SegWriterFree()
  163. */
  164. struct SegmentWriter {
  165. SegmentNode *pTree; /* Pointer to interior tree structure */
  166. sqlite3_int64 iFirst; /* First slot in %_segments written */
  167. sqlite3_int64 iFree; /* Next free slot in %_segments */
  168. char *zTerm; /* Pointer to previous term buffer */
  169. int nTerm; /* Number of bytes in zTerm */
  170. int nMalloc; /* Size of malloc'd buffer at zMalloc */
  171. char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
  172. int nSize; /* Size of allocation at aData */
  173. int nData; /* Bytes of data in aData */
  174. char *aData; /* Pointer to block from malloc() */
  175. };
  176. /*
  177. ** Type SegmentNode is used by the following three functions to create
  178. ** the interior part of the segment b+-tree structures (everything except
  179. ** the leaf nodes). These functions and type are only ever used by code
  180. ** within the fts3SegWriterXXX() family of functions described above.
  181. **
  182. ** fts3NodeAddTerm()
  183. ** fts3NodeWrite()
  184. ** fts3NodeFree()
  185. **
  186. ** When a b+tree is written to the database (either as a result of a merge
  187. ** or the pending-terms table being flushed), leaves are written into the
  188. ** database file as soon as they are completely populated. The interior of
  189. ** the tree is assembled in memory and written out only once all leaves have
  190. ** been populated and stored. This is Ok, as the b+-tree fanout is usually
  191. ** very large, meaning that the interior of the tree consumes relatively
  192. ** little memory.
  193. */
  194. struct SegmentNode {
  195. SegmentNode *pParent; /* Parent node (or NULL for root node) */
  196. SegmentNode *pRight; /* Pointer to right-sibling */
  197. SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */
  198. int nEntry; /* Number of terms written to node so far */
  199. char *zTerm; /* Pointer to previous term buffer */
  200. int nTerm; /* Number of bytes in zTerm */
  201. int nMalloc; /* Size of malloc'd buffer at zMalloc */
  202. char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
  203. int nData; /* Bytes of valid data so far */
  204. char *aData; /* Node data */
  205. };
  206. /*
  207. ** Valid values for the second argument to fts3SqlStmt().
  208. */
  209. #define SQL_DELETE_CONTENT 0
  210. #define SQL_IS_EMPTY 1
  211. #define SQL_DELETE_ALL_CONTENT 2
  212. #define SQL_DELETE_ALL_SEGMENTS 3
  213. #define SQL_DELETE_ALL_SEGDIR 4
  214. #define SQL_DELETE_ALL_DOCSIZE 5
  215. #define SQL_DELETE_ALL_STAT 6
  216. #define SQL_SELECT_CONTENT_BY_ROWID 7
  217. #define SQL_NEXT_SEGMENT_INDEX 8
  218. #define SQL_INSERT_SEGMENTS 9
  219. #define SQL_NEXT_SEGMENTS_ID 10
  220. #define SQL_INSERT_SEGDIR 11
  221. #define SQL_SELECT_LEVEL 12
  222. #define SQL_SELECT_LEVEL_RANGE 13
  223. #define SQL_SELECT_LEVEL_COUNT 14
  224. #define SQL_SELECT_SEGDIR_MAX_LEVEL 15
  225. #define SQL_DELETE_SEGDIR_LEVEL 16
  226. #define SQL_DELETE_SEGMENTS_RANGE 17
  227. #define SQL_CONTENT_INSERT 18
  228. #define SQL_DELETE_DOCSIZE 19
  229. #define SQL_REPLACE_DOCSIZE 20
  230. #define SQL_SELECT_DOCSIZE 21
  231. #define SQL_SELECT_STAT 22
  232. #define SQL_REPLACE_STAT 23
  233. #define SQL_SELECT_ALL_PREFIX_LEVEL 24
  234. #define SQL_DELETE_ALL_TERMS_SEGDIR 25
  235. #define SQL_DELETE_SEGDIR_RANGE 26
  236. #define SQL_SELECT_ALL_LANGID 27
  237. #define SQL_FIND_MERGE_LEVEL 28
  238. #define SQL_MAX_LEAF_NODE_ESTIMATE 29
  239. #define SQL_DELETE_SEGDIR_ENTRY 30
  240. #define SQL_SHIFT_SEGDIR_ENTRY 31
  241. #define SQL_SELECT_SEGDIR 32
  242. #define SQL_CHOMP_SEGDIR 33
  243. #define SQL_SEGMENT_IS_APPENDABLE 34
  244. #define SQL_SELECT_INDEXES 35
  245. #define SQL_SELECT_MXLEVEL 36
  246. /*
  247. ** This function is used to obtain an SQLite prepared statement handle
  248. ** for the statement identified by the second argument. If successful,
  249. ** *pp is set to the requested statement handle and SQLITE_OK returned.
  250. ** Otherwise, an SQLite error code is returned and *pp is set to 0.
  251. **
  252. ** If argument apVal is not NULL, then it must point to an array with
  253. ** at least as many entries as the requested statement has bound
  254. ** parameters. The values are bound to the statements parameters before
  255. ** returning.
  256. */
  257. static int fts3SqlStmt(
  258. Fts3Table *p, /* Virtual table handle */
  259. int eStmt, /* One of the SQL_XXX constants above */
  260. sqlite3_stmt **pp, /* OUT: Statement handle */
  261. sqlite3_value **apVal /* Values to bind to statement */
  262. ){
  263. const char *azSql[] = {
  264. /* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?",
  265. /* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)",
  266. /* 2 */ "DELETE FROM %Q.'%q_content'",
  267. /* 3 */ "DELETE FROM %Q.'%q_segments'",
  268. /* 4 */ "DELETE FROM %Q.'%q_segdir'",
  269. /* 5 */ "DELETE FROM %Q.'%q_docsize'",
  270. /* 6 */ "DELETE FROM %Q.'%q_stat'",
  271. /* 7 */ "SELECT %s WHERE rowid=?",
  272. /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
  273. /* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
  274. /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
  275. /* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
  276. /* Return segments in order from oldest to newest.*/
  277. /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
  278. "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC",
  279. /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
  280. "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?"
  281. "ORDER BY level DESC, idx ASC",
  282. /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?",
  283. /* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
  284. /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?",
  285. /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?",
  286. /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)",
  287. /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?",
  288. /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",
  289. /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
  290. /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?",
  291. /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)",
  292. /* 24 */ "",
  293. /* 25 */ "",
  294. /* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
  295. /* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
  296. /* This statement is used to determine which level to read the input from
  297. ** when performing an incremental merge. It returns the absolute level number
  298. ** of the oldest level in the db that contains at least ? segments. Or,
  299. ** if no level in the FTS index contains more than ? segments, the statement
  300. ** returns zero rows. */
  301. /* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?"
  302. " ORDER BY (level %% 1024) ASC LIMIT 1",
  303. /* Estimate the upper limit on the number of leaf nodes in a new segment
  304. ** created by merging the oldest :2 segments from absolute level :1. See
  305. ** function sqlite3Fts3Incrmerge() for details. */
  306. /* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
  307. " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",
  308. /* SQL_DELETE_SEGDIR_ENTRY
  309. ** Delete the %_segdir entry on absolute level :1 with index :2. */
  310. /* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
  311. /* SQL_SHIFT_SEGDIR_ENTRY
  312. ** Modify the idx value for the segment with idx=:3 on absolute level :2
  313. ** to :1. */
  314. /* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?",
  315. /* SQL_SELECT_SEGDIR
  316. ** Read a single entry from the %_segdir table. The entry from absolute
  317. ** level :1 with index value :2. */
  318. /* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
  319. "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
  320. /* SQL_CHOMP_SEGDIR
  321. ** Update the start_block (:1) and root (:2) fields of the %_segdir
  322. ** entry located on absolute level :3 with index :4. */
  323. /* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?"
  324. "WHERE level = ? AND idx = ?",
  325. /* SQL_SEGMENT_IS_APPENDABLE
  326. ** Return a single row if the segment with end_block=? is appendable. Or
  327. ** no rows otherwise. */
  328. /* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL",
  329. /* SQL_SELECT_INDEXES
  330. ** Return the list of valid segment indexes for absolute level ? */
  331. /* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC",
  332. /* SQL_SELECT_MXLEVEL
  333. ** Return the largest relative level in the FTS index or indexes. */
  334. /* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'"
  335. };
  336. int rc = SQLITE_OK;
  337. sqlite3_stmt *pStmt;
  338. assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  339. assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
  340. pStmt = p->aStmt[eStmt];
  341. if( !pStmt ){
  342. char *zSql;
  343. if( eStmt==SQL_CONTENT_INSERT ){
  344. zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist);
  345. }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){
  346. zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist);
  347. }else{
  348. zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
  349. }
  350. if( !zSql ){
  351. rc = SQLITE_NOMEM;
  352. }else{
  353. rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL);
  354. sqlite3_free(zSql);
  355. assert( rc==SQLITE_OK || pStmt==0 );
  356. p->aStmt[eStmt] = pStmt;
  357. }
  358. }
  359. if( apVal ){
  360. int i;
  361. int nParam = sqlite3_bind_parameter_count(pStmt);
  362. for(i=0; rc==SQLITE_OK && i<nParam; i++){
  363. rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
  364. }
  365. }
  366. *pp = pStmt;
  367. return rc;
  368. }
  369. static int fts3SelectDocsize(
  370. Fts3Table *pTab, /* FTS3 table handle */
  371. sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */
  372. sqlite3_stmt **ppStmt /* OUT: Statement handle */
  373. ){
  374. sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */
  375. int rc; /* Return code */
  376. rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0);
  377. if( rc==SQLITE_OK ){
  378. sqlite3_bind_int64(pStmt, 1, iDocid);
  379. rc = sqlite3_step(pStmt);
  380. if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
  381. rc = sqlite3_reset(pStmt);
  382. if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
  383. pStmt = 0;
  384. }else{
  385. rc = SQLITE_OK;
  386. }
  387. }
  388. *ppStmt = pStmt;
  389. return rc;
  390. }
  391. int sqlite3Fts3SelectDoctotal(
  392. Fts3Table *pTab, /* Fts3 table handle */
  393. sqlite3_stmt **ppStmt /* OUT: Statement handle */
  394. ){
  395. sqlite3_stmt *pStmt = 0;
  396. int rc;
  397. rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0);
  398. if( rc==SQLITE_OK ){
  399. sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
  400. if( sqlite3_step(pStmt)!=SQLITE_ROW
  401. || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB
  402. ){
  403. rc = sqlite3_reset(pStmt);
  404. if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
  405. pStmt = 0;
  406. }
  407. }
  408. *ppStmt = pStmt;
  409. return rc;
  410. }
  411. int sqlite3Fts3SelectDocsize(
  412. Fts3Table *pTab, /* Fts3 table handle */
  413. sqlite3_int64 iDocid, /* Docid to read size data for */
  414. sqlite3_stmt **ppStmt /* OUT: Statement handle */
  415. ){
  416. return fts3SelectDocsize(pTab, iDocid, ppStmt);
  417. }
  418. /*
  419. ** Similar to fts3SqlStmt(). Except, after binding the parameters in
  420. ** array apVal[] to the SQL statement identified by eStmt, the statement
  421. ** is executed.
  422. **
  423. ** Returns SQLITE_OK if the statement is successfully executed, or an
  424. ** SQLite error code otherwise.
  425. */
  426. static void fts3SqlExec(
  427. int *pRC, /* Result code */
  428. Fts3Table *p, /* The FTS3 table */
  429. int eStmt, /* Index of statement to evaluate */
  430. sqlite3_value **apVal /* Parameters to bind */
  431. ){
  432. sqlite3_stmt *pStmt;
  433. int rc;
  434. if( *pRC ) return;
  435. rc = fts3SqlStmt(p, eStmt, &pStmt, apVal);
  436. if( rc==SQLITE_OK ){
  437. sqlite3_step(pStmt);
  438. rc = sqlite3_reset(pStmt);
  439. }
  440. *pRC = rc;
  441. }
  442. /*
  443. ** This function ensures that the caller has obtained an exclusive
  444. ** shared-cache table-lock on the %_segdir table. This is required before
  445. ** writing data to the fts3 table. If this lock is not acquired first, then
  446. ** the caller may end up attempting to take this lock as part of committing
  447. ** a transaction, causing SQLite to return SQLITE_LOCKED or
  448. ** LOCKED_SHAREDCACHEto a COMMIT command.
  449. **
  450. ** It is best to avoid this because if FTS3 returns any error when
  451. ** committing a transaction, the whole transaction will be rolled back.
  452. ** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE.
  453. ** It can still happen if the user locks the underlying tables directly
  454. ** instead of accessing them via FTS.
  455. */
  456. static int fts3Writelock(Fts3Table *p){
  457. int rc = SQLITE_OK;
  458. if( p->nPendingData==0 ){
  459. sqlite3_stmt *pStmt;
  460. rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0);
  461. if( rc==SQLITE_OK ){
  462. sqlite3_bind_null(pStmt, 1);
  463. sqlite3_step(pStmt);
  464. rc = sqlite3_reset(pStmt);
  465. }
  466. }
  467. return rc;
  468. }
  469. /*
  470. ** FTS maintains a separate indexes for each language-id (a 32-bit integer).
  471. ** Within each language id, a separate index is maintained to store the
  472. ** document terms, and each configured prefix size (configured the FTS
  473. ** "prefix=" option). And each index consists of multiple levels ("relative
  474. ** levels").
  475. **
  476. ** All three of these values (the language id, the specific index and the
  477. ** level within the index) are encoded in 64-bit integer values stored
  478. ** in the %_segdir table on disk. This function is used to convert three
  479. ** separate component values into the single 64-bit integer value that
  480. ** can be used to query the %_segdir table.
  481. **
  482. ** Specifically, each language-id/index combination is allocated 1024
  483. ** 64-bit integer level values ("absolute levels"). The main terms index
  484. ** for language-id 0 is allocate values 0-1023. The first prefix index
  485. ** (if any) for language-id 0 is allocated values 1024-2047. And so on.
  486. ** Language 1 indexes are allocated immediately following language 0.
  487. **
  488. ** So, for a system with nPrefix prefix indexes configured, the block of
  489. ** absolute levels that corresponds to language-id iLangid and index
  490. ** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024).
  491. */
  492. static sqlite3_int64 getAbsoluteLevel(
  493. Fts3Table *p, /* FTS3 table handle */
  494. int iLangid, /* Language id */
  495. int iIndex, /* Index in p->aIndex[] */
  496. int iLevel /* Level of segments */
  497. ){
  498. sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */
  499. assert( iLangid>=0 );
  500. assert( p->nIndex>0 );
  501. assert( iIndex>=0 && iIndex<p->nIndex );
  502. iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
  503. return iBase + iLevel;
  504. }
  505. /*
  506. ** Set *ppStmt to a statement handle that may be used to iterate through
  507. ** all rows in the %_segdir table, from oldest to newest. If successful,
  508. ** return SQLITE_OK. If an error occurs while preparing the statement,
  509. ** return an SQLite error code.
  510. **
  511. ** There is only ever one instance of this SQL statement compiled for
  512. ** each FTS3 table.
  513. **
  514. ** The statement returns the following columns from the %_segdir table:
  515. **
  516. ** 0: idx
  517. ** 1: start_block
  518. ** 2: leaves_end_block
  519. ** 3: end_block
  520. ** 4: root
  521. */
  522. int sqlite3Fts3AllSegdirs(
  523. Fts3Table *p, /* FTS3 table */
  524. int iLangid, /* Language being queried */
  525. int iIndex, /* Index for p->aIndex[] */
  526. int iLevel, /* Level to select (relative level) */
  527. sqlite3_stmt **ppStmt /* OUT: Compiled statement */
  528. ){
  529. int rc;
  530. sqlite3_stmt *pStmt = 0;
  531. assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
  532. assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  533. assert( iIndex>=0 && iIndex<p->nIndex );
  534. if( iLevel<0 ){
  535. /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
  536. rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
  537. if( rc==SQLITE_OK ){
  538. sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  539. sqlite3_bind_int64(pStmt, 2,
  540. getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  541. );
  542. }
  543. }else{
  544. /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
  545. rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
  546. if( rc==SQLITE_OK ){
  547. sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
  548. }
  549. }
  550. *ppStmt = pStmt;
  551. return rc;
  552. }
  553. /*
  554. ** Append a single varint to a PendingList buffer. SQLITE_OK is returned
  555. ** if successful, or an SQLite error code otherwise.
  556. **
  557. ** This function also serves to allocate the PendingList structure itself.
  558. ** For example, to create a new PendingList structure containing two
  559. ** varints:
  560. **
  561. ** PendingList *p = 0;
  562. ** fts3PendingListAppendVarint(&p, 1);
  563. ** fts3PendingListAppendVarint(&p, 2);
  564. */
  565. static int fts3PendingListAppendVarint(
  566. PendingList **pp, /* IN/OUT: Pointer to PendingList struct */
  567. sqlite3_int64 i /* Value to append to data */
  568. ){
  569. PendingList *p = *pp;
  570. /* Allocate or grow the PendingList as required. */
  571. if( !p ){
  572. p = sqlite3_malloc(sizeof(*p) + 100);
  573. if( !p ){
  574. return SQLITE_NOMEM;
  575. }
  576. p->nSpace = 100;
  577. p->aData = (char *)&p[1];
  578. p->nData = 0;
  579. }
  580. else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){
  581. int nNew = p->nSpace * 2;
  582. p = sqlite3_realloc(p, sizeof(*p) + nNew);
  583. if( !p ){
  584. sqlite3_free(*pp);
  585. *pp = 0;
  586. return SQLITE_NOMEM;
  587. }
  588. p->nSpace = nNew;
  589. p->aData = (char *)&p[1];
  590. }
  591. /* Append the new serialized varint to the end of the list. */
  592. p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i);
  593. p->aData[p->nData] = '\0';
  594. *pp = p;
  595. return SQLITE_OK;
  596. }
  597. /*
  598. ** Add a docid/column/position entry to a PendingList structure. Non-zero
  599. ** is returned if the structure is sqlite3_realloced as part of adding
  600. ** the entry. Otherwise, zero.
  601. **
  602. ** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning.
  603. ** Zero is always returned in this case. Otherwise, if no OOM error occurs,
  604. ** it is set to SQLITE_OK.
  605. */
  606. static int fts3PendingListAppend(
  607. PendingList **pp, /* IN/OUT: PendingList structure */
  608. sqlite3_int64 iDocid, /* Docid for entry to add */
  609. sqlite3_int64 iCol, /* Column for entry to add */
  610. sqlite3_int64 iPos, /* Position of term for entry to add */
  611. int *pRc /* OUT: Return code */
  612. ){
  613. PendingList *p = *pp;
  614. int rc = SQLITE_OK;
  615. assert( !p || p->iLastDocid<=iDocid );
  616. if( !p || p->iLastDocid!=iDocid ){
  617. sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0);
  618. if( p ){
  619. assert( p->nData<p->nSpace );
  620. assert( p->aData[p->nData]==0 );
  621. p->nData++;
  622. }
  623. if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){
  624. goto pendinglistappend_out;
  625. }
  626. p->iLastCol = -1;
  627. p->iLastPos = 0;
  628. p->iLastDocid = iDocid;
  629. }
  630. if( iCol>0 && p->iLastCol!=iCol ){
  631. if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1))
  632. || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol))
  633. ){
  634. goto pendinglistappend_out;
  635. }
  636. p->iLastCol = iCol;
  637. p->iLastPos = 0;
  638. }
  639. if( iCol>=0 ){
  640. assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) );
  641. rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos);
  642. if( rc==SQLITE_OK ){
  643. p->iLastPos = iPos;
  644. }
  645. }
  646. pendinglistappend_out:
  647. *pRc = rc;
  648. if( p!=*pp ){
  649. *pp = p;
  650. return 1;
  651. }
  652. return 0;
  653. }
  654. /*
  655. ** Free a PendingList object allocated by fts3PendingListAppend().
  656. */
  657. static void fts3PendingListDelete(PendingList *pList){
  658. sqlite3_free(pList);
  659. }
  660. /*
  661. ** Add an entry to one of the pending-terms hash tables.
  662. */
  663. static int fts3PendingTermsAddOne(
  664. Fts3Table *p,
  665. int iCol,
  666. int iPos,
  667. Fts3Hash *pHash, /* Pending terms hash table to add entry to */
  668. const char *zToken,
  669. int nToken
  670. ){
  671. PendingList *pList;
  672. int rc = SQLITE_OK;
  673. pList = (PendingList *)fts3HashFind(pHash, zToken, nToken);
  674. if( pList ){
  675. p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
  676. }
  677. if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
  678. if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){
  679. /* Malloc failed while inserting the new entry. This can only
  680. ** happen if there was no previous entry for this token.
  681. */
  682. assert( 0==fts3HashFind(pHash, zToken, nToken) );
  683. sqlite3_free(pList);
  684. rc = SQLITE_NOMEM;
  685. }
  686. }
  687. if( rc==SQLITE_OK ){
  688. p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
  689. }
  690. return rc;
  691. }
  692. /*
  693. ** Tokenize the nul-terminated string zText and add all tokens to the
  694. ** pending-terms hash-table. The docid used is that currently stored in
  695. ** p->iPrevDocid, and the column is specified by argument iCol.
  696. **
  697. ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
  698. */
  699. static int fts3PendingTermsAdd(
  700. Fts3Table *p, /* Table into which text will be inserted */
  701. int iLangid, /* Language id to use */
  702. const char *zText, /* Text of document to be inserted */
  703. int iCol, /* Column into which text is being inserted */
  704. u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */
  705. ){
  706. int rc;
  707. int iStart = 0;
  708. int iEnd = 0;
  709. int iPos = 0;
  710. int nWord = 0;
  711. char const *zToken;
  712. int nToken = 0;
  713. sqlite3_tokenizer *pTokenizer = p->pTokenizer;
  714. sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  715. sqlite3_tokenizer_cursor *pCsr;
  716. int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
  717. const char**,int*,int*,int*,int*);
  718. assert( pTokenizer && pModule );
  719. /* If the user has inserted a NULL value, this function may be called with
  720. ** zText==0. In this case, add zero token entries to the hash table and
  721. ** return early. */
  722. if( zText==0 ){
  723. *pnWord = 0;
  724. return SQLITE_OK;
  725. }
  726. rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
  727. if( rc!=SQLITE_OK ){
  728. return rc;
  729. }
  730. xNext = pModule->xNext;
  731. while( SQLITE_OK==rc
  732. && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
  733. ){
  734. int i;
  735. if( iPos>=nWord ) nWord = iPos+1;
  736. /* Positions cannot be negative; we use -1 as a terminator internally.
  737. ** Tokens must have a non-zero length.
  738. */
  739. if( iPos<0 || !zToken || nToken<=0 ){
  740. rc = SQLITE_ERROR;
  741. break;
  742. }
  743. /* Add the term to the terms index */
  744. rc = fts3PendingTermsAddOne(
  745. p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken
  746. );
  747. /* Add the term to each of the prefix indexes that it is not too
  748. ** short for. */
  749. for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){
  750. struct Fts3Index *pIndex = &p->aIndex[i];
  751. if( nToken<pIndex->nPrefix ) continue;
  752. rc = fts3PendingTermsAddOne(
  753. p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix
  754. );
  755. }
  756. }
  757. pModule->xClose(pCsr);
  758. *pnWord += nWord;
  759. return (rc==SQLITE_DONE ? SQLITE_OK : rc);
  760. }
  761. /*
  762. ** Calling this function indicates that subsequent calls to
  763. ** fts3PendingTermsAdd() are to add term/position-list pairs for the
  764. ** contents of the document with docid iDocid.
  765. */
  766. static int fts3PendingTermsDocid(
  767. Fts3Table *p, /* Full-text table handle */
  768. int iLangid, /* Language id of row being written */
  769. sqlite_int64 iDocid /* Docid of row being written */
  770. ){
  771. assert( iLangid>=0 );
  772. /* TODO(shess) Explore whether partially flushing the buffer on
  773. ** forced-flush would provide better performance. I suspect that if
  774. ** we ordered the doclists by size and flushed the largest until the
  775. ** buffer was half empty, that would let the less frequent terms
  776. ** generate longer doclists.
  777. */
  778. if( iDocid<=p->iPrevDocid
  779. || p->iPrevLangid!=iLangid
  780. || p->nPendingData>p->nMaxPendingData
  781. ){
  782. int rc = sqlite3Fts3PendingTermsFlush(p);
  783. if( rc!=SQLITE_OK ) return rc;
  784. }
  785. p->iPrevDocid = iDocid;
  786. p->iPrevLangid = iLangid;
  787. return SQLITE_OK;
  788. }
  789. /*
  790. ** Discard the contents of the pending-terms hash tables.
  791. */
  792. void sqlite3Fts3PendingTermsClear(Fts3Table *p){
  793. int i;
  794. for(i=0; i<p->nIndex; i++){
  795. Fts3HashElem *pElem;
  796. Fts3Hash *pHash = &p->aIndex[i].hPending;
  797. for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){
  798. PendingList *pList = (PendingList *)fts3HashData(pElem);
  799. fts3PendingListDelete(pList);
  800. }
  801. fts3HashClear(pHash);
  802. }
  803. p->nPendingData = 0;
  804. }
  805. /*
  806. ** This function is called by the xUpdate() method as part of an INSERT
  807. ** operation. It adds entries for each term in the new record to the
  808. ** pendingTerms hash table.
  809. **
  810. ** Argument apVal is the same as the similarly named argument passed to
  811. ** fts3InsertData(). Parameter iDocid is the docid of the new row.
  812. */
  813. static int fts3InsertTerms(
  814. Fts3Table *p,
  815. int iLangid,
  816. sqlite3_value **apVal,
  817. u32 *aSz
  818. ){
  819. int i; /* Iterator variable */
  820. for(i=2; i<p->nColumn+2; i++){
  821. int iCol = i-2;
  822. if( p->abNotindexed[iCol]==0 ){
  823. const char *zText = (const char *)sqlite3_value_text(apVal[i]);
  824. int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]);
  825. if( rc!=SQLITE_OK ){
  826. return rc;
  827. }
  828. aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  829. }
  830. }
  831. return SQLITE_OK;
  832. }
  833. /*
  834. ** This function is called by the xUpdate() method for an INSERT operation.
  835. ** The apVal parameter is passed a copy of the apVal argument passed by
  836. ** SQLite to the xUpdate() method. i.e:
  837. **
  838. ** apVal[0] Not used for INSERT.
  839. ** apVal[1] rowid
  840. ** apVal[2] Left-most user-defined column
  841. ** ...
  842. ** apVal[p->nColumn+1] Right-most user-defined column
  843. ** apVal[p->nColumn+2] Hidden column with same name as table
  844. ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
  845. ** apVal[p->nColumn+4] Hidden languageid column
  846. */
  847. static int fts3InsertData(
  848. Fts3Table *p, /* Full-text table */
  849. sqlite3_value **apVal, /* Array of values to insert */
  850. sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */
  851. ){
  852. int rc; /* Return code */
  853. sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */
  854. if( p->zContentTbl ){
  855. sqlite3_value *pRowid = apVal[p->nColumn+3];
  856. if( sqlite3_value_type(pRowid)==SQLITE_NULL ){
  857. pRowid = apVal[1];
  858. }
  859. if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){
  860. return SQLITE_CONSTRAINT;
  861. }
  862. *piDocid = sqlite3_value_int64(pRowid);
  863. return SQLITE_OK;
  864. }
  865. /* Locate the statement handle used to insert data into the %_content
  866. ** table. The SQL for this statement is:
  867. **
  868. ** INSERT INTO %_content VALUES(?, ?, ?, ...)
  869. **
  870. ** The statement features N '?' variables, where N is the number of user
  871. ** defined columns in the FTS3 table, plus one for the docid field.
  872. */
  873. rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
  874. if( rc==SQLITE_OK && p->zLanguageid ){
  875. rc = sqlite3_bind_int(
  876. pContentInsert, p->nColumn+2,
  877. sqlite3_value_int(apVal[p->nColumn+4])
  878. );
  879. }
  880. if( rc!=SQLITE_OK ) return rc;
  881. /* There is a quirk here. The users INSERT statement may have specified
  882. ** a value for the "rowid" field, for the "docid" field, or for both.
  883. ** Which is a problem, since "rowid" and "docid" are aliases for the
  884. ** same value. For example:
  885. **
  886. ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
  887. **
  888. ** In FTS3, this is an error. It is an error to specify non-NULL values
  889. ** for both docid and some other rowid alias.
  890. */
  891. if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){
  892. if( SQLITE_NULL==sqlite3_value_type(apVal[0])
  893. && SQLITE_NULL!=sqlite3_value_type(apVal[1])
  894. ){
  895. /* A rowid/docid conflict. */
  896. return SQLITE_ERROR;
  897. }
  898. rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]);
  899. if( rc!=SQLITE_OK ) return rc;
  900. }
  901. /* Execute the statement to insert the record. Set *piDocid to the
  902. ** new docid value.
  903. */
  904. sqlite3_step(pContentInsert);
  905. rc = sqlite3_reset(pContentInsert);
  906. *piDocid = sqlite3_last_insert_rowid(p->db);
  907. return rc;
  908. }
  909. /*
  910. ** Remove all data from the FTS3 table. Clear the hash table containing
  911. ** pending terms.
  912. */
  913. static int fts3DeleteAll(Fts3Table *p, int bContent){
  914. int rc = SQLITE_OK; /* Return code */
  915. /* Discard the contents of the pending-terms hash table. */
  916. sqlite3Fts3PendingTermsClear(p);
  917. /* Delete everything from the shadow tables. Except, leave %_content as
  918. ** is if bContent is false. */
  919. assert( p->zContentTbl==0 || bContent==0 );
  920. if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
  921. fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
  922. fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
  923. if( p->bHasDocsize ){
  924. fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
  925. }
  926. if( p->bHasStat ){
  927. fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
  928. }
  929. return rc;
  930. }
  931. /*
  932. **
  933. */
  934. static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
  935. int iLangid = 0;
  936. if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
  937. return iLangid;
  938. }
  939. /*
  940. ** The first element in the apVal[] array is assumed to contain the docid
  941. ** (an integer) of a row about to be deleted. Remove all terms from the
  942. ** full-text index.
  943. */
  944. static void fts3DeleteTerms(
  945. int *pRC, /* Result code */
  946. Fts3Table *p, /* The FTS table to delete from */
  947. sqlite3_value *pRowid, /* The docid to be deleted */
  948. u32 *aSz, /* Sizes of deleted document written here */
  949. int *pbFound /* OUT: Set to true if row really does exist */
  950. ){
  951. int rc;
  952. sqlite3_stmt *pSelect;
  953. assert( *pbFound==0 );
  954. if( *pRC ) return;
  955. rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  956. if( rc==SQLITE_OK ){
  957. if( SQLITE_ROW==sqlite3_step(pSelect) ){
  958. int i;
  959. int iLangid = langidFromSelect(p, pSelect);
  960. rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
  961. for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
  962. int iCol = i-1;
  963. if( p->abNotindexed[iCol]==0 ){
  964. const char *zText = (const char *)sqlite3_column_text(pSelect, i);
  965. rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
  966. aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
  967. }
  968. }
  969. if( rc!=SQLITE_OK ){
  970. sqlite3_reset(pSelect);
  971. *pRC = rc;
  972. return;
  973. }
  974. *pbFound = 1;
  975. }
  976. rc = sqlite3_reset(pSelect);
  977. }else{
  978. sqlite3_reset(pSelect);
  979. }
  980. *pRC = rc;
  981. }
  982. /*
  983. ** Forward declaration to account for the circular dependency between
  984. ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
  985. */
  986. static int fts3SegmentMerge(Fts3Table *, int, int, int);
  987. /*
  988. ** This function allocates a new level iLevel index in the segdir table.
  989. ** Usually, indexes are allocated within a level sequentially starting
  990. ** with 0, so the allocated index is one greater than the value returned
  991. ** by:
  992. **
  993. ** SELECT max(idx) FROM %_segdir WHERE level = :iLevel
  994. **
  995. ** However, if there are already FTS3_MERGE_COUNT indexes at the requested
  996. ** level, they are merged into a single level (iLevel+1) segment and the
  997. ** allocated index is 0.
  998. **
  999. ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
  1000. ** returned. Otherwise, an SQLite error code is returned.
  1001. */
  1002. static int fts3AllocateSegdirIdx(
  1003. Fts3Table *p,
  1004. int iLangid, /* Language id */
  1005. int iIndex, /* Index for p->aIndex */
  1006. int iLevel,
  1007. int *piIdx
  1008. ){
  1009. int rc; /* Return Code */
  1010. sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
  1011. int iNext = 0; /* Result of query pNextIdx */
  1012. assert( iLangid>=0 );
  1013. assert( p->nIndex>=1 );
  1014. /* Set variable iNext to the next available segdir index at level iLevel. */
  1015. rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
  1016. if( rc==SQLITE_OK ){
  1017. sqlite3_bind_int64(
  1018. pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
  1019. );
  1020. if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
  1021. iNext = sqlite3_column_int(pNextIdx, 0);
  1022. }
  1023. rc = sqlite3_reset(pNextIdx);
  1024. }
  1025. if( rc==SQLITE_OK ){
  1026. /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
  1027. ** full, merge all segments in level iLevel into a single iLevel+1
  1028. ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
  1029. ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
  1030. */
  1031. if( iNext>=FTS3_MERGE_COUNT ){
  1032. fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
  1033. rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
  1034. *piIdx = 0;
  1035. }else{
  1036. *piIdx = iNext;
  1037. }
  1038. }
  1039. return rc;
  1040. }
  1041. /*
  1042. ** The %_segments table is declared as follows:
  1043. **
  1044. ** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB)
  1045. **
  1046. ** This function reads data from a single row of the %_segments table. The
  1047. ** specific row is identified by the iBlockid parameter. If paBlob is not
  1048. ** NULL, then a buffer is allocated using sqlite3_malloc() and populated
  1049. ** with the contents of the blob stored in the "block" column of the
  1050. ** identified table row is. Whether or not paBlob is NULL, *pnBlob is set
  1051. ** to the size of the blob in bytes before returning.
  1052. **
  1053. ** If an error occurs, or the table does not contain the specified row,
  1054. ** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If
  1055. ** paBlob is non-NULL, then it is the responsibility of the caller to
  1056. ** eventually free the returned buffer.
  1057. **
  1058. ** This function may leave an open sqlite3_blob* handle in the
  1059. ** Fts3Table.pSegments variable. This handle is reused by subsequent calls
  1060. ** to this function. The handle may be closed by calling the
  1061. ** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy
  1062. ** performance improvement, but the blob handle should always be closed
  1063. ** before control is returned to the user (to prevent a lock being held
  1064. ** on the database file for longer than necessary). Thus, any virtual table
  1065. ** method (xFilter etc.) that may directly or indirectly call this function
  1066. ** must call sqlite3Fts3SegmentsClose() before returning.
  1067. */
  1068. int sqlite3Fts3ReadBlock(
  1069. Fts3Table *p, /* FTS3 table handle */
  1070. sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */
  1071. char **paBlob, /* OUT: Blob data in malloc'd buffer */
  1072. int *pnBlob, /* OUT: Size of blob data */
  1073. int *pnLoad /* OUT: Bytes actually loaded */
  1074. ){
  1075. int rc; /* Return code */
  1076. /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */
  1077. assert( pnBlob );
  1078. if( p->pSegments ){
  1079. rc = sqlite3_blob_reopen(p->pSegments, iBlockid);
  1080. }else{
  1081. if( 0==p->zSegmentsTbl ){
  1082. p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName);
  1083. if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM;
  1084. }
  1085. rc = sqlite3_blob_open(
  1086. p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
  1087. );
  1088. }
  1089. if( rc==SQLITE_OK ){
  1090. int nByte = sqlite3_blob_bytes(p->pSegments);
  1091. *pnBlob = nByte;
  1092. if( paBlob ){
  1093. char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING);
  1094. if( !aByte ){
  1095. rc = SQLITE_NOMEM;
  1096. }else{
  1097. if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){
  1098. nByte = FTS3_NODE_CHUNKSIZE;
  1099. *pnLoad = nByte;
  1100. }
  1101. rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0);
  1102. memset(&aByte[nByte], 0, FTS3_NODE_PADDING);
  1103. if( rc!=SQLITE_OK ){
  1104. sqlite3_free(aByte);
  1105. aByte = 0;
  1106. }
  1107. }
  1108. *paBlob = aByte;
  1109. }
  1110. }
  1111. return rc;
  1112. }
  1113. /*
  1114. ** Close the blob handle at p->pSegments, if it is open. See comments above
  1115. ** the sqlite3Fts3ReadBlock() function for details.
  1116. */
  1117. void sqlite3Fts3SegmentsClose(Fts3Table *p){
  1118. sqlite3_blob_close(p->pSegments);
  1119. p->pSegments = 0;
  1120. }
  1121. static int fts3SegReaderIncrRead(Fts3SegReader *pReader){
  1122. int nRead; /* Number of bytes to read */
  1123. int rc; /* Return code */
  1124. nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE);
  1125. rc = sqlite3_blob_read(
  1126. pReader->pBlob,
  1127. &pReader->aNode[pReader->nPopulate],
  1128. nRead,
  1129. pReader->nPopulate
  1130. );
  1131. if( rc==SQLITE_OK ){
  1132. pReader->nPopulate += nRead;
  1133. memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING);
  1134. if( pReader->nPopulate==pReader->nNode ){
  1135. sqlite3_blob_close(pReader->pBlob);
  1136. pReader->pBlob = 0;
  1137. pReader->nPopulate = 0;
  1138. }
  1139. }
  1140. return rc;
  1141. }
  1142. static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){
  1143. int rc = SQLITE_OK;
  1144. assert( !pReader->pBlob
  1145. || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode])
  1146. );
  1147. while( pReader->pBlob && rc==SQLITE_OK
  1148. && (pFrom - pReader->aNode + nByte)>pReader->nPopulate
  1149. ){
  1150. rc = fts3SegReaderIncrRead(pReader);
  1151. }
  1152. return rc;
  1153. }
  1154. /*
  1155. ** Set an Fts3SegReader cursor to point at EOF.
  1156. */
  1157. static void fts3SegReaderSetEof(Fts3SegReader *pSeg){
  1158. if( !fts3SegReaderIsRootOnly(pSeg) ){
  1159. sqlite3_free(pSeg->aNode);
  1160. sqlite3_blob_close(pSeg->pBlob);
  1161. pSeg->pBlob = 0;
  1162. }
  1163. pSeg->aNode = 0;
  1164. }
  1165. /*
  1166. ** Move the iterator passed as the first argument to the next term in the
  1167. ** segment. If successful, SQLITE_OK is returned. If there is no next term,
  1168. ** SQLITE_DONE. Otherwise, an SQLite error code.
  1169. */
  1170. static int fts3SegReaderNext(
  1171. Fts3Table *p,
  1172. Fts3SegReader *pReader,
  1173. int bIncr
  1174. ){
  1175. int rc; /* Return code of various sub-routines */
  1176. char *pNext; /* Cursor variable */
  1177. int nPrefix; /* Number of bytes in term prefix */
  1178. int nSuffix; /* Number of bytes in term suffix */
  1179. if( !pReader->aDoclist ){
  1180. pNext = pReader->aNode;
  1181. }else{
  1182. pNext = &pReader->aDoclist[pReader->nDoclist];
  1183. }
  1184. if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
  1185. if( fts3SegReaderIsPending(pReader) ){
  1186. Fts3HashElem *pElem = *(pReader->ppNextElem);
  1187. if( pElem==0 ){
  1188. pReader->aNode = 0;
  1189. }else{
  1190. PendingList *pList = (PendingList *)fts3HashData(pElem);
  1191. pReader->zTerm = (char *)fts3HashKey(pElem);
  1192. pReader->nTerm = fts3HashKeysize(pElem);
  1193. pReader->nNode = pReader->nDoclist = pList->nData + 1;
  1194. pReader->aNode = pReader->aDoclist = pList->aData;
  1195. pReader->ppNextElem++;
  1196. assert( pReader->aNode );
  1197. }
  1198. return SQLITE_OK;
  1199. }
  1200. fts3SegReaderSetEof(pReader);
  1201. /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf
  1202. ** blocks have already been traversed. */
  1203. assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock );
  1204. if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){
  1205. return SQLITE_OK;
  1206. }
  1207. rc = sqlite3Fts3ReadBlock(
  1208. p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode,
  1209. (bIncr ? &pReader->nPopulate : 0)
  1210. );
  1211. if( rc!=SQLITE_OK ) return rc;
  1212. assert( pReader->pBlob==0 );
  1213. if( bIncr && pReader->nPopulate<pReader->nNode ){
  1214. pReader->pBlob = p->pSegments;
  1215. p->pSegments = 0;
  1216. }
  1217. pNext = pReader->aNode;
  1218. }
  1219. assert( !fts3SegReaderIsPending(pReader) );
  1220. rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2);
  1221. if( rc!=SQLITE_OK ) return rc;
  1222. /* Because of the FTS3_NODE_PADDING bytes of padding, the following is
  1223. ** safe (no risk of overread) even if the node data is corrupted. */
  1224. pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
  1225. pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
  1226. if( nPrefix<0 || nSuffix<=0
  1227. || &pNext[nSuffix]>&pReader->aNode[pReader->nNode]
  1228. ){
  1229. return FTS_CORRUPT_VTAB;
  1230. }
  1231. if( nPrefix+nSuffix>pReader->nTermAlloc ){
  1232. int nNew = (nPrefix+nSuffix)*2;
  1233. char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
  1234. if( !zNew ){
  1235. return SQLITE_NOMEM;
  1236. }
  1237. pReader->zTerm = zNew;
  1238. pReader->nTermAlloc = nNew;
  1239. }
  1240. rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX);
  1241. if( rc!=SQLITE_OK ) return rc;
  1242. memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);
  1243. pReader->nTerm = nPrefix+nSuffix;
  1244. pNext += nSuffix;
  1245. pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
  1246. pReader->aDoclist = pNext;
  1247. pReader->pOffsetList = 0;
  1248. /* Check that the doclist does not appear to extend past the end of the
  1249. ** b-tree node. And that the final byte of the doclist is 0x00. If either
  1250. ** of these statements is untrue, then the data structure is corrupt.
  1251. */
  1252. if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode]
  1253. || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1])
  1254. ){
  1255. return FTS_CORRUPT_VTAB;
  1256. }
  1257. return SQLITE_OK;
  1258. }
  1259. /*
  1260. ** Set the SegReader to point to the first docid in the doclist associated
  1261. ** with the current term.
  1262. */
  1263. static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){
  1264. int rc = SQLITE_OK;
  1265. assert( pReader->aDoclist );
  1266. assert( !pReader->pOffsetList );
  1267. if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){
  1268. u8 bEof = 0;
  1269. pReader->iDocid = 0;
  1270. pReader->nOffsetList = 0;
  1271. sqlite3Fts3DoclistPrev(0,
  1272. pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList,
  1273. &pReader->iDocid, &pReader->nOffsetList, &bEof
  1274. );
  1275. }else{
  1276. rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX);
  1277. if( rc==SQLITE_OK ){
  1278. int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid);
  1279. pReader->pOffsetList = &pReader->aDoclist[n];
  1280. }
  1281. }
  1282. return rc;
  1283. }
  1284. /*
  1285. ** Advance the SegReader to point to the next docid in the doclist
  1286. ** associated with the current term.
  1287. **
  1288. ** If arguments ppOffsetList and pnOffsetList are not NULL, then
  1289. ** *ppOffsetList is set to point to the first column-offset list
  1290. ** in the doclist entry (i.e. immediately past the docid varint).
  1291. ** *pnOffsetList is set to the length of the set of column-offset
  1292. ** lists, not including the nul-terminator byte. For example:
  1293. */
  1294. static int fts3SegReaderNextDocid(
  1295. Fts3Table *pTab,
  1296. Fts3SegReader *pReader, /* Reader to advance to next docid */
  1297. char **ppOffsetList, /* OUT: Pointer to current position-list */
  1298. int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */
  1299. ){
  1300. int rc = SQLITE_OK;
  1301. char *p = pReader->pOffsetList;
  1302. char c = 0;
  1303. assert( p );
  1304. if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){
  1305. /* A pending-terms seg-reader for an FTS4 table that uses order=desc.
  1306. ** Pending-terms doclists are always built up in ascending order, so
  1307. ** we have to iterate through them backwards here. */
  1308. u8 bEof = 0;
  1309. if( ppOffsetList ){
  1310. *ppOffsetList = pReader->pOffsetList;
  1311. *pnOffsetList = pReader->nOffsetList - 1;
  1312. }
  1313. sqlite3Fts3DoclistPrev(0,
  1314. pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid,
  1315. &pReader->nOffsetList, &bEof
  1316. );
  1317. if( bEof ){
  1318. pReader->pOffsetList = 0;
  1319. }else{
  1320. pReader->pOffsetList = p;
  1321. }
  1322. }else{
  1323. char *pEnd = &pReader->aDoclist[pReader->nDoclist];
  1324. /* Pointer p currently points at the first byte of an offset list. The
  1325. ** following block advances it to point one byte past the end of
  1326. ** the same offset list. */
  1327. while( 1 ){
  1328. /* The following line of code (and the "p++" below the while() loop) is
  1329. ** normally all that is required to move pointer p to the desired
  1330. ** position. The exception is if this node is being loaded from disk
  1331. ** incrementally and pointer "p" now points to the first byte past
  1332. ** the populated part of pReader->aNode[].
  1333. */
  1334. while( *p | c ) c = *p++ & 0x80;
  1335. assert( *p==0 );
  1336. if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break;
  1337. rc = fts3SegReaderIncrRead(pReader);
  1338. if( rc!=SQLITE_OK ) return rc;
  1339. }
  1340. p++;
  1341. /* If required, populate the output variables with a pointer to and the
  1342. ** size of the previous offset-list.
  1343. */
  1344. if( ppOffsetList ){
  1345. *ppOffsetList = pReader->pOffsetList;
  1346. *pnOffsetList = (int)(p - pReader->pOffsetList - 1);
  1347. }
  1348. /* List may have been edited in place by fts3EvalNearTrim() */
  1349. while( p<pEnd && *p==0 ) p++;
  1350. /* If there are no more entries in the doclist, set pOffsetList to
  1351. ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and
  1352. ** Fts3SegReader.pOffsetList to point to the next offset list before
  1353. ** returning.
  1354. */
  1355. if( p>=pEnd ){
  1356. pReader->pOffsetList = 0;
  1357. }else{
  1358. rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX);
  1359. if( rc==SQLITE_OK ){
  1360. sqlite3_int64 iDelta;
  1361. pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta);
  1362. if( pTab->bDescIdx ){
  1363. pReader->iDocid -= iDelta;
  1364. }else{
  1365. pReader->iDocid += iDelta;
  1366. }
  1367. }
  1368. }
  1369. }
  1370. return SQLITE_OK;
  1371. }
  1372. int sqlite3Fts3MsrOvfl(
  1373. Fts3Cursor *pCsr,
  1374. Fts3MultiSegReader *pMsr,
  1375. int *pnOvfl
  1376. ){
  1377. Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
  1378. int nOvfl = 0;
  1379. int ii;
  1380. int rc = SQLITE_OK;
  1381. int pgsz = p->nPgsz;
  1382. assert( p->bFts4 );
  1383. assert( pgsz>0 );
  1384. for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){
  1385. Fts3SegReader *pReader = pMsr->apSegment[ii];
  1386. if( !fts3SegReaderIsPending(pReader)
  1387. && !fts3SegReaderIsRootOnly(pReader)
  1388. ){
  1389. sqlite3_int64 jj;
  1390. for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){
  1391. int nBlob;
  1392. rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0);
  1393. if( rc!=SQLITE_OK ) break;
  1394. if( (nBlob+35)>pgsz ){
  1395. nOvfl += (nBlob + 34)/pgsz;
  1396. }
  1397. }
  1398. }
  1399. }
  1400. *pnOvfl = nOvfl;
  1401. return rc;
  1402. }
  1403. /*
  1404. ** Free all allocations associated with the iterator passed as the
  1405. ** second argument.
  1406. */
  1407. void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){
  1408. if( pReader && !fts3SegReaderIsPending(pReader) ){
  1409. sqlite3_free(pReader->zTerm);
  1410. if( !fts3SegReaderIsRootOnly(pReader) ){
  1411. sqlite3_free(pReader->aNode);
  1412. sqlite3_blob_close(pReader->pBlob);
  1413. }
  1414. }
  1415. sqlite3_free(pReader);
  1416. }
  1417. /*
  1418. ** Allocate a new SegReader object.
  1419. */
  1420. int sqlite3Fts3SegReaderNew(
  1421. int iAge, /* Segment "age". */
  1422. int bLookup, /* True for a lookup only */
  1423. sqlite3_int64 iStartLeaf, /* First leaf to traverse */
  1424. sqlite3_int64 iEndLeaf, /* Final leaf to traverse */
  1425. sqlite3_int64 iEndBlock, /* Final block of segment */
  1426. const char *zRoot, /* Buffer containing root node */
  1427. int nRoot, /* Size of buffer containing root node */
  1428. Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */
  1429. ){
  1430. Fts3SegReader *pReader; /* Newly allocated SegReader object */
  1431. int nExtra = 0; /* Bytes to allocate segment root node */
  1432. assert( iStartLeaf<=iEndLeaf );
  1433. if( iStartLeaf==0 ){
  1434. nExtra = nRoot + FTS3_NODE_PADDING;
  1435. }
  1436. pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
  1437. if( !pReader ){
  1438. return SQLITE_NOMEM;
  1439. }
  1440. memset(pReader, 0, sizeof(Fts3SegReader));
  1441. pReader->iIdx = iAge;
  1442. pReader->bLookup = bLookup!=0;
  1443. pReader->iStartBlock = iStartLeaf;
  1444. pReader->iLeafEndBlock = iEndLeaf;
  1445. pReader->iEndBlock = iEndBlock;
  1446. if( nExtra ){
  1447. /* The entire segment is stored in the root node. */
  1448. pReader->aNode = (char *)&pReader[1];
  1449. pReader->rootOnly = 1;
  1450. pReader->nNode = nRoot;
  1451. memcpy(pReader->aNode, zRoot, nRoot);
  1452. memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING);
  1453. }else{
  1454. pReader->iCurrentBlock = iStartLeaf-1;
  1455. }
  1456. *ppReader = pReader;
  1457. return SQLITE_OK;
  1458. }
  1459. /*
  1460. ** This is a comparison function used as a qsort() callback when sorting
  1461. ** an array of pending terms by term. This occurs as part of flushing
  1462. ** the contents of the pending-terms hash table to the database.
  1463. */
  1464. static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
  1465. char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
  1466. char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
  1467. int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
  1468. int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs);
  1469. int n = (n1<n2 ? n1 : n2);
  1470. int c = memcmp(z1, z2, n);
  1471. if( c==0 ){
  1472. c = n1 - n2;
  1473. }
  1474. return c;
  1475. }
  1476. /*
  1477. ** This function is used to allocate an Fts3SegReader that iterates through
  1478. ** a subset of the terms stored in the Fts3Table.pendingTerms array.
  1479. **
  1480. ** If the isPrefixIter parameter is zero, then the returned SegReader iterates
  1481. ** through each term in the pending-terms table. Or, if isPrefixIter is
  1482. ** non-zero, it iterates through each term and its prefixes. For example, if
  1483. ** the pending terms hash table contains the terms "sqlite", "mysql" and
  1484. ** "firebird", then the iterator visits the following 'terms' (in the order
  1485. ** shown):
  1486. **
  1487. ** f fi fir fire fireb firebi firebir firebird
  1488. ** m my mys mysq mysql
  1489. ** s sq sql sqli sqlit sqlite
  1490. **
  1491. ** Whereas if isPrefixIter is zero, the terms visited are:
  1492. **
  1493. ** firebird mysql sqlite
  1494. */
  1495. int sqlite3Fts3SegReaderPending(
  1496. Fts3Table *p, /* Virtual table handle */
  1497. int iIndex, /* Index for p->aIndex */
  1498. const char *zTerm, /* Term to search for */
  1499. int nTerm, /* Size of buffer zTerm */
  1500. int bPrefix, /* True for a prefix iterator */
  1501. Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */
  1502. ){
  1503. Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */
  1504. Fts3HashElem *pE; /* Iterator variable */
  1505. Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */
  1506. int nElem = 0; /* Size of array at aElem */
  1507. int rc = SQLITE_OK; /* Return Code */
  1508. Fts3Hash *pHash;
  1509. pHash = &p->aIndex[iIndex].hPending;
  1510. if( bPrefix ){
  1511. int nAlloc = 0; /* Size of allocated array at aElem */
  1512. for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
  1513. char *zKey = (char *)fts3HashKey(pE);
  1514. int nKey = fts3HashKeysize(pE);
  1515. if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
  1516. if( nElem==nAlloc ){
  1517. Fts3HashElem **aElem2;
  1518. nAlloc += 16;
  1519. aElem2 = (Fts3HashElem **)sqlite3_realloc(
  1520. aElem, nAlloc*sizeof(Fts3HashElem *)
  1521. );
  1522. if( !aElem2 ){
  1523. rc = SQLITE_NOMEM;
  1524. nElem = 0;
  1525. break;
  1526. }
  1527. aElem = aElem2;
  1528. }
  1529. aElem[nElem++] = pE;
  1530. }
  1531. }
  1532. /* If more than one term matches the prefix, sort the Fts3HashElem
  1533. ** objects in term order using qsort(). This uses the same comparison
  1534. ** callback as is used when flushing terms to disk.
  1535. */
  1536. if( nElem>1 ){
  1537. qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
  1538. }
  1539. }else{
  1540. /* The query is a simple term lookup that matches at most one term in
  1541. ** the index. All that is required is a straight hash-lookup.
  1542. **
  1543. ** Because the stack address of pE may be accessed via the aElem pointer
  1544. ** below, the "Fts3HashElem *pE" must be declared so that it is valid
  1545. ** within this entire function, not just this "else{...}" block.
  1546. */
  1547. pE = fts3HashFindElem(pHash, zTerm, nTerm);
  1548. if( pE ){
  1549. aElem = &pE;
  1550. nElem = 1;
  1551. }
  1552. }
  1553. if( nElem>0 ){
  1554. int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *);
  1555. pReader = (Fts3SegReader *)sqlite3_malloc(nByte);
  1556. if( !pReader ){
  1557. rc = SQLITE_NOMEM;
  1558. }else{
  1559. memset(pReader, 0, nByte);
  1560. pReader->iIdx = 0x7FFFFFFF;
  1561. pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
  1562. memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
  1563. }
  1564. }
  1565. if( bPrefix ){
  1566. sqlite3_free(aElem);
  1567. }
  1568. *ppReader = pReader;
  1569. return rc;
  1570. }
  1571. /*
  1572. ** Compare the entries pointed to by two Fts3SegReader structures.
  1573. ** Comparison is as follows:
  1574. **
  1575. ** 1) EOF is greater than not EOF.
  1576. **
  1577. ** 2) The current terms (if any) are compared using memcmp(). If one
  1578. ** term is a prefix of another, the longer term is considered the
  1579. ** larger.
  1580. **
  1581. ** 3) By segment age. An older segment is considered larger.
  1582. */
  1583. static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
  1584. int rc;
  1585. if( pLhs->aNode && pRhs->aNode ){
  1586. int rc2 = pLhs->nTerm - pRhs->nTerm;
  1587. if( rc2<0 ){
  1588. rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm);
  1589. }else{
  1590. rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm);
  1591. }
  1592. if( rc==0 ){
  1593. rc = rc2;
  1594. }
  1595. }else{
  1596. rc = (pLhs->aNode==0) - (pRhs->aNode==0);
  1597. }
  1598. if( rc==0 ){
  1599. rc = pRhs->iIdx - pLhs->iIdx;
  1600. }
  1601. assert( rc!=0 );
  1602. return rc;
  1603. }
  1604. /*
  1605. ** A different comparison function for SegReader structures. In this
  1606. ** version, it is assumed that each SegReader points to an entry in
  1607. ** a doclist for identical terms. Comparison is made as follows:
  1608. **
  1609. ** 1) EOF (end of doclist in this case) is greater than not EOF.
  1610. **
  1611. ** 2) By current docid.
  1612. **
  1613. ** 3) By segment age. An older segment is considered larger.
  1614. */
  1615. static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
  1616. int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0);
  1617. if( rc==0 ){
  1618. if( pLhs->iDocid==pRhs->iDocid ){
  1619. rc = pRhs->iIdx - pLhs->iIdx;
  1620. }else{
  1621. rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1;
  1622. }
  1623. }
  1624. assert( pLhs->aNode && pRhs->aNode );
  1625. return rc;
  1626. }
  1627. static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
  1628. int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0);
  1629. if( rc==0 ){
  1630. if( pLhs->iDocid==pRhs->iDocid ){
  1631. rc = pRhs->iIdx - pLhs->iIdx;
  1632. }else{
  1633. rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1;
  1634. }
  1635. }
  1636. assert( pLhs->aNode && pRhs->aNode );
  1637. return rc;
  1638. }
  1639. /*
  1640. ** Compare the term that the Fts3SegReader object passed as the first argument
  1641. ** points to with the term specified by arguments zTerm and nTerm.
  1642. **
  1643. ** If the pSeg iterator is already at EOF, return 0. Otherwise, return
  1644. ** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are
  1645. ** equal, or +ve if the pSeg term is greater than zTerm/nTerm.
  1646. */
  1647. static int fts3SegReaderTermCmp(
  1648. Fts3SegReader *pSeg, /* Segment reader object */
  1649. const char *zTerm, /* Term to compare to */
  1650. int nTerm /* Size of term zTerm in bytes */
  1651. ){
  1652. int res = 0;
  1653. if( pSeg->aNode ){
  1654. if( pSeg->nTerm>nTerm ){
  1655. res = memcmp(pSeg->zTerm, zTerm, nTerm);
  1656. }else{
  1657. res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm);
  1658. }
  1659. if( res==0 ){
  1660. res = pSeg->nTerm-nTerm;
  1661. }
  1662. }
  1663. return res;
  1664. }
  1665. /*
  1666. ** Argument apSegment is an array of nSegment elements. It is known that
  1667. ** the final (nSegment-nSuspect) members are already in sorted order
  1668. ** (according to the comparison function provided). This function shuffles
  1669. ** the array around until all entries are in sorted order.
  1670. */
  1671. static void fts3SegReaderSort(
  1672. Fts3SegReader **apSegment, /* Array to sort entries of */
  1673. int nSegment, /* Size of apSegment array */
  1674. int nSuspect, /* Unsorted entry count */
  1675. int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */
  1676. ){
  1677. int i; /* Iterator variable */
  1678. assert( nSuspect<=nSegment );
  1679. if( nSuspect==nSegment ) nSuspect--;
  1680. for(i=nSuspect-1; i>=0; i--){
  1681. int j;
  1682. for(j=i; j<(nSegment-1); j++){
  1683. Fts3SegReader *pTmp;
  1684. if( xCmp(apSegment[j], apSegment[j+1])<0 ) break;
  1685. pTmp = apSegment[j+1];
  1686. apSegment[j+1] = apSegment[j];
  1687. apSegment[j] = pTmp;
  1688. }
  1689. }
  1690. #ifndef NDEBUG
  1691. /* Check that the list really is sorted now. */
  1692. for(i=0; i<(nSuspect-1); i++){
  1693. assert( xCmp(apSegment[i], apSegment[i+1])<0 );
  1694. }
  1695. #endif
  1696. }
  1697. /*
  1698. ** Insert a record into the %_segments table.
  1699. */
  1700. static int fts3WriteSegment(
  1701. Fts3Table *p, /* Virtual table handle */
  1702. sqlite3_int64 iBlock, /* Block id for new block */
  1703. char *z, /* Pointer to buffer containing block data */
  1704. int n /* Size of buffer z in bytes */
  1705. ){
  1706. sqlite3_stmt *pStmt;
  1707. int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0);
  1708. if( rc==SQLITE_OK ){
  1709. sqlite3_bind_int64(pStmt, 1, iBlock);
  1710. sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC);
  1711. sqlite3_step(pStmt);
  1712. rc = sqlite3_reset(pStmt);
  1713. }
  1714. return rc;
  1715. }
  1716. /*
  1717. ** Find the largest relative level number in the table. If successful, set
  1718. ** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs,
  1719. ** set *pnMax to zero and return an SQLite error code.
  1720. */
  1721. int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){
  1722. int rc;
  1723. int mxLevel = 0;
  1724. sqlite3_stmt *pStmt = 0;
  1725. rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0);
  1726. if( rc==SQLITE_OK ){
  1727. if( SQLITE_ROW==sqlite3_step(pStmt) ){
  1728. mxLevel = sqlite3_column_int(pStmt, 0);
  1729. }
  1730. rc = sqlite3_reset(pStmt);
  1731. }
  1732. *pnMax = mxLevel;
  1733. return rc;
  1734. }
  1735. /*
  1736. ** Insert a record into the %_segdir table.
  1737. */
  1738. static int fts3WriteSegdir(
  1739. Fts3Table *p, /* Virtual table handle */
  1740. sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */
  1741. int iIdx, /* Value for "idx" field */
  1742. sqlite3_int64 iStartBlock, /* Value for "start_block" field */
  1743. sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
  1744. sqlite3_int64 iEndBlock, /* Value for "end_block" field */
  1745. char *zRoot, /* Blob value for "root" field */
  1746. int nRoot /* Number of bytes in buffer zRoot */
  1747. ){
  1748. sqlite3_stmt *pStmt;
  1749. int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0);
  1750. if( rc==SQLITE_OK ){
  1751. sqlite3_bind_int64(pStmt, 1, iLevel);
  1752. sqlite3_bind_int(pStmt, 2, iIdx);
  1753. sqlite3_bind_int64(pStmt, 3, iStartBlock);
  1754. sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
  1755. sqlite3_bind_int64(pStmt, 5, iEndBlock);
  1756. sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
  1757. sqlite3_step(pStmt);
  1758. rc = sqlite3_reset(pStmt);
  1759. }
  1760. return rc;
  1761. }
  1762. /*
  1763. ** Return the size of the common prefix (if any) shared by zPrev and
  1764. ** zNext, in bytes. For example,
  1765. **
  1766. ** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3
  1767. ** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2
  1768. ** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0
  1769. */
  1770. static int fts3PrefixCompress(
  1771. const char *zPrev, /* Buffer containing previous term */
  1772. int nPrev, /* Size of buffer zPrev in bytes */
  1773. const char *zNext, /* Buffer containing next term */
  1774. int nNext /* Size of buffer zNext in bytes */
  1775. ){
  1776. int n;
  1777. UNUSED_PARAMETER(nNext);
  1778. for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++);
  1779. return n;
  1780. }
  1781. /*
  1782. ** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger
  1783. ** (according to memcmp) than the previous term.
  1784. */
  1785. static int fts3NodeAddTerm(
  1786. Fts3Table *p, /* Virtual table handle */
  1787. SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */
  1788. int isCopyTerm, /* True if zTerm/nTerm is transient */
  1789. const char *zTerm, /* Pointer to buffer containing term */
  1790. int nTerm /* Size of term in bytes */
  1791. ){
  1792. SegmentNode *pTree = *ppTree;
  1793. int rc;
  1794. SegmentNode *pNew;
  1795. /* First try to append the term to the current node. Return early if
  1796. ** this is possible.
  1797. */
  1798. if( pTree ){
  1799. int nData = pTree->nData; /* Current size of node in bytes */
  1800. int nReq = nData; /* Required space after adding zTerm */
  1801. int nPrefix; /* Number of bytes of prefix compression */
  1802. int nSuffix; /* Suffix length */
  1803. nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm);
  1804. nSuffix = nTerm-nPrefix;
  1805. nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix;
  1806. if( nReq<=p->nNodeSize || !pTree->zTerm ){
  1807. if( nReq>p->nNodeSize ){
  1808. /* An unusual case: this is the first term to be added to the node
  1809. ** and the static node buffer (p->nNodeSize bytes) is not large
  1810. ** enough. Use a separately malloced buffer instead This wastes
  1811. ** p->nNodeSize bytes, but since this scenario only comes about when
  1812. ** the database contain two terms that share a prefix of almost 2KB,
  1813. ** this is not expected to be a serious problem.
  1814. */
  1815. assert( pTree->aData==(char *)&pTree[1] );
  1816. pTree->aData = (char *)sqlite3_malloc(nReq);
  1817. if( !pTree->aData ){
  1818. return SQLITE_NOMEM;
  1819. }
  1820. }
  1821. if( pTree->zTerm ){
  1822. /* There is no prefix-length field for first term in a node */
  1823. nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix);
  1824. }
  1825. nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix);
  1826. memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix);
  1827. pTree->nData = nData + nSuffix;
  1828. pTree->nEntry++;
  1829. if( isCopyTerm ){
  1830. if( pTree->nMalloc<nTerm ){
  1831. char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2);
  1832. if( !zNew ){
  1833. return SQLITE_NOMEM;
  1834. }
  1835. pTree->nMalloc = nTerm*2;
  1836. pTree->zMalloc = zNew;
  1837. }
  1838. pTree->zTerm = pTree->zMalloc;
  1839. memcpy(pTree->zTerm, zTerm, nTerm);
  1840. pTree->nTerm = nTerm;
  1841. }else{
  1842. pTree->zTerm = (char *)zTerm;
  1843. pTree->nTerm = nTerm;
  1844. }
  1845. return SQLITE_OK;
  1846. }
  1847. }
  1848. /* If control flows to here, it was not possible to append zTerm to the
  1849. ** current node. Create a new node (a right-sibling of the current node).
  1850. ** If this is the first node in the tree, the term is added to it.
  1851. **
  1852. ** Otherwise, the term is not added to the new node, it is left empty for
  1853. ** now. Instead, the term is inserted into the parent of pTree. If pTree
  1854. ** has no parent, one is created here.
  1855. */
  1856. pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize);
  1857. if( !pNew ){
  1858. return SQLITE_NOMEM;
  1859. }
  1860. memset(pNew, 0, sizeof(SegmentNode));
  1861. pNew->nData = 1 + FTS3_VARINT_MAX;
  1862. pNew->aData = (char *)&pNew[1];
  1863. if( pTree ){
  1864. SegmentNode *pParent = pTree->pParent;
  1865. rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm);
  1866. if( pTree->pParent==0 ){
  1867. pTree->pParent = pParent;
  1868. }
  1869. pTree->pRight = pNew;
  1870. pNew->pLeftmost = pTree->pLeftmost;
  1871. pNew->pParent = pParent;
  1872. pNew->zMalloc = pTree->zMalloc;
  1873. pNew->nMalloc = pTree->nMalloc;
  1874. pTree->zMalloc = 0;
  1875. }else{
  1876. pNew->pLeftmost = pNew;
  1877. rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm);
  1878. }
  1879. *ppTree = pNew;
  1880. return rc;
  1881. }
  1882. /*
  1883. ** Helper function for fts3NodeWrite().
  1884. */
  1885. static int fts3TreeFinishNode(
  1886. SegmentNode *pTree,
  1887. int iHeight,
  1888. sqlite3_int64 iLeftChild
  1889. ){
  1890. int nStart;
  1891. assert( iHeight>=1 && iHeight<128 );
  1892. nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild);
  1893. pTree->aData[nStart] = (char)iHeight;
  1894. sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild);
  1895. return nStart;
  1896. }
  1897. /*
  1898. ** Write the buffer for the segment node pTree and all of its peers to the
  1899. ** database. Then call this function recursively to write the parent of
  1900. ** pTree and its peers to the database.
  1901. **
  1902. ** Except, if pTree is a root node, do not write it to the database. Instead,
  1903. ** set output variables *paRoot and *pnRoot to contain the root node.
  1904. **
  1905. ** If successful, SQLITE_OK is returned and output variable *piLast is
  1906. ** set to the largest blockid written to the database (or zero if no
  1907. ** blocks were written to the db). Otherwise, an SQLite error code is
  1908. ** returned.
  1909. */
  1910. static int fts3NodeWrite(
  1911. Fts3Table *p, /* Virtual table handle */
  1912. SegmentNode *pTree, /* SegmentNode handle */
  1913. int iHeight, /* Height of this node in tree */
  1914. sqlite3_int64 iLeaf, /* Block id of first leaf node */
  1915. sqlite3_int64 iFree, /* Block id of next free slot in %_segments */
  1916. sqlite3_int64 *piLast, /* OUT: Block id of last entry written */
  1917. char **paRoot, /* OUT: Data for root node */
  1918. int *pnRoot /* OUT: Size of root node in bytes */
  1919. ){
  1920. int rc = SQLITE_OK;
  1921. if( !pTree->pParent ){
  1922. /* Root node of the tree. */
  1923. int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf);
  1924. *piLast = iFree-1;
  1925. *pnRoot = pTree->nData - nStart;
  1926. *paRoot = &pTree->aData[nStart];
  1927. }else{
  1928. SegmentNode *pIter;
  1929. sqlite3_int64 iNextFree = iFree;
  1930. sqlite3_int64 iNextLeaf = iLeaf;
  1931. for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){
  1932. int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf);
  1933. int nWrite = pIter->nData - nStart;
  1934. rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite);
  1935. iNextFree++;
  1936. iNextLeaf += (pIter->nEntry+1);
  1937. }
  1938. if( rc==SQLITE_OK ){
  1939. assert( iNextLeaf==iFree );
  1940. rc = fts3NodeWrite(
  1941. p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot
  1942. );
  1943. }
  1944. }
  1945. return rc;
  1946. }
  1947. /*
  1948. ** Free all memory allocations associated with the tree pTree.
  1949. */
  1950. static void fts3NodeFree(SegmentNode *pTree){
  1951. if( pTree ){
  1952. SegmentNode *p = pTree->pLeftmost;
  1953. fts3NodeFree(p->pParent);
  1954. while( p ){
  1955. SegmentNode *pRight = p->pRight;
  1956. if( p->aData!=(char *)&p[1] ){
  1957. sqlite3_free(p->aData);
  1958. }
  1959. assert( pRight==0 || p->zMalloc==0 );
  1960. sqlite3_free(p->zMalloc);
  1961. sqlite3_free(p);
  1962. p = pRight;
  1963. }
  1964. }
  1965. }
  1966. /*
  1967. ** Add a term to the segment being constructed by the SegmentWriter object
  1968. ** *ppWriter. When adding the first term to a segment, *ppWriter should
  1969. ** be passed NULL. This function will allocate a new SegmentWriter object
  1970. ** and return it via the input/output variable *ppWriter in this case.
  1971. **
  1972. ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
  1973. */
  1974. static int fts3SegWriterAdd(
  1975. Fts3Table *p, /* Virtual table handle */
  1976. SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */
  1977. int isCopyTerm, /* True if buffer zTerm must be copied */
  1978. const char *zTerm, /* Pointer to buffer containing term */
  1979. int nTerm, /* Size of term in bytes */
  1980. const char *aDoclist, /* Pointer to buffer containing doclist */
  1981. int nDoclist /* Size of doclist in bytes */
  1982. ){
  1983. int nPrefix; /* Size of term prefix in bytes */
  1984. int nSuffix; /* Size of term suffix in bytes */
  1985. int nReq; /* Number of bytes required on leaf page */
  1986. int nData;
  1987. SegmentWriter *pWriter = *ppWriter;
  1988. if( !pWriter ){
  1989. int rc;
  1990. sqlite3_stmt *pStmt;
  1991. /* Allocate the SegmentWriter structure */
  1992. pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter));
  1993. if( !pWriter ) return SQLITE_NOMEM;
  1994. memset(pWriter, 0, sizeof(SegmentWriter));
  1995. *ppWriter = pWriter;
  1996. /* Allocate a buffer in which to accumulate data */
  1997. pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize);
  1998. if( !pWriter->aData ) return SQLITE_NOMEM;
  1999. pWriter->nSize = p->nNodeSize;
  2000. /* Find the next free blockid in the %_segments table */
  2001. rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0);
  2002. if( rc!=SQLITE_OK ) return rc;
  2003. if( SQLITE_ROW==sqlite3_step(pStmt) ){
  2004. pWriter->iFree = sqlite3_column_int64(pStmt, 0);
  2005. pWriter->iFirst = pWriter->iFree;
  2006. }
  2007. rc = sqlite3_reset(pStmt);
  2008. if( rc!=SQLITE_OK ) return rc;
  2009. }
  2010. nData = pWriter->nData;
  2011. nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm);
  2012. nSuffix = nTerm-nPrefix;
  2013. /* Figure out how many bytes are required by this new entry */
  2014. nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */
  2015. sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */
  2016. nSuffix + /* Term suffix */
  2017. sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
  2018. nDoclist; /* Doclist data */
  2019. if( nData>0 && nData+nReq>p->nNodeSize ){
  2020. int rc;
  2021. /* The current leaf node is full. Write it out to the database. */
  2022. rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
  2023. if( rc!=SQLITE_OK ) return rc;
  2024. p->nLeafAdd++;
  2025. /* Add the current term to the interior node tree. The term added to
  2026. ** the interior tree must:
  2027. **
  2028. ** a) be greater than the largest term on the leaf node just written
  2029. ** to the database (still available in pWriter->zTerm), and
  2030. **
  2031. ** b) be less than or equal to the term about to be added to the new
  2032. ** leaf node (zTerm/nTerm).
  2033. **
  2034. ** In other words, it must be the prefix of zTerm 1 byte longer than
  2035. ** the common prefix (if any) of zTerm and pWriter->zTerm.
  2036. */
  2037. assert( nPrefix<nTerm );
  2038. rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1);
  2039. if( rc!=SQLITE_OK ) return rc;
  2040. nData = 0;
  2041. pWriter->nTerm = 0;
  2042. nPrefix = 0;
  2043. nSuffix = nTerm;
  2044. nReq = 1 + /* varint containing prefix size */
  2045. sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */
  2046. nTerm + /* Term suffix */
  2047. sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
  2048. nDoclist; /* Doclist data */
  2049. }
  2050. /* If the buffer currently allocated is too small for this entry, realloc
  2051. ** the buffer to make it large enough.
  2052. */
  2053. if( nReq>pWriter->nSize ){
  2054. char *aNew = sqlite3_realloc(pWriter->aData, nReq);
  2055. if( !aNew ) return SQLITE_NOMEM;
  2056. pWriter->aData = aNew;
  2057. pWriter->nSize = nReq;
  2058. }
  2059. assert( nData+nReq<=pWriter->nSize );
  2060. /* Append the prefix-compressed term and doclist to the buffer. */
  2061. nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix);
  2062. nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix);
  2063. memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix);
  2064. nData += nSuffix;
  2065. nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist);
  2066. memcpy(&pWriter->aData[nData], aDoclist, nDoclist);
  2067. pWriter->nData = nData + nDoclist;
  2068. /* Save the current term so that it can be used to prefix-compress the next.
  2069. ** If the isCopyTerm parameter is true, then the buffer pointed to by
  2070. ** zTerm is transient, so take a copy of the term data. Otherwise, just
  2071. ** store a copy of the pointer.
  2072. */
  2073. if( isCopyTerm ){
  2074. if( nTerm>pWriter->nMalloc ){
  2075. char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2);
  2076. if( !zNew ){
  2077. return SQLITE_NOMEM;
  2078. }
  2079. pWriter->nMalloc = nTerm*2;
  2080. pWriter->zMalloc = zNew;
  2081. pWriter->zTerm = zNew;
  2082. }
  2083. assert( pWriter->zTerm==pWriter->zMalloc );
  2084. memcpy(pWriter->zTerm, zTerm, nTerm);
  2085. }else{
  2086. pWriter->zTerm = (char *)zTerm;
  2087. }
  2088. pWriter->nTerm = nTerm;
  2089. return SQLITE_OK;
  2090. }
  2091. /*
  2092. ** Flush all data associated with the SegmentWriter object pWriter to the
  2093. ** database. This function must be called after all terms have been added
  2094. ** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is
  2095. ** returned. Otherwise, an SQLite error code.
  2096. */
  2097. static int fts3SegWriterFlush(
  2098. Fts3Table *p, /* Virtual table handle */
  2099. SegmentWriter *pWriter, /* SegmentWriter to flush to the db */
  2100. sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */
  2101. int iIdx /* Value for 'idx' column of %_segdir */
  2102. ){
  2103. int rc; /* Return code */
  2104. if( pWriter->pTree ){
  2105. sqlite3_int64 iLast = 0; /* Largest block id written to database */
  2106. sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */
  2107. char *zRoot = NULL; /* Pointer to buffer containing root node */
  2108. int nRoot = 0; /* Size of buffer zRoot */
  2109. iLastLeaf = pWriter->iFree;
  2110. rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData);
  2111. if( rc==SQLITE_OK ){
  2112. rc = fts3NodeWrite(p, pWriter->pTree, 1,
  2113. pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
  2114. }
  2115. if( rc==SQLITE_OK ){
  2116. rc = fts3WriteSegdir(
  2117. p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
  2118. }
  2119. }else{
  2120. /* The entire tree fits on the root node. Write it to the segdir table. */
  2121. rc = fts3WriteSegdir(
  2122. p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
  2123. }
  2124. p->nLeafAdd++;
  2125. return rc;
  2126. }
  2127. /*
  2128. ** Release all memory held by the SegmentWriter object passed as the
  2129. ** first argument.
  2130. */
  2131. static void fts3SegWriterFree(SegmentWriter *pWriter){
  2132. if( pWriter ){
  2133. sqlite3_free(pWriter->aData);
  2134. sqlite3_free(pWriter->zMalloc);
  2135. fts3NodeFree(pWriter->pTree);
  2136. sqlite3_free(pWriter);
  2137. }
  2138. }
  2139. /*
  2140. ** The first value in the apVal[] array is assumed to contain an integer.
  2141. ** This function tests if there exist any documents with docid values that
  2142. ** are different from that integer. i.e. if deleting the document with docid
  2143. ** pRowid would mean the FTS3 table were empty.
  2144. **
  2145. ** If successful, *pisEmpty is set to true if the table is empty except for
  2146. ** document pRowid, or false otherwise, and SQLITE_OK is returned. If an
  2147. ** error occurs, an SQLite error code is returned.
  2148. */
  2149. static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
  2150. sqlite3_stmt *pStmt;
  2151. int rc;
  2152. if( p->zContentTbl ){
  2153. /* If using the content=xxx option, assume the table is never empty */
  2154. *pisEmpty = 0;
  2155. rc = SQLITE_OK;
  2156. }else{
  2157. rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
  2158. if( rc==SQLITE_OK ){
  2159. if( SQLITE_ROW==sqlite3_step(pStmt) ){
  2160. *pisEmpty = sqlite3_column_int(pStmt, 0);
  2161. }
  2162. rc = sqlite3_reset(pStmt);
  2163. }
  2164. }
  2165. return rc;
  2166. }
  2167. /*
  2168. ** Set *pnMax to the largest segment level in the database for the index
  2169. ** iIndex.
  2170. **
  2171. ** Segment levels are stored in the 'level' column of the %_segdir table.
  2172. **
  2173. ** Return SQLITE_OK if successful, or an SQLite error code if not.
  2174. */
  2175. static int fts3SegmentMaxLevel(
  2176. Fts3Table *p,
  2177. int iLangid,
  2178. int iIndex,
  2179. sqlite3_int64 *pnMax
  2180. ){
  2181. sqlite3_stmt *pStmt;
  2182. int rc;
  2183. assert( iIndex>=0 && iIndex<p->nIndex );
  2184. /* Set pStmt to the compiled version of:
  2185. **
  2186. ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  2187. **
  2188. ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  2189. */
  2190. rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  2191. if( rc!=SQLITE_OK ) return rc;
  2192. sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  2193. sqlite3_bind_int64(pStmt, 2,
  2194. getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  2195. );
  2196. if( SQLITE_ROW==sqlite3_step(pStmt) ){
  2197. *pnMax = sqlite3_column_int64(pStmt, 0);
  2198. }
  2199. return sqlite3_reset(pStmt);
  2200. }
  2201. /*
  2202. ** Delete all entries in the %_segments table associated with the segment
  2203. ** opened with seg-reader pSeg. This function does not affect the contents
  2204. ** of the %_segdir table.
  2205. */
  2206. static int fts3DeleteSegment(
  2207. Fts3Table *p, /* FTS table handle */
  2208. Fts3SegReader *pSeg /* Segment to delete */
  2209. ){
  2210. int rc = SQLITE_OK; /* Return code */
  2211. if( pSeg->iStartBlock ){
  2212. sqlite3_stmt *pDelete; /* SQL statement to delete rows */
  2213. rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
  2214. if( rc==SQLITE_OK ){
  2215. sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock);
  2216. sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock);
  2217. sqlite3_step(pDelete);
  2218. rc = sqlite3_reset(pDelete);
  2219. }
  2220. }
  2221. return rc;
  2222. }
  2223. /*
  2224. ** This function is used after merging multiple segments into a single large
  2225. ** segment to delete the old, now redundant, segment b-trees. Specifically,
  2226. ** it:
  2227. **
  2228. ** 1) Deletes all %_segments entries for the segments associated with
  2229. ** each of the SegReader objects in the array passed as the third
  2230. ** argument, and
  2231. **
  2232. ** 2) deletes all %_segdir entries with level iLevel, or all %_segdir
  2233. ** entries regardless of level if (iLevel<0).
  2234. **
  2235. ** SQLITE_OK is returned if successful, otherwise an SQLite error code.
  2236. */
  2237. static int fts3DeleteSegdir(
  2238. Fts3Table *p, /* Virtual table handle */
  2239. int iLangid, /* Language id */
  2240. int iIndex, /* Index for p->aIndex */
  2241. int iLevel, /* Level of %_segdir entries to delete */
  2242. Fts3SegReader **apSegment, /* Array of SegReader objects */
  2243. int nReader /* Size of array apSegment */
  2244. ){
  2245. int rc = SQLITE_OK; /* Return Code */
  2246. int i; /* Iterator variable */
  2247. sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */
  2248. for(i=0; rc==SQLITE_OK && i<nReader; i++){
  2249. rc = fts3DeleteSegment(p, apSegment[i]);
  2250. }
  2251. if( rc!=SQLITE_OK ){
  2252. return rc;
  2253. }
  2254. assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
  2255. if( iLevel==FTS3_SEGCURSOR_ALL ){
  2256. rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
  2257. if( rc==SQLITE_OK ){
  2258. sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  2259. sqlite3_bind_int64(pDelete, 2,
  2260. getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  2261. );
  2262. }
  2263. }else{
  2264. rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
  2265. if( rc==SQLITE_OK ){
  2266. sqlite3_bind_int64(
  2267. pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
  2268. );
  2269. }
  2270. }
  2271. if( rc==SQLITE_OK ){
  2272. sqlite3_step(pDelete);
  2273. rc = sqlite3_reset(pDelete);
  2274. }
  2275. return rc;
  2276. }
  2277. /*
  2278. ** When this function is called, buffer *ppList (size *pnList bytes) contains
  2279. ** a position list that may (or may not) feature multiple columns. This
  2280. ** function adjusts the pointer *ppList and the length *pnList so that they
  2281. ** identify the subset of the position list that corresponds to column iCol.
  2282. **
  2283. ** If there are no entries in the input position list for column iCol, then
  2284. ** *pnList is set to zero before returning.
  2285. **
  2286. ** If parameter bZero is non-zero, then any part of the input list following
  2287. ** the end of the output list is zeroed before returning.
  2288. */
  2289. static void fts3ColumnFilter(
  2290. int iCol, /* Column to filter on */
  2291. int bZero, /* Zero out anything following *ppList */
  2292. char **ppList, /* IN/OUT: Pointer to position list */
  2293. int *pnList /* IN/OUT: Size of buffer *ppList in bytes */
  2294. ){
  2295. char *pList = *ppList;
  2296. int nList = *pnList;
  2297. char *pEnd = &pList[nList];
  2298. int iCurrent = 0;
  2299. char *p = pList;
  2300. assert( iCol>=0 );
  2301. while( 1 ){
  2302. char c = 0;
  2303. while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
  2304. if( iCol==iCurrent ){
  2305. nList = (int)(p - pList);
  2306. break;
  2307. }
  2308. nList -= (int)(p - pList);
  2309. pList = p;
  2310. if( nList==0 ){
  2311. break;
  2312. }
  2313. p = &pList[1];
  2314. p += sqlite3Fts3GetVarint32(p, &iCurrent);
  2315. }
  2316. if( bZero && &pList[nList]!=pEnd ){
  2317. memset(&pList[nList], 0, pEnd - &pList[nList]);
  2318. }
  2319. *ppList = pList;
  2320. *pnList = nList;
  2321. }
  2322. /*
  2323. ** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any
  2324. ** existing data). Grow the buffer if required.
  2325. **
  2326. ** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered
  2327. ** trying to resize the buffer, return SQLITE_NOMEM.
  2328. */
  2329. static int fts3MsrBufferData(
  2330. Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */
  2331. char *pList,
  2332. int nList
  2333. ){
  2334. if( nList>pMsr->nBuffer ){
  2335. char *pNew;
  2336. pMsr->nBuffer = nList*2;
  2337. pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer);
  2338. if( !pNew ) return SQLITE_NOMEM;
  2339. pMsr->aBuffer = pNew;
  2340. }
  2341. memcpy(pMsr->aBuffer, pList, nList);
  2342. return SQLITE_OK;
  2343. }
  2344. int sqlite3Fts3MsrIncrNext(
  2345. Fts3Table *p, /* Virtual table handle */
  2346. Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */
  2347. sqlite3_int64 *piDocid, /* OUT: Docid value */
  2348. char **paPoslist, /* OUT: Pointer to position list */
  2349. int *pnPoslist /* OUT: Size of position list in bytes */
  2350. ){
  2351. int nMerge = pMsr->nAdvance;
  2352. Fts3SegReader **apSegment = pMsr->apSegment;
  2353. int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
  2354. p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
  2355. );
  2356. if( nMerge==0 ){
  2357. *paPoslist = 0;
  2358. return SQLITE_OK;
  2359. }
  2360. while( 1 ){
  2361. Fts3SegReader *pSeg;
  2362. pSeg = pMsr->apSegment[0];
  2363. if( pSeg->pOffsetList==0 ){
  2364. *paPoslist = 0;
  2365. break;
  2366. }else{
  2367. int rc;
  2368. char *pList;
  2369. int nList;
  2370. int j;
  2371. sqlite3_int64 iDocid = apSegment[0]->iDocid;
  2372. rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
  2373. j = 1;
  2374. while( rc==SQLITE_OK
  2375. && j<nMerge
  2376. && apSegment[j]->pOffsetList
  2377. && apSegment[j]->iDocid==iDocid
  2378. ){
  2379. rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0);
  2380. j++;
  2381. }
  2382. if( rc!=SQLITE_OK ) return rc;
  2383. fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp);
  2384. if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){
  2385. rc = fts3MsrBufferData(pMsr, pList, nList+1);
  2386. if( rc!=SQLITE_OK ) return rc;
  2387. assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 );
  2388. pList = pMsr->aBuffer;
  2389. }
  2390. if( pMsr->iColFilter>=0 ){
  2391. fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList);
  2392. }
  2393. if( nList>0 ){
  2394. *paPoslist = pList;
  2395. *piDocid = iDocid;
  2396. *pnPoslist = nList;
  2397. break;
  2398. }
  2399. }
  2400. }
  2401. return SQLITE_OK;
  2402. }
  2403. static int fts3SegReaderStart(
  2404. Fts3Table *p, /* Virtual table handle */
  2405. Fts3MultiSegReader *pCsr, /* Cursor object */
  2406. const char *zTerm, /* Term searched for (or NULL) */
  2407. int nTerm /* Length of zTerm in bytes */
  2408. ){
  2409. int i;
  2410. int nSeg = pCsr->nSegment;
  2411. /* If the Fts3SegFilter defines a specific term (or term prefix) to search
  2412. ** for, then advance each segment iterator until it points to a term of
  2413. ** equal or greater value than the specified term. This prevents many
  2414. ** unnecessary merge/sort operations for the case where single segment
  2415. ** b-tree leaf nodes contain more than one term.
  2416. */
  2417. for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){
  2418. int res = 0;
  2419. Fts3SegReader *pSeg = pCsr->apSegment[i];
  2420. do {
  2421. int rc = fts3SegReaderNext(p, pSeg, 0);
  2422. if( rc!=SQLITE_OK ) return rc;
  2423. }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 );
  2424. if( pSeg->bLookup && res!=0 ){
  2425. fts3SegReaderSetEof(pSeg);
  2426. }
  2427. }
  2428. fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp);
  2429. return SQLITE_OK;
  2430. }
  2431. int sqlite3Fts3SegReaderStart(
  2432. Fts3Table *p, /* Virtual table handle */
  2433. Fts3MultiSegReader *pCsr, /* Cursor object */
  2434. Fts3SegFilter *pFilter /* Restrictions on range of iteration */
  2435. ){
  2436. pCsr->pFilter = pFilter;
  2437. return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm);
  2438. }
  2439. int sqlite3Fts3MsrIncrStart(
  2440. Fts3Table *p, /* Virtual table handle */
  2441. Fts3MultiSegReader *pCsr, /* Cursor object */
  2442. int iCol, /* Column to match on. */
  2443. const char *zTerm, /* Term to iterate through a doclist for */
  2444. int nTerm /* Number of bytes in zTerm */
  2445. ){
  2446. int i;
  2447. int rc;
  2448. int nSegment = pCsr->nSegment;
  2449. int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
  2450. p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
  2451. );
  2452. assert( pCsr->pFilter==0 );
  2453. assert( zTerm && nTerm>0 );
  2454. /* Advance each segment iterator until it points to the term zTerm/nTerm. */
  2455. rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm);
  2456. if( rc!=SQLITE_OK ) return rc;
  2457. /* Determine how many of the segments actually point to zTerm/nTerm. */
  2458. for(i=0; i<nSegment; i++){
  2459. Fts3SegReader *pSeg = pCsr->apSegment[i];
  2460. if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){
  2461. break;
  2462. }
  2463. }
  2464. pCsr->nAdvance = i;
  2465. /* Advance each of the segments to point to the first docid. */
  2466. for(i=0; i<pCsr->nAdvance; i++){
  2467. rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]);
  2468. if( rc!=SQLITE_OK ) return rc;
  2469. }
  2470. fts3SegReaderSort(pCsr->apSegment, i, i, xCmp);
  2471. assert( iCol<0 || iCol<p->nColumn );
  2472. pCsr->iColFilter = iCol;
  2473. return SQLITE_OK;
  2474. }
  2475. /*
  2476. ** This function is called on a MultiSegReader that has been started using
  2477. ** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also
  2478. ** have been made. Calling this function puts the MultiSegReader in such
  2479. ** a state that if the next two calls are:
  2480. **
  2481. ** sqlite3Fts3SegReaderStart()
  2482. ** sqlite3Fts3SegReaderStep()
  2483. **
  2484. ** then the entire doclist for the term is available in
  2485. ** MultiSegReader.aDoclist/nDoclist.
  2486. */
  2487. int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){
  2488. int i; /* Used to iterate through segment-readers */
  2489. assert( pCsr->zTerm==0 );
  2490. assert( pCsr->nTerm==0 );
  2491. assert( pCsr->aDoclist==0 );
  2492. assert( pCsr->nDoclist==0 );
  2493. pCsr->nAdvance = 0;
  2494. pCsr->bRestart = 1;
  2495. for(i=0; i<pCsr->nSegment; i++){
  2496. pCsr->apSegment[i]->pOffsetList = 0;
  2497. pCsr->apSegment[i]->nOffsetList = 0;
  2498. pCsr->apSegment[i]->iDocid = 0;
  2499. }
  2500. return SQLITE_OK;
  2501. }
  2502. int sqlite3Fts3SegReaderStep(
  2503. Fts3Table *p, /* Virtual table handle */
  2504. Fts3MultiSegReader *pCsr /* Cursor object */
  2505. ){
  2506. int rc = SQLITE_OK;
  2507. int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
  2508. int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
  2509. int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
  2510. int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
  2511. int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
  2512. int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST);
  2513. Fts3SegReader **apSegment = pCsr->apSegment;
  2514. int nSegment = pCsr->nSegment;
  2515. Fts3SegFilter *pFilter = pCsr->pFilter;
  2516. int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
  2517. p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
  2518. );
  2519. if( pCsr->nSegment==0 ) return SQLITE_OK;
  2520. do {
  2521. int nMerge;
  2522. int i;
  2523. /* Advance the first pCsr->nAdvance entries in the apSegment[] array
  2524. ** forward. Then sort the list in order of current term again.
  2525. */
  2526. for(i=0; i<pCsr->nAdvance; i++){
  2527. Fts3SegReader *pSeg = apSegment[i];
  2528. if( pSeg->bLookup ){
  2529. fts3SegReaderSetEof(pSeg);
  2530. }else{
  2531. rc = fts3SegReaderNext(p, pSeg, 0);
  2532. }
  2533. if( rc!=SQLITE_OK ) return rc;
  2534. }
  2535. fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp);
  2536. pCsr->nAdvance = 0;
  2537. /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */
  2538. assert( rc==SQLITE_OK );
  2539. if( apSegment[0]->aNode==0 ) break;
  2540. pCsr->nTerm = apSegment[0]->nTerm;
  2541. pCsr->zTerm = apSegment[0]->zTerm;
  2542. /* If this is a prefix-search, and if the term that apSegment[0] points
  2543. ** to does not share a suffix with pFilter->zTerm/nTerm, then all
  2544. ** required callbacks have been made. In this case exit early.
  2545. **
  2546. ** Similarly, if this is a search for an exact match, and the first term
  2547. ** of segment apSegment[0] is not a match, exit early.
  2548. */
  2549. if( pFilter->zTerm && !isScan ){
  2550. if( pCsr->nTerm<pFilter->nTerm
  2551. || (!isPrefix && pCsr->nTerm>pFilter->nTerm)
  2552. || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm)
  2553. ){
  2554. break;
  2555. }
  2556. }
  2557. nMerge = 1;
  2558. while( nMerge<nSegment
  2559. && apSegment[nMerge]->aNode
  2560. && apSegment[nMerge]->nTerm==pCsr->nTerm
  2561. && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm)
  2562. ){
  2563. nMerge++;
  2564. }
  2565. assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
  2566. if( nMerge==1
  2567. && !isIgnoreEmpty
  2568. && !isFirst
  2569. && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
  2570. ){
  2571. pCsr->nDoclist = apSegment[0]->nDoclist;
  2572. if( fts3SegReaderIsPending(apSegment[0]) ){
  2573. rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist);
  2574. pCsr->aDoclist = pCsr->aBuffer;
  2575. }else{
  2576. pCsr->aDoclist = apSegment[0]->aDoclist;
  2577. }
  2578. if( rc==SQLITE_OK ) rc = SQLITE_ROW;
  2579. }else{
  2580. int nDoclist = 0; /* Size of doclist */
  2581. sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */
  2582. /* The current term of the first nMerge entries in the array
  2583. ** of Fts3SegReader objects is the same. The doclists must be merged
  2584. ** and a single term returned with the merged doclist.
  2585. */
  2586. for(i=0; i<nMerge; i++){
  2587. fts3SegReaderFirstDocid(p, apSegment[i]);
  2588. }
  2589. fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp);
  2590. while( apSegment[0]->pOffsetList ){
  2591. int j; /* Number of segments that share a docid */
  2592. char *pList = 0;
  2593. int nList = 0;
  2594. int nByte;
  2595. sqlite3_int64 iDocid = apSegment[0]->iDocid;
  2596. fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
  2597. j = 1;
  2598. while( j<nMerge
  2599. && apSegment[j]->pOffsetList
  2600. && apSegment[j]->iDocid==iDocid
  2601. ){
  2602. fts3SegReaderNextDocid(p, apSegment[j], 0, 0);
  2603. j++;
  2604. }
  2605. if( isColFilter ){
  2606. fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList);
  2607. }
  2608. if( !isIgnoreEmpty || nList>0 ){
  2609. /* Calculate the 'docid' delta value to write into the merged
  2610. ** doclist. */
  2611. sqlite3_int64 iDelta;
  2612. if( p->bDescIdx && nDoclist>0 ){
  2613. iDelta = iPrev - iDocid;
  2614. }else{
  2615. iDelta = iDocid - iPrev;
  2616. }
  2617. assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) );
  2618. assert( nDoclist>0 || iDelta==iDocid );
  2619. nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0);
  2620. if( nDoclist+nByte>pCsr->nBuffer ){
  2621. char *aNew;
  2622. pCsr->nBuffer = (nDoclist+nByte)*2;
  2623. aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer);
  2624. if( !aNew ){
  2625. return SQLITE_NOMEM;
  2626. }
  2627. pCsr->aBuffer = aNew;
  2628. }
  2629. if( isFirst ){
  2630. char *a = &pCsr->aBuffer[nDoclist];
  2631. int nWrite;
  2632. nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a);
  2633. if( nWrite ){
  2634. iPrev = iDocid;
  2635. nDoclist += nWrite;
  2636. }
  2637. }else{
  2638. nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
  2639. iPrev = iDocid;
  2640. if( isRequirePos ){
  2641. memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
  2642. nDoclist += nList;
  2643. pCsr->aBuffer[nDoclist++] = '\0';
  2644. }
  2645. }
  2646. }
  2647. fts3SegReaderSort(apSegment, nMerge, j, xCmp);
  2648. }
  2649. if( nDoclist>0 ){
  2650. pCsr->aDoclist = pCsr->aBuffer;
  2651. pCsr->nDoclist = nDoclist;
  2652. rc = SQLITE_ROW;
  2653. }
  2654. }
  2655. pCsr->nAdvance = nMerge;
  2656. }while( rc==SQLITE_OK );
  2657. return rc;
  2658. }
  2659. void sqlite3Fts3SegReaderFinish(
  2660. Fts3MultiSegReader *pCsr /* Cursor object */
  2661. ){
  2662. if( pCsr ){
  2663. int i;
  2664. for(i=0; i<pCsr->nSegment; i++){
  2665. sqlite3Fts3SegReaderFree(pCsr->apSegment[i]);
  2666. }
  2667. sqlite3_free(pCsr->apSegment);
  2668. sqlite3_free(pCsr->aBuffer);
  2669. pCsr->nSegment = 0;
  2670. pCsr->apSegment = 0;
  2671. pCsr->aBuffer = 0;
  2672. }
  2673. }
  2674. /*
  2675. ** Merge all level iLevel segments in the database into a single
  2676. ** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
  2677. ** single segment with a level equal to the numerically largest level
  2678. ** currently present in the database.
  2679. **
  2680. ** If this function is called with iLevel<0, but there is only one
  2681. ** segment in the database, SQLITE_DONE is returned immediately.
  2682. ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
  2683. ** an SQLite error code is returned.
  2684. */
  2685. static int fts3SegmentMerge(
  2686. Fts3Table *p,
  2687. int iLangid, /* Language id to merge */
  2688. int iIndex, /* Index in p->aIndex[] to merge */
  2689. int iLevel /* Level to merge */
  2690. ){
  2691. int rc; /* Return code */
  2692. int iIdx = 0; /* Index of new segment */
  2693. sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */
  2694. SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
  2695. Fts3SegFilter filter; /* Segment term filter condition */
  2696. Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
  2697. int bIgnoreEmpty = 0; /* True to ignore empty segments */
  2698. assert( iLevel==FTS3_SEGCURSOR_ALL
  2699. || iLevel==FTS3_SEGCURSOR_PENDING
  2700. || iLevel>=0
  2701. );
  2702. assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  2703. assert( iIndex>=0 && iIndex<p->nIndex );
  2704. rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
  2705. if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
  2706. if( iLevel==FTS3_SEGCURSOR_ALL ){
  2707. /* This call is to merge all segments in the database to a single
  2708. ** segment. The level of the new segment is equal to the numerically
  2709. ** greatest segment level currently present in the database for this
  2710. ** index. The idx of the new segment is always 0. */
  2711. if( csr.nSegment==1 ){
  2712. rc = SQLITE_DONE;
  2713. goto finished;
  2714. }
  2715. rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
  2716. bIgnoreEmpty = 1;
  2717. }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
  2718. iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
  2719. rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
  2720. }else{
  2721. /* This call is to merge all segments at level iLevel. find the next
  2722. ** available segment index at level iLevel+1. The call to
  2723. ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
  2724. ** a single iLevel+2 segment if necessary. */
  2725. rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
  2726. iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
  2727. }
  2728. if( rc!=SQLITE_OK ) goto finished;
  2729. assert( csr.nSegment>0 );
  2730. assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
  2731. assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
  2732. memset(&filter, 0, sizeof(Fts3SegFilter));
  2733. filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  2734. filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
  2735. rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
  2736. while( SQLITE_OK==rc ){
  2737. rc = sqlite3Fts3SegReaderStep(p, &csr);
  2738. if( rc!=SQLITE_ROW ) break;
  2739. rc = fts3SegWriterAdd(p, &pWriter, 1,
  2740. csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
  2741. }
  2742. if( rc!=SQLITE_OK ) goto finished;
  2743. assert( pWriter );
  2744. if( iLevel!=FTS3_SEGCURSOR_PENDING ){
  2745. rc = fts3DeleteSegdir(
  2746. p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
  2747. );
  2748. if( rc!=SQLITE_OK ) goto finished;
  2749. }
  2750. rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
  2751. finished:
  2752. fts3SegWriterFree(pWriter);
  2753. sqlite3Fts3SegReaderFinish(&csr);
  2754. return rc;
  2755. }
  2756. /*
  2757. ** Flush the contents of pendingTerms to level 0 segments.
  2758. */
  2759. int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  2760. int rc = SQLITE_OK;
  2761. int i;
  2762. for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
  2763. rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
  2764. if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  2765. }
  2766. sqlite3Fts3PendingTermsClear(p);
  2767. /* Determine the auto-incr-merge setting if unknown. If enabled,
  2768. ** estimate the number of leaf blocks of content to be written
  2769. */
  2770. if( rc==SQLITE_OK && p->bHasStat
  2771. && p->bAutoincrmerge==0xff && p->nLeafAdd>0
  2772. ){
  2773. sqlite3_stmt *pStmt = 0;
  2774. rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
  2775. if( rc==SQLITE_OK ){
  2776. sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
  2777. rc = sqlite3_step(pStmt);
  2778. p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
  2779. rc = sqlite3_reset(pStmt);
  2780. }
  2781. }
  2782. return rc;
  2783. }
  2784. /*
  2785. ** Encode N integers as varints into a blob.
  2786. */
  2787. static void fts3EncodeIntArray(
  2788. int N, /* The number of integers to encode */
  2789. u32 *a, /* The integer values */
  2790. char *zBuf, /* Write the BLOB here */
  2791. int *pNBuf /* Write number of bytes if zBuf[] used here */
  2792. ){
  2793. int i, j;
  2794. for(i=j=0; i<N; i++){
  2795. j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]);
  2796. }
  2797. *pNBuf = j;
  2798. }
  2799. /*
  2800. ** Decode a blob of varints into N integers
  2801. */
  2802. static void fts3DecodeIntArray(
  2803. int N, /* The number of integers to decode */
  2804. u32 *a, /* Write the integer values */
  2805. const char *zBuf, /* The BLOB containing the varints */
  2806. int nBuf /* size of the BLOB */
  2807. ){
  2808. int i, j;
  2809. UNUSED_PARAMETER(nBuf);
  2810. for(i=j=0; i<N; i++){
  2811. sqlite3_int64 x;
  2812. j += sqlite3Fts3GetVarint(&zBuf[j], &x);
  2813. assert(j<=nBuf);
  2814. a[i] = (u32)(x & 0xffffffff);
  2815. }
  2816. }
  2817. /*
  2818. ** Insert the sizes (in tokens) for each column of the document
  2819. ** with docid equal to p->iPrevDocid. The sizes are encoded as
  2820. ** a blob of varints.
  2821. */
  2822. static void fts3InsertDocsize(
  2823. int *pRC, /* Result code */
  2824. Fts3Table *p, /* Table into which to insert */
  2825. u32 *aSz /* Sizes of each column, in tokens */
  2826. ){
  2827. char *pBlob; /* The BLOB encoding of the document size */
  2828. int nBlob; /* Number of bytes in the BLOB */
  2829. sqlite3_stmt *pStmt; /* Statement used to insert the encoding */
  2830. int rc; /* Result code from subfunctions */
  2831. if( *pRC ) return;
  2832. pBlob = sqlite3_malloc( 10*p->nColumn );
  2833. if( pBlob==0 ){
  2834. *pRC = SQLITE_NOMEM;
  2835. return;
  2836. }
  2837. fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob);
  2838. rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0);
  2839. if( rc ){
  2840. sqlite3_free(pBlob);
  2841. *pRC = rc;
  2842. return;
  2843. }
  2844. sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
  2845. sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
  2846. sqlite3_step(pStmt);
  2847. *pRC = sqlite3_reset(pStmt);
  2848. }
  2849. /*
  2850. ** Record 0 of the %_stat table contains a blob consisting of N varints,
  2851. ** where N is the number of user defined columns in the fts3 table plus
  2852. ** two. If nCol is the number of user defined columns, then values of the
  2853. ** varints are set as follows:
  2854. **
  2855. ** Varint 0: Total number of rows in the table.
  2856. **
  2857. ** Varint 1..nCol: For each column, the total number of tokens stored in
  2858. ** the column for all rows of the table.
  2859. **
  2860. ** Varint 1+nCol: The total size, in bytes, of all text values in all
  2861. ** columns of all rows of the table.
  2862. **
  2863. */
  2864. static void fts3UpdateDocTotals(
  2865. int *pRC, /* The result code */
  2866. Fts3Table *p, /* Table being updated */
  2867. u32 *aSzIns, /* Size increases */
  2868. u32 *aSzDel, /* Size decreases */
  2869. int nChng /* Change in the number of documents */
  2870. ){
  2871. char *pBlob; /* Storage for BLOB written into %_stat */
  2872. int nBlob; /* Size of BLOB written into %_stat */
  2873. u32 *a; /* Array of integers that becomes the BLOB */
  2874. sqlite3_stmt *pStmt; /* Statement for reading and writing */
  2875. int i; /* Loop counter */
  2876. int rc; /* Result code from subfunctions */
  2877. const int nStat = p->nColumn+2;
  2878. if( *pRC ) return;
  2879. a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
  2880. if( a==0 ){
  2881. *pRC = SQLITE_NOMEM;
  2882. return;
  2883. }
  2884. pBlob = (char*)&a[nStat];
  2885. rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
  2886. if( rc ){
  2887. sqlite3_free(a);
  2888. *pRC = rc;
  2889. return;
  2890. }
  2891. sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
  2892. if( sqlite3_step(pStmt)==SQLITE_ROW ){
  2893. fts3DecodeIntArray(nStat, a,
  2894. sqlite3_column_blob(pStmt, 0),
  2895. sqlite3_column_bytes(pStmt, 0));
  2896. }else{
  2897. memset(a, 0, sizeof(u32)*(nStat) );
  2898. }
  2899. rc = sqlite3_reset(pStmt);
  2900. if( rc!=SQLITE_OK ){
  2901. sqlite3_free(a);
  2902. *pRC = rc;
  2903. return;
  2904. }
  2905. if( nChng<0 && a[0]<(u32)(-nChng) ){
  2906. a[0] = 0;
  2907. }else{
  2908. a[0] += nChng;
  2909. }
  2910. for(i=0; i<p->nColumn+1; i++){
  2911. u32 x = a[i+1];
  2912. if( x+aSzIns[i] < aSzDel[i] ){
  2913. x = 0;
  2914. }else{
  2915. x = x + aSzIns[i] - aSzDel[i];
  2916. }
  2917. a[i+1] = x;
  2918. }
  2919. fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
  2920. rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
  2921. if( rc ){
  2922. sqlite3_free(a);
  2923. *pRC = rc;
  2924. return;
  2925. }
  2926. sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
  2927. sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC);
  2928. sqlite3_step(pStmt);
  2929. *pRC = sqlite3_reset(pStmt);
  2930. sqlite3_free(a);
  2931. }
  2932. /*
  2933. ** Merge the entire database so that there is one segment for each
  2934. ** iIndex/iLangid combination.
  2935. */
  2936. static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
  2937. int bSeenDone = 0;
  2938. int rc;
  2939. sqlite3_stmt *pAllLangid = 0;
  2940. rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  2941. if( rc==SQLITE_OK ){
  2942. int rc2;
  2943. sqlite3_bind_int(pAllLangid, 1, p->nIndex);
  2944. while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
  2945. int i;
  2946. int iLangid = sqlite3_column_int(pAllLangid, 0);
  2947. for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
  2948. rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
  2949. if( rc==SQLITE_DONE ){
  2950. bSeenDone = 1;
  2951. rc = SQLITE_OK;
  2952. }
  2953. }
  2954. }
  2955. rc2 = sqlite3_reset(pAllLangid);
  2956. if( rc==SQLITE_OK ) rc = rc2;
  2957. }
  2958. sqlite3Fts3SegmentsClose(p);
  2959. sqlite3Fts3PendingTermsClear(p);
  2960. return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
  2961. }
  2962. /*
  2963. ** This function is called when the user executes the following statement:
  2964. **
  2965. ** INSERT INTO <tbl>(<tbl>) VALUES('rebuild');
  2966. **
  2967. ** The entire FTS index is discarded and rebuilt. If the table is one
  2968. ** created using the content=xxx option, then the new index is based on
  2969. ** the current contents of the xxx table. Otherwise, it is rebuilt based
  2970. ** on the contents of the %_content table.
  2971. */
  2972. static int fts3DoRebuild(Fts3Table *p){
  2973. int rc; /* Return Code */
  2974. rc = fts3DeleteAll(p, 0);
  2975. if( rc==SQLITE_OK ){
  2976. u32 *aSz = 0;
  2977. u32 *aSzIns = 0;
  2978. u32 *aSzDel = 0;
  2979. sqlite3_stmt *pStmt = 0;
  2980. int nEntry = 0;
  2981. /* Compose and prepare an SQL statement to loop through the content table */
  2982. char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
  2983. if( !zSql ){
  2984. rc = SQLITE_NOMEM;
  2985. }else{
  2986. rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
  2987. sqlite3_free(zSql);
  2988. }
  2989. if( rc==SQLITE_OK ){
  2990. int nByte = sizeof(u32) * (p->nColumn+1)*3;
  2991. aSz = (u32 *)sqlite3_malloc(nByte);
  2992. if( aSz==0 ){
  2993. rc = SQLITE_NOMEM;
  2994. }else{
  2995. memset(aSz, 0, nByte);
  2996. aSzIns = &aSz[p->nColumn+1];
  2997. aSzDel = &aSzIns[p->nColumn+1];
  2998. }
  2999. }
  3000. while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
  3001. int iCol;
  3002. int iLangid = langidFromSelect(p, pStmt);
  3003. rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
  3004. memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
  3005. for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
  3006. if( p->abNotindexed[iCol]==0 ){
  3007. const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
  3008. rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
  3009. aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
  3010. }
  3011. }
  3012. if( p->bHasDocsize ){
  3013. fts3InsertDocsize(&rc, p, aSz);
  3014. }
  3015. if( rc!=SQLITE_OK ){
  3016. sqlite3_finalize(pStmt);
  3017. pStmt = 0;
  3018. }else{
  3019. nEntry++;
  3020. for(iCol=0; iCol<=p->nColumn; iCol++){
  3021. aSzIns[iCol] += aSz[iCol];
  3022. }
  3023. }
  3024. }
  3025. if( p->bFts4 ){
  3026. fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry);
  3027. }
  3028. sqlite3_free(aSz);
  3029. if( pStmt ){
  3030. int rc2 = sqlite3_finalize(pStmt);
  3031. if( rc==SQLITE_OK ){
  3032. rc = rc2;
  3033. }
  3034. }
  3035. }
  3036. return rc;
  3037. }
  3038. /*
  3039. ** This function opens a cursor used to read the input data for an
  3040. ** incremental merge operation. Specifically, it opens a cursor to scan
  3041. ** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute
  3042. ** level iAbsLevel.
  3043. */
  3044. static int fts3IncrmergeCsr(
  3045. Fts3Table *p, /* FTS3 table handle */
  3046. sqlite3_int64 iAbsLevel, /* Absolute level to open */
  3047. int nSeg, /* Number of segments to merge */
  3048. Fts3MultiSegReader *pCsr /* Cursor object to populate */
  3049. ){
  3050. int rc; /* Return Code */
  3051. sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */
  3052. int nByte; /* Bytes allocated at pCsr->apSegment[] */
  3053. /* Allocate space for the Fts3MultiSegReader.aCsr[] array */
  3054. memset(pCsr, 0, sizeof(*pCsr));
  3055. nByte = sizeof(Fts3SegReader *) * nSeg;
  3056. pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);
  3057. if( pCsr->apSegment==0 ){
  3058. rc = SQLITE_NOMEM;
  3059. }else{
  3060. memset(pCsr->apSegment, 0, nByte);
  3061. rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
  3062. }
  3063. if( rc==SQLITE_OK ){
  3064. int i;
  3065. int rc2;
  3066. sqlite3_bind_int64(pStmt, 1, iAbsLevel);
  3067. assert( pCsr->nSegment==0 );
  3068. for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){
  3069. rc = sqlite3Fts3SegReaderNew(i, 0,
  3070. sqlite3_column_int64(pStmt, 1), /* segdir.start_block */
  3071. sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */
  3072. sqlite3_column_int64(pStmt, 3), /* segdir.end_block */
  3073. sqlite3_column_blob(pStmt, 4), /* segdir.root */
  3074. sqlite3_column_bytes(pStmt, 4), /* segdir.root */
  3075. &pCsr->apSegment[i]
  3076. );
  3077. pCsr->nSegment++;
  3078. }
  3079. rc2 = sqlite3_reset(pStmt);
  3080. if( rc==SQLITE_OK ) rc = rc2;
  3081. }
  3082. return rc;
  3083. }
  3084. typedef struct IncrmergeWriter IncrmergeWriter;
  3085. typedef struct NodeWriter NodeWriter;
  3086. typedef struct Blob Blob;
  3087. typedef struct NodeReader NodeReader;
  3088. /*
  3089. ** An instance of the following structure is used as a dynamic buffer
  3090. ** to build up nodes or other blobs of data in.
  3091. **
  3092. ** The function blobGrowBuffer() is used to extend the allocation.
  3093. */
  3094. struct Blob {
  3095. char *a; /* Pointer to allocation */
  3096. int n; /* Number of valid bytes of data in a[] */
  3097. int nAlloc; /* Allocated size of a[] (nAlloc>=n) */
  3098. };
  3099. /*
  3100. ** This structure is used to build up buffers containing segment b-tree
  3101. ** nodes (blocks).
  3102. */
  3103. struct NodeWriter {
  3104. sqlite3_int64 iBlock; /* Current block id */
  3105. Blob key; /* Last key written to the current block */
  3106. Blob block; /* Current block image */
  3107. };
  3108. /*
  3109. ** An object of this type contains the state required to create or append
  3110. ** to an appendable b-tree segment.
  3111. */
  3112. struct IncrmergeWriter {
  3113. int nLeafEst; /* Space allocated for leaf blocks */
  3114. int nWork; /* Number of leaf pages flushed */
  3115. sqlite3_int64 iAbsLevel; /* Absolute level of input segments */
  3116. int iIdx; /* Index of *output* segment in iAbsLevel+1 */
  3117. sqlite3_int64 iStart; /* Block number of first allocated block */
  3118. sqlite3_int64 iEnd; /* Block number of last allocated block */
  3119. NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
  3120. };
  3121. /*
  3122. ** An object of the following type is used to read data from a single
  3123. ** FTS segment node. See the following functions:
  3124. **
  3125. ** nodeReaderInit()
  3126. ** nodeReaderNext()
  3127. ** nodeReaderRelease()
  3128. */
  3129. struct NodeReader {
  3130. const char *aNode;
  3131. int nNode;
  3132. int iOff; /* Current offset within aNode[] */
  3133. /* Output variables. Containing the current node entry. */
  3134. sqlite3_int64 iChild; /* Pointer to child node */
  3135. Blob term; /* Current term */
  3136. const char *aDoclist; /* Pointer to doclist */
  3137. int nDoclist; /* Size of doclist in bytes */
  3138. };
  3139. /*
  3140. ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
  3141. ** Otherwise, if the allocation at pBlob->a is not already at least nMin
  3142. ** bytes in size, extend (realloc) it to be so.
  3143. **
  3144. ** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a
  3145. ** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc
  3146. ** to reflect the new size of the pBlob->a[] buffer.
  3147. */
  3148. static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){
  3149. if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){
  3150. int nAlloc = nMin;
  3151. char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc);
  3152. if( a ){
  3153. pBlob->nAlloc = nAlloc;
  3154. pBlob->a = a;
  3155. }else{
  3156. *pRc = SQLITE_NOMEM;
  3157. }
  3158. }
  3159. }
  3160. /*
  3161. ** Attempt to advance the node-reader object passed as the first argument to
  3162. ** the next entry on the node.
  3163. **
  3164. ** Return an error code if an error occurs (SQLITE_NOMEM is possible).
  3165. ** Otherwise return SQLITE_OK. If there is no next entry on the node
  3166. ** (e.g. because the current entry is the last) set NodeReader->aNode to
  3167. ** NULL to indicate EOF. Otherwise, populate the NodeReader structure output
  3168. ** variables for the new entry.
  3169. */
  3170. static int nodeReaderNext(NodeReader *p){
  3171. int bFirst = (p->term.n==0); /* True for first term on the node */
  3172. int nPrefix = 0; /* Bytes to copy from previous term */
  3173. int nSuffix = 0; /* Bytes to append to the prefix */
  3174. int rc = SQLITE_OK; /* Return code */
  3175. assert( p->aNode );
  3176. if( p->iChild && bFirst==0 ) p->iChild++;
  3177. if( p->iOff>=p->nNode ){
  3178. /* EOF */
  3179. p->aNode = 0;
  3180. }else{
  3181. if( bFirst==0 ){
  3182. p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);
  3183. }
  3184. p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);
  3185. blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc);
  3186. if( rc==SQLITE_OK ){
  3187. memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix);
  3188. p->term.n = nPrefix+nSuffix;
  3189. p->iOff += nSuffix;
  3190. if( p->iChild==0 ){
  3191. p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);
  3192. p->aDoclist = &p->aNode[p->iOff];
  3193. p->iOff += p->nDoclist;
  3194. }
  3195. }
  3196. }
  3197. assert( p->iOff<=p->nNode );
  3198. return rc;
  3199. }
  3200. /*
  3201. ** Release all dynamic resources held by node-reader object *p.
  3202. */
  3203. static void nodeReaderRelease(NodeReader *p){
  3204. sqlite3_free(p->term.a);
  3205. }
  3206. /*
  3207. ** Initialize a node-reader object to read the node in buffer aNode/nNode.
  3208. **
  3209. ** If successful, SQLITE_OK is returned and the NodeReader object set to
  3210. ** point to the first entry on the node (if any). Otherwise, an SQLite
  3211. ** error code is returned.
  3212. */
  3213. static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){
  3214. memset(p, 0, sizeof(NodeReader));
  3215. p->aNode = aNode;
  3216. p->nNode = nNode;
  3217. /* Figure out if this is a leaf or an internal node. */
  3218. if( p->aNode[0] ){
  3219. /* An internal node. */
  3220. p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild);
  3221. }else{
  3222. p->iOff = 1;
  3223. }
  3224. return nodeReaderNext(p);
  3225. }
  3226. /*
  3227. ** This function is called while writing an FTS segment each time a leaf o
  3228. ** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed
  3229. ** to be greater than the largest key on the node just written, but smaller
  3230. ** than or equal to the first key that will be written to the next leaf
  3231. ** node.
  3232. **
  3233. ** The block id of the leaf node just written to disk may be found in
  3234. ** (pWriter->aNodeWriter[0].iBlock) when this function is called.
  3235. */
  3236. static int fts3IncrmergePush(
  3237. Fts3Table *p, /* Fts3 table handle */
  3238. IncrmergeWriter *pWriter, /* Writer object */
  3239. const char *zTerm, /* Term to write to internal node */
  3240. int nTerm /* Bytes at zTerm */
  3241. ){
  3242. sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock;
  3243. int iLayer;
  3244. assert( nTerm>0 );
  3245. for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){
  3246. sqlite3_int64 iNextPtr = 0;
  3247. NodeWriter *pNode = &pWriter->aNodeWriter[iLayer];
  3248. int rc = SQLITE_OK;
  3249. int nPrefix;
  3250. int nSuffix;
  3251. int nSpace;
  3252. /* Figure out how much space the key will consume if it is written to
  3253. ** the current node of layer iLayer. Due to the prefix compression,
  3254. ** the space required changes depending on which node the key is to
  3255. ** be added to. */
  3256. nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm);
  3257. nSuffix = nTerm - nPrefix;
  3258. nSpace = sqlite3Fts3VarintLen(nPrefix);
  3259. nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
  3260. if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){
  3261. /* If the current node of layer iLayer contains zero keys, or if adding
  3262. ** the key to it will not cause it to grow to larger than nNodeSize
  3263. ** bytes in size, write the key here. */
  3264. Blob *pBlk = &pNode->block;
  3265. if( pBlk->n==0 ){
  3266. blobGrowBuffer(pBlk, p->nNodeSize, &rc);
  3267. if( rc==SQLITE_OK ){
  3268. pBlk->a[0] = (char)iLayer;
  3269. pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr);
  3270. }
  3271. }
  3272. blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc);
  3273. blobGrowBuffer(&pNode->key, nTerm, &rc);
  3274. if( rc==SQLITE_OK ){
  3275. if( pNode->key.n ){
  3276. pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix);
  3277. }
  3278. pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix);
  3279. memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix);
  3280. pBlk->n += nSuffix;
  3281. memcpy(pNode->key.a, zTerm, nTerm);
  3282. pNode->key.n = nTerm;
  3283. }
  3284. }else{
  3285. /* Otherwise, flush the current node of layer iLayer to disk.
  3286. ** Then allocate a new, empty sibling node. The key will be written
  3287. ** into the parent of this node. */
  3288. rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
  3289. assert( pNode->block.nAlloc>=p->nNodeSize );
  3290. pNode->block.a[0] = (char)iLayer;
  3291. pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1);
  3292. iNextPtr = pNode->iBlock;
  3293. pNode->iBlock++;
  3294. pNode->key.n = 0;
  3295. }
  3296. if( rc!=SQLITE_OK || iNextPtr==0 ) return rc;
  3297. iPtr = iNextPtr;
  3298. }
  3299. assert( 0 );
  3300. return 0;
  3301. }
  3302. /*
  3303. ** Append a term and (optionally) doclist to the FTS segment node currently
  3304. ** stored in blob *pNode. The node need not contain any terms, but the
  3305. ** header must be written before this function is called.
  3306. **
  3307. ** A node header is a single 0x00 byte for a leaf node, or a height varint
  3308. ** followed by the left-hand-child varint for an internal node.
  3309. **
  3310. ** The term to be appended is passed via arguments zTerm/nTerm. For a
  3311. ** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal
  3312. ** node, both aDoclist and nDoclist must be passed 0.
  3313. **
  3314. ** If the size of the value in blob pPrev is zero, then this is the first
  3315. ** term written to the node. Otherwise, pPrev contains a copy of the
  3316. ** previous term. Before this function returns, it is updated to contain a
  3317. ** copy of zTerm/nTerm.
  3318. **
  3319. ** It is assumed that the buffer associated with pNode is already large
  3320. ** enough to accommodate the new entry. The buffer associated with pPrev
  3321. ** is extended by this function if requrired.
  3322. **
  3323. ** If an error (i.e. OOM condition) occurs, an SQLite error code is
  3324. ** returned. Otherwise, SQLITE_OK.
  3325. */
  3326. static int fts3AppendToNode(
  3327. Blob *pNode, /* Current node image to append to */
  3328. Blob *pPrev, /* Buffer containing previous term written */
  3329. const char *zTerm, /* New term to write */
  3330. int nTerm, /* Size of zTerm in bytes */
  3331. const char *aDoclist, /* Doclist (or NULL) to write */
  3332. int nDoclist /* Size of aDoclist in bytes */
  3333. ){
  3334. int rc = SQLITE_OK; /* Return code */
  3335. int bFirst = (pPrev->n==0); /* True if this is the first term written */
  3336. int nPrefix; /* Size of term prefix in bytes */
  3337. int nSuffix; /* Size of term suffix in bytes */
  3338. /* Node must have already been started. There must be a doclist for a
  3339. ** leaf node, and there must not be a doclist for an internal node. */
  3340. assert( pNode->n>0 );
  3341. assert( (pNode->a[0]=='\0')==(aDoclist!=0) );
  3342. blobGrowBuffer(pPrev, nTerm, &rc);
  3343. if( rc!=SQLITE_OK ) return rc;
  3344. nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm);
  3345. nSuffix = nTerm - nPrefix;
  3346. memcpy(pPrev->a, zTerm, nTerm);
  3347. pPrev->n = nTerm;
  3348. if( bFirst==0 ){
  3349. pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix);
  3350. }
  3351. pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix);
  3352. memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix);
  3353. pNode->n += nSuffix;
  3354. if( aDoclist ){
  3355. pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist);
  3356. memcpy(&pNode->a[pNode->n], aDoclist, nDoclist);
  3357. pNode->n += nDoclist;
  3358. }
  3359. assert( pNode->n<=pNode->nAlloc );
  3360. return SQLITE_OK;
  3361. }
  3362. /*
  3363. ** Append the current term and doclist pointed to by cursor pCsr to the
  3364. ** appendable b-tree segment opened for writing by pWriter.
  3365. **
  3366. ** Return SQLITE_OK if successful, or an SQLite error code otherwise.
  3367. */
  3368. static int fts3IncrmergeAppend(
  3369. Fts3Table *p, /* Fts3 table handle */
  3370. IncrmergeWriter *pWriter, /* Writer object */
  3371. Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */
  3372. ){
  3373. const char *zTerm = pCsr->zTerm;
  3374. int nTerm = pCsr->nTerm;
  3375. const char *aDoclist = pCsr->aDoclist;
  3376. int nDoclist = pCsr->nDoclist;
  3377. int rc = SQLITE_OK; /* Return code */
  3378. int nSpace; /* Total space in bytes required on leaf */
  3379. int nPrefix; /* Size of prefix shared with previous term */
  3380. int nSuffix; /* Size of suffix (nTerm - nPrefix) */
  3381. NodeWriter *pLeaf; /* Object used to write leaf nodes */
  3382. pLeaf = &pWriter->aNodeWriter[0];
  3383. nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm);
  3384. nSuffix = nTerm - nPrefix;
  3385. nSpace = sqlite3Fts3VarintLen(nPrefix);
  3386. nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
  3387. nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
  3388. /* If the current block is not empty, and if adding this term/doclist
  3389. ** to the current block would make it larger than Fts3Table.nNodeSize
  3390. ** bytes, write this block out to the database. */
  3391. if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){
  3392. rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n);
  3393. pWriter->nWork++;
  3394. /* Add the current term to the parent node. The term added to the
  3395. ** parent must:
  3396. **
  3397. ** a) be greater than the largest term on the leaf node just written
  3398. ** to the database (still available in pLeaf->key), and
  3399. **
  3400. ** b) be less than or equal to the term about to be added to the new
  3401. ** leaf node (zTerm/nTerm).
  3402. **
  3403. ** In other words, it must be the prefix of zTerm 1 byte longer than
  3404. ** the common prefix (if any) of zTerm and pWriter->zTerm.
  3405. */
  3406. if( rc==SQLITE_OK ){
  3407. rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1);
  3408. }
  3409. /* Advance to the next output block */
  3410. pLeaf->iBlock++;
  3411. pLeaf->key.n = 0;
  3412. pLeaf->block.n = 0;
  3413. nSuffix = nTerm;
  3414. nSpace = 1;
  3415. nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
  3416. nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
  3417. }
  3418. blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
  3419. if( rc==SQLITE_OK ){
  3420. if( pLeaf->block.n==0 ){
  3421. pLeaf->block.n = 1;
  3422. pLeaf->block.a[0] = '\0';
  3423. }
  3424. rc = fts3AppendToNode(
  3425. &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist
  3426. );
  3427. }
  3428. return rc;
  3429. }
  3430. /*
  3431. ** This function is called to release all dynamic resources held by the
  3432. ** merge-writer object pWriter, and if no error has occurred, to flush
  3433. ** all outstanding node buffers held by pWriter to disk.
  3434. **
  3435. ** If *pRc is not SQLITE_OK when this function is called, then no attempt
  3436. ** is made to write any data to disk. Instead, this function serves only
  3437. ** to release outstanding resources.
  3438. **
  3439. ** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while
  3440. ** flushing buffers to disk, *pRc is set to an SQLite error code before
  3441. ** returning.
  3442. */
  3443. static void fts3IncrmergeRelease(
  3444. Fts3Table *p, /* FTS3 table handle */
  3445. IncrmergeWriter *pWriter, /* Merge-writer object */
  3446. int *pRc /* IN/OUT: Error code */
  3447. ){
  3448. int i; /* Used to iterate through non-root layers */
  3449. int iRoot; /* Index of root in pWriter->aNodeWriter */
  3450. NodeWriter *pRoot; /* NodeWriter for root node */
  3451. int rc = *pRc; /* Error code */
  3452. /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment
  3453. ** root node. If the segment fits entirely on a single leaf node, iRoot
  3454. ** will be set to 0. If the root node is the parent of the leaves, iRoot
  3455. ** will be 1. And so on. */
  3456. for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){
  3457. NodeWriter *pNode = &pWriter->aNodeWriter[iRoot];
  3458. if( pNode->block.n>0 ) break;
  3459. assert( *pRc || pNode->block.nAlloc==0 );
  3460. assert( *pRc || pNode->key.nAlloc==0 );
  3461. sqlite3_free(pNode->block.a);
  3462. sqlite3_free(pNode->key.a);
  3463. }
  3464. /* Empty output segment. This is a no-op. */
  3465. if( iRoot<0 ) return;
  3466. /* The entire output segment fits on a single node. Normally, this means
  3467. ** the node would be stored as a blob in the "root" column of the %_segdir
  3468. ** table. However, this is not permitted in this case. The problem is that
  3469. ** space has already been reserved in the %_segments table, and so the
  3470. ** start_block and end_block fields of the %_segdir table must be populated.
  3471. ** And, by design or by accident, released versions of FTS cannot handle
  3472. ** segments that fit entirely on the root node with start_block!=0.
  3473. **
  3474. ** Instead, create a synthetic root node that contains nothing but a
  3475. ** pointer to the single content node. So that the segment consists of a
  3476. ** single leaf and a single interior (root) node.
  3477. **
  3478. ** Todo: Better might be to defer allocating space in the %_segments
  3479. ** table until we are sure it is needed.
  3480. */
  3481. if( iRoot==0 ){
  3482. Blob *pBlock = &pWriter->aNodeWriter[1].block;
  3483. blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc);
  3484. if( rc==SQLITE_OK ){
  3485. pBlock->a[0] = 0x01;
  3486. pBlock->n = 1 + sqlite3Fts3PutVarint(
  3487. &pBlock->a[1], pWriter->aNodeWriter[0].iBlock
  3488. );
  3489. }
  3490. iRoot = 1;
  3491. }
  3492. pRoot = &pWriter->aNodeWriter[iRoot];
  3493. /* Flush all currently outstanding nodes to disk. */
  3494. for(i=0; i<iRoot; i++){
  3495. NodeWriter *pNode = &pWriter->aNodeWriter[i];
  3496. if( pNode->block.n>0 && rc==SQLITE_OK ){
  3497. rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
  3498. }
  3499. sqlite3_free(pNode->block.a);
  3500. sqlite3_free(pNode->key.a);
  3501. }
  3502. /* Write the %_segdir record. */
  3503. if( rc==SQLITE_OK ){
  3504. rc = fts3WriteSegdir(p,
  3505. pWriter->iAbsLevel+1, /* level */
  3506. pWriter->iIdx, /* idx */
  3507. pWriter->iStart, /* start_block */
  3508. pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */
  3509. pWriter->iEnd, /* end_block */
  3510. pRoot->block.a, pRoot->block.n /* root */
  3511. );
  3512. }
  3513. sqlite3_free(pRoot->block.a);
  3514. sqlite3_free(pRoot->key.a);
  3515. *pRc = rc;
  3516. }
  3517. /*
  3518. ** Compare the term in buffer zLhs (size in bytes nLhs) with that in
  3519. ** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of
  3520. ** the other, it is considered to be smaller than the other.
  3521. **
  3522. ** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
  3523. ** if it is greater.
  3524. */
  3525. static int fts3TermCmp(
  3526. const char *zLhs, int nLhs, /* LHS of comparison */
  3527. const char *zRhs, int nRhs /* RHS of comparison */
  3528. ){
  3529. int nCmp = MIN(nLhs, nRhs);
  3530. int res;
  3531. res = memcmp(zLhs, zRhs, nCmp);
  3532. if( res==0 ) res = nLhs - nRhs;
  3533. return res;
  3534. }
  3535. /*
  3536. ** Query to see if the entry in the %_segments table with blockid iEnd is
  3537. ** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before
  3538. ** returning. Otherwise, set *pbRes to 0.
  3539. **
  3540. ** Or, if an error occurs while querying the database, return an SQLite
  3541. ** error code. The final value of *pbRes is undefined in this case.
  3542. **
  3543. ** This is used to test if a segment is an "appendable" segment. If it
  3544. ** is, then a NULL entry has been inserted into the %_segments table
  3545. ** with blockid %_segdir.end_block.
  3546. */
  3547. static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){
  3548. int bRes = 0; /* Result to set *pbRes to */
  3549. sqlite3_stmt *pCheck = 0; /* Statement to query database with */
  3550. int rc; /* Return code */
  3551. rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0);
  3552. if( rc==SQLITE_OK ){
  3553. sqlite3_bind_int64(pCheck, 1, iEnd);
  3554. if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1;
  3555. rc = sqlite3_reset(pCheck);
  3556. }
  3557. *pbRes = bRes;
  3558. return rc;
  3559. }
  3560. /*
  3561. ** This function is called when initializing an incremental-merge operation.
  3562. ** It checks if the existing segment with index value iIdx at absolute level
  3563. ** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the
  3564. ** merge-writer object *pWriter is initialized to write to it.
  3565. **
  3566. ** An existing segment can be appended to by an incremental merge if:
  3567. **
  3568. ** * It was initially created as an appendable segment (with all required
  3569. ** space pre-allocated), and
  3570. **
  3571. ** * The first key read from the input (arguments zKey and nKey) is
  3572. ** greater than the largest key currently stored in the potential
  3573. ** output segment.
  3574. */
  3575. static int fts3IncrmergeLoad(
  3576. Fts3Table *p, /* Fts3 table handle */
  3577. sqlite3_int64 iAbsLevel, /* Absolute level of input segments */
  3578. int iIdx, /* Index of candidate output segment */
  3579. const char *zKey, /* First key to write */
  3580. int nKey, /* Number of bytes in nKey */
  3581. IncrmergeWriter *pWriter /* Populate this object */
  3582. ){
  3583. int rc; /* Return code */
  3584. sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */
  3585. rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0);
  3586. if( rc==SQLITE_OK ){
  3587. sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */
  3588. sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */
  3589. sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */
  3590. const char *aRoot = 0; /* Pointer to %_segdir.root buffer */
  3591. int nRoot = 0; /* Size of aRoot[] in bytes */
  3592. int rc2; /* Return code from sqlite3_reset() */
  3593. int bAppendable = 0; /* Set to true if segment is appendable */
  3594. /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */
  3595. sqlite3_bind_int64(pSelect, 1, iAbsLevel+1);
  3596. sqlite3_bind_int(pSelect, 2, iIdx);
  3597. if( sqlite3_step(pSelect)==SQLITE_ROW ){
  3598. iStart = sqlite3_column_int64(pSelect, 1);
  3599. iLeafEnd = sqlite3_column_int64(pSelect, 2);
  3600. iEnd = sqlite3_column_int64(pSelect, 3);
  3601. nRoot = sqlite3_column_bytes(pSelect, 4);
  3602. aRoot = sqlite3_column_blob(pSelect, 4);
  3603. }else{
  3604. return sqlite3_reset(pSelect);
  3605. }
  3606. /* Check for the zero-length marker in the %_segments table */
  3607. rc = fts3IsAppendable(p, iEnd, &bAppendable);
  3608. /* Check that zKey/nKey is larger than the largest key the candidate */
  3609. if( rc==SQLITE_OK && bAppendable ){
  3610. char *aLeaf = 0;
  3611. int nLeaf = 0;
  3612. rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0);
  3613. if( rc==SQLITE_OK ){
  3614. NodeReader reader;
  3615. for(rc = nodeReaderInit(&reader, aLeaf, nLeaf);
  3616. rc==SQLITE_OK && reader.aNode;
  3617. rc = nodeReaderNext(&reader)
  3618. ){
  3619. assert( reader.aNode );
  3620. }
  3621. if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){
  3622. bAppendable = 0;
  3623. }
  3624. nodeReaderRelease(&reader);
  3625. }
  3626. sqlite3_free(aLeaf);
  3627. }
  3628. if( rc==SQLITE_OK && bAppendable ){
  3629. /* It is possible to append to this segment. Set up the IncrmergeWriter
  3630. ** object to do so. */
  3631. int i;
  3632. int nHeight = (int)aRoot[0];
  3633. NodeWriter *pNode;
  3634. pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT;
  3635. pWriter->iStart = iStart;
  3636. pWriter->iEnd = iEnd;
  3637. pWriter->iAbsLevel = iAbsLevel;
  3638. pWriter->iIdx = iIdx;
  3639. for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
  3640. pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
  3641. }
  3642. pNode = &pWriter->aNodeWriter[nHeight];
  3643. pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight;
  3644. blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc);
  3645. if( rc==SQLITE_OK ){
  3646. memcpy(pNode->block.a, aRoot, nRoot);
  3647. pNode->block.n = nRoot;
  3648. }
  3649. for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){
  3650. NodeReader reader;
  3651. pNode = &pWriter->aNodeWriter[i];
  3652. rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n);
  3653. while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader);
  3654. blobGrowBuffer(&pNode->key, reader.term.n, &rc);
  3655. if( rc==SQLITE_OK ){
  3656. memcpy(pNode->key.a, reader.term.a, reader.term.n);
  3657. pNode->key.n = reader.term.n;
  3658. if( i>0 ){
  3659. char *aBlock = 0;
  3660. int nBlock = 0;
  3661. pNode = &pWriter->aNodeWriter[i-1];
  3662. pNode->iBlock = reader.iChild;
  3663. rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0);
  3664. blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc);
  3665. if( rc==SQLITE_OK ){
  3666. memcpy(pNode->block.a, aBlock, nBlock);
  3667. pNode->block.n = nBlock;
  3668. }
  3669. sqlite3_free(aBlock);
  3670. }
  3671. }
  3672. nodeReaderRelease(&reader);
  3673. }
  3674. }
  3675. rc2 = sqlite3_reset(pSelect);
  3676. if( rc==SQLITE_OK ) rc = rc2;
  3677. }
  3678. return rc;
  3679. }
  3680. /*
  3681. ** Determine the largest segment index value that exists within absolute
  3682. ** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus
  3683. ** one before returning SQLITE_OK. Or, if there are no segments at all
  3684. ** within level iAbsLevel, set *piIdx to zero.
  3685. **
  3686. ** If an error occurs, return an SQLite error code. The final value of
  3687. ** *piIdx is undefined in this case.
  3688. */
  3689. static int fts3IncrmergeOutputIdx(
  3690. Fts3Table *p, /* FTS Table handle */
  3691. sqlite3_int64 iAbsLevel, /* Absolute index of input segments */
  3692. int *piIdx /* OUT: Next free index at iAbsLevel+1 */
  3693. ){
  3694. int rc;
  3695. sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */
  3696. rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0);
  3697. if( rc==SQLITE_OK ){
  3698. sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1);
  3699. sqlite3_step(pOutputIdx);
  3700. *piIdx = sqlite3_column_int(pOutputIdx, 0);
  3701. rc = sqlite3_reset(pOutputIdx);
  3702. }
  3703. return rc;
  3704. }
  3705. /*
  3706. ** Allocate an appendable output segment on absolute level iAbsLevel+1
  3707. ** with idx value iIdx.
  3708. **
  3709. ** In the %_segdir table, a segment is defined by the values in three
  3710. ** columns:
  3711. **
  3712. ** start_block
  3713. ** leaves_end_block
  3714. ** end_block
  3715. **
  3716. ** When an appendable segment is allocated, it is estimated that the
  3717. ** maximum number of leaf blocks that may be required is the sum of the
  3718. ** number of leaf blocks consumed by the input segments, plus the number
  3719. ** of input segments, multiplied by two. This value is stored in stack
  3720. ** variable nLeafEst.
  3721. **
  3722. ** A total of 16*nLeafEst blocks are allocated when an appendable segment
  3723. ** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous
  3724. ** array of leaf nodes starts at the first block allocated. The array
  3725. ** of interior nodes that are parents of the leaf nodes start at block
  3726. ** (start_block + (1 + end_block - start_block) / 16). And so on.
  3727. **
  3728. ** In the actual code below, the value "16" is replaced with the
  3729. ** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT.
  3730. */
  3731. static int fts3IncrmergeWriter(
  3732. Fts3Table *p, /* Fts3 table handle */
  3733. sqlite3_int64 iAbsLevel, /* Absolute level of input segments */
  3734. int iIdx, /* Index of new output segment */
  3735. Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */
  3736. IncrmergeWriter *pWriter /* Populate this object */
  3737. ){
  3738. int rc; /* Return Code */
  3739. int i; /* Iterator variable */
  3740. int nLeafEst = 0; /* Blocks allocated for leaf nodes */
  3741. sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */
  3742. sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */
  3743. /* Calculate nLeafEst. */
  3744. rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0);
  3745. if( rc==SQLITE_OK ){
  3746. sqlite3_bind_int64(pLeafEst, 1, iAbsLevel);
  3747. sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment);
  3748. if( SQLITE_ROW==sqlite3_step(pLeafEst) ){
  3749. nLeafEst = sqlite3_column_int(pLeafEst, 0);
  3750. }
  3751. rc = sqlite3_reset(pLeafEst);
  3752. }
  3753. if( rc!=SQLITE_OK ) return rc;
  3754. /* Calculate the first block to use in the output segment */
  3755. rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0);
  3756. if( rc==SQLITE_OK ){
  3757. if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){
  3758. pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0);
  3759. pWriter->iEnd = pWriter->iStart - 1;
  3760. pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT;
  3761. }
  3762. rc = sqlite3_reset(pFirstBlock);
  3763. }
  3764. if( rc!=SQLITE_OK ) return rc;
  3765. /* Insert the marker in the %_segments table to make sure nobody tries
  3766. ** to steal the space just allocated. This is also used to identify
  3767. ** appendable segments. */
  3768. rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0);
  3769. if( rc!=SQLITE_OK ) return rc;
  3770. pWriter->iAbsLevel = iAbsLevel;
  3771. pWriter->nLeafEst = nLeafEst;
  3772. pWriter->iIdx = iIdx;
  3773. /* Set up the array of NodeWriter objects */
  3774. for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
  3775. pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
  3776. }
  3777. return SQLITE_OK;
  3778. }
  3779. /*
  3780. ** Remove an entry from the %_segdir table. This involves running the
  3781. ** following two statements:
  3782. **
  3783. ** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx
  3784. ** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx
  3785. **
  3786. ** The DELETE statement removes the specific %_segdir level. The UPDATE
  3787. ** statement ensures that the remaining segments have contiguously allocated
  3788. ** idx values.
  3789. */
  3790. static int fts3RemoveSegdirEntry(
  3791. Fts3Table *p, /* FTS3 table handle */
  3792. sqlite3_int64 iAbsLevel, /* Absolute level to delete from */
  3793. int iIdx /* Index of %_segdir entry to delete */
  3794. ){
  3795. int rc; /* Return code */
  3796. sqlite3_stmt *pDelete = 0; /* DELETE statement */
  3797. rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0);
  3798. if( rc==SQLITE_OK ){
  3799. sqlite3_bind_int64(pDelete, 1, iAbsLevel);
  3800. sqlite3_bind_int(pDelete, 2, iIdx);
  3801. sqlite3_step(pDelete);
  3802. rc = sqlite3_reset(pDelete);
  3803. }
  3804. return rc;
  3805. }
  3806. /*
  3807. ** One or more segments have just been removed from absolute level iAbsLevel.
  3808. ** Update the 'idx' values of the remaining segments in the level so that
  3809. ** the idx values are a contiguous sequence starting from 0.
  3810. */
  3811. static int fts3RepackSegdirLevel(
  3812. Fts3Table *p, /* FTS3 table handle */
  3813. sqlite3_int64 iAbsLevel /* Absolute level to repack */
  3814. ){
  3815. int rc; /* Return code */
  3816. int *aIdx = 0; /* Array of remaining idx values */
  3817. int nIdx = 0; /* Valid entries in aIdx[] */
  3818. int nAlloc = 0; /* Allocated size of aIdx[] */
  3819. int i; /* Iterator variable */
  3820. sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */
  3821. sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */
  3822. rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0);
  3823. if( rc==SQLITE_OK ){
  3824. int rc2;
  3825. sqlite3_bind_int64(pSelect, 1, iAbsLevel);
  3826. while( SQLITE_ROW==sqlite3_step(pSelect) ){
  3827. if( nIdx>=nAlloc ){
  3828. int *aNew;
  3829. nAlloc += 16;
  3830. aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int));
  3831. if( !aNew ){
  3832. rc = SQLITE_NOMEM;
  3833. break;
  3834. }
  3835. aIdx = aNew;
  3836. }
  3837. aIdx[nIdx++] = sqlite3_column_int(pSelect, 0);
  3838. }
  3839. rc2 = sqlite3_reset(pSelect);
  3840. if( rc==SQLITE_OK ) rc = rc2;
  3841. }
  3842. if( rc==SQLITE_OK ){
  3843. rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0);
  3844. }
  3845. if( rc==SQLITE_OK ){
  3846. sqlite3_bind_int64(pUpdate, 2, iAbsLevel);
  3847. }
  3848. assert( p->bIgnoreSavepoint==0 );
  3849. p->bIgnoreSavepoint = 1;
  3850. for(i=0; rc==SQLITE_OK && i<nIdx; i++){
  3851. if( aIdx[i]!=i ){
  3852. sqlite3_bind_int(pUpdate, 3, aIdx[i]);
  3853. sqlite3_bind_int(pUpdate, 1, i);
  3854. sqlite3_step(pUpdate);
  3855. rc = sqlite3_reset(pUpdate);
  3856. }
  3857. }
  3858. p->bIgnoreSavepoint = 0;
  3859. sqlite3_free(aIdx);
  3860. return rc;
  3861. }
  3862. static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){
  3863. pNode->a[0] = (char)iHeight;
  3864. if( iChild ){
  3865. assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) );
  3866. pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild);
  3867. }else{
  3868. assert( pNode->nAlloc>=1 );
  3869. pNode->n = 1;
  3870. }
  3871. }
  3872. /*
  3873. ** The first two arguments are a pointer to and the size of a segment b-tree
  3874. ** node. The node may be a leaf or an internal node.
  3875. **
  3876. ** This function creates a new node image in blob object *pNew by copying
  3877. ** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes)
  3878. ** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode.
  3879. */
  3880. static int fts3TruncateNode(
  3881. const char *aNode, /* Current node image */
  3882. int nNode, /* Size of aNode in bytes */
  3883. Blob *pNew, /* OUT: Write new node image here */
  3884. const char *zTerm, /* Omit all terms smaller than this */
  3885. int nTerm, /* Size of zTerm in bytes */
  3886. sqlite3_int64 *piBlock /* OUT: Block number in next layer down */
  3887. ){
  3888. NodeReader reader; /* Reader object */
  3889. Blob prev = {0, 0, 0}; /* Previous term written to new node */
  3890. int rc = SQLITE_OK; /* Return code */
  3891. int bLeaf = aNode[0]=='\0'; /* True for a leaf node */
  3892. /* Allocate required output space */
  3893. blobGrowBuffer(pNew, nNode, &rc);
  3894. if( rc!=SQLITE_OK ) return rc;
  3895. pNew->n = 0;
  3896. /* Populate new node buffer */
  3897. for(rc = nodeReaderInit(&reader, aNode, nNode);
  3898. rc==SQLITE_OK && reader.aNode;
  3899. rc = nodeReaderNext(&reader)
  3900. ){
  3901. if( pNew->n==0 ){
  3902. int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm);
  3903. if( res<0 || (bLeaf==0 && res==0) ) continue;
  3904. fts3StartNode(pNew, (int)aNode[0], reader.iChild);
  3905. *piBlock = reader.iChild;
  3906. }
  3907. rc = fts3AppendToNode(
  3908. pNew, &prev, reader.term.a, reader.term.n,
  3909. reader.aDoclist, reader.nDoclist
  3910. );
  3911. if( rc!=SQLITE_OK ) break;
  3912. }
  3913. if( pNew->n==0 ){
  3914. fts3StartNode(pNew, (int)aNode[0], reader.iChild);
  3915. *piBlock = reader.iChild;
  3916. }
  3917. assert( pNew->n<=pNew->nAlloc );
  3918. nodeReaderRelease(&reader);
  3919. sqlite3_free(prev.a);
  3920. return rc;
  3921. }
  3922. /*
  3923. ** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute
  3924. ** level iAbsLevel. This may involve deleting entries from the %_segments
  3925. ** table, and modifying existing entries in both the %_segments and %_segdir
  3926. ** tables.
  3927. **
  3928. ** SQLITE_OK is returned if the segment is updated successfully. Or an
  3929. ** SQLite error code otherwise.
  3930. */
  3931. static int fts3TruncateSegment(
  3932. Fts3Table *p, /* FTS3 table handle */
  3933. sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */
  3934. int iIdx, /* Index within level of segment to modify */
  3935. const char *zTerm, /* Remove terms smaller than this */
  3936. int nTerm /* Number of bytes in buffer zTerm */
  3937. ){
  3938. int rc = SQLITE_OK; /* Return code */
  3939. Blob root = {0,0,0}; /* New root page image */
  3940. Blob block = {0,0,0}; /* Buffer used for any other block */
  3941. sqlite3_int64 iBlock = 0; /* Block id */
  3942. sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */
  3943. sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */
  3944. sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */
  3945. rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0);
  3946. if( rc==SQLITE_OK ){
  3947. int rc2; /* sqlite3_reset() return code */
  3948. sqlite3_bind_int64(pFetch, 1, iAbsLevel);
  3949. sqlite3_bind_int(pFetch, 2, iIdx);
  3950. if( SQLITE_ROW==sqlite3_step(pFetch) ){
  3951. const char *aRoot = sqlite3_column_blob(pFetch, 4);
  3952. int nRoot = sqlite3_column_bytes(pFetch, 4);
  3953. iOldStart = sqlite3_column_int64(pFetch, 1);
  3954. rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock);
  3955. }
  3956. rc2 = sqlite3_reset(pFetch);
  3957. if( rc==SQLITE_OK ) rc = rc2;
  3958. }
  3959. while( rc==SQLITE_OK && iBlock ){
  3960. char *aBlock = 0;
  3961. int nBlock = 0;
  3962. iNewStart = iBlock;
  3963. rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0);
  3964. if( rc==SQLITE_OK ){
  3965. rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock);
  3966. }
  3967. if( rc==SQLITE_OK ){
  3968. rc = fts3WriteSegment(p, iNewStart, block.a, block.n);
  3969. }
  3970. sqlite3_free(aBlock);
  3971. }
  3972. /* Variable iNewStart now contains the first valid leaf node. */
  3973. if( rc==SQLITE_OK && iNewStart ){
  3974. sqlite3_stmt *pDel = 0;
  3975. rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0);
  3976. if( rc==SQLITE_OK ){
  3977. sqlite3_bind_int64(pDel, 1, iOldStart);
  3978. sqlite3_bind_int64(pDel, 2, iNewStart-1);
  3979. sqlite3_step(pDel);
  3980. rc = sqlite3_reset(pDel);
  3981. }
  3982. }
  3983. if( rc==SQLITE_OK ){
  3984. sqlite3_stmt *pChomp = 0;
  3985. rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0);
  3986. if( rc==SQLITE_OK ){
  3987. sqlite3_bind_int64(pChomp, 1, iNewStart);
  3988. sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC);
  3989. sqlite3_bind_int64(pChomp, 3, iAbsLevel);
  3990. sqlite3_bind_int(pChomp, 4, iIdx);
  3991. sqlite3_step(pChomp);
  3992. rc = sqlite3_reset(pChomp);
  3993. }
  3994. }
  3995. sqlite3_free(root.a);
  3996. sqlite3_free(block.a);
  3997. return rc;
  3998. }
  3999. /*
  4000. ** This function is called after an incrmental-merge operation has run to
  4001. ** merge (or partially merge) two or more segments from absolute level
  4002. ** iAbsLevel.
  4003. **
  4004. ** Each input segment is either removed from the db completely (if all of
  4005. ** its data was copied to the output segment by the incrmerge operation)
  4006. ** or modified in place so that it no longer contains those entries that
  4007. ** have been duplicated in the output segment.
  4008. */
  4009. static int fts3IncrmergeChomp(
  4010. Fts3Table *p, /* FTS table handle */
  4011. sqlite3_int64 iAbsLevel, /* Absolute level containing segments */
  4012. Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */
  4013. int *pnRem /* Number of segments not deleted */
  4014. ){
  4015. int i;
  4016. int nRem = 0;
  4017. int rc = SQLITE_OK;
  4018. for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){
  4019. Fts3SegReader *pSeg = 0;
  4020. int j;
  4021. /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding
  4022. ** somewhere in the pCsr->apSegment[] array. */
  4023. for(j=0; ALWAYS(j<pCsr->nSegment); j++){
  4024. pSeg = pCsr->apSegment[j];
  4025. if( pSeg->iIdx==i ) break;
  4026. }
  4027. assert( j<pCsr->nSegment && pSeg->iIdx==i );
  4028. if( pSeg->aNode==0 ){
  4029. /* Seg-reader is at EOF. Remove the entire input segment. */
  4030. rc = fts3DeleteSegment(p, pSeg);
  4031. if( rc==SQLITE_OK ){
  4032. rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx);
  4033. }
  4034. *pnRem = 0;
  4035. }else{
  4036. /* The incremental merge did not copy all the data from this
  4037. ** segment to the upper level. The segment is modified in place
  4038. ** so that it contains no keys smaller than zTerm/nTerm. */
  4039. const char *zTerm = pSeg->zTerm;
  4040. int nTerm = pSeg->nTerm;
  4041. rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm);
  4042. nRem++;
  4043. }
  4044. }
  4045. if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){
  4046. rc = fts3RepackSegdirLevel(p, iAbsLevel);
  4047. }
  4048. *pnRem = nRem;
  4049. return rc;
  4050. }
  4051. /*
  4052. ** Store an incr-merge hint in the database.
  4053. */
  4054. static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){
  4055. sqlite3_stmt *pReplace = 0;
  4056. int rc; /* Return code */
  4057. rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0);
  4058. if( rc==SQLITE_OK ){
  4059. sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT);
  4060. sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC);
  4061. sqlite3_step(pReplace);
  4062. rc = sqlite3_reset(pReplace);
  4063. }
  4064. return rc;
  4065. }
  4066. /*
  4067. ** Load an incr-merge hint from the database. The incr-merge hint, if one
  4068. ** exists, is stored in the rowid==1 row of the %_stat table.
  4069. **
  4070. ** If successful, populate blob *pHint with the value read from the %_stat
  4071. ** table and return SQLITE_OK. Otherwise, if an error occurs, return an
  4072. ** SQLite error code.
  4073. */
  4074. static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){
  4075. sqlite3_stmt *pSelect = 0;
  4076. int rc;
  4077. pHint->n = 0;
  4078. rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0);
  4079. if( rc==SQLITE_OK ){
  4080. int rc2;
  4081. sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT);
  4082. if( SQLITE_ROW==sqlite3_step(pSelect) ){
  4083. const char *aHint = sqlite3_column_blob(pSelect, 0);
  4084. int nHint = sqlite3_column_bytes(pSelect, 0);
  4085. if( aHint ){
  4086. blobGrowBuffer(pHint, nHint, &rc);
  4087. if( rc==SQLITE_OK ){
  4088. memcpy(pHint->a, aHint, nHint);
  4089. pHint->n = nHint;
  4090. }
  4091. }
  4092. }
  4093. rc2 = sqlite3_reset(pSelect);
  4094. if( rc==SQLITE_OK ) rc = rc2;
  4095. }
  4096. return rc;
  4097. }
  4098. /*
  4099. ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
  4100. ** Otherwise, append an entry to the hint stored in blob *pHint. Each entry
  4101. ** consists of two varints, the absolute level number of the input segments
  4102. ** and the number of input segments.
  4103. **
  4104. ** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs,
  4105. ** set *pRc to an SQLite error code before returning.
  4106. */
  4107. static void fts3IncrmergeHintPush(
  4108. Blob *pHint, /* Hint blob to append to */
  4109. i64 iAbsLevel, /* First varint to store in hint */
  4110. int nInput, /* Second varint to store in hint */
  4111. int *pRc /* IN/OUT: Error code */
  4112. ){
  4113. blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc);
  4114. if( *pRc==SQLITE_OK ){
  4115. pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel);
  4116. pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput);
  4117. }
  4118. }
  4119. /*
  4120. ** Read the last entry (most recently pushed) from the hint blob *pHint
  4121. ** and then remove the entry. Write the two values read to *piAbsLevel and
  4122. ** *pnInput before returning.
  4123. **
  4124. ** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does
  4125. ** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB.
  4126. */
  4127. static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
  4128. const int nHint = pHint->n;
  4129. int i;
  4130. i = pHint->n-2;
  4131. while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
  4132. while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
  4133. pHint->n = i;
  4134. i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel);
  4135. i += sqlite3Fts3GetVarint32(&pHint->a[i], pnInput);
  4136. if( i!=nHint ) return SQLITE_CORRUPT_VTAB;
  4137. return SQLITE_OK;
  4138. }
  4139. /*
  4140. ** Attempt an incremental merge that writes nMerge leaf blocks.
  4141. **
  4142. ** Incremental merges happen nMin segments at a time. The two
  4143. ** segments to be merged are the nMin oldest segments (the ones with
  4144. ** the smallest indexes) in the highest level that contains at least
  4145. ** nMin segments. Multiple merges might occur in an attempt to write the
  4146. ** quota of nMerge leaf blocks.
  4147. */
  4148. int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
  4149. int rc; /* Return code */
  4150. int nRem = nMerge; /* Number of leaf pages yet to be written */
  4151. Fts3MultiSegReader *pCsr; /* Cursor used to read input data */
  4152. Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */
  4153. IncrmergeWriter *pWriter; /* Writer object */
  4154. int nSeg = 0; /* Number of input segments */
  4155. sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */
  4156. Blob hint = {0, 0, 0}; /* Hint read from %_stat table */
  4157. int bDirtyHint = 0; /* True if blob 'hint' has been modified */
  4158. /* Allocate space for the cursor, filter and writer objects */
  4159. const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter);
  4160. pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc);
  4161. if( !pWriter ) return SQLITE_NOMEM;
  4162. pFilter = (Fts3SegFilter *)&pWriter[1];
  4163. pCsr = (Fts3MultiSegReader *)&pFilter[1];
  4164. rc = fts3IncrmergeHintLoad(p, &hint);
  4165. while( rc==SQLITE_OK && nRem>0 ){
  4166. const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
  4167. sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
  4168. int bUseHint = 0; /* True if attempting to append */
  4169. /* Search the %_segdir table for the absolute level with the smallest
  4170. ** relative level number that contains at least nMin segments, if any.
  4171. ** If one is found, set iAbsLevel to the absolute level number and
  4172. ** nSeg to nMin. If no level with at least nMin segments can be found,
  4173. ** set nSeg to -1.
  4174. */
  4175. rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0);
  4176. sqlite3_bind_int(pFindLevel, 1, nMin);
  4177. if( sqlite3_step(pFindLevel)==SQLITE_ROW ){
  4178. iAbsLevel = sqlite3_column_int64(pFindLevel, 0);
  4179. nSeg = nMin;
  4180. }else{
  4181. nSeg = -1;
  4182. }
  4183. rc = sqlite3_reset(pFindLevel);
  4184. /* If the hint read from the %_stat table is not empty, check if the
  4185. ** last entry in it specifies a relative level smaller than or equal
  4186. ** to the level identified by the block above (if any). If so, this
  4187. ** iteration of the loop will work on merging at the hinted level.
  4188. */
  4189. if( rc==SQLITE_OK && hint.n ){
  4190. int nHint = hint.n;
  4191. sqlite3_int64 iHintAbsLevel = 0; /* Hint level */
  4192. int nHintSeg = 0; /* Hint number of segments */
  4193. rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg);
  4194. if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){
  4195. iAbsLevel = iHintAbsLevel;
  4196. nSeg = nHintSeg;
  4197. bUseHint = 1;
  4198. bDirtyHint = 1;
  4199. }else{
  4200. /* This undoes the effect of the HintPop() above - so that no entry
  4201. ** is removed from the hint blob. */
  4202. hint.n = nHint;
  4203. }
  4204. }
  4205. /* If nSeg is less that zero, then there is no level with at least
  4206. ** nMin segments and no hint in the %_stat table. No work to do.
  4207. ** Exit early in this case. */
  4208. if( nSeg<0 ) break;
  4209. /* Open a cursor to iterate through the contents of the oldest nSeg
  4210. ** indexes of absolute level iAbsLevel. If this cursor is opened using
  4211. ** the 'hint' parameters, it is possible that there are less than nSeg
  4212. ** segments available in level iAbsLevel. In this case, no work is
  4213. ** done on iAbsLevel - fall through to the next iteration of the loop
  4214. ** to start work on some other level. */
  4215. memset(pWriter, 0, nAlloc);
  4216. pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
  4217. if( rc==SQLITE_OK ){
  4218. rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
  4219. }
  4220. if( SQLITE_OK==rc && pCsr->nSegment==nSeg
  4221. && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
  4222. && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
  4223. ){
  4224. int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
  4225. rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
  4226. if( rc==SQLITE_OK ){
  4227. if( bUseHint && iIdx>0 ){
  4228. const char *zKey = pCsr->zTerm;
  4229. int nKey = pCsr->nTerm;
  4230. rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
  4231. }else{
  4232. rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
  4233. }
  4234. }
  4235. if( rc==SQLITE_OK && pWriter->nLeafEst ){
  4236. fts3LogMerge(nSeg, iAbsLevel);
  4237. do {
  4238. rc = fts3IncrmergeAppend(p, pWriter, pCsr);
  4239. if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr);
  4240. if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK;
  4241. }while( rc==SQLITE_ROW );
  4242. /* Update or delete the input segments */
  4243. if( rc==SQLITE_OK ){
  4244. nRem -= (1 + pWriter->nWork);
  4245. rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg);
  4246. if( nSeg!=0 ){
  4247. bDirtyHint = 1;
  4248. fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc);
  4249. }
  4250. }
  4251. }
  4252. fts3IncrmergeRelease(p, pWriter, &rc);
  4253. }
  4254. sqlite3Fts3SegReaderFinish(pCsr);
  4255. }
  4256. /* Write the hint values into the %_stat table for the next incr-merger */
  4257. if( bDirtyHint && rc==SQLITE_OK ){
  4258. rc = fts3IncrmergeHintStore(p, &hint);
  4259. }
  4260. sqlite3_free(pWriter);
  4261. sqlite3_free(hint.a);
  4262. return rc;
  4263. }
  4264. /*
  4265. ** Convert the text beginning at *pz into an integer and return
  4266. ** its value. Advance *pz to point to the first character past
  4267. ** the integer.
  4268. */
  4269. static int fts3Getint(const char **pz){
  4270. const char *z = *pz;
  4271. int i = 0;
  4272. while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0';
  4273. *pz = z;
  4274. return i;
  4275. }
  4276. /*
  4277. ** Process statements of the form:
  4278. **
  4279. ** INSERT INTO table(table) VALUES('merge=A,B');
  4280. **
  4281. ** A and B are integers that decode to be the number of leaf pages
  4282. ** written for the merge, and the minimum number of segments on a level
  4283. ** before it will be selected for a merge, respectively.
  4284. */
  4285. static int fts3DoIncrmerge(
  4286. Fts3Table *p, /* FTS3 table handle */
  4287. const char *zParam /* Nul-terminated string containing "A,B" */
  4288. ){
  4289. int rc;
  4290. int nMin = (FTS3_MERGE_COUNT / 2);
  4291. int nMerge = 0;
  4292. const char *z = zParam;
  4293. /* Read the first integer value */
  4294. nMerge = fts3Getint(&z);
  4295. /* If the first integer value is followed by a ',', read the second
  4296. ** integer value. */
  4297. if( z[0]==',' && z[1]!='\0' ){
  4298. z++;
  4299. nMin = fts3Getint(&z);
  4300. }
  4301. if( z[0]!='\0' || nMin<2 ){
  4302. rc = SQLITE_ERROR;
  4303. }else{
  4304. rc = SQLITE_OK;
  4305. if( !p->bHasStat ){
  4306. assert( p->bFts4==0 );
  4307. sqlite3Fts3CreateStatTable(&rc, p);
  4308. }
  4309. if( rc==SQLITE_OK ){
  4310. rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
  4311. }
  4312. sqlite3Fts3SegmentsClose(p);
  4313. }
  4314. return rc;
  4315. }
  4316. /*
  4317. ** Process statements of the form:
  4318. **
  4319. ** INSERT INTO table(table) VALUES('automerge=X');
  4320. **
  4321. ** where X is an integer. X==0 means to turn automerge off. X!=0 means
  4322. ** turn it on. The setting is persistent.
  4323. */
  4324. static int fts3DoAutoincrmerge(
  4325. Fts3Table *p, /* FTS3 table handle */
  4326. const char *zParam /* Nul-terminated string containing boolean */
  4327. ){
  4328. int rc = SQLITE_OK;
  4329. sqlite3_stmt *pStmt = 0;
  4330. p->bAutoincrmerge = fts3Getint(&zParam)!=0;
  4331. if( !p->bHasStat ){
  4332. assert( p->bFts4==0 );
  4333. sqlite3Fts3CreateStatTable(&rc, p);
  4334. if( rc ) return rc;
  4335. }
  4336. rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
  4337. if( rc ) return rc;
  4338. sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
  4339. sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
  4340. sqlite3_step(pStmt);
  4341. rc = sqlite3_reset(pStmt);
  4342. return rc;
  4343. }
  4344. /*
  4345. ** Return a 64-bit checksum for the FTS index entry specified by the
  4346. ** arguments to this function.
  4347. */
  4348. static u64 fts3ChecksumEntry(
  4349. const char *zTerm, /* Pointer to buffer containing term */
  4350. int nTerm, /* Size of zTerm in bytes */
  4351. int iLangid, /* Language id for current row */
  4352. int iIndex, /* Index (0..Fts3Table.nIndex-1) */
  4353. i64 iDocid, /* Docid for current row. */
  4354. int iCol, /* Column number */
  4355. int iPos /* Position */
  4356. ){
  4357. int i;
  4358. u64 ret = (u64)iDocid;
  4359. ret += (ret<<3) + iLangid;
  4360. ret += (ret<<3) + iIndex;
  4361. ret += (ret<<3) + iCol;
  4362. ret += (ret<<3) + iPos;
  4363. for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i];
  4364. return ret;
  4365. }
  4366. /*
  4367. ** Return a checksum of all entries in the FTS index that correspond to
  4368. ** language id iLangid. The checksum is calculated by XORing the checksums
  4369. ** of each individual entry (see fts3ChecksumEntry()) together.
  4370. **
  4371. ** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
  4372. ** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
  4373. ** return value is undefined in this case.
  4374. */
  4375. static u64 fts3ChecksumIndex(
  4376. Fts3Table *p, /* FTS3 table handle */
  4377. int iLangid, /* Language id to return cksum for */
  4378. int iIndex, /* Index to cksum (0..p->nIndex-1) */
  4379. int *pRc /* OUT: Return code */
  4380. ){
  4381. Fts3SegFilter filter;
  4382. Fts3MultiSegReader csr;
  4383. int rc;
  4384. u64 cksum = 0;
  4385. assert( *pRc==SQLITE_OK );
  4386. memset(&filter, 0, sizeof(filter));
  4387. memset(&csr, 0, sizeof(csr));
  4388. filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
  4389. filter.flags |= FTS3_SEGMENT_SCAN;
  4390. rc = sqlite3Fts3SegReaderCursor(
  4391. p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
  4392. );
  4393. if( rc==SQLITE_OK ){
  4394. rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
  4395. }
  4396. if( rc==SQLITE_OK ){
  4397. while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
  4398. char *pCsr = csr.aDoclist;
  4399. char *pEnd = &pCsr[csr.nDoclist];
  4400. i64 iDocid = 0;
  4401. i64 iCol = 0;
  4402. i64 iPos = 0;
  4403. pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid);
  4404. while( pCsr<pEnd ){
  4405. i64 iVal = 0;
  4406. pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
  4407. if( pCsr<pEnd ){
  4408. if( iVal==0 || iVal==1 ){
  4409. iCol = 0;
  4410. iPos = 0;
  4411. if( iVal ){
  4412. pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
  4413. }else{
  4414. pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
  4415. iDocid += iVal;
  4416. }
  4417. }else{
  4418. iPos += (iVal - 2);
  4419. cksum = cksum ^ fts3ChecksumEntry(
  4420. csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid,
  4421. (int)iCol, (int)iPos
  4422. );
  4423. }
  4424. }
  4425. }
  4426. }
  4427. }
  4428. sqlite3Fts3SegReaderFinish(&csr);
  4429. *pRc = rc;
  4430. return cksum;
  4431. }
  4432. /*
  4433. ** Check if the contents of the FTS index match the current contents of the
  4434. ** content table. If no error occurs and the contents do match, set *pbOk
  4435. ** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
  4436. ** to false before returning.
  4437. **
  4438. ** If an error occurs (e.g. an OOM or IO error), return an SQLite error
  4439. ** code. The final value of *pbOk is undefined in this case.
  4440. */
  4441. static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
  4442. int rc = SQLITE_OK; /* Return code */
  4443. u64 cksum1 = 0; /* Checksum based on FTS index contents */
  4444. u64 cksum2 = 0; /* Checksum based on %_content contents */
  4445. sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */
  4446. /* This block calculates the checksum according to the FTS index. */
  4447. rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  4448. if( rc==SQLITE_OK ){
  4449. int rc2;
  4450. sqlite3_bind_int(pAllLangid, 1, p->nIndex);
  4451. while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
  4452. int iLangid = sqlite3_column_int(pAllLangid, 0);
  4453. int i;
  4454. for(i=0; i<p->nIndex; i++){
  4455. cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
  4456. }
  4457. }
  4458. rc2 = sqlite3_reset(pAllLangid);
  4459. if( rc==SQLITE_OK ) rc = rc2;
  4460. }
  4461. /* This block calculates the checksum according to the %_content table */
  4462. rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  4463. if( rc==SQLITE_OK ){
  4464. sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
  4465. sqlite3_stmt *pStmt = 0;
  4466. char *zSql;
  4467. zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
  4468. if( !zSql ){
  4469. rc = SQLITE_NOMEM;
  4470. }else{
  4471. rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
  4472. sqlite3_free(zSql);
  4473. }
  4474. while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
  4475. i64 iDocid = sqlite3_column_int64(pStmt, 0);
  4476. int iLang = langidFromSelect(p, pStmt);
  4477. int iCol;
  4478. for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
  4479. const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
  4480. int nText = sqlite3_column_bytes(pStmt, iCol+1);
  4481. sqlite3_tokenizer_cursor *pT = 0;
  4482. rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
  4483. while( rc==SQLITE_OK ){
  4484. char const *zToken; /* Buffer containing token */
  4485. int nToken = 0; /* Number of bytes in token */
  4486. int iDum1 = 0, iDum2 = 0; /* Dummy variables */
  4487. int iPos = 0; /* Position of token in zText */
  4488. rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
  4489. if( rc==SQLITE_OK ){
  4490. int i;
  4491. cksum2 = cksum2 ^ fts3ChecksumEntry(
  4492. zToken, nToken, iLang, 0, iDocid, iCol, iPos
  4493. );
  4494. for(i=1; i<p->nIndex; i++){
  4495. if( p->aIndex[i].nPrefix<=nToken ){
  4496. cksum2 = cksum2 ^ fts3ChecksumEntry(
  4497. zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
  4498. );
  4499. }
  4500. }
  4501. }
  4502. }
  4503. if( pT ) pModule->xClose(pT);
  4504. if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  4505. }
  4506. }
  4507. sqlite3_finalize(pStmt);
  4508. }
  4509. *pbOk = (cksum1==cksum2);
  4510. return rc;
  4511. }
  4512. /*
  4513. ** Run the integrity-check. If no error occurs and the current contents of
  4514. ** the FTS index are correct, return SQLITE_OK. Or, if the contents of the
  4515. ** FTS index are incorrect, return SQLITE_CORRUPT_VTAB.
  4516. **
  4517. ** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite
  4518. ** error code.
  4519. **
  4520. ** The integrity-check works as follows. For each token and indexed token
  4521. ** prefix in the document set, a 64-bit checksum is calculated (by code
  4522. ** in fts3ChecksumEntry()) based on the following:
  4523. **
  4524. ** + The index number (0 for the main index, 1 for the first prefix
  4525. ** index etc.),
  4526. ** + The token (or token prefix) text itself,
  4527. ** + The language-id of the row it appears in,
  4528. ** + The docid of the row it appears in,
  4529. ** + The column it appears in, and
  4530. ** + The tokens position within that column.
  4531. **
  4532. ** The checksums for all entries in the index are XORed together to create
  4533. ** a single checksum for the entire index.
  4534. **
  4535. ** The integrity-check code calculates the same checksum in two ways:
  4536. **
  4537. ** 1. By scanning the contents of the FTS index, and
  4538. ** 2. By scanning and tokenizing the content table.
  4539. **
  4540. ** If the two checksums are identical, the integrity-check is deemed to have
  4541. ** passed.
  4542. */
  4543. static int fts3DoIntegrityCheck(
  4544. Fts3Table *p /* FTS3 table handle */
  4545. ){
  4546. int rc;
  4547. int bOk = 0;
  4548. rc = fts3IntegrityCheck(p, &bOk);
  4549. if( rc==SQLITE_OK && bOk==0 ) rc = SQLITE_CORRUPT_VTAB;
  4550. return rc;
  4551. }
  4552. /*
  4553. ** Handle a 'special' INSERT of the form:
  4554. **
  4555. ** "INSERT INTO tbl(tbl) VALUES(<expr>)"
  4556. **
  4557. ** Argument pVal contains the result of <expr>. Currently the only
  4558. ** meaningful value to insert is the text 'optimize'.
  4559. */
  4560. static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
  4561. int rc; /* Return Code */
  4562. const char *zVal = (const char *)sqlite3_value_text(pVal);
  4563. int nVal = sqlite3_value_bytes(pVal);
  4564. if( !zVal ){
  4565. return SQLITE_NOMEM;
  4566. }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
  4567. rc = fts3DoOptimize(p, 0);
  4568. }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
  4569. rc = fts3DoRebuild(p);
  4570. }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){
  4571. rc = fts3DoIntegrityCheck(p);
  4572. }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){
  4573. rc = fts3DoIncrmerge(p, &zVal[6]);
  4574. }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){
  4575. rc = fts3DoAutoincrmerge(p, &zVal[10]);
  4576. #ifdef SQLITE_TEST
  4577. }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
  4578. p->nNodeSize = atoi(&zVal[9]);
  4579. rc = SQLITE_OK;
  4580. }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
  4581. p->nMaxPendingData = atoi(&zVal[11]);
  4582. rc = SQLITE_OK;
  4583. }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){
  4584. p->bNoIncrDoclist = atoi(&zVal[21]);
  4585. rc = SQLITE_OK;
  4586. #endif
  4587. }else{
  4588. rc = SQLITE_ERROR;
  4589. }
  4590. return rc;
  4591. }
  4592. #ifndef SQLITE_DISABLE_FTS4_DEFERRED
  4593. /*
  4594. ** Delete all cached deferred doclists. Deferred doclists are cached
  4595. ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
  4596. */
  4597. void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
  4598. Fts3DeferredToken *pDef;
  4599. for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
  4600. fts3PendingListDelete(pDef->pList);
  4601. pDef->pList = 0;
  4602. }
  4603. }
  4604. /*
  4605. ** Free all entries in the pCsr->pDeffered list. Entries are added to
  4606. ** this list using sqlite3Fts3DeferToken().
  4607. */
  4608. void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
  4609. Fts3DeferredToken *pDef;
  4610. Fts3DeferredToken *pNext;
  4611. for(pDef=pCsr->pDeferred; pDef; pDef=pNext){
  4612. pNext = pDef->pNext;
  4613. fts3PendingListDelete(pDef->pList);
  4614. sqlite3_free(pDef);
  4615. }
  4616. pCsr->pDeferred = 0;
  4617. }
  4618. /*
  4619. ** Generate deferred-doclists for all tokens in the pCsr->pDeferred list
  4620. ** based on the row that pCsr currently points to.
  4621. **
  4622. ** A deferred-doclist is like any other doclist with position information
  4623. ** included, except that it only contains entries for a single row of the
  4624. ** table, not for all rows.
  4625. */
  4626. int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
  4627. int rc = SQLITE_OK; /* Return code */
  4628. if( pCsr->pDeferred ){
  4629. int i; /* Used to iterate through table columns */
  4630. sqlite3_int64 iDocid; /* Docid of the row pCsr points to */
  4631. Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */
  4632. Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
  4633. sqlite3_tokenizer *pT = p->pTokenizer;
  4634. sqlite3_tokenizer_module const *pModule = pT->pModule;
  4635. assert( pCsr->isRequireSeek==0 );
  4636. iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
  4637. for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
  4638. if( p->abNotindexed[i]==0 ){
  4639. const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
  4640. sqlite3_tokenizer_cursor *pTC = 0;
  4641. rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
  4642. while( rc==SQLITE_OK ){
  4643. char const *zToken; /* Buffer containing token */
  4644. int nToken = 0; /* Number of bytes in token */
  4645. int iDum1 = 0, iDum2 = 0; /* Dummy variables */
  4646. int iPos = 0; /* Position of token in zText */
  4647. rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
  4648. for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
  4649. Fts3PhraseToken *pPT = pDef->pToken;
  4650. if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
  4651. && (pPT->bFirst==0 || iPos==0)
  4652. && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
  4653. && (0==memcmp(zToken, pPT->z, pPT->n))
  4654. ){
  4655. fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
  4656. }
  4657. }
  4658. }
  4659. if( pTC ) pModule->xClose(pTC);
  4660. if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  4661. }
  4662. }
  4663. for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
  4664. if( pDef->pList ){
  4665. rc = fts3PendingListAppendVarint(&pDef->pList, 0);
  4666. }
  4667. }
  4668. }
  4669. return rc;
  4670. }
  4671. int sqlite3Fts3DeferredTokenList(
  4672. Fts3DeferredToken *p,
  4673. char **ppData,
  4674. int *pnData
  4675. ){
  4676. char *pRet;
  4677. int nSkip;
  4678. sqlite3_int64 dummy;
  4679. *ppData = 0;
  4680. *pnData = 0;
  4681. if( p->pList==0 ){
  4682. return SQLITE_OK;
  4683. }
  4684. pRet = (char *)sqlite3_malloc(p->pList->nData);
  4685. if( !pRet ) return SQLITE_NOMEM;
  4686. nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy);
  4687. *pnData = p->pList->nData - nSkip;
  4688. *ppData = pRet;
  4689. memcpy(pRet, &p->pList->aData[nSkip], *pnData);
  4690. return SQLITE_OK;
  4691. }
  4692. /*
  4693. ** Add an entry for token pToken to the pCsr->pDeferred list.
  4694. */
  4695. int sqlite3Fts3DeferToken(
  4696. Fts3Cursor *pCsr, /* Fts3 table cursor */
  4697. Fts3PhraseToken *pToken, /* Token to defer */
  4698. int iCol /* Column that token must appear in (or -1) */
  4699. ){
  4700. Fts3DeferredToken *pDeferred;
  4701. pDeferred = sqlite3_malloc(sizeof(*pDeferred));
  4702. if( !pDeferred ){
  4703. return SQLITE_NOMEM;
  4704. }
  4705. memset(pDeferred, 0, sizeof(*pDeferred));
  4706. pDeferred->pToken = pToken;
  4707. pDeferred->pNext = pCsr->pDeferred;
  4708. pDeferred->iCol = iCol;
  4709. pCsr->pDeferred = pDeferred;
  4710. assert( pToken->pDeferred==0 );
  4711. pToken->pDeferred = pDeferred;
  4712. return SQLITE_OK;
  4713. }
  4714. #endif
  4715. /*
  4716. ** SQLite value pRowid contains the rowid of a row that may or may not be
  4717. ** present in the FTS3 table. If it is, delete it and adjust the contents
  4718. ** of subsiduary data structures accordingly.
  4719. */
  4720. static int fts3DeleteByRowid(
  4721. Fts3Table *p,
  4722. sqlite3_value *pRowid,
  4723. int *pnChng, /* IN/OUT: Decrement if row is deleted */
  4724. u32 *aSzDel
  4725. ){
  4726. int rc = SQLITE_OK; /* Return code */
  4727. int bFound = 0; /* True if *pRowid really is in the table */
  4728. fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound);
  4729. if( bFound && rc==SQLITE_OK ){
  4730. int isEmpty = 0; /* Deleting *pRowid leaves the table empty */
  4731. rc = fts3IsEmpty(p, pRowid, &isEmpty);
  4732. if( rc==SQLITE_OK ){
  4733. if( isEmpty ){
  4734. /* Deleting this row means the whole table is empty. In this case
  4735. ** delete the contents of all three tables and throw away any
  4736. ** data in the pendingTerms hash table. */
  4737. rc = fts3DeleteAll(p, 1);
  4738. *pnChng = 0;
  4739. memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);
  4740. }else{
  4741. *pnChng = *pnChng - 1;
  4742. if( p->zContentTbl==0 ){
  4743. fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
  4744. }
  4745. if( p->bHasDocsize ){
  4746. fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
  4747. }
  4748. }
  4749. }
  4750. }
  4751. return rc;
  4752. }
  4753. /*
  4754. ** This function does the work for the xUpdate method of FTS3 virtual
  4755. ** tables. The schema of the virtual table being:
  4756. **
  4757. ** CREATE TABLE <table name>(
  4758. ** <user columns>,
  4759. ** <table name> HIDDEN,
  4760. ** docid HIDDEN,
  4761. ** <langid> HIDDEN
  4762. ** );
  4763. **
  4764. **
  4765. */
  4766. int sqlite3Fts3UpdateMethod(
  4767. sqlite3_vtab *pVtab, /* FTS3 vtab object */
  4768. int nArg, /* Size of argument array */
  4769. sqlite3_value **apVal, /* Array of arguments */
  4770. sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
  4771. ){
  4772. Fts3Table *p = (Fts3Table *)pVtab;
  4773. int rc = SQLITE_OK; /* Return Code */
  4774. int isRemove = 0; /* True for an UPDATE or DELETE */
  4775. u32 *aSzIns = 0; /* Sizes of inserted documents */
  4776. u32 *aSzDel = 0; /* Sizes of deleted documents */
  4777. int nChng = 0; /* Net change in number of documents */
  4778. int bInsertDone = 0;
  4779. assert( p->pSegments==0 );
  4780. assert(
  4781. nArg==1 /* DELETE operations */
  4782. || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */
  4783. );
  4784. /* Check for a "special" INSERT operation. One of the form:
  4785. **
  4786. ** INSERT INTO xyz(xyz) VALUES('command');
  4787. */
  4788. if( nArg>1
  4789. && sqlite3_value_type(apVal[0])==SQLITE_NULL
  4790. && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL
  4791. ){
  4792. rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
  4793. goto update_out;
  4794. }
  4795. if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
  4796. rc = SQLITE_CONSTRAINT;
  4797. goto update_out;
  4798. }
  4799. /* Allocate space to hold the change in document sizes */
  4800. aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 );
  4801. if( aSzDel==0 ){
  4802. rc = SQLITE_NOMEM;
  4803. goto update_out;
  4804. }
  4805. aSzIns = &aSzDel[p->nColumn+1];
  4806. memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
  4807. rc = fts3Writelock(p);
  4808. if( rc!=SQLITE_OK ) goto update_out;
  4809. /* If this is an INSERT operation, or an UPDATE that modifies the rowid
  4810. ** value, then this operation requires constraint handling.
  4811. **
  4812. ** If the on-conflict mode is REPLACE, this means that the existing row
  4813. ** should be deleted from the database before inserting the new row. Or,
  4814. ** if the on-conflict mode is other than REPLACE, then this method must
  4815. ** detect the conflict and return SQLITE_CONSTRAINT before beginning to
  4816. ** modify the database file.
  4817. */
  4818. if( nArg>1 && p->zContentTbl==0 ){
  4819. /* Find the value object that holds the new rowid value. */
  4820. sqlite3_value *pNewRowid = apVal[3+p->nColumn];
  4821. if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){
  4822. pNewRowid = apVal[1];
  4823. }
  4824. if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && (
  4825. sqlite3_value_type(apVal[0])==SQLITE_NULL
  4826. || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid)
  4827. )){
  4828. /* The new rowid is not NULL (in this case the rowid will be
  4829. ** automatically assigned and there is no chance of a conflict), and
  4830. ** the statement is either an INSERT or an UPDATE that modifies the
  4831. ** rowid column. So if the conflict mode is REPLACE, then delete any
  4832. ** existing row with rowid=pNewRowid.
  4833. **
  4834. ** Or, if the conflict mode is not REPLACE, insert the new record into
  4835. ** the %_content table. If we hit the duplicate rowid constraint (or any
  4836. ** other error) while doing so, return immediately.
  4837. **
  4838. ** This branch may also run if pNewRowid contains a value that cannot
  4839. ** be losslessly converted to an integer. In this case, the eventual
  4840. ** call to fts3InsertData() (either just below or further on in this
  4841. ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is
  4842. ** invoked, it will delete zero rows (since no row will have
  4843. ** docid=$pNewRowid if $pNewRowid is not an integer value).
  4844. */
  4845. if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){
  4846. rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel);
  4847. }else{
  4848. rc = fts3InsertData(p, apVal, pRowid);
  4849. bInsertDone = 1;
  4850. }
  4851. }
  4852. }
  4853. if( rc!=SQLITE_OK ){
  4854. goto update_out;
  4855. }
  4856. /* If this is a DELETE or UPDATE operation, remove the old record. */
  4857. if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
  4858. assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
  4859. rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
  4860. isRemove = 1;
  4861. }
  4862. /* If this is an INSERT or UPDATE operation, insert the new record. */
  4863. if( nArg>1 && rc==SQLITE_OK ){
  4864. int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
  4865. if( bInsertDone==0 ){
  4866. rc = fts3InsertData(p, apVal, pRowid);
  4867. if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
  4868. rc = FTS_CORRUPT_VTAB;
  4869. }
  4870. }
  4871. if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
  4872. rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
  4873. }
  4874. if( rc==SQLITE_OK ){
  4875. assert( p->iPrevDocid==*pRowid );
  4876. rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
  4877. }
  4878. if( p->bHasDocsize ){
  4879. fts3InsertDocsize(&rc, p, aSzIns);
  4880. }
  4881. nChng++;
  4882. }
  4883. if( p->bFts4 ){
  4884. fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
  4885. }
  4886. update_out:
  4887. sqlite3_free(aSzDel);
  4888. sqlite3Fts3SegmentsClose(p);
  4889. return rc;
  4890. }
  4891. /*
  4892. ** Flush any data in the pending-terms hash table to disk. If successful,
  4893. ** merge all segments in the database (including the new segment, if
  4894. ** there was any data to flush) into a single segment.
  4895. */
  4896. int sqlite3Fts3Optimize(Fts3Table *p){
  4897. int rc;
  4898. rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
  4899. if( rc==SQLITE_OK ){
  4900. rc = fts3DoOptimize(p, 1);
  4901. if( rc==SQLITE_OK || rc==SQLITE_DONE ){
  4902. int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
  4903. if( rc2!=SQLITE_OK ) rc = rc2;
  4904. }else{
  4905. sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
  4906. sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
  4907. }
  4908. }
  4909. sqlite3Fts3SegmentsClose(p);
  4910. return rc;
  4911. }
  4912. #endif