pager.c 273 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
7677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978697969806981698269836984698569866987698869896990699169926993699469956996699769986999700070017002700370047005700670077008700970107011701270137014701570167017701870197020702170227023702470257026702770287029703070317032703370347035703670377038703970407041704270437044704570467047704870497050705170527053705470557056705770587059706070617062706370647065706670677068706970707071707270737074707570767077707870797080708170827083708470857086708770887089709070917092709370947095709670977098709971007101710271037104710571067107710871097110711171127113711471157116711771187119712071217122712371247125712671277128712971307131713271337134713571367137713871397140714171427143714471457146714771487149715071517152715371547155715671577158715971607161716271637164716571667167716871697170717171727173717471757176717771787179718071817182718371847185718671877188
  1. /*
  2. ** 2001 September 15
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. *************************************************************************
  12. ** This is the implementation of the page cache subsystem or "pager".
  13. **
  14. ** The pager is used to access a database disk file. It implements
  15. ** atomic commit and rollback through the use of a journal file that
  16. ** is separate from the database file. The pager also implements file
  17. ** locking to prevent two processes from writing the same database
  18. ** file simultaneously, or one process from reading the database while
  19. ** another is writing.
  20. */
  21. #ifndef SQLITE_OMIT_DISKIO
  22. #include "sqliteInt.h"
  23. #include "wal.h"
  24. /******************* NOTES ON THE DESIGN OF THE PAGER ************************
  25. **
  26. ** This comment block describes invariants that hold when using a rollback
  27. ** journal. These invariants do not apply for journal_mode=WAL,
  28. ** journal_mode=MEMORY, or journal_mode=OFF.
  29. **
  30. ** Within this comment block, a page is deemed to have been synced
  31. ** automatically as soon as it is written when PRAGMA synchronous=OFF.
  32. ** Otherwise, the page is not synced until the xSync method of the VFS
  33. ** is called successfully on the file containing the page.
  34. **
  35. ** Definition: A page of the database file is said to be "overwriteable" if
  36. ** one or more of the following are true about the page:
  37. **
  38. ** (a) The original content of the page as it was at the beginning of
  39. ** the transaction has been written into the rollback journal and
  40. ** synced.
  41. **
  42. ** (b) The page was a freelist leaf page at the start of the transaction.
  43. **
  44. ** (c) The page number is greater than the largest page that existed in
  45. ** the database file at the start of the transaction.
  46. **
  47. ** (1) A page of the database file is never overwritten unless one of the
  48. ** following is true:
  49. **
  50. ** (a) The page and all other pages on the same sector are overwriteable.
  51. **
  52. ** (b) The atomic page write optimization is enabled, and the entire
  53. ** transaction other than the update of the transaction sequence
  54. ** number consists of a single page change.
  55. **
  56. ** (2) The content of a page written into the rollback journal exactly matches
  57. ** both the content in the database when the rollback journal was written
  58. ** and the content in the database at the beginning of the current
  59. ** transaction.
  60. **
  61. ** (3) Writes to the database file are an integer multiple of the page size
  62. ** in length and are aligned on a page boundary.
  63. **
  64. ** (4) Reads from the database file are either aligned on a page boundary and
  65. ** an integer multiple of the page size in length or are taken from the
  66. ** first 100 bytes of the database file.
  67. **
  68. ** (5) All writes to the database file are synced prior to the rollback journal
  69. ** being deleted, truncated, or zeroed.
  70. **
  71. ** (6) If a master journal file is used, then all writes to the database file
  72. ** are synced prior to the master journal being deleted.
  73. **
  74. ** Definition: Two databases (or the same database at two points in time)
  75. ** are said to be "logically equivalent" if they give the same answer to
  76. ** all queries. Note in particular the content of freelist leaf
  77. ** pages can be changed arbitrarily without affecting the logical equivalence
  78. ** of the database.
  79. **
  80. ** (7) At any time, if any subset, including the empty set and the total set,
  81. ** of the unsynced changes to a rollback journal are removed and the
  82. ** journal is rolled back, the resulting database file will be logically
  83. ** equivalent to the database file at the beginning of the transaction.
  84. **
  85. ** (8) When a transaction is rolled back, the xTruncate method of the VFS
  86. ** is called to restore the database file to the same size it was at
  87. ** the beginning of the transaction. (In some VFSes, the xTruncate
  88. ** method is a no-op, but that does not change the fact that SQLite will
  89. ** invoke it.)
  90. **
  91. ** (9) Whenever the database file is modified, at least one bit in the range
  92. ** of bytes from 24 through 39 inclusive will be changed prior to releasing
  93. ** the EXCLUSIVE lock, thus signaling other connections on the same
  94. ** database to flush their caches.
  95. **
  96. ** (10) The pattern of bits in bytes 24 through 39 shall not repeat in less
  97. ** than one billion transactions.
  98. **
  99. ** (11) A database file is well-formed at the beginning and at the conclusion
  100. ** of every transaction.
  101. **
  102. ** (12) An EXCLUSIVE lock is held on the database file when writing to
  103. ** the database file.
  104. **
  105. ** (13) A SHARED lock is held on the database file while reading any
  106. ** content out of the database file.
  107. **
  108. ******************************************************************************/
  /*
  ** Macros for troubleshooting.  Normally turned off.
  **
  ** When tracing is compiled in (change the "#if 0" below to "#if 1"),
  ** PAGERTRACE(X) passes the parenthesized argument list X to
  ** sqlite3DebugPrintf() whenever sqlite3PagerTrace is non-zero.  Because
  ** X is a complete argument list, callers use double parentheses:
  **
  **     PAGERTRACE(("format %d\n", value));
  **
  ** The enabled form is wrapped in do{...}while(0) so that it always
  ** behaves as a single statement.  A bare "if" would silently capture
  ** the "else" branch of an enclosing unbraced if/else.
  **
  ** When tracing is compiled out, PAGERTRACE(X) expands to nothing.
  */
  #if 0
  int sqlite3PagerTrace=1;  /* True to enable tracing */
  #define sqlite3DebugPrintf printf
  #define PAGERTRACE(X) \
      do{ if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; } }while(0)
  #else
  #define PAGERTRACE(X)
  #endif
  /*
  ** The following two macros are used within the PAGERTRACE() macros above
  ** to print out file-descriptors.
  **
  ** PAGERID() takes a pointer to a Pager struct as its argument and
  ** yields that pager's fd member converted to int.  FILEHANDLEID() takes
  ** a file handle and yields it converted to int.
  **
  ** The macro parameters are parenthesized in the expansions so that an
  ** argument which is itself an expression (for example "a+b" or "&x")
  ** is converted as a whole, rather than having the cast or the "->"
  ** operator bind to only part of it.
  */
  #define PAGERID(p) ((int)((p)->fd))
  #define FILEHANDLEID(fd) ((int)(fd))
  129. /*
  130. ** The Pager.eState variable stores the current 'state' of a pager. A
  131. ** pager may be in any one of the seven states shown in the following
  132. ** state diagram.
  133. **
  134. **                            OPEN <------+------+
  135. **                              |         |      |
  136. **                              V         |      |
  137. **               +---------> READER-------+      |
  138. **               |              |                |
  139. **               |              V                |
  140. **               |<-------WRITER_LOCKED------> ERROR
  141. **               |              |                ^
  142. **               |              V                |
  143. **               |<------WRITER_CACHEMOD-------->|
  144. **               |              |                |
  145. **               |              V                |
  146. **               |<-------WRITER_DBMOD---------->|
  147. **               |              |                |
  148. **               |              V                |
  149. **               +<------WRITER_FINISHED-------->+
  150. **
  151. **
  152. ** List of state transitions and the C [function] that performs each:
  153. **
  154. ** OPEN -> READER [sqlite3PagerSharedLock]
  155. ** READER -> OPEN [pager_unlock]
  156. **
  157. ** READER -> WRITER_LOCKED [sqlite3PagerBegin]
  158. ** WRITER_LOCKED -> WRITER_CACHEMOD [pager_open_journal]
  159. ** WRITER_CACHEMOD -> WRITER_DBMOD [syncJournal]
  160. ** WRITER_DBMOD -> WRITER_FINISHED [sqlite3PagerCommitPhaseOne]
  161. ** WRITER_*** -> READER [pager_end_transaction]
  162. **
  163. ** WRITER_*** -> ERROR [pager_error]
  164. ** ERROR -> OPEN [pager_unlock]
  165. **
  166. **
  167. ** OPEN:
  168. **
  169. ** The pager starts up in this state. Nothing is guaranteed in this
  170. ** state - the file may or may not be locked and the database size is
  171. ** unknown. The database may not be read or written.
  172. **
  173. ** * No read or write transaction is active.
  174. ** * Any lock, or no lock at all, may be held on the database file.
  175. ** * The dbSize, dbOrigSize and dbFileSize variables may not be trusted.
  176. **
  177. ** READER:
  178. **
  179. ** In this state all the requirements for reading the database in
  180. ** rollback (non-WAL) mode are met. Unless the pager is (or recently
  181. ** was) in exclusive-locking mode, a user-level read transaction is
  182. ** open. The database size is known in this state.
  183. **
  184. ** A connection running with locking_mode=normal enters this state when
  185. ** it opens a read-transaction on the database and returns to state
  186. ** OPEN after the read-transaction is completed. However a connection
  187. ** running in locking_mode=exclusive (including temp databases) remains in
  188. ** this state even after the read-transaction is closed. The only way
  189. ** a locking_mode=exclusive connection can transition from READER to OPEN
  190. ** is via the ERROR state (see below).
  191. **
  192. ** * A read transaction may be active (but a write-transaction cannot).
  193. ** * A SHARED or greater lock is held on the database file.
  194. ** * The dbSize variable may be trusted (even if a user-level read
  195. ** transaction is not active). The dbOrigSize and dbFileSize variables
  196. ** may not be trusted at this point.
  197. ** * If the database is a WAL database, then the WAL connection is open.
  198. ** * Even if a read-transaction is not open, it is guaranteed that
  199. ** there is no hot-journal in the file-system.
  200. **
  201. ** WRITER_LOCKED:
  202. **
  203. ** The pager moves to this state from READER when a write-transaction
  204. ** is first opened on the database. In WRITER_LOCKED state, all locks
  205. ** required to start a write-transaction are held, but no actual
  206. ** modifications to the cache or database have taken place.
  207. **
  208. ** In rollback mode, a RESERVED or (if the transaction was opened with
  209. ** BEGIN EXCLUSIVE) EXCLUSIVE lock is obtained on the database file when
  210. ** moving to this state, but the journal file is not opened or written
  211. ** to in this state. If the transaction is committed or rolled back while
  212. ** in WRITER_LOCKED state, all that is required is to unlock the database
  213. ** file.
  214. **
  215. ** In WAL mode, WalBeginWriteTransaction() is called to lock the log file.
  216. ** If the connection is running with locking_mode=exclusive, an attempt
  217. ** is made to obtain an EXCLUSIVE lock on the database file.
  218. **
  219. ** * A write transaction is active.
  220. ** * If the connection is open in rollback-mode, a RESERVED or greater
  221. ** lock is held on the database file.
  222. ** * If the connection is open in WAL-mode, a WAL write transaction
  223. ** is open (i.e. sqlite3WalBeginWriteTransaction() has been successfully
  224. ** called).
  225. ** * The dbSize, dbOrigSize and dbFileSize variables are all valid.
  226. ** * The contents of the pager cache have not been modified.
  227. ** * The journal file may or may not be open.
  228. ** * Nothing (not even the first header) has been written to the journal.
  229. **
  230. ** WRITER_CACHEMOD:
  231. **
  232. ** A pager moves from WRITER_LOCKED state to this state when a page is
  233. ** first modified by the upper layer. In rollback mode the journal file
  234. ** is opened (if it is not already open) and a header written to the
  235. ** start of it. The database file on disk has not been modified.
  236. **
  237. ** * A write transaction is active.
  238. ** * A RESERVED or greater lock is held on the database file.
  239. ** * The journal file is open and the first header has been written
  240. ** to it, but the header has not been synced to disk.
  241. ** * The contents of the page cache have been modified.
  242. **
  243. ** WRITER_DBMOD:
  244. **
  245. ** The pager transitions from WRITER_CACHEMOD into WRITER_DBMOD state
  246. ** when it modifies the contents of the database file. WAL connections
  247. ** never enter this state (since they do not modify the database file,
  248. ** just the log file).
  249. **
  250. ** * A write transaction is active.
  251. ** * An EXCLUSIVE or greater lock is held on the database file.
  252. ** * The journal file is open and the first header has been written
  253. ** and synced to disk.
  254. ** * The contents of the page cache have been modified (and possibly
  255. ** written to disk).
  256. **
  257. ** WRITER_FINISHED:
  258. **
  259. ** It is not possible for a WAL connection to enter this state.
  260. **
  261. ** A rollback-mode pager changes to WRITER_FINISHED state from WRITER_DBMOD
  262. ** state after the entire transaction has been successfully written into the
  263. ** database file. In this state the transaction may be committed simply
  264. ** by finalizing the journal file. Once in WRITER_FINISHED state, it is
  265. ** not possible to modify the database further. At this point, the upper
  266. ** layer must either commit or rollback the transaction.
  267. **
  268. ** * A write transaction is active.
  269. ** * An EXCLUSIVE or greater lock is held on the database file.
  270. ** * All writing and syncing of journal and database data has finished.
  271. ** If no error occurred, all that remains is to finalize the journal to
  272. ** commit the transaction. If an error did occur, the caller will need
  273. ** to rollback the transaction.
  274. **
  275. ** ERROR:
  276. **
  277. ** The ERROR state is entered when an IO or disk-full error (including
  278. ** SQLITE_IOERR_NOMEM) occurs at a point in the code that makes it
  279. ** difficult to be sure that the in-memory pager state (cache contents,
  280. ** db size etc.) is consistent with the contents of the file-system.
  281. **
  282. ** Temporary pager files may enter the ERROR state, but in-memory pagers
  283. ** cannot.
  284. **
  285. ** For example, if an IO error occurs while performing a rollback,
  286. ** the contents of the page-cache may be left in an inconsistent state.
  287. ** At this point it would be dangerous to change back to READER state
  288. ** (as usually happens after a rollback). Any subsequent readers might
  289. ** report database corruption (due to the inconsistent cache), and if
  290. ** they upgrade to writers, they may inadvertently corrupt the database
  291. ** file. To avoid this hazard, the pager switches into the ERROR state
  292. ** instead of READER following such an error.
  293. **
  294. ** Once it has entered the ERROR state, any attempt to use the pager
  295. ** to read or write data returns an error. Eventually, once all
  296. ** outstanding transactions have been abandoned, the pager is able to
  297. ** transition back to OPEN state, discarding the contents of the
  298. ** page-cache and any other in-memory state at the same time. Everything
  299. ** is reloaded from disk (and, if necessary, hot-journal rollback performed)
  300. ** when a read-transaction is next opened on the pager (transitioning
  301. ** the pager into READER state). At that point the system has recovered
  302. ** from the error.
  303. **
  304. ** Specifically, the pager jumps into the ERROR state if:
  305. **
  306. ** 1. An error occurs while attempting a rollback. This happens in
  307. ** function sqlite3PagerRollback().
  308. **
  309. ** 2. An error occurs while attempting to finalize a journal file
  310. ** following a commit in function sqlite3PagerCommitPhaseTwo().
  311. **
  312. ** 3. An error occurs while attempting to write to the journal or
  313. ** database file in function pagerStress() in order to free up
  314. ** memory.
  315. **
  316. ** In other cases, the error is returned to the b-tree layer. The b-tree
  317. ** layer then attempts a rollback operation. If the error condition
  318. ** persists, the pager enters the ERROR state via condition (1) above.
  319. **
  320. ** Condition (3) is necessary because it can be triggered by a read-only
  321. ** statement executed within a transaction. In this case, if the error
  322. ** code were simply returned to the user, the b-tree layer would not
  323. ** automatically attempt a rollback, as it assumes that an error in a
  324. ** read-only statement cannot leave the pager in an internally inconsistent
  325. ** state.
  326. **
  327. ** * The Pager.errCode variable is set to something other than SQLITE_OK.
  328. ** * There are one or more outstanding references to pages (after the
  329. ** last reference is dropped the pager should move back to OPEN state).
  330. ** * The pager is not an in-memory pager.
  331. **
  332. **
  333. ** Notes:
  334. **
  335. ** * A pager is never in WRITER_DBMOD or WRITER_FINISHED state if the
  336. ** connection is open in WAL mode. A WAL connection is always in one
  337. ** of the first four states.
  338. **
  339. ** * Normally, a connection open in exclusive mode is never in PAGER_OPEN
  340. ** state. There are two exceptions: immediately after exclusive-mode has
  341. ** been turned on (and before any read or write transactions are
  342. ** executed), and when the pager is leaving the "error state".
  343. **
  344. ** * See also: assert_pager_state().
  345. */
  #define PAGER_OPEN                  0  /* No read or write transaction active */
  #define PAGER_READER                1  /* Requirements for reading are met */
  #define PAGER_WRITER_LOCKED         2  /* Write transaction open, cache unmodified */
  #define PAGER_WRITER_CACHEMOD       3  /* Page cache modified, db file on disk not */
  #define PAGER_WRITER_DBMOD          4  /* Database file is being modified */
  #define PAGER_WRITER_FINISHED       5  /* Only journal finalization remains */
  #define PAGER_ERROR                 6  /* Pager.errCode is not SQLITE_OK */
  353. /*
  354. ** The Pager.eLock variable is almost always set to one of the
  355. ** following locking-states, according to the lock currently held on
  356. ** the database file: NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK.
  357. ** This variable is kept up to date as locks are taken and released by
  358. ** the pagerLockDb() and pagerUnlockDb() wrappers.
  359. **
  360. ** If the VFS xLock() or xUnlock() returns an error other than SQLITE_BUSY
  361. ** (i.e. one of the SQLITE_IOERR subtypes), it is not clear whether or not
  362. ** the operation was successful. In these circumstances pagerLockDb() and
  363. ** pagerUnlockDb() take a conservative approach - eLock is always updated
  364. ** when unlocking the file, and only updated when locking the file if the
  365. ** VFS call is successful. This way, the Pager.eLock variable may be set
  366. ** to a less exclusive (lower) value than the lock that is actually held
  367. ** at the system level, but it is never set to a more exclusive value.
  368. **
  369. ** This is usually safe. If an xUnlock fails or appears to fail, there may
  370. ** be a few redundant xLock() calls or a lock may be held for longer than
  371. ** required, but nothing really goes wrong.
  372. **
  373. ** The exception is when the database file is unlocked as the pager moves
  374. ** from ERROR to OPEN state. At this point there may be a hot-journal file
  375. ** in the file-system that needs to be rolled back (as part of an OPEN->SHARED
  376. ** transition, by the same pager or any other). If the call to xUnlock()
  377. ** fails at this point and the pager is left holding an EXCLUSIVE lock, this
  378. ** can confuse the call to xCheckReservedLock() made later as part
  379. ** of hot-journal detection.
  380. **
  381. ** xCheckReservedLock() is defined as returning true "if there is a RESERVED
  382. ** lock held by this process or any others". So xCheckReservedLock may
  383. ** return true because the caller itself is holding an EXCLUSIVE lock (but
  384. ** doesn't know it because of a previous error in xUnlock). If this happens
  385. ** a hot-journal may be mistaken for a journal being created by an active
  386. ** transaction in another process, causing SQLite to read from the database
  387. ** without rolling it back.
  388. **
  389. ** To work around this, if a call to xUnlock() fails when unlocking the
  390. ** database in the ERROR state, Pager.eLock is set to UNKNOWN_LOCK. It
  391. ** is only changed back to a real locking state after a successful call
  392. ** to xLock(EXCLUSIVE). Also, the code to do the OPEN->SHARED state transition
  393. ** omits the check for a hot-journal if Pager.eLock is set to UNKNOWN_LOCK.
  394. ** Instead, it assumes a hot-journal exists and obtains an EXCLUSIVE
  395. ** lock on the database file before attempting to roll it back. See function
  396. ** sqlite3PagerSharedLock() for more detail.
  397. **
  398. ** Pager.eLock may only be set to UNKNOWN_LOCK when the pager is in
  399. ** PAGER_OPEN state.
  400. */
  #define UNKNOWN_LOCK                (EXCLUSIVE_LOCK+1)  /* One more than EXCLUSIVE_LOCK */
  /*
  ** Macros used for invoking the codec if there is one.
  **
  ** CODEC1(P,D,N,X,E):
  **     If pager P has an xCodec method, call it as xCodec(pCodec,D,N,X)
  **     and execute statement E if the call returns 0.
  **
  ** CODEC2(P,D,N,X,E,O):
  **     If pager P has no xCodec method, set O to (char*)D.  Otherwise
  **     set O to the value returned by xCodec(pCodec,D,N,X), cast to
  **     char*, and execute statement E if that value is 0.
  **
  ** When SQLITE_HAS_CODEC is not defined, CODEC1 is a no-op and CODEC2
  ** simply assigns (char*)D to O.
  **
  ** P and the cast operand D are parenthesized in the expansions so that
  ** an expression argument such as "buf+1" is used as a whole.  Without
  ** the parentheses "(char*)buf+1" would cast first and then add one
  ** byte.
  **
  ** Both macros expand to an "if" statement and so must be used as a
  ** complete statement, never as the body of an unbraced if/else.
  */
  #ifdef SQLITE_HAS_CODEC
  # define CODEC1(P,D,N,X,E) \
      if( (P)->xCodec && (P)->xCodec((P)->pCodec,D,N,X)==0 ){ E; }
  # define CODEC2(P,D,N,X,E,O) \
      if( (P)->xCodec==0 ){ O=(char*)(D); }else \
      if( (O=(char*)((P)->xCodec((P)->pCodec,D,N,X)))==0 ){ E; }
  #else
  # define CODEC1(P,D,N,X,E)   /* NO-OP */
  # define CODEC2(P,D,N,X,E,O) O=(char*)(D)
  #endif
  /*
  ** Upper bound on the sector size: 64KiB.  When the xSectorSize() method
  ** reports a larger value, MAX_SECTOR_SIZE is used in its place.  On such
  ** a system this could conceivably lead to corruption following a power
  ** failure.  The limit is currently undocumented.
  */
  #define MAX_SECTOR_SIZE 0x10000
  422. /*
  423. ** An instance of the following structure is allocated for each active
  424. ** savepoint and statement transaction in the system. All such structures
  425. ** are stored in the Pager.aSavepoint[] array, which is allocated and
  426. ** resized using sqlite3Realloc().
  427. **
  428. ** When a savepoint is created, the PagerSavepoint.iHdrOffset field is
  429. ** set to 0. If a journal-header is written into the main journal while
  430. ** the savepoint is active, then iHdrOffset is set to the byte offset
  431. ** immediately following the last journal record written into the main
  432. ** journal before the journal-header. This is required during savepoint
  433. ** rollback (see pagerPlaybackSavepoint()).
  434. */
  435. typedef struct PagerSavepoint PagerSavepoint;
  436. struct PagerSavepoint {
  437. i64 iOffset; /* Starting offset in main journal */
  438. i64 iHdrOffset; /* See above */
  439. Bitvec *pInSavepoint; /* Set of pages in this savepoint */
  440. Pgno nOrig; /* Original number of pages in file */
  441. Pgno iSubRec; /* Index of first record in sub-journal */
  442. #ifndef SQLITE_OMIT_WAL
  443. u32 aWalData[WAL_SAVEPOINT_NDATA]; /* WAL savepoint context */
  444. #endif
  445. };
  /*
  ** Bit values that may be set in Pager.doNotSpill.  See further
  ** description below.
  */
  #define SPILLFLAG_OFF         0x01  /* Never spill cache.  Set via pragma */
  #define SPILLFLAG_ROLLBACK    0x02  /* Currently rolling back, so do not spill */
  #define SPILLFLAG_NOSYNC      0x04  /* Spill is ok, but do not sync */
  452. /*
  453. ** An open page cache is an instance of struct Pager. A description of
  454. ** some of the more important member variables follows:
  455. **
  456. ** eState
  457. **
  458. ** The current 'state' of the pager object. See the comment and state
  459. ** diagram above for a description of the pager state.
  460. **
  461. ** eLock
  462. **
  463. ** For a real on-disk database, the current lock held on the database file -
  464. ** NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK.
  465. **
  466. ** For a temporary or in-memory database (neither of which require any
  467. ** locks), this variable is always set to EXCLUSIVE_LOCK. Since such
  468. ** databases always have Pager.exclusiveMode==1, this tricks the pager
  469. ** logic into thinking that it already has all the locks it will ever
  470. ** need (and no reason to release them).
  471. **
  472. ** In some (obscure) circumstances, this variable may also be set to
  473. ** UNKNOWN_LOCK. See the comment above the #define of UNKNOWN_LOCK for
  474. ** details.
  475. **
  476. ** changeCountDone
  477. **
  478. ** This boolean variable is used to make sure that the change-counter
  479. ** (the 4-byte header field at byte offset 24 of the database file) is
  480. ** not updated more often than necessary.
  481. **
  482. ** It is set to true when the change-counter field is updated, which
  483. ** can only happen if an exclusive lock is held on the database file.
  484. ** It is cleared (set to false) whenever an exclusive lock is
  485. ** relinquished on the database file. Each time a transaction is committed,
  486. ** the changeCountDone flag is inspected. If it is true, the work of
  487. ** updating the change-counter is omitted for the current transaction.
  488. **
  489. ** This mechanism means that when running in exclusive mode, a connection
  490. ** need only update the change-counter once, for the first transaction
  491. ** committed.
  492. **
  493. ** setMaster
  494. **
  495. ** When PagerCommitPhaseOne() is called to commit a transaction, it may
  496. ** (or may not) specify a master-journal name to be written into the
  497. ** journal file before it is synced to disk.
  498. **
  499. ** Whether or not a journal file contains a master-journal pointer affects
  500. ** the way in which the journal file is finalized after the transaction is
  501. ** committed or rolled back when running in "journal_mode=PERSIST" mode.
  502. ** If a journal file does not contain a master-journal pointer, it is
  503. ** finalized by overwriting the first journal header with zeroes. If
  504. ** it does contain a master-journal pointer the journal file is finalized
  505. ** by truncating it to zero bytes, just as if the connection were
  506. ** running in "journal_mode=truncate" mode.
  507. **
  508. ** Journal files that contain master journal pointers cannot be finalized
  509. ** simply by overwriting the first journal-header with zeroes, as the
  510. ** master journal pointer could interfere with hot-journal rollback of any
  511. ** subsequently interrupted transaction that reuses the journal file.
  512. **
  513. ** The flag is cleared as soon as the journal file is finalized (either
  514. ** by PagerCommitPhaseTwo or PagerRollback). If an IO error prevents the
  515. ** journal file from being successfully finalized, the setMaster flag
  516. ** is cleared anyway (and the pager will move to ERROR state).
  517. **
  518. ** doNotSpill
  519. **
  520. ** This variable controls the behavior of cache-spills (calls made by
  521. ** the pcache module to the pagerStress() routine to write cached data
  522. ** to the file-system in order to free up memory).
  523. **
  524. ** When bits SPILLFLAG_OFF or SPILLFLAG_ROLLBACK of doNotSpill are set,
  525. ** writing to the database from pagerStress() is disabled altogether.
  526. ** The SPILLFLAG_ROLLBACK case is done in a very obscure case that
  527. ** comes up during savepoint rollback that requires the pcache module
  528. ** to allocate a new page to prevent the journal file from being written
  529. ** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF
  530. ** case is a user preference.
  531. **
  532. ** If the SPILLFLAG_NOSYNC bit is set, writing to the database from pagerStress()
  533. ** is permitted, but syncing the journal file is not. This flag is set
  534. ** by sqlite3PagerWrite() when the file-system sector-size is larger than
  535. ** the database page-size in order to prevent a journal sync from happening
  536. ** in between the journalling of two pages on the same sector.
  537. **
  538. ** subjInMemory
  539. **
  540. ** This is a boolean variable. If true, then any required sub-journal
  541. ** is opened as an in-memory journal file. If false, then in-memory
  542. ** sub-journals are only used for in-memory pager files.
  543. **
  544. ** This variable is updated by the upper layer each time a new
  545. ** write-transaction is opened.
  546. **
  547. ** dbSize, dbOrigSize, dbFileSize
  548. **
  549. ** Variable dbSize is set to the number of pages in the database file.
  550. ** It is valid in PAGER_READER and higher states (all states except for
  551. ** OPEN and ERROR).
  552. **
  553. ** dbSize is set based on the size of the database file, which may be
  554. ** larger than the size of the database (the value stored at offset
  555. ** 28 of the database header by the btree). If the size of the file
  556. ** is not an integer multiple of the page-size, the value stored in
  557. ** dbSize is rounded down (i.e. a 5KB file with 2K page-size has dbSize==2).
  558. ** Except, any file that is greater than 0 bytes in size is considered
  559. ** to have at least one page. (i.e. a 1KB file with 2K page-size leads
  560. ** to dbSize==1).
  561. **
  562. ** During a write-transaction, if pages with page-numbers greater than
  563. ** dbSize are modified in the cache, dbSize is updated accordingly.
  564. ** Similarly, if the database is truncated using PagerTruncateImage(),
  565. ** dbSize is updated.
  566. **
  567. ** Variables dbOrigSize and dbFileSize are valid in states
  568. ** PAGER_WRITER_LOCKED and higher. dbOrigSize is a copy of the dbSize
  569. ** variable at the start of the transaction. It is used during rollback,
  570. ** and to determine whether or not pages need to be journalled before
  571. ** being modified.
  572. **
  573. ** Throughout a write-transaction, dbFileSize contains the size of
  574. ** the file on disk in pages. It is set to a copy of dbSize when the
  575. ** write-transaction is first opened, and updated when VFS calls are made
  576. ** to write or truncate the database file on disk.
  577. **
  578. ** The only reason the dbFileSize variable is required is to suppress
  579. ** unnecessary calls to xTruncate() after committing a transaction. If,
  580. ** when a transaction is committed, the dbFileSize variable indicates
  581. ** that the database file is larger than the database image (Pager.dbSize),
  582. ** pager_truncate() is called. The pager_truncate() call uses xFilesize()
  583. ** to measure the database file on disk, and then truncates it if required.
  584. ** dbFileSize is not used when rolling back a transaction. In this case
  585. ** pager_truncate() is called unconditionally (which means there may be
  586. ** a call to xFilesize() that is not strictly required). In either case,
  587. ** pager_truncate() may cause the file to become smaller or larger.
  588. **
  589. ** dbHintSize
  590. **
  591. ** The dbHintSize variable is used to limit the number of calls made to
  592. ** the VFS xFileControl(FCNTL_SIZE_HINT) method.
  593. **
  594. ** dbHintSize is set to a copy of the dbSize variable when a
  595. ** write-transaction is opened (at the same time as dbFileSize and
  596. ** dbOrigSize). If the xFileControl(FCNTL_SIZE_HINT) method is called,
  597. ** dbHintSize is increased to the number of pages that correspond to the
  598. ** size-hint passed to the method call. See pager_write_pagelist() for
  599. ** details.
  600. **
  601. ** errCode
  602. **
  603. ** The Pager.errCode variable is only ever used in PAGER_ERROR state. It
  604. ** is set to zero in all other states. In PAGER_ERROR state, Pager.errCode
  605. ** is always set to SQLITE_FULL, SQLITE_IOERR or one of the SQLITE_IOERR_XXX
  606. ** sub-codes.
  607. */
  608. struct Pager {
  609. sqlite3_vfs *pVfs; /* OS functions to use for IO */
  610. u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
  611. u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */
  612. u8 useJournal; /* Use a rollback journal on this file */
  613. u8 noSync; /* Do not sync the journal if true */
  614. u8 fullSync; /* Do extra syncs of the journal for robustness */
  615. u8 ckptSyncFlags; /* SYNC_NORMAL or SYNC_FULL for checkpoint */
  616. u8 walSyncFlags; /* SYNC_NORMAL or SYNC_FULL for wal writes */
  617. u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */
  618. u8 tempFile; /* zFilename is a temporary file */
  619. u8 readOnly; /* True for a read-only database */
  620. u8 memDb; /* True to inhibit all file I/O */
  621. /**************************************************************************
  622. ** The following block contains those class members that change during
  623. ** routine opertion. Class members not in this block are either fixed
  624. ** when the pager is first created or else only change when there is a
  625. ** significant mode change (such as changing the page_size, locking_mode,
  626. ** or the journal_mode). From another view, these class members describe
  627. ** the "state" of the pager, while other class members describe the
  628. ** "configuration" of the pager.
  629. */
  630. u8 eState; /* Pager state (OPEN, READER, WRITER_LOCKED..) */
  631. u8 eLock; /* Current lock held on database file */
  632. u8 changeCountDone; /* Set after incrementing the change-counter */
  633. u8 setMaster; /* True if a m-j name has been written to jrnl */
  634. u8 doNotSpill; /* Do not spill the cache when non-zero */
  635. u8 subjInMemory; /* True to use in-memory sub-journals */
  636. Pgno dbSize; /* Number of pages in the database */
  637. Pgno dbOrigSize; /* dbSize before the current transaction */
  638. Pgno dbFileSize; /* Number of pages in the database file */
  639. Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */
  640. int errCode; /* One of several kinds of errors */
  641. int nRec; /* Pages journalled since last j-header written */
  642. u32 cksumInit; /* Quasi-random value added to every checksum */
  643. u32 nSubRec; /* Number of records written to sub-journal */
  644. Bitvec *pInJournal; /* One bit for each page in the database file */
  645. sqlite3_file *fd; /* File descriptor for database */
  646. sqlite3_file *jfd; /* File descriptor for main journal */
  647. sqlite3_file *sjfd; /* File descriptor for sub-journal */
  648. i64 journalOff; /* Current write offset in the journal file */
  649. i64 journalHdr; /* Byte offset to previous journal header */
  650. sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */
  651. PagerSavepoint *aSavepoint; /* Array of active savepoints */
  652. int nSavepoint; /* Number of elements in aSavepoint[] */
  653. char dbFileVers[16]; /* Changes whenever database file changes */
  654. u8 bUseFetch; /* True to use xFetch() */
  655. int nMmapOut; /* Number of mmap pages currently outstanding */
  656. sqlite3_int64 szMmap; /* Desired maximum mmap size */
  657. PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */
  658. /*
  659. ** End of the routinely-changing class members
  660. ***************************************************************************/
  661. u16 nExtra; /* Add this many bytes to each in-memory page */
  662. i16 nReserve; /* Number of unused bytes at end of each page */
  663. u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
  664. u32 sectorSize; /* Assumed sector size during rollback */
  665. int pageSize; /* Number of bytes in a page */
  666. Pgno mxPgno; /* Maximum allowed size of the database */
  667. i64 journalSizeLimit; /* Size limit for persistent journal files */
  668. char *zFilename; /* Name of the database file */
  669. char *zJournal; /* Name of the journal file */
  670. int (*xBusyHandler)(void*); /* Function to call when busy */
  671. void *pBusyHandlerArg; /* Context argument for xBusyHandler */
  672. int aStat[3]; /* Total cache hits, misses and writes */
  673. #ifdef SQLITE_TEST
  674. int nRead; /* Database pages read */
  675. #endif
  676. void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
  677. #ifdef SQLITE_HAS_CODEC
  678. void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  679. void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
  680. void (*xCodecFree)(void*); /* Destructor for the codec */
  681. void *pCodec; /* First argument to xCodec... methods */
  682. #endif
  683. char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
  684. PCache *pPCache; /* Pointer to page cache object */
  685. #ifndef SQLITE_OMIT_WAL
  686. Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */
  687. char *zWal; /* File name for write-ahead log */
  688. #endif
  689. };
  /*
  ** Slot numbers within the Pager.aStat[] array.  That array holds the
  ** values reported when SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS or
  ** CACHE_WRITE is passed to sqlite3_db_status().
  */
  #define PAGER_STAT_HIT   0
  #define PAGER_STAT_MISS  1
  #define PAGER_STAT_WRITE 2
  /*
  ** Test instrumentation.  In SQLITE_TEST builds the three global
  ** counters below exist and PAGER_INCR() bumps one of them.  In every
  ** other build the counters do not exist and PAGER_INCR() expands to
  ** nothing.  The counters are not thread-safe.
  */
  #ifndef SQLITE_TEST
  # define PAGER_INCR(v)
  #else
  int sqlite3_pager_readdb_count = 0;   /* Number of full pages read from DB */
  int sqlite3_pager_writedb_count = 0;  /* Number of full pages written to DB */
  int sqlite3_pager_writej_count = 0;   /* Number of pages written to journal */
  # define PAGER_INCR(v) v++
  #endif
  /*
  ** Every journal file starts with the 8-byte magic string below.  The
  ** bytes were taken from /dev/random and serve only as a sanity check.
  **
  ** Since version 2.8.0 the journal format carries extra sanity-checking
  ** data.  If power is lost while the journal is being written, the file
  ** may contain semi-random garbage once power is restored, and rolling
  ** such a journal back could corrupt the database.  The extra data is an
  ** attempt to detect that garbage and ignore it.
  **
  ** The extra data is a 32-bit checksum on each page of data.  It covers
  ** both the page number and the pPager->pageSize bytes of page content,
  ** and it is seeded with a 32-bit random value that is stored in the
  ** journal right after the header.  The random seed matters: garbage at
  ** the end of a journal is likely to be data from other, since-deleted
  ** files.  If that data came from an obsolete journal its checksums
  ** might otherwise be correct.  Seeding the checksum with a random value
  ** that differs for every journal minimizes that risk.
  */
  static const unsigned char aJournalMagic[8] = {
    0xd9, 0xd5, 0x05, 0xf9,
    0x20, 0xa1, 0x63, 0xd7
  };
  /*
  ** Size in bytes of one page record in the journal: the page content
  ** (pageSize bytes) plus 8 bytes of per-record overhead (presumably the
  ** 4-byte page number and the 4-byte checksum - confirm against the
  ** record writer).
  **
  ** The argument is parenthesized so that any pointer-valued expression,
  ** not just a plain identifier, may be passed.
  */
  #define JOURNAL_PG_SZ(pPager) (((pPager)->pageSize) + 8)
  /*
  ** Size in bytes of a journal header for this pager.  This is the
  ** pager's sectorSize value, which is usually the size of a single disk
  ** sector.  See also setSectorSize().
  **
  ** The argument is parenthesized so that any pointer-valued expression,
  ** not just a plain identifier, may be passed.
  */
  #define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
  /*
  ** MEMDB is true when the pager is operating on an in-memory database.
  ** It is a macro, not a function, so that when SQLITE_OMIT_MEMORYDB is
  ** defined it collapses to the constant 0 and the compiler can discard
  ** code that could never run.
  **
  ** Note that the normal definition reads a variable named "pPager" from
  ** the scope in which the macro is used.
  */
  #ifndef SQLITE_OMIT_MEMORYDB
  # define MEMDB pPager->memDb
  #else
  # define MEMDB 0
  #endif
  /*
  ** USEFETCH(x) is true if pager x is allowed to access the database with
  ** memory-mapped I/O through the xFetch and xUnfetch interfaces.  When
  ** SQLITE_MAX_MMAP_SIZE is not greater than zero it is the constant 0.
  */
  #if !(SQLITE_MAX_MMAP_SIZE>0)
  # define USEFETCH(x) 0
  #else
  # define USEFETCH(x) ((x)->bUseFetch)
  #endif
  /*
  ** Largest legal page number: (2^31 - 1), i.e. 2147483647.
  */
  #define PAGER_MAX_PGNO 0x7fffffff
  /*
  ** The argument to this macro is a file descriptor (type sqlite3_file*).
  ** The result is an int: 0 if the file is not open (its pMethods pointer
  ** is NULL) and 1 if it is.
  **
  ** This is so that expressions can be written as:
  **
  **   if( isOpen(pPager->jfd) ){ ...
  **
  ** instead of
  **
  **   if( pPager->jfd->pMethods ){ ...
  **
  ** The explicit comparison against 0 makes the result an integer rather
  ** than a raw pointer, so it can be passed to assert() or stored in an
  ** integer flag without relying on pointer-to-integer conversion.
  */
  #define isOpen(pFd) ((pFd)->pMethods!=0)
  784. /*
  785. ** Return true if this pager uses a write-ahead log instead of the usual
  786. ** rollback journal. Otherwise false.
  787. */
  788. #ifndef SQLITE_OMIT_WAL
  789. static int pagerUseWal(Pager *pPager){
  790. return (pPager->pWal!=0);
  791. }
  792. #else
  793. # define pagerUseWal(x) 0
  794. # define pagerRollbackWal(x) 0
  795. # define pagerWalFrames(v,w,x,y) 0
  796. # define pagerOpenWalIfPresent(z) SQLITE_OK
  797. # define pagerBeginReadTransaction(z) SQLITE_OK
  798. #endif
  799. #ifndef NDEBUG
  800. /*
  801. ** Usage:
  802. **
  803. ** assert( assert_pager_state(pPager) );
  804. **
  805. ** This function runs many asserts to try to find inconsistencies in
  806. ** the internal state of the Pager object.
  807. */
  808. static int assert_pager_state(Pager *p){
  809. Pager *pPager = p;
  810. /* State must be valid. */
  811. assert( p->eState==PAGER_OPEN
  812. || p->eState==PAGER_READER
  813. || p->eState==PAGER_WRITER_LOCKED
  814. || p->eState==PAGER_WRITER_CACHEMOD
  815. || p->eState==PAGER_WRITER_DBMOD
  816. || p->eState==PAGER_WRITER_FINISHED
  817. || p->eState==PAGER_ERROR
  818. );
  819. /* Regardless of the current state, a temp-file connection always behaves
  820. ** as if it has an exclusive lock on the database file. It never updates
  821. ** the change-counter field, so the changeCountDone flag is always set.
  822. */
  823. assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK );
  824. assert( p->tempFile==0 || pPager->changeCountDone );
  825. /* If the useJournal flag is clear, the journal-mode must be "OFF".
  826. ** And if the journal-mode is "OFF", the journal file must not be open.
  827. */
  828. assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal );
  829. assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) );
  830. /* Check that MEMDB implies noSync. And an in-memory journal. Since
  831. ** this means an in-memory pager performs no IO at all, it cannot encounter
  832. ** either SQLITE_IOERR or SQLITE_FULL during rollback or while finalizing
  833. ** a journal file. (although the in-memory journal implementation may
  834. ** return SQLITE_IOERR_NOMEM while the journal file is being written). It
  835. ** is therefore not possible for an in-memory pager to enter the ERROR
  836. ** state.
  837. */
  838. if( MEMDB ){
  839. assert( p->noSync );
  840. assert( p->journalMode==PAGER_JOURNALMODE_OFF
  841. || p->journalMode==PAGER_JOURNALMODE_MEMORY
  842. );
  843. assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN );
  844. assert( pagerUseWal(p)==0 );
  845. }
  846. /* If changeCountDone is set, a RESERVED lock or greater must be held
  847. ** on the file.
  848. */
  849. assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK );
  850. assert( p->eLock!=PENDING_LOCK );
  851. switch( p->eState ){
  852. case PAGER_OPEN:
  853. assert( !MEMDB );
  854. assert( pPager->errCode==SQLITE_OK );
  855. assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile );
  856. break;
  857. case PAGER_READER:
  858. assert( pPager->errCode==SQLITE_OK );
  859. assert( p->eLock!=UNKNOWN_LOCK );
  860. assert( p->eLock>=SHARED_LOCK );
  861. break;
  862. case PAGER_WRITER_LOCKED:
  863. assert( p->eLock!=UNKNOWN_LOCK );
  864. assert( pPager->errCode==SQLITE_OK );
  865. if( !pagerUseWal(pPager) ){
  866. assert( p->eLock>=RESERVED_LOCK );
  867. }
  868. assert( pPager->dbSize==pPager->dbOrigSize );
  869. assert( pPager->dbOrigSize==pPager->dbFileSize );
  870. assert( pPager->dbOrigSize==pPager->dbHintSize );
  871. assert( pPager->setMaster==0 );
  872. break;
  873. case PAGER_WRITER_CACHEMOD:
  874. assert( p->eLock!=UNKNOWN_LOCK );
  875. assert( pPager->errCode==SQLITE_OK );
  876. if( !pagerUseWal(pPager) ){
  877. /* It is possible that if journal_mode=wal here that neither the
  878. ** journal file nor the WAL file are open. This happens during
  879. ** a rollback transaction that switches from journal_mode=off
  880. ** to journal_mode=wal.
  881. */
  882. assert( p->eLock>=RESERVED_LOCK );
  883. assert( isOpen(p->jfd)
  884. || p->journalMode==PAGER_JOURNALMODE_OFF
  885. || p->journalMode==PAGER_JOURNALMODE_WAL
  886. );
  887. }
  888. assert( pPager->dbOrigSize==pPager->dbFileSize );
  889. assert( pPager->dbOrigSize==pPager->dbHintSize );
  890. break;
  891. case PAGER_WRITER_DBMOD:
  892. assert( p->eLock==EXCLUSIVE_LOCK );
  893. assert( pPager->errCode==SQLITE_OK );
  894. assert( !pagerUseWal(pPager) );
  895. assert( p->eLock>=EXCLUSIVE_LOCK );
  896. assert( isOpen(p->jfd)
  897. || p->journalMode==PAGER_JOURNALMODE_OFF
  898. || p->journalMode==PAGER_JOURNALMODE_WAL
  899. );
  900. assert( pPager->dbOrigSize<=pPager->dbHintSize );
  901. break;
  902. case PAGER_WRITER_FINISHED:
  903. assert( p->eLock==EXCLUSIVE_LOCK );
  904. assert( pPager->errCode==SQLITE_OK );
  905. assert( !pagerUseWal(pPager) );
  906. assert( isOpen(p->jfd)
  907. || p->journalMode==PAGER_JOURNALMODE_OFF
  908. || p->journalMode==PAGER_JOURNALMODE_WAL
  909. );
  910. break;
  911. case PAGER_ERROR:
  912. /* There must be at least one outstanding reference to the pager if
  913. ** in ERROR state. Otherwise the pager should have already dropped
  914. ** back to OPEN state.
  915. */
  916. assert( pPager->errCode!=SQLITE_OK );
  917. assert( sqlite3PcacheRefCount(pPager->pPCache)>0 );
  918. break;
  919. }
  920. return 1;
  921. }
  922. #endif /* ifndef NDEBUG */
  #ifdef SQLITE_DEBUG
  /*
  ** Format the state of pager p as human-readable text and return a
  ** pointer to it.  The text lives in a static buffer, so each call
  ** overwrites the result of the previous one.  Meant for use from a
  ** debugger, e.g. as an alternative to "print *pPager" in gdb:
  **
  **   (gdb) printf "%s", print_pager_state(pPager)
  */
  static char *print_pager_state(Pager *p){
    static char zRet[1024];
    const char *zState;       /* Name of p->eState */
    const char *zLock;        /* Name of p->eLock */
    const char *zJrnlMode;    /* Name of p->journalMode */

    switch( p->eState ){
      case PAGER_OPEN:            zState = "OPEN";            break;
      case PAGER_READER:          zState = "READER";          break;
      case PAGER_WRITER_LOCKED:   zState = "WRITER_LOCKED";   break;
      case PAGER_WRITER_CACHEMOD: zState = "WRITER_CACHEMOD"; break;
      case PAGER_WRITER_DBMOD:    zState = "WRITER_DBMOD";    break;
      case PAGER_WRITER_FINISHED: zState = "WRITER_FINISHED"; break;
      case PAGER_ERROR:           zState = "ERROR";           break;
      default:                    zState = "?error?";         break;
    }

    if( p->eLock==NO_LOCK ){
      zLock = "NO_LOCK";
    }else if( p->eLock==RESERVED_LOCK ){
      zLock = "RESERVED";
    }else if( p->eLock==EXCLUSIVE_LOCK ){
      zLock = "EXCLUSIVE";
    }else if( p->eLock==SHARED_LOCK ){
      zLock = "SHARED";
    }else if( p->eLock==UNKNOWN_LOCK ){
      zLock = "UNKNOWN";
    }else{
      zLock = "?error?";
    }

    if( p->journalMode==PAGER_JOURNALMODE_MEMORY ){
      zJrnlMode = "memory";
    }else if( p->journalMode==PAGER_JOURNALMODE_OFF ){
      zJrnlMode = "off";
    }else if( p->journalMode==PAGER_JOURNALMODE_DELETE ){
      zJrnlMode = "delete";
    }else if( p->journalMode==PAGER_JOURNALMODE_PERSIST ){
      zJrnlMode = "persist";
    }else if( p->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
      zJrnlMode = "truncate";
    }else if( p->journalMode==PAGER_JOURNALMODE_WAL ){
      zJrnlMode = "wal";
    }else{
      zJrnlMode = "?error?";
    }

    sqlite3_snprintf((int)sizeof(zRet), zRet,
        "Filename: %s\n"
        "State: %s errCode=%d\n"
        "Lock: %s\n"
        "Locking mode: locking_mode=%s\n"
        "Journal mode: journal_mode=%s\n"
        "Backing store: tempFile=%d memDb=%d useJournal=%d\n"
        "Journal: journalOff=%lld journalHdr=%lld\n"
        "Size: dbsize=%d dbOrigSize=%d dbFileSize=%d\n"
        , p->zFilename
        , zState
        , (int)p->errCode
        , zLock
        , p->exclusiveMode ? "exclusive" : "normal"
        , zJrnlMode
        , (int)p->tempFile, (int)p->memDb, (int)p->useJournal
        , p->journalOff, p->journalHdr
        , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize
    );

    return zRet;
  }
  #endif
  971. /*
  972. ** Return true if it is necessary to write page *pPg into the sub-journal.
  973. ** A page needs to be written into the sub-journal if there exists one
  974. ** or more open savepoints for which:
  975. **
  976. ** * The page-number is less than or equal to PagerSavepoint.nOrig, and
  977. ** * The bit corresponding to the page-number is not set in
  978. ** PagerSavepoint.pInSavepoint.
  979. */
  980. static int subjRequiresPage(PgHdr *pPg){
  981. Pager *pPager = pPg->pPager;
  982. PagerSavepoint *p;
  983. Pgno pgno;
  984. int i;
  985. if( pPager->nSavepoint ){
  986. pgno = pPg->pgno;
  987. for(i=0; i<pPager->nSavepoint; i++){
  988. p = &pPager->aSavepoint[i];
  989. if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){
  990. return 1;
  991. }
  992. }
  993. }
  994. return 0;
  995. }
  996. /*
  997. ** Return true if the page is already in the journal file.
  998. */
  999. static int pageInJournal(PgHdr *pPg){
  1000. return sqlite3BitvecTest(pPg->pPager->pInJournal, pPg->pgno);
  1001. }
  1002. /*
  1003. ** Read a 32-bit integer from the given file descriptor. Store the integer
  1004. ** that is read in *pRes. Return SQLITE_OK if everything worked, or an
  1005. ** error code is something goes wrong.
  1006. **
  1007. ** All values are stored on disk as big-endian.
  1008. */
  1009. static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
  1010. unsigned char ac[4];
  1011. int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
  1012. if( rc==SQLITE_OK ){
  1013. *pRes = sqlite3Get4byte(ac);
  1014. }
  1015. return rc;
  1016. }
  /*
  ** Write the 32-bit integer B into the buffer at address A in big-endian
  ** byte order.
  **
  ** Both arguments are parenthesized so that the (u8*) cast applies to
  ** the whole of A even when A is an expression such as "p+1".
  */
  #define put32bits(A,B) sqlite3Put4byte((u8*)(A),(B))
  1021. /*
  1022. ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
  1023. ** on success or an error code is something goes wrong.
  1024. */
  1025. static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
  1026. char ac[4];
  1027. put32bits(ac, val);
  1028. return sqlite3OsWrite(fd, ac, 4, offset);
  1029. }
  1030. /*
  1031. ** Unlock the database file to level eLock, which must be either NO_LOCK
  1032. ** or SHARED_LOCK. Regardless of whether or not the call to xUnlock()
  1033. ** succeeds, set the Pager.eLock variable to match the (attempted) new lock.
  1034. **
  1035. ** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is
  1036. ** called, do not modify it. See the comment above the #define of
  1037. ** UNKNOWN_LOCK for an explanation of this.
  1038. */
  1039. static int pagerUnlockDb(Pager *pPager, int eLock){
  1040. int rc = SQLITE_OK;
  1041. assert( !pPager->exclusiveMode || pPager->eLock==eLock );
  1042. assert( eLock==NO_LOCK || eLock==SHARED_LOCK );
  1043. assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 );
  1044. if( isOpen(pPager->fd) ){
  1045. assert( pPager->eLock>=eLock );
  1046. rc = sqlite3OsUnlock(pPager->fd, eLock);
  1047. if( pPager->eLock!=UNKNOWN_LOCK ){
  1048. pPager->eLock = (u8)eLock;
  1049. }
  1050. IOTRACE(("UNLOCK %p %d\n", pPager, eLock))
  1051. }
  1052. return rc;
  1053. }
  1054. /*
  1055. ** Lock the database file to level eLock, which must be either SHARED_LOCK,
  1056. ** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the
  1057. ** Pager.eLock variable to the new locking state.
  1058. **
  1059. ** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is
  1060. ** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK.
  1061. ** See the comment above the #define of UNKNOWN_LOCK for an explanation
  1062. ** of this.
  1063. */
  1064. static int pagerLockDb(Pager *pPager, int eLock){
  1065. int rc = SQLITE_OK;
  1066. assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK );
  1067. if( pPager->eLock<eLock || pPager->eLock==UNKNOWN_LOCK ){
  1068. rc = sqlite3OsLock(pPager->fd, eLock);
  1069. if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){
  1070. pPager->eLock = (u8)eLock;
  1071. IOTRACE(("LOCK %p %d\n", pPager, eLock))
  1072. }
  1073. }
  1074. return rc;
  1075. }
  /*
  ** This function determines whether or not the atomic-write optimization
  ** can be used with this pager.  The optimization can be used if:
  **
  **   (a) the value returned by OsDeviceCharacteristics() indicates that
  **       a database page may be written atomically, and
  **   (b) the sector size recorded in Pager.sectorSize is less than or
  **       equal to the page size.
  **
  ** The optimization is also always enabled for temporary files.  It is
  ** an error to call this function if pPager is opened on an in-memory
  ** database.
  **
  ** If the optimization cannot be used, 0 is returned.  If it can be used,
  ** then the value returned is the size of the journal file when it
  ** contains rollback data for exactly one page.
  */
  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  static int jrnlBufferSize(Pager *pPager){
    assert( !MEMDB );
    if( !pPager->tempFile ){
      int dc;                           /* Device characteristics */
      int nSector;                      /* Sector size */
      int szPage;                       /* Page size */

      assert( isOpen(pPager->fd) );
      dc = sqlite3OsDeviceCharacteristics(pPager->fd);
      nSector = pPager->sectorSize;
      szPage = pPager->pageSize;

      /* The SQLITE_IOCAP_ATOMICnnn bits equal (nnn>>8), so (szPage>>8)
      ** is the capability bit for atomic writes of exactly one page.
      */
      assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
      assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

      /* Disable the optimization if EITHER requirement fails: the device
      ** cannot write a page atomically (a), or a sector is larger than a
      ** page (b).  The "0==" test must apply to the capability mask only,
      ** not to the whole "mask || nSector>szPage" expression; otherwise a
      ** too-large sector would enable the optimization instead of
      ** disabling it.
      */
      if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8))) || nSector>szPage ){
        return 0;
      }
    }

    return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
  }
  #endif
  /*
  ** Optional page-cache sanity checking, for testing and debugging only.
  ** When SQLITE_CHECK_PAGES is defined a hash of each page's content is
  ** kept and verified.  When it is not defined, all of the helpers below
  ** collapse to constants or to nothing.
  */
  #ifndef SQLITE_CHECK_PAGES
  #define pager_datahash(X,Y) 0
  #define pager_pagehash(X) 0
  #define pager_set_pagehash(X)
  #define CHECK_PAGE(x)
  #else
  /*
  ** Return a 32-bit hash of the nByte bytes starting at pData.
  */
  static u32 pager_datahash(int nByte, unsigned char *pData){
    u32 h = 0;
    int k = 0;
    while( k<nByte ){
      h = (h*1039) + pData[k];
      k++;
    }
    return h;
  }

  /*
  ** Return the hash of the pageSize bytes of content held by pPage.
  */
  static u32 pager_pagehash(PgHdr *pPage){
    unsigned char *aData = (unsigned char *)pPage->pData;
    return pager_datahash(pPage->pPager->pageSize, aData);
  }

  /*
  ** Record the current content hash of pPage in pPage->pageHash.
  */
  static void pager_set_pagehash(PgHdr *pPage){
    pPage->pageHash = pager_pagehash(pPage);
  }

  /*
  ** The CHECK_PAGE macro takes a PgHdr* as an argument.  If
  ** SQLITE_CHECK_PAGES is defined, and NDEBUG is not defined, an assert()
  ** statement checks that the page is either dirty or still matches the
  ** recorded page-hash.
  */
  #define CHECK_PAGE(x) checkPage(x)
  static void checkPage(PgHdr *pPg){
    Pager *pPager = pPg->pPager;
    assert( pPager->eState!=PAGER_ERROR );
    assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
  }
  #endif /* SQLITE_CHECK_PAGES */
  1153. /*
  1154. ** When this is called the journal file for pager pPager must be open.
  1155. ** This function attempts to read a master journal file name from the
  1156. ** end of the file and, if successful, copies it into memory supplied
  1157. ** by the caller. See comments above writeMasterJournal() for the format
  1158. ** used to store a master journal file name at the end of a journal file.
  1159. **
  1160. ** zMaster must point to a buffer of at least nMaster bytes allocated by
  1161. ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
  1162. ** enough space to write the master journal name). If the master journal
  1163. ** name in the journal is longer than nMaster bytes (including a
  1164. ** nul-terminator), then this is handled as if no master journal name
  1165. ** were present in the journal.
  1166. **
  1167. ** If a master journal file name is present at the end of the journal
  1168. ** file, then it is copied into the buffer pointed to by zMaster. A
  1169. ** nul-terminator byte is appended to the buffer following the master
  1170. ** journal file name.
  1171. **
  1172. ** If it is determined that no master journal file name is present
  1173. ** zMaster[0] is set to 0 and SQLITE_OK returned.
  1174. **
  1175. ** If an error occurs while reading from the journal file, an SQLite
  1176. ** error code is returned.
  1177. */
  1178. static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){
  1179. int rc; /* Return code */
  1180. u32 len; /* Length in bytes of master journal name */
  1181. i64 szJ; /* Total size in bytes of journal file pJrnl */
  1182. u32 cksum; /* MJ checksum value read from journal */
  1183. u32 u; /* Unsigned loop counter */
  1184. unsigned char aMagic[8]; /* A buffer to hold the magic header */
  1185. zMaster[0] = '\0';
  1186. if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ))
  1187. || szJ<16
  1188. || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len))
  1189. || len>=nMaster
  1190. || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum))
  1191. || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8))
  1192. || memcmp(aMagic, aJournalMagic, 8)
  1193. || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len))
  1194. ){
  1195. return rc;
  1196. }
  1197. /* See if the checksum matches the master journal name */
  1198. for(u=0; u<len; u++){
  1199. cksum -= zMaster[u];
  1200. }
  1201. if( cksum ){
  1202. /* If the checksum doesn't add up, then one or more of the disk sectors
  1203. ** containing the master journal filename is corrupted. This means
  1204. ** definitely roll back, so just return SQLITE_OK and report a (nul)
  1205. ** master-journal filename.
  1206. */
  1207. len = 0;
  1208. }
  1209. zMaster[len] = '\0';
  1210. return SQLITE_OK;
  1211. }
  1212. /*
  1213. ** Return the offset of the sector boundary at or immediately
  1214. ** following the value in pPager->journalOff, assuming a sector
  1215. ** size of pPager->sectorSize bytes.
  1216. **
  1217. ** i.e for a sector size of 512:
  1218. **
  1219. ** Pager.journalOff Return value
  1220. ** ---------------------------------------
  1221. ** 0 0
  1222. ** 512 512
  1223. ** 100 512
  1224. ** 2000 2048
  1225. **
  1226. */
  1227. static i64 journalHdrOffset(Pager *pPager){
  1228. i64 offset = 0;
  1229. i64 c = pPager->journalOff;
  1230. if( c ){
  1231. offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
  1232. }
  1233. assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
  1234. assert( offset>=c );
  1235. assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
  1236. return offset;
  1237. }
  1238. /*
  1239. ** The journal file must be open when this function is called.
  1240. **
  1241. ** This function is a no-op if the journal file has not been written to
  1242. ** within the current transaction (i.e. if Pager.journalOff==0).
  1243. **
  1244. ** If doTruncate is non-zero or the Pager.journalSizeLimit variable is
  1245. ** set to 0, then truncate the journal file to zero bytes in size. Otherwise,
  1246. ** zero the 28-byte header at the start of the journal file. In either case,
  1247. ** if the pager is not in no-sync mode, sync the journal file immediately
  1248. ** after writing or truncating it.
  1249. **
  1250. ** If Pager.journalSizeLimit is set to a positive, non-zero value, and
  1251. ** following the truncation or zeroing described above the size of the
  1252. ** journal file in bytes is larger than this value, then truncate the
  1253. ** journal file to Pager.journalSizeLimit bytes. The journal file does
  1254. ** not need to be synced following this operation.
  1255. **
  1256. ** If an IO error occurs, abandon processing and return the IO error code.
  1257. ** Otherwise, return SQLITE_OK.
  1258. */
  1259. static int zeroJournalHdr(Pager *pPager, int doTruncate){
  1260. int rc = SQLITE_OK; /* Return code */
  1261. assert( isOpen(pPager->jfd) );
  1262. if( pPager->journalOff ){
  1263. const i64 iLimit = pPager->journalSizeLimit; /* Local cache of jsl */
  1264. IOTRACE(("JZEROHDR %p\n", pPager))
  1265. if( doTruncate || iLimit==0 ){
  1266. rc = sqlite3OsTruncate(pPager->jfd, 0);
  1267. }else{
  1268. static const char zeroHdr[28] = {0};
  1269. rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
  1270. }
  1271. if( rc==SQLITE_OK && !pPager->noSync ){
  1272. rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags);
  1273. }
  1274. /* At this point the transaction is committed but the write lock
  1275. ** is still held on the file. If there is a size limit configured for
  1276. ** the persistent journal and the journal file currently consumes more
  1277. ** space than that limit allows for, truncate it now. There is no need
  1278. ** to sync the file following this operation.
  1279. */
  1280. if( rc==SQLITE_OK && iLimit>0 ){
  1281. i64 sz;
  1282. rc = sqlite3OsFileSize(pPager->jfd, &sz);
  1283. if( rc==SQLITE_OK && sz>iLimit ){
  1284. rc = sqlite3OsTruncate(pPager->jfd, iLimit);
  1285. }
  1286. }
  1287. }
  1288. return rc;
  1289. }
  1290. /*
  1291. ** The journal file must be open when this routine is called. A journal
  1292. ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
  1293. ** current location.
  1294. **
  1295. ** The format for the journal header is as follows:
  1296. ** - 8 bytes: Magic identifying journal format.
  1297. ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
  1298. ** - 4 bytes: Random number used for page hash.
  1299. ** - 4 bytes: Initial database page count.
  1300. ** - 4 bytes: Sector size used by the process that wrote this journal.
  1301. ** - 4 bytes: Database page size.
  1302. **
  1303. ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
  1304. */
  1305. static int writeJournalHdr(Pager *pPager){
  1306. int rc = SQLITE_OK; /* Return code */
  1307. char *zHeader = pPager->pTmpSpace; /* Temporary space used to build header */
  1308. u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */
  1309. u32 nWrite; /* Bytes of header sector written */
  1310. int ii; /* Loop counter */
  1311. assert( isOpen(pPager->jfd) ); /* Journal file must be open. */
  1312. if( nHeader>JOURNAL_HDR_SZ(pPager) ){
  1313. nHeader = JOURNAL_HDR_SZ(pPager);
  1314. }
  1315. /* If there are active savepoints and any of them were created
  1316. ** since the most recent journal header was written, update the
  1317. ** PagerSavepoint.iHdrOffset fields now.
  1318. */
  1319. for(ii=0; ii<pPager->nSavepoint; ii++){
  1320. if( pPager->aSavepoint[ii].iHdrOffset==0 ){
  1321. pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff;
  1322. }
  1323. }
  1324. pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager);
  1325. /*
  1326. ** Write the nRec Field - the number of page records that follow this
  1327. ** journal header. Normally, zero is written to this value at this time.
  1328. ** After the records are added to the journal (and the journal synced,
  1329. ** if in full-sync mode), the zero is overwritten with the true number
  1330. ** of records (see syncJournal()).
  1331. **
  1332. ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
  1333. ** reading the journal this value tells SQLite to assume that the
  1334. ** rest of the journal file contains valid page records. This assumption
  1335. ** is dangerous, as if a failure occurred whilst writing to the journal
  1336. ** file it may contain some garbage data. There are two scenarios
  1337. ** where this risk can be ignored:
  1338. **
  1339. ** * When the pager is in no-sync mode. Corruption can follow a
  1340. ** power failure in this case anyway.
  1341. **
  1342. ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
  1343. ** that garbage data is never appended to the journal file.
  1344. */
  1345. assert( isOpen(pPager->fd) || pPager->noSync );
  1346. if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY)
  1347. || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
  1348. ){
  1349. memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
  1350. put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
  1351. }else{
  1352. memset(zHeader, 0, sizeof(aJournalMagic)+4);
  1353. }
  1354. /* The random check-hash initializer */
  1355. sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
  1356. put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
  1357. /* The initial database size */
  1358. put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize);
  1359. /* The assumed sector size for this process */
  1360. put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
  1361. /* The page size */
  1362. put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
  1363. /* Initializing the tail of the buffer is not necessary. Everything
  1364. ** works find if the following memset() is omitted. But initializing
  1365. ** the memory prevents valgrind from complaining, so we are willing to
  1366. ** take the performance hit.
  1367. */
  1368. memset(&zHeader[sizeof(aJournalMagic)+20], 0,
  1369. nHeader-(sizeof(aJournalMagic)+20));
  1370. /* In theory, it is only necessary to write the 28 bytes that the
  1371. ** journal header consumes to the journal file here. Then increment the
  1372. ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next
  1373. ** record is written to the following sector (leaving a gap in the file
  1374. ** that will be implicitly filled in by the OS).
  1375. **
  1376. ** However it has been discovered that on some systems this pattern can
  1377. ** be significantly slower than contiguously writing data to the file,
  1378. ** even if that means explicitly writing data to the block of
  1379. ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what
  1380. ** is done.
  1381. **
  1382. ** The loop is required here in case the sector-size is larger than the
  1383. ** database page size. Since the zHeader buffer is only Pager.pageSize
  1384. ** bytes in size, more than one call to sqlite3OsWrite() may be required
  1385. ** to populate the entire journal header sector.
  1386. */
  1387. for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
  1388. IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
  1389. rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
  1390. assert( pPager->journalHdr <= pPager->journalOff );
  1391. pPager->journalOff += nHeader;
  1392. }
  1393. return rc;
  1394. }
  1395. /*
  1396. ** The journal file must be open when this is called. A journal header file
  1397. ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
  1398. ** file. The current location in the journal file is given by
  1399. ** pPager->journalOff. See comments above function writeJournalHdr() for
  1400. ** a description of the journal header format.
  1401. **
  1402. ** If the header is read successfully, *pNRec is set to the number of
  1403. ** page records following this header and *pDbSize is set to the size of the
  1404. ** database before the transaction began, in pages. Also, pPager->cksumInit
  1405. ** is set to the value read from the journal header. SQLITE_OK is returned
  1406. ** in this case.
  1407. **
  1408. ** If the journal header file appears to be corrupted, SQLITE_DONE is
  1409. ** returned and *pNRec and *PDbSize are undefined. If JOURNAL_HDR_SZ bytes
  1410. ** cannot be read from the journal file an error code is returned.
  1411. */
  1412. static int readJournalHdr(
  1413. Pager *pPager, /* Pager object */
  1414. int isHot,
  1415. i64 journalSize, /* Size of the open journal file in bytes */
  1416. u32 *pNRec, /* OUT: Value read from the nRec field */
  1417. u32 *pDbSize /* OUT: Value of original database size field */
  1418. ){
  1419. int rc; /* Return code */
  1420. unsigned char aMagic[8]; /* A buffer to hold the magic header */
  1421. i64 iHdrOff; /* Offset of journal header being read */
  1422. assert( isOpen(pPager->jfd) ); /* Journal file must be open. */
  1423. /* Advance Pager.journalOff to the start of the next sector. If the
  1424. ** journal file is too small for there to be a header stored at this
  1425. ** point, return SQLITE_DONE.
  1426. */
  1427. pPager->journalOff = journalHdrOffset(pPager);
  1428. if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
  1429. return SQLITE_DONE;
  1430. }
  1431. iHdrOff = pPager->journalOff;
  1432. /* Read in the first 8 bytes of the journal header. If they do not match
  1433. ** the magic string found at the start of each journal header, return
  1434. ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise,
  1435. ** proceed.
  1436. */
  1437. if( isHot || iHdrOff!=pPager->journalHdr ){
  1438. rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff);
  1439. if( rc ){
  1440. return rc;
  1441. }
  1442. if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
  1443. return SQLITE_DONE;
  1444. }
  1445. }
  1446. /* Read the first three 32-bit fields of the journal header: The nRec
  1447. ** field, the checksum-initializer and the database size at the start
  1448. ** of the transaction. Return an error code if anything goes wrong.
  1449. */
  1450. if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec))
  1451. || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit))
  1452. || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize))
  1453. ){
  1454. return rc;
  1455. }
  1456. if( pPager->journalOff==0 ){
  1457. u32 iPageSize; /* Page-size field of journal header */
  1458. u32 iSectorSize; /* Sector-size field of journal header */
  1459. /* Read the page-size and sector-size journal header fields. */
  1460. if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize))
  1461. || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize))
  1462. ){
  1463. return rc;
  1464. }
  1465. /* Versions of SQLite prior to 3.5.8 set the page-size field of the
  1466. ** journal header to zero. In this case, assume that the Pager.pageSize
  1467. ** variable is already set to the correct page size.
  1468. */
  1469. if( iPageSize==0 ){
  1470. iPageSize = pPager->pageSize;
  1471. }
  1472. /* Check that the values read from the page-size and sector-size fields
  1473. ** are within range. To be 'in range', both values need to be a power
  1474. ** of two greater than or equal to 512 or 32, and not greater than their
  1475. ** respective compile time maximum limits.
  1476. */
  1477. if( iPageSize<512 || iSectorSize<32
  1478. || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE
  1479. || ((iPageSize-1)&iPageSize)!=0 || ((iSectorSize-1)&iSectorSize)!=0
  1480. ){
  1481. /* If the either the page-size or sector-size in the journal-header is
  1482. ** invalid, then the process that wrote the journal-header must have
  1483. ** crashed before the header was synced. In this case stop reading
  1484. ** the journal file here.
  1485. */
  1486. return SQLITE_DONE;
  1487. }
  1488. /* Update the page-size to match the value read from the journal.
  1489. ** Use a testcase() macro to make sure that malloc failure within
  1490. ** PagerSetPagesize() is tested.
  1491. */
  1492. rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1);
  1493. testcase( rc!=SQLITE_OK );
  1494. /* Update the assumed sector-size to match the value used by
  1495. ** the process that created this journal. If this journal was
  1496. ** created by a process other than this one, then this routine
  1497. ** is being called from within pager_playback(). The local value
  1498. ** of Pager.sectorSize is restored at the end of that routine.
  1499. */
  1500. pPager->sectorSize = iSectorSize;
  1501. }
  1502. pPager->journalOff += JOURNAL_HDR_SZ(pPager);
  1503. return rc;
  1504. }
  1505. /*
  1506. ** Write the supplied master journal name into the journal file for pager
  1507. ** pPager at the current location. The master journal name must be the last
  1508. ** thing written to a journal file. If the pager is in full-sync mode, the
  1509. ** journal file descriptor is advanced to the next sector boundary before
  1510. ** anything is written. The format is:
  1511. **
  1512. ** + 4 bytes: PAGER_MJ_PGNO.
  1513. ** + N bytes: Master journal filename in utf-8.
  1514. ** + 4 bytes: N (length of master journal name in bytes, no nul-terminator).
  1515. ** + 4 bytes: Master journal name checksum.
  1516. ** + 8 bytes: aJournalMagic[].
  1517. **
  1518. ** The master journal page checksum is the sum of the bytes in the master
  1519. ** journal name, where each byte is interpreted as a signed 8-bit integer.
  1520. **
  1521. ** If zMaster is a NULL pointer (occurs for a single database transaction),
  1522. ** this call is a no-op.
  1523. */
  1524. static int writeMasterJournal(Pager *pPager, const char *zMaster){
  1525. int rc; /* Return code */
  1526. int nMaster; /* Length of string zMaster */
  1527. i64 iHdrOff; /* Offset of header in journal file */
  1528. i64 jrnlSize; /* Size of journal file on disk */
  1529. u32 cksum = 0; /* Checksum of string zMaster */
  1530. assert( pPager->setMaster==0 );
  1531. assert( !pagerUseWal(pPager) );
  1532. if( !zMaster
  1533. || pPager->journalMode==PAGER_JOURNALMODE_MEMORY
  1534. || pPager->journalMode==PAGER_JOURNALMODE_OFF
  1535. ){
  1536. return SQLITE_OK;
  1537. }
  1538. pPager->setMaster = 1;
  1539. assert( isOpen(pPager->jfd) );
  1540. assert( pPager->journalHdr <= pPager->journalOff );
  1541. /* Calculate the length in bytes and the checksum of zMaster */
  1542. for(nMaster=0; zMaster[nMaster]; nMaster++){
  1543. cksum += zMaster[nMaster];
  1544. }
  1545. /* If in full-sync mode, advance to the next disk sector before writing
  1546. ** the master journal name. This is in case the previous page written to
  1547. ** the journal has already been synced.
  1548. */
  1549. if( pPager->fullSync ){
  1550. pPager->journalOff = journalHdrOffset(pPager);
  1551. }
  1552. iHdrOff = pPager->journalOff;
  1553. /* Write the master journal data to the end of the journal file. If
  1554. ** an error occurs, return the error code to the caller.
  1555. */
  1556. if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager))))
  1557. || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4)))
  1558. || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster)))
  1559. || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum)))
  1560. || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8)))
  1561. ){
  1562. return rc;
  1563. }
  1564. pPager->journalOff += (nMaster+20);
  1565. /* If the pager is in peristent-journal mode, then the physical
  1566. ** journal-file may extend past the end of the master-journal name
  1567. ** and 8 bytes of magic data just written to the file. This is
  1568. ** dangerous because the code to rollback a hot-journal file
  1569. ** will not be able to find the master-journal name to determine
  1570. ** whether or not the journal is hot.
  1571. **
  1572. ** Easiest thing to do in this scenario is to truncate the journal
  1573. ** file to the required size.
  1574. */
  1575. if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))
  1576. && jrnlSize>pPager->journalOff
  1577. ){
  1578. rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff);
  1579. }
  1580. return rc;
  1581. }
  1582. /*
  1583. ** Find a page in the hash table given its page number. Return
  1584. ** a pointer to the page or NULL if the requested page is not
  1585. ** already in memory.
  1586. */
  1587. static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
  1588. PgHdr *p; /* Return value */
  1589. /* It is not possible for a call to PcacheFetch() with createFlag==0 to
  1590. ** fail, since no attempt to allocate dynamic memory will be made.
  1591. */
  1592. (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
  1593. return p;
  1594. }
  1595. /*
  1596. ** Discard the entire contents of the in-memory page-cache.
  1597. */
  1598. static void pager_reset(Pager *pPager){
  1599. sqlite3BackupRestart(pPager->pBackup);
  1600. sqlite3PcacheClear(pPager->pPCache);
  1601. }
  1602. /*
  1603. ** Free all structures in the Pager.aSavepoint[] array and set both
  1604. ** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal
  1605. ** if it is open and the pager is not in exclusive mode.
  1606. */
  1607. static void releaseAllSavepoints(Pager *pPager){
  1608. int ii; /* Iterator for looping through Pager.aSavepoint */
  1609. for(ii=0; ii<pPager->nSavepoint; ii++){
  1610. sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
  1611. }
  1612. if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){
  1613. sqlite3OsClose(pPager->sjfd);
  1614. }
  1615. sqlite3_free(pPager->aSavepoint);
  1616. pPager->aSavepoint = 0;
  1617. pPager->nSavepoint = 0;
  1618. pPager->nSubRec = 0;
  1619. }
  1620. /*
  1621. ** Set the bit number pgno in the PagerSavepoint.pInSavepoint
  1622. ** bitvecs of all open savepoints. Return SQLITE_OK if successful
  1623. ** or SQLITE_NOMEM if a malloc failure occurs.
  1624. */
  1625. static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){
  1626. int ii; /* Loop counter */
  1627. int rc = SQLITE_OK; /* Result code */
  1628. for(ii=0; ii<pPager->nSavepoint; ii++){
  1629. PagerSavepoint *p = &pPager->aSavepoint[ii];
  1630. if( pgno<=p->nOrig ){
  1631. rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
  1632. testcase( rc==SQLITE_NOMEM );
  1633. assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
  1634. }
  1635. }
  1636. return rc;
  1637. }
  1638. /*
  1639. ** This function is a no-op if the pager is in exclusive mode and not
  1640. ** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN
  1641. ** state.
  1642. **
  1643. ** If the pager is not in exclusive-access mode, the database file is
  1644. ** completely unlocked. If the file is unlocked and the file-system does
  1645. ** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is
  1646. ** closed (if it is open).
  1647. **
  1648. ** If the pager is in ERROR state when this function is called, the
  1649. ** contents of the pager cache are discarded before switching back to
  1650. ** the OPEN state. Regardless of whether the pager is in exclusive-mode
  1651. ** or not, any journal file left in the file-system will be treated
  1652. ** as a hot-journal and rolled back the next time a read-transaction
  1653. ** is opened (by this or by any other connection).
  1654. */
  1655. static void pager_unlock(Pager *pPager){
  1656. assert( pPager->eState==PAGER_READER
  1657. || pPager->eState==PAGER_OPEN
  1658. || pPager->eState==PAGER_ERROR
  1659. );
  1660. sqlite3BitvecDestroy(pPager->pInJournal);
  1661. pPager->pInJournal = 0;
  1662. releaseAllSavepoints(pPager);
  1663. if( pagerUseWal(pPager) ){
  1664. assert( !isOpen(pPager->jfd) );
  1665. sqlite3WalEndReadTransaction(pPager->pWal);
  1666. pPager->eState = PAGER_OPEN;
  1667. }else if( !pPager->exclusiveMode ){
  1668. int rc; /* Error code returned by pagerUnlockDb() */
  1669. int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0;
  1670. /* If the operating system support deletion of open files, then
  1671. ** close the journal file when dropping the database lock. Otherwise
  1672. ** another connection with journal_mode=delete might delete the file
  1673. ** out from under us.
  1674. */
  1675. assert( (PAGER_JOURNALMODE_MEMORY & 5)!=1 );
  1676. assert( (PAGER_JOURNALMODE_OFF & 5)!=1 );
  1677. assert( (PAGER_JOURNALMODE_WAL & 5)!=1 );
  1678. assert( (PAGER_JOURNALMODE_DELETE & 5)!=1 );
  1679. assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
  1680. assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
  1681. if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN)
  1682. || 1!=(pPager->journalMode & 5)
  1683. ){
  1684. sqlite3OsClose(pPager->jfd);
  1685. }
  1686. /* If the pager is in the ERROR state and the call to unlock the database
  1687. ** file fails, set the current lock to UNKNOWN_LOCK. See the comment
  1688. ** above the #define for UNKNOWN_LOCK for an explanation of why this
  1689. ** is necessary.
  1690. */
  1691. rc = pagerUnlockDb(pPager, NO_LOCK);
  1692. if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){
  1693. pPager->eLock = UNKNOWN_LOCK;
  1694. }
  1695. /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here
  1696. ** without clearing the error code. This is intentional - the error
  1697. ** code is cleared and the cache reset in the block below.
  1698. */
  1699. assert( pPager->errCode || pPager->eState!=PAGER_ERROR );
  1700. pPager->changeCountDone = 0;
  1701. pPager->eState = PAGER_OPEN;
  1702. }
  1703. /* If Pager.errCode is set, the contents of the pager cache cannot be
  1704. ** trusted. Now that there are no outstanding references to the pager,
  1705. ** it can safely move back to PAGER_OPEN state. This happens in both
  1706. ** normal and exclusive-locking mode.
  1707. */
  1708. if( pPager->errCode ){
  1709. assert( !MEMDB );
  1710. pager_reset(pPager);
  1711. pPager->changeCountDone = pPager->tempFile;
  1712. pPager->eState = PAGER_OPEN;
  1713. pPager->errCode = SQLITE_OK;
  1714. if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0);
  1715. }
  1716. pPager->journalOff = 0;
  1717. pPager->journalHdr = 0;
  1718. pPager->setMaster = 0;
  1719. }
  1720. /*
  1721. ** This function is called whenever an IOERR or FULL error that requires
  1722. ** the pager to transition into the ERROR state may ahve occurred.
  1723. ** The first argument is a pointer to the pager structure, the second
  1724. ** the error-code about to be returned by a pager API function. The
  1725. ** value returned is a copy of the second argument to this function.
  1726. **
  1727. ** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the
  1728. ** IOERR sub-codes, the pager enters the ERROR state and the error code
  1729. ** is stored in Pager.errCode. While the pager remains in the ERROR state,
  1730. ** all major API calls on the Pager will immediately return Pager.errCode.
  1731. **
  1732. ** The ERROR state indicates that the contents of the pager-cache
  1733. ** cannot be trusted. This state can be cleared by completely discarding
  1734. ** the contents of the pager-cache. If a transaction was active when
  1735. ** the persistent error occurred, then the rollback journal may need
  1736. ** to be replayed to restore the contents of the database file (as if
  1737. ** it were a hot-journal).
  1738. */
  1739. static int pager_error(Pager *pPager, int rc){
  1740. int rc2 = rc & 0xff;
  1741. assert( rc==SQLITE_OK || !MEMDB );
  1742. assert(
  1743. pPager->errCode==SQLITE_FULL ||
  1744. pPager->errCode==SQLITE_OK ||
  1745. (pPager->errCode & 0xff)==SQLITE_IOERR
  1746. );
  1747. if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){
  1748. pPager->errCode = rc;
  1749. pPager->eState = PAGER_ERROR;
  1750. }
  1751. return rc;
  1752. }
  1753. static int pager_truncate(Pager *pPager, Pgno nPage);
  1754. /*
  1755. ** This routine ends a transaction. A transaction is usually ended by
  1756. ** either a COMMIT or a ROLLBACK operation. This routine may be called
  1757. ** after rollback of a hot-journal, or if an error occurs while opening
  1758. ** the journal file or writing the very first journal-header of a
  1759. ** database transaction.
  1760. **
  1761. ** This routine is never called in PAGER_ERROR state. If it is called
  1762. ** in PAGER_NONE or PAGER_SHARED state and the lock held is less
  1763. ** exclusive than a RESERVED lock, it is a no-op.
  1764. **
  1765. ** Otherwise, any active savepoints are released.
  1766. **
  1767. ** If the journal file is open, then it is "finalized". Once a journal
  1768. ** file has been finalized it is not possible to use it to roll back a
  1769. ** transaction. Nor will it be considered to be a hot-journal by this
  1770. ** or any other database connection. Exactly how a journal is finalized
  1771. ** depends on whether or not the pager is running in exclusive mode and
  1772. ** the current journal-mode (Pager.journalMode value), as follows:
  1773. **
  1774. ** journalMode==MEMORY
  1775. ** Journal file descriptor is simply closed. This destroys an
  1776. ** in-memory journal.
  1777. **
  1778. ** journalMode==TRUNCATE
  1779. ** Journal file is truncated to zero bytes in size.
  1780. **
  1781. ** journalMode==PERSIST
  1782. ** The first 28 bytes of the journal file are zeroed. This invalidates
  1783. ** the first journal header in the file, and hence the entire journal
  1784. ** file. An invalid journal file cannot be rolled back.
  1785. **
  1786. ** journalMode==DELETE
  1787. ** The journal file is closed and deleted using sqlite3OsDelete().
  1788. **
  1789. ** If the pager is running in exclusive mode, this method of finalizing
  1790. ** the journal file is never used. Instead, if the journalMode is
  1791. ** DELETE and the pager is in exclusive mode, the method described under
  1792. ** journalMode==PERSIST is used instead.
  1793. **
  1794. ** After the journal is finalized, the pager moves to PAGER_READER state.
  1795. ** If running in non-exclusive rollback mode, the lock on the file is
  1796. ** downgraded to a SHARED_LOCK.
  1797. **
  1798. ** SQLITE_OK is returned if no error occurs. If an error occurs during
  1799. ** any of the IO operations to finalize the journal file or unlock the
  1800. ** database then the IO error code is returned to the user. If the
  1801. ** operation to finalize the journal file fails, then the code still
  1802. ** tries to unlock the database file if not in exclusive mode. If the
  1803. ** unlock operation fails as well, then the first error code related
  1804. ** to the first error encountered (the journal finalization one) is
  1805. ** returned.
  1806. */
  1807. static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){
  1808. int rc = SQLITE_OK; /* Error code from journal finalization operation */
  1809. int rc2 = SQLITE_OK; /* Error code from db file unlock operation */
  1810. /* Do nothing if the pager does not have an open write transaction
  1811. ** or at least a RESERVED lock. This function may be called when there
  1812. ** is no write-transaction active but a RESERVED or greater lock is
  1813. ** held under two circumstances:
  1814. **
  1815. ** 1. After a successful hot-journal rollback, it is called with
  1816. ** eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK.
  1817. **
  1818. ** 2. If a connection with locking_mode=exclusive holding an EXCLUSIVE
  1819. ** lock switches back to locking_mode=normal and then executes a
  1820. ** read-transaction, this function is called with eState==PAGER_READER
  1821. ** and eLock==EXCLUSIVE_LOCK when the read-transaction is closed.
  1822. */
  1823. assert( assert_pager_state(pPager) );
  1824. assert( pPager->eState!=PAGER_ERROR );
  1825. if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
  1826. return SQLITE_OK;
  1827. }
  1828. releaseAllSavepoints(pPager);
  1829. assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
  1830. if( isOpen(pPager->jfd) ){
  1831. assert( !pagerUseWal(pPager) );
  1832. /* Finalize the journal file. */
  1833. if( sqlite3IsMemJournal(pPager->jfd) ){
  1834. assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
  1835. sqlite3OsClose(pPager->jfd);
  1836. }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
  1837. if( pPager->journalOff==0 ){
  1838. rc = SQLITE_OK;
  1839. }else{
  1840. rc = sqlite3OsTruncate(pPager->jfd, 0);
  1841. }
  1842. pPager->journalOff = 0;
  1843. }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST
  1844. || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL)
  1845. ){
  1846. rc = zeroJournalHdr(pPager, hasMaster);
  1847. pPager->journalOff = 0;
  1848. }else{
  1849. /* This branch may be executed with Pager.journalMode==MEMORY if
  1850. ** a hot-journal was just rolled back. In this case the journal
  1851. ** file should be closed and deleted. If this connection writes to
  1852. ** the database file, it will do so using an in-memory journal.
  1853. */
  1854. int bDelete = (!pPager->tempFile && sqlite3JournalExists(pPager->jfd));
  1855. assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE
  1856. || pPager->journalMode==PAGER_JOURNALMODE_MEMORY
  1857. || pPager->journalMode==PAGER_JOURNALMODE_WAL
  1858. );
  1859. sqlite3OsClose(pPager->jfd);
  1860. if( bDelete ){
  1861. rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
  1862. }
  1863. }
  1864. }
  1865. #ifdef SQLITE_CHECK_PAGES
  1866. sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash);
  1867. if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){
  1868. PgHdr *p = pager_lookup(pPager, 1);
  1869. if( p ){
  1870. p->pageHash = 0;
  1871. sqlite3PagerUnref(p);
  1872. }
  1873. }
  1874. #endif
  1875. sqlite3BitvecDestroy(pPager->pInJournal);
  1876. pPager->pInJournal = 0;
  1877. pPager->nRec = 0;
  1878. sqlite3PcacheCleanAll(pPager->pPCache);
  1879. sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
  1880. if( pagerUseWal(pPager) ){
  1881. /* Drop the WAL write-lock, if any. Also, if the connection was in
  1882. ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE
  1883. ** lock held on the database file.
  1884. */
  1885. rc2 = sqlite3WalEndWriteTransaction(pPager->pWal);
  1886. assert( rc2==SQLITE_OK );
  1887. }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){
  1888. /* This branch is taken when committing a transaction in rollback-journal
  1889. ** mode if the database file on disk is larger than the database image.
  1890. ** At this point the journal has been finalized and the transaction
  1891. ** successfully committed, but the EXCLUSIVE lock is still held on the
  1892. ** file. So it is safe to truncate the database file to its minimum
  1893. ** required size. */
  1894. assert( pPager->eLock==EXCLUSIVE_LOCK );
  1895. rc = pager_truncate(pPager, pPager->dbSize);
  1896. }
  1897. if( !pPager->exclusiveMode
  1898. && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0))
  1899. ){
  1900. rc2 = pagerUnlockDb(pPager, SHARED_LOCK);
  1901. pPager->changeCountDone = 0;
  1902. }
  1903. pPager->eState = PAGER_READER;
  1904. pPager->setMaster = 0;
  1905. return (rc==SQLITE_OK?rc2:rc);
  1906. }
  1907. /*
  1908. ** Execute a rollback if a transaction is active and unlock the
  1909. ** database file.
  1910. **
  1911. ** If the pager has already entered the ERROR state, do not attempt
  1912. ** the rollback at this time. Instead, pager_unlock() is called. The
  1913. ** call to pager_unlock() will discard all in-memory pages, unlock
  1914. ** the database file and move the pager back to OPEN state. If this
  1915. ** means that there is a hot-journal left in the file-system, the next
  1916. ** connection to obtain a shared lock on the pager (which may be this one)
  1917. ** will roll it back.
  1918. **
  1919. ** If the pager has not already entered the ERROR state, but an IO or
  1920. ** malloc error occurs during a rollback, then this will itself cause
  1921. ** the pager to enter the ERROR state. Which will be cleared by the
  1922. ** call to pager_unlock(), as described above.
  1923. */
  1924. static void pagerUnlockAndRollback(Pager *pPager){
  1925. if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){
  1926. assert( assert_pager_state(pPager) );
  1927. if( pPager->eState>=PAGER_WRITER_LOCKED ){
  1928. sqlite3BeginBenignMalloc();
  1929. sqlite3PagerRollback(pPager);
  1930. sqlite3EndBenignMalloc();
  1931. }else if( !pPager->exclusiveMode ){
  1932. assert( pPager->eState==PAGER_READER );
  1933. pager_end_transaction(pPager, 0, 0);
  1934. }
  1935. }
  1936. pager_unlock(pPager);
  1937. }
  1938. /*
  1939. ** Parameter aData must point to a buffer of pPager->pageSize bytes
  1940. ** of data. Compute and return a checksum based ont the contents of the
  1941. ** page of data and the current value of pPager->cksumInit.
  1942. **
  1943. ** This is not a real checksum. It is really just the sum of the
  1944. ** random initial value (pPager->cksumInit) and every 200th byte
  1945. ** of the page data, starting with byte offset (pPager->pageSize%200).
  1946. ** Each byte is interpreted as an 8-bit unsigned integer.
  1947. **
  1948. ** Changing the formula used to compute this checksum results in an
  1949. ** incompatible journal file format.
  1950. **
  1951. ** If journal corruption occurs due to a power failure, the most likely
  1952. ** scenario is that one end or the other of the record will be changed.
  1953. ** It is much less likely that the two ends of the journal record will be
  1954. ** correct and the middle be corrupt. Thus, this "checksum" scheme,
  1955. ** though fast and simple, catches the mostly likely kind of corruption.
  1956. */
  1957. static u32 pager_cksum(Pager *pPager, const u8 *aData){
  1958. u32 cksum = pPager->cksumInit; /* Checksum value to return */
  1959. int i = pPager->pageSize-200; /* Loop counter */
  1960. while( i>0 ){
  1961. cksum += aData[i];
  1962. i -= 200;
  1963. }
  1964. return cksum;
  1965. }
  /*
  ** Tell the codec, if one is installed, the current page size and the
  ** number of reserved bytes. When SQLite is built without
  ** SQLITE_HAS_CODEC this is a macro that expands to nothing.
  */
  #ifdef SQLITE_HAS_CODEC
  static void pagerReportSize(Pager *pPager){
    if( pPager->xCodecSizeChng==0 ) return;   /* No size-change callback */
    pPager->xCodecSizeChng(
        pPager->pCodec, pPager->pageSize, (int)pPager->nReserve
    );
  }
  #else
  # define pagerReportSize(X)     /* No-op if we do not support a codec */
  #endif
  1980. /*
  1981. ** Read a single page from either the journal file (if isMainJrnl==1) or
  1982. ** from the sub-journal (if isMainJrnl==0) and playback that page.
  1983. ** The page begins at offset *pOffset into the file. The *pOffset
  1984. ** value is increased to the start of the next page in the journal.
  1985. **
  1986. ** The main rollback journal uses checksums - the statement journal does
  1987. ** not.
  1988. **
  1989. ** If the page number of the page record read from the (sub-)journal file
  1990. ** is greater than the current value of Pager.dbSize, then playback is
  1991. ** skipped and SQLITE_OK is returned.
  1992. **
  1993. ** If pDone is not NULL, then it is a record of pages that have already
  1994. ** been played back. If the page at *pOffset has already been played back
  1995. ** (if the corresponding pDone bit is set) then skip the playback.
  1996. ** Make sure the pDone bit corresponding to the *pOffset page is set
  1997. ** prior to returning.
  1998. **
  1999. ** If the page record is successfully read from the (sub-)journal file
  2000. ** and played back, then SQLITE_OK is returned. If an IO error occurs
  2001. ** while reading the record from the (sub-)journal file or while writing
  2002. ** to the database file, then the IO error code is returned. If data
  2003. ** is successfully read from the (sub-)journal file but appears to be
  2004. ** corrupted, SQLITE_DONE is returned. Data is considered corrupted in
  2005. ** two circumstances:
  2006. **
  2007. ** * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or
  2008. ** * If the record is being rolled back from the main journal file
  2009. ** and the checksum field does not match the record content.
  2010. **
  2011. ** Neither of these two scenarios are possible during a savepoint rollback.
  2012. **
  2013. ** If this is a savepoint rollback, then memory may have to be dynamically
  2014. ** allocated by this function. If this is the case and an allocation fails,
  2015. ** SQLITE_NOMEM is returned.
  2016. */
  2017. static int pager_playback_one_page(
  2018. Pager *pPager, /* The pager being played back */
  2019. i64 *pOffset, /* Offset of record to playback */
  2020. Bitvec *pDone, /* Bitvec of pages already played back */
  2021. int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */
  2022. int isSavepnt /* True for a savepoint rollback */
  2023. ){
  2024. int rc;
  2025. PgHdr *pPg; /* An existing page in the cache */
  2026. Pgno pgno; /* The page number of a page in journal */
  2027. u32 cksum; /* Checksum used for sanity checking */
  2028. char *aData; /* Temporary storage for the page */
  2029. sqlite3_file *jfd; /* The file descriptor for the journal file */
  2030. int isSynced; /* True if journal page is synced */
  2031. assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */
  2032. assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */
  2033. assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */
  2034. assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */
  2035. aData = pPager->pTmpSpace;
  2036. assert( aData ); /* Temp storage must have already been allocated */
  2037. assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) );
  2038. /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction
  2039. ** or savepoint rollback done at the request of the caller) or this is
  2040. ** a hot-journal rollback. If it is a hot-journal rollback, the pager
  2041. ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback
  2042. ** only reads from the main journal, not the sub-journal.
  2043. */
  2044. assert( pPager->eState>=PAGER_WRITER_CACHEMOD
  2045. || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK)
  2046. );
  2047. assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl );
  2048. /* Read the page number and page data from the journal or sub-journal
  2049. ** file. Return an error code to the caller if an IO error occurs.
  2050. */
  2051. jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;
  2052. rc = read32bits(jfd, *pOffset, &pgno);
  2053. if( rc!=SQLITE_OK ) return rc;
  2054. rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4);
  2055. if( rc!=SQLITE_OK ) return rc;
  2056. *pOffset += pPager->pageSize + 4 + isMainJrnl*4;
  2057. /* Sanity checking on the page. This is more important that I originally
  2058. ** thought. If a power failure occurs while the journal is being written,
  2059. ** it could cause invalid data to be written into the journal. We need to
  2060. ** detect this invalid data (with high probability) and ignore it.
  2061. */
  2062. if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  2063. assert( !isSavepnt );
  2064. return SQLITE_DONE;
  2065. }
  2066. if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
  2067. return SQLITE_OK;
  2068. }
  2069. if( isMainJrnl ){
  2070. rc = read32bits(jfd, (*pOffset)-4, &cksum);
  2071. if( rc ) return rc;
  2072. if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){
  2073. return SQLITE_DONE;
  2074. }
  2075. }
  2076. /* If this page has already been played by before during the current
  2077. ** rollback, then don't bother to play it back again.
  2078. */
  2079. if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){
  2080. return rc;
  2081. }
  2082. /* When playing back page 1, restore the nReserve setting
  2083. */
  2084. if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){
  2085. pPager->nReserve = ((u8*)aData)[20];
  2086. pagerReportSize(pPager);
  2087. }
  2088. /* If the pager is in CACHEMOD state, then there must be a copy of this
  2089. ** page in the pager cache. In this case just update the pager cache,
  2090. ** not the database file. The page is left marked dirty in this case.
  2091. **
  2092. ** An exception to the above rule: If the database is in no-sync mode
  2093. ** and a page is moved during an incremental vacuum then the page may
  2094. ** not be in the pager cache. Later: if a malloc() or IO error occurs
  2095. ** during a Movepage() call, then the page may not be in the cache
  2096. ** either. So the condition described in the above paragraph is not
  2097. ** assert()able.
  2098. **
  2099. ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the
  2100. ** pager cache if it exists and the main file. The page is then marked
  2101. ** not dirty. Since this code is only executed in PAGER_OPEN state for
  2102. ** a hot-journal rollback, it is guaranteed that the page-cache is empty
  2103. ** if the pager is in OPEN state.
  2104. **
  2105. ** Ticket #1171: The statement journal might contain page content that is
  2106. ** different from the page content at the start of the transaction.
  2107. ** This occurs when a page is changed prior to the start of a statement
  2108. ** then changed again within the statement. When rolling back such a
  2109. ** statement we must not write to the original database unless we know
  2110. ** for certain that original page contents are synced into the main rollback
  2111. ** journal. Otherwise, a power loss might leave modified data in the
  2112. ** database file without an entry in the rollback journal that can
  2113. ** restore the database to its original form. Two conditions must be
  2114. ** met before writing to the database files. (1) the database must be
  2115. ** locked. (2) we know that the original page content is fully synced
  2116. ** in the main journal either because the page is not in cache or else
  2117. ** the page is marked as needSync==0.
  2118. **
  2119. ** 2008-04-14: When attempting to vacuum a corrupt database file, it
  2120. ** is possible to fail a statement on a database that does not yet exist.
  2121. ** Do not attempt to write if database file has never been opened.
  2122. */
  2123. if( pagerUseWal(pPager) ){
  2124. pPg = 0;
  2125. }else{
  2126. pPg = pager_lookup(pPager, pgno);
  2127. }
  2128. assert( pPg || !MEMDB );
  2129. assert( pPager->eState!=PAGER_OPEN || pPg==0 );
  2130. PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n",
  2131. PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData),
  2132. (isMainJrnl?"main-journal":"sub-journal")
  2133. ));
  2134. if( isMainJrnl ){
  2135. isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr);
  2136. }else{
  2137. isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC));
  2138. }
  2139. if( isOpen(pPager->fd)
  2140. && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
  2141. && isSynced
  2142. ){
  2143. i64 ofst = (pgno-1)*(i64)pPager->pageSize;
  2144. testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
  2145. assert( !pagerUseWal(pPager) );
  2146. rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst);
  2147. if( pgno>pPager->dbFileSize ){
  2148. pPager->dbFileSize = pgno;
  2149. }
  2150. if( pPager->pBackup ){
  2151. CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM);
  2152. sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData);
  2153. CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData);
  2154. }
  2155. }else if( !isMainJrnl && pPg==0 ){
  2156. /* If this is a rollback of a savepoint and data was not written to
  2157. ** the database and the page is not in-memory, there is a potential
  2158. ** problem. When the page is next fetched by the b-tree layer, it
  2159. ** will be read from the database file, which may or may not be
  2160. ** current.
  2161. **
  2162. ** There are a couple of different ways this can happen. All are quite
  2163. ** obscure. When running in synchronous mode, this can only happen
  2164. ** if the page is on the free-list at the start of the transaction, then
  2165. ** populated, then moved using sqlite3PagerMovepage().
  2166. **
  2167. ** The solution is to add an in-memory page to the cache containing
  2168. ** the data just read from the sub-journal. Mark the page as dirty
  2169. ** and if the pager requires a journal-sync, then mark the page as
  2170. ** requiring a journal-sync before it is written.
  2171. */
  2172. assert( isSavepnt );
  2173. assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)==0 );
  2174. pPager->doNotSpill |= SPILLFLAG_ROLLBACK;
  2175. rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1);
  2176. assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)!=0 );
  2177. pPager->doNotSpill &= ~SPILLFLAG_ROLLBACK;
  2178. if( rc!=SQLITE_OK ) return rc;
  2179. pPg->flags &= ~PGHDR_NEED_READ;
  2180. sqlite3PcacheMakeDirty(pPg);
  2181. }
  2182. if( pPg ){
  2183. /* No page should ever be explicitly rolled back that is in use, except
  2184. ** for page 1 which is held in use in order to keep the lock on the
  2185. ** database active. However such a page may be rolled back as a result
  2186. ** of an internal error resulting in an automatic call to
  2187. ** sqlite3PagerRollback().
  2188. */
  2189. void *pData;
  2190. pData = pPg->pData;
  2191. memcpy(pData, (u8*)aData, pPager->pageSize);
  2192. pPager->xReiniter(pPg);
  2193. if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){
  2194. /* If the contents of this page were just restored from the main
  2195. ** journal file, then its content must be as they were when the
  2196. ** transaction was first opened. In this case we can mark the page
  2197. ** as clean, since there will be no need to write it out to the
  2198. ** database.
  2199. **
  2200. ** There is one exception to this rule. If the page is being rolled
  2201. ** back as part of a savepoint (or statement) rollback from an
  2202. ** unsynced portion of the main journal file, then it is not safe
  2203. ** to mark the page as clean. This is because marking the page as
  2204. ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is
  2205. ** already in the journal file (recorded in Pager.pInJournal) and
  2206. ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to
  2207. ** again within this transaction, it will be marked as dirty but
  2208. ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially
  2209. ** be written out into the database file before its journal file
  2210. ** segment is synced. If a crash occurs during or following this,
  2211. ** database corruption may ensue.
  2212. */
  2213. assert( !pagerUseWal(pPager) );
  2214. sqlite3PcacheMakeClean(pPg);
  2215. }
  2216. pager_set_pagehash(pPg);
  2217. /* If this was page 1, then restore the value of Pager.dbFileVers.
  2218. ** Do this before any decoding. */
  2219. if( pgno==1 ){
  2220. memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
  2221. }
  2222. /* Decode the page just read from disk */
  2223. CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM);
  2224. sqlite3PcacheRelease(pPg);
  2225. }
  2226. return rc;
  2227. }
  2228. /*
  2229. ** Parameter zMaster is the name of a master journal file. A single journal
  2230. ** file that referred to the master journal file has just been rolled back.
  2231. ** This routine checks if it is possible to delete the master journal file,
  2232. ** and does so if it is.
  2233. **
  2234. ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not
  2235. ** available for use within this function.
  2236. **
  2237. ** When a master journal file is created, it is populated with the names
  2238. ** of all of its child journals, one after another, formatted as utf-8
  2239. ** encoded text. The end of each child journal file is marked with a
  2240. ** nul-terminator byte (0x00). i.e. the entire contents of a master journal
  2241. ** file for a transaction involving two databases might be:
  2242. **
  2243. ** "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00"
  2244. **
  2245. ** A master journal file may only be deleted once all of its child
  2246. ** journals have been rolled back.
  2247. **
  2248. ** This function reads the contents of the master-journal file into
  2249. ** memory and loops through each of the child journal names. For
  2250. ** each child journal, it checks if:
  2251. **
  2252. ** * if the child journal exists, and if so
  2253. ** * if the child journal contains a reference to master journal
  2254. ** file zMaster
  2255. **
  2256. ** If a child journal can be found that matches both of the criteria
  2257. ** above, this function returns without doing anything. Otherwise, if
  2258. ** no such child journal can be found, file zMaster is deleted from
  2259. ** the file-system using sqlite3OsDelete().
  2260. **
  2261. ** If an IO error within this function, an error code is returned. This
  2262. ** function allocates memory by calling sqlite3Malloc(). If an allocation
  2263. ** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors
  2264. ** occur, SQLITE_OK is returned.
  2265. **
  2266. ** TODO: This function allocates a single block of memory to load
  2267. ** the entire contents of the master journal file. This could be
  2268. ** a couple of kilobytes or so - potentially larger than the page
  2269. ** size.
  2270. */
  2271. static int pager_delmaster(Pager *pPager, const char *zMaster){
  2272. sqlite3_vfs *pVfs = pPager->pVfs;
  2273. int rc; /* Return code */
  2274. sqlite3_file *pMaster; /* Malloc'd master-journal file descriptor */
  2275. sqlite3_file *pJournal; /* Malloc'd child-journal file descriptor */
  2276. char *zMasterJournal = 0; /* Contents of master journal file */
  2277. i64 nMasterJournal; /* Size of master journal file */
  2278. char *zJournal; /* Pointer to one journal within MJ file */
  2279. char *zMasterPtr; /* Space to hold MJ filename from a journal file */
  2280. int nMasterPtr; /* Amount of space allocated to zMasterPtr[] */
  2281. /* Allocate space for both the pJournal and pMaster file descriptors.
  2282. ** If successful, open the master journal file for reading.
  2283. */
  2284. pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2);
  2285. pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
  2286. if( !pMaster ){
  2287. rc = SQLITE_NOMEM;
  2288. }else{
  2289. const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
  2290. rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
  2291. }
  2292. if( rc!=SQLITE_OK ) goto delmaster_out;
  2293. /* Load the entire master journal file into space obtained from
  2294. ** sqlite3_malloc() and pointed to by zMasterJournal. Also obtain
  2295. ** sufficient space (in zMasterPtr) to hold the names of master
  2296. ** journal files extracted from regular rollback-journals.
  2297. */
  2298. rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
  2299. if( rc!=SQLITE_OK ) goto delmaster_out;
  2300. nMasterPtr = pVfs->mxPathname+1;
  2301. zMasterJournal = sqlite3Malloc((int)nMasterJournal + nMasterPtr + 1);
  2302. if( !zMasterJournal ){
  2303. rc = SQLITE_NOMEM;
  2304. goto delmaster_out;
  2305. }
  2306. zMasterPtr = &zMasterJournal[nMasterJournal+1];
  2307. rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0);
  2308. if( rc!=SQLITE_OK ) goto delmaster_out;
  2309. zMasterJournal[nMasterJournal] = 0;
  2310. zJournal = zMasterJournal;
  2311. while( (zJournal-zMasterJournal)<nMasterJournal ){
  2312. int exists;
  2313. rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
  2314. if( rc!=SQLITE_OK ){
  2315. goto delmaster_out;
  2316. }
  2317. if( exists ){
  2318. /* One of the journals pointed to by the master journal exists.
  2319. ** Open it and check if it points at the master journal. If
  2320. ** so, return without deleting the master journal file.
  2321. */
  2322. int c;
  2323. int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
  2324. rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
  2325. if( rc!=SQLITE_OK ){
  2326. goto delmaster_out;
  2327. }
  2328. rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
  2329. sqlite3OsClose(pJournal);
  2330. if( rc!=SQLITE_OK ){
  2331. goto delmaster_out;
  2332. }
  2333. c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
  2334. if( c ){
  2335. /* We have a match. Do not delete the master journal file. */
  2336. goto delmaster_out;
  2337. }
  2338. }
  2339. zJournal += (sqlite3Strlen30(zJournal)+1);
  2340. }
  2341. sqlite3OsClose(pMaster);
  2342. rc = sqlite3OsDelete(pVfs, zMaster, 0);
  2343. delmaster_out:
  2344. sqlite3_free(zMasterJournal);
  2345. if( pMaster ){
  2346. sqlite3OsClose(pMaster);
  2347. assert( !isOpen(pJournal) );
  2348. sqlite3_free(pMaster);
  2349. }
  2350. return rc;
  2351. }
  2352. /*
  2353. ** This function is used to change the actual size of the database
  2354. ** file in the file-system. This only happens when committing a transaction,
  2355. ** or rolling back a transaction (including rolling back a hot-journal).
  2356. **
  2357. ** If the main database file is not open, or the pager is not in either
  2358. ** DBMOD or OPEN state, this function is a no-op. Otherwise, the size
  2359. ** of the file is changed to nPage pages (nPage*pPager->pageSize bytes).
  2360. ** If the file on disk is currently larger than nPage pages, then use the VFS
  2361. ** xTruncate() method to truncate it.
  2362. **
  2363. ** Or, it might might be the case that the file on disk is smaller than
  2364. ** nPage pages. Some operating system implementations can get confused if
  2365. ** you try to truncate a file to some size that is larger than it
  2366. ** currently is, so detect this case and write a single zero byte to
  2367. ** the end of the new file instead.
  2368. **
  2369. ** If successful, return SQLITE_OK. If an IO error occurs while modifying
  2370. ** the database file, return the error code to the caller.
  2371. */
  2372. static int pager_truncate(Pager *pPager, Pgno nPage){
  2373. int rc = SQLITE_OK;
  2374. assert( pPager->eState!=PAGER_ERROR );
  2375. assert( pPager->eState!=PAGER_READER );
  2376. if( isOpen(pPager->fd)
  2377. && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
  2378. ){
  2379. i64 currentSize, newSize;
  2380. int szPage = pPager->pageSize;
  2381. assert( pPager->eLock==EXCLUSIVE_LOCK );
  2382. /* TODO: Is it safe to use Pager.dbFileSize here? */
  2383. rc = sqlite3OsFileSize(pPager->fd, &currentSize);
  2384. newSize = szPage*(i64)nPage;
  2385. if( rc==SQLITE_OK && currentSize!=newSize ){
  2386. if( currentSize>newSize ){
  2387. rc = sqlite3OsTruncate(pPager->fd, newSize);
  2388. }else if( (currentSize+szPage)<=newSize ){
  2389. char *pTmp = pPager->pTmpSpace;
  2390. memset(pTmp, 0, szPage);
  2391. testcase( (newSize-szPage) == currentSize );
  2392. testcase( (newSize-szPage) > currentSize );
  2393. rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
  2394. }
  2395. if( rc==SQLITE_OK ){
  2396. pPager->dbFileSize = nPage;
  2397. }
  2398. }
  2399. }
  2400. return rc;
  2401. }
  2402. /*
  2403. ** Return a sanitized version of the sector-size of OS file pFile. The
  2404. ** return value is guaranteed to lie between 32 and MAX_SECTOR_SIZE.
  2405. */
  2406. int sqlite3SectorSize(sqlite3_file *pFile){
  2407. int iRet = sqlite3OsSectorSize(pFile);
  2408. if( iRet<32 ){
  2409. iRet = 512;
  2410. }else if( iRet>MAX_SECTOR_SIZE ){
  2411. assert( MAX_SECTOR_SIZE>=512 );
  2412. iRet = MAX_SECTOR_SIZE;
  2413. }
  2414. return iRet;
  2415. }
  2416. /*
  2417. ** Set the value of the Pager.sectorSize variable for the given
  2418. ** pager based on the value returned by the xSectorSize method
  2419. ** of the open database file. The sector size will be used used
  2420. ** to determine the size and alignment of journal header and
  2421. ** master journal pointers within created journal files.
  2422. **
  2423. ** For temporary files the effective sector size is always 512 bytes.
  2424. **
  2425. ** Otherwise, for non-temporary files, the effective sector size is
  2426. ** the value returned by the xSectorSize() method rounded up to 32 if
  2427. ** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it
  2428. ** is greater than MAX_SECTOR_SIZE.
  2429. **
  2430. ** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set
  2431. ** the effective sector size to its minimum value (512). The purpose of
  2432. ** pPager->sectorSize is to define the "blast radius" of bytes that
  2433. ** might change if a crash occurs while writing to a single byte in
  2434. ** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero
  2435. ** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector
  2436. ** size. For backwards compatibility of the rollback journal file format,
  2437. ** we cannot reduce the effective sector size below 512.
  2438. */
  2439. static void setSectorSize(Pager *pPager){
  2440. assert( isOpen(pPager->fd) || pPager->tempFile );
  2441. if( pPager->tempFile
  2442. || (sqlite3OsDeviceCharacteristics(pPager->fd) &
  2443. SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0
  2444. ){
  2445. /* Sector size doesn't matter for temporary files. Also, the file
  2446. ** may not have been opened yet, in which case the OsSectorSize()
  2447. ** call will segfault. */
  2448. pPager->sectorSize = 512;
  2449. }else{
  2450. pPager->sectorSize = sqlite3SectorSize(pPager->fd);
  2451. }
  2452. }
  2453. /*
  2454. ** Playback the journal and thus restore the database file to
  2455. ** the state it was in before we started making changes.
  2456. **
  2457. ** The journal file format is as follows:
  2458. **
  2459. ** (1) 8 byte prefix. A copy of aJournalMagic[].
  2460. ** (2) 4 byte big-endian integer which is the number of valid page records
  2461. ** in the journal. If this value is 0xffffffff, then compute the
  2462. ** number of page records from the journal size.
  2463. ** (3) 4 byte big-endian integer which is the initial value for the
  2464. ** sanity checksum.
  2465. ** (4) 4 byte integer which is the number of pages to truncate the
  2466. ** database to during a rollback.
  2467. ** (5) 4 byte big-endian integer which is the sector size. The header
  2468. ** is this many bytes in size.
  2469. ** (6) 4 byte big-endian integer which is the page size.
  2470. ** (7) zero padding out to the next sector size.
  2471. ** (8) Zero or more pages instances, each as follows:
  2472. ** + 4 byte page number.
  2473. ** + pPager->pageSize bytes of data.
  2474. ** + 4 byte checksum
  2475. **
  2476. ** When we speak of the journal header, we mean the first 7 items above.
  2477. ** Each entry in the journal is an instance of the 8th item.
  2478. **
  2479. ** Call the value from the second bullet "nRec". nRec is the number of
  2480. ** valid page entries in the journal. In most cases, you can compute the
  2481. ** value of nRec from the size of the journal file. But if a power
  2482. ** failure occurred while the journal was being written, it could be the
  2483. ** case that the size of the journal file had already been increased but
  2484. ** the extra entries had not yet made it safely to disk. In such a case,
  2485. ** the value of nRec computed from the file size would be too large. For
  2486. ** that reason, we always use the nRec value in the header.
  2487. **
  2488. ** If the nRec value is 0xffffffff it means that nRec should be computed
  2489. ** from the file size. This value is used when the user selects the
  2490. ** no-sync option for the journal. A power failure could lead to corruption
  2491. ** in this case. But for things like temporary table (which will be
  2492. ** deleted when the power is restored) we don't care.
  2493. **
  2494. ** If the file opened as the journal file is not a well-formed
  2495. ** journal file then all pages up to the first corrupted page are rolled
  2496. ** back (or no pages if the journal header is corrupted). The journal file
  2497. ** is then deleted and SQLITE_OK returned, just as if no corruption had
  2498. ** been encountered.
  2499. **
  2500. ** If an I/O or malloc() error occurs, the journal-file is not deleted
  2501. ** and an error code is returned.
  2502. **
  2503. ** The isHot parameter indicates that we are trying to rollback a journal
  2504. ** that might be a hot journal. Or, it could be that the journal is
  2505. ** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE.
  2506. ** If the journal really is hot, reset the pager cache prior rolling
  2507. ** back any content. If the journal is merely persistent, no reset is
  2508. ** needed.
  2509. */
  2510. static int pager_playback(Pager *pPager, int isHot){
  2511. sqlite3_vfs *pVfs = pPager->pVfs;
  2512. i64 szJ; /* Size of the journal file in bytes */
  2513. u32 nRec; /* Number of Records in the journal */
  2514. u32 u; /* Unsigned loop counter */
  2515. Pgno mxPg = 0; /* Size of the original file in pages */
  2516. int rc; /* Result code of a subroutine */
  2517. int res = 1; /* Value returned by sqlite3OsAccess() */
  2518. char *zMaster = 0; /* Name of master journal file if any */
  2519. int needPagerReset; /* True to reset page prior to first page rollback */
  2520. int nPlayback = 0; /* Total number of pages restored from journal */
  2521. /* Figure out how many records are in the journal. Abort early if
  2522. ** the journal is empty.
  2523. */
  2524. assert( isOpen(pPager->jfd) );
  2525. rc = sqlite3OsFileSize(pPager->jfd, &szJ);
  2526. if( rc!=SQLITE_OK ){
  2527. goto end_playback;
  2528. }
  2529. /* Read the master journal name from the journal, if it is present.
  2530. ** If a master journal file name is specified, but the file is not
  2531. ** present on disk, then the journal is not hot and does not need to be
  2532. ** played back.
  2533. **
  2534. ** TODO: Technically the following is an error because it assumes that
  2535. ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that
  2536. ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c,
  2537. ** mxPathname is 512, which is the same as the minimum allowable value
  2538. ** for pageSize.
  2539. */
  2540. zMaster = pPager->pTmpSpace;
  2541. rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  2542. if( rc==SQLITE_OK && zMaster[0] ){
  2543. rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
  2544. }
  2545. zMaster = 0;
  2546. if( rc!=SQLITE_OK || !res ){
  2547. goto end_playback;
  2548. }
  2549. pPager->journalOff = 0;
  2550. needPagerReset = isHot;
  2551. /* This loop terminates either when a readJournalHdr() or
  2552. ** pager_playback_one_page() call returns SQLITE_DONE or an IO error
  2553. ** occurs.
  2554. */
  2555. while( 1 ){
  2556. /* Read the next journal header from the journal file. If there are
  2557. ** not enough bytes left in the journal file for a complete header, or
  2558. ** it is corrupted, then a process must have failed while writing it.
  2559. ** This indicates nothing more needs to be rolled back.
  2560. */
  2561. rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg);
  2562. if( rc!=SQLITE_OK ){
  2563. if( rc==SQLITE_DONE ){
  2564. rc = SQLITE_OK;
  2565. }
  2566. goto end_playback;
  2567. }
  2568. /* If nRec is 0xffffffff, then this journal was created by a process
  2569. ** working in no-sync mode. This means that the rest of the journal
  2570. ** file consists of pages, there are no more journal headers. Compute
  2571. ** the value of nRec based on this assumption.
  2572. */
  2573. if( nRec==0xffffffff ){
  2574. assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
  2575. nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager));
  2576. }
  2577. /* If nRec is 0 and this rollback is of a transaction created by this
  2578. ** process and if this is the final header in the journal, then it means
  2579. ** that this part of the journal was being filled but has not yet been
  2580. ** synced to disk. Compute the number of pages based on the remaining
  2581. ** size of the file.
  2582. **
  2583. ** The third term of the test was added to fix ticket #2565.
  2584. ** When rolling back a hot journal, nRec==0 always means that the next
  2585. ** chunk of the journal contains zero pages to be rolled back. But
  2586. ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
  2587. ** the journal, it means that the journal might contain additional
  2588. ** pages that need to be rolled back and that the number of pages
  2589. ** should be computed based on the journal file size.
  2590. */
  2591. if( nRec==0 && !isHot &&
  2592. pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
  2593. nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
  2594. }
  2595. /* If this is the first header read from the journal, truncate the
  2596. ** database file back to its original size.
  2597. */
  2598. if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
  2599. rc = pager_truncate(pPager, mxPg);
  2600. if( rc!=SQLITE_OK ){
  2601. goto end_playback;
  2602. }
  2603. pPager->dbSize = mxPg;
  2604. }
  2605. /* Copy original pages out of the journal and back into the
  2606. ** database file and/or page cache.
  2607. */
  2608. for(u=0; u<nRec; u++){
  2609. if( needPagerReset ){
  2610. pager_reset(pPager);
  2611. needPagerReset = 0;
  2612. }
  2613. rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0);
  2614. if( rc==SQLITE_OK ){
  2615. nPlayback++;
  2616. }else{
  2617. if( rc==SQLITE_DONE ){
  2618. pPager->journalOff = szJ;
  2619. break;
  2620. }else if( rc==SQLITE_IOERR_SHORT_READ ){
  2621. /* If the journal has been truncated, simply stop reading and
  2622. ** processing the journal. This might happen if the journal was
  2623. ** not completely written and synced prior to a crash. In that
  2624. ** case, the database should have never been written in the
  2625. ** first place so it is OK to simply abandon the rollback. */
  2626. rc = SQLITE_OK;
  2627. goto end_playback;
  2628. }else{
  2629. /* If we are unable to rollback, quit and return the error
  2630. ** code. This will cause the pager to enter the error state
  2631. ** so that no further harm will be done. Perhaps the next
  2632. ** process to come along will be able to rollback the database.
  2633. */
  2634. goto end_playback;
  2635. }
  2636. }
  2637. }
  2638. }
  2639. /*NOTREACHED*/
  2640. assert( 0 );
  2641. end_playback:
  2642. /* Following a rollback, the database file should be back in its original
  2643. ** state prior to the start of the transaction, so invoke the
  2644. ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the
  2645. ** assertion that the transaction counter was modified.
  2646. */
  2647. #ifdef SQLITE_DEBUG
  2648. if( pPager->fd->pMethods ){
  2649. sqlite3OsFileControlHint(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0);
  2650. }
  2651. #endif
  2652. /* If this playback is happening automatically as a result of an IO or
  2653. ** malloc error that occurred after the change-counter was updated but
  2654. ** before the transaction was committed, then the change-counter
  2655. ** modification may just have been reverted. If this happens in exclusive
  2656. ** mode, then subsequent transactions performed by the connection will not
  2657. ** update the change-counter at all. This may lead to cache inconsistency
  2658. ** problems for other processes at some point in the future. So, just
  2659. ** in case this has happened, clear the changeCountDone flag now.
  2660. */
  2661. pPager->changeCountDone = pPager->tempFile;
  2662. if( rc==SQLITE_OK ){
  2663. zMaster = pPager->pTmpSpace;
  2664. rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  2665. testcase( rc!=SQLITE_OK );
  2666. }
  2667. if( rc==SQLITE_OK
  2668. && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
  2669. ){
  2670. rc = sqlite3PagerSync(pPager);
  2671. }
  2672. if( rc==SQLITE_OK ){
  2673. rc = pager_end_transaction(pPager, zMaster[0]!='\0', 0);
  2674. testcase( rc!=SQLITE_OK );
  2675. }
  2676. if( rc==SQLITE_OK && zMaster[0] && res ){
  2677. /* If there was a master journal and this routine will return success,
  2678. ** see if it is possible to delete the master journal.
  2679. */
  2680. rc = pager_delmaster(pPager, zMaster);
  2681. testcase( rc!=SQLITE_OK );
  2682. }
  2683. if( isHot && nPlayback ){
  2684. sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s",
  2685. nPlayback, pPager->zJournal);
  2686. }
  2687. /* The Pager.sectorSize variable may have been updated while rolling
  2688. ** back a journal created by a process with a different sector size
  2689. ** value. Reset it to the correct value for this process.
  2690. */
  2691. setSectorSize(pPager);
  2692. return rc;
  2693. }
  2694. /*
  2695. ** Read the content for page pPg out of the database file and into
  2696. ** pPg->pData. A shared lock or greater must be held on the database
  2697. ** file before this function is called.
  2698. **
  2699. ** If page 1 is read, then the value of Pager.dbFileVers[] is set to
  2700. ** the value read from the database file.
  2701. **
  2702. ** If an IO error occurs, then the IO error is returned to the caller.
  2703. ** Otherwise, SQLITE_OK is returned.
  2704. */
  2705. static int readDbPage(PgHdr *pPg, u32 iFrame){
  2706. Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
  2707. Pgno pgno = pPg->pgno; /* Page number to read */
  2708. int rc = SQLITE_OK; /* Return code */
  2709. int pgsz = pPager->pageSize; /* Number of bytes to read */
  2710. assert( pPager->eState>=PAGER_READER && !MEMDB );
  2711. assert( isOpen(pPager->fd) );
  2712. #ifndef SQLITE_OMIT_WAL
  2713. if( iFrame ){
  2714. /* Try to pull the page from the write-ahead log. */
  2715. rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData);
  2716. }else
  2717. #endif
  2718. {
  2719. i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
  2720. rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
  2721. if( rc==SQLITE_IOERR_SHORT_READ ){
  2722. rc = SQLITE_OK;
  2723. }
  2724. }
  2725. if( pgno==1 ){
  2726. if( rc ){
  2727. /* If the read is unsuccessful, set the dbFileVers[] to something
  2728. ** that will never be a valid file version. dbFileVers[] is a copy
  2729. ** of bytes 24..39 of the database. Bytes 28..31 should always be
  2730. ** zero or the size of the database in page. Bytes 32..35 and 35..39
  2731. ** should be page numbers which are never 0xffffffff. So filling
  2732. ** pPager->dbFileVers[] with all 0xff bytes should suffice.
  2733. **
  2734. ** For an encrypted database, the situation is more complex: bytes
  2735. ** 24..39 of the database are white noise. But the probability of
  2736. ** white noising equaling 16 bytes of 0xff is vanishingly small so
  2737. ** we should still be ok.
  2738. */
  2739. memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers));
  2740. }else{
  2741. u8 *dbFileVers = &((u8*)pPg->pData)[24];
  2742. memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers));
  2743. }
  2744. }
  2745. CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);
  2746. PAGER_INCR(sqlite3_pager_readdb_count);
  2747. PAGER_INCR(pPager->nRead);
  2748. IOTRACE(("PGIN %p %d\n", pPager, pgno));
  2749. PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
  2750. PAGERID(pPager), pgno, pager_pagehash(pPg)));
  2751. return rc;
  2752. }
  2753. /*
  2754. ** Update the value of the change-counter at offsets 24 and 92 in
  2755. ** the header and the sqlite version number at offset 96.
  2756. **
  2757. ** This is an unconditional update. See also the pager_incr_changecounter()
  2758. ** routine which only updates the change-counter if the update is actually
  2759. ** needed, as determined by the pPager->changeCountDone state variable.
  2760. */
  2761. static void pager_write_changecounter(PgHdr *pPg){
  2762. u32 change_counter;
  2763. /* Increment the value just read and write it back to byte 24. */
  2764. change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1;
  2765. put32bits(((char*)pPg->pData)+24, change_counter);
  2766. /* Also store the SQLite version number in bytes 96..99 and in
  2767. ** bytes 92..95 store the change counter for which the version number
  2768. ** is valid. */
  2769. put32bits(((char*)pPg->pData)+92, change_counter);
  2770. put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER);
  2771. }
  2772. #ifndef SQLITE_OMIT_WAL
  2773. /*
  2774. ** This function is invoked once for each page that has already been
  2775. ** written into the log file when a WAL transaction is rolled back.
  2776. ** Parameter iPg is the page number of said page. The pCtx argument
  2777. ** is actually a pointer to the Pager structure.
  2778. **
  2779. ** If page iPg is present in the cache, and has no outstanding references,
  2780. ** it is discarded. Otherwise, if there are one or more outstanding
  2781. ** references, the page content is reloaded from the database. If the
  2782. ** attempt to reload content from the database is required and fails,
  2783. ** return an SQLite error code. Otherwise, SQLITE_OK.
  2784. */
  2785. static int pagerUndoCallback(void *pCtx, Pgno iPg){
  2786. int rc = SQLITE_OK;
  2787. Pager *pPager = (Pager *)pCtx;
  2788. PgHdr *pPg;
  2789. assert( pagerUseWal(pPager) );
  2790. pPg = sqlite3PagerLookup(pPager, iPg);
  2791. if( pPg ){
  2792. if( sqlite3PcachePageRefcount(pPg)==1 ){
  2793. sqlite3PcacheDrop(pPg);
  2794. }else{
  2795. u32 iFrame = 0;
  2796. rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame);
  2797. if( rc==SQLITE_OK ){
  2798. rc = readDbPage(pPg, iFrame);
  2799. }
  2800. if( rc==SQLITE_OK ){
  2801. pPager->xReiniter(pPg);
  2802. }
  2803. sqlite3PagerUnref(pPg);
  2804. }
  2805. }
  2806. /* Normally, if a transaction is rolled back, any backup processes are
  2807. ** updated as data is copied out of the rollback journal and into the
  2808. ** database. This is not generally possible with a WAL database, as
  2809. ** rollback involves simply truncating the log file. Therefore, if one
  2810. ** or more frames have already been written to the log (and therefore
  2811. ** also copied into the backup databases) as part of this transaction,
  2812. ** the backups must be restarted.
  2813. */
  2814. sqlite3BackupRestart(pPager->pBackup);
  2815. return rc;
  2816. }
  2817. /*
  2818. ** This function is called to rollback a transaction on a WAL database.
  2819. */
  2820. static int pagerRollbackWal(Pager *pPager){
  2821. int rc; /* Return Code */
  2822. PgHdr *pList; /* List of dirty pages to revert */
  2823. /* For all pages in the cache that are currently dirty or have already
  2824. ** been written (but not committed) to the log file, do one of the
  2825. ** following:
  2826. **
  2827. ** + Discard the cached page (if refcount==0), or
  2828. ** + Reload page content from the database (if refcount>0).
  2829. */
  2830. pPager->dbSize = pPager->dbOrigSize;
  2831. rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager);
  2832. pList = sqlite3PcacheDirtyList(pPager->pPCache);
  2833. while( pList && rc==SQLITE_OK ){
  2834. PgHdr *pNext = pList->pDirty;
  2835. rc = pagerUndoCallback((void *)pPager, pList->pgno);
  2836. pList = pNext;
  2837. }
  2838. return rc;
  2839. }
  2840. /*
  2841. ** This function is a wrapper around sqlite3WalFrames(). As well as logging
  2842. ** the contents of the list of pages headed by pList (connected by pDirty),
  2843. ** this function notifies any active backup processes that the pages have
  2844. ** changed.
  2845. **
  2846. ** The list of pages passed into this routine is always sorted by page number.
  2847. ** Hence, if page 1 appears anywhere on the list, it will be the first page.
  2848. */
  2849. static int pagerWalFrames(
  2850. Pager *pPager, /* Pager object */
  2851. PgHdr *pList, /* List of frames to log */
  2852. Pgno nTruncate, /* Database size after this commit */
  2853. int isCommit /* True if this is a commit */
  2854. ){
  2855. int rc; /* Return code */
  2856. int nList; /* Number of pages in pList */
  2857. #if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
  2858. PgHdr *p; /* For looping over pages */
  2859. #endif
  2860. assert( pPager->pWal );
  2861. assert( pList );
  2862. #ifdef SQLITE_DEBUG
  2863. /* Verify that the page list is in accending order */
  2864. for(p=pList; p && p->pDirty; p=p->pDirty){
  2865. assert( p->pgno < p->pDirty->pgno );
  2866. }
  2867. #endif
  2868. assert( pList->pDirty==0 || isCommit );
  2869. if( isCommit ){
  2870. /* If a WAL transaction is being committed, there is no point in writing
  2871. ** any pages with page numbers greater than nTruncate into the WAL file.
  2872. ** They will never be read by any client. So remove them from the pDirty
  2873. ** list here. */
  2874. PgHdr *p;
  2875. PgHdr **ppNext = &pList;
  2876. nList = 0;
  2877. for(p=pList; (*ppNext = p)!=0; p=p->pDirty){
  2878. if( p->pgno<=nTruncate ){
  2879. ppNext = &p->pDirty;
  2880. nList++;
  2881. }
  2882. }
  2883. assert( pList );
  2884. }else{
  2885. nList = 1;
  2886. }
  2887. pPager->aStat[PAGER_STAT_WRITE] += nList;
  2888. if( pList->pgno==1 ) pager_write_changecounter(pList);
  2889. rc = sqlite3WalFrames(pPager->pWal,
  2890. pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags
  2891. );
  2892. if( rc==SQLITE_OK && pPager->pBackup ){
  2893. PgHdr *p;
  2894. for(p=pList; p; p=p->pDirty){
  2895. sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
  2896. }
  2897. }
  2898. #ifdef SQLITE_CHECK_PAGES
  2899. pList = sqlite3PcacheDirtyList(pPager->pPCache);
  2900. for(p=pList; p; p=p->pDirty){
  2901. pager_set_pagehash(p);
  2902. }
  2903. #endif
  2904. return rc;
  2905. }
  2906. /*
  2907. ** Begin a read transaction on the WAL.
  2908. **
  2909. ** This routine used to be called "pagerOpenSnapshot()" because it essentially
  2910. ** makes a snapshot of the database at the current point in time and preserves
  2911. ** that snapshot for use by the reader in spite of concurrently changes by
  2912. ** other writers or checkpointers.
  2913. */
  2914. static int pagerBeginReadTransaction(Pager *pPager){
  2915. int rc; /* Return code */
  2916. int changed = 0; /* True if cache must be reset */
  2917. assert( pagerUseWal(pPager) );
  2918. assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
  2919. /* sqlite3WalEndReadTransaction() was not called for the previous
  2920. ** transaction in locking_mode=EXCLUSIVE. So call it now. If we
  2921. ** are in locking_mode=NORMAL and EndRead() was previously called,
  2922. ** the duplicate call is harmless.
  2923. */
  2924. sqlite3WalEndReadTransaction(pPager->pWal);
  2925. rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
  2926. if( rc!=SQLITE_OK || changed ){
  2927. pager_reset(pPager);
  2928. if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0);
  2929. }
  2930. return rc;
  2931. }
  2932. #endif
  2933. /*
  2934. ** This function is called as part of the transition from PAGER_OPEN
  2935. ** to PAGER_READER state to determine the size of the database file
  2936. ** in pages (assuming the page size currently stored in Pager.pageSize).
  2937. **
  2938. ** If no error occurs, SQLITE_OK is returned and the size of the database
  2939. ** in pages is stored in *pnPage. Otherwise, an error code (perhaps
  2940. ** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified.
  2941. */
  2942. static int pagerPagecount(Pager *pPager, Pgno *pnPage){
  2943. Pgno nPage; /* Value to return via *pnPage */
  2944. /* Query the WAL sub-system for the database size. The WalDbsize()
  2945. ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or
  2946. ** if the database size is not available. The database size is not
  2947. ** available from the WAL sub-system if the log file is empty or
  2948. ** contains no valid committed transactions.
  2949. */
  2950. assert( pPager->eState==PAGER_OPEN );
  2951. assert( pPager->eLock>=SHARED_LOCK );
  2952. nPage = sqlite3WalDbsize(pPager->pWal);
  2953. /* If the database size was not available from the WAL sub-system,
  2954. ** determine it based on the size of the database file. If the size
  2955. ** of the database file is not an integer multiple of the page-size,
  2956. ** round down to the nearest page. Except, any file larger than 0
  2957. ** bytes in size is considered to contain at least one page.
  2958. */
  2959. if( nPage==0 ){
  2960. i64 n = 0; /* Size of db file in bytes */
  2961. assert( isOpen(pPager->fd) || pPager->tempFile );
  2962. if( isOpen(pPager->fd) ){
  2963. int rc = sqlite3OsFileSize(pPager->fd, &n);
  2964. if( rc!=SQLITE_OK ){
  2965. return rc;
  2966. }
  2967. }
  2968. nPage = (Pgno)((n+pPager->pageSize-1) / pPager->pageSize);
  2969. }
  2970. /* If the current number of pages in the file is greater than the
  2971. ** configured maximum pager number, increase the allowed limit so
  2972. ** that the file can be read.
  2973. */
  2974. if( nPage>pPager->mxPgno ){
  2975. pPager->mxPgno = (Pgno)nPage;
  2976. }
  2977. *pnPage = nPage;
  2978. return SQLITE_OK;
  2979. }
  2980. #ifndef SQLITE_OMIT_WAL
  2981. /*
  2982. ** Check if the *-wal file that corresponds to the database opened by pPager
  2983. ** exists if the database is not empy, or verify that the *-wal file does
  2984. ** not exist (by deleting it) if the database file is empty.
  2985. **
  2986. ** If the database is not empty and the *-wal file exists, open the pager
  2987. ** in WAL mode. If the database is empty or if no *-wal file exists and
  2988. ** if no error occurs, make sure Pager.journalMode is not set to
  2989. ** PAGER_JOURNALMODE_WAL.
  2990. **
  2991. ** Return SQLITE_OK or an error code.
  2992. **
  2993. ** The caller must hold a SHARED lock on the database file to call this
  2994. ** function. Because an EXCLUSIVE lock on the db file is required to delete
  2995. ** a WAL on a none-empty database, this ensures there is no race condition
  2996. ** between the xAccess() below and an xDelete() being executed by some
  2997. ** other connection.
  2998. */
  2999. static int pagerOpenWalIfPresent(Pager *pPager){
  3000. int rc = SQLITE_OK;
  3001. assert( pPager->eState==PAGER_OPEN );
  3002. assert( pPager->eLock>=SHARED_LOCK );
  3003. if( !pPager->tempFile ){
  3004. int isWal; /* True if WAL file exists */
  3005. Pgno nPage; /* Size of the database file */
  3006. rc = pagerPagecount(pPager, &nPage);
  3007. if( rc ) return rc;
  3008. if( nPage==0 ){
  3009. rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0);
  3010. if( rc==SQLITE_IOERR_DELETE_NOENT ) rc = SQLITE_OK;
  3011. isWal = 0;
  3012. }else{
  3013. rc = sqlite3OsAccess(
  3014. pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal
  3015. );
  3016. }
  3017. if( rc==SQLITE_OK ){
  3018. if( isWal ){
  3019. testcase( sqlite3PcachePagecount(pPager->pPCache)==0 );
  3020. rc = sqlite3PagerOpenWal(pPager, 0);
  3021. }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
  3022. pPager->journalMode = PAGER_JOURNALMODE_DELETE;
  3023. }
  3024. }
  3025. }
  3026. return rc;
  3027. }
  3028. #endif
  3029. /*
  3030. ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
  3031. ** the entire master journal file. The case pSavepoint==NULL occurs when
  3032. ** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction
  3033. ** savepoint.
  3034. **
  3035. ** When pSavepoint is not NULL (meaning a non-transaction savepoint is
  3036. ** being rolled back), then the rollback consists of up to three stages,
  3037. ** performed in the order specified:
  3038. **
  3039. ** * Pages are played back from the main journal starting at byte
  3040. ** offset PagerSavepoint.iOffset and continuing to
  3041. ** PagerSavepoint.iHdrOffset, or to the end of the main journal
  3042. ** file if PagerSavepoint.iHdrOffset is zero.
  3043. **
  3044. ** * If PagerSavepoint.iHdrOffset is not zero, then pages are played
  3045. ** back starting from the journal header immediately following
  3046. ** PagerSavepoint.iHdrOffset to the end of the main journal file.
  3047. **
  3048. ** * Pages are then played back from the sub-journal file, starting
  3049. ** with the PagerSavepoint.iSubRec and continuing to the end of
  3050. ** the journal file.
  3051. **
  3052. ** Throughout the rollback process, each time a page is rolled back, the
  3053. ** corresponding bit is set in a bitvec structure (variable pDone in the
  3054. ** implementation below). This is used to ensure that a page is only
  3055. ** rolled back the first time it is encountered in either journal.
  3056. **
  3057. ** If pSavepoint is NULL, then pages are only played back from the main
  3058. ** journal file. There is no need for a bitvec in this case.
  3059. **
  3060. ** In either case, before playback commences the Pager.dbSize variable
  3061. ** is reset to the value that it held at the start of the savepoint
  3062. ** (or transaction). No page with a page-number greater than this value
  3063. ** is played back. If one is encountered it is simply skipped.
  3064. */
  3065. static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
  3066. i64 szJ; /* Effective size of the main journal */
  3067. i64 iHdrOff; /* End of first segment of main-journal records */
  3068. int rc = SQLITE_OK; /* Return code */
  3069. Bitvec *pDone = 0; /* Bitvec to ensure pages played back only once */
  3070. assert( pPager->eState!=PAGER_ERROR );
  3071. assert( pPager->eState>=PAGER_WRITER_LOCKED );
  3072. /* Allocate a bitvec to use to store the set of pages rolled back */
  3073. if( pSavepoint ){
  3074. pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
  3075. if( !pDone ){
  3076. return SQLITE_NOMEM;
  3077. }
  3078. }
  3079. /* Set the database size back to the value it was before the savepoint
  3080. ** being reverted was opened.
  3081. */
  3082. pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
  3083. pPager->changeCountDone = pPager->tempFile;
  3084. if( !pSavepoint && pagerUseWal(pPager) ){
  3085. return pagerRollbackWal(pPager);
  3086. }
  3087. /* Use pPager->journalOff as the effective size of the main rollback
  3088. ** journal. The actual file might be larger than this in
  3089. ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything
  3090. ** past pPager->journalOff is off-limits to us.
  3091. */
  3092. szJ = pPager->journalOff;
  3093. assert( pagerUseWal(pPager)==0 || szJ==0 );
  3094. /* Begin by rolling back records from the main journal starting at
  3095. ** PagerSavepoint.iOffset and continuing to the next journal header.
  3096. ** There might be records in the main journal that have a page number
  3097. ** greater than the current database size (pPager->dbSize) but those
  3098. ** will be skipped automatically. Pages are added to pDone as they
  3099. ** are played back.
  3100. */
  3101. if( pSavepoint && !pagerUseWal(pPager) ){
  3102. iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
  3103. pPager->journalOff = pSavepoint->iOffset;
  3104. while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
  3105. rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
  3106. }
  3107. assert( rc!=SQLITE_DONE );
  3108. }else{
  3109. pPager->journalOff = 0;
  3110. }
  3111. /* Continue rolling back records out of the main journal starting at
  3112. ** the first journal header seen and continuing until the effective end
  3113. ** of the main journal file. Continue to skip out-of-range pages and
  3114. ** continue adding pages rolled back to pDone.
  3115. */
  3116. while( rc==SQLITE_OK && pPager->journalOff<szJ ){
  3117. u32 ii; /* Loop counter */
  3118. u32 nJRec = 0; /* Number of Journal Records */
  3119. u32 dummy;
  3120. rc = readJournalHdr(pPager, 0, szJ, &nJRec, &dummy);
  3121. assert( rc!=SQLITE_DONE );
  3122. /*
  3123. ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
  3124. ** test is related to ticket #2565. See the discussion in the
  3125. ** pager_playback() function for additional information.
  3126. */
  3127. if( nJRec==0
  3128. && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
  3129. ){
  3130. nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager));
  3131. }
  3132. for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
  3133. rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
  3134. }
  3135. assert( rc!=SQLITE_DONE );
  3136. }
  3137. assert( rc!=SQLITE_OK || pPager->journalOff>=szJ );
  3138. /* Finally, rollback pages from the sub-journal. Page that were
  3139. ** previously rolled back out of the main journal (and are hence in pDone)
  3140. ** will be skipped. Out-of-range pages are also skipped.
  3141. */
  3142. if( pSavepoint ){
  3143. u32 ii; /* Loop counter */
  3144. i64 offset = (i64)pSavepoint->iSubRec*(4+pPager->pageSize);
  3145. if( pagerUseWal(pPager) ){
  3146. rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData);
  3147. }
  3148. for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
  3149. assert( offset==(i64)ii*(4+pPager->pageSize) );
  3150. rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1);
  3151. }
  3152. assert( rc!=SQLITE_DONE );
  3153. }
  3154. sqlite3BitvecDestroy(pDone);
  3155. if( rc==SQLITE_OK ){
  3156. pPager->journalOff = szJ;
  3157. }
  3158. return rc;
  3159. }
  3160. /*
  3161. ** Change the maximum number of in-memory pages that are allowed.
  3162. */
  3163. void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
  3164. sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
  3165. }
  3166. /*
  3167. ** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap.
  3168. */
  3169. static void pagerFixMaplimit(Pager *pPager){
  3170. #if SQLITE_MAX_MMAP_SIZE>0
  3171. sqlite3_file *fd = pPager->fd;
  3172. if( isOpen(fd) && fd->pMethods->iVersion>=3 ){
  3173. sqlite3_int64 sz;
  3174. sz = pPager->szMmap;
  3175. pPager->bUseFetch = (sz>0);
  3176. sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz);
  3177. }
  3178. #endif
  3179. }
  3180. /*
  3181. ** Change the maximum size of any memory mapping made of the database file.
  3182. */
  3183. void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){
  3184. pPager->szMmap = szMmap;
  3185. pagerFixMaplimit(pPager);
  3186. }
  3187. /*
  3188. ** Free as much memory as possible from the pager.
  3189. */
  3190. void sqlite3PagerShrink(Pager *pPager){
  3191. sqlite3PcacheShrink(pPager->pPCache);
  3192. }
  3193. /*
  3194. ** Adjust settings of the pager to those specified in the pgFlags parameter.
  3195. **
  3196. ** The "level" in pgFlags & PAGER_SYNCHRONOUS_MASK sets the robustness
  3197. ** of the database to damage due to OS crashes or power failures by
  3198. ** changing the number of syncs()s when writing the journals.
  3199. ** There are three levels:
  3200. **
  3201. ** OFF sqlite3OsSync() is never called. This is the default
  3202. ** for temporary and transient files.
  3203. **
  3204. ** NORMAL The journal is synced once before writes begin on the
  3205. ** database. This is normally adequate protection, but
  3206. ** it is theoretically possible, though very unlikely,
  3207. ** that an inopertune power failure could leave the journal
  3208. ** in a state which would cause damage to the database
  3209. ** when it is rolled back.
  3210. **
  3211. ** FULL The journal is synced twice before writes begin on the
  3212. ** database (with some additional information - the nRec field
  3213. ** of the journal header - being written in between the two
  3214. ** syncs). If we assume that writing a
  3215. ** single disk sector is atomic, then this mode provides
  3216. ** assurance that the journal will not be corrupted to the
  3217. ** point of causing damage to the database during rollback.
  3218. **
  3219. ** The above is for a rollback-journal mode. For WAL mode, OFF continues
  3220. ** to mean that no syncs ever occur. NORMAL means that the WAL is synced
  3221. ** prior to the start of checkpoint and that the database file is synced
  3222. ** at the conclusion of the checkpoint if the entire content of the WAL
  3223. ** was written back into the database. But no sync operations occur for
  3224. ** an ordinary commit in NORMAL mode with WAL. FULL means that the WAL
  3225. ** file is synced following each commit operation, in addition to the
  3226. ** syncs associated with NORMAL.
  3227. **
  3228. ** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL. The
  3229. ** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync
  3230. ** using fcntl(F_FULLFSYNC). SQLITE_SYNC_NORMAL means to do an
  3231. ** ordinary fsync() call. There is no difference between SQLITE_SYNC_FULL
  3232. ** and SQLITE_SYNC_NORMAL on platforms other than MacOSX. But the
  3233. ** synchronous=FULL versus synchronous=NORMAL setting determines when
  3234. ** the xSync primitive is called and is relevant to all platforms.
  3235. **
  3236. ** Numeric values associated with these states are OFF==1, NORMAL=2,
  3237. ** and FULL=3.
  3238. */
  3239. #ifndef SQLITE_OMIT_PAGER_PRAGMAS
  3240. void sqlite3PagerSetFlags(
  3241. Pager *pPager, /* The pager to set safety level for */
  3242. unsigned pgFlags /* Various flags */
  3243. ){
  3244. unsigned level = pgFlags & PAGER_SYNCHRONOUS_MASK;
  3245. assert( level>=1 && level<=3 );
  3246. pPager->noSync = (level==1 || pPager->tempFile) ?1:0;
  3247. pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0;
  3248. if( pPager->noSync ){
  3249. pPager->syncFlags = 0;
  3250. pPager->ckptSyncFlags = 0;
  3251. }else if( pgFlags & PAGER_FULLFSYNC ){
  3252. pPager->syncFlags = SQLITE_SYNC_FULL;
  3253. pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
  3254. }else if( pgFlags & PAGER_CKPT_FULLFSYNC ){
  3255. pPager->syncFlags = SQLITE_SYNC_NORMAL;
  3256. pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
  3257. }else{
  3258. pPager->syncFlags = SQLITE_SYNC_NORMAL;
  3259. pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  3260. }
  3261. pPager->walSyncFlags = pPager->syncFlags;
  3262. if( pPager->fullSync ){
  3263. pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS;
  3264. }
  3265. if( pgFlags & PAGER_CACHESPILL ){
  3266. pPager->doNotSpill &= ~SPILLFLAG_OFF;
  3267. }else{
  3268. pPager->doNotSpill |= SPILLFLAG_OFF;
  3269. }
  3270. }
  3271. #endif
  /*
  ** Counter of attempts made by the library to open a temporary file.
  ** It exists only in SQLITE_TEST builds and is used for testing and
  ** analysis only.
  */
  #ifdef SQLITE_TEST
  int sqlite3_opentemp_count = 0;
  #endif
  3280. /*
  3281. ** Open a temporary file.
  3282. **
  3283. ** Write the file descriptor into *pFile. Return SQLITE_OK on success
  3284. ** or some other error code if we fail. The OS will automatically
  3285. ** delete the temporary file when it is closed.
  3286. **
  3287. ** The flags passed to the VFS layer xOpen() call are those specified
  3288. ** by parameter vfsFlags ORed with the following:
  3289. **
  3290. ** SQLITE_OPEN_READWRITE
  3291. ** SQLITE_OPEN_CREATE
  3292. ** SQLITE_OPEN_EXCLUSIVE
  3293. ** SQLITE_OPEN_DELETEONCLOSE
  3294. */
  3295. static int pagerOpentemp(
  3296. Pager *pPager, /* The pager object */
  3297. sqlite3_file *pFile, /* Write the file descriptor here */
  3298. int vfsFlags /* Flags passed through to the VFS */
  3299. ){
  3300. int rc; /* Return code */
  3301. #ifdef SQLITE_TEST
  3302. sqlite3_opentemp_count++; /* Used for testing and analysis only */
  3303. #endif
  3304. vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
  3305. SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
  3306. rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
  3307. assert( rc!=SQLITE_OK || isOpen(pFile) );
  3308. return rc;
  3309. }
  3310. /*
  3311. ** Set the busy handler function.
  3312. **
  3313. ** The pager invokes the busy-handler if sqlite3OsLock() returns
  3314. ** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock,
  3315. ** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE
  3316. ** lock. It does *not* invoke the busy handler when upgrading from
  3317. ** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE
  3318. ** (which occurs during hot-journal rollback). Summary:
  3319. **
  3320. ** Transition | Invokes xBusyHandler
  3321. ** --------------------------------------------------------
  3322. ** NO_LOCK -> SHARED_LOCK | Yes
  3323. ** SHARED_LOCK -> RESERVED_LOCK | No
  3324. ** SHARED_LOCK -> EXCLUSIVE_LOCK | No
  3325. ** RESERVED_LOCK -> EXCLUSIVE_LOCK | Yes
  3326. **
  3327. ** If the busy-handler callback returns non-zero, the lock is
  3328. ** retried. If it returns zero, then the SQLITE_BUSY error is
  3329. ** returned to the caller of the pager API function.
  3330. */
  3331. void sqlite3PagerSetBusyhandler(
  3332. Pager *pPager, /* Pager object */
  3333. int (*xBusyHandler)(void *), /* Pointer to busy-handler function */
  3334. void *pBusyHandlerArg /* Argument to pass to xBusyHandler */
  3335. ){
  3336. pPager->xBusyHandler = xBusyHandler;
  3337. pPager->pBusyHandlerArg = pBusyHandlerArg;
  3338. if( isOpen(pPager->fd) ){
  3339. void **ap = (void **)&pPager->xBusyHandler;
  3340. assert( ((int(*)(void *))(ap[0]))==xBusyHandler );
  3341. assert( ap[1]==pBusyHandlerArg );
  3342. sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_BUSYHANDLER, (void *)ap);
  3343. }
  3344. }
  3345. /*
  3346. ** Change the page size used by the Pager object. The new page size
  3347. ** is passed in *pPageSize.
  3348. **
  3349. ** If the pager is in the error state when this function is called, it
  3350. ** is a no-op. The value returned is the error state error code (i.e.
  3351. ** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL).
  3352. **
  3353. ** Otherwise, if all of the following are true:
  3354. **
  3355. ** * the new page size (value of *pPageSize) is valid (a power
  3356. ** of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and
  3357. **
  3358. ** * there are no outstanding page references, and
  3359. **
  3360. ** * the database is either not an in-memory database or it is
  3361. ** an in-memory database that currently consists of zero pages.
  3362. **
  3363. ** then the pager object page size is set to *pPageSize.
  3364. **
  3365. ** If the page size is changed, then this function uses sqlite3PagerMalloc()
  3366. ** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt
  3367. ** fails, SQLITE_NOMEM is returned and the page size remains unchanged.
  3368. ** In all other cases, SQLITE_OK is returned.
  3369. **
  3370. ** If the page size is not changed, either because one of the enumerated
  3371. ** conditions above is not true, the pager was in error state when this
  3372. ** function was called, or because the memory allocation attempt failed,
  3373. ** then *pPageSize is set to the old, retained page size before returning.
  3374. */
  3375. int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){
  3376. int rc = SQLITE_OK;
  3377. /* It is not possible to do a full assert_pager_state() here, as this
  3378. ** function may be called from within PagerOpen(), before the state
  3379. ** of the Pager object is internally consistent.
  3380. **
  3381. ** At one point this function returned an error if the pager was in
  3382. ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that
  3383. ** there is at least one outstanding page reference, this function
  3384. ** is a no-op for that case anyhow.
  3385. */
  3386. u32 pageSize = *pPageSize;
  3387. assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
  3388. if( (pPager->memDb==0 || pPager->dbSize==0)
  3389. && sqlite3PcacheRefCount(pPager->pPCache)==0
  3390. && pageSize && pageSize!=(u32)pPager->pageSize
  3391. ){
  3392. char *pNew = NULL; /* New temp space */
  3393. i64 nByte = 0;
  3394. if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){
  3395. rc = sqlite3OsFileSize(pPager->fd, &nByte);
  3396. }
  3397. if( rc==SQLITE_OK ){
  3398. pNew = (char *)sqlite3PageMalloc(pageSize);
  3399. if( !pNew ) rc = SQLITE_NOMEM;
  3400. }
  3401. if( rc==SQLITE_OK ){
  3402. pager_reset(pPager);
  3403. pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize);
  3404. pPager->pageSize = pageSize;
  3405. sqlite3PageFree(pPager->pTmpSpace);
  3406. pPager->pTmpSpace = pNew;
  3407. sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
  3408. }
  3409. }
  3410. *pPageSize = pPager->pageSize;
  3411. if( rc==SQLITE_OK ){
  3412. if( nReserve<0 ) nReserve = pPager->nReserve;
  3413. assert( nReserve>=0 && nReserve<1000 );
  3414. pPager->nReserve = (i16)nReserve;
  3415. pagerReportSize(pPager);
  3416. pagerFixMaplimit(pPager);
  3417. }
  3418. return rc;
  3419. }
  3420. /*
  3421. ** Return a pointer to the "temporary page" buffer held internally
  3422. ** by the pager. This is a buffer that is big enough to hold the
  3423. ** entire content of a database page. This buffer is used internally
  3424. ** during rollback and will be overwritten whenever a rollback
  3425. ** occurs. But other modules are free to use it too, as long as
  3426. ** no rollbacks are happening.
  3427. */
  3428. void *sqlite3PagerTempSpace(Pager *pPager){
  3429. return pPager->pTmpSpace;
  3430. }
  3431. /*
  3432. ** Attempt to set the maximum database page count if mxPage is positive.
  3433. ** Make no changes if mxPage is zero or negative. And never reduce the
  3434. ** maximum page count below the current size of the database.
  3435. **
  3436. ** Regardless of mxPage, return the current maximum page count.
  3437. */
  3438. int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
  3439. if( mxPage>0 ){
  3440. pPager->mxPgno = mxPage;
  3441. }
  3442. assert( pPager->eState!=PAGER_OPEN ); /* Called only by OP_MaxPgcnt */
  3443. assert( pPager->mxPgno>=pPager->dbSize ); /* OP_MaxPgcnt enforces this */
  3444. return pPager->mxPgno;
  3445. }
  /*
  ** The following routines switch the simulated I/O error mechanism off
  ** and back on again. They are used around operations where errors are
  ** of no interest, so that simulated errors are not triggered there.
  **
  ** disable_simulated_io_errors() remembers the current value of
  ** sqlite3_io_error_pending and sets it to -1;
  ** enable_simulated_io_errors() restores the remembered value.
  **
  ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
  ** and generate no code.
  */
  #ifdef SQLITE_TEST
  extern int sqlite3_io_error_pending;
  extern int sqlite3_io_error_hit;
  static int saved_cnt;       /* sqlite3_io_error_pending at time of disable */

  void disable_simulated_io_errors(void){
    saved_cnt = sqlite3_io_error_pending;
    sqlite3_io_error_pending = -1;
  }

  void enable_simulated_io_errors(void){
    sqlite3_io_error_pending = saved_cnt;
  }
  #else
  # define disable_simulated_io_errors()
  # define enable_simulated_io_errors()
  #endif
  3469. /*
  3470. ** Read the first N bytes from the beginning of the file into memory
  3471. ** that pDest points to.
  3472. **
  3473. ** If the pager was opened on a transient file (zFilename==""), or
  3474. ** opened on a file less than N bytes in size, the output buffer is
  3475. ** zeroed and SQLITE_OK returned. The rationale for this is that this
  3476. ** function is used to read database headers, and a new transient or
  3477. ** zero sized database has a header than consists entirely of zeroes.
  3478. **
  3479. ** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered,
  3480. ** the error code is returned to the caller and the contents of the
  3481. ** output buffer undefined.
  3482. */
  3483. int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
  3484. int rc = SQLITE_OK;
  3485. memset(pDest, 0, N);
  3486. assert( isOpen(pPager->fd) || pPager->tempFile );
  3487. /* This routine is only called by btree immediately after creating
  3488. ** the Pager object. There has not been an opportunity to transition
  3489. ** to WAL mode yet.
  3490. */
  3491. assert( !pagerUseWal(pPager) );
  3492. if( isOpen(pPager->fd) ){
  3493. IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
  3494. rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
  3495. if( rc==SQLITE_IOERR_SHORT_READ ){
  3496. rc = SQLITE_OK;
  3497. }
  3498. }
  3499. return rc;
  3500. }
  3501. /*
  3502. ** This function may only be called when a read-transaction is open on
  3503. ** the pager. It returns the total number of pages in the database.
  3504. **
  3505. ** However, if the file is between 1 and <page-size> bytes in size, then
  3506. ** this is considered a 1 page file.
  3507. */
  3508. void sqlite3PagerPagecount(Pager *pPager, int *pnPage){
  3509. assert( pPager->eState>=PAGER_READER );
  3510. assert( pPager->eState!=PAGER_WRITER_FINISHED );
  3511. *pnPage = (int)pPager->dbSize;
  3512. }
  3513. /*
  3514. ** Try to obtain a lock of type locktype on the database file. If
  3515. ** a similar or greater lock is already held, this function is a no-op
  3516. ** (returning SQLITE_OK immediately).
  3517. **
  3518. ** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke
  3519. ** the busy callback if the lock is currently not available. Repeat
  3520. ** until the busy callback returns false or until the attempt to
  3521. ** obtain the lock succeeds.
  3522. **
  3523. ** Return SQLITE_OK on success and an error code if we cannot obtain
  3524. ** the lock. If the lock is obtained successfully, set the Pager.state
  3525. ** variable to locktype before returning.
  3526. */
  3527. static int pager_wait_on_lock(Pager *pPager, int locktype){
  3528. int rc; /* Return code */
  3529. /* Check that this is either a no-op (because the requested lock is
  3530. ** already held, or one of the transistions that the busy-handler
  3531. ** may be invoked during, according to the comment above
  3532. ** sqlite3PagerSetBusyhandler().
  3533. */
  3534. assert( (pPager->eLock>=locktype)
  3535. || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK)
  3536. || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK)
  3537. );
  3538. do {
  3539. rc = pagerLockDb(pPager, locktype);
  3540. }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) );
  3541. return rc;
  3542. }
  3543. /*
  3544. ** Function assertTruncateConstraint(pPager) checks that one of the
  3545. ** following is true for all dirty pages currently in the page-cache:
  3546. **
  3547. ** a) The page number is less than or equal to the size of the
  3548. ** current database image, in pages, OR
  3549. **
  3550. ** b) if the page content were written at this time, it would not
  3551. ** be necessary to write the current content out to the sub-journal
  3552. ** (as determined by function subjRequiresPage()).
  3553. **
  3554. ** If the condition asserted by this function were not true, and the
  3555. ** dirty page were to be discarded from the cache via the pagerStress()
  3556. ** routine, pagerStress() would not write the current page content to
  3557. ** the database file. If a savepoint transaction were rolled back after
  3558. ** this happened, the correct behavior would be to restore the current
  3559. ** content of the page. However, since this content is not present in either
  3560. ** the database file or the portion of the rollback journal and
  3561. ** sub-journal rolled back the content could not be restored and the
  3562. ** database image would become corrupt. It is therefore fortunate that
  3563. ** this circumstance cannot arise.
  3564. */
  #ifdef SQLITE_DEBUG
  /* Callback applied to each dirty page: if the page would have to go to
  ** the sub-journal, it must lie within the current database image. */
  static void assertTruncateConstraintCb(PgHdr *pPg){
    assert( (pPg->flags & PGHDR_DIRTY)!=0 );
    if( subjRequiresPage(pPg) ){
      assert( pPg->pgno<=pPg->pPager->dbSize );
    }
  }

  /* Run the check above over every dirty page in the page cache. */
  static void assertTruncateConstraint(Pager *pPager){
    sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb);
  }
  #else
  /* Builds without SQLITE_DEBUG: the check expands to nothing. */
  # define assertTruncateConstraint(pPager)
  #endif
  3576. /*
  3577. ** Truncate the in-memory database file image to nPage pages. This
  3578. ** function does not actually modify the database file on disk. It
  3579. ** just sets the internal state of the pager object so that the
  3580. ** truncation will be done when the current transaction is committed.
  3581. **
  3582. ** This function is only called right before committing a transaction.
  3583. ** Once this function has been called, the transaction must either be
  3584. ** rolled back or committed. It is not safe to call this function and
  3585. ** then continue writing to the database.
  3586. */
  3587. void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
  3588. assert( pPager->dbSize>=nPage );
  3589. assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
  3590. pPager->dbSize = nPage;
  3591. /* At one point the code here called assertTruncateConstraint() to
  3592. ** ensure that all pages being truncated away by this operation are,
  3593. ** if one or more savepoints are open, present in the savepoint
  3594. ** journal so that they can be restored if the savepoint is rolled
  3595. ** back. This is no longer necessary as this function is now only
  3596. ** called right before committing a transaction. So although the
  3597. ** Pager object may still have open savepoints (Pager.nSavepoint!=0),
  3598. ** they cannot be rolled back. So the assertTruncateConstraint() call
  3599. ** is no longer correct. */
  3600. }
  3601. /*
  3602. ** This function is called before attempting a hot-journal rollback. It
  3603. ** syncs the journal file to disk, then sets pPager->journalHdr to the
  3604. ** size of the journal file so that the pager_playback() routine knows
  3605. ** that the entire journal file has been synced.
  3606. **
  3607. ** Syncing a hot-journal to disk before attempting to roll it back ensures
  3608. ** that if a power-failure occurs during the rollback, the process that
  3609. ** attempts rollback following system recovery sees the same journal
  3610. ** content as this process.
  3611. **
  3612. ** If everything goes as planned, SQLITE_OK is returned. Otherwise,
  3613. ** an SQLite error code.
  3614. */
  3615. static int pagerSyncHotJournal(Pager *pPager){
  3616. int rc = SQLITE_OK;
  3617. if( !pPager->noSync ){
  3618. rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL);
  3619. }
  3620. if( rc==SQLITE_OK ){
  3621. rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
  3622. }
  3623. return rc;
  3624. }
  3625. /*
  3626. ** Obtain a reference to a memory mapped page object for page number pgno.
  3627. ** The new object will use the pointer pData, obtained from xFetch().
  3628. ** If successful, set *ppPage to point to the new page reference
  3629. ** and return SQLITE_OK. Otherwise, return an SQLite error code and set
  3630. ** *ppPage to zero.
  3631. **
  3632. ** Page references obtained by calling this function should be released
  3633. ** by calling pagerReleaseMapPage().
  3634. */
  3635. static int pagerAcquireMapPage(
  3636. Pager *pPager, /* Pager object */
  3637. Pgno pgno, /* Page number */
  3638. void *pData, /* xFetch()'d data for this page */
  3639. PgHdr **ppPage /* OUT: Acquired page object */
  3640. ){
  3641. PgHdr *p; /* Memory mapped page to return */
  3642. if( pPager->pMmapFreelist ){
  3643. *ppPage = p = pPager->pMmapFreelist;
  3644. pPager->pMmapFreelist = p->pDirty;
  3645. p->pDirty = 0;
  3646. memset(p->pExtra, 0, pPager->nExtra);
  3647. }else{
  3648. *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra);
  3649. if( p==0 ){
  3650. sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData);
  3651. return SQLITE_NOMEM;
  3652. }
  3653. p->pExtra = (void *)&p[1];
  3654. p->flags = PGHDR_MMAP;
  3655. p->nRef = 1;
  3656. p->pPager = pPager;
  3657. }
  3658. assert( p->pExtra==(void *)&p[1] );
  3659. assert( p->pPage==0 );
  3660. assert( p->flags==PGHDR_MMAP );
  3661. assert( p->pPager==pPager );
  3662. assert( p->nRef==1 );
  3663. p->pgno = pgno;
  3664. p->pData = pData;
  3665. pPager->nMmapOut++;
  3666. return SQLITE_OK;
  3667. }
  3668. /*
  3669. ** Release a reference to page pPg. pPg must have been returned by an
  3670. ** earlier call to pagerAcquireMapPage().
  3671. */
  3672. static void pagerReleaseMapPage(PgHdr *pPg){
  3673. Pager *pPager = pPg->pPager;
  3674. pPager->nMmapOut--;
  3675. pPg->pDirty = pPager->pMmapFreelist;
  3676. pPager->pMmapFreelist = pPg;
  3677. assert( pPager->fd->pMethods->iVersion>=3 );
  3678. sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData);
  3679. }
  3680. /*
  3681. ** Free all PgHdr objects stored in the Pager.pMmapFreelist list.
  3682. */
  3683. static void pagerFreeMapHdrs(Pager *pPager){
  3684. PgHdr *p;
  3685. PgHdr *pNext;
  3686. for(p=pPager->pMmapFreelist; p; p=pNext){
  3687. pNext = p->pDirty;
  3688. sqlite3_free(p);
  3689. }
  3690. }
  3691. /*
  3692. ** Shutdown the page cache. Free all memory and close all files.
  3693. **
  3694. ** If a transaction was in progress when this routine is called, that
  3695. ** transaction is rolled back. All outstanding pages are invalidated
  3696. ** and their memory is freed. Any attempt to use a page associated
  3697. ** with this page cache after this function returns will likely
  3698. ** result in a coredump.
  3699. **
  3700. ** This function always succeeds. If a transaction is active an attempt
  3701. ** is made to roll it back. If an error occurs during the rollback
  3702. ** a hot journal may be left in the filesystem but no error is returned
  3703. ** to the caller.
  3704. */
  3705. int sqlite3PagerClose(Pager *pPager){
  3706. u8 *pTmp = (u8 *)pPager->pTmpSpace;
  3707. assert( assert_pager_state(pPager) );
  3708. disable_simulated_io_errors();
  3709. sqlite3BeginBenignMalloc();
  3710. pagerFreeMapHdrs(pPager);
  3711. /* pPager->errCode = 0; */
  3712. pPager->exclusiveMode = 0;
  3713. #ifndef SQLITE_OMIT_WAL
  3714. sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp);
  3715. pPager->pWal = 0;
  3716. #endif
  3717. pager_reset(pPager);
  3718. if( MEMDB ){
  3719. pager_unlock(pPager);
  3720. }else{
  3721. /* If it is open, sync the journal file before calling UnlockAndRollback.
  3722. ** If this is not done, then an unsynced portion of the open journal
  3723. ** file may be played back into the database. If a power failure occurs
  3724. ** while this is happening, the database could become corrupt.
  3725. **
  3726. ** If an error occurs while trying to sync the journal, shift the pager
  3727. ** into the ERROR state. This causes UnlockAndRollback to unlock the
  3728. ** database and close the journal file without attempting to roll it
  3729. ** back or finalize it. The next database user will have to do hot-journal
  3730. ** rollback before accessing the database file.
  3731. */
  3732. if( isOpen(pPager->jfd) ){
  3733. pager_error(pPager, pagerSyncHotJournal(pPager));
  3734. }
  3735. pagerUnlockAndRollback(pPager);
  3736. }
  3737. sqlite3EndBenignMalloc();
  3738. enable_simulated_io_errors();
  3739. PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
  3740. IOTRACE(("CLOSE %p\n", pPager))
  3741. sqlite3OsClose(pPager->jfd);
  3742. sqlite3OsClose(pPager->fd);
  3743. sqlite3PageFree(pTmp);
  3744. sqlite3PcacheClose(pPager->pPCache);
  3745. #ifdef SQLITE_HAS_CODEC
  3746. if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
  3747. #endif
  3748. assert( !pPager->aSavepoint && !pPager->pInJournal );
  3749. assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) );
  3750. sqlite3_free(pPager);
  3751. return SQLITE_OK;
  3752. }
  3753. #if !defined(NDEBUG) || defined(SQLITE_TEST)
  3754. /*
  3755. ** Return the page number for page pPg.
  3756. */
  3757. Pgno sqlite3PagerPagenumber(DbPage *pPg){
  3758. return pPg->pgno;
  3759. }
  3760. #endif
  3761. /*
  3762. ** Increment the reference count for page pPg.
  3763. */
  3764. void sqlite3PagerRef(DbPage *pPg){
  3765. sqlite3PcacheRef(pPg);
  3766. }
  3767. /*
  3768. ** Sync the journal. In other words, make sure all the pages that have
  3769. ** been written to the journal have actually reached the surface of the
  3770. ** disk and can be restored in the event of a hot-journal rollback.
  3771. **
  3772. ** If the Pager.noSync flag is set, then this function is a no-op.
  3773. ** Otherwise, the actions required depend on the journal-mode and the
  3774. ** device characteristics of the file-system, as follows:
  3775. **
  3776. ** * If the journal file is an in-memory journal file, no action need
  3777. ** be taken.
  3778. **
  3779. ** * Otherwise, if the device does not support the SAFE_APPEND property,
  3780. ** then the nRec field of the most recently written journal header
  3781. ** is updated to contain the number of journal records that have
  3782. ** been written following it. If the pager is operating in full-sync
  3783. ** mode, then the journal file is synced before this field is updated.
  3784. **
  3785. ** * If the device does not support the SEQUENTIAL property, then
  3786. ** journal file is synced.
  3787. **
  3788. ** Or, in pseudo-code:
  3789. **
  3790. ** if( NOT <in-memory journal> ){
  3791. ** if( NOT SAFE_APPEND ){
  3792. ** if( <full-sync mode> ) xSync(<journal file>);
  3793. ** <update nRec field>
  3794. ** }
  3795. ** if( NOT SEQUENTIAL ) xSync(<journal file>);
  3796. ** }
  3797. **
  3798. ** If successful, this routine clears the PGHDR_NEED_SYNC flag of every
  3799. ** page currently held in memory before returning SQLITE_OK. If an IO
  3800. ** error is encountered, then the IO error code is returned to the caller.
  3801. */
  3802. static int syncJournal(Pager *pPager, int newHdr){
  3803. int rc; /* Return code */
  3804. assert( pPager->eState==PAGER_WRITER_CACHEMOD
  3805. || pPager->eState==PAGER_WRITER_DBMOD
  3806. );
  3807. assert( assert_pager_state(pPager) );
  3808. assert( !pagerUseWal(pPager) );
  3809. rc = sqlite3PagerExclusiveLock(pPager);
  3810. if( rc!=SQLITE_OK ) return rc;
  3811. if( !pPager->noSync ){
  3812. assert( !pPager->tempFile );
  3813. if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){
  3814. const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  3815. assert( isOpen(pPager->jfd) );
  3816. if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  3817. /* This block deals with an obscure problem. If the last connection
  3818. ** that wrote to this database was operating in persistent-journal
  3819. ** mode, then the journal file may at this point actually be larger
  3820. ** than Pager.journalOff bytes. If the next thing in the journal
  3821. ** file happens to be a journal-header (written as part of the
  3822. ** previous connection's transaction), and a crash or power-failure
  3823. ** occurs after nRec is updated but before this connection writes
  3824. ** anything else to the journal file (or commits/rolls back its
  3825. ** transaction), then SQLite may become confused when doing the
  3826. ** hot-journal rollback following recovery. It may roll back all
  3827. ** of this connections data, then proceed to rolling back the old,
  3828. ** out-of-date data that follows it. Database corruption.
  3829. **
  3830. ** To work around this, if the journal file does appear to contain
  3831. ** a valid header following Pager.journalOff, then write a 0x00
  3832. ** byte to the start of it to prevent it from being recognized.
  3833. **
  3834. ** Variable iNextHdrOffset is set to the offset at which this
  3835. ** problematic header will occur, if it exists. aMagic is used
  3836. ** as a temporary buffer to inspect the first couple of bytes of
  3837. ** the potential journal header.
  3838. */
  3839. i64 iNextHdrOffset;
  3840. u8 aMagic[8];
  3841. u8 zHeader[sizeof(aJournalMagic)+4];
  3842. memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
  3843. put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec);
  3844. iNextHdrOffset = journalHdrOffset(pPager);
  3845. rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset);
  3846. if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){
  3847. static const u8 zerobyte = 0;
  3848. rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset);
  3849. }
  3850. if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
  3851. return rc;
  3852. }
  3853. /* Write the nRec value into the journal file header. If in
  3854. ** full-synchronous mode, sync the journal first. This ensures that
  3855. ** all data has really hit the disk before nRec is updated to mark
  3856. ** it as a candidate for rollback.
  3857. **
  3858. ** This is not required if the persistent media supports the
  3859. ** SAFE_APPEND property. Because in this case it is not possible
  3860. ** for garbage data to be appended to the file, the nRec field
  3861. ** is populated with 0xFFFFFFFF when the journal header is written
  3862. ** and never needs to be updated.
  3863. */
  3864. if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  3865. PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
  3866. IOTRACE(("JSYNC %p\n", pPager))
  3867. rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags);
  3868. if( rc!=SQLITE_OK ) return rc;
  3869. }
  3870. IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr));
  3871. rc = sqlite3OsWrite(
  3872. pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr
  3873. );
  3874. if( rc!=SQLITE_OK ) return rc;
  3875. }
  3876. if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  3877. PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
  3878. IOTRACE(("JSYNC %p\n", pPager))
  3879. rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags|
  3880. (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
  3881. );
  3882. if( rc!=SQLITE_OK ) return rc;
  3883. }
  3884. pPager->journalHdr = pPager->journalOff;
  3885. if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  3886. pPager->nRec = 0;
  3887. rc = writeJournalHdr(pPager);
  3888. if( rc!=SQLITE_OK ) return rc;
  3889. }
  3890. }else{
  3891. pPager->journalHdr = pPager->journalOff;
  3892. }
  3893. }
  3894. /* Unless the pager is in noSync mode, the journal file was just
  3895. ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on
  3896. ** all pages.
  3897. */
  3898. sqlite3PcacheClearSyncFlags(pPager->pPCache);
  3899. pPager->eState = PAGER_WRITER_DBMOD;
  3900. assert( assert_pager_state(pPager) );
  3901. return SQLITE_OK;
  3902. }
  3903. /*
  3904. ** The argument is the first in a linked list of dirty pages connected
  3905. ** by the PgHdr.pDirty pointer. This function writes each one of the
  3906. ** in-memory pages in the list to the database file. The argument may
  3907. ** be NULL, representing an empty list. In this case this function is
  3908. ** a no-op.
  3909. **
  3910. ** The pager must hold at least a RESERVED lock when this function
  3911. ** is called. Before writing anything to the database file, this lock
  3912. ** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained,
  3913. ** SQLITE_BUSY is returned and no data is written to the database file.
  3914. **
  3915. ** If the pager is a temp-file pager and the actual file-system file
  3916. ** is not yet open, it is created and opened before any data is
  3917. ** written out.
  3918. **
  3919. ** Once the lock has been upgraded and, if necessary, the file opened,
  3920. ** the pages are written out to the database file in list order. Writing
  3921. ** a page is skipped if it meets either of the following criteria:
  3922. **
  3923. ** * The page number is greater than Pager.dbSize, or
  3924. ** * The PGHDR_DONT_WRITE flag is set on the page.
  3925. **
  3926. ** If writing out a page causes the database file to grow, Pager.dbFileSize
  3927. ** is updated accordingly. If page 1 is written out, then the value cached
  3928. ** in Pager.dbFileVers[] is updated to match the new value stored in
  3929. ** the database file.
  3930. **
  3931. ** If everything is successful, SQLITE_OK is returned. If an IO error
  3932. ** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot
  3933. ** be obtained, SQLITE_BUSY is returned.
  3934. */
  3935. static int pager_write_pagelist(Pager *pPager, PgHdr *pList){
  3936. int rc = SQLITE_OK; /* Return code */
  3937. /* This function is only called for rollback pagers in WRITER_DBMOD state. */
  3938. assert( !pagerUseWal(pPager) );
  3939. assert( pPager->eState==PAGER_WRITER_DBMOD );
  3940. assert( pPager->eLock==EXCLUSIVE_LOCK );
  3941. /* If the file is a temp-file has not yet been opened, open it now. It
  3942. ** is not possible for rc to be other than SQLITE_OK if this branch
  3943. ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
  3944. */
  3945. if( !isOpen(pPager->fd) ){
  3946. assert( pPager->tempFile && rc==SQLITE_OK );
  3947. rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
  3948. }
  3949. /* Before the first write, give the VFS a hint of what the final
  3950. ** file size will be.
  3951. */
  3952. assert( rc!=SQLITE_OK || isOpen(pPager->fd) );
  3953. if( rc==SQLITE_OK
  3954. && pPager->dbHintSize<pPager->dbSize
  3955. && (pList->pDirty || pList->pgno>pPager->dbHintSize)
  3956. ){
  3957. sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize;
  3958. sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile);
  3959. pPager->dbHintSize = pPager->dbSize;
  3960. }
  3961. while( rc==SQLITE_OK && pList ){
  3962. Pgno pgno = pList->pgno;
  3963. /* If there are dirty pages in the page cache with page numbers greater
  3964. ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to
  3965. ** make the file smaller (presumably by auto-vacuum code). Do not write
  3966. ** any such pages to the file.
  3967. **
  3968. ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag
  3969. ** set (set by sqlite3PagerDontWrite()).
  3970. */
  3971. if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
  3972. i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Offset to write */
  3973. char *pData; /* Data to write */
  3974. assert( (pList->flags&PGHDR_NEED_SYNC)==0 );
  3975. if( pList->pgno==1 ) pager_write_changecounter(pList);
  3976. /* Encode the database */
  3977. CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData);
  3978. /* Write out the page data. */
  3979. rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
  3980. /* If page 1 was just written, update Pager.dbFileVers to match
  3981. ** the value now stored in the database file. If writing this
  3982. ** page caused the database file to grow, update dbFileSize.
  3983. */
  3984. if( pgno==1 ){
  3985. memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
  3986. }
  3987. if( pgno>pPager->dbFileSize ){
  3988. pPager->dbFileSize = pgno;
  3989. }
  3990. pPager->aStat[PAGER_STAT_WRITE]++;
  3991. /* Update any backup objects copying the contents of this pager. */
  3992. sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData);
  3993. PAGERTRACE(("STORE %d page %d hash(%08x)\n",
  3994. PAGERID(pPager), pgno, pager_pagehash(pList)));
  3995. IOTRACE(("PGOUT %p %d\n", pPager, pgno));
  3996. PAGER_INCR(sqlite3_pager_writedb_count);
  3997. }else{
  3998. PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno));
  3999. }
  4000. pager_set_pagehash(pList);
  4001. pList = pList->pDirty;
  4002. }
  4003. return rc;
  4004. }
  4005. /*
  4006. ** Ensure that the sub-journal file is open. If it is already open, this
  4007. ** function is a no-op.
  4008. **
  4009. ** SQLITE_OK is returned if everything goes according to plan. An
  4010. ** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen()
  4011. ** fails.
  4012. */
  4013. static int openSubJournal(Pager *pPager){
  4014. int rc = SQLITE_OK;
  4015. if( !isOpen(pPager->sjfd) ){
  4016. if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
  4017. sqlite3MemJournalOpen(pPager->sjfd);
  4018. }else{
  4019. rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
  4020. }
  4021. }
  4022. return rc;
  4023. }
  4024. /*
  4025. ** Append a record of the current state of page pPg to the sub-journal.
  4026. ** It is the callers responsibility to use subjRequiresPage() to check
  4027. ** that it is really required before calling this function.
  4028. **
  4029. ** If successful, set the bit corresponding to pPg->pgno in the bitvecs
  4030. ** for all open savepoints before returning.
  4031. **
  4032. ** This function returns SQLITE_OK if everything is successful, an IO
  4033. ** error code if the attempt to write to the sub-journal fails, or
  4034. ** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint
  4035. ** bitvec.
  4036. */
  4037. static int subjournalPage(PgHdr *pPg){
  4038. int rc = SQLITE_OK;
  4039. Pager *pPager = pPg->pPager;
  4040. if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  4041. /* Open the sub-journal, if it has not already been opened */
  4042. assert( pPager->useJournal );
  4043. assert( isOpen(pPager->jfd) || pagerUseWal(pPager) );
  4044. assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 );
  4045. assert( pagerUseWal(pPager)
  4046. || pageInJournal(pPg)
  4047. || pPg->pgno>pPager->dbOrigSize
  4048. );
  4049. rc = openSubJournal(pPager);
  4050. /* If the sub-journal was opened successfully (or was already open),
  4051. ** write the journal record into the file. */
  4052. if( rc==SQLITE_OK ){
  4053. void *pData = pPg->pData;
  4054. i64 offset = (i64)pPager->nSubRec*(4+pPager->pageSize);
  4055. char *pData2;
  4056. CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
  4057. PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
  4058. rc = write32bits(pPager->sjfd, offset, pPg->pgno);
  4059. if( rc==SQLITE_OK ){
  4060. rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
  4061. }
  4062. }
  4063. }
  4064. if( rc==SQLITE_OK ){
  4065. pPager->nSubRec++;
  4066. assert( pPager->nSavepoint>0 );
  4067. rc = addToSavepointBitvecs(pPager, pPg->pgno);
  4068. }
  4069. return rc;
  4070. }
  4071. /*
  4072. ** This function is called by the pcache layer when it has reached some
  4073. ** soft memory limit. The first argument is a pointer to a Pager object
  4074. ** (cast as a void*). The pager is always 'purgeable' (not an in-memory
  4075. ** database). The second argument is a reference to a page that is
  4076. ** currently dirty but has no outstanding references. The page
  4077. ** is always associated with the Pager object passed as the first
  4078. ** argument.
  4079. **
  4080. ** The job of this function is to make pPg clean by writing its contents
  4081. ** out to the database file, if possible. This may involve syncing the
  4082. ** journal file.
  4083. **
  4084. ** If successful, sqlite3PcacheMakeClean() is called on the page and
  4085. ** SQLITE_OK returned. If an IO error occurs while trying to make the
  4086. ** page clean, the IO error code is returned. If the page cannot be
  4087. ** made clean for some other reason, but no error occurs, then SQLITE_OK
  4088. ** is returned but sqlite3PcacheMakeClean() is not called.
  4089. */
  4090. static int pagerStress(void *p, PgHdr *pPg){
  4091. Pager *pPager = (Pager *)p;
  4092. int rc = SQLITE_OK;
  4093. assert( pPg->pPager==pPager );
  4094. assert( pPg->flags&PGHDR_DIRTY );
  4095. /* The doNotSpill NOSYNC bit is set during times when doing a sync of
  4096. ** journal (and adding a new header) is not allowed. This occurs
  4097. ** during calls to sqlite3PagerWrite() while trying to journal multiple
  4098. ** pages belonging to the same sector.
  4099. **
  4100. ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling
  4101. ** regardless of whether or not a sync is required. This is set during
  4102. ** a rollback or by user request, respectively.
  4103. **
  4104. ** Spilling is also prohibited when in an error state since that could
  4105. ** lead to database corruption. In the current implementaton it
  4106. ** is impossible for sqlite3PcacheFetch() to be called with createFlag==1
  4107. ** while in the error state, hence it is impossible for this routine to
  4108. ** be called in the error state. Nevertheless, we include a NEVER()
  4109. ** test for the error state as a safeguard against future changes.
  4110. */
  4111. if( NEVER(pPager->errCode) ) return SQLITE_OK;
  4112. testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK );
  4113. testcase( pPager->doNotSpill & SPILLFLAG_OFF );
  4114. testcase( pPager->doNotSpill & SPILLFLAG_NOSYNC );
  4115. if( pPager->doNotSpill
  4116. && ((pPager->doNotSpill & (SPILLFLAG_ROLLBACK|SPILLFLAG_OFF))!=0
  4117. || (pPg->flags & PGHDR_NEED_SYNC)!=0)
  4118. ){
  4119. return SQLITE_OK;
  4120. }
  4121. pPg->pDirty = 0;
  4122. if( pagerUseWal(pPager) ){
  4123. /* Write a single frame for this page to the log. */
  4124. if( subjRequiresPage(pPg) ){
  4125. rc = subjournalPage(pPg);
  4126. }
  4127. if( rc==SQLITE_OK ){
  4128. rc = pagerWalFrames(pPager, pPg, 0, 0);
  4129. }
  4130. }else{
  4131. /* Sync the journal file if required. */
  4132. if( pPg->flags&PGHDR_NEED_SYNC
  4133. || pPager->eState==PAGER_WRITER_CACHEMOD
  4134. ){
  4135. rc = syncJournal(pPager, 1);
  4136. }
  4137. /* If the page number of this page is larger than the current size of
  4138. ** the database image, it may need to be written to the sub-journal.
  4139. ** This is because the call to pager_write_pagelist() below will not
  4140. ** actually write data to the file in this case.
  4141. **
  4142. ** Consider the following sequence of events:
  4143. **
  4144. ** BEGIN;
  4145. ** <journal page X>
  4146. ** <modify page X>
  4147. ** SAVEPOINT sp;
  4148. ** <shrink database file to Y pages>
  4149. ** pagerStress(page X)
  4150. ** ROLLBACK TO sp;
  4151. **
  4152. ** If (X>Y), then when pagerStress is called page X will not be written
  4153. ** out to the database file, but will be dropped from the cache. Then,
  4154. ** following the "ROLLBACK TO sp" statement, reading page X will read
  4155. ** data from the database file. This will be the copy of page X as it
  4156. ** was when the transaction started, not as it was when "SAVEPOINT sp"
  4157. ** was executed.
  4158. **
  4159. ** The solution is to write the current data for page X into the
  4160. ** sub-journal file now (if it is not already there), so that it will
  4161. ** be restored to its current value when the "ROLLBACK TO sp" is
  4162. ** executed.
  4163. */
  4164. if( NEVER(
  4165. rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
  4166. ) ){
  4167. rc = subjournalPage(pPg);
  4168. }
  4169. /* Write the contents of the page out to the database file. */
  4170. if( rc==SQLITE_OK ){
  4171. assert( (pPg->flags&PGHDR_NEED_SYNC)==0 );
  4172. rc = pager_write_pagelist(pPager, pPg);
  4173. }
  4174. }
  4175. /* Mark the page as clean. */
  4176. if( rc==SQLITE_OK ){
  4177. PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
  4178. sqlite3PcacheMakeClean(pPg);
  4179. }
  4180. return pager_error(pPager, rc);
  4181. }
  4182. /*
  4183. ** Allocate and initialize a new Pager object and put a pointer to it
  4184. ** in *ppPager. The pager should eventually be freed by passing it
  4185. ** to sqlite3PagerClose().
  4186. **
  4187. ** The zFilename argument is the path to the database file to open.
  4188. ** If zFilename is NULL then a randomly-named temporary file is created
  4189. ** and used as the file to be cached. Temporary files are be deleted
  4190. ** automatically when they are closed. If zFilename is ":memory:" then
  4191. ** all information is held in cache. It is never written to disk.
  4192. ** This can be used to implement an in-memory database.
  4193. **
  4194. ** The nExtra parameter specifies the number of bytes of space allocated
  4195. ** along with each page reference. This space is available to the user
  4196. ** via the sqlite3PagerGetExtra() API.
  4197. **
  4198. ** The flags argument is used to specify properties that affect the
  4199. ** operation of the pager. It should be passed some bitwise combination
  4200. ** of the PAGER_* flags.
  4201. **
  4202. ** The vfsFlags parameter is a bitmask to pass to the flags parameter
  4203. ** of the xOpen() method of the supplied VFS when opening files.
  4204. **
  4205. ** If the pager object is allocated and the specified file opened
  4206. ** successfully, SQLITE_OK is returned and *ppPager set to point to
  4207. ** the new pager object. If an error occurs, *ppPager is set to NULL
  4208. ** and error code returned. This function may return SQLITE_NOMEM
  4209. ** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or
  4210. ** various SQLITE_IO_XXX errors.
  4211. */
  4212. int sqlite3PagerOpen(
  4213. sqlite3_vfs *pVfs, /* The virtual file system to use */
  4214. Pager **ppPager, /* OUT: Return the Pager structure here */
  4215. const char *zFilename, /* Name of the database file to open */
  4216. int nExtra, /* Extra bytes append to each in-memory page */
  4217. int flags, /* flags controlling this file */
  4218. int vfsFlags, /* flags passed through to sqlite3_vfs.xOpen() */
  4219. void (*xReinit)(DbPage*) /* Function to reinitialize pages */
  4220. ){
  4221. u8 *pPtr;
  4222. Pager *pPager = 0; /* Pager object to allocate and return */
  4223. int rc = SQLITE_OK; /* Return code */
  4224. int tempFile = 0; /* True for temp files (incl. in-memory files) */
  4225. int memDb = 0; /* True if this is an in-memory file */
  4226. int readOnly = 0; /* True if this is a read-only file */
  4227. int journalFileSize; /* Bytes to allocate for each journal fd */
  4228. char *zPathname = 0; /* Full path to database file */
  4229. int nPathname = 0; /* Number of bytes in zPathname */
  4230. int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */
  4231. int pcacheSize = sqlite3PcacheSize(); /* Bytes to allocate for PCache */
  4232. u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */
  4233. const char *zUri = 0; /* URI args to copy */
  4234. int nUri = 0; /* Number of bytes of URI args at *zUri */
  4235. /* Figure out how much space is required for each journal file-handle
  4236. ** (there are two of them, the main journal and the sub-journal). This
  4237. ** is the maximum space required for an in-memory journal file handle
  4238. ** and a regular journal file-handle. Note that a "regular journal-handle"
  4239. ** may be a wrapper capable of caching the first portion of the journal
  4240. ** file in memory to implement the atomic-write optimization (see
  4241. ** source file journal.c).
  4242. */
  4243. if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){
  4244. journalFileSize = ROUND8(sqlite3JournalSize(pVfs));
  4245. }else{
  4246. journalFileSize = ROUND8(sqlite3MemJournalSize());
  4247. }
  4248. /* Set the output variable to NULL in case an error occurs. */
  4249. *ppPager = 0;
  4250. #ifndef SQLITE_OMIT_MEMORYDB
  4251. if( flags & PAGER_MEMORY ){
  4252. memDb = 1;
  4253. if( zFilename && zFilename[0] ){
  4254. zPathname = sqlite3DbStrDup(0, zFilename);
  4255. if( zPathname==0 ) return SQLITE_NOMEM;
  4256. nPathname = sqlite3Strlen30(zPathname);
  4257. zFilename = 0;
  4258. }
  4259. }
  4260. #endif
  4261. /* Compute and store the full pathname in an allocated buffer pointed
  4262. ** to by zPathname, length nPathname. Or, if this is a temporary file,
  4263. ** leave both nPathname and zPathname set to 0.
  4264. */
  4265. if( zFilename && zFilename[0] ){
  4266. const char *z;
  4267. nPathname = pVfs->mxPathname+1;
  4268. zPathname = sqlite3DbMallocRaw(0, nPathname*2);
  4269. if( zPathname==0 ){
  4270. return SQLITE_NOMEM;
  4271. }
  4272. zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */
  4273. rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
  4274. nPathname = sqlite3Strlen30(zPathname);
  4275. z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1];
  4276. while( *z ){
  4277. z += sqlite3Strlen30(z)+1;
  4278. z += sqlite3Strlen30(z)+1;
  4279. }
  4280. nUri = (int)(&z[1] - zUri);
  4281. assert( nUri>=0 );
  4282. if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){
  4283. /* This branch is taken when the journal path required by
  4284. ** the database being opened will be more than pVfs->mxPathname
  4285. ** bytes in length. This means the database cannot be opened,
  4286. ** as it will not be possible to open the journal file or even
  4287. ** check for a hot-journal before reading.
  4288. */
  4289. rc = SQLITE_CANTOPEN_BKPT;
  4290. }
  4291. if( rc!=SQLITE_OK ){
  4292. sqlite3DbFree(0, zPathname);
  4293. return rc;
  4294. }
  4295. }
  4296. /* Allocate memory for the Pager structure, PCache object, the
  4297. ** three file descriptors, the database file name and the journal
  4298. ** file name. The layout in memory is as follows:
  4299. **
  4300. ** Pager object (sizeof(Pager) bytes)
  4301. ** PCache object (sqlite3PcacheSize() bytes)
  4302. ** Database file handle (pVfs->szOsFile bytes)
  4303. ** Sub-journal file handle (journalFileSize bytes)
  4304. ** Main journal file handle (journalFileSize bytes)
  4305. ** Database file name (nPathname+1 bytes)
  4306. ** Journal file name (nPathname+8+1 bytes)
  4307. */
  4308. pPtr = (u8 *)sqlite3MallocZero(
  4309. ROUND8(sizeof(*pPager)) + /* Pager structure */
  4310. ROUND8(pcacheSize) + /* PCache object */
  4311. ROUND8(pVfs->szOsFile) + /* The main db file */
  4312. journalFileSize * 2 + /* The two journal files */
  4313. nPathname + 1 + nUri + /* zFilename */
  4314. nPathname + 8 + 2 /* zJournal */
  4315. #ifndef SQLITE_OMIT_WAL
  4316. + nPathname + 4 + 2 /* zWal */
  4317. #endif
  4318. );
  4319. assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) );
  4320. if( !pPtr ){
  4321. sqlite3DbFree(0, zPathname);
  4322. return SQLITE_NOMEM;
  4323. }
  4324. pPager = (Pager*)(pPtr);
  4325. pPager->pPCache = (PCache*)(pPtr += ROUND8(sizeof(*pPager)));
  4326. pPager->fd = (sqlite3_file*)(pPtr += ROUND8(pcacheSize));
  4327. pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile));
  4328. pPager->jfd = (sqlite3_file*)(pPtr += journalFileSize);
  4329. pPager->zFilename = (char*)(pPtr += journalFileSize);
  4330. assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) );
  4331. /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */
  4332. if( zPathname ){
  4333. assert( nPathname>0 );
  4334. pPager->zJournal = (char*)(pPtr += nPathname + 1 + nUri);
  4335. memcpy(pPager->zFilename, zPathname, nPathname);
  4336. if( nUri ) memcpy(&pPager->zFilename[nPathname+1], zUri, nUri);
  4337. memcpy(pPager->zJournal, zPathname, nPathname);
  4338. memcpy(&pPager->zJournal[nPathname], "-journal\000", 8+2);
  4339. sqlite3FileSuffix3(pPager->zFilename, pPager->zJournal);
  4340. #ifndef SQLITE_OMIT_WAL
  4341. pPager->zWal = &pPager->zJournal[nPathname+8+1];
  4342. memcpy(pPager->zWal, zPathname, nPathname);
  4343. memcpy(&pPager->zWal[nPathname], "-wal\000", 4+1);
  4344. sqlite3FileSuffix3(pPager->zFilename, pPager->zWal);
  4345. #endif
  4346. sqlite3DbFree(0, zPathname);
  4347. }
  4348. pPager->pVfs = pVfs;
  4349. pPager->vfsFlags = vfsFlags;
  4350. /* Open the pager file.
  4351. */
  4352. if( zFilename && zFilename[0] ){
  4353. int fout = 0; /* VFS flags returned by xOpen() */
  4354. rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout);
  4355. assert( !memDb );
  4356. readOnly = (fout&SQLITE_OPEN_READONLY);
  4357. /* If the file was successfully opened for read/write access,
  4358. ** choose a default page size in case we have to create the
  4359. ** database file. The default page size is the maximum of:
  4360. **
  4361. ** + SQLITE_DEFAULT_PAGE_SIZE,
  4362. ** + The value returned by sqlite3OsSectorSize()
  4363. ** + The largest page size that can be written atomically.
  4364. */
  4365. if( rc==SQLITE_OK && !readOnly ){
  4366. setSectorSize(pPager);
  4367. assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE);
  4368. if( szPageDflt<pPager->sectorSize ){
  4369. if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
  4370. szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
  4371. }else{
  4372. szPageDflt = (u32)pPager->sectorSize;
  4373. }
  4374. }
  4375. #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  4376. {
  4377. int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  4378. int ii;
  4379. assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  4380. assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
  4381. assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
  4382. for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
  4383. if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){
  4384. szPageDflt = ii;
  4385. }
  4386. }
  4387. }
  4388. #endif
  4389. }
  4390. }else{
  4391. /* If a temporary file is requested, it is not opened immediately.
  4392. ** In this case we accept the default page size and delay actually
  4393. ** opening the file until the first call to OsWrite().
  4394. **
  4395. ** This branch is also run for an in-memory database. An in-memory
  4396. ** database is the same as a temp-file that is never written out to
  4397. ** disk and uses an in-memory rollback journal.
  4398. */
  4399. tempFile = 1;
  4400. pPager->eState = PAGER_READER;
  4401. pPager->eLock = EXCLUSIVE_LOCK;
  4402. readOnly = (vfsFlags&SQLITE_OPEN_READONLY);
  4403. }
  4404. /* The following call to PagerSetPagesize() serves to set the value of
  4405. ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer.
  4406. */
  4407. if( rc==SQLITE_OK ){
  4408. assert( pPager->memDb==0 );
  4409. rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
  4410. testcase( rc!=SQLITE_OK );
  4411. }
  4412. /* If an error occurred in either of the blocks above, free the
  4413. ** Pager structure and close the file.
  4414. */
  4415. if( rc!=SQLITE_OK ){
  4416. assert( !pPager->pTmpSpace );
  4417. sqlite3OsClose(pPager->fd);
  4418. sqlite3_free(pPager);
  4419. return rc;
  4420. }
  4421. /* Initialize the PCache object. */
  4422. assert( nExtra<1000 );
  4423. nExtra = ROUND8(nExtra);
  4424. sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
  4425. !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
  4426. PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
  4427. IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
  4428. pPager->useJournal = (u8)useJournal;
  4429. /* pPager->stmtOpen = 0; */
  4430. /* pPager->stmtInUse = 0; */
  4431. /* pPager->nRef = 0; */
  4432. /* pPager->stmtSize = 0; */
  4433. /* pPager->stmtJSize = 0; */
  4434. /* pPager->nPage = 0; */
  4435. pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
  4436. /* pPager->state = PAGER_UNLOCK; */
  4437. #if 0
  4438. assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
  4439. #endif
  4440. /* pPager->errMask = 0; */
  4441. pPager->tempFile = (u8)tempFile;
  4442. assert( tempFile==PAGER_LOCKINGMODE_NORMAL
  4443. || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
  4444. assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  4445. pPager->exclusiveMode = (u8)tempFile;
  4446. pPager->changeCountDone = pPager->tempFile;
  4447. pPager->memDb = (u8)memDb;
  4448. pPager->readOnly = (u8)readOnly;
  4449. assert( useJournal || pPager->tempFile );
  4450. pPager->noSync = pPager->tempFile;
  4451. if( pPager->noSync ){
  4452. assert( pPager->fullSync==0 );
  4453. assert( pPager->syncFlags==0 );
  4454. assert( pPager->walSyncFlags==0 );
  4455. assert( pPager->ckptSyncFlags==0 );
  4456. }else{
  4457. pPager->fullSync = 1;
  4458. pPager->syncFlags = SQLITE_SYNC_NORMAL;
  4459. pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS;
  4460. pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  4461. }
  4462. /* pPager->pFirst = 0; */
  4463. /* pPager->pFirstSynced = 0; */
  4464. /* pPager->pLast = 0; */
  4465. pPager->nExtra = (u16)nExtra;
  4466. pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  4467. assert( isOpen(pPager->fd) || tempFile );
  4468. setSectorSize(pPager);
  4469. if( !useJournal ){
  4470. pPager->journalMode = PAGER_JOURNALMODE_OFF;
  4471. }else if( memDb ){
  4472. pPager->journalMode = PAGER_JOURNALMODE_MEMORY;
  4473. }
  4474. /* pPager->xBusyHandler = 0; */
  4475. /* pPager->pBusyHandlerArg = 0; */
  4476. pPager->xReiniter = xReinit;
  4477. /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
  4478. /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */
  4479. *ppPager = pPager;
  4480. return SQLITE_OK;
  4481. }
  4482. /*
  4483. ** This function is called after transitioning from PAGER_UNLOCK to
  4484. ** PAGER_SHARED state. It tests if there is a hot journal present in
  4485. ** the file-system for the given pager. A hot journal is one that
  4486. ** needs to be played back. According to this function, a hot-journal
  4487. ** file exists if the following criteria are met:
  4488. **
  4489. ** * The journal file exists in the file system, and
  4490. ** * No process holds a RESERVED or greater lock on the database file, and
  4491. ** * The database file itself is greater than 0 bytes in size, and
  4492. ** * The first byte of the journal file exists and is not 0x00.
  4493. **
  4494. ** If the current size of the database file is 0 but a journal file
  4495. ** exists, that is probably an old journal left over from a prior
  4496. ** database with the same name. In this case the journal file is
  4497. ** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK
  4498. ** is returned.
  4499. **
  4500. ** This routine does not check if there is a master journal filename
  4501. ** at the end of the file. If there is, and that master journal file
  4502. ** does not exist, then the journal file is not really hot. In this
  4503. ** case this routine will return a false-positive. The pager_playback()
  4504. ** routine will discover that the journal file is not really hot and
  4505. ** will not roll it back.
  4506. **
  4507. ** If a hot-journal file is found to exist, *pExists is set to 1 and
  4508. ** SQLITE_OK returned. If no hot-journal file is present, *pExists is
  4509. ** set to 0 and SQLITE_OK returned. If an IO error occurs while trying
  4510. ** to determine whether or not a hot-journal file exists, the IO error
  4511. ** code is returned and the value of *pExists is undefined.
  4512. */
  4513. static int hasHotJournal(Pager *pPager, int *pExists){
  4514. sqlite3_vfs * const pVfs = pPager->pVfs;
  4515. int rc = SQLITE_OK; /* Return code */
  4516. int exists = 1; /* True if a journal file is present */
  4517. int jrnlOpen = !!isOpen(pPager->jfd);
  4518. assert( pPager->useJournal );
  4519. assert( isOpen(pPager->fd) );
  4520. assert( pPager->eState==PAGER_OPEN );
  4521. assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) &
  4522. SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
  4523. ));
  4524. *pExists = 0;
  4525. if( !jrnlOpen ){
  4526. rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
  4527. }
  4528. if( rc==SQLITE_OK && exists ){
  4529. int locked = 0; /* True if some process holds a RESERVED lock */
  4530. /* Race condition here: Another process might have been holding the
  4531. ** the RESERVED lock and have a journal open at the sqlite3OsAccess()
  4532. ** call above, but then delete the journal and drop the lock before
  4533. ** we get to the following sqlite3OsCheckReservedLock() call. If that
  4534. ** is the case, this routine might think there is a hot journal when
  4535. ** in fact there is none. This results in a false-positive which will
  4536. ** be dealt with by the playback routine. Ticket #3883.
  4537. */
  4538. rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
  4539. if( rc==SQLITE_OK && !locked ){
  4540. Pgno nPage; /* Number of pages in database file */
  4541. /* Check the size of the database file. If it consists of 0 pages,
  4542. ** then delete the journal file. See the header comment above for
  4543. ** the reasoning here. Delete the obsolete journal file under
  4544. ** a RESERVED lock to avoid race conditions and to avoid violating
  4545. ** [H33020].
  4546. */
  4547. rc = pagerPagecount(pPager, &nPage);
  4548. if( rc==SQLITE_OK ){
  4549. if( nPage==0 ){
  4550. sqlite3BeginBenignMalloc();
  4551. if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){
  4552. sqlite3OsDelete(pVfs, pPager->zJournal, 0);
  4553. if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK);
  4554. }
  4555. sqlite3EndBenignMalloc();
  4556. }else{
  4557. /* The journal file exists and no other connection has a reserved
  4558. ** or greater lock on the database file. Now check that there is
  4559. ** at least one non-zero bytes at the start of the journal file.
  4560. ** If there is, then we consider this journal to be hot. If not,
  4561. ** it can be ignored.
  4562. */
  4563. if( !jrnlOpen ){
  4564. int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL;
  4565. rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f);
  4566. }
  4567. if( rc==SQLITE_OK ){
  4568. u8 first = 0;
  4569. rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0);
  4570. if( rc==SQLITE_IOERR_SHORT_READ ){
  4571. rc = SQLITE_OK;
  4572. }
  4573. if( !jrnlOpen ){
  4574. sqlite3OsClose(pPager->jfd);
  4575. }
  4576. *pExists = (first!=0);
  4577. }else if( rc==SQLITE_CANTOPEN ){
  4578. /* If we cannot open the rollback journal file in order to see if
  4579. ** its has a zero header, that might be due to an I/O error, or
  4580. ** it might be due to the race condition described above and in
  4581. ** ticket #3883. Either way, assume that the journal is hot.
  4582. ** This might be a false positive. But if it is, then the
  4583. ** automatic journal playback and recovery mechanism will deal
  4584. ** with it under an EXCLUSIVE lock where we do not need to
  4585. ** worry so much with race conditions.
  4586. */
  4587. *pExists = 1;
  4588. rc = SQLITE_OK;
  4589. }
  4590. }
  4591. }
  4592. }
  4593. }
  4594. return rc;
  4595. }
  4596. /*
  4597. ** This function is called to obtain a shared lock on the database file.
  4598. ** It is illegal to call sqlite3PagerAcquire() until after this function
  4599. ** has been successfully called. If a shared-lock is already held when
  4600. ** this function is called, it is a no-op.
  4601. **
  4602. ** The following operations are also performed by this function.
  4603. **
  4604. ** 1) If the pager is currently in PAGER_OPEN state (no lock held
  4605. ** on the database file), then an attempt is made to obtain a
  4606. ** SHARED lock on the database file. Immediately after obtaining
  4607. ** the SHARED lock, the file-system is checked for a hot-journal,
  4608. ** which is played back if present. Following any hot-journal
  4609. ** rollback, the contents of the cache are validated by checking
  4610. ** the 'change-counter' field of the database file header and
  4611. ** discarded if they are found to be invalid.
  4612. **
  4613. ** 2) If the pager is running in exclusive-mode, and there are currently
  4614. ** no outstanding references to any pages, and is in the error state,
  4615. ** then an attempt is made to clear the error state by discarding
  4616. ** the contents of the page cache and rolling back any open journal
  4617. ** file.
  4618. **
  4619. ** If everything is successful, SQLITE_OK is returned. If an IO error
  4620. ** occurs while locking the database, checking for a hot-journal file or
  4621. ** rolling back a journal file, the IO error code is returned.
  4622. */
  4623. int sqlite3PagerSharedLock(Pager *pPager){
  4624. int rc = SQLITE_OK; /* Return code */
  4625. /* This routine is only called from b-tree and only when there are no
  4626. ** outstanding pages. This implies that the pager state should either
  4627. ** be OPEN or READER. READER is only possible if the pager is or was in
  4628. ** exclusive access mode.
  4629. */
  4630. assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
  4631. assert( assert_pager_state(pPager) );
  4632. assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
  4633. if( NEVER(MEMDB && pPager->errCode) ){ return pPager->errCode; }
  4634. if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){
  4635. int bHotJournal = 1; /* True if there exists a hot journal-file */
  4636. assert( !MEMDB );
  4637. rc = pager_wait_on_lock(pPager, SHARED_LOCK);
  4638. if( rc!=SQLITE_OK ){
  4639. assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK );
  4640. goto failed;
  4641. }
  4642. /* If a journal file exists, and there is no RESERVED lock on the
  4643. ** database file, then it either needs to be played back or deleted.
  4644. */
  4645. if( pPager->eLock<=SHARED_LOCK ){
  4646. rc = hasHotJournal(pPager, &bHotJournal);
  4647. }
  4648. if( rc!=SQLITE_OK ){
  4649. goto failed;
  4650. }
  4651. if( bHotJournal ){
  4652. if( pPager->readOnly ){
  4653. rc = SQLITE_READONLY_ROLLBACK;
  4654. goto failed;
  4655. }
  4656. /* Get an EXCLUSIVE lock on the database file. At this point it is
  4657. ** important that a RESERVED lock is not obtained on the way to the
  4658. ** EXCLUSIVE lock. If it were, another process might open the
  4659. ** database file, detect the RESERVED lock, and conclude that the
  4660. ** database is safe to read while this process is still rolling the
  4661. ** hot-journal back.
  4662. **
  4663. ** Because the intermediate RESERVED lock is not requested, any
  4664. ** other process attempting to access the database file will get to
  4665. ** this point in the code and fail to obtain its own EXCLUSIVE lock
  4666. ** on the database file.
  4667. **
  4668. ** Unless the pager is in locking_mode=exclusive mode, the lock is
  4669. ** downgraded to SHARED_LOCK before this function returns.
  4670. */
  4671. rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
  4672. if( rc!=SQLITE_OK ){
  4673. goto failed;
  4674. }
  4675. /* If it is not already open and the file exists on disk, open the
  4676. ** journal for read/write access. Write access is required because
  4677. ** in exclusive-access mode the file descriptor will be kept open
  4678. ** and possibly used for a transaction later on. Also, write-access
  4679. ** is usually required to finalize the journal in journal_mode=persist
  4680. ** mode (and also for journal_mode=truncate on some systems).
  4681. **
  4682. ** If the journal does not exist, it usually means that some
  4683. ** other connection managed to get in and roll it back before
  4684. ** this connection obtained the exclusive lock above. Or, it
  4685. ** may mean that the pager was in the error-state when this
  4686. ** function was called and the journal file does not exist.
  4687. */
  4688. if( !isOpen(pPager->jfd) ){
  4689. sqlite3_vfs * const pVfs = pPager->pVfs;
  4690. int bExists; /* True if journal file exists */
  4691. rc = sqlite3OsAccess(
  4692. pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists);
  4693. if( rc==SQLITE_OK && bExists ){
  4694. int fout = 0;
  4695. int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
  4696. assert( !pPager->tempFile );
  4697. rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
  4698. assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
  4699. if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){
  4700. rc = SQLITE_CANTOPEN_BKPT;
  4701. sqlite3OsClose(pPager->jfd);
  4702. }
  4703. }
  4704. }
  4705. /* Playback and delete the journal. Drop the database write
  4706. ** lock and reacquire the read lock. Purge the cache before
  4707. ** playing back the hot-journal so that we don't end up with
  4708. ** an inconsistent cache. Sync the hot journal before playing
  4709. ** it back since the process that crashed and left the hot journal
  4710. ** probably did not sync it and we are required to always sync
  4711. ** the journal before playing it back.
  4712. */
  4713. if( isOpen(pPager->jfd) ){
  4714. assert( rc==SQLITE_OK );
  4715. rc = pagerSyncHotJournal(pPager);
  4716. if( rc==SQLITE_OK ){
  4717. rc = pager_playback(pPager, 1);
  4718. pPager->eState = PAGER_OPEN;
  4719. }
  4720. }else if( !pPager->exclusiveMode ){
  4721. pagerUnlockDb(pPager, SHARED_LOCK);
  4722. }
  4723. if( rc!=SQLITE_OK ){
  4724. /* This branch is taken if an error occurs while trying to open
  4725. ** or roll back a hot-journal while holding an EXCLUSIVE lock. The
  4726. ** pager_unlock() routine will be called before returning to unlock
  4727. ** the file. If the unlock attempt fails, then Pager.eLock must be
  4728. ** set to UNKNOWN_LOCK (see the comment above the #define for
  4729. ** UNKNOWN_LOCK above for an explanation).
  4730. **
  4731. ** In order to get pager_unlock() to do this, set Pager.eState to
  4732. ** PAGER_ERROR now. This is not actually counted as a transition
  4733. ** to ERROR state in the state diagram at the top of this file,
  4734. ** since we know that the same call to pager_unlock() will very
  4735. ** shortly transition the pager object to the OPEN state. Calling
  4736. ** assert_pager_state() would fail now, as it should not be possible
  4737. ** to be in ERROR state when there are zero outstanding page
  4738. ** references.
  4739. */
  4740. pager_error(pPager, rc);
  4741. goto failed;
  4742. }
  4743. assert( pPager->eState==PAGER_OPEN );
  4744. assert( (pPager->eLock==SHARED_LOCK)
  4745. || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
  4746. );
  4747. }
  4748. if( !pPager->tempFile && (
  4749. pPager->pBackup
  4750. || sqlite3PcachePagecount(pPager->pPCache)>0
  4751. || USEFETCH(pPager)
  4752. )){
  4753. /* The shared-lock has just been acquired on the database file
  4754. ** and there are already pages in the cache (from a previous
  4755. ** read or write transaction). Check to see if the database
  4756. ** has been modified. If the database has changed, flush the
  4757. ** cache.
  4758. **
  4759. ** Database changes is detected by looking at 15 bytes beginning
  4760. ** at offset 24 into the file. The first 4 of these 16 bytes are
  4761. ** a 32-bit counter that is incremented with each change. The
  4762. ** other bytes change randomly with each file change when
  4763. ** a codec is in use.
  4764. **
  4765. ** There is a vanishingly small chance that a change will not be
  4766. ** detected. The chance of an undetected change is so small that
  4767. ** it can be neglected.
  4768. */
  4769. Pgno nPage = 0;
  4770. char dbFileVers[sizeof(pPager->dbFileVers)];
  4771. rc = pagerPagecount(pPager, &nPage);
  4772. if( rc ) goto failed;
  4773. if( nPage>0 ){
  4774. IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
  4775. rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
  4776. if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
  4777. goto failed;
  4778. }
  4779. }else{
  4780. memset(dbFileVers, 0, sizeof(dbFileVers));
  4781. }
  4782. if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
  4783. pager_reset(pPager);
  4784. /* Unmap the database file. It is possible that external processes
  4785. ** may have truncated the database file and then extended it back
  4786. ** to its original size while this process was not holding a lock.
  4787. ** In this case there may exist a Pager.pMap mapping that appears
  4788. ** to be the right size but is not actually valid. Avoid this
  4789. ** possibility by unmapping the db here. */
  4790. if( USEFETCH(pPager) ){
  4791. sqlite3OsUnfetch(pPager->fd, 0, 0);
  4792. }
  4793. }
  4794. }
  4795. /* If there is a WAL file in the file-system, open this database in WAL
  4796. ** mode. Otherwise, the following function call is a no-op.
  4797. */
  4798. rc = pagerOpenWalIfPresent(pPager);
  4799. #ifndef SQLITE_OMIT_WAL
  4800. assert( pPager->pWal==0 || rc==SQLITE_OK );
  4801. #endif
  4802. }
  4803. if( pagerUseWal(pPager) ){
  4804. assert( rc==SQLITE_OK );
  4805. rc = pagerBeginReadTransaction(pPager);
  4806. }
  4807. if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
  4808. rc = pagerPagecount(pPager, &pPager->dbSize);
  4809. }
  4810. failed:
  4811. if( rc!=SQLITE_OK ){
  4812. assert( !MEMDB );
  4813. pager_unlock(pPager);
  4814. assert( pPager->eState==PAGER_OPEN );
  4815. }else{
  4816. pPager->eState = PAGER_READER;
  4817. }
  4818. return rc;
  4819. }
  4820. /*
  4821. ** If the reference count has reached zero, rollback any active
  4822. ** transaction and unlock the pager.
  4823. **
  4824. ** Except, in locking_mode=EXCLUSIVE when there is nothing to in
  4825. ** the rollback journal, the unlock is not performed and there is
  4826. ** nothing to rollback, so this routine is a no-op.
  4827. */
  4828. static void pagerUnlockIfUnused(Pager *pPager){
  4829. if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
  4830. pagerUnlockAndRollback(pPager);
  4831. }
  4832. }
  4833. /*
  4834. ** Acquire a reference to page number pgno in pager pPager (a page
  4835. ** reference has type DbPage*). If the requested reference is
  4836. ** successfully obtained, it is copied to *ppPage and SQLITE_OK returned.
  4837. **
  4838. ** If the requested page is already in the cache, it is returned.
  4839. ** Otherwise, a new page object is allocated and populated with data
  4840. ** read from the database file. In some cases, the pcache module may
  4841. ** choose not to allocate a new page object and may reuse an existing
  4842. ** object with no outstanding references.
  4843. **
  4844. ** The extra data appended to a page is always initialized to zeros the
  4845. ** first time a page is loaded into memory. If the page requested is
  4846. ** already in the cache when this function is called, then the extra
  4847. ** data is left as it was when the page object was last used.
  4848. **
  4849. ** If the database image is smaller than the requested page or if a
  4850. ** non-zero value is passed as the noContent parameter and the
  4851. ** requested page is not already stored in the cache, then no
  4852. ** actual disk read occurs. In this case the memory image of the
  4853. ** page is initialized to all zeros.
  4854. **
  4855. ** If noContent is true, it means that we do not care about the contents
  4856. ** of the page. This occurs in two scenarios:
  4857. **
  4858. ** a) When reading a free-list leaf page from the database, and
  4859. **
  4860. ** b) When a savepoint is being rolled back and we need to load
  4861. ** a new page into the cache to be filled with the data read
  4862. ** from the savepoint journal.
  4863. **
  4864. ** If noContent is true, then the data returned is zeroed instead of
  4865. ** being read from the database. Additionally, the bits corresponding
  4866. ** to pgno in Pager.pInJournal (bitvec of pages already written to the
  4867. ** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open
  4868. ** savepoints are set. This means if the page is made writable at any
  4869. ** point in the future, using a call to sqlite3PagerWrite(), its contents
  4870. ** will not be journaled. This saves IO.
  4871. **
  4872. ** The acquisition might fail for several reasons. In all cases,
  4873. ** an appropriate error code is returned and *ppPage is set to NULL.
  4874. **
  4875. ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt
  4876. ** to find a page in the in-memory cache first. If the page is not already
  4877. ** in memory, this routine goes to disk to read it in whereas Lookup()
  4878. ** just returns 0. This routine acquires a read-lock the first time it
  4879. ** has to go to disk, and could also playback an old journal if necessary.
  4880. ** Since Lookup() never goes to disk, it never has to deal with locks
  4881. ** or journal files.
  4882. */
  4883. int sqlite3PagerAcquire(
  4884. Pager *pPager, /* The pager open on the database file */
  4885. Pgno pgno, /* Page number to fetch */
  4886. DbPage **ppPage, /* Write a pointer to the page here */
  4887. int flags /* PAGER_GET_XXX flags */
  4888. ){
  4889. int rc = SQLITE_OK;
  4890. PgHdr *pPg = 0;
  4891. u32 iFrame = 0; /* Frame to read from WAL file */
  4892. const int noContent = (flags & PAGER_GET_NOCONTENT);
  4893. /* It is acceptable to use a read-only (mmap) page for any page except
  4894. ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
  4895. ** flag was specified by the caller. And so long as the db is not a
  4896. ** temporary or in-memory database. */
  4897. const int bMmapOk = (pgno!=1 && USEFETCH(pPager)
  4898. && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY))
  4899. #ifdef SQLITE_HAS_CODEC
  4900. && pPager->xCodec==0
  4901. #endif
  4902. );
  4903. assert( pPager->eState>=PAGER_READER );
  4904. assert( assert_pager_state(pPager) );
  4905. assert( noContent==0 || bMmapOk==0 );
  4906. if( pgno==0 ){
  4907. return SQLITE_CORRUPT_BKPT;
  4908. }
  4909. /* If the pager is in the error state, return an error immediately.
  4910. ** Otherwise, request the page from the PCache layer. */
  4911. if( pPager->errCode!=SQLITE_OK ){
  4912. rc = pPager->errCode;
  4913. }else{
  4914. if( bMmapOk && pagerUseWal(pPager) ){
  4915. rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
  4916. if( rc!=SQLITE_OK ) goto pager_acquire_err;
  4917. }
  4918. if( iFrame==0 && bMmapOk ){
  4919. void *pData = 0;
  4920. rc = sqlite3OsFetch(pPager->fd,
  4921. (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData
  4922. );
  4923. if( rc==SQLITE_OK && pData ){
  4924. if( pPager->eState>PAGER_READER ){
  4925. (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
  4926. }
  4927. if( pPg==0 ){
  4928. rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg);
  4929. }else{
  4930. sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData);
  4931. }
  4932. if( pPg ){
  4933. assert( rc==SQLITE_OK );
  4934. *ppPage = pPg;
  4935. return SQLITE_OK;
  4936. }
  4937. }
  4938. if( rc!=SQLITE_OK ){
  4939. goto pager_acquire_err;
  4940. }
  4941. }
  4942. rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
  4943. }
  4944. if( rc!=SQLITE_OK ){
  4945. /* Either the call to sqlite3PcacheFetch() returned an error or the
  4946. ** pager was already in the error-state when this function was called.
  4947. ** Set pPg to 0 and jump to the exception handler. */
  4948. pPg = 0;
  4949. goto pager_acquire_err;
  4950. }
  4951. assert( (*ppPage)->pgno==pgno );
  4952. assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 );
  4953. if( (*ppPage)->pPager && !noContent ){
  4954. /* In this case the pcache already contains an initialized copy of
  4955. ** the page. Return without further ado. */
  4956. assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
  4957. pPager->aStat[PAGER_STAT_HIT]++;
  4958. return SQLITE_OK;
  4959. }else{
  4960. /* The pager cache has created a new page. Its content needs to
  4961. ** be initialized. */
  4962. pPg = *ppPage;
  4963. pPg->pPager = pPager;
  4964. /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
  4965. ** number greater than this, or the unused locking-page, is requested. */
  4966. if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){
  4967. rc = SQLITE_CORRUPT_BKPT;
  4968. goto pager_acquire_err;
  4969. }
  4970. if( MEMDB || pPager->dbSize<pgno || noContent || !isOpen(pPager->fd) ){
  4971. if( pgno>pPager->mxPgno ){
  4972. rc = SQLITE_FULL;
  4973. goto pager_acquire_err;
  4974. }
  4975. if( noContent ){
  4976. /* Failure to set the bits in the InJournal bit-vectors is benign.
  4977. ** It merely means that we might do some extra work to journal a
  4978. ** page that does not need to be journaled. Nevertheless, be sure
  4979. ** to test the case where a malloc error occurs while trying to set
  4980. ** a bit in a bit vector.
  4981. */
  4982. sqlite3BeginBenignMalloc();
  4983. if( pgno<=pPager->dbOrigSize ){
  4984. TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno);
  4985. testcase( rc==SQLITE_NOMEM );
  4986. }
  4987. TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno);
  4988. testcase( rc==SQLITE_NOMEM );
  4989. sqlite3EndBenignMalloc();
  4990. }
  4991. memset(pPg->pData, 0, pPager->pageSize);
  4992. IOTRACE(("ZERO %p %d\n", pPager, pgno));
  4993. }else{
  4994. if( pagerUseWal(pPager) && bMmapOk==0 ){
  4995. rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
  4996. if( rc!=SQLITE_OK ) goto pager_acquire_err;
  4997. }
  4998. assert( pPg->pPager==pPager );
  4999. pPager->aStat[PAGER_STAT_MISS]++;
  5000. rc = readDbPage(pPg, iFrame);
  5001. if( rc!=SQLITE_OK ){
  5002. goto pager_acquire_err;
  5003. }
  5004. }
  5005. pager_set_pagehash(pPg);
  5006. }
  5007. return SQLITE_OK;
  5008. pager_acquire_err:
  5009. assert( rc!=SQLITE_OK );
  5010. if( pPg ){
  5011. sqlite3PcacheDrop(pPg);
  5012. }
  5013. pagerUnlockIfUnused(pPager);
  5014. *ppPage = 0;
  5015. return rc;
  5016. }
  5017. /*
  5018. ** Acquire a page if it is already in the in-memory cache. Do
  5019. ** not read the page from disk. Return a pointer to the page,
  5020. ** or 0 if the page is not in cache.
  5021. **
  5022. ** See also sqlite3PagerGet(). The difference between this routine
  5023. ** and sqlite3PagerGet() is that _get() will go to the disk and read
  5024. ** in the page if the page is not already in cache. This routine
  5025. ** returns NULL if the page is not in cache or if a disk I/O error
  5026. ** has ever happened.
  5027. */
  5028. DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
  5029. PgHdr *pPg = 0;
  5030. assert( pPager!=0 );
  5031. assert( pgno!=0 );
  5032. assert( pPager->pPCache!=0 );
  5033. assert( pPager->eState>=PAGER_READER && pPager->eState!=PAGER_ERROR );
  5034. sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
  5035. return pPg;
  5036. }
  5037. /*
  5038. ** Release a page reference.
  5039. **
  5040. ** If the number of references to the page drop to zero, then the
  5041. ** page is added to the LRU list. When all references to all pages
  5042. ** are released, a rollback occurs and the lock on the database is
  5043. ** removed.
  5044. */
  5045. void sqlite3PagerUnref(DbPage *pPg){
  5046. if( pPg ){
  5047. Pager *pPager = pPg->pPager;
  5048. if( pPg->flags & PGHDR_MMAP ){
  5049. pagerReleaseMapPage(pPg);
  5050. }else{
  5051. sqlite3PcacheRelease(pPg);
  5052. }
  5053. pagerUnlockIfUnused(pPager);
  5054. }
  5055. }
  5056. /*
  5057. ** This function is called at the start of every write transaction.
  5058. ** There must already be a RESERVED or EXCLUSIVE lock on the database
  5059. ** file when this routine is called.
  5060. **
  5061. ** Open the journal file for pager pPager and write a journal header
  5062. ** to the start of it. If there are active savepoints, open the sub-journal
  5063. ** as well. This function is only used when the journal file is being
  5064. ** opened to write a rollback log for a transaction. It is not used
  5065. ** when opening a hot journal file to roll it back.
  5066. **
  5067. ** If the journal file is already open (as it may be in exclusive mode),
  5068. ** then this function just writes a journal header to the start of the
  5069. ** already open file.
  5070. **
  5071. ** Whether or not the journal file is opened by this function, the
  5072. ** Pager.pInJournal bitvec structure is allocated.
  5073. **
  5074. ** Return SQLITE_OK if everything is successful. Otherwise, return
  5075. ** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or
  5076. ** an IO error code if opening or writing the journal file fails.
  5077. */
  5078. static int pager_open_journal(Pager *pPager){
  5079. int rc = SQLITE_OK; /* Return code */
  5080. sqlite3_vfs * const pVfs = pPager->pVfs; /* Local cache of vfs pointer */
  5081. assert( pPager->eState==PAGER_WRITER_LOCKED );
  5082. assert( assert_pager_state(pPager) );
  5083. assert( pPager->pInJournal==0 );
  5084. /* If already in the error state, this function is a no-op. But on
  5085. ** the other hand, this routine is never called if we are already in
  5086. ** an error state. */
  5087. if( NEVER(pPager->errCode) ) return pPager->errCode;
  5088. if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
  5089. pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
  5090. if( pPager->pInJournal==0 ){
  5091. return SQLITE_NOMEM;
  5092. }
  5093. /* Open the journal file if it is not already open. */
  5094. if( !isOpen(pPager->jfd) ){
  5095. if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
  5096. sqlite3MemJournalOpen(pPager->jfd);
  5097. }else{
  5098. const int flags = /* VFS flags to open journal file */
  5099. SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
  5100. (pPager->tempFile ?
  5101. (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL):
  5102. (SQLITE_OPEN_MAIN_JOURNAL)
  5103. );
  5104. #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  5105. rc = sqlite3JournalOpen(
  5106. pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
  5107. );
  5108. #else
  5109. rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
  5110. #endif
  5111. }
  5112. assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
  5113. }
  5114. /* Write the first journal header to the journal file and open
  5115. ** the sub-journal if necessary.
  5116. */
  5117. if( rc==SQLITE_OK ){
  5118. /* TODO: Check if all of these are really required. */
  5119. pPager->nRec = 0;
  5120. pPager->journalOff = 0;
  5121. pPager->setMaster = 0;
  5122. pPager->journalHdr = 0;
  5123. rc = writeJournalHdr(pPager);
  5124. }
  5125. }
  5126. if( rc!=SQLITE_OK ){
  5127. sqlite3BitvecDestroy(pPager->pInJournal);
  5128. pPager->pInJournal = 0;
  5129. }else{
  5130. assert( pPager->eState==PAGER_WRITER_LOCKED );
  5131. pPager->eState = PAGER_WRITER_CACHEMOD;
  5132. }
  5133. return rc;
  5134. }
  5135. /*
  5136. ** Begin a write-transaction on the specified pager object. If a
  5137. ** write-transaction has already been opened, this function is a no-op.
  5138. **
  5139. ** If the exFlag argument is false, then acquire at least a RESERVED
  5140. ** lock on the database file. If exFlag is true, then acquire at least
  5141. ** an EXCLUSIVE lock. If such a lock is already held, no locking
  5142. ** functions need be called.
  5143. **
  5144. ** If the subjInMemory argument is non-zero, then any sub-journal opened
  5145. ** within this transaction will be opened as an in-memory file. This
  5146. ** has no effect if the sub-journal is already opened (as it may be when
  5147. ** running in exclusive mode) or if the transaction does not require a
  5148. ** sub-journal. If the subjInMemory argument is zero, then any required
  5149. ** sub-journal is implemented in-memory if pPager is an in-memory database,
  5150. ** or using a temporary file otherwise.
  5151. */
  5152. int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
  5153. int rc = SQLITE_OK;
  5154. if( pPager->errCode ) return pPager->errCode;
  5155. assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
  5156. pPager->subjInMemory = (u8)subjInMemory;
  5157. if( ALWAYS(pPager->eState==PAGER_READER) ){
  5158. assert( pPager->pInJournal==0 );
  5159. if( pagerUseWal(pPager) ){
  5160. /* If the pager is configured to use locking_mode=exclusive, and an
  5161. ** exclusive lock on the database is not already held, obtain it now.
  5162. */
  5163. if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){
  5164. rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
  5165. if( rc!=SQLITE_OK ){
  5166. return rc;
  5167. }
  5168. sqlite3WalExclusiveMode(pPager->pWal, 1);
  5169. }
  5170. /* Grab the write lock on the log file. If successful, upgrade to
  5171. ** PAGER_RESERVED state. Otherwise, return an error code to the caller.
  5172. ** The busy-handler is not invoked if another connection already
  5173. ** holds the write-lock. If possible, the upper layer will call it.
  5174. */
  5175. rc = sqlite3WalBeginWriteTransaction(pPager->pWal);
  5176. }else{
  5177. /* Obtain a RESERVED lock on the database file. If the exFlag parameter
  5178. ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
  5179. ** busy-handler callback can be used when upgrading to the EXCLUSIVE
  5180. ** lock, but not when obtaining the RESERVED lock.
  5181. */
  5182. rc = pagerLockDb(pPager, RESERVED_LOCK);
  5183. if( rc==SQLITE_OK && exFlag ){
  5184. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  5185. }
  5186. }
  5187. if( rc==SQLITE_OK ){
  5188. /* Change to WRITER_LOCKED state.
  5189. **
  5190. ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD
  5191. ** when it has an open transaction, but never to DBMOD or FINISHED.
  5192. ** This is because in those states the code to roll back savepoint
  5193. ** transactions may copy data from the sub-journal into the database
  5194. ** file as well as into the page cache. Which would be incorrect in
  5195. ** WAL mode.
  5196. */
  5197. pPager->eState = PAGER_WRITER_LOCKED;
  5198. pPager->dbHintSize = pPager->dbSize;
  5199. pPager->dbFileSize = pPager->dbSize;
  5200. pPager->dbOrigSize = pPager->dbSize;
  5201. pPager->journalOff = 0;
  5202. }
  5203. assert( rc==SQLITE_OK || pPager->eState==PAGER_READER );
  5204. assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED );
  5205. assert( assert_pager_state(pPager) );
  5206. }
  5207. PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
  5208. return rc;
  5209. }
  5210. /*
  5211. ** Mark a single data page as writeable. The page is written into the
  5212. ** main journal or sub-journal as required. If the page is written into
  5213. ** one of the journals, the corresponding bit is set in the
  5214. ** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
  5215. ** of any open savepoints as appropriate.
  5216. */
  5217. static int pager_write(PgHdr *pPg){
  5218. void *pData = pPg->pData;
  5219. Pager *pPager = pPg->pPager;
  5220. int rc = SQLITE_OK;
  5221. /* This routine is not called unless a write-transaction has already
  5222. ** been started. The journal file may or may not be open at this point.
  5223. ** It is never called in the ERROR state.
  5224. */
  5225. assert( pPager->eState==PAGER_WRITER_LOCKED
  5226. || pPager->eState==PAGER_WRITER_CACHEMOD
  5227. || pPager->eState==PAGER_WRITER_DBMOD
  5228. );
  5229. assert( assert_pager_state(pPager) );
  5230. /* If an error has been previously detected, report the same error
  5231. ** again. This should not happen, but the check provides robustness. */
  5232. if( NEVER(pPager->errCode) ) return pPager->errCode;
  5233. /* Higher-level routines never call this function if database is not
  5234. ** writable. But check anyway, just for robustness. */
  5235. if( NEVER(pPager->readOnly) ) return SQLITE_PERM;
  5236. CHECK_PAGE(pPg);
  5237. /* The journal file needs to be opened. Higher level routines have already
  5238. ** obtained the necessary locks to begin the write-transaction, but the
  5239. ** rollback journal might not yet be open. Open it now if this is the case.
  5240. **
  5241. ** This is done before calling sqlite3PcacheMakeDirty() on the page.
  5242. ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
  5243. ** an error might occur and the pager would end up in WRITER_LOCKED state
  5244. ** with pages marked as dirty in the cache.
  5245. */
  5246. if( pPager->eState==PAGER_WRITER_LOCKED ){
  5247. rc = pager_open_journal(pPager);
  5248. if( rc!=SQLITE_OK ) return rc;
  5249. }
  5250. assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
  5251. assert( assert_pager_state(pPager) );
  5252. /* Mark the page as dirty. If the page has already been written
  5253. ** to the journal then we can return right away.
  5254. */
  5255. sqlite3PcacheMakeDirty(pPg);
  5256. if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
  5257. assert( !pagerUseWal(pPager) );
  5258. }else{
  5259. /* The transaction journal now exists and we have a RESERVED or an
  5260. ** EXCLUSIVE lock on the main database file. Write the current page to
  5261. ** the transaction journal if it is not there already.
  5262. */
  5263. if( !pageInJournal(pPg) && !pagerUseWal(pPager) ){
  5264. assert( pagerUseWal(pPager)==0 );
  5265. if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){
  5266. u32 cksum;
  5267. char *pData2;
  5268. i64 iOff = pPager->journalOff;
  5269. /* We should never write to the journal file the page that
  5270. ** contains the database locks. The following assert verifies
  5271. ** that we do not. */
  5272. assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
  5273. assert( pPager->journalHdr<=pPager->journalOff );
  5274. CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
  5275. cksum = pager_cksum(pPager, (u8*)pData2);
  5276. /* Even if an IO or diskfull error occurs while journalling the
  5277. ** page in the block above, set the need-sync flag for the page.
  5278. ** Otherwise, when the transaction is rolled back, the logic in
  5279. ** playback_one_page() will think that the page needs to be restored
  5280. ** in the database file. And if an IO error occurs while doing so,
  5281. ** then corruption may follow.
  5282. */
  5283. pPg->flags |= PGHDR_NEED_SYNC;
  5284. rc = write32bits(pPager->jfd, iOff, pPg->pgno);
  5285. if( rc!=SQLITE_OK ) return rc;
  5286. rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4);
  5287. if( rc!=SQLITE_OK ) return rc;
  5288. rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum);
  5289. if( rc!=SQLITE_OK ) return rc;
  5290. IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
  5291. pPager->journalOff, pPager->pageSize));
  5292. PAGER_INCR(sqlite3_pager_writej_count);
  5293. PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n",
  5294. PAGERID(pPager), pPg->pgno,
  5295. ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg)));
  5296. pPager->journalOff += 8 + pPager->pageSize;
  5297. pPager->nRec++;
  5298. assert( pPager->pInJournal!=0 );
  5299. rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
  5300. testcase( rc==SQLITE_NOMEM );
  5301. assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
  5302. rc |= addToSavepointBitvecs(pPager, pPg->pgno);
  5303. if( rc!=SQLITE_OK ){
  5304. assert( rc==SQLITE_NOMEM );
  5305. return rc;
  5306. }
  5307. }else{
  5308. if( pPager->eState!=PAGER_WRITER_DBMOD ){
  5309. pPg->flags |= PGHDR_NEED_SYNC;
  5310. }
  5311. PAGERTRACE(("APPEND %d page %d needSync=%d\n",
  5312. PAGERID(pPager), pPg->pgno,
  5313. ((pPg->flags&PGHDR_NEED_SYNC)?1:0)));
  5314. }
  5315. }
  5316. /* If the statement journal is open and the page is not in it,
  5317. ** then write the current page to the statement journal. Note that
  5318. ** the statement journal format differs from the standard journal format
  5319. ** in that it omits the checksums and the header.
  5320. */
  5321. if( subjRequiresPage(pPg) ){
  5322. rc = subjournalPage(pPg);
  5323. }
  5324. }
  5325. /* Update the database size and return.
  5326. */
  5327. if( pPager->dbSize<pPg->pgno ){
  5328. pPager->dbSize = pPg->pgno;
  5329. }
  5330. return rc;
  5331. }
  5332. /*
  5333. ** Mark a data page as writeable. This routine must be called before
  5334. ** making changes to a page. The caller must check the return value
  5335. ** of this function and be careful not to change any page data unless
  5336. ** this routine returns SQLITE_OK.
  5337. **
  5338. ** The difference between this function and pager_write() is that this
  5339. ** function also deals with the special case where 2 or more pages
  5340. ** fit on a single disk sector. In this case all co-resident pages
  5341. ** must have been written to the journal file before returning.
  5342. **
  5343. ** If an error occurs, SQLITE_NOMEM or an IO error code is returned
  5344. ** as appropriate. Otherwise, SQLITE_OK.
  5345. */
  5346. int sqlite3PagerWrite(DbPage *pDbPage){
  5347. int rc = SQLITE_OK;
  5348. PgHdr *pPg = pDbPage;
  5349. Pager *pPager = pPg->pPager;
  5350. Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  5351. assert( (pPg->flags & PGHDR_MMAP)==0 );
  5352. assert( pPager->eState>=PAGER_WRITER_LOCKED );
  5353. assert( pPager->eState!=PAGER_ERROR );
  5354. assert( assert_pager_state(pPager) );
  5355. if( nPagePerSector>1 ){
  5356. Pgno nPageCount; /* Total number of pages in database file */
  5357. Pgno pg1; /* First page of the sector pPg is located on. */
  5358. int nPage = 0; /* Number of pages starting at pg1 to journal */
  5359. int ii; /* Loop counter */
  5360. int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */
  5361. /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow
  5362. ** a journal header to be written between the pages journaled by
  5363. ** this function.
  5364. */
  5365. assert( !MEMDB );
  5366. assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)==0 );
  5367. pPager->doNotSpill |= SPILLFLAG_NOSYNC;
  5368. /* This trick assumes that both the page-size and sector-size are
  5369. ** an integer power of 2. It sets variable pg1 to the identifier
  5370. ** of the first page of the sector pPg is located on.
  5371. */
  5372. pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
  5373. nPageCount = pPager->dbSize;
  5374. if( pPg->pgno>nPageCount ){
  5375. nPage = (pPg->pgno - pg1)+1;
  5376. }else if( (pg1+nPagePerSector-1)>nPageCount ){
  5377. nPage = nPageCount+1-pg1;
  5378. }else{
  5379. nPage = nPagePerSector;
  5380. }
  5381. assert(nPage>0);
  5382. assert(pg1<=pPg->pgno);
  5383. assert((pg1+nPage)>pPg->pgno);
  5384. for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
  5385. Pgno pg = pg1+ii;
  5386. PgHdr *pPage;
  5387. if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
  5388. if( pg!=PAGER_MJ_PGNO(pPager) ){
  5389. rc = sqlite3PagerGet(pPager, pg, &pPage);
  5390. if( rc==SQLITE_OK ){
  5391. rc = pager_write(pPage);
  5392. if( pPage->flags&PGHDR_NEED_SYNC ){
  5393. needSync = 1;
  5394. }
  5395. sqlite3PagerUnref(pPage);
  5396. }
  5397. }
  5398. }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
  5399. if( pPage->flags&PGHDR_NEED_SYNC ){
  5400. needSync = 1;
  5401. }
  5402. sqlite3PagerUnref(pPage);
  5403. }
  5404. }
  5405. /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages
  5406. ** starting at pg1, then it needs to be set for all of them. Because
  5407. ** writing to any of these nPage pages may damage the others, the
  5408. ** journal file must contain sync()ed copies of all of them
  5409. ** before any of them can be written out to the database file.
  5410. */
  5411. if( rc==SQLITE_OK && needSync ){
  5412. assert( !MEMDB );
  5413. for(ii=0; ii<nPage; ii++){
  5414. PgHdr *pPage = pager_lookup(pPager, pg1+ii);
  5415. if( pPage ){
  5416. pPage->flags |= PGHDR_NEED_SYNC;
  5417. sqlite3PagerUnref(pPage);
  5418. }
  5419. }
  5420. }
  5421. assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)!=0 );
  5422. pPager->doNotSpill &= ~SPILLFLAG_NOSYNC;
  5423. }else{
  5424. rc = pager_write(pDbPage);
  5425. }
  5426. return rc;
  5427. }
  5428. /*
  5429. ** Return TRUE if the page given in the argument was previously passed
  5430. ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok
  5431. ** to change the content of the page.
  5432. */
  5433. #ifndef NDEBUG
  5434. int sqlite3PagerIswriteable(DbPage *pPg){
  5435. return pPg->flags&PGHDR_DIRTY;
  5436. }
  5437. #endif
  5438. /*
  5439. ** A call to this routine tells the pager that it is not necessary to
  5440. ** write the information on page pPg back to the disk, even though
  5441. ** that page might be marked as dirty. This happens, for example, when
  5442. ** the page has been added as a leaf of the freelist and so its
  5443. ** content no longer matters.
  5444. **
  5445. ** The overlying software layer calls this routine when all of the data
  5446. ** on the given page is unused. The pager marks the page as clean so
  5447. ** that it does not get written to disk.
  5448. **
  5449. ** Tests show that this optimization can quadruple the speed of large
  5450. ** DELETE operations.
  5451. */
  5452. void sqlite3PagerDontWrite(PgHdr *pPg){
  5453. Pager *pPager = pPg->pPager;
  5454. if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){
  5455. PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)));
  5456. IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
  5457. pPg->flags |= PGHDR_DONT_WRITE;
  5458. pager_set_pagehash(pPg);
  5459. }
  5460. }
  5461. /*
  5462. ** This routine is called to increment the value of the database file
  5463. ** change-counter, stored as a 4-byte big-endian integer starting at
  5464. ** byte offset 24 of the pager file. The secondary change counter at
  5465. ** 92 is also updated, as is the SQLite version number at offset 96.
  5466. **
  5467. ** But this only happens if the pPager->changeCountDone flag is false.
  5468. ** To avoid excess churning of page 1, the update only happens once.
  5469. ** See also the pager_write_changecounter() routine that does an
  5470. ** unconditional update of the change counters.
  5471. **
  5472. ** If the isDirectMode flag is zero, then this is done by calling
  5473. ** sqlite3PagerWrite() on page 1, then modifying the contents of the
  5474. ** page data. In this case the file will be updated when the current
  5475. ** transaction is committed.
  5476. **
  5477. ** The isDirectMode flag may only be non-zero if the library was compiled
  5478. ** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case,
  5479. ** if isDirect is non-zero, then the database file is updated directly
  5480. ** by writing an updated version of page 1 using a call to the
  5481. ** sqlite3OsWrite() function.
  5482. */
  5483. static int pager_incr_changecounter(Pager *pPager, int isDirectMode){
  5484. int rc = SQLITE_OK;
  5485. assert( pPager->eState==PAGER_WRITER_CACHEMOD
  5486. || pPager->eState==PAGER_WRITER_DBMOD
  5487. );
  5488. assert( assert_pager_state(pPager) );
  5489. /* Declare and initialize constant integer 'isDirect'. If the
  5490. ** atomic-write optimization is enabled in this build, then isDirect
  5491. ** is initialized to the value passed as the isDirectMode parameter
  5492. ** to this function. Otherwise, it is always set to zero.
  5493. **
  5494. ** The idea is that if the atomic-write optimization is not
  5495. ** enabled at compile time, the compiler can omit the tests of
  5496. ** 'isDirect' below, as well as the block enclosed in the
  5497. ** "if( isDirect )" condition.
  5498. */
  5499. #ifndef SQLITE_ENABLE_ATOMIC_WRITE
  5500. # define DIRECT_MODE 0
  5501. assert( isDirectMode==0 );
  5502. UNUSED_PARAMETER(isDirectMode);
  5503. #else
  5504. # define DIRECT_MODE isDirectMode
  5505. #endif
  5506. if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){
  5507. PgHdr *pPgHdr; /* Reference to page 1 */
  5508. assert( !pPager->tempFile && isOpen(pPager->fd) );
  5509. /* Open page 1 of the file for writing. */
  5510. rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
  5511. assert( pPgHdr==0 || rc==SQLITE_OK );
  5512. /* If page one was fetched successfully, and this function is not
  5513. ** operating in direct-mode, make page 1 writable. When not in
  5514. ** direct mode, page 1 is always held in cache and hence the PagerGet()
  5515. ** above is always successful - hence the ALWAYS on rc==SQLITE_OK.
  5516. */
  5517. if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){
  5518. rc = sqlite3PagerWrite(pPgHdr);
  5519. }
  5520. if( rc==SQLITE_OK ){
  5521. /* Actually do the update of the change counter */
  5522. pager_write_changecounter(pPgHdr);
  5523. /* If running in direct mode, write the contents of page 1 to the file. */
  5524. if( DIRECT_MODE ){
  5525. const void *zBuf;
  5526. assert( pPager->dbFileSize>0 );
  5527. CODEC2(pPager, pPgHdr->pData, 1, 6, rc=SQLITE_NOMEM, zBuf);
  5528. if( rc==SQLITE_OK ){
  5529. rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
  5530. pPager->aStat[PAGER_STAT_WRITE]++;
  5531. }
  5532. if( rc==SQLITE_OK ){
  5533. /* Update the pager's copy of the change-counter. Otherwise, the
  5534. ** next time a read transaction is opened the cache will be
  5535. ** flushed (as the change-counter values will not match). */
  5536. const void *pCopy = (const void *)&((const char *)zBuf)[24];
  5537. memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers));
  5538. pPager->changeCountDone = 1;
  5539. }
  5540. }else{
  5541. pPager->changeCountDone = 1;
  5542. }
  5543. }
  5544. /* Release the page reference. */
  5545. sqlite3PagerUnref(pPgHdr);
  5546. }
  5547. return rc;
  5548. }
  5549. /*
  5550. ** Sync the database file to disk. This is a no-op for in-memory databases
  5551. ** or pages with the Pager.noSync flag set.
  5552. **
  5553. ** If successful, or if called on a pager for which it is a no-op, this
  5554. ** function returns SQLITE_OK. Otherwise, an IO error code is returned.
  5555. */
  5556. int sqlite3PagerSync(Pager *pPager){
  5557. int rc = SQLITE_OK;
  5558. if( !pPager->noSync ){
  5559. assert( !MEMDB );
  5560. rc = sqlite3OsSync(pPager->fd, pPager->syncFlags);
  5561. }else if( isOpen(pPager->fd) ){
  5562. assert( !MEMDB );
  5563. rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC_OMITTED, 0);
  5564. if( rc==SQLITE_NOTFOUND ){
  5565. rc = SQLITE_OK;
  5566. }
  5567. }
  5568. return rc;
  5569. }
  5570. /*
  5571. ** This function may only be called while a write-transaction is active in
  5572. ** rollback. If the connection is in WAL mode, this call is a no-op.
  5573. ** Otherwise, if the connection does not already have an EXCLUSIVE lock on
  5574. ** the database file, an attempt is made to obtain one.
  5575. **
  5576. ** If the EXCLUSIVE lock is already held or the attempt to obtain it is
  5577. ** successful, or the connection is in WAL mode, SQLITE_OK is returned.
  5578. ** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is
  5579. ** returned.
  5580. */
  5581. int sqlite3PagerExclusiveLock(Pager *pPager){
  5582. int rc = SQLITE_OK;
  5583. assert( pPager->eState==PAGER_WRITER_CACHEMOD
  5584. || pPager->eState==PAGER_WRITER_DBMOD
  5585. || pPager->eState==PAGER_WRITER_LOCKED
  5586. );
  5587. assert( assert_pager_state(pPager) );
  5588. if( 0==pagerUseWal(pPager) ){
  5589. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  5590. }
  5591. return rc;
  5592. }
  5593. /*
  5594. ** Sync the database file for the pager pPager. zMaster points to the name
  5595. ** of a master journal file that should be written into the individual
  5596. ** journal file. zMaster may be NULL, which is interpreted as no master
  5597. ** journal (a single database transaction).
  5598. **
  5599. ** This routine ensures that:
  5600. **
  5601. ** * The database file change-counter is updated,
  5602. ** * the journal is synced (unless the atomic-write optimization is used),
  5603. ** * all dirty pages are written to the database file,
  5604. ** * the database file is truncated (if required), and
  5605. ** * the database file synced.
  5606. **
  5607. ** The only thing that remains to commit the transaction is to finalize
  5608. ** (delete, truncate or zero the first part of) the journal file (or
  5609. ** delete the master journal file if specified).
  5610. **
  5611. ** Note that if zMaster==NULL, this does not overwrite a previous value
  5612. ** passed to an sqlite3PagerCommitPhaseOne() call.
  5613. **
  5614. ** If the final parameter - noSync - is true, then the database file itself
  5615. ** is not synced. The caller must call sqlite3PagerSync() directly to
  5616. ** sync the database file before calling CommitPhaseTwo() to delete the
  5617. ** journal file in this case.
  5618. */
  5619. int sqlite3PagerCommitPhaseOne(
  5620. Pager *pPager, /* Pager object */
  5621. const char *zMaster, /* If not NULL, the master journal name */
  5622. int noSync /* True to omit the xSync on the db file */
  5623. ){
  5624. int rc = SQLITE_OK; /* Return code */
  5625. assert( pPager->eState==PAGER_WRITER_LOCKED
  5626. || pPager->eState==PAGER_WRITER_CACHEMOD
  5627. || pPager->eState==PAGER_WRITER_DBMOD
  5628. || pPager->eState==PAGER_ERROR
  5629. );
  5630. assert( assert_pager_state(pPager) );
  5631. /* If a prior error occurred, report that error again. */
  5632. if( NEVER(pPager->errCode) ) return pPager->errCode;
  5633. PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n",
  5634. pPager->zFilename, zMaster, pPager->dbSize));
  5635. /* If no database changes have been made, return early. */
  5636. if( pPager->eState<PAGER_WRITER_CACHEMOD ) return SQLITE_OK;
  5637. if( MEMDB ){
  5638. /* If this is an in-memory db, or no pages have been written to, or this
  5639. ** function has already been called, it is mostly a no-op. However, any
  5640. ** backup in progress needs to be restarted.
  5641. */
  5642. sqlite3BackupRestart(pPager->pBackup);
  5643. }else{
  5644. if( pagerUseWal(pPager) ){
  5645. PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
  5646. PgHdr *pPageOne = 0;
  5647. if( pList==0 ){
  5648. /* Must have at least one page for the WAL commit flag.
  5649. ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
  5650. rc = sqlite3PagerGet(pPager, 1, &pPageOne);
  5651. pList = pPageOne;
  5652. pList->pDirty = 0;
  5653. }
  5654. assert( rc==SQLITE_OK );
  5655. if( ALWAYS(pList) ){
  5656. rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1);
  5657. }
  5658. sqlite3PagerUnref(pPageOne);
  5659. if( rc==SQLITE_OK ){
  5660. sqlite3PcacheCleanAll(pPager->pPCache);
  5661. }
  5662. }else{
  5663. /* The following block updates the change-counter. Exactly how it
  5664. ** does this depends on whether or not the atomic-update optimization
  5665. ** was enabled at compile time, and if this transaction meets the
  5666. ** runtime criteria to use the operation:
  5667. **
  5668. ** * The file-system supports the atomic-write property for
  5669. ** blocks of size page-size, and
  5670. ** * This commit is not part of a multi-file transaction, and
  5671. ** * Exactly one page has been modified and store in the journal file.
  5672. **
  5673. ** If the optimization was not enabled at compile time, then the
  5674. ** pager_incr_changecounter() function is called to update the change
  5675. ** counter in 'indirect-mode'. If the optimization is compiled in but
  5676. ** is not applicable to this transaction, call sqlite3JournalCreate()
  5677. ** to make sure the journal file has actually been created, then call
  5678. ** pager_incr_changecounter() to update the change-counter in indirect
  5679. ** mode.
  5680. **
  5681. ** Otherwise, if the optimization is both enabled and applicable,
  5682. ** then call pager_incr_changecounter() to update the change-counter
  5683. ** in 'direct' mode. In this case the journal file will never be
  5684. ** created for this transaction.
  5685. */
  5686. #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  5687. PgHdr *pPg;
  5688. assert( isOpen(pPager->jfd)
  5689. || pPager->journalMode==PAGER_JOURNALMODE_OFF
  5690. || pPager->journalMode==PAGER_JOURNALMODE_WAL
  5691. );
  5692. if( !zMaster && isOpen(pPager->jfd)
  5693. && pPager->journalOff==jrnlBufferSize(pPager)
  5694. && pPager->dbSize>=pPager->dbOrigSize
  5695. && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
  5696. ){
  5697. /* Update the db file change counter via the direct-write method. The
  5698. ** following call will modify the in-memory representation of page 1
  5699. ** to include the updated change counter and then write page 1
  5700. ** directly to the database file. Because of the atomic-write
  5701. ** property of the host file-system, this is safe.
  5702. */
  5703. rc = pager_incr_changecounter(pPager, 1);
  5704. }else{
  5705. rc = sqlite3JournalCreate(pPager->jfd);
  5706. if( rc==SQLITE_OK ){
  5707. rc = pager_incr_changecounter(pPager, 0);
  5708. }
  5709. }
  5710. #else
  5711. rc = pager_incr_changecounter(pPager, 0);
  5712. #endif
  5713. if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  5714. /* Write the master journal name into the journal file. If a master
  5715. ** journal file name has already been written to the journal file,
  5716. ** or if zMaster is NULL (no master journal), then this call is a no-op.
  5717. */
  5718. rc = writeMasterJournal(pPager, zMaster);
  5719. if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  5720. /* Sync the journal file and write all dirty pages to the database.
  5721. ** If the atomic-update optimization is being used, this sync will not
  5722. ** create the journal file or perform any real IO.
  5723. **
  5724. ** Because the change-counter page was just modified, unless the
  5725. ** atomic-update optimization is used it is almost certain that the
  5726. ** journal requires a sync here. However, in locking_mode=exclusive
  5727. ** on a system under memory pressure it is just possible that this is
  5728. ** not the case. In this case it is likely enough that the redundant
  5729. ** xSync() call will be changed to a no-op by the OS anyhow.
  5730. */
  5731. rc = syncJournal(pPager, 0);
  5732. if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  5733. rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
  5734. if( rc!=SQLITE_OK ){
  5735. assert( rc!=SQLITE_IOERR_BLOCKED );
  5736. goto commit_phase_one_exit;
  5737. }
  5738. sqlite3PcacheCleanAll(pPager->pPCache);
  5739. /* If the file on disk is smaller than the database image, use
  5740. ** pager_truncate to grow the file here. This can happen if the database
  5741. ** image was extended as part of the current transaction and then the
  5742. ** last page in the db image moved to the free-list. In this case the
  5743. ** last page is never written out to disk, leaving the database file
  5744. ** undersized. Fix this now if it is the case. */
  5745. if( pPager->dbSize>pPager->dbFileSize ){
  5746. Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
  5747. assert( pPager->eState==PAGER_WRITER_DBMOD );
  5748. rc = pager_truncate(pPager, nNew);
  5749. if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  5750. }
  5751. /* Finally, sync the database file. */
  5752. if( !noSync ){
  5753. rc = sqlite3PagerSync(pPager);
  5754. }
  5755. IOTRACE(("DBSYNC %p\n", pPager))
  5756. }
  5757. }
  5758. commit_phase_one_exit:
  5759. if( rc==SQLITE_OK && !pagerUseWal(pPager) ){
  5760. pPager->eState = PAGER_WRITER_FINISHED;
  5761. }
  5762. return rc;
  5763. }
  5764. /*
  5765. ** When this function is called, the database file has been completely
  5766. ** updated to reflect the changes made by the current transaction and
  5767. ** synced to disk. The journal file still exists in the file-system
  5768. ** though, and if a failure occurs at this point it will eventually
  5769. ** be used as a hot-journal and the current transaction rolled back.
  5770. **
  5771. ** This function finalizes the journal file, either by deleting,
  5772. ** truncating or partially zeroing it, so that it cannot be used
  5773. ** for hot-journal rollback. Once this is done the transaction is
  5774. ** irrevocably committed.
  5775. **
  5776. ** If an error occurs, an IO error code is returned and the pager
  5777. ** moves into the error state. Otherwise, SQLITE_OK is returned.
  5778. */
  5779. int sqlite3PagerCommitPhaseTwo(Pager *pPager){
  5780. int rc = SQLITE_OK; /* Return code */
  5781. /* This routine should not be called if a prior error has occurred.
  5782. ** But if (due to a coding error elsewhere in the system) it does get
  5783. ** called, just return the same error code without doing anything. */
  5784. if( NEVER(pPager->errCode) ) return pPager->errCode;
  5785. assert( pPager->eState==PAGER_WRITER_LOCKED
  5786. || pPager->eState==PAGER_WRITER_FINISHED
  5787. || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD)
  5788. );
  5789. assert( assert_pager_state(pPager) );
  5790. /* An optimization. If the database was not actually modified during
  5791. ** this transaction, the pager is running in exclusive-mode and is
  5792. ** using persistent journals, then this function is a no-op.
  5793. **
  5794. ** The start of the journal file currently contains a single journal
  5795. ** header with the nRec field set to 0. If such a journal is used as
  5796. ** a hot-journal during hot-journal rollback, 0 changes will be made
  5797. ** to the database file. So there is no need to zero the journal
  5798. ** header. Since the pager is in exclusive mode, there is no need
  5799. ** to drop any locks either.
  5800. */
  5801. if( pPager->eState==PAGER_WRITER_LOCKED
  5802. && pPager->exclusiveMode
  5803. && pPager->journalMode==PAGER_JOURNALMODE_PERSIST
  5804. ){
  5805. assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff );
  5806. pPager->eState = PAGER_READER;
  5807. return SQLITE_OK;
  5808. }
  5809. PAGERTRACE(("COMMIT %d\n", PAGERID(pPager)));
  5810. rc = pager_end_transaction(pPager, pPager->setMaster, 1);
  5811. return pager_error(pPager, rc);
  5812. }
  5813. /*
  5814. ** If a write transaction is open, then all changes made within the
  5815. ** transaction are reverted and the current write-transaction is closed.
  5816. ** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR
  5817. ** state if an error occurs.
  5818. **
  5819. ** If the pager is already in PAGER_ERROR state when this function is called,
  5820. ** it returns Pager.errCode immediately. No work is performed in this case.
  5821. **
  5822. ** Otherwise, in rollback mode, this function performs two functions:
  5823. **
  5824. ** 1) It rolls back the journal file, restoring all database file and
  5825. ** in-memory cache pages to the state they were in when the transaction
  5826. ** was opened, and
  5827. **
  5828. ** 2) It finalizes the journal file, so that it is not used for hot
  5829. ** rollback at any point in the future.
  5830. **
  5831. ** Finalization of the journal file (task 2) is only performed if the
  5832. ** rollback is successful.
  5833. **
  5834. ** In WAL mode, all cache-entries containing data modified within the
  5835. ** current transaction are either expelled from the cache or reverted to
  5836. ** their pre-transaction state by re-reading data from the database or
  5837. ** WAL files. The WAL transaction is then closed.
  5838. */
  5839. int sqlite3PagerRollback(Pager *pPager){
  5840. int rc = SQLITE_OK; /* Return code */
  5841. PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));
  5842. /* PagerRollback() is a no-op if called in READER or OPEN state. If
  5843. ** the pager is already in the ERROR state, the rollback is not
  5844. ** attempted here. Instead, the error code is returned to the caller.
  5845. */
  5846. assert( assert_pager_state(pPager) );
  5847. if( pPager->eState==PAGER_ERROR ) return pPager->errCode;
  5848. if( pPager->eState<=PAGER_READER ) return SQLITE_OK;
  5849. if( pagerUseWal(pPager) ){
  5850. int rc2;
  5851. rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1);
  5852. rc2 = pager_end_transaction(pPager, pPager->setMaster, 0);
  5853. if( rc==SQLITE_OK ) rc = rc2;
  5854. }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){
  5855. int eState = pPager->eState;
  5856. rc = pager_end_transaction(pPager, 0, 0);
  5857. if( !MEMDB && eState>PAGER_WRITER_LOCKED ){
  5858. /* This can happen using journal_mode=off. Move the pager to the error
  5859. ** state to indicate that the contents of the cache may not be trusted.
  5860. ** Any active readers will get SQLITE_ABORT.
  5861. */
  5862. pPager->errCode = SQLITE_ABORT;
  5863. pPager->eState = PAGER_ERROR;
  5864. return rc;
  5865. }
  5866. }else{
  5867. rc = pager_playback(pPager, 0);
  5868. }
  5869. assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK );
  5870. assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT
  5871. || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR );
  5872. /* If an error occurs during a ROLLBACK, we can no longer trust the pager
  5873. ** cache. So call pager_error() on the way out to make any error persistent.
  5874. */
  5875. return pager_error(pPager, rc);
  5876. }
  5877. /*
  5878. ** Return TRUE if the database file is opened read-only. Return FALSE
  5879. ** if the database is (in theory) writable.
  5880. */
  5881. u8 sqlite3PagerIsreadonly(Pager *pPager){
  5882. return pPager->readOnly;
  5883. }
  5884. /*
  5885. ** Return the number of references to the pager.
  5886. */
  5887. int sqlite3PagerRefcount(Pager *pPager){
  5888. return sqlite3PcacheRefCount(pPager->pPCache);
  5889. }
  5890. /*
  5891. ** Return the approximate number of bytes of memory currently
  5892. ** used by the pager and its associated cache.
  5893. */
  5894. int sqlite3PagerMemUsed(Pager *pPager){
  5895. int perPageSize = pPager->pageSize + pPager->nExtra + sizeof(PgHdr)
  5896. + 5*sizeof(void*);
  5897. return perPageSize*sqlite3PcachePagecount(pPager->pPCache)
  5898. + sqlite3MallocSize(pPager)
  5899. + pPager->pageSize;
  5900. }
  5901. /*
  5902. ** Return the number of references to the specified page.
  5903. */
  5904. int sqlite3PagerPageRefcount(DbPage *pPage){
  5905. return sqlite3PcachePageRefcount(pPage);
  5906. }
  5907. #ifdef SQLITE_TEST
  5908. /*
  5909. ** This routine is used for testing and analysis only.
  5910. */
  5911. int *sqlite3PagerStats(Pager *pPager){
  5912. static int a[11];
  5913. a[0] = sqlite3PcacheRefCount(pPager->pPCache);
  5914. a[1] = sqlite3PcachePagecount(pPager->pPCache);
  5915. a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);
  5916. a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize;
  5917. a[4] = pPager->eState;
  5918. a[5] = pPager->errCode;
  5919. a[6] = pPager->aStat[PAGER_STAT_HIT];
  5920. a[7] = pPager->aStat[PAGER_STAT_MISS];
  5921. a[8] = 0; /* Used to be pPager->nOvfl */
  5922. a[9] = pPager->nRead;
  5923. a[10] = pPager->aStat[PAGER_STAT_WRITE];
  5924. return a;
  5925. }
  5926. #endif
  5927. /*
  5928. ** Parameter eStat must be either SQLITE_DBSTATUS_CACHE_HIT or
  5929. ** SQLITE_DBSTATUS_CACHE_MISS. Before returning, *pnVal is incremented by the
  5930. ** current cache hit or miss count, according to the value of eStat. If the
  5931. ** reset parameter is non-zero, the cache hit or miss count is zeroed before
  5932. ** returning.
  5933. */
  5934. void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){
  5935. assert( eStat==SQLITE_DBSTATUS_CACHE_HIT
  5936. || eStat==SQLITE_DBSTATUS_CACHE_MISS
  5937. || eStat==SQLITE_DBSTATUS_CACHE_WRITE
  5938. );
  5939. assert( SQLITE_DBSTATUS_CACHE_HIT+1==SQLITE_DBSTATUS_CACHE_MISS );
  5940. assert( SQLITE_DBSTATUS_CACHE_HIT+2==SQLITE_DBSTATUS_CACHE_WRITE );
  5941. assert( PAGER_STAT_HIT==0 && PAGER_STAT_MISS==1 && PAGER_STAT_WRITE==2 );
  5942. *pnVal += pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT];
  5943. if( reset ){
  5944. pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT] = 0;
  5945. }
  5946. }
  5947. /*
  5948. ** Return true if this is an in-memory pager.
  5949. */
  5950. int sqlite3PagerIsMemdb(Pager *pPager){
  5951. return MEMDB;
  5952. }
  5953. /*
  5954. ** Check that there are at least nSavepoint savepoints open. If there are
  5955. ** currently less than nSavepoints open, then open one or more savepoints
  5956. ** to make up the difference. If the number of savepoints is already
  5957. ** equal to nSavepoint, then this function is a no-op.
  5958. **
  5959. ** If a memory allocation fails, SQLITE_NOMEM is returned. If an error
  5960. ** occurs while opening the sub-journal file, then an IO error code is
  5961. ** returned. Otherwise, SQLITE_OK.
  5962. */
  5963. int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
  5964. int rc = SQLITE_OK; /* Return code */
  5965. int nCurrent = pPager->nSavepoint; /* Current number of savepoints */
  5966. assert( pPager->eState>=PAGER_WRITER_LOCKED );
  5967. assert( assert_pager_state(pPager) );
  5968. if( nSavepoint>nCurrent && pPager->useJournal ){
  5969. int ii; /* Iterator variable */
  5970. PagerSavepoint *aNew; /* New Pager.aSavepoint array */
  5971. /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM
  5972. ** if the allocation fails. Otherwise, zero the new portion in case a
  5973. ** malloc failure occurs while populating it in the for(...) loop below.
  5974. */
  5975. aNew = (PagerSavepoint *)sqlite3Realloc(
  5976. pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint
  5977. );
  5978. if( !aNew ){
  5979. return SQLITE_NOMEM;
  5980. }
  5981. memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint));
  5982. pPager->aSavepoint = aNew;
  5983. /* Populate the PagerSavepoint structures just allocated. */
  5984. for(ii=nCurrent; ii<nSavepoint; ii++){
  5985. aNew[ii].nOrig = pPager->dbSize;
  5986. if( isOpen(pPager->jfd) && pPager->journalOff>0 ){
  5987. aNew[ii].iOffset = pPager->journalOff;
  5988. }else{
  5989. aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager);
  5990. }
  5991. aNew[ii].iSubRec = pPager->nSubRec;
  5992. aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize);
  5993. if( !aNew[ii].pInSavepoint ){
  5994. return SQLITE_NOMEM;
  5995. }
  5996. if( pagerUseWal(pPager) ){
  5997. sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData);
  5998. }
  5999. pPager->nSavepoint = ii+1;
  6000. }
  6001. assert( pPager->nSavepoint==nSavepoint );
  6002. assertTruncateConstraint(pPager);
  6003. }
  6004. return rc;
  6005. }
  6006. /*
  6007. ** This function is called to rollback or release (commit) a savepoint.
  6008. ** The savepoint to release or rollback need not be the most recently
  6009. ** created savepoint.
  6010. **
  6011. ** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE.
  6012. ** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with
  6013. ** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes
  6014. ** that have occurred since the specified savepoint was created.
  6015. **
  6016. ** The savepoint to rollback or release is identified by parameter
  6017. ** iSavepoint. A value of 0 means to operate on the outermost savepoint
  6018. ** (the first created). A value of (Pager.nSavepoint-1) means operate
  6019. ** on the most recently created savepoint. If iSavepoint is greater than
  6020. ** (Pager.nSavepoint-1), then this function is a no-op.
  6021. **
  6022. ** If a negative value is passed to this function, then the current
  6023. ** transaction is rolled back. This is different to calling
  6024. ** sqlite3PagerRollback() because this function does not terminate
  6025. ** the transaction or unlock the database, it just restores the
  6026. ** contents of the database to its original state.
  6027. **
  6028. ** In any case, all savepoints with an index greater than iSavepoint
  6029. ** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE),
  6030. ** then savepoint iSavepoint is also destroyed.
  6031. **
  6032. ** This function may return SQLITE_NOMEM if a memory allocation fails,
  6033. ** or an IO error code if an IO error occurs while rolling back a
  6034. ** savepoint. If no errors occur, SQLITE_OK is returned.
  6035. */
  6036. int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){
  6037. int rc = pPager->errCode; /* Return code */
  6038. assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
  6039. assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK );
  6040. if( rc==SQLITE_OK && iSavepoint<pPager->nSavepoint ){
  6041. int ii; /* Iterator variable */
  6042. int nNew; /* Number of remaining savepoints after this op. */
  6043. /* Figure out how many savepoints will still be active after this
  6044. ** operation. Store this value in nNew. Then free resources associated
  6045. ** with any savepoints that are destroyed by this operation.
  6046. */
  6047. nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1);
  6048. for(ii=nNew; ii<pPager->nSavepoint; ii++){
  6049. sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
  6050. }
  6051. pPager->nSavepoint = nNew;
  6052. /* If this is a release of the outermost savepoint, truncate
  6053. ** the sub-journal to zero bytes in size. */
  6054. if( op==SAVEPOINT_RELEASE ){
  6055. if( nNew==0 && isOpen(pPager->sjfd) ){
  6056. /* Only truncate if it is an in-memory sub-journal. */
  6057. if( sqlite3IsMemJournal(pPager->sjfd) ){
  6058. rc = sqlite3OsTruncate(pPager->sjfd, 0);
  6059. assert( rc==SQLITE_OK );
  6060. }
  6061. pPager->nSubRec = 0;
  6062. }
  6063. }
  6064. /* Else this is a rollback operation, playback the specified savepoint.
  6065. ** If this is a temp-file, it is possible that the journal file has
  6066. ** not yet been opened. In this case there have been no changes to
  6067. ** the database file, so the playback operation can be skipped.
  6068. */
  6069. else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){
  6070. PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
  6071. rc = pagerPlaybackSavepoint(pPager, pSavepoint);
  6072. assert(rc!=SQLITE_DONE);
  6073. }
  6074. }
  6075. return rc;
  6076. }
  6077. /*
  6078. ** Return the full pathname of the database file.
  6079. **
  6080. ** Except, if the pager is in-memory only, then return an empty string if
  6081. ** nullIfMemDb is true. This routine is called with nullIfMemDb==1 when
  6082. ** used to report the filename to the user, for compatibility with legacy
  6083. ** behavior. But when the Btree needs to know the filename for matching to
  6084. ** shared cache, it uses nullIfMemDb==0 so that in-memory databases can
  6085. ** participate in shared-cache.
  6086. */
  6087. const char *sqlite3PagerFilename(Pager *pPager, int nullIfMemDb){
  6088. return (nullIfMemDb && pPager->memDb) ? "" : pPager->zFilename;
  6089. }
  6090. /*
  6091. ** Return the VFS structure for the pager.
  6092. */
  6093. const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
  6094. return pPager->pVfs;
  6095. }
  6096. /*
  6097. ** Return the file handle for the database file associated
  6098. ** with the pager. This might return NULL if the file has
  6099. ** not yet been opened.
  6100. */
  6101. sqlite3_file *sqlite3PagerFile(Pager *pPager){
  6102. return pPager->fd;
  6103. }
  6104. /*
  6105. ** Return the full pathname of the journal file.
  6106. */
  6107. const char *sqlite3PagerJournalname(Pager *pPager){
  6108. return pPager->zJournal;
  6109. }
  6110. /*
  6111. ** Return true if fsync() calls are disabled for this pager. Return FALSE
  6112. ** if fsync()s are executed normally.
  6113. */
  6114. int sqlite3PagerNosync(Pager *pPager){
  6115. return pPager->noSync;
  6116. }
  6117. #ifdef SQLITE_HAS_CODEC
  6118. /*
  6119. ** Set or retrieve the codec for this pager
  6120. */
  6121. void sqlite3PagerSetCodec(
  6122. Pager *pPager,
  6123. void *(*xCodec)(void*,void*,Pgno,int),
  6124. void (*xCodecSizeChng)(void*,int,int),
  6125. void (*xCodecFree)(void*),
  6126. void *pCodec
  6127. ){
  6128. if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
  6129. pPager->xCodec = pPager->memDb ? 0 : xCodec;
  6130. pPager->xCodecSizeChng = xCodecSizeChng;
  6131. pPager->xCodecFree = xCodecFree;
  6132. pPager->pCodec = pCodec;
  6133. pagerReportSize(pPager);
  6134. }
  6135. void *sqlite3PagerGetCodec(Pager *pPager){
  6136. return pPager->pCodec;
  6137. }
  6138. /*
  6139. ** This function is called by the wal module when writing page content
  6140. ** into the log file.
  6141. **
  6142. ** This function returns a pointer to a buffer containing the encrypted
  6143. ** page content. If a malloc fails, this function may return NULL.
  6144. */
  6145. void *sqlite3PagerCodec(PgHdr *pPg){
  6146. void *aData = 0;
  6147. CODEC2(pPg->pPager, pPg->pData, pPg->pgno, 6, return 0, aData);
  6148. return aData;
  6149. }
  6150. /*
  6151. ** Return the current pager state
  6152. */
  6153. int sqlite3PagerState(Pager *pPager){
  6154. return pPager->eState;
  6155. }
  6156. #endif /* SQLITE_HAS_CODEC */
  6157. #ifndef SQLITE_OMIT_AUTOVACUUM
  6158. /*
  6159. ** Move the page pPg to location pgno in the file.
  6160. **
  6161. ** There must be no references to the page previously located at
  6162. ** pgno (which we call pPgOld) though that page is allowed to be
  6163. ** in cache. If the page previously located at pgno is not already
  6164. ** in the rollback journal, it is not put there by by this routine.
  6165. **
  6166. ** References to the page pPg remain valid. Updating any
  6167. ** meta-data associated with pPg (i.e. data stored in the nExtra bytes
  6168. ** allocated along with the page) is the responsibility of the caller.
  6169. **
  6170. ** A transaction must be active when this routine is called. It used to be
  6171. ** required that a statement transaction was not active, but this restriction
  6172. ** has been removed (CREATE INDEX needs to move a page when a statement
  6173. ** transaction is active).
  6174. **
  6175. ** If the fourth argument, isCommit, is non-zero, then this page is being
  6176. ** moved as part of a database reorganization just before the transaction
  6177. ** is being committed. In this case, it is guaranteed that the database page
  6178. ** pPg refers to will not be written to again within this transaction.
  6179. **
  6180. ** This function may return SQLITE_NOMEM or an IO error code if an error
  6181. ** occurs. Otherwise, it returns SQLITE_OK.
  6182. */
  6183. int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
  6184. PgHdr *pPgOld; /* The page being overwritten. */
  6185. Pgno needSyncPgno = 0; /* Old value of pPg->pgno, if sync is required */
  6186. int rc; /* Return code */
  6187. Pgno origPgno; /* The original page number */
  6188. assert( pPg->nRef>0 );
  6189. assert( pPager->eState==PAGER_WRITER_CACHEMOD
  6190. || pPager->eState==PAGER_WRITER_DBMOD
  6191. );
  6192. assert( assert_pager_state(pPager) );
  6193. /* In order to be able to rollback, an in-memory database must journal
  6194. ** the page we are moving from.
  6195. */
  6196. if( MEMDB ){
  6197. rc = sqlite3PagerWrite(pPg);
  6198. if( rc ) return rc;
  6199. }
  6200. /* If the page being moved is dirty and has not been saved by the latest
  6201. ** savepoint, then save the current contents of the page into the
  6202. ** sub-journal now. This is required to handle the following scenario:
  6203. **
  6204. ** BEGIN;
  6205. ** <journal page X, then modify it in memory>
  6206. ** SAVEPOINT one;
  6207. ** <Move page X to location Y>
  6208. ** ROLLBACK TO one;
  6209. **
  6210. ** If page X were not written to the sub-journal here, it would not
  6211. ** be possible to restore its contents when the "ROLLBACK TO one"
  6212. ** statement were is processed.
  6213. **
  6214. ** subjournalPage() may need to allocate space to store pPg->pgno into
  6215. ** one or more savepoint bitvecs. This is the reason this function
  6216. ** may return SQLITE_NOMEM.
  6217. */
  6218. if( pPg->flags&PGHDR_DIRTY
  6219. && subjRequiresPage(pPg)
  6220. && SQLITE_OK!=(rc = subjournalPage(pPg))
  6221. ){
  6222. return rc;
  6223. }
  6224. PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n",
  6225. PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno));
  6226. IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
  6227. /* If the journal needs to be sync()ed before page pPg->pgno can
  6228. ** be written to, store pPg->pgno in local variable needSyncPgno.
  6229. **
  6230. ** If the isCommit flag is set, there is no need to remember that
  6231. ** the journal needs to be sync()ed before database page pPg->pgno
  6232. ** can be written to. The caller has already promised not to write to it.
  6233. */
  6234. if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
  6235. needSyncPgno = pPg->pgno;
  6236. assert( pPager->journalMode==PAGER_JOURNALMODE_OFF ||
  6237. pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
  6238. assert( pPg->flags&PGHDR_DIRTY );
  6239. }
  6240. /* If the cache contains a page with page-number pgno, remove it
  6241. ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for
  6242. ** page pgno before the 'move' operation, it needs to be retained
  6243. ** for the page moved there.
  6244. */
  6245. pPg->flags &= ~PGHDR_NEED_SYNC;
  6246. pPgOld = pager_lookup(pPager, pgno);
  6247. assert( !pPgOld || pPgOld->nRef==1 );
  6248. if( pPgOld ){
  6249. pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
  6250. if( MEMDB ){
  6251. /* Do not discard pages from an in-memory database since we might
  6252. ** need to rollback later. Just move the page out of the way. */
  6253. sqlite3PcacheMove(pPgOld, pPager->dbSize+1);
  6254. }else{
  6255. sqlite3PcacheDrop(pPgOld);
  6256. }
  6257. }
  6258. origPgno = pPg->pgno;
  6259. sqlite3PcacheMove(pPg, pgno);
  6260. sqlite3PcacheMakeDirty(pPg);
  6261. /* For an in-memory database, make sure the original page continues
  6262. ** to exist, in case the transaction needs to roll back. Use pPgOld
  6263. ** as the original page since it has already been allocated.
  6264. */
  6265. if( MEMDB ){
  6266. assert( pPgOld );
  6267. sqlite3PcacheMove(pPgOld, origPgno);
  6268. sqlite3PagerUnref(pPgOld);
  6269. }
  6270. if( needSyncPgno ){
  6271. /* If needSyncPgno is non-zero, then the journal file needs to be
  6272. ** sync()ed before any data is written to database file page needSyncPgno.
  6273. ** Currently, no such page exists in the page-cache and the
  6274. ** "is journaled" bitvec flag has been set. This needs to be remedied by
  6275. ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC
  6276. ** flag.
  6277. **
  6278. ** If the attempt to load the page into the page-cache fails, (due
  6279. ** to a malloc() or IO failure), clear the bit in the pInJournal[]
  6280. ** array. Otherwise, if the page is loaded and written again in
  6281. ** this transaction, it may be written to the database file before
  6282. ** it is synced into the journal file. This way, it may end up in
  6283. ** the journal file twice, but that is not a problem.
  6284. */
  6285. PgHdr *pPgHdr;
  6286. rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
  6287. if( rc!=SQLITE_OK ){
  6288. if( needSyncPgno<=pPager->dbOrigSize ){
  6289. assert( pPager->pTmpSpace!=0 );
  6290. sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace);
  6291. }
  6292. return rc;
  6293. }
  6294. pPgHdr->flags |= PGHDR_NEED_SYNC;
  6295. sqlite3PcacheMakeDirty(pPgHdr);
  6296. sqlite3PagerUnref(pPgHdr);
  6297. }
  6298. return SQLITE_OK;
  6299. }
  6300. #endif
  6301. /*
  6302. ** Return a pointer to the data for the specified page.
  6303. */
  6304. void *sqlite3PagerGetData(DbPage *pPg){
  6305. assert( pPg->nRef>0 || pPg->pPager->memDb );
  6306. return pPg->pData;
  6307. }
  6308. /*
  6309. ** Return a pointer to the Pager.nExtra bytes of "extra" space
  6310. ** allocated along with the specified page.
  6311. */
  6312. void *sqlite3PagerGetExtra(DbPage *pPg){
  6313. return pPg->pExtra;
  6314. }
  6315. /*
  6316. ** Get/set the locking-mode for this pager. Parameter eMode must be one
  6317. ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
  6318. ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
  6319. ** the locking-mode is set to the value specified.
  6320. **
  6321. ** The returned value is either PAGER_LOCKINGMODE_NORMAL or
  6322. ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
  6323. ** locking-mode.
  6324. */
  6325. int sqlite3PagerLockingMode(Pager *pPager, int eMode){
  6326. assert( eMode==PAGER_LOCKINGMODE_QUERY
  6327. || eMode==PAGER_LOCKINGMODE_NORMAL
  6328. || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
  6329. assert( PAGER_LOCKINGMODE_QUERY<0 );
  6330. assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
  6331. assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) );
  6332. if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){
  6333. pPager->exclusiveMode = (u8)eMode;
  6334. }
  6335. return (int)pPager->exclusiveMode;
  6336. }
  6337. /*
  6338. ** Set the journal-mode for this pager. Parameter eMode must be one of:
  6339. **
  6340. ** PAGER_JOURNALMODE_DELETE
  6341. ** PAGER_JOURNALMODE_TRUNCATE
  6342. ** PAGER_JOURNALMODE_PERSIST
  6343. ** PAGER_JOURNALMODE_OFF
  6344. ** PAGER_JOURNALMODE_MEMORY
  6345. ** PAGER_JOURNALMODE_WAL
  6346. **
  6347. ** The journalmode is set to the value specified if the change is allowed.
  6348. ** The change may be disallowed for the following reasons:
  6349. **
  6350. ** * An in-memory database can only have its journal_mode set to _OFF
  6351. ** or _MEMORY.
  6352. **
  6353. ** * Temporary databases cannot have _WAL journalmode.
  6354. **
  6355. ** The returned indicate the current (possibly updated) journal-mode.
  6356. */
  6357. int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){
  6358. u8 eOld = pPager->journalMode; /* Prior journalmode */
  6359. #ifdef SQLITE_DEBUG
  6360. /* The print_pager_state() routine is intended to be used by the debugger
  6361. ** only. We invoke it once here to suppress a compiler warning. */
  6362. print_pager_state(pPager);
  6363. #endif
  6364. /* The eMode parameter is always valid */
  6365. assert( eMode==PAGER_JOURNALMODE_DELETE
  6366. || eMode==PAGER_JOURNALMODE_TRUNCATE
  6367. || eMode==PAGER_JOURNALMODE_PERSIST
  6368. || eMode==PAGER_JOURNALMODE_OFF
  6369. || eMode==PAGER_JOURNALMODE_WAL
  6370. || eMode==PAGER_JOURNALMODE_MEMORY );
  6371. /* This routine is only called from the OP_JournalMode opcode, and
  6372. ** the logic there will never allow a temporary file to be changed
  6373. ** to WAL mode.
  6374. */
  6375. assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL );
  6376. /* Do allow the journalmode of an in-memory database to be set to
  6377. ** anything other than MEMORY or OFF
  6378. */
  6379. if( MEMDB ){
  6380. assert( eOld==PAGER_JOURNALMODE_MEMORY || eOld==PAGER_JOURNALMODE_OFF );
  6381. if( eMode!=PAGER_JOURNALMODE_MEMORY && eMode!=PAGER_JOURNALMODE_OFF ){
  6382. eMode = eOld;
  6383. }
  6384. }
  6385. if( eMode!=eOld ){
  6386. /* Change the journal mode. */
  6387. assert( pPager->eState!=PAGER_ERROR );
  6388. pPager->journalMode = (u8)eMode;
  6389. /* When transistioning from TRUNCATE or PERSIST to any other journal
  6390. ** mode except WAL, unless the pager is in locking_mode=exclusive mode,
  6391. ** delete the journal file.
  6392. */
  6393. assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
  6394. assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
  6395. assert( (PAGER_JOURNALMODE_DELETE & 5)==0 );
  6396. assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 );
  6397. assert( (PAGER_JOURNALMODE_OFF & 5)==0 );
  6398. assert( (PAGER_JOURNALMODE_WAL & 5)==5 );
  6399. assert( isOpen(pPager->fd) || pPager->exclusiveMode );
  6400. if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){
  6401. /* In this case we would like to delete the journal file. If it is
  6402. ** not possible, then that is not a problem. Deleting the journal file
  6403. ** here is an optimization only.
  6404. **
  6405. ** Before deleting the journal file, obtain a RESERVED lock on the
  6406. ** database file. This ensures that the journal file is not deleted
  6407. ** while it is in use by some other client.
  6408. */
  6409. sqlite3OsClose(pPager->jfd);
  6410. if( pPager->eLock>=RESERVED_LOCK ){
  6411. sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
  6412. }else{
  6413. int rc = SQLITE_OK;
  6414. int state = pPager->eState;
  6415. assert( state==PAGER_OPEN || state==PAGER_READER );
  6416. if( state==PAGER_OPEN ){
  6417. rc = sqlite3PagerSharedLock(pPager);
  6418. }
  6419. if( pPager->eState==PAGER_READER ){
  6420. assert( rc==SQLITE_OK );
  6421. rc = pagerLockDb(pPager, RESERVED_LOCK);
  6422. }
  6423. if( rc==SQLITE_OK ){
  6424. sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
  6425. }
  6426. if( rc==SQLITE_OK && state==PAGER_READER ){
  6427. pagerUnlockDb(pPager, SHARED_LOCK);
  6428. }else if( state==PAGER_OPEN ){
  6429. pager_unlock(pPager);
  6430. }
  6431. assert( state==pPager->eState );
  6432. }
  6433. }
  6434. }
  6435. /* Return the new journal mode */
  6436. return (int)pPager->journalMode;
  6437. }
  6438. /*
  6439. ** Return the current journal mode.
  6440. */
  6441. int sqlite3PagerGetJournalMode(Pager *pPager){
  6442. return (int)pPager->journalMode;
  6443. }
  6444. /*
  6445. ** Return TRUE if the pager is in a state where it is OK to change the
  6446. ** journalmode. Journalmode changes can only happen when the database
  6447. ** is unmodified.
  6448. */
  6449. int sqlite3PagerOkToChangeJournalMode(Pager *pPager){
  6450. assert( assert_pager_state(pPager) );
  6451. if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0;
  6452. if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0;
  6453. return 1;
  6454. }
  6455. /*
  6456. ** Get/set the size-limit used for persistent journal files.
  6457. **
  6458. ** Setting the size limit to -1 means no limit is enforced.
  6459. ** An attempt to set a limit smaller than -1 is a no-op.
  6460. */
  6461. i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
  6462. if( iLimit>=-1 ){
  6463. pPager->journalSizeLimit = iLimit;
  6464. sqlite3WalLimit(pPager->pWal, iLimit);
  6465. }
  6466. return pPager->journalSizeLimit;
  6467. }
  6468. /*
  6469. ** Return a pointer to the pPager->pBackup variable. The backup module
  6470. ** in backup.c maintains the content of this variable. This module
  6471. ** uses it opaquely as an argument to sqlite3BackupRestart() and
  6472. ** sqlite3BackupUpdate() only.
  6473. */
  6474. sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
  6475. return &pPager->pBackup;
  6476. }
  6477. #ifndef SQLITE_OMIT_VACUUM
  6478. /*
  6479. ** Unless this is an in-memory or temporary database, clear the pager cache.
  6480. */
  6481. void sqlite3PagerClearCache(Pager *pPager){
  6482. if( !MEMDB && pPager->tempFile==0 ) pager_reset(pPager);
  6483. }
  6484. #endif
  6485. #ifndef SQLITE_OMIT_WAL
  6486. /*
  6487. ** This function is called when the user invokes "PRAGMA wal_checkpoint",
  6488. ** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint()
  6489. ** or wal_blocking_checkpoint() API functions.
  6490. **
  6491. ** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
  6492. */
  6493. int sqlite3PagerCheckpoint(Pager *pPager, int eMode, int *pnLog, int *pnCkpt){
  6494. int rc = SQLITE_OK;
  6495. if( pPager->pWal ){
  6496. rc = sqlite3WalCheckpoint(pPager->pWal, eMode,
  6497. pPager->xBusyHandler, pPager->pBusyHandlerArg,
  6498. pPager->ckptSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace,
  6499. pnLog, pnCkpt
  6500. );
  6501. }
  6502. return rc;
  6503. }
  6504. int sqlite3PagerWalCallback(Pager *pPager){
  6505. return sqlite3WalCallback(pPager->pWal);
  6506. }
  6507. /*
  6508. ** Return true if the underlying VFS for the given pager supports the
  6509. ** primitives necessary for write-ahead logging.
  6510. */
  6511. int sqlite3PagerWalSupported(Pager *pPager){
  6512. const sqlite3_io_methods *pMethods = pPager->fd->pMethods;
  6513. return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap);
  6514. }
  6515. /*
  6516. ** Attempt to take an exclusive lock on the database file. If a PENDING lock
  6517. ** is obtained instead, immediately release it.
  6518. */
  6519. static int pagerExclusiveLock(Pager *pPager){
  6520. int rc; /* Return code */
  6521. assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
  6522. rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
  6523. if( rc!=SQLITE_OK ){
  6524. /* If the attempt to grab the exclusive lock failed, release the
  6525. ** pending lock that may have been obtained instead. */
  6526. pagerUnlockDb(pPager, SHARED_LOCK);
  6527. }
  6528. return rc;
  6529. }
  6530. /*
  6531. ** Call sqlite3WalOpen() to open the WAL handle. If the pager is in
  6532. ** exclusive-locking mode when this function is called, take an EXCLUSIVE
  6533. ** lock on the database file and use heap-memory to store the wal-index
  6534. ** in. Otherwise, use the normal shared-memory.
  6535. */
  6536. static int pagerOpenWal(Pager *pPager){
  6537. int rc = SQLITE_OK;
  6538. assert( pPager->pWal==0 && pPager->tempFile==0 );
  6539. assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
  6540. /* If the pager is already in exclusive-mode, the WAL module will use
  6541. ** heap-memory for the wal-index instead of the VFS shared-memory
  6542. ** implementation. Take the exclusive lock now, before opening the WAL
  6543. ** file, to make sure this is safe.
  6544. */
  6545. if( pPager->exclusiveMode ){
  6546. rc = pagerExclusiveLock(pPager);
  6547. }
  6548. /* Open the connection to the log file. If this operation fails,
  6549. ** (e.g. due to malloc() failure), return an error code.
  6550. */
  6551. if( rc==SQLITE_OK ){
  6552. rc = sqlite3WalOpen(pPager->pVfs,
  6553. pPager->fd, pPager->zWal, pPager->exclusiveMode,
  6554. pPager->journalSizeLimit, &pPager->pWal
  6555. );
  6556. }
  6557. pagerFixMaplimit(pPager);
  6558. return rc;
  6559. }
  6560. /*
  6561. ** The caller must be holding a SHARED lock on the database file to call
  6562. ** this function.
  6563. **
  6564. ** If the pager passed as the first argument is open on a real database
  6565. ** file (not a temp file or an in-memory database), and the WAL file
  6566. ** is not already open, make an attempt to open it now. If successful,
  6567. ** return SQLITE_OK. If an error occurs or the VFS used by the pager does
  6568. ** not support the xShmXXX() methods, return an error code. *pbOpen is
  6569. ** not modified in either case.
  6570. **
  6571. ** If the pager is open on a temp-file (or in-memory database), or if
  6572. ** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK
  6573. ** without doing anything.
  6574. */
  6575. int sqlite3PagerOpenWal(
  6576. Pager *pPager, /* Pager object */
  6577. int *pbOpen /* OUT: Set to true if call is a no-op */
  6578. ){
  6579. int rc = SQLITE_OK; /* Return code */
  6580. assert( assert_pager_state(pPager) );
  6581. assert( pPager->eState==PAGER_OPEN || pbOpen );
  6582. assert( pPager->eState==PAGER_READER || !pbOpen );
  6583. assert( pbOpen==0 || *pbOpen==0 );
  6584. assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) );
  6585. if( !pPager->tempFile && !pPager->pWal ){
  6586. if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN;
  6587. /* Close any rollback journal previously open */
  6588. sqlite3OsClose(pPager->jfd);
  6589. rc = pagerOpenWal(pPager);
  6590. if( rc==SQLITE_OK ){
  6591. pPager->journalMode = PAGER_JOURNALMODE_WAL;
  6592. pPager->eState = PAGER_OPEN;
  6593. }
  6594. }else{
  6595. *pbOpen = 1;
  6596. }
  6597. return rc;
  6598. }
  6599. /*
  6600. ** This function is called to close the connection to the log file prior
  6601. ** to switching from WAL to rollback mode.
  6602. **
  6603. ** Before closing the log file, this function attempts to take an
  6604. ** EXCLUSIVE lock on the database file. If this cannot be obtained, an
  6605. ** error (SQLITE_BUSY) is returned and the log connection is not closed.
  6606. ** If successful, the EXCLUSIVE lock is not released before returning.
  6607. */
  6608. int sqlite3PagerCloseWal(Pager *pPager){
  6609. int rc = SQLITE_OK;
  6610. assert( pPager->journalMode==PAGER_JOURNALMODE_WAL );
  6611. /* If the log file is not already open, but does exist in the file-system,
  6612. ** it may need to be checkpointed before the connection can switch to
  6613. ** rollback mode. Open it now so this can happen.
  6614. */
  6615. if( !pPager->pWal ){
  6616. int logexists = 0;
  6617. rc = pagerLockDb(pPager, SHARED_LOCK);
  6618. if( rc==SQLITE_OK ){
  6619. rc = sqlite3OsAccess(
  6620. pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists
  6621. );
  6622. }
  6623. if( rc==SQLITE_OK && logexists ){
  6624. rc = pagerOpenWal(pPager);
  6625. }
  6626. }
  6627. /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on
  6628. ** the database file, the log and log-summary files will be deleted.
  6629. */
  6630. if( rc==SQLITE_OK && pPager->pWal ){
  6631. rc = pagerExclusiveLock(pPager);
  6632. if( rc==SQLITE_OK ){
  6633. rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags,
  6634. pPager->pageSize, (u8*)pPager->pTmpSpace);
  6635. pPager->pWal = 0;
  6636. pagerFixMaplimit(pPager);
  6637. }
  6638. }
  6639. return rc;
  6640. }
  6641. #endif /* !SQLITE_OMIT_WAL */
  6642. #ifdef SQLITE_ENABLE_ZIPVFS
  6643. /*
  6644. ** A read-lock must be held on the pager when this function is called. If
  6645. ** the pager is in WAL mode and the WAL file currently contains one or more
  6646. ** frames, return the size in bytes of the page images stored within the
  6647. ** WAL frames. Otherwise, if this is not a WAL database or the WAL file
  6648. ** is empty, return 0.
  6649. */
  6650. int sqlite3PagerWalFramesize(Pager *pPager){
  6651. assert( pPager->eState==PAGER_READER );
  6652. return sqlite3WalFramesize(pPager->pWal);
  6653. }
  6654. #endif
  6655. #endif /* SQLITE_OMIT_DISKIO */