xref: /sqlite-3.40.0/src/pager.c (revision ef5ecb41)
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** This is the implementation of the page cache subsystem or "pager".
13 **
14 ** The pager is used to access a database disk file.  It implements
15 ** atomic commit and rollback through the use of a journal file that
16 ** is separate from the database file.  The pager also implements file
17 ** locking to prevent two processes from writing the same database
18 ** file simultaneously, or one process from reading the database while
19 ** another is writing.
20 **
21 ** @(#) $Id: pager.c,v 1.122 2004/06/10 05:59:25 danielk1977 Exp $
22 */
23 #include "os.h"         /* Must be first to enable large file support */
24 #include "sqliteInt.h"
25 #include "pager.h"
26 #include <assert.h>
27 #include <string.h>
28 
/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled (change "#if 0" to "#if 1"), SET_PAGER() remembers the first
** pager it is handed and the TRACEn() macros print only for that pager.
** The TRACEn() macros expect a variable named pPager to be in scope at the
** point of use.
**
** Each enabled macro is wrapped in do{ }while(0) so that it expands to
** exactly one statement.  A bare "if(...) stmt" expansion would silently
** capture an "else" that follows the macro invocation.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X) \
  do{ if( pPager==mainPager ) sqlite3DebugPrintf(X); }while(0)
#define TRACE2(X,Y) \
  do{ if( pPager==mainPager ) sqlite3DebugPrintf(X,Y); }while(0)
#define TRACE3(X,Y,Z) \
  do{ if( pPager==mainPager ) sqlite3DebugPrintf(X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46 
47 
/*
** Locking states of the page cache.  The pager is always in exactly one
** of the following states.  The numeric values are ordered so that a
** larger value means a stronger lock; code compares states with < and >=.
**
**   PAGER_UNLOCK        Initial state.  The database file is neither being
**                       read nor written and no page data is held in
**                       memory.
**
**   PAGER_SHARED        The database is being read.  Writing is not
**                       allowed.  Any number of readers may access the
**                       same database file concurrently.
**
**   PAGER_RESERVED      Changes may be made to the page cache only.  The
**                       database file on disk has not been modified, so
**                       other processes may continue to read it.
**
**   PAGER_EXCLUSIVE     The database file itself is being written.  No
**                       other process or thread may read or write the
**                       file while this state is held.
**
** State transitions:
**
**   UNLOCK   -> SHARED     when the first page is fetched.
**   SHARED   -> UNLOCK     when every outstanding page has been released.
**   SHARED   -> RESERVED   on the first request to write a page.  A page
**                          must be outstanding before it can be written,
**                          so the pager is necessarily in PAGER_SHARED
**                          when this happens.
**   RESERVED -> EXCLUSIVE -> SHARED
**                          during a commit or a rollback.
*/
#define PAGER_UNLOCK      0   /* No lock held, cache empty */
#define PAGER_SHARED      1   /* Reading */
#define PAGER_RESERVED    2   /* Writing to the cache only */
#define PAGER_EXCLUSIVE   3   /* Writing to the database file */
88 
89 
90 /*
91 ** Each in-memory image of a page begins with the following header.
92 ** This header is only visible to this pager module.  The client
93 ** code that calls pager sees only the data that follows the header.
94 **
95 ** Client code should call sqlite3pager_write() on a page prior to making
96 ** any modifications to that page.  The first time sqlite3pager_write()
97 ** is called, the original page contents are written into the rollback
98 ** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
99 ** the journal page has made it onto the disk surface, PgHdr.needSync
100 ** is cleared.  The modified page cannot be written back into the original
101 ** database file until the journal pages has been synced to disk and the
102 ** PgHdr.needSync has been cleared.
103 **
104 ** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
105 ** is cleared again when the page content is written back to the original
106 ** database file.
107 */
108 typedef struct PgHdr PgHdr;
109 struct PgHdr {
110   Pager *pPager;                 /* The pager to which this page belongs */
111   Pgno pgno;                     /* The page number for this page */
112   PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
113   PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
114   PgHdr *pNextAll;               /* A list of all pages */
115   PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
116   u8 inJournal;                  /* TRUE if has been written to journal */
117   u8 inStmt;                     /* TRUE if in the statement subjournal */
118   u8 dirty;                      /* TRUE if we need to write back changes */
119   u8 needSync;                   /* Sync journal before writing this page */
120   u8 alwaysRollback;             /* Disable dont_rollback() for this page */
121   short int nRef;                /* Number of users of this page */
122   PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
123   /* SQLITE_PAGE_SIZE bytes of page data follow this header */
124   /* Pager.nExtra bytes of local data follow the page data */
125 };
126 
127 /*
128 ** For an in-memory only database, some extra information is recorded about
129 ** each page so that changes can be rolled back.  (Journal files are not
130 ** used for in-memory databases.)  The following information is added to
131 ** the end of every EXTRA block for in-memory databases.
132 **
133 ** This information could have been added directly to the PgHdr structure.
134 ** But then it would take up an extra 8 bytes of storage on every PgHdr
135 ** even for disk-based databases.  Splitting it out saves 8 bytes.  This
136 ** is only a savings of 0.8% but those percentages add up.
137 */
138 typedef struct PgHistory PgHistory;
139 struct PgHistory {
140   u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
141   u8 *pStmt;     /* Text as it was at the beginning of the current statement */
142 };
143 
/*
** Invoke the page codec, if one is installed.
**
**   P   The Pager.  Its xCodec member is the codec routine (may be NULL)
**       and its pCodecArg member is passed as the first codec argument.
**   D   Pointer to the page data to be transformed.
**   N   Page number of the page.
**   X   Integer operation code handed through to xCodec unchanged.
**       NOTE(review): the meaning of each code is defined by the codec
**       implementation, which is not part of this file.
**
** Every macro parameter is parenthesized in the expansion so that an
** argument that is itself an expression (for example "&pager") expands
** correctly.  The expansion is kept as a braced "if" statement, exactly
** as before, so existing call sites are unaffected.
**
** When SQLITE_HAS_CODEC is not defined the macro expands to nothing.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) \
    if( (P)->xCodec ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
#else
# define CODEC(P,D,N,X)
#endif
152 
/*
** Pointer conversions between a page header and the regions that are laid
** out directly after it in memory:
**
**   PGHDR_TO_DATA(P)      Header -> start of the page data.
**   DATA_TO_PGHDR(D)      Page data -> the header that precedes it.
**   PGHDR_TO_EXTRA(P)     Header -> the extra area that begins
**                         SQLITE_PAGE_SIZE bytes into the page data.
**   PGHDR_TO_HIST(P,PGR)  Header -> the PgHistory record located
**                         pageSize+nExtra bytes into the page data, where
**                         pageSize and nExtra are taken from pager PGR.
*/
#define PGHDR_TO_DATA(P)  ((void*)((P)+1))
#define DATA_TO_PGHDR(D)  (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P) ((void*)(((char*)((P)+1))+(SQLITE_PAGE_SIZE)))
#define PGHDR_TO_HIST(P,PGR)  \
    ((PgHistory*)(((char*)((P)+1))+((PGR)->pageSize+(PGR)->nExtra)))
162 
/*
** Number of buckets in the hash table that maps a page number to its
** in-memory page.  pager_hash() reduces a page number with a bit mask,
** so this value must remain a power of two.
*/
#define N_PG_HASH (1<<11)

/*
** Map page number PN to its hash bucket index, a value in the range
** 0 through N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN) & (N_PG_HASH-1))
173 
174 /*
175 ** A open page cache is an instance of the following structure.
176 */
177 struct Pager {
178   char *zFilename;            /* Name of the database file */
179   char *zJournal;             /* Name of the journal file */
180   char *zDirectory;           /* Directory hold database and journal files */
181   OsFile fd, jfd;             /* File descriptors for database and journal */
182   OsFile stfd;                /* File descriptor for the statement subjournal*/
183   int dbSize;                 /* Number of pages in the file */
184   int origDbSize;             /* dbSize before the current change */
185   int stmtSize;               /* Size of database (in pages) at stmt_begin() */
186   off_t stmtJSize;            /* Size of journal at stmt_begin() */
187   int nRec;                   /* Number of pages written to the journal */
188   u32 cksumInit;              /* Quasi-random value added to every checksum */
189   int stmtNRec;               /* Number of records in stmt subjournal */
190   int nExtra;                 /* Add this many bytes to each in-memory page */
191   void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
192   void (*xReiniter)(void*,int);   /* Call this routine when reloading pages */
193   int pageSize;               /* Number of bytes in a page */
194   int nPage;                  /* Total number of in-memory pages */
195   int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
196   int mxPage;                 /* Maximum number of pages to hold in cache */
197   int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
198   void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
199   void *pCodecArg;            /* First argument to xCodec() */
200   u8 journalOpen;             /* True if journal file descriptors is valid */
201   u8 journalStarted;          /* True if header of journal is synced */
202   u8 useJournal;              /* Use a rollback journal on this file */
203   u8 stmtOpen;                /* True if the statement subjournal is open */
204   u8 stmtInUse;               /* True we are in a statement subtransaction */
205   u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
206   u8 noSync;                  /* Do not sync the journal if true */
207   u8 fullSync;                /* Do extra syncs of the journal for robustness */
208   u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
209   u8 errMask;                 /* One of several kinds of errors */
210   u8 tempFile;                /* zFilename is a temporary file */
211   u8 readOnly;                /* True for a read-only database */
212   u8 needSync;                /* True if an fsync() is needed on the journal */
213   u8 dirtyCache;              /* True if cached pages have changed */
214   u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
215   u8 memDb;                   /* True to inhibit all file I/O */
216   u8 *aInJournal;             /* One bit for each page in the database file */
217   u8 *aInStmt;                /* One bit for each page in the database */
218   PgHdr *pFirst, *pLast;      /* List of free pages */
219   PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
220   PgHdr *pAll;                /* List of all pages */
221   PgHdr *pStmt;               /* List of pages in the statement subjournal */
222   PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
223   int nMaster;                /* Number of bytes to reserve for master j.p */
224   BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
225 };
226 
/*
** Error bits that may be set in Pager.errMask.  Each value is a distinct
** single bit so that several errors can be recorded at the same time.
*/
#define PAGER_ERR_FULL     0x01  /* A write() failed */
#define PAGER_ERR_MEM      0x02  /* A malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* Error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* Database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* General disk I/O error - bad hard drive? */
235 
/*
** Magic string found at the start of every journal file.  The bytes came
** from /dev/random and serve only as a sanity check.
**
** Since version 2.8.0 the journal format carries additional sanity
** checking information.  If power is lost while the journal is being
** written, semi-random garbage may appear in the journal file once power
** is restored.  Rolling back such a journal could corrupt the database.
** The additional information is an attempt to detect the garbage so that
** it can be ignored.
**
** The additional information is a 32-bit checksum on each page of data.
** The checksum covers the page number and the SQLITE_PAGE_SIZE bytes of
** page data.  It is seeded with a 32-bit random value that is stored in
** the journal right after the header.
**
** The random seed matters because garbage at the end of a journal is
** likely to be data that once belonged to other, now deleted, files.  If
** that data came from an obsolete journal file its checksums might well
** be correct.  Seeding the checksum with a value that is different for
** every journal minimizes that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd7,
};
262 
/*
** Sizes, in bytes, of the pieces of a journal file.
**
**   JOURNAL_HDR_SZ(pPager)   Size of the journal header: 24 fixed bytes
**                            (8-byte magic string followed by four 4-byte
**                            integers) plus pPager->nMaster bytes reserved
**                            for the master journal name.
**
**   JOURNAL_PG_SZ(pPager)    Size of one page record: pPager->pageSize
**                            bytes of page data plus 8 bytes for the
**                            4-byte page number and the 4-byte checksum.
**
** The macro argument is parenthesized in both expansions so that any
** pointer-valued expression (for example "&pager") may be passed.
*/
#define JOURNAL_HDR_SZ(pPager) (24 + (pPager)->nMaster)
#define JOURNAL_PG_SZ(pPager)  ((pPager)->pageSize + 8)
270 
271 
/*
** Reference count tracking, available only in SQLITE_TEST builds.
**
** When the global pager3_refinfo_enable is non-zero, REFINFO(X) prints
** the page number, the address of the page data and the reference count
** of page X to standard output.  In other builds REFINFO() expands to
** nothing.
**
** The data address is printed with %p.  Casting the pointer to int, as
** was done previously, discards the upper half of the address on
** platforms where pointers are wider than int.
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager3_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), (int)p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
290 
291 /*
292 ** Read a 32-bit integer from the given file descriptor.  Store the integer
293 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
294 ** error code is something goes wrong.
295 */
296 static int read32bits(OsFile *fd, u32 *pRes){
297   u32 res;
298   int rc;
299   rc = sqlite3OsRead(fd, &res, sizeof(res));
300   if( rc==SQLITE_OK ){
301     unsigned char ac[4];
302     memcpy(ac, &res, 4);
303     res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
304   }
305   *pRes = res;
306   return rc;
307 }
308 
309 /*
310 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
311 ** on success or an error code is something goes wrong.
312 */
313 static int write32bits(OsFile *fd, u32 val){
314   unsigned char ac[4];
315   ac[0] = (val>>24) & 0xff;
316   ac[1] = (val>>16) & 0xff;
317   ac[2] = (val>>8) & 0xff;
318   ac[3] = val & 0xff;
319   return sqlite3OsWrite(fd, ac, 4);
320 }
321 
322 /*
323 ** Write a 32-bit integer into a page header right before the
324 ** page data.  This will overwrite the PgHdr.pDirty pointer.
325 */
326 static void store32bits(u32 val, PgHdr *p, int offset){
327   unsigned char *ac;
328   ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
329   ac[0] = (val>>24) & 0xff;
330   ac[1] = (val>>16) & 0xff;
331   ac[2] = (val>>8) & 0xff;
332   ac[3] = val & 0xff;
333 }
334 
335 
336 /*
337 ** Convert the bits in the pPager->errMask into an approprate
338 ** return code.
339 */
340 static int pager_errcode(Pager *pPager){
341   int rc = SQLITE_OK;
342   if( pPager->errMask & PAGER_ERR_LOCK )    rc = SQLITE_PROTOCOL;
343   if( pPager->errMask & PAGER_ERR_DISK )    rc = SQLITE_IOERR;
344   if( pPager->errMask & PAGER_ERR_FULL )    rc = SQLITE_FULL;
345   if( pPager->errMask & PAGER_ERR_MEM )     rc = SQLITE_NOMEM;
346   if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
347   return rc;
348 }
349 
350 /*
351 ** Add or remove a page from the list of all pages that are in the
352 ** statement journal.
353 **
354 ** The Pager keeps a separate list of pages that are currently in
355 ** the statement journal.  This helps the sqlite3pager_stmt_commit()
356 ** routine run MUCH faster for the common case where there are many
357 ** pages in memory but only a few are in the statement journal.
358 */
359 static void page_add_to_stmt_list(PgHdr *pPg){
360   Pager *pPager = pPg->pPager;
361   if( pPg->inStmt ) return;
362   assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 );
363   pPg->pPrevStmt = 0;
364   if( pPager->pStmt ){
365     pPager->pStmt->pPrevStmt = pPg;
366   }
367   pPg->pNextStmt = pPager->pStmt;
368   pPager->pStmt = pPg;
369   pPg->inStmt = 1;
370 }
371 static void page_remove_from_stmt_list(PgHdr *pPg){
372   if( !pPg->inStmt ) return;
373   if( pPg->pPrevStmt ){
374     assert( pPg->pPrevStmt->pNextStmt==pPg );
375     pPg->pPrevStmt->pNextStmt = pPg->pNextStmt;
376   }else{
377     assert( pPg->pPager->pStmt==pPg );
378     pPg->pPager->pStmt = pPg->pNextStmt;
379   }
380   if( pPg->pNextStmt ){
381     assert( pPg->pNextStmt->pPrevStmt==pPg );
382     pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt;
383   }
384   pPg->pNextStmt = 0;
385   pPg->pPrevStmt = 0;
386   pPg->inStmt = 0;
387 }
388 
389 /*
390 ** Find a page in the hash table given its page number.  Return
391 ** a pointer to the page or NULL if not found.
392 */
393 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
394   PgHdr *p = pPager->aHash[pager_hash(pgno)];
395   while( p && p->pgno!=pgno ){
396     p = p->pNextHash;
397   }
398   return p;
399 }
400 
401 /*
402 ** Unlock the database and clear the in-memory cache.  This routine
403 ** sets the state of the pager back to what it was when it was first
404 ** opened.  Any outstanding pages are invalidated and subsequent attempts
405 ** to access those pages will likely result in a coredump.
406 */
407 static void pager_reset(Pager *pPager){
408   PgHdr *pPg, *pNext;
409   for(pPg=pPager->pAll; pPg; pPg=pNext){
410     pNext = pPg->pNextAll;
411     sqliteFree(pPg);
412   }
413   pPager->pFirst = 0;
414   pPager->pFirstSynced = 0;
415   pPager->pLast = 0;
416   pPager->pAll = 0;
417   memset(pPager->aHash, 0, sizeof(pPager->aHash));
418   pPager->nPage = 0;
419   if( pPager->state>=PAGER_RESERVED ){
420     sqlite3pager_rollback(pPager);
421   }
422   sqlite3OsUnlock(&pPager->fd, NO_LOCK);
423   pPager->state = PAGER_UNLOCK;
424   pPager->dbSize = -1;
425   pPager->nRef = 0;
426   assert( pPager->journalOpen==0 );
427 }
428 
429 /*
430 ** When this routine is called, the pager has the journal file open and
431 ** a RESERVED or EXCLUSIVE lock on the database.  This routine releases
432 ** the database lock and acquires a SHARED lock in its place.  The journal
433 ** file is deleted and closed.
434 **
435 ** TODO: Consider keeping the journal file open for temporary databases.
436 ** This might give a performance improvement on windows where opening
437 ** a file is an expensive operation.
438 */
439 static int pager_unwritelock(Pager *pPager){
440   PgHdr *pPg;
441   if( pPager->state<PAGER_RESERVED ){
442     return SQLITE_OK;
443   }
444   sqlite3pager_stmt_commit(pPager);
445   if( pPager->stmtOpen ){
446     sqlite3OsClose(&pPager->stfd);
447     pPager->stmtOpen = 0;
448   }
449   if( pPager->journalOpen ){
450     sqlite3OsClose(&pPager->jfd);
451     pPager->journalOpen = 0;
452     sqlite3OsDelete(pPager->zJournal);
453     sqliteFree( pPager->aInJournal );
454     pPager->aInJournal = 0;
455     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
456       pPg->inJournal = 0;
457       pPg->dirty = 0;
458       pPg->needSync = 0;
459     }
460   }else{
461     assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
462   }
463   sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
464   pPager->state = PAGER_SHARED;
465   return SQLITE_OK;
466 }
467 
468 /*
469 ** Compute and return a checksum for the page of data.
470 **
471 ** This is not a real checksum.  It is really just the sum of the
472 ** random initial value and the page number.  We considered do a checksum
473 ** of the database, but that was found to be too slow.
474 */
475 static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
476   u32 cksum = pPager->cksumInit + pgno;
477   return cksum;
478 }
479 
480 /*
481 ** Read a single page from the journal file opened on file descriptor
482 ** jfd.  Playback this one page.
483 **
484 **
485 **
486 ** There are three different journal formats.  The format parameter determines
487 ** which format is used by the journal that is played back.
488 */
489 static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
490   int rc;
491   PgHdr *pPg;                   /* An existing page in the cache */
492   Pgno pgno;                    /* The page number of a page in journal */
493   u32 cksum;                    /* Checksum used for sanity checking */
494   u8 aData[SQLITE_PAGE_SIZE];   /* Store data here */
495 
496   rc = read32bits(jfd, &pgno);
497   if( rc!=SQLITE_OK ) return rc;
498   rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
499   if( rc!=SQLITE_OK ) return rc;
500 
501   /* Sanity checking on the page.  This is more important that I originally
502   ** thought.  If a power failure occurs while the journal is being written,
503   ** it could cause invalid data to be written into the journal.  We need to
504   ** detect this invalid data (with high probability) and ignore it.
505   */
506   if( pgno==0 ){
507     return SQLITE_DONE;
508   }
509   if( pgno>(unsigned)pPager->dbSize ){
510     return SQLITE_OK;
511   }
512   if( useCksum ){
513     rc = read32bits(jfd, &cksum);
514     if( rc ) return rc;
515     if( pager_cksum(pPager, pgno, aData)!=cksum ){
516       return SQLITE_DONE;
517     }
518   }
519 
520   assert( pPager->state==PAGER_RESERVED || pPager->state==PAGER_EXCLUSIVE );
521 
522   /* If the pager is in RESERVED state, then there must be a copy of this
523   ** page in the pager cache. In this case just update the pager cache,
524   ** not the database file. The page is left marked dirty in this case.
525   **
526   ** FIX ME: Ideally the page would only be left marked dirty when the
527   ** pager is in RESERVED state if it was dirty when this statement
528   ** transaction was started.
529   **
530   ** If in EXCLUSIVE state, then we update the pager cache if it exists
531   ** and the main file. The page is then marked not dirty.
532   */
533   pPg = pager_lookup(pPager, pgno);
534   assert( pPager->state==PAGER_EXCLUSIVE || pPg );
535   TRACE2("PLAYBACK page %d\n", pgno);
536   if( pPager->state==PAGER_EXCLUSIVE ){
537     sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
538     rc = sqlite3OsWrite(&pPager->fd, aData, SQLITE_PAGE_SIZE);
539   }
540   if( pPg ){
541     /* No page should ever be rolled back that is in use, except for page
542     ** 1 which is held in use in order to keep the lock on the database
543     ** active.
544     */
545     void *pData;
546     assert( pPg->nRef==0 || pPg->pgno==1 );
547     pData = PGHDR_TO_DATA(pPg);
548     memcpy(pData, aData, pPager->pageSize);
549     if( pPager->xDestructor ){
550       pPager->xDestructor(pData, pPager->pageSize);
551     }
552     if( pPager->state==PAGER_EXCLUSIVE ){
553       pPg->dirty = 0;
554       pPg->needSync = 0;
555     }
556 
557     CODEC(pPager, pData, pPg->pgno, 3);
558   }
559   return rc;
560 }
561 
562 /*
563 ** Parameter zMaster is the name of a master journal file. A single journal
564 ** file that referred to the master journal file has just been rolled back.
565 ** This routine checks if it is possible to delete the master journal file,
566 ** and does so if it is.
567 */
568 static int pager_delmaster(const char *zMaster){
569   int rc;
570   int master_open = 0;
571   OsFile master;
572   char *zMasterJournal = 0; /* Contents of master journal file */
573   off_t nMasterJournal;     /* Size of master journal file */
574 
575   /* Open the master journal file exclusively in case some other process
576   ** is running this routine also. Not that it makes too much difference.
577   */
578   rc = sqlite3OsOpenExclusive(zMaster, &master, 0);
579   if( rc!=SQLITE_OK ) goto delmaster_out;
580   master_open = 1;
581 
582   rc = sqlite3OsFileSize(&master, &nMasterJournal);
583   if( rc!=SQLITE_OK ) goto delmaster_out;
584 
585   if( nMasterJournal>0 ){
586     char *zDb;
587     zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
588     if( !zMasterJournal ){
589       rc = SQLITE_NOMEM;
590       goto delmaster_out;
591     }
592     rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
593     if( rc!=SQLITE_OK ) goto delmaster_out;
594 
595     zDb = zMasterJournal;
596     while( (zDb-zMasterJournal)<nMasterJournal ){
597       char *zJournal = 0;
598       sqlite3SetString(&zJournal, zDb, "-journal", 0);
599       if( !zJournal ){
600         rc = SQLITE_NOMEM;
601         goto delmaster_out;
602       }
603       if( sqlite3OsFileExists(zJournal) ){
604         /* One of the journals pointed to by the master journal exists.
605         ** Open it and check if it points at the master journal. If
606         ** so, return without deleting the master journal file.
607         */
608         OsFile journal;
609         int nMaster;
610         off_t jsz;
611 
612         rc = sqlite3OsOpenReadOnly(zJournal, &journal);
613         sqliteFree(zJournal);
614         if( rc!=SQLITE_OK ){
615           sqlite3OsClose(&journal);
616           goto delmaster_out;
617         }
618 
619 	/* Check if the file is big enough to be a journal file
620         ** with the required master journal name. If not, ignore it.
621         */
622         rc = sqlite3OsFileSize(&journal, &jsz);
623         if( rc!=SQLITE_OK ){
624           sqlite3OsClose(&journal);
625           goto delmaster_out;
626         }
627         if( jsz<(25+strlen(zMaster)) ){
628           sqlite3OsClose(&journal);
629           continue;
630         }
631 
632         /* Seek to the point in the journal where the master journal name
633         ** is stored. Read the master journal name into memory obtained
634         ** from malloc.
635         */
636         rc = sqlite3OsSeek(&journal, 20);
637         if( rc!=SQLITE_OK ) goto delmaster_out;
638         rc = read32bits(&journal, (u32*)&nMaster);
639         if( rc!=SQLITE_OK ) goto delmaster_out;
640         if( nMaster>0 && nMaster>=strlen(zMaster)+1 ){
641           char *zMasterPtr = (char *)sqliteMalloc(nMaster);
642           if( !zMasterPtr ){
643             rc = SQLITE_NOMEM;
644           }
645           rc = sqlite3OsRead(&journal, zMasterPtr, nMaster);
646           if( rc!=SQLITE_OK ){
647             sqliteFree(zMasterPtr);
648             goto delmaster_out;
649           }
650           if( 0==strncmp(zMasterPtr, zMaster, nMaster) ){
651             /* We have a match. Do not delete the master journal file. */
652             sqliteFree(zMasterPtr);
653             goto delmaster_out;
654           }
655         }
656       }
657       zDb += (strlen(zDb)+1);
658     }
659   }
660 
661   sqlite3OsDelete(zMaster);
662 
663 delmaster_out:
664   if( zMasterJournal ){
665     sqliteFree(zMasterJournal);
666   }
667   if( master_open ){
668     sqlite3OsClose(&master);
669   }
670   return rc;
671 }
672 
673 /*
674 ** Make every page in the cache agree with what is on disk.  In other words,
675 ** reread the disk to reset the state of the cache.
676 **
677 ** This routine is called after a rollback in which some of the dirty cache
678 ** pages had never been written out to disk.  We need to roll back the
679 ** cache content and the easiest way to do that is to reread the old content
680 ** back from the disk.
681 */
682 static int pager_reload_cache(Pager *pPager){
683   PgHdr *pPg;
684   int rc = SQLITE_OK;
685   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
686     char zBuf[SQLITE_PAGE_SIZE];
687     if( !pPg->dirty ) continue;
688     if( (int)pPg->pgno <= pPager->origDbSize ){
689       sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
690       rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
691       TRACE2("REFETCH page %d\n", pPg->pgno);
692       CODEC(pPager, zBuf, pPg->pgno, 2);
693       if( rc ) break;
694     }else{
695       memset(zBuf, 0, SQLITE_PAGE_SIZE);
696     }
697     if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
698       memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
699       if( pPager->xReiniter ){
700         pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize);
701       }else{
702         memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
703       }
704     }
705     pPg->needSync = 0;
706     pPg->dirty = 0;
707   }
708   return rc;
709 }
710 
711 
712 /*
713 ** Playback the journal and thus restore the database file to
714 ** the state it was in before we started making changes.
715 **
716 ** The journal file format is as follows:
717 **
718 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
719 **  (2)  4 byte big-endian integer which is the number of valid page records
720 **       in the journal.  If this value is 0xffffffff, then compute the
721 **       number of page records from the journal size.
722 **  (3)  4 byte big-endian integer which is the initial value for the
723 **       sanity checksum.
724 **  (4)  4 byte integer which is the number of pages to truncate the
725 **       database to during a rollback.
726 **  (5)  4 byte integer which is the number of bytes in the master journal
727 **       name.  The value may be zero (indicate that there is no master
728 **       journal.)
729 **  (6)  N bytes of the master journal name.  The name will be nul-terminated
730 **       and might be shorter than the value read from (5).  If the first byte
731 **       of the name is \000 then there is no master journal.  The master
732 **       journal name is stored in UTF-8.
733 **  (7)  Zero or more pages instances, each as follows:
734 **        +  4 byte page number.
735 **        +  pPager->pageSize bytes of data.
736 **        +  4 byte checksum
737 **
738 ** When we speak of the journal header, we mean the first 6 items above.
739 ** Each entry in the journal is an instance of the 7th item.
740 **
741 ** Call the value from the second bullet "nRec".  nRec is the number of
742 ** valid page entries in the journal.  In most cases, you can compute the
743 ** value of nRec from the size of the journal file.  But if a power
744 ** failure occurred while the journal was being written, it could be the
745 ** case that the size of the journal file had already been increased but
746 ** the extra entries had not yet made it safely to disk.  In such a case,
747 ** the value of nRec computed from the file size would be too large.  For
748 ** that reason, we always use the nRec value in the header.
749 **
750 ** If the nRec value is 0xffffffff it means that nRec should be computed
751 ** from the file size.  This value is used when the user selects the
752 ** no-sync option for the journal.  A power failure could lead to corruption
753 ** in this case.  But for things like temporary table (which will be
754 ** deleted when the power is restored) we don't care.
755 **
756 ** If the file opened as the journal file is not a well-formed
757 ** journal file then the database will likely already be
758 ** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
759 ** and SQLITE_CORRUPT is returned.  If it all works, then this routine
760 ** returns SQLITE_OK.
761 */
762 static int pager_playback(Pager *pPager, int useJournalSize){
763   off_t szJ;               /* Size of the journal file in bytes */
764   int nRec;                /* Number of Records in the journal */
765   int i;                   /* Loop counter */
766   Pgno mxPg = 0;           /* Size of the original file in pages */
767   unsigned char aMagic[8]; /* A buffer to hold the magic header */
768   int rc;                  /* Result code of a subroutine */
769   int nMaster;             /* Number of bytes in the name of master journal */
770   char *zMaster = 0;       /* Name of master journal file if any */
771 
772   /* Figure out how many records are in the journal.  Abort early if
773   ** the journal is empty.
774   */
775   assert( pPager->journalOpen );
776   sqlite3OsSeek(&pPager->jfd, 0);
777   rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
778   if( rc!=SQLITE_OK ){
779     goto end_playback;
780   }
781 
782   /* If the journal file is too small to contain a complete header,
783   ** it must mean that the process that created the journal was just
784   ** beginning to write the journal file when it died.  In that case,
785   ** the database file should have still been completely unchanged.
786   ** Nothing needs to be rolled back.  We can safely ignore this journal.
787   */
788   if( szJ < 24 ){
789     goto end_playback;
790   }
791 
792   /* (1) Read the beginning of the journal and verify the magic string
793   ** at the beginning of the journal. */
794   rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
795   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
796     rc = SQLITE_PROTOCOL;
797     goto end_playback;
798   }
799 
800   /* (2) Read the number of pages stored in the journal.  */
801   rc = read32bits(&pPager->jfd, (u32*)&nRec);
802   if( rc ) goto end_playback;
803   if( nRec==0xffffffff || useJournalSize ){
804     nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
805   }
806 
807   /* (3) Read the initial value for the sanity checksum */
808   rc = read32bits(&pPager->jfd, &pPager->cksumInit);
809   if( rc ) goto end_playback;
810 
811   /* (4) Read the number of pages in the database file prior to the
812   ** start of the transaction */
813   rc = read32bits(&pPager->jfd, &mxPg);
814   if( rc!=SQLITE_OK ){
815     goto end_playback;
816   }
817 
818   /* (5) and (6): Check if a master journal file is specified. If one is
819   ** specified, only proceed with the playback if it still exists. */
820   rc = read32bits(&pPager->jfd, &nMaster);
821   if( rc ) goto end_playback;
822   if( nMaster>0 ){
823     zMaster = sqliteMalloc(nMaster);
824     if( !zMaster ){
825       rc = SQLITE_NOMEM;
826       goto end_playback;
827     }
828     rc = sqlite3OsRead(&pPager->jfd, zMaster, nMaster);
829     if( rc!=SQLITE_OK || (zMaster[0] && !sqlite3OsFileExists(zMaster)) ){
830       goto end_playback;
831     }
832   }
833 
834   /* Truncate the database file back to it's original size */
835   assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
836   rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
837   if( rc!=SQLITE_OK ){
838     goto end_playback;
839   }
840   pPager->dbSize = mxPg;
841 
842   /* Copy original pages out of the journal and back into the database file.
843   */
844   for(i=0; i<nRec; i++){
845     rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
846     if( rc!=SQLITE_OK ){
847       if( rc==SQLITE_DONE ){
848         rc = SQLITE_OK;
849       }
850       break;
851     }
852   }
853 
854   /* Pages that have been written to the journal but never synced
855   ** where not restored by the loop above.  We have to restore those
856   ** pages by reading them back from the original database.
857   */
858   if( rc==SQLITE_OK ){
859     pager_reload_cache(pPager);
860   }
861 
862 end_playback:
863   if( zMaster ){
864     /* If there was a master journal and this routine will return true,
865     ** see if it is possible to delete the master journal. If errors
866     ** occur during this process, ignore them.
867     */
868     if( rc==SQLITE_OK ){
869       pager_delmaster(zMaster);
870     }
871     sqliteFree(zMaster);
872   }
873   if( rc!=SQLITE_OK ){
874     /* FIX ME: We shouldn't delete the journal if an error occured during
875     ** rollback. It may have been a transient error and the rollback may
876     ** succeed next time it is attempted.
877     */
878     pager_unwritelock(pPager);
879     pPager->errMask |= PAGER_ERR_CORRUPT;
880     rc = SQLITE_CORRUPT;
881   }else{
882     rc = pager_unwritelock(pPager);
883   }
884   return rc;
885 }
886 
887 /*
888 ** Playback the statement journal.
889 **
890 ** This is similar to playing back the transaction journal but with
891 ** a few extra twists.
892 **
893 **    (1)  The number of pages in the database file at the start of
894 **         the statement is stored in pPager->stmtSize, not in the
895 **         journal file itself.
896 **
897 **    (2)  In addition to playing back the statement journal, also
898 **         playback all pages of the transaction journal beginning
899 **         at offset pPager->stmtJSize.
900 */
901 static int pager_stmt_playback(Pager *pPager){
902   off_t szJ;               /* Size of the full journal */
903   int nRec;                /* Number of Records */
904   int i;                   /* Loop counter */
905   int rc;
906 
907   /* Truncate the database back to its original size.
908   */
909   rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->stmtSize);
910   pPager->dbSize = pPager->stmtSize;
911 
912   /* Figure out how many records are in the statement journal.
913   */
914   assert( pPager->stmtInUse && pPager->journalOpen );
915   sqlite3OsSeek(&pPager->stfd, 0);
916   nRec = pPager->stmtNRec;
917 
918   /* Copy original pages out of the statement journal and back into the
919   ** database file.  Note that the statement journal omits checksums from
920   ** each record since power-failure recovery is not important to statement
921   ** journals.
922   */
923   for(i=nRec-1; i>=0; i--){
924     rc = pager_playback_one_page(pPager, &pPager->stfd, 0);
925     assert( rc!=SQLITE_DONE );
926     if( rc!=SQLITE_OK ) goto end_stmt_playback;
927   }
928 
929   /* Figure out how many pages need to be copied out of the transaction
930   ** journal.
931   */
932   rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize);
933   if( rc!=SQLITE_OK ){
934     goto end_stmt_playback;
935   }
936   rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
937   if( rc!=SQLITE_OK ){
938     goto end_stmt_playback;
939   }
940   nRec = (szJ - pPager->stmtJSize)/JOURNAL_PG_SZ(pPager);
941   for(i=nRec-1; i>=0; i--){
942     rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
943     if( rc!=SQLITE_OK ){
944       assert( rc!=SQLITE_DONE );
945       goto end_stmt_playback;
946     }
947   }
948 
949 end_stmt_playback:
950   if( rc!=SQLITE_OK ){
951     pPager->errMask |= PAGER_ERR_CORRUPT;
952     rc = SQLITE_CORRUPT;
953   }
954   return rc;
955 }
956 
957 /*
958 ** Change the maximum number of in-memory pages that are allowed.
959 **
960 ** The maximum number is the absolute value of the mxPage parameter.
961 ** If mxPage is negative, the noSync flag is also set.  noSync bypasses
962 ** calls to sqlite3OsSync().  The pager runs much faster with noSync on,
963 ** but if the operating system crashes or there is an abrupt power
964 ** failure, the database file might be left in an inconsistent and
965 ** unrepairable state.
966 */
967 void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){
968   if( mxPage>=0 ){
969     pPager->noSync = pPager->tempFile;
970     if( pPager->noSync ) pPager->needSync = 0;
971   }else{
972     pPager->noSync = 1;
973     mxPage = -mxPage;
974   }
975   if( mxPage>10 ){
976     pPager->mxPage = mxPage;
977   }
978 }
979 
980 /*
981 ** Adjust the robustness of the database to damage due to OS crashes
** or power failures by changing the number of sync()s when writing
983 ** the rollback journal.  There are three levels:
984 **
985 **    OFF       sqlite3OsSync() is never called.  This is the default
986 **              for temporary and transient files.
987 **
988 **    NORMAL    The journal is synced once before writes begin on the
989 **              database.  This is normally adequate protection, but
990 **              it is theoretically possible, though very unlikely,
**              that an inopportune power failure could leave the journal
992 **              in a state which would cause damage to the database
993 **              when it is rolled back.
994 **
995 **    FULL      The journal is synced twice before writes begin on the
996 **              database (with some additional information - the nRec field
997 **              of the journal header - being written in between the two
998 **              syncs).  If we assume that writing a
999 **              single disk sector is atomic, then this mode provides
1000 **              assurance that the journal will not be corrupted to the
1001 **              point of causing damage to the database during rollback.
1002 **
1003 ** Numeric values associated with these states are OFF==1, NORMAL=2,
1004 ** and FULL=3.
1005 */
1006 void sqlite3pager_set_safety_level(Pager *pPager, int level){
1007   pPager->noSync =  level==1 || pPager->tempFile;
1008   pPager->fullSync = level==3 && !pPager->tempFile;
1009   if( pPager->noSync ) pPager->needSync = 0;
1010 }
1011 
1012 /*
1013 ** Open a temporary file.  Write the name of the file into zName
1014 ** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.)  Write
1015 ** the file descriptor into *fd.  Return SQLITE_OK on success or some
1016 ** other error code if we fail.
1017 **
1018 ** The OS will automatically delete the temporary file when it is
1019 ** closed.
1020 */
1021 static int sqlite3pager_opentemp(char *zFile, OsFile *fd){
1022   int cnt = 8;
1023   int rc;
1024   do{
1025     cnt--;
1026     sqlite3OsTempFileName(zFile);
1027     rc = sqlite3OsOpenExclusive(zFile, fd, 1);
1028   }while( cnt>0 && rc!=SQLITE_OK );
1029   return rc;
1030 }
1031 
1032 /*
1033 ** Create a new page cache and put a pointer to the page cache in *ppPager.
1034 ** The file to be cached need not exist.  The file is not locked until
1035 ** the first call to sqlite3pager_get() and is only held open until the
1036 ** last page is released using sqlite3pager_unref().
1037 **
1038 ** If zFilename is NULL then a randomly-named temporary file is created
1039 ** and used as the file to be cached.  The file will be deleted
1040 ** automatically when it is closed.
1041 */
1042 int sqlite3pager_open(
1043   Pager **ppPager,         /* Return the Pager structure here */
1044   const char *zFilename,   /* Name of the database file to open */
1045   int mxPage,              /* Max number of in-memory cache pages */
1046   int nExtra,              /* Extra bytes append to each in-memory page */
1047   int useJournal,          /* TRUE to use a rollback journal on this file */
1048   void  *pBusyHandler      /* Busy callback */
1049 ){
1050   Pager *pPager;
1051   char *zFullPathname;
1052   int nameLen;
1053   OsFile fd;
1054   int rc, i;
1055   int tempFile;
1056   int memDb = 0;
1057   int readOnly = 0;
1058   char zTemp[SQLITE_TEMPNAME_SIZE];
1059 
1060   *ppPager = 0;
1061   if( sqlite3_malloc_failed ){
1062     return SQLITE_NOMEM;
1063   }
1064   if( zFilename && zFilename[0] ){
1065     if( strcmp(zFilename,":memory:")==0 ){
1066       memDb = 1;
1067       zFullPathname = sqliteMalloc(4);
1068       if( zFullPathname ) strcpy(zFullPathname, "");
1069       rc = SQLITE_OK;
1070     }else{
1071       zFullPathname = sqlite3OsFullPathname(zFilename);
1072       rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly);
1073       tempFile = 0;
1074     }
1075   }else{
1076     rc = sqlite3pager_opentemp(zTemp, &fd);
1077     zFilename = zTemp;
1078     zFullPathname = sqlite3OsFullPathname(zFilename);
1079     tempFile = 1;
1080   }
1081   if( sqlite3_malloc_failed ){
1082     return SQLITE_NOMEM;
1083   }
1084   if( rc!=SQLITE_OK ){
1085     sqliteFree(zFullPathname);
1086     return SQLITE_CANTOPEN;
1087   }
1088   nameLen = strlen(zFullPathname);
1089   pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
1090   if( pPager==0 ){
1091     sqlite3OsClose(&fd);
1092     sqliteFree(zFullPathname);
1093     return SQLITE_NOMEM;
1094   }
1095   SET_PAGER(pPager);
1096   pPager->zFilename = (char*)&pPager[1];
1097   pPager->zDirectory = &pPager->zFilename[nameLen+1];
1098   pPager->zJournal = &pPager->zDirectory[nameLen+1];
1099   strcpy(pPager->zFilename, zFullPathname);
1100   strcpy(pPager->zDirectory, zFullPathname);
1101   for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
1102   if( i>0 ) pPager->zDirectory[i-1] = 0;
1103   strcpy(pPager->zJournal, zFullPathname);
1104   sqliteFree(zFullPathname);
1105   strcpy(&pPager->zJournal[nameLen], "-journal");
1106   pPager->fd = fd;
1107   pPager->journalOpen = 0;
1108   pPager->useJournal = useJournal && !memDb;
1109   pPager->stmtOpen = 0;
1110   pPager->stmtInUse = 0;
1111   pPager->nRef = 0;
1112   pPager->dbSize = memDb-1;
1113   pPager->pageSize = SQLITE_PAGE_SIZE;
1114   pPager->stmtSize = 0;
1115   pPager->stmtJSize = 0;
1116   pPager->nPage = 0;
1117   pPager->mxPage = mxPage>5 ? mxPage : 10;
1118   pPager->state = PAGER_UNLOCK;
1119   pPager->errMask = 0;
1120   pPager->tempFile = tempFile;
1121   pPager->memDb = memDb;
1122   pPager->readOnly = readOnly;
1123   pPager->needSync = 0;
1124   pPager->noSync = pPager->tempFile || !useJournal;
1125   pPager->pFirst = 0;
1126   pPager->pFirstSynced = 0;
1127   pPager->pLast = 0;
1128   pPager->nExtra = nExtra;
1129   pPager->pBusyHandler = (BusyHandler *)pBusyHandler;
1130   memset(pPager->aHash, 0, sizeof(pPager->aHash));
1131   *ppPager = pPager;
1132   return SQLITE_OK;
1133 }
1134 
1135 /*
1136 ** Set the destructor for this pager.  If not NULL, the destructor is called
1137 ** when the reference count on each page reaches zero.  The destructor can
1138 ** be used to clean up information in the extra segment appended to each page.
1139 **
** The destructor is not called as a result of sqlite3pager_close().
1141 ** Destructors are only called by sqlite3pager_unref().
1142 */
1143 void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){
1144   pPager->xDestructor = xDesc;
1145 }
1146 
1147 /*
1148 ** Set the reinitializer for this pager.  If not NULL, the reinitializer
1149 ** is called when the content of a page in cache is restored to its original
1150 ** value as a result of a rollback.  The callback gives higher-level code
1151 ** an opportunity to restore the EXTRA section to agree with the restored
1152 ** page data.
1153 */
1154 void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){
1155   pPager->xReiniter = xReinit;
1156 }
1157 
1158 /*
1159 ** Return the total number of pages in the disk file associated with
1160 ** pPager.
1161 */
1162 int sqlite3pager_pagecount(Pager *pPager){
1163   off_t n;
1164   assert( pPager!=0 );
1165   if( pPager->dbSize>=0 ){
1166     return pPager->dbSize;
1167   }
1168   if( sqlite3OsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
1169     pPager->errMask |= PAGER_ERR_DISK;
1170     return 0;
1171   }
1172   n /= SQLITE_PAGE_SIZE;
1173   if( pPager->state!=PAGER_UNLOCK ){
1174     pPager->dbSize = n;
1175   }
1176   return n;
1177 }
1178 
1179 /*
1180 ** Forward declaration
1181 */
1182 static int syncJournal(Pager*, const char*);
1183 
1184 
1185 /*
1186 ** Unlink a page from the free list (the list of all pages where nRef==0)
1187 ** and from its hash collision chain.
1188 */
1189 static void unlinkPage(PgHdr *pPg){
1190   Pager *pPager = pPg->pPager;
1191 
1192   /* Keep the pFirstSynced pointer pointing at the first synchronized page */
1193   if( pPg==pPager->pFirstSynced ){
1194     PgHdr *p = pPg->pNextFree;
1195     while( p && p->needSync ){ p = p->pNextFree; }
1196     pPager->pFirstSynced = p;
1197   }
1198 
1199   /* Unlink from the freelist */
1200   if( pPg->pPrevFree ){
1201     pPg->pPrevFree->pNextFree = pPg->pNextFree;
1202   }else{
1203     assert( pPager->pFirst==pPg );
1204     pPager->pFirst = pPg->pNextFree;
1205   }
1206   if( pPg->pNextFree ){
1207     pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1208   }else{
1209     assert( pPager->pLast==pPg );
1210     pPager->pLast = pPg->pPrevFree;
1211   }
1212   pPg->pNextFree = pPg->pPrevFree = 0;
1213 
1214   /* Unlink from the pgno hash table */
1215   if( pPg->pNextHash ){
1216     pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1217   }
1218   if( pPg->pPrevHash ){
1219     pPg->pPrevHash->pNextHash = pPg->pNextHash;
1220   }else{
1221     int h = pager_hash(pPg->pgno);
1222     assert( pPager->aHash[h]==pPg );
1223     pPager->aHash[h] = pPg->pNextHash;
1224   }
1225   pPg->pNextHash = pPg->pPrevHash = 0;
1226 }
1227 
1228 /*
1229 ** This routine is used to truncate an in-memory database.  Delete
** every page whose pgno is larger than pPager->dbSize and is unreferenced.
1231 ** Referenced pages larger than pPager->dbSize are zeroed.
1232 */
1233 static void memoryTruncate(Pager *pPager){
1234   PgHdr *pPg;
1235   PgHdr **ppPg;
1236   int dbSize = pPager->dbSize;
1237 
1238   ppPg = &pPager->pAll;
1239   while( (pPg = *ppPg)!=0 ){
1240     if( pPg->pgno<=dbSize ){
1241       ppPg = &pPg->pNextAll;
1242     }else if( pPg->nRef>0 ){
1243       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
1244       ppPg = &pPg->pNextAll;
1245     }else{
1246       *ppPg = pPg->pNextAll;
1247       unlinkPage(pPg);
1248       sqliteFree(pPg);
1249       pPager->nPage--;
1250     }
1251   }
1252 }
1253 
1254 /*
1255 ** Truncate the file to the number of pages specified.
1256 */
1257 int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
1258   int rc;
1259   if( pPager->dbSize<0 ){
1260     sqlite3pager_pagecount(pPager);
1261   }
1262   if( pPager->errMask!=0 ){
1263     rc = pager_errcode(pPager);
1264     return rc;
1265   }
1266   if( nPage>=(unsigned)pPager->dbSize ){
1267     return SQLITE_OK;
1268   }
1269   if( pPager->memDb ){
1270     pPager->dbSize = nPage;
1271     memoryTruncate(pPager);
1272     return SQLITE_OK;
1273   }
1274   syncJournal(pPager, 0);
1275   rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
1276   if( rc==SQLITE_OK ){
1277     pPager->dbSize = nPage;
1278   }
1279   return rc;
1280 }
1281 
1282 /*
1283 ** Shutdown the page cache.  Free all memory and close all files.
1284 **
1285 ** If a transaction was in progress when this routine is called, that
1286 ** transaction is rolled back.  All outstanding pages are invalidated
1287 ** and their memory is freed.  Any attempt to use a page associated
1288 ** with this page cache after this function returns will likely
1289 ** result in a coredump.
1290 */
1291 int sqlite3pager_close(Pager *pPager){
1292   PgHdr *pPg, *pNext;
1293   switch( pPager->state ){
1294     case PAGER_RESERVED:
1295     case PAGER_EXCLUSIVE: {
1296       sqlite3pager_rollback(pPager);
1297       if( !pPager->memDb ){
1298         sqlite3OsUnlock(&pPager->fd, NO_LOCK);
1299       }
1300       assert( pPager->journalOpen==0 );
1301       break;
1302     }
1303     case PAGER_SHARED: {
1304       if( !pPager->memDb ){
1305         sqlite3OsUnlock(&pPager->fd, NO_LOCK);
1306       }
1307       break;
1308     }
1309     default: {
1310       /* Do nothing */
1311       break;
1312     }
1313   }
1314   for(pPg=pPager->pAll; pPg; pPg=pNext){
1315     pNext = pPg->pNextAll;
1316     sqliteFree(pPg);
1317   }
1318   if( !pPager->memDb ){
1319     sqlite3OsClose(&pPager->fd);
1320   }
1321   assert( pPager->journalOpen==0 );
1322   /* Temp files are automatically deleted by the OS
1323   ** if( pPager->tempFile ){
1324   **   sqlite3OsDelete(pPager->zFilename);
1325   ** }
1326   */
1327   CLR_PAGER(pPager);
1328   if( pPager->zFilename!=(char*)&pPager[1] ){
1329     assert( 0 );  /* Cannot happen */
1330     sqliteFree(pPager->zFilename);
1331     sqliteFree(pPager->zJournal);
1332     sqliteFree(pPager->zDirectory);
1333   }
1334   sqliteFree(pPager);
1335   return SQLITE_OK;
1336 }
1337 
1338 /*
1339 ** Return the page number for the given page data.
1340 */
1341 Pgno sqlite3pager_pagenumber(void *pData){
1342   PgHdr *p = DATA_TO_PGHDR(pData);
1343   return p->pgno;
1344 }
1345 
1346 /*
1347 ** The page_ref() function increments the reference count for a page.
1348 ** If the page is currently on the freelist (the reference count is zero) then
1349 ** remove it from the freelist.
1350 **
1351 ** For non-test systems, page_ref() is a macro that calls _page_ref()
** only if the reference count is zero.  For test systems, page_ref()
1353 ** is a real function so that we can set breakpoints and trace it.
1354 */
1355 static void _page_ref(PgHdr *pPg){
1356   if( pPg->nRef==0 ){
1357     /* The page is currently on the freelist.  Remove it. */
1358     if( pPg==pPg->pPager->pFirstSynced ){
1359       PgHdr *p = pPg->pNextFree;
1360       while( p && p->needSync ){ p = p->pNextFree; }
1361       pPg->pPager->pFirstSynced = p;
1362     }
1363     if( pPg->pPrevFree ){
1364       pPg->pPrevFree->pNextFree = pPg->pNextFree;
1365     }else{
1366       pPg->pPager->pFirst = pPg->pNextFree;
1367     }
1368     if( pPg->pNextFree ){
1369       pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1370     }else{
1371       pPg->pPager->pLast = pPg->pPrevFree;
1372     }
1373     pPg->pPager->nRef++;
1374   }
1375   pPg->nRef++;
1376   REFINFO(pPg);
1377 }
1378 #ifdef SQLITE_TEST
1379   static void page_ref(PgHdr *pPg){
1380     if( pPg->nRef==0 ){
1381       _page_ref(pPg);
1382     }else{
1383       pPg->nRef++;
1384       REFINFO(pPg);
1385     }
1386   }
1387 #else
1388 # define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1389 #endif
1390 
1391 /*
1392 ** Increment the reference count for a page.  The input pointer is
1393 ** a reference to the page data.
1394 */
1395 int sqlite3pager_ref(void *pData){
1396   PgHdr *pPg = DATA_TO_PGHDR(pData);
1397   page_ref(pPg);
1398   return SQLITE_OK;
1399 }
1400 
1401 /*
1402 ** Sync the journal.  In other words, make sure all the pages that have
1403 ** been written to the journal have actually reached the surface of the
1404 ** disk.  It is not safe to modify the original database file until after
1405 ** the journal has been synced.  If the original database is modified before
1406 ** the journal is synced and a power failure occurs, the unsynced journal
1407 ** data would be lost and we would be unable to completely rollback the
1408 ** database changes.  Database corruption would occur.
1409 **
1410 ** This routine also updates the nRec field in the header of the journal.
1411 ** (See comments on the pager_playback() routine for additional information.)
1412 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
1413 ** is synced, then the nRec field is updated, then a second sync occurs.
1414 **
** For temporary databases, we do not care if we are able to rollback
** after a power failure, so no sync occurs.
**
** This routine clears the needSync field of every page currently held in
** memory.
1420 */
1421 static int syncJournal(Pager *pPager, const char *zMaster){
1422   PgHdr *pPg;
1423   int rc = SQLITE_OK;
1424 
1425   /* Sync the journal before modifying the main database
1426   ** (assuming there is a journal and it needs to be synced.)
1427   */
1428   if( pPager->needSync || zMaster ){
1429     if( !pPager->tempFile ){
1430       assert( pPager->journalOpen );
1431       /* assert( !pPager->noSync ); // noSync might be set if synchronous
1432       ** was turned off after the transaction was started.  Ticket #615 */
1433 #ifndef NDEBUG
1434       {
1435         /* Make sure the pPager->nRec counter we are keeping agrees
1436         ** with the nRec computed from the size of the journal file.
1437         */
1438         off_t hdrSz, pgSz, jSz;
1439         hdrSz = JOURNAL_HDR_SZ(pPager);
1440         pgSz = JOURNAL_PG_SZ(pPager);
1441         rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
1442         if( rc!=0 ) return rc;
1443         assert( pPager->nRec*pgSz+hdrSz==jSz );
1444       }
1445 #endif
1446       {
1447         /* Write the nRec value into the journal file header */
1448         off_t szJ;
1449         if( pPager->fullSync ){
1450           TRACE2("SYNC journal of %d\n", pPager->fd.h);
1451           rc = sqlite3OsSync(&pPager->jfd);
1452           if( rc!=0 ) return rc;
1453         }
1454         sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic));
1455         rc = write32bits(&pPager->jfd, pPager->nRec);
1456         if( rc ) return rc;
1457 
1458         /* Write the name of the master journal file if one is specified */
1459         if( zMaster ){
1460           assert( strlen(zMaster)<pPager->nMaster );
1461           rc = sqlite3OsSeek(&pPager->jfd, 20);
1462           if( rc ) return rc;
1463           rc = sqlite3OsWrite(&pPager->jfd, zMaster, strlen(zMaster)+1);
1464           if( rc ) return rc;
1465         }
1466 
1467         szJ = JOURNAL_HDR_SZ(pPager) +  pPager->nRec*JOURNAL_PG_SZ(pPager);
1468         sqlite3OsSeek(&pPager->jfd, szJ);
1469       }
1470       TRACE2("SYNC journal of %d\n", pPager->fd.h);
1471       rc = sqlite3OsSync(&pPager->jfd);
1472       if( rc!=0 ) return rc;
1473       pPager->journalStarted = 1;
1474     }
1475     pPager->needSync = 0;
1476 
1477     /* Erase the needSync flag from every page.
1478     */
1479     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1480       pPg->needSync = 0;
1481     }
1482     pPager->pFirstSynced = pPager->pFirst;
1483   }
1484 
1485 #ifndef NDEBUG
1486   /* If the Pager.needSync flag is clear then the PgHdr.needSync
1487   ** flag must also be clear for all pages.  Verify that this
1488   ** invariant is true.
1489   */
1490   else{
1491     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1492       assert( pPg->needSync==0 );
1493     }
1494     assert( pPager->pFirstSynced==pPager->pFirst );
1495   }
1496 #endif
1497 
1498   return rc;
1499 }
1500 
1501 /*
1502 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1503 ** every one of those pages out to the database file and mark them all
1504 ** as clean.
1505 */
1506 static int pager_write_pagelist(PgHdr *pList){
1507   Pager *pPager;
1508   int rc;
1509   int busy = 1;
1510 
1511   if( pList==0 ) return SQLITE_OK;
1512   pPager = pList->pPager;
1513 
1514   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
1515   ** database file. If there is already an EXCLUSIVE lock, the following
1516   ** calls to sqlite3OsLock() are no-ops.
1517   **
1518   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
1519   ** through an intermediate state PENDING.   A PENDING lock prevents new
1520   ** readers from attaching to the database but is unsufficient for us to
1521   ** write.  The idea of a PENDING lock is to prevent new readers from
1522   ** coming in while we wait for existing readers to clear.
1523   **
1524   ** While the pager is in the RESERVED state, the original database file
1525   ** is unchanged and we can rollback without having to playback the
1526   ** journal into the original database file.  Once we transition to
1527   ** EXCLUSIVE, it means the database file has been changed and any rollback
1528   ** will require a journal playback.
1529   */
1530   do {
1531     rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
1532   }while( rc==SQLITE_BUSY &&
1533       pPager->pBusyHandler &&
1534       pPager->pBusyHandler->xFunc &&
1535       pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++)
1536   );
1537   if( rc!=SQLITE_OK ){
1538     return rc;
1539   }
1540   pPager->state = PAGER_EXCLUSIVE;
1541 
1542   while( pList ){
1543     assert( pList->dirty );
1544     sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1545     CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
1546     TRACE2("STORE page %d\n", pList->pgno);
1547     rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
1548     CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
1549     if( rc ) return rc;
1550     pList->dirty = 0;
1551     pList = pList->pDirty;
1552   }
1553   return SQLITE_OK;
1554 }
1555 
1556 /*
1557 ** Collect every dirty page into a dirty list and
1558 ** return a pointer to the head of that list.  All pages are
1559 ** collected even if they are still in use.
1560 */
1561 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1562   PgHdr *p, *pList;
1563   pList = 0;
1564   for(p=pPager->pAll; p; p=p->pNextAll){
1565     if( p->dirty ){
1566       p->pDirty = pList;
1567       pList = p;
1568     }
1569   }
1570   return pList;
1571 }
1572 
1573 /*
1574 ** Acquire a page.
1575 **
1576 ** A read lock on the disk file is obtained when the first page is acquired.
1577 ** This read lock is dropped when the last page is released.
1578 **
1579 ** A _get works for any page number greater than 0.  If the database
1580 ** file is smaller than the requested page, then no actual disk
1581 ** read occurs and the memory image of the page is initialized to
1582 ** all zeros.  The extra data appended to a page is always initialized
1583 ** to zeros the first time a page is loaded into memory.
1584 **
1585 ** The acquisition might fail for several reasons.  In all cases,
1586 ** an appropriate error code is returned and *ppPage is set to NULL.
1587 **
1588 ** See also sqlite3pager_lookup().  Both this routine and _lookup() attempt
1589 ** to find a page in the in-memory cache first.  If the page is not already
1590 ** in memory, this routine goes to disk to read it in whereas _lookup()
1591 ** just returns 0.  This routine acquires a read-lock the first time it
1592 ** has to go to disk, and could also playback an old journal if necessary.
1593 ** Since _lookup() never goes to disk, it never has to deal with locks
1594 ** or journal files.
1595 */
1596 int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
1597   PgHdr *pPg;
1598   int rc;
1599 
1600   /* Make sure we have not hit any critical errors.
1601   */
1602   assert( pPager!=0 );
1603   assert( pgno!=0 );
1604   *ppPage = 0;
1605   if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1606     return pager_errcode(pPager);
1607   }
1608 
1609   /* If this is the first page accessed, then get a SHARED lock
1610   ** on the database file.
1611   */
1612   if( pPager->nRef==0 && !pPager->memDb ){
1613     int busy = 1;
1614     do {
1615       rc = sqlite3OsLock(&pPager->fd, SHARED_LOCK);
1616     }while( rc==SQLITE_BUSY &&
1617         pPager->pBusyHandler &&
1618         pPager->pBusyHandler->xFunc &&
1619         pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++)
1620     );
1621     if( rc!=SQLITE_OK ){
1622       return rc;
1623     }
1624     pPager->state = PAGER_SHARED;
1625 
1626     /* If a journal file exists, and there is no RESERVED lock on the
1627     ** database file, then it either needs to be played back or deleted.
1628     */
1629     if( pPager->useJournal &&
1630         sqlite3OsFileExists(pPager->zJournal) &&
1631         !sqlite3OsCheckReservedLock(&pPager->fd)
1632     ){
1633        int rc;
1634 
1635        /* Get an EXCLUSIVE lock on the database file. */
1636        rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
1637        if( rc!=SQLITE_OK ){
1638          sqlite3OsUnlock(&pPager->fd, NO_LOCK);
1639          pPager->state = PAGER_UNLOCK;
1640          return rc;
1641        }
1642        pPager->state = PAGER_EXCLUSIVE;
1643 
1644        /* Open the journal for reading only.  Return SQLITE_BUSY if
1645        ** we are unable to open the journal file.
1646        **
1647        ** The journal file does not need to be locked itself.  The
1648        ** journal file is never open unless the main database file holds
1649        ** a write lock, so there is never any chance of two or more
1650        ** processes opening the journal at the same time.
1651        */
1652        rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd);
1653        if( rc!=SQLITE_OK ){
1654          sqlite3OsUnlock(&pPager->fd, NO_LOCK);
1655          pPager->state = PAGER_UNLOCK;
1656          return SQLITE_BUSY;
1657        }
1658        pPager->journalOpen = 1;
1659        pPager->journalStarted = 0;
1660 
1661        /* Playback and delete the journal.  Drop the database write
1662        ** lock and reacquire the read lock.
1663        */
1664        rc = pager_playback(pPager, 0);
1665        if( rc!=SQLITE_OK ){
1666          return rc;
1667        }
1668     }
1669     pPg = 0;
1670   }else{
1671     /* Search for page in cache */
1672     pPg = pager_lookup(pPager, pgno);
1673     if( pPager->memDb && pPager->state==PAGER_UNLOCK ){
1674       pPager->state = PAGER_SHARED;
1675     }
1676   }
1677   if( pPg==0 ){
1678     /* The requested page is not in the page cache. */
1679     int h;
1680     pPager->nMiss++;
1681     if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || pPager->memDb ){
1682       /* Create a new page */
1683       pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
1684                               + sizeof(u32) + pPager->nExtra
1685                               + pPager->memDb*sizeof(PgHistory) );
1686       if( pPg==0 ){
1687         pager_unwritelock(pPager);
1688         pPager->errMask |= PAGER_ERR_MEM;
1689         return SQLITE_NOMEM;
1690       }
1691       memset(pPg, 0, sizeof(*pPg));
1692       if( pPager->memDb ){
1693         memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
1694       }
1695       pPg->pPager = pPager;
1696       pPg->pNextAll = pPager->pAll;
1697       pPager->pAll = pPg;
1698       pPager->nPage++;
1699     }else{
1700       /* Find a page to recycle.  Try to locate a page that does not
1701       ** require us to do an fsync() on the journal.
1702       */
1703       pPg = pPager->pFirstSynced;
1704 
1705       /* If we could not find a page that does not require an fsync()
1706       ** on the journal file then fsync the journal file.  This is a
1707       ** very slow operation, so we work hard to avoid it.  But sometimes
1708       ** it can't be helped.
1709       */
1710       if( pPg==0 ){
1711         int rc = syncJournal(pPager, 0);
1712         if( rc!=0 ){
1713           sqlite3pager_rollback(pPager);
1714           return SQLITE_IOERR;
1715         }
1716         pPg = pPager->pFirst;
1717       }
1718       assert( pPg->nRef==0 );
1719 
1720       /* Write the page to the database file if it is dirty.
1721       */
1722       if( pPg->dirty ){
1723         assert( pPg->needSync==0 );
1724         pPg->pDirty = 0;
1725         rc = pager_write_pagelist( pPg );
1726         if( rc!=SQLITE_OK ){
1727           sqlite3pager_rollback(pPager);
1728           return SQLITE_IOERR;
1729         }
1730       }
1731       assert( pPg->dirty==0 );
1732 
1733       /* If the page we are recycling is marked as alwaysRollback, then
1734       ** set the global alwaysRollback flag, thus disabling the
1735       ** sqlite_dont_rollback() optimization for the rest of this transaction.
1736       ** It is necessary to do this because the page marked alwaysRollback
1737       ** might be reloaded at a later time but at that point we won't remember
1738       ** that is was marked alwaysRollback.  This means that all pages must
1739       ** be marked as alwaysRollback from here on out.
1740       */
1741       if( pPg->alwaysRollback ){
1742         pPager->alwaysRollback = 1;
1743       }
1744 
1745       /* Unlink the old page from the free list and the hash table
1746       */
1747       unlinkPage(pPg);
1748       pPager->nOvfl++;
1749     }
1750     pPg->pgno = pgno;
1751     if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
1752       sqlite3CheckMemory(pPager->aInJournal, pgno/8);
1753       assert( pPager->journalOpen );
1754       pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
1755       pPg->needSync = 0;
1756     }else{
1757       pPg->inJournal = 0;
1758       pPg->needSync = 0;
1759     }
1760     if( pPager->aInStmt && (int)pgno<=pPager->stmtSize
1761              && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){
1762       page_add_to_stmt_list(pPg);
1763     }else{
1764       page_remove_from_stmt_list(pPg);
1765     }
1766     pPg->dirty = 0;
1767     pPg->nRef = 1;
1768     REFINFO(pPg);
1769     pPager->nRef++;
1770     h = pager_hash(pgno);
1771     pPg->pNextHash = pPager->aHash[h];
1772     pPager->aHash[h] = pPg;
1773     if( pPg->pNextHash ){
1774       assert( pPg->pNextHash->pPrevHash==0 );
1775       pPg->pNextHash->pPrevHash = pPg;
1776     }
1777     if( pPager->nExtra>0 ){
1778       memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1779     }
1780     if( pPager->dbSize<0 ) sqlite3pager_pagecount(pPager);
1781     if( pPager->errMask!=0 ){
1782       sqlite3pager_unref(PGHDR_TO_DATA(pPg));
1783       rc = pager_errcode(pPager);
1784       return rc;
1785     }
1786     if( pPager->dbSize<(int)pgno ){
1787       memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1788     }else{
1789       int rc;
1790       assert( pPager->memDb==0 );
1791       sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1792       rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1793       TRACE2("FETCH page %d\n", pPg->pgno);
1794       CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
1795       if( rc!=SQLITE_OK ){
1796         off_t fileSize;
1797         if( sqlite3OsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1798                || fileSize>=pgno*SQLITE_PAGE_SIZE ){
1799           sqlite3pager_unref(PGHDR_TO_DATA(pPg));
1800           return rc;
1801         }else{
1802           memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1803         }
1804       }
1805     }
1806   }else{
1807     /* The requested page is in the page cache. */
1808     pPager->nHit++;
1809     page_ref(pPg);
1810   }
1811   *ppPage = PGHDR_TO_DATA(pPg);
1812   return SQLITE_OK;
1813 }
1814 
1815 /*
1816 ** Acquire a page if it is already in the in-memory cache.  Do
1817 ** not read the page from disk.  Return a pointer to the page,
1818 ** or 0 if the page is not in cache.
1819 **
1820 ** See also sqlite3pager_get().  The difference between this routine
1821 ** and sqlite3pager_get() is that _get() will go to the disk and read
1822 ** in the page if the page is not already in cache.  This routine
1823 ** returns NULL if the page is not in cache or if a disk I/O error
1824 ** has ever happened.
1825 */
1826 void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){
1827   PgHdr *pPg;
1828 
1829   assert( pPager!=0 );
1830   assert( pgno!=0 );
1831   if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1832     return 0;
1833   }
1834   pPg = pager_lookup(pPager, pgno);
1835   if( pPg==0 ) return 0;
1836   page_ref(pPg);
1837   return PGHDR_TO_DATA(pPg);
1838 }
1839 
1840 /*
1841 ** Release a page.
1842 **
1843 ** If the number of references to the page drop to zero, then the
1844 ** page is added to the LRU list.  When all references to all pages
1845 ** are released, a rollback occurs and the lock on the database is
1846 ** removed.
1847 */
1848 int sqlite3pager_unref(void *pData){
1849   PgHdr *pPg;
1850 
1851   /* Decrement the reference count for this page
1852   */
1853   pPg = DATA_TO_PGHDR(pData);
1854   assert( pPg->nRef>0 );
1855   pPg->nRef--;
1856   REFINFO(pPg);
1857 
1858   /* When the number of references to a page reach 0, call the
1859   ** destructor and add the page to the freelist.
1860   */
1861   if( pPg->nRef==0 ){
1862     Pager *pPager;
1863     pPager = pPg->pPager;
1864     pPg->pNextFree = 0;
1865     pPg->pPrevFree = pPager->pLast;
1866     pPager->pLast = pPg;
1867     if( pPg->pPrevFree ){
1868       pPg->pPrevFree->pNextFree = pPg;
1869     }else{
1870       pPager->pFirst = pPg;
1871     }
1872     if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1873       pPager->pFirstSynced = pPg;
1874     }
1875     if( pPager->xDestructor ){
1876       pPager->xDestructor(pData, pPager->pageSize);
1877     }
1878 
1879     /* When all pages reach the freelist, drop the read lock from
1880     ** the database file.
1881     */
1882     pPager->nRef--;
1883     assert( pPager->nRef>=0 );
1884     if( pPager->nRef==0 && !pPager->memDb ){
1885       pager_reset(pPager);
1886     }
1887   }
1888   return SQLITE_OK;
1889 }
1890 
1891 /*
1892 ** Create a journal file for pPager.  There should already be a RESERVED
1893 ** or EXCLUSIVE lock on the database file when this routine is called.
1894 **
1895 ** Return SQLITE_OK if everything.  Return an error code and release the
1896 ** write lock if anything goes wrong.
1897 */
1898 static int pager_open_journal(Pager *pPager){
1899   int rc;
1900   assert( pPager->state>=PAGER_RESERVED );
1901   assert( pPager->journalOpen==0 );
1902   assert( pPager->useJournal );
1903   sqlite3pager_pagecount(pPager);
1904   pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1905   if( pPager->aInJournal==0 ){
1906     sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
1907     pPager->state = PAGER_SHARED;
1908     return SQLITE_NOMEM;
1909   }
1910   rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1911   if( rc!=SQLITE_OK ){
1912     sqliteFree(pPager->aInJournal);
1913     pPager->aInJournal = 0;
1914     sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
1915     pPager->state = PAGER_SHARED;
1916     return SQLITE_CANTOPEN;
1917   }
1918   sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd);
1919   pPager->journalOpen = 1;
1920   pPager->journalStarted = 0;
1921   pPager->needSync = 0;
1922   pPager->alwaysRollback = 0;
1923   pPager->nRec = 0;
1924   if( pPager->errMask!=0 ){
1925     rc = pager_errcode(pPager);
1926     return rc;
1927   }
1928   pPager->origDbSize = pPager->dbSize;
1929 
1930   /* Create the header for the journal:
1931   ** - 8 bytes: Magic identifying journal format.
1932   ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
1933   ** - 4 bytes: Magic used for page checksums.
1934   ** - 4 bytes: Initial database page count.
1935   ** - 4 bytes: Number of bytes reserved for master journal ptr (nMaster)
1936   ** - nMaster bytes: Space for a master journal pointer.
1937   */
1938   rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1939   if( rc==SQLITE_OK ){
1940     rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
1941   }
1942   if( rc==SQLITE_OK ){
1943     sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
1944     rc = write32bits(&pPager->jfd, pPager->cksumInit);
1945   }
1946   if( rc==SQLITE_OK ){
1947     rc = write32bits(&pPager->jfd, pPager->dbSize);
1948   }
1949   if( rc==SQLITE_OK ){
1950     rc = write32bits(&pPager->jfd, pPager->nMaster);
1951   }
1952   if( rc==SQLITE_OK ){
1953     sqlite3OsSeek(&pPager->jfd, 24 + pPager->nMaster - 1);
1954     rc = sqlite3OsWrite(&pPager->jfd, "\000", 1);
1955   }
1956   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
1957     rc = sqlite3pager_stmt_begin(pPager);
1958   }
1959   if( rc!=SQLITE_OK ){
1960     rc = pager_unwritelock(pPager);
1961     if( rc==SQLITE_OK ){
1962       rc = SQLITE_FULL;
1963     }
1964   }
1965   return rc;
1966 }
1967 
1968 /*
1969 ** Acquire a write-lock on the database.  The lock is removed when
1970 ** the any of the following happen:
1971 **
1972 **   *  sqlite3pager_commit() is called.
1973 **   *  sqlite3pager_rollback() is called.
1974 **   *  sqlite3pager_close() is called.
1975 **   *  sqlite3pager_unref() is called to on every outstanding page.
1976 **
1977 ** The first parameter to this routine is a pointer to any open page of the
1978 ** database file.  Nothing changes about the page - it is used merely to
1979 ** acquire a pointer to the Pager structure and as proof that there is
1980 ** already a read-lock on the database.
1981 **
1982 ** The second parameter indicates how much space in bytes to reserve for a
1983 ** master journal file-name at the start of the journal when it is created.
1984 **
1985 ** A journal file is opened if this is not a temporary file.  For temporary
1986 ** files, the opening of the journal file is deferred until there is an
1987 ** actual need to write to the journal.
1988 **
1989 ** If the database is already reserved for writing, this routine is a no-op.
1990 */
1991 int sqlite3pager_begin(void *pData, int nMaster){
1992   PgHdr *pPg = DATA_TO_PGHDR(pData);
1993   Pager *pPager = pPg->pPager;
1994   int rc = SQLITE_OK;
1995   assert( pPg->nRef>0 );
1996   assert( nMaster>=0 );
1997   assert( pPager->state!=PAGER_UNLOCK );
1998   if( pPager->state==PAGER_SHARED ){
1999     assert( pPager->aInJournal==0 );
2000     if( pPager->memDb ){
2001       pPager->state = PAGER_EXCLUSIVE;
2002       pPager->origDbSize = pPager->dbSize;
2003     }else{
2004       int busy = 1;
2005       do {
2006         rc = sqlite3OsLock(&pPager->fd, RESERVED_LOCK);
2007       }while( rc==SQLITE_BUSY &&
2008           pPager->pBusyHandler &&
2009           pPager->pBusyHandler->xFunc &&
2010           pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++)
2011       );
2012       if( rc!=SQLITE_OK ){
2013         return rc;
2014       }
2015       pPager->nMaster = nMaster;
2016       pPager->state = PAGER_RESERVED;
2017       pPager->dirtyCache = 0;
2018       TRACE3("TRANSACTION %d nMaster=%d\n", pPager->fd.h, nMaster);
2019       if( pPager->useJournal && !pPager->tempFile ){
2020         rc = pager_open_journal(pPager);
2021       }
2022     }
2023   }
2024   return rc;
2025 }
2026 
/*
** Mark a data page as writeable.  The page is written into the journal
** if it is not there already.  This routine must be called before making
** changes to a page.
**
** pData points to the data area of a page currently held by the caller.
**
** If no write transaction is active yet, sqlite3pager_begin() is called
** (with no space reserved for a master journal name) to obtain the write
** lock, and the journal is opened if it is not open already.  If the lock
** cannot be obtained the error from sqlite3pager_begin() is returned.  The
** calling routine must check the return value and be careful not to
** change any page data until this routine returns SQLITE_OK.
**
** Return values:
**
**   SQLITE_OK        The page may now be modified.
**   pager_errcode()  The pager already has an error recorded.
**   SQLITE_PERM      The pager is read-only.
**   other            Beginning the transaction, opening the journal, or
**                    writing to the journal or the statement journal
**                    failed.  After a failed journal write the
**                    transaction is rolled back, PAGER_ERR_FULL is set in
**                    errMask (so later write attempts fail at the error
**                    check at the top of this routine) and the error
**                    code from the failed write is returned.
*/
int sqlite3pager_write(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;

  /* Check for errors
  */
  if( pPager->errMask ){
    return pager_errcode(pPager);
  }
  if( pPager->readOnly ){
    return SQLITE_PERM;
  }

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal (and to the statement journal, when one is in use)
  ** then we can return right away.
  */
  pPg->dirty = 1;
  if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
    pPager->dirtyCache = 1;
    return SQLITE_OK;
  }

  /* If we get this far, it means that the page needs to be
  ** written to the transaction journal or the statement journal
  ** or both.
  **
  ** First check to see that the transaction journal exists and
  ** create it if it does not.
  */
  assert( pPager->state!=PAGER_UNLOCK );
  rc = sqlite3pager_begin(pData, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  assert( pPager->state>=PAGER_RESERVED );
  if( !pPager->journalOpen && pPager->useJournal ){
    rc = pager_open_journal(pPager);
    if( rc!=SQLITE_OK ) return rc;
  }
  assert( pPager->journalOpen || !pPager->useJournal );
  pPager->dirtyCache = 1;

  /* The transaction journal now exists and we have a RESERVED or an
  ** EXCLUSIVE lock on the main database file.  Write the current page to
  ** the transaction journal if it is not there already.
  */
  if( !pPg->inJournal && (pPager->useJournal || pPager->memDb) ){
    if( (int)pPg->pgno <= pPager->origDbSize ){
      /* The page existed when the transaction started, so its original
      ** content has to be preserved.
      */
      int szPg;
      u32 saved;
      if( pPager->memDb ){
        /* In-memory database: keep a heap copy of the original content
        ** in the page's PgHistory instead of writing to a journal file.
        **
        ** NOTE(review): if the allocation fails no copy is kept, yet the
        ** page is still flagged inJournal and no error is returned.
        */
        PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
        TRACE2("JOURNAL page %d\n", pPg->pgno);
        assert( pHist->pOrig==0 );
        pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
        if( pHist->pOrig ){
          memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
        }
        pPg->inJournal = 1;
      }else{
        /* The journal record is SQLITE_PAGE_SIZE+8 bytes written in one
        ** call, starting 4 bytes before the page data.  Presumably
        ** store32bits() writes at a byte offset relative to the page
        ** data, so the page number lands just before the data and the
        ** checksum just after it, on top of the first bytes of the extra
        ** area -- TODO confirm against store32bits().  Those extra-area
        ** bytes are saved first and restored after the write.
        */
        u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
        saved = *(u32*)PGHDR_TO_EXTRA(pPg);
        store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
        szPg = SQLITE_PAGE_SIZE+8;
        store32bits(pPg->pgno, pPg, -4);
        /* The codec is applied around the write (op 7 before, op 0
        ** after); presumably encode then restore -- confirm against the
        ** CODEC macro.
        */
        CODEC(pPager, pData, pPg->pgno, 7);
        rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
        TRACE3("JOURNAL page %d needSync=%d\n", pPg->pgno, pPg->needSync);
        CODEC(pPager, pData, pPg->pgno, 0);
        *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
        if( rc!=SQLITE_OK ){
          sqlite3pager_rollback(pPager);
          pPager->errMask |= PAGER_ERR_FULL;
          return rc;
        }
        pPager->nRec++;
        assert( pPager->aInJournal!=0 );
        pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
        pPg->needSync = !pPager->noSync;
        pPg->inJournal = 1;
        /* The page is also flagged in the statement bitmap and added to
        ** the statement list here, so the statement-journal block further
        ** down finds inStmt already set (presumably by
        ** page_add_to_stmt_list()) and skips it.
        */
        if( pPager->stmtInUse ){
          pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
          page_add_to_stmt_list(pPg);
        }
      }
    }else{
      /* The page lies beyond the original end of the database, so there
      ** is no original content to journal.
      */
      pPg->needSync = !pPager->journalStarted && !pPager->noSync;
      TRACE3("APPEND page %d needSync=%d\n", pPg->pgno, pPg->needSync);
    }
    if( pPg->needSync ){
      pPager->needSync = 1;
    }
  }

  /* If the statement journal is open and the page is not in it,
  ** then write the current page to the statement journal.  Note that
  ** the statement journal format differs from the standard journal format
  ** in that it omits the checksums and the header: each record written
  ** here is SQLITE_PAGE_SIZE+4 bytes, starting 4 bytes before the page
  ** data.
  */
  if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
    assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
    if( pPager->memDb ){
      /* NOTE(review): as above, a failed allocation is silently ignored. */
      PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
      assert( pHist->pStmt==0 );
      pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
      if( pHist->pStmt ){
        memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
      }
      TRACE2("STMT-JOURNAL page %d\n", pPg->pgno);
    }else{
      store32bits(pPg->pgno, pPg, -4);
      CODEC(pPager, pData, pPg->pgno, 7);
      rc = sqlite3OsWrite(&pPager->stfd, ((char*)pData)-4, SQLITE_PAGE_SIZE+4);
      TRACE2("STMT-JOURNAL page %d\n", pPg->pgno);
      CODEC(pPager, pData, pPg->pgno, 0);
      if( rc!=SQLITE_OK ){
        sqlite3pager_rollback(pPager);
        pPager->errMask |= PAGER_ERR_FULL;
        return rc;
      }
      pPager->stmtNRec++;
      assert( pPager->aInStmt!=0 );
      pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
    }
    page_add_to_stmt_list(pPg);
  }

  /* Update the database size and return.  Writing a page past the
  ** current end grows the database to include that page.
  */
  if( pPager->dbSize<(int)pPg->pgno ){
    pPager->dbSize = pPg->pgno;
  }
  return rc;
}
2179 
2180 /*
2181 ** Return TRUE if the page given in the argument was previously passed
2182 ** to sqlite3pager_write().  In other words, return TRUE if it is ok
2183 ** to change the content of the page.
2184 */
2185 int sqlite3pager_iswriteable(void *pData){
2186   PgHdr *pPg = DATA_TO_PGHDR(pData);
2187   return pPg->dirty;
2188 }
2189 
2190 /*
2191 ** Replace the content of a single page with the information in the third
2192 ** argument.
2193 */
2194 int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
2195   void *pPage;
2196   int rc;
2197 
2198   rc = sqlite3pager_get(pPager, pgno, &pPage);
2199   if( rc==SQLITE_OK ){
2200     rc = sqlite3pager_write(pPage);
2201     if( rc==SQLITE_OK ){
2202       memcpy(pPage, pData, SQLITE_PAGE_SIZE);
2203     }
2204     sqlite3pager_unref(pPage);
2205   }
2206   return rc;
2207 }
2208 
2209 /*
2210 ** A call to this routine tells the pager that it is not necessary to
2211 ** write the information on page "pgno" back to the disk, even though
2212 ** that page might be marked as dirty.
2213 **
2214 ** The overlying software layer calls this routine when all of the data
2215 ** on the given page is unused.  The pager marks the page as clean so
2216 ** that it does not get written to disk.
2217 **
2218 ** Tests show that this optimization, together with the
2219 ** sqlite3pager_dont_rollback() below, more than double the speed
2220 ** of large INSERT operations and quadruple the speed of large DELETEs.
2221 **
2222 ** When this routine is called, set the alwaysRollback flag to true.
2223 ** Subsequent calls to sqlite3pager_dont_rollback() for the same page
2224 ** will thereafter be ignored.  This is necessary to avoid a problem
2225 ** where a page with data is added to the freelist during one part of
2226 ** a transaction then removed from the freelist during a later part
2227 ** of the same transaction and reused for some other purpose.  When it
2228 ** is first added to the freelist, this routine is called.  When reused,
2229 ** the dont_rollback() routine is called.  But because the page contains
2230 ** critical data, we still need to be sure it gets rolled back in spite
2231 ** of the dont_rollback() call.
2232 */
2233 void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){
2234   PgHdr *pPg;
2235 
2236   pPg = pager_lookup(pPager, pgno);
2237   pPg->alwaysRollback = 1;
2238   if( pPg && pPg->dirty ){
2239     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
2240       /* If this pages is the last page in the file and the file has grown
2241       ** during the current transaction, then do NOT mark the page as clean.
2242       ** When the database file grows, we must make sure that the last page
2243       ** gets written at least once so that the disk file will be the correct
2244       ** size. If you do not write this page and the size of the file
2245       ** on the disk ends up being too small, that can lead to database
2246       ** corruption during the next transaction.
2247       */
2248     }else{
2249       TRACE3("DONT_WRITE page %d of %d\n", pgno, pPager->fd.h);
2250       pPg->dirty = 0;
2251     }
2252   }
2253 }
2254 
2255 /*
2256 ** A call to this routine tells the pager that if a rollback occurs,
2257 ** it is not necessary to restore the data on the given page.  This
2258 ** means that the pager does not have to record the given page in the
2259 ** rollback journal.
2260 */
2261 void sqlite3pager_dont_rollback(void *pData){
2262   PgHdr *pPg = DATA_TO_PGHDR(pData);
2263   Pager *pPager = pPg->pPager;
2264 
2265   if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return;
2266   if( pPg->alwaysRollback || pPager->alwaysRollback || pPager->memDb ) return;
2267   if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
2268     assert( pPager->aInJournal!=0 );
2269     pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2270     pPg->inJournal = 1;
2271     if( pPager->stmtInUse ){
2272       pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2273       page_add_to_stmt_list(pPg);
2274     }
2275     TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, pPager->fd.h);
2276   }
2277   if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
2278     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
2279     assert( pPager->aInStmt!=0 );
2280     pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2281     page_add_to_stmt_list(pPg);
2282   }
2283 }
2284 
2285 
2286 /*
2287 ** Clear a PgHistory block
2288 */
2289 static void clearHistory(PgHistory *pHist){
2290   sqliteFree(pHist->pOrig);
2291   sqliteFree(pHist->pStmt);
2292   pHist->pOrig = 0;
2293   pHist->pStmt = 0;
2294 }
2295 
2296 /*
2297 ** Commit all changes to the database and release the write lock.
2298 **
2299 ** If the commit fails for any reason, a rollback attempt is made
2300 ** and an error code is returned.  If the commit worked, SQLITE_OK
2301 ** is returned.
2302 */
2303 int sqlite3pager_commit(Pager *pPager){
2304   int rc;
2305   PgHdr *pPg;
2306 
2307   if( pPager->errMask==PAGER_ERR_FULL ){
2308     rc = sqlite3pager_rollback(pPager);
2309     if( rc==SQLITE_OK ){
2310       rc = SQLITE_FULL;
2311     }
2312     return rc;
2313   }
2314   if( pPager->errMask!=0 ){
2315     rc = pager_errcode(pPager);
2316     return rc;
2317   }
2318   if( pPager->state<PAGER_RESERVED ){
2319     return SQLITE_ERROR;
2320   }
2321   TRACE2("COMMIT %d\n", pPager->fd.h);
2322   if( pPager->memDb ){
2323     pPg = pager_get_all_dirty_pages(pPager);
2324     while( pPg ){
2325       clearHistory(PGHDR_TO_HIST(pPg, pPager));
2326       pPg->dirty = 0;
2327       pPg->inJournal = 0;
2328       pPg->inStmt = 0;
2329       pPg->pPrevStmt = pPg->pNextStmt = 0;
2330       pPg = pPg->pDirty;
2331     }
2332     pPager->pStmt = 0;
2333     pPager->state = PAGER_SHARED;
2334     return SQLITE_OK;
2335   }
2336   if( pPager->dirtyCache==0 ){
2337     /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
2338     ** if there have been no changes to the database file. */
2339     assert( pPager->needSync==0 );
2340     rc = pager_unwritelock(pPager);
2341     pPager->dbSize = -1;
2342     return rc;
2343   }
2344   assert( pPager->journalOpen );
2345 #if 0
2346   rc = syncJournal(pPager, 0);
2347   if( rc!=SQLITE_OK ){
2348     goto commit_abort;
2349   }
2350   pPg = pager_get_all_dirty_pages(pPager);
2351   if( pPg ){
2352     rc = pager_write_pagelist(pPg);
2353     if( rc || (!pPager->noSync && sqlite3OsSync(&pPager->fd)!=SQLITE_OK) ){
2354       goto commit_abort;
2355     }
2356   }
2357 #endif
2358   rc = sqlite3pager_sync(pPager, 0);
2359   if( rc!=SQLITE_OK ){
2360     goto commit_abort;
2361   }
2362   rc = pager_unwritelock(pPager);
2363   pPager->dbSize = -1;
2364   return rc;
2365 
2366   /* Jump here if anything goes wrong during the commit process.
2367   */
2368 commit_abort:
2369   rc = sqlite3pager_rollback(pPager);
2370   if( rc==SQLITE_OK ){
2371     rc = SQLITE_FULL;
2372   }
2373   return rc;
2374 }
2375 
2376 /*
2377 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
2378 ** All in-memory cache pages revert to their original data contents.
2379 ** The journal is deleted.
2380 **
2381 ** This routine cannot fail unless some other process is not following
2382 ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2383 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
2384 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
2385 ** codes are returned for all these occasions.  Otherwise,
2386 ** SQLITE_OK is returned.
2387 */
2388 int sqlite3pager_rollback(Pager *pPager){
2389   int rc;
2390   TRACE2("ROLLBACK %d\n", pPager->fd.h);
2391   if( pPager->memDb ){
2392     PgHdr *p;
2393     for(p=pPager->pAll; p; p=p->pNextAll){
2394       PgHistory *pHist;
2395       if( !p->dirty ) continue;
2396       pHist = PGHDR_TO_HIST(p, pPager);
2397       if( pHist->pOrig ){
2398         memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
2399         TRACE2("ROLLBACK-PAGE %d\n", p->pgno);
2400       }else{
2401         TRACE2("PAGE %d is clean\n", p->pgno);
2402       }
2403       clearHistory(pHist);
2404       p->dirty = 0;
2405       p->inJournal = 0;
2406       p->inStmt = 0;
2407       p->pPrevStmt = p->pNextStmt = 0;
2408     }
2409     pPager->pStmt = 0;
2410     pPager->dbSize = pPager->origDbSize;
2411     memoryTruncate(pPager);
2412     pPager->stmtInUse = 0;
2413     pPager->state = PAGER_SHARED;
2414     return SQLITE_OK;
2415   }
2416 
2417   if( !pPager->dirtyCache || !pPager->journalOpen ){
2418     rc = pager_unwritelock(pPager);
2419     pPager->dbSize = -1;
2420     return rc;
2421   }
2422 
2423   if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
2424     if( pPager->state>=PAGER_EXCLUSIVE ){
2425       pager_playback(pPager, 1);
2426     }
2427     return pager_errcode(pPager);
2428   }
2429   if( pPager->state==PAGER_RESERVED ){
2430     int rc2;
2431     rc = pager_reload_cache(pPager);
2432     rc2 = pager_unwritelock(pPager);
2433     if( rc==SQLITE_OK ){
2434       rc = rc2;
2435     }
2436   }else{
2437     rc = pager_playback(pPager, 1);
2438   }
2439   if( rc!=SQLITE_OK ){
2440     rc = SQLITE_CORRUPT;
2441     pPager->errMask |= PAGER_ERR_CORRUPT;
2442   }
2443   pPager->dbSize = -1;
2444   return rc;
2445 }
2446 
2447 /*
2448 ** Return TRUE if the database file is opened read-only.  Return FALSE
2449 ** if the database is (in theory) writable.
2450 */
2451 int sqlite3pager_isreadonly(Pager *pPager){
2452   return pPager->readOnly;
2453 }
2454 
2455 /*
2456 ** This routine is used for testing and analysis only.
2457 */
2458 int *sqlite3pager_stats(Pager *pPager){
2459   static int a[9];
2460   a[0] = pPager->nRef;
2461   a[1] = pPager->nPage;
2462   a[2] = pPager->mxPage;
2463   a[3] = pPager->dbSize;
2464   a[4] = pPager->state;
2465   a[5] = pPager->errMask;
2466   a[6] = pPager->nHit;
2467   a[7] = pPager->nMiss;
2468   a[8] = pPager->nOvfl;
2469   return a;
2470 }
2471 
2472 /*
2473 ** Set the statement rollback point.
2474 **
2475 ** This routine should be called with the transaction journal already
2476 ** open.  A new statement journal is created that can be used to rollback
2477 ** changes of a single SQL command within a larger transaction.
2478 */
2479 int sqlite3pager_stmt_begin(Pager *pPager){
2480   int rc;
2481   char zTemp[SQLITE_TEMPNAME_SIZE];
2482   assert( !pPager->stmtInUse );
2483   TRACE2("STMT-BEGIN %d\n", pPager->fd.h);
2484   if( pPager->memDb ){
2485     pPager->stmtInUse = 1;
2486     pPager->stmtSize = pPager->dbSize;
2487     return SQLITE_OK;
2488   }
2489   if( !pPager->journalOpen ){
2490     pPager->stmtAutoopen = 1;
2491     return SQLITE_OK;
2492   }
2493   assert( pPager->journalOpen );
2494   pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 );
2495   if( pPager->aInStmt==0 ){
2496     sqlite3OsLock(&pPager->fd, SHARED_LOCK);
2497     return SQLITE_NOMEM;
2498   }
2499 #ifndef NDEBUG
2500   rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
2501   if( rc ) goto stmt_begin_failed;
2502   assert( pPager->stmtJSize ==
2503     pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager) );
2504 #endif
2505   pPager->stmtJSize =
2506      pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager);
2507   pPager->stmtSize = pPager->dbSize;
2508   if( !pPager->stmtOpen ){
2509     rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
2510     if( rc ) goto stmt_begin_failed;
2511     pPager->stmtOpen = 1;
2512     pPager->stmtNRec = 0;
2513   }
2514   pPager->stmtInUse = 1;
2515   return SQLITE_OK;
2516 
2517 stmt_begin_failed:
2518   if( pPager->aInStmt ){
2519     sqliteFree(pPager->aInStmt);
2520     pPager->aInStmt = 0;
2521   }
2522   return rc;
2523 }
2524 
2525 /*
2526 ** Commit a statement.
2527 */
2528 int sqlite3pager_stmt_commit(Pager *pPager){
2529   if( pPager->stmtInUse ){
2530     PgHdr *pPg, *pNext;
2531     TRACE2("STMT-COMMIT %d\n", pPager->fd.h);
2532     if( !pPager->memDb ){
2533       sqlite3OsSeek(&pPager->stfd, 0);
2534       /* sqlite3OsTruncate(&pPager->stfd, 0); */
2535       sqliteFree( pPager->aInStmt );
2536       pPager->aInStmt = 0;
2537     }
2538     for(pPg=pPager->pStmt; pPg; pPg=pNext){
2539       pNext = pPg->pNextStmt;
2540       assert( pPg->inStmt );
2541       pPg->inStmt = 0;
2542       pPg->pPrevStmt = pPg->pNextStmt = 0;
2543       if( pPager->memDb ){
2544         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2545         sqliteFree(pHist->pStmt);
2546         pHist->pStmt = 0;
2547       }
2548     }
2549     pPager->stmtNRec = 0;
2550     pPager->stmtInUse = 0;
2551     pPager->pStmt = 0;
2552   }
2553   pPager->stmtAutoopen = 0;
2554   return SQLITE_OK;
2555 }
2556 
2557 /*
2558 ** Rollback a statement.
2559 */
2560 int sqlite3pager_stmt_rollback(Pager *pPager){
2561   int rc;
2562   if( pPager->stmtInUse ){
2563     TRACE2("STMT-ROLLBACK %d\n", pPager->fd.h);
2564     if( pPager->memDb ){
2565       PgHdr *pPg;
2566       for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){
2567         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2568         if( pHist->pStmt ){
2569           memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
2570           sqliteFree(pHist->pStmt);
2571           pHist->pStmt = 0;
2572         }
2573       }
2574       pPager->dbSize = pPager->stmtSize;
2575       memoryTruncate(pPager);
2576       rc = SQLITE_OK;
2577     }else{
2578       rc = pager_stmt_playback(pPager);
2579     }
2580     sqlite3pager_stmt_commit(pPager);
2581   }else{
2582     rc = SQLITE_OK;
2583   }
2584   pPager->stmtAutoopen = 0;
2585   return rc;
2586 }
2587 
2588 /*
2589 ** Return the full pathname of the database file.
2590 */
2591 const char *sqlite3pager_filename(Pager *pPager){
2592   return pPager->zFilename;
2593 }
2594 
2595 /*
2596 ** Set the codec for this pager
2597 */
2598 void sqlite3pager_set_codec(
2599   Pager *pPager,
2600   void (*xCodec)(void*,void*,Pgno,int),
2601   void *pCodecArg
2602 ){
2603   pPager->xCodec = xCodec;
2604   pPager->pCodecArg = pCodecArg;
2605 }
2606 
2607 /*
2608 ** Sync the database file for the pager pPager. zMaster points to the name
2609 ** of a master journal file that should be written into the individual
2610 ** journal file. zMaster may be NULL, which is interpreted as no master
2611 ** journal (a single database transaction).
2612 **
2613 ** This routine ensures that the journal is synced, all dirty pages written
2614 ** to the database file and the database file synced. The only thing that
2615 ** remains to commit the transaction is to delete the journal file (or
2616 ** master journal file if specified).
2617 **
2618 ** Note that if zMaster==NULL, this does not overwrite a previous value
2619 ** passed to an sqlite3pager_sync() call.
2620 */
2621 int sqlite3pager_sync(Pager *pPager, const char *zMaster){
2622   int rc = SQLITE_OK;
2623 
2624   /* If this is an in-memory db, or no pages have been written to, this
2625   ** function is a no-op.
2626   */
2627   if( !pPager->memDb && pPager->dirtyCache ){
2628     PgHdr *pPg;
2629     assert( pPager->journalOpen );
2630 
2631     /* Sync the journal file */
2632     rc = syncJournal(pPager, zMaster);
2633     if( rc!=SQLITE_OK ) goto sync_exit;
2634 
2635     /* Write all dirty pages to the database file */
2636     pPg = pager_get_all_dirty_pages(pPager);
2637     rc = pager_write_pagelist(pPg);
2638     if( rc!=SQLITE_OK ) goto sync_exit;
2639 
2640     /* If any pages were actually written, sync the database file */
2641     if( pPg && !pPager->noSync ){
2642       rc = sqlite3OsSync(&pPager->fd);
2643     }
2644   }
2645 
2646 sync_exit:
2647   return rc;
2648 }
2649 
#ifdef SQLITE_DEBUG
/*
** Report the lock currently held on the database file of this pager.
** The result is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK,
** PENDING_LOCK, or EXCLUSIVE_LOCK.  Only compiled when SQLITE_DEBUG
** is defined.
*/
int sqlite3pager_lockstate(Pager *pPager){
  int eLock = pPager->fd.locktype;
  return eLock;
}
#endif
2660 
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Walks the pPager->pAll list and, for each page whose nRef is
** positive, writes one line to standard output giving the page
** number, the address of the page data, and the reference count.
** Only compiled when SQLITE_TEST is defined.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;   /* Skip unreferenced pages */
    /* The address is printed with %p.  Casting the pointer to int, as
    ** was done previously, discards the upper bits wherever pointers
    ** are wider than int. */
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif
2674