xref: /sqlite-3.40.0/src/pager.c (revision 4dcbdbff)
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** This is the implementation of the page cache subsystem or "pager".
13 **
14 ** The pager is used to access a database disk file.  It implements
15 ** atomic commit and rollback through the use of a journal file that
16 ** is separate from the database file.  The pager also implements file
17 ** locking to prevent two processes from writing the same database
18 ** file simultaneously, or one process from reading the database while
19 ** another is writing.
20 **
21 ** @(#) $Id: pager.c,v 1.208 2005/07/09 02:16:03 drh Exp $
22 */
23 #ifndef SQLITE_OMIT_DISKIO
24 #include "sqliteInt.h"
25 #include "os.h"
26 #include "pager.h"
27 #include <assert.h>
28 #include <string.h>
29 
/*
** Macros for troubleshooting.  Normally turned off.
**
** TRACEn() forwards its n arguments to sqlite3DebugPrintf().  To enable
** tracing, change the "#if 0" below to "#if 1".  In the default
** (disabled) configuration every TRACEn() invocation expands to nothing,
** so its arguments are never evaluated.
*/
#if 0
#define TRACE1(X)       sqlite3DebugPrintf(X)
#define TRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
#define TRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
#define TRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
#else
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V)
#endif
46 
/*
** The following two macros are used within the TRACEX() macros above
** to print out file-descriptors. They are required so that tracing
** can be turned on when using both the regular os_unix.c and os_test.c
** backends.
**
** PAGERID() takes a pointer to a Pager struct as its argument. The
** associated file-descriptor is returned. FILEHANDLEID() takes an OsFile
** struct as its argument.
**
** Every macro argument is parenthesized in the expansion so that an
** arbitrary expression (for example "&pager") may be passed.  The
** parameter of FILEHANDLEID() is deliberately not called "fd": in the
** OS_TEST expansion a parameter with that name would also be substituted
** for the "fd" member name, which only worked when the caller's variable
** happened to be called "fd" as well.
*/
#ifdef OS_TEST
#define PAGERID(p) ((p)->fd->fd.h)
#define FILEHANDLEID(f) ((f)->fd.h)
#else
#define PAGERID(p) ((p)->fd.h)
#define FILEHANDLEID(f) ((f).h)
#endif
64 
65 /*
66 ** The page cache as a whole is always in one of the following
67 ** states:
68 **
69 **   PAGER_UNLOCK        The page cache is not currently reading or
70 **                       writing the database file.  There is no
71 **                       data held in memory.  This is the initial
72 **                       state.
73 **
74 **   PAGER_SHARED        The page cache is reading the database.
75 **                       Writing is not permitted.  There can be
76 **                       multiple readers accessing the same database
77 **                       file at the same time.
78 **
79 **   PAGER_RESERVED      This process has reserved the database for writing
80 **                       but has not yet made any changes.  Only one process
81 **                       at a time can reserve the database.  The original
82 **                       database file has not been modified so other
83 **                       processes may still be reading the on-disk
84 **                       database file.
85 **
86 **   PAGER_EXCLUSIVE     The page cache is writing the database.
87 **                       Access is exclusive.  No other processes or
88 **                       threads can be reading or writing while one
89 **                       process is writing.
90 **
91 **   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
92 **                       after all dirty pages have been written to the
93 **                       database file and the file has been synced to
94 **                       disk. All that remains to do is to remove the
95 **                       journal file and the transaction will be
96 **                       committed.
97 **
98 ** The page cache comes up in PAGER_UNLOCK.  The first time a
99 ** sqlite3pager_get() occurs, the state transitions to PAGER_SHARED.
100 ** After all pages have been released using sqlite_page_unref(),
101 ** the state transitions back to PAGER_UNLOCK.  The first time
102 ** that sqlite3pager_write() is called, the state transitions to
103 ** PAGER_RESERVED.  (Note that sqlite_page_write() can only be
104 ** called on an outstanding page which means that the pager must
105 ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The transition to PAGER_EXCLUSIVE occurs before any changes
107 ** are made to the database file.  After an sqlite3pager_rollback()
108 ** or sqlite_pager_commit(), the state goes back to PAGER_SHARED.
109 */
/* Values stored in Pager.state.  The numeric order is significant: the
** pager compares states with relational operators (for example
** "state>=PAGER_RESERVED"). */
#define PAGER_UNLOCK      0
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5
115 
/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default.  To see why, consider the following scenario:
**
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock.  If
** both threads are using their busy callbacks, it might be a long time
** before one of the threads gives up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
**
** The default of 0 below only applies when the macro has not already
** been defined (e.g. on the compiler command line).
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
132 
/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
**
** The result is the smallest multiple of 8 that is >= X: 0 stays 0,
** 1 through 8 become 8, 9 becomes 16, and so on.  X is evaluated once.
*/
#define FORCE_ALIGNMENT(X)   (((X)+7)&~7)
138 
/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call sqlite3pager_write() on a page prior to making
** any modifications to that page.  The first time sqlite3pager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared.  The modified page cannot be written back into the original
** database file until the journal pages have been synced to disk and
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
**
** Memory layout of one cache entry, as used by the PGHDR_TO_DATA(),
** PGHDR_TO_EXTRA() and PGHDR_TO_HIST() macros:
**
**     [ PgHdr ][ pageSize bytes of data ][ nExtra bytes ][ PgHistory ]
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll;               /* A list of all pages */
  PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inStmt;                     /* TRUE if in the statement subjournal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  short int nRef;                /* Number of users of this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
#ifdef SQLITE_CHECK_PAGES
  u32 pageHash;                  /* Expected pager_pagehash(); see checkPage() */
#endif
  /* pPager->pageSize bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};
178 
/*
** For an in-memory only database, some extra information is recorded about
** each page so that changes can be rolled back.  (Journal files are not
** used for in-memory databases.)  The following information is added to
** the end of every EXTRA block for in-memory databases.
**
** This information could have been added directly to the PgHdr structure.
** But then it would take up an extra 8 bytes of storage on every PgHdr
** even for disk-based databases.  Splitting it out saves 8 bytes.  This
** is only a savings of 0.8% but those percentages add up.
**
** The PGHDR_TO_HIST() macro computes the address of this structure from
** a PgHdr pointer and the owning Pager.
*/
typedef struct PgHistory PgHistory;
struct PgHistory {
  u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
  u8 *pStmt;     /* Text as it was at the beginning of the current statement */
};
195 
/*
** A macro used for invoking the codec if there is one.
**
**   P  - pointer to the Pager whose xCodec/pCodecArg are used
**   D  - page data handed to the codec
**   N  - page number
**   X  - integer operation code passed through to xCodec() unchanged
**
** When SQLITE_HAS_CODEC is not defined the macro expands to nothing and
** none of its arguments are evaluated.  When it is defined, each argument
** is parenthesized in the expansion so that arbitrary expressions can be
** passed safely.  Note that P is evaluated more than once.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) \
    if( (P)->xCodec ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
#else
# define CODEC(P,D,N,X)
#endif
204 
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)      P is a PgHdr*.  Yields the address immediately
**                         after the header, as a void*.
**   DATA_TO_PGHDR(D)      Inverse of PGHDR_TO_DATA(): D is a data pointer,
**                         the result is the PgHdr* that precedes it.
**   PGHDR_TO_EXTRA(G,P)   G is a PgHdr*; P is a pointer to something with
**                         a pageSize member (note: in this macro P is NOT
**                         the page header).  Yields the address that lies
**                         (P)->pageSize bytes past the start of the page
**                         data, as a void*.
**   PGHDR_TO_HIST(P,PGR)  P is a PgHdr*; PGR supplies pageSize and nExtra.
**                         Yields a PgHistory* located pageSize+nExtra
**                         bytes past the start of the page data.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize])
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])
214 
/*
** How big to make the hash table used for locating in-memory pages
** by page number. This macro looks a little silly, but is evaluated
** at compile-time, not run-time (at least for gcc this is true).
**
** The result is one of 64, 128, 256, 512, 1024 or 2048, selected by
** MAX_PAGES.  Every possible value is a power of two, which the
** pager_hash() macro below relies on.
*/
#define N_PG_HASH (\
  (MAX_PAGES>1024)?2048: \
  (MAX_PAGES>512)?1024: \
  (MAX_PAGES>256)?512: \
  (MAX_PAGES>128)?256: \
  (MAX_PAGES>64)?128:64 \
)

/*
** Hash a page number: keep the low-order bits of PN so that the result
** is always in the range 0..N_PG_HASH-1.  Masking with N_PG_HASH-1 is
** only correct because N_PG_HASH is a power of two.
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))
232 
/*
** An open page cache is an instance of the following structure.
*/
struct Pager {
  u8 journalOpen;             /* True if journal file descriptor is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 stmtOpen;                /* True if the statement subjournal is open */
  u8 stmtInUse;               /* True if we are in a statement subtransaction */
  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
  u8 errMask;                 /* Bitmask of PAGER_ERR_* flags; 0 if no error */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyCache;              /* True if cached pages have changed */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 memDb;                   /* True to inhibit all file I/O */
  u8 setMaster;               /* True if a m-j name has been written to jrnl */
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int stmtNRec;               /* Number of records in stmt subjournal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  int pageSize;               /* Number of bytes in a page */
  int nPage;                  /* Total number of in-memory pages */
  int nMaxPage;               /* High water mark of nPage */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory holding database and journal files */
  OsFile fd, jfd;             /* File descriptors for database and journal */
  OsFile stfd;                /* File descriptor for the statement subjournal*/
  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  i64 journalOff;             /* Current byte offset in the journal file */
  i64 journalHdr;             /* Byte offset to previous journal header */
  i64 stmtHdrOff;             /* First journal header written this statement */
  i64 stmtCksum;              /* cksumInit when statement was started */
  i64 stmtJSize;              /* Size of journal at stmt_begin() */
  int sectorSize;             /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  int nRead,nWrite;           /* Database pages read/written */
#endif
  void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
  void (*xReiniter)(void*,int);   /* Call this routine when reloading pages */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number to PgHdr */
};
295 
/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument.  Otherwise the macro expands to nothing.
**
** The argument is not parenthesized in the expansion, so it should be
** a simple lvalue such as a variable or structure member.
*/
#ifdef SQLITE_TEST
# define TEST_INCR(x)  x++
#else
# define TEST_INCR(x)
#endif
305 
/*
** These are bits that can be set in Pager.errMask.  Several bits may be
** set at once; pager_errcode() translates the mask into a single
** SQLITE_* result code.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
314 
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** Since version 2.8.0, the journal format contains additional sanity
** checking information.  If the power fails while the journal is being
** written, semi-random garbage data might appear in the journal
** file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the new journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the pPager->pageSize bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
**
** The magic string is exactly 8 bytes long.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
};
341 
/*
** The size of the header and of each page in the journal is determined
** by the following macros.
**
** JOURNAL_PG_SZ() is the number of bytes one page record occupies in the
** journal: the page content itself (pageSize bytes) plus 8 bytes of
** per-record overhead.  The argument is a pointer to a Pager and is
** parenthesized in the expansion so that any pointer expression works.
*/
#define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)

/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
**
** The argument is a pointer to a Pager; the value is Pager.sectorSize.
*/
#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
354 
/*
** The macro MEMDB is true if we are dealing with an in-memory database.
** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
** the value of MEMDB will be a constant and the compiler will optimize
** out code that would never execute.
**
** Unless SQLITE_OMIT_MEMORYDB is defined, MEMDB expands to
** "pPager->memDb", so it can only be used where a variable named
** pPager (a pointer to the Pager) is in scope.
*/
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif
366 
/*
** The default size of a disk sector.
*/
#define PAGER_SECTOR_SIZE 512

/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file
** is devoted to storing a master journal name - there are no more pages to
** roll back. See comments for function writeMasterJournal() for details.
**
** The argument x is a pointer to a Pager; the value is derived from
** PENDING_BYTE and the pager's page size.
*/
/* Alternative definition, disabled: */
/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)

/*
** The maximum legal page number is (2^31 - 1), i.e. 0x7fffffff.
*/
#define PAGER_MAX_PGNO 2147483647
386 
/*
** Reference count tracking, for debugging only.
**
** In SQLITE_DEBUG builds, REFINFO(X) prints the page number, data address
** and reference count of page header X, but only while the global
** pager3_refinfo_enable is non-zero.  In all other builds REFINFO()
** expands to nothing.
*/
#ifdef SQLITE_DEBUG
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int nReport = 0;
    if( pager3_refinfo_enable ){
      void *pData = PGHDR_TO_DATA(p);
      sqlite3DebugPrintf("REFCNT: %4d addr=%p nRef=%d\n",
                         p->pgno, pData, p->nRef);
      nReport++;   /* A convenient spot for a breakpoint */
    }
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
405 
406 /*
407 ** Read a 32-bit integer from the given file descriptor.  Store the integer
408 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
409 ** error code is something goes wrong.
410 **
411 ** All values are stored on disk as big-endian.
412 */
413 static int read32bits(OsFile *fd, u32 *pRes){
414   u32 res;
415   int rc;
416   rc = sqlite3OsRead(fd, &res, sizeof(res));
417   if( rc==SQLITE_OK ){
418     unsigned char ac[4];
419     memcpy(ac, &res, 4);
420     res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
421   }
422   *pRes = res;
423   return rc;
424 }
425 
426 /*
427 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
428 ** on success or an error code is something goes wrong.
429 */
430 static int write32bits(OsFile *fd, u32 val){
431   unsigned char ac[4];
432   ac[0] = (val>>24) & 0xff;
433   ac[1] = (val>>16) & 0xff;
434   ac[2] = (val>>8) & 0xff;
435   ac[3] = val & 0xff;
436   return sqlite3OsWrite(fd, ac, 4);
437 }
438 
439 /*
440 ** Write the 32-bit integer 'val' into the page identified by page header
441 ** 'p' at offset 'offset'.
442 */
443 static void store32bits(u32 val, PgHdr *p, int offset){
444   unsigned char *ac;
445   ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
446   ac[0] = (val>>24) & 0xff;
447   ac[1] = (val>>16) & 0xff;
448   ac[2] = (val>>8) & 0xff;
449   ac[3] = val & 0xff;
450 }
451 
452 /*
453 ** Read a 32-bit integer at offset 'offset' from the page identified by
454 ** page header 'p'.
455 */
456 static u32 retrieve32bits(PgHdr *p, int offset){
457   unsigned char *ac;
458   ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
459   return (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
460 }
461 
462 
463 /*
464 ** Convert the bits in the pPager->errMask into an approprate
465 ** return code.
466 */
467 static int pager_errcode(Pager *pPager){
468   int rc = SQLITE_OK;
469   if( pPager->errMask & PAGER_ERR_LOCK )    rc = SQLITE_PROTOCOL;
470   if( pPager->errMask & PAGER_ERR_DISK )    rc = SQLITE_IOERR;
471   if( pPager->errMask & PAGER_ERR_FULL )    rc = SQLITE_FULL;
472   if( pPager->errMask & PAGER_ERR_MEM )     rc = SQLITE_NOMEM;
473   if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
474   return rc;
475 }
476 
#ifdef SQLITE_CHECK_PAGES
/*
** Compute and return a 32-bit hash over the pageSize bytes of data
** that belong to pPage.
*/
static u32 pager_pagehash(PgHdr *pPage){
  const unsigned char *aData = (const unsigned char *)PGHDR_TO_DATA(pPage);
  int nByte = pPage->pPager->pageSize;
  u32 h = 0;
  int k = 0;
  while( k<nByte ){
    h = (h+k)^aData[k];
    k++;
  }
  return h;
}

/*
** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
** is defined, and NDEBUG is not defined, an assert() statement checks
** that the page is either dirty or still matches the calculated page-hash.
** The check is skipped when no hash has been recorded (pageHash==0),
** when the pager has an error set, and for in-memory databases.
*/
#define CHECK_PAGE(x) checkPage(x)
static void checkPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager;   /* Must be named pPager: MEMDB uses it */
  assert( pPg->pageHash==0
       || pPager->errMask!=0
       || MEMDB
       || pPg->dirty
       || pager_pagehash(pPg)==pPg->pageHash );
}

#else
#define CHECK_PAGE(x)
#endif
506 
507 /*
508 ** When this is called the journal file for pager pPager must be open.
509 ** The master journal file name is read from the end of the file and
510 ** written into memory obtained from sqliteMalloc(). *pzMaster is
511 ** set to point at the memory and SQLITE_OK returned. The caller must
512 ** sqliteFree() *pzMaster.
513 **
514 ** If no master journal file name is present *pzMaster is set to 0 and
515 ** SQLITE_OK returned.
516 */
517 static int readMasterJournal(OsFile *pJrnl, char **pzMaster){
518   int rc;
519   u32 len;
520   i64 szJ;
521   u32 cksum;
522   int i;
523   unsigned char aMagic[8]; /* A buffer to hold the magic header */
524 
525   *pzMaster = 0;
526 
527   rc = sqlite3OsFileSize(pJrnl, &szJ);
528   if( rc!=SQLITE_OK || szJ<16 ) return rc;
529 
530   rc = sqlite3OsSeek(pJrnl, szJ-16);
531   if( rc!=SQLITE_OK ) return rc;
532 
533   rc = read32bits(pJrnl, &len);
534   if( rc!=SQLITE_OK ) return rc;
535 
536   rc = read32bits(pJrnl, &cksum);
537   if( rc!=SQLITE_OK ) return rc;
538 
539   rc = sqlite3OsRead(pJrnl, aMagic, 8);
540   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
541 
542   rc = sqlite3OsSeek(pJrnl, szJ-16-len);
543   if( rc!=SQLITE_OK ) return rc;
544 
545   *pzMaster = (char *)sqliteMalloc(len+1);
546   if( !*pzMaster ){
547     return SQLITE_NOMEM;
548   }
549   rc = sqlite3OsRead(pJrnl, *pzMaster, len);
550   if( rc!=SQLITE_OK ){
551     sqliteFree(*pzMaster);
552     *pzMaster = 0;
553     return rc;
554   }
555 
556   /* See if the checksum matches the master journal name */
557   for(i=0; i<len; i++){
558     cksum -= (*pzMaster)[i];
559   }
560   if( cksum ){
561     /* If the checksum doesn't add up, then one or more of the disk sectors
562     ** containing the master journal filename is corrupted. This means
563     ** definitely roll back, so just return SQLITE_OK and report a (nul)
564     ** master-journal filename.
565     */
566     sqliteFree(*pzMaster);
567     *pzMaster = 0;
568   }else{
569     (*pzMaster)[len] = '\0';
570   }
571 
572   return SQLITE_OK;
573 }
574 
575 /*
576 ** Seek the journal file descriptor to the next sector boundary where a
577 ** journal header may be read or written. Pager.journalOff is updated with
578 ** the new seek offset.
579 **
580 ** i.e for a sector size of 512:
581 **
582 ** Input Offset              Output Offset
583 ** ---------------------------------------
584 ** 0                         0
585 ** 512                       512
586 ** 100                       512
587 ** 2000                      2048
588 **
589 */
590 static int seekJournalHdr(Pager *pPager){
591   i64 offset = 0;
592   i64 c = pPager->journalOff;
593   if( c ){
594     offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
595   }
596   assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
597   assert( offset>=c );
598   assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
599   pPager->journalOff = offset;
600   return sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
601 }
602 
603 /*
604 ** The journal file must be open when this routine is called. A journal
605 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
606 ** current location.
607 **
608 ** The format for the journal header is as follows:
609 ** - 8 bytes: Magic identifying journal format.
610 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
611 ** - 4 bytes: Random number used for page hash.
612 ** - 4 bytes: Initial database page count.
613 ** - 4 bytes: Sector size used by the process that wrote this journal.
614 **
615 ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
616 */
617 static int writeJournalHdr(Pager *pPager){
618 
619   int rc = seekJournalHdr(pPager);
620   if( rc ) return rc;
621 
622   pPager->journalHdr = pPager->journalOff;
623   if( pPager->stmtHdrOff==0 ){
624     pPager->stmtHdrOff = pPager->journalHdr;
625   }
626   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
627 
628   /* FIX ME:
629   **
630   ** Possibly for a pager not in no-sync mode, the journal magic should not
631   ** be written until nRec is filled in as part of next syncJournal().
632   **
633   ** Actually maybe the whole journal header should be delayed until that
634   ** point. Think about this.
635   */
636   rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
637 
638   if( rc==SQLITE_OK ){
639     /* The nRec Field. 0xFFFFFFFF for no-sync journals. */
640     rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
641   }
642   if( rc==SQLITE_OK ){
643     /* The random check-hash initialiser */
644     sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
645     rc = write32bits(&pPager->jfd, pPager->cksumInit);
646   }
647   if( rc==SQLITE_OK ){
648     /* The initial database size */
649     rc = write32bits(&pPager->jfd, pPager->dbSize);
650   }
651   if( rc==SQLITE_OK ){
652     /* The assumed sector size for this process */
653     rc = write32bits(&pPager->jfd, pPager->sectorSize);
654   }
655 
656   /* The journal header has been written successfully. Seek the journal
657   ** file descriptor to the end of the journal header sector.
658   */
659   if( rc==SQLITE_OK ){
660     sqlite3OsSeek(&pPager->jfd, pPager->journalOff-1);
661     rc = sqlite3OsWrite(&pPager->jfd, "\000", 1);
662   }
663   return rc;
664 }
665 
666 /*
667 ** The journal file must be open when this is called. A journal header file
668 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
669 ** file. See comments above function writeJournalHdr() for a description of
670 ** the journal header format.
671 **
672 ** If the header is read successfully, *nRec is set to the number of
673 ** page records following this header and *dbSize is set to the size of the
674 ** database before the transaction began, in pages. Also, pPager->cksumInit
675 ** is set to the value read from the journal header. SQLITE_OK is returned
676 ** in this case.
677 **
678 ** If the journal header file appears to be corrupted, SQLITE_DONE is
679 ** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
680 ** cannot be read from the journal file an error code is returned.
681 */
682 static int readJournalHdr(
683   Pager *pPager,
684   i64 journalSize,
685   u32 *pNRec,
686   u32 *pDbSize
687 ){
688   int rc;
689   unsigned char aMagic[8]; /* A buffer to hold the magic header */
690 
691   rc = seekJournalHdr(pPager);
692   if( rc ) return rc;
693 
694   if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
695     return SQLITE_DONE;
696   }
697 
698   rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
699   if( rc ) return rc;
700 
701   if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
702     return SQLITE_DONE;
703   }
704 
705   rc = read32bits(&pPager->jfd, pNRec);
706   if( rc ) return rc;
707 
708   rc = read32bits(&pPager->jfd, &pPager->cksumInit);
709   if( rc ) return rc;
710 
711   rc = read32bits(&pPager->jfd, pDbSize);
712   if( rc ) return rc;
713 
714   /* Update the assumed sector-size to match the value used by
715   ** the process that created this journal. If this journal was
716   ** created by a process other than this one, then this routine
717   ** is being called from within pager_playback(). The local value
718   ** of Pager.sectorSize is restored at the end of that routine.
719   */
720   rc = read32bits(&pPager->jfd, (u32 *)&pPager->sectorSize);
721   if( rc ) return rc;
722 
723   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
724   rc = sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
725   return rc;
726 }
727 
728 
729 /*
730 ** Write the supplied master journal name into the journal file for pager
731 ** pPager at the current location. The master journal name must be the last
732 ** thing written to a journal file. If the pager is in full-sync mode, the
733 ** journal file descriptor is advanced to the next sector boundary before
734 ** anything is written. The format is:
735 **
736 ** + 4 bytes: PAGER_MJ_PGNO.
737 ** + N bytes: length of master journal name.
738 ** + 4 bytes: N
739 ** + 4 bytes: Master journal name checksum.
740 ** + 8 bytes: aJournalMagic[].
741 **
742 ** The master journal page checksum is the sum of the bytes in the master
743 ** journal name.
744 */
745 static int writeMasterJournal(Pager *pPager, const char *zMaster){
746   int rc;
747   int len;
748   int i;
749   u32 cksum = 0;
750 
751   if( !zMaster || pPager->setMaster) return SQLITE_OK;
752   pPager->setMaster = 1;
753 
754   len = strlen(zMaster);
755   for(i=0; i<len; i++){
756     cksum += zMaster[i];
757   }
758 
759   /* If in full-sync mode, advance to the next disk sector before writing
760   ** the master journal name. This is in case the previous page written to
761   ** the journal has already been synced.
762   */
763   if( pPager->fullSync ){
764     rc = seekJournalHdr(pPager);
765     if( rc!=SQLITE_OK ) return rc;
766   }
767   pPager->journalOff += (len+20);
768 
769   rc = write32bits(&pPager->jfd, PAGER_MJ_PGNO(pPager));
770   if( rc!=SQLITE_OK ) return rc;
771 
772   rc = sqlite3OsWrite(&pPager->jfd, zMaster, len);
773   if( rc!=SQLITE_OK ) return rc;
774 
775   rc = write32bits(&pPager->jfd, len);
776   if( rc!=SQLITE_OK ) return rc;
777 
778   rc = write32bits(&pPager->jfd, cksum);
779   if( rc!=SQLITE_OK ) return rc;
780 
781   rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
782   pPager->needSync = 1;
783   return rc;
784 }
785 
786 /*
787 ** Add or remove a page from the list of all pages that are in the
788 ** statement journal.
789 **
790 ** The Pager keeps a separate list of pages that are currently in
791 ** the statement journal.  This helps the sqlite3pager_stmt_commit()
792 ** routine run MUCH faster for the common case where there are many
793 ** pages in memory but only a few are in the statement journal.
794 */
795 static void page_add_to_stmt_list(PgHdr *pPg){
796   Pager *pPager = pPg->pPager;
797   if( pPg->inStmt ) return;
798   assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 );
799   pPg->pPrevStmt = 0;
800   if( pPager->pStmt ){
801     pPager->pStmt->pPrevStmt = pPg;
802   }
803   pPg->pNextStmt = pPager->pStmt;
804   pPager->pStmt = pPg;
805   pPg->inStmt = 1;
806 }
807 static void page_remove_from_stmt_list(PgHdr *pPg){
808   if( !pPg->inStmt ) return;
809   if( pPg->pPrevStmt ){
810     assert( pPg->pPrevStmt->pNextStmt==pPg );
811     pPg->pPrevStmt->pNextStmt = pPg->pNextStmt;
812   }else{
813     assert( pPg->pPager->pStmt==pPg );
814     pPg->pPager->pStmt = pPg->pNextStmt;
815   }
816   if( pPg->pNextStmt ){
817     assert( pPg->pNextStmt->pPrevStmt==pPg );
818     pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt;
819   }
820   pPg->pNextStmt = 0;
821   pPg->pPrevStmt = 0;
822   pPg->inStmt = 0;
823 }
824 
825 /*
826 ** Find a page in the hash table given its page number.  Return
827 ** a pointer to the page or NULL if not found.
828 */
829 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
830   PgHdr *p = pPager->aHash[pager_hash(pgno)];
831   while( p && p->pgno!=pgno ){
832     p = p->pNextHash;
833   }
834   return p;
835 }
836 
837 /*
838 ** Unlock the database and clear the in-memory cache.  This routine
839 ** sets the state of the pager back to what it was when it was first
840 ** opened.  Any outstanding pages are invalidated and subsequent attempts
841 ** to access those pages will likely result in a coredump.
842 */
843 static void pager_reset(Pager *pPager){
844   PgHdr *pPg, *pNext;
845   if( pPager->errMask ) return;
846   for(pPg=pPager->pAll; pPg; pPg=pNext){
847     pNext = pPg->pNextAll;
848     sqliteFree(pPg);
849   }
850   pPager->pFirst = 0;
851   pPager->pFirstSynced = 0;
852   pPager->pLast = 0;
853   pPager->pAll = 0;
854   memset(pPager->aHash, 0, sizeof(pPager->aHash));
855   pPager->nPage = 0;
856   if( pPager->state>=PAGER_RESERVED ){
857     sqlite3pager_rollback(pPager);
858   }
859   sqlite3OsUnlock(&pPager->fd, NO_LOCK);
860   pPager->state = PAGER_UNLOCK;
861   pPager->dbSize = -1;
862   pPager->nRef = 0;
863   assert( pPager->journalOpen==0 );
864 }
865 
866 /*
867 ** This function is used to reset the pager after a malloc() failure. This
868 ** doesn't work with in-memory databases. If a malloc() fails when an
869 ** in-memory database is in use it is not possible to recover.
870 **
871 ** If a transaction or statement transaction is active, it is rolled back.
872 **
873 ** It is an error to call this function if any pages are in use.
874 */
875 #ifndef SQLITE_OMIT_GLOBALRECOVER
876 int sqlite3pager_reset(Pager *pPager){
877   if( pPager ){
878     if( pPager->nRef || MEMDB ){
879       return SQLITE_ERROR;
880     }
881     pPager->errMask &= ~(PAGER_ERR_MEM);
882     pager_reset(pPager);
883   }
884   return SQLITE_OK;
885 }
886 #endif
887 
888 
889 /*
890 ** When this routine is called, the pager has the journal file open and
891 ** a RESERVED or EXCLUSIVE lock on the database.  This routine releases
892 ** the database lock and acquires a SHARED lock in its place.  The journal
893 ** file is deleted and closed.
894 **
895 ** TODO: Consider keeping the journal file open for temporary databases.
896 ** This might give a performance improvement on windows where opening
897 ** a file is an expensive operation.
898 */
899 static int pager_unwritelock(Pager *pPager){
900   PgHdr *pPg;
901   int rc;
902   assert( !MEMDB );
903   if( pPager->state<PAGER_RESERVED ){
904     return SQLITE_OK;
905   }
906   sqlite3pager_stmt_commit(pPager);
907   if( pPager->stmtOpen ){
908     sqlite3OsClose(&pPager->stfd);
909     pPager->stmtOpen = 0;
910   }
911   if( pPager->journalOpen ){
912     sqlite3OsClose(&pPager->jfd);
913     pPager->journalOpen = 0;
914     sqlite3OsDelete(pPager->zJournal);
915     sqliteFree( pPager->aInJournal );
916     pPager->aInJournal = 0;
917     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
918       pPg->inJournal = 0;
919       pPg->dirty = 0;
920       pPg->needSync = 0;
921 #ifdef SQLITE_CHECK_PAGES
922       pPg->pageHash = pager_pagehash(pPg);
923 #endif
924     }
925     pPager->dirtyCache = 0;
926     pPager->nRec = 0;
927   }else{
928     assert( pPager->aInJournal==0 );
929     assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
930   }
931   rc = sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
932   pPager->state = PAGER_SHARED;
933   pPager->origDbSize = 0;
934   pPager->setMaster = 0;
935   return rc;
936 }
937 
938 /*
939 ** Compute and return a checksum for the page of data.
940 **
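** This is not a real checksum.  It is really just the sum of the
** random initial value and a sparse sample of the page data (every
** 200th byte, counting down from offset pageSize-200).  The page number
** is not part of the sum.  We experimented with a checksum of the entire
** data, but that was found to be too slow.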
944 **
945 ** Note that the page number is stored at the beginning of data and
946 ** the checksum is stored at the end.  This is important.  If journal
947 ** corruption occurs due to a power failure, the most likely scenario
948 ** is that one end or the other of the record will be changed.  It is
949 ** much less likely that the two ends of the journal record will be
950 ** correct and the middle be corrupt.  Thus, this "checksum" scheme,
951 ** though fast and simple, catches the mostly likely kind of corruption.
952 **
** The implementation adds every 200th (or so) byte of the data to the
** checksum.  That way if a single page spans 3 or more disk sectors and
** only the middle sector is corrupt, we will still have a reasonable
** chance of failing the checksum and thus detecting the problem.
957 */
958 static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
959   u32 cksum = pPager->cksumInit;
960   int i = pPager->pageSize-200;
961   while( i>0 ){
962     cksum += aData[i];
963     i -= 200;
964   }
965   return cksum;
966 }
967 
968 /*
969 ** Read a single page from the journal file opened on file descriptor
970 ** jfd.  Playback this one page.
971 **
972 ** If useCksum==0 it means this journal does not use checksums.  Checksums
973 ** are not used in statement journals because statement journals do not
974 ** need to survive power failures.
975 */
976 static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
977   int rc;
978   PgHdr *pPg;                   /* An existing page in the cache */
979   Pgno pgno;                    /* The page number of a page in journal */
980   u32 cksum;                    /* Checksum used for sanity checking */
981   u8 aData[SQLITE_MAX_PAGE_SIZE];  /* Temp storage for a page */
982 
983   /* useCksum should be true for the main journal and false for
984   ** statement journals.  Verify that this is always the case
985   */
986   assert( jfd == (useCksum ? &pPager->jfd : &pPager->stfd) );
987 
988 
989   rc = read32bits(jfd, &pgno);
990   if( rc!=SQLITE_OK ) return rc;
991   rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
992   if( rc!=SQLITE_OK ) return rc;
993   pPager->journalOff += pPager->pageSize + 4;
994 
995   /* Sanity checking on the page.  This is more important that I originally
996   ** thought.  If a power failure occurs while the journal is being written,
997   ** it could cause invalid data to be written into the journal.  We need to
998   ** detect this invalid data (with high probability) and ignore it.
999   */
1000   if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1001     return SQLITE_DONE;
1002   }
1003   if( pgno>(unsigned)pPager->dbSize ){
1004     return SQLITE_OK;
1005   }
1006   if( useCksum ){
1007     rc = read32bits(jfd, &cksum);
1008     if( rc ) return rc;
1009     pPager->journalOff += 4;
1010     if( pager_cksum(pPager, pgno, aData)!=cksum ){
1011       return SQLITE_DONE;
1012     }
1013   }
1014 
1015   assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
1016 
1017   /* If the pager is in RESERVED state, then there must be a copy of this
1018   ** page in the pager cache. In this case just update the pager cache,
1019   ** not the database file. The page is left marked dirty in this case.
1020   **
1021   ** If in EXCLUSIVE state, then we update the pager cache if it exists
1022   ** and the main file. The page is then marked not dirty.
1023   **
1024   ** Ticket #1171:  The statement journal might contain page content that is
1025   ** different from the page content at the start of the transaction.
1026   ** This occurs when a page is changed prior to the start of a statement
1027   ** then changed again within the statement.  When rolling back such a
1028   ** statement we must not write to the original database unless we know
1029   ** for certain that original page contents are in the main rollback
1030   ** journal.  Otherwise, if a full ROLLBACK occurs after the statement
1031   ** rollback the full ROLLBACK will not restore the page to its original
1032   ** content.  Two conditions must be met before writing to the database
1033   ** files. (1) the database must be locked.  (2) we know that the original
1034   ** page content is in the main journal either because the page is not in
1035   ** cache or else it is marked as needSync==0.
1036   */
1037   pPg = pager_lookup(pPager, pgno);
1038   assert( pPager->state>=PAGER_EXCLUSIVE || pPg!=0 );
1039   TRACE3("PLAYBACK %d page %d\n", PAGERID(pPager), pgno);
1040   if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
1041     sqlite3OsSeek(&pPager->fd, (pgno-1)*(i64)pPager->pageSize);
1042     rc = sqlite3OsWrite(&pPager->fd, aData, pPager->pageSize);
1043     if( pPg ) pPg->dirty = 0;
1044   }
1045   if( pPg ){
1046     /* No page should ever be explicitly rolled back that is in use, except
1047     ** for page 1 which is held in use in order to keep the lock on the
1048     ** database active. However such a page may be rolled back as a result
1049     ** of an internal error resulting in an automatic call to
1050     ** sqlite3pager_rollback().
1051     */
1052     void *pData;
1053     /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
1054     pData = PGHDR_TO_DATA(pPg);
1055     memcpy(pData, aData, pPager->pageSize);
1056     if( pPager->xDestructor ){  /*** FIX ME:  Should this be xReinit? ***/
1057       pPager->xDestructor(pData, pPager->pageSize);
1058     }
1059 #ifdef SQLITE_CHECK_PAGES
1060     pPg->pageHash = pager_pagehash(pPg);
1061 #endif
1062     CODEC(pPager, pData, pPg->pgno, 3);
1063   }
1064   return rc;
1065 }
1066 
1067 /*
1068 ** Parameter zMaster is the name of a master journal file. A single journal
1069 ** file that referred to the master journal file has just been rolled back.
1070 ** This routine checks if it is possible to delete the master journal file,
1071 ** and does so if it is.
1072 **
1073 ** The master journal file contains the names of all child journals.
** To tell if a master journal can be deleted, check each of the
** children.  If every child is either missing or does not refer to
** this master journal, then this master journal can be deleted.
1077 */
1078 static int pager_delmaster(const char *zMaster){
1079   int rc;
1080   int master_open = 0;
1081   OsFile master;
1082   char *zMasterJournal = 0; /* Contents of master journal file */
1083   i64 nMasterJournal;       /* Size of master journal file */
1084 
1085   /* Open the master journal file exclusively in case some other process
1086   ** is running this routine also. Not that it makes too much difference.
1087   */
1088   memset(&master, 0, sizeof(master));
1089   rc = sqlite3OsOpenReadOnly(zMaster, &master);
1090   if( rc!=SQLITE_OK ) goto delmaster_out;
1091   master_open = 1;
1092   rc = sqlite3OsFileSize(&master, &nMasterJournal);
1093   if( rc!=SQLITE_OK ) goto delmaster_out;
1094 
1095   if( nMasterJournal>0 ){
1096     char *zJournal;
1097     char *zMasterPtr = 0;
1098 
1099     /* Load the entire master journal file into space obtained from
1100     ** sqliteMalloc() and pointed to by zMasterJournal.
1101     */
1102     zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
1103     if( !zMasterJournal ){
1104       rc = SQLITE_NOMEM;
1105       goto delmaster_out;
1106     }
1107     rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
1108     if( rc!=SQLITE_OK ) goto delmaster_out;
1109 
1110     zJournal = zMasterJournal;
1111     while( (zJournal-zMasterJournal)<nMasterJournal ){
1112       if( sqlite3OsFileExists(zJournal) ){
1113         /* One of the journals pointed to by the master journal exists.
1114         ** Open it and check if it points at the master journal. If
1115         ** so, return without deleting the master journal file.
1116         */
1117         OsFile journal;
1118         int c;
1119 
1120         memset(&journal, 0, sizeof(journal));
1121         rc = sqlite3OsOpenReadOnly(zJournal, &journal);
1122         if( rc!=SQLITE_OK ){
1123           goto delmaster_out;
1124         }
1125 
1126         rc = readMasterJournal(&journal, &zMasterPtr);
1127         sqlite3OsClose(&journal);
1128         if( rc!=SQLITE_OK ){
1129           goto delmaster_out;
1130         }
1131 
1132         c = zMasterPtr!=0 && strcmp(zMasterPtr, zMaster)==0;
1133         sqliteFree(zMasterPtr);
1134         if( c ){
1135           /* We have a match. Do not delete the master journal file. */
1136           goto delmaster_out;
1137         }
1138       }
1139       zJournal += (strlen(zJournal)+1);
1140     }
1141   }
1142 
1143   sqlite3OsDelete(zMaster);
1144 
1145 delmaster_out:
1146   if( zMasterJournal ){
1147     sqliteFree(zMasterJournal);
1148   }
1149   if( master_open ){
1150     sqlite3OsClose(&master);
1151   }
1152   return rc;
1153 }
1154 
1155 /*
1156 ** Make every page in the cache agree with what is on disk.  In other words,
1157 ** reread the disk to reset the state of the cache.
1158 **
1159 ** This routine is called after a rollback in which some of the dirty cache
1160 ** pages had never been written out to disk.  We need to roll back the
1161 ** cache content and the easiest way to do that is to reread the old content
1162 ** back from the disk.
1163 */
1164 static int pager_reload_cache(Pager *pPager){
1165   PgHdr *pPg;
1166   int rc = SQLITE_OK;
1167   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1168     char zBuf[SQLITE_MAX_PAGE_SIZE];
1169     if( !pPg->dirty ) continue;
1170     if( (int)pPg->pgno <= pPager->origDbSize ){
1171       sqlite3OsSeek(&pPager->fd, pPager->pageSize*(i64)(pPg->pgno-1));
1172       rc = sqlite3OsRead(&pPager->fd, zBuf, pPager->pageSize);
1173       TRACE3("REFETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
1174       if( rc ) break;
1175       CODEC(pPager, zBuf, pPg->pgno, 2);
1176     }else{
1177       memset(zBuf, 0, pPager->pageSize);
1178     }
1179     if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), pPager->pageSize) ){
1180       memcpy(PGHDR_TO_DATA(pPg), zBuf, pPager->pageSize);
1181       if( pPager->xReiniter ){
1182         pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize);
1183       }else{
1184         memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
1185       }
1186     }
1187     pPg->needSync = 0;
1188     pPg->dirty = 0;
1189 #ifdef SQLITE_CHECK_PAGES
1190     pPg->pageHash = pager_pagehash(pPg);
1191 #endif
1192   }
1193   return rc;
1194 }
1195 
1196 /*
1197 ** Truncate the main file of the given pager to the number of pages
1198 ** indicated.
1199 */
1200 static int pager_truncate(Pager *pPager, int nPage){
1201   assert( pPager->state>=PAGER_EXCLUSIVE );
1202   return sqlite3OsTruncate(&pPager->fd, pPager->pageSize*(i64)nPage);
1203 }
1204 
1205 /*
1206 ** Playback the journal and thus restore the database file to
1207 ** the state it was in before we started making changes.
1208 **
1209 ** The journal file format is as follows:
1210 **
1211 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
1212 **  (2)  4 byte big-endian integer which is the number of valid page records
1213 **       in the journal.  If this value is 0xffffffff, then compute the
1214 **       number of page records from the journal size.
1215 **  (3)  4 byte big-endian integer which is the initial value for the
1216 **       sanity checksum.
1217 **  (4)  4 byte integer which is the number of pages to truncate the
1218 **       database to during a rollback.
1219 **  (5)  4 byte integer which is the number of bytes in the master journal
1220 **       name.  The value may be zero (indicate that there is no master
1221 **       journal.)
1222 **  (6)  N bytes of the master journal name.  The name will be nul-terminated
1223 **       and might be shorter than the value read from (5).  If the first byte
1224 **       of the name is \000 then there is no master journal.  The master
1225 **       journal name is stored in UTF-8.
1226 **  (7)  Zero or more pages instances, each as follows:
1227 **        +  4 byte page number.
1228 **        +  pPager->pageSize bytes of data.
1229 **        +  4 byte checksum
1230 **
1231 ** When we speak of the journal header, we mean the first 6 items above.
1232 ** Each entry in the journal is an instance of the 7th item.
1233 **
1234 ** Call the value from the second bullet "nRec".  nRec is the number of
1235 ** valid page entries in the journal.  In most cases, you can compute the
1236 ** value of nRec from the size of the journal file.  But if a power
1237 ** failure occurred while the journal was being written, it could be the
1238 ** case that the size of the journal file had already been increased but
1239 ** the extra entries had not yet made it safely to disk.  In such a case,
1240 ** the value of nRec computed from the file size would be too large.  For
1241 ** that reason, we always use the nRec value in the header.
1242 **
1243 ** If the nRec value is 0xffffffff it means that nRec should be computed
1244 ** from the file size.  This value is used when the user selects the
1245 ** no-sync option for the journal.  A power failure could lead to corruption
1246 ** in this case.  But for things like temporary table (which will be
1247 ** deleted when the power is restored) we don't care.
1248 **
1249 ** If the file opened as the journal file is not a well-formed
1250 ** journal file then all pages up to the first corrupted page are rolled
1251 ** back (or no pages if the journal header is corrupted). The journal file
1252 ** is then deleted and SQLITE_OK returned, just as if no corruption had
1253 ** been encountered.
1254 **
1255 ** If an I/O or malloc() error occurs, the journal-file is not deleted
1256 ** and an error code is returned.
1257 */
1258 static int pager_playback(Pager *pPager){
1259   i64 szJ;                 /* Size of the journal file in bytes */
1260   u32 nRec;                /* Number of Records in the journal */
1261   int i;                   /* Loop counter */
1262   Pgno mxPg = 0;           /* Size of the original file in pages */
1263   int rc;                  /* Result code of a subroutine */
1264   char *zMaster = 0;       /* Name of master journal file if any */
1265 
1266   /* Figure out how many records are in the journal.  Abort early if
1267   ** the journal is empty.
1268   */
1269   assert( pPager->journalOpen );
1270   rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
1271   if( rc!=SQLITE_OK ){
1272     goto end_playback;
1273   }
1274 
1275   /* Read the master journal name from the journal, if it is present.
1276   ** If a master journal file name is specified, but the file is not
1277   ** present on disk, then the journal is not hot and does not need to be
1278   ** played back.
1279   */
1280   rc = readMasterJournal(&pPager->jfd, &zMaster);
1281   assert( rc!=SQLITE_DONE );
1282   if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){
1283     sqliteFree(zMaster);
1284     zMaster = 0;
1285     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
1286     goto end_playback;
1287   }
1288   sqlite3OsSeek(&pPager->jfd, 0);
1289   pPager->journalOff = 0;
1290 
1291   /* This loop terminates either when the readJournalHdr() call returns
1292   ** SQLITE_DONE or an IO error occurs. */
1293   while( 1 ){
1294 
1295     /* Read the next journal header from the journal file.  If there are
1296     ** not enough bytes left in the journal file for a complete header, or
1297     ** it is corrupted, then a process must of failed while writing it.
1298     ** This indicates nothing more needs to be rolled back.
1299     */
1300     rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
1301     if( rc!=SQLITE_OK ){
1302       if( rc==SQLITE_DONE ){
1303         rc = SQLITE_OK;
1304       }
1305       goto end_playback;
1306     }
1307 
1308     /* If nRec is 0xffffffff, then this journal was created by a process
1309     ** working in no-sync mode. This means that the rest of the journal
1310     ** file consists of pages, there are no more journal headers. Compute
1311     ** the value of nRec based on this assumption.
1312     */
1313     if( nRec==0xffffffff ){
1314       assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
1315       nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
1316     }
1317 
1318     /* If this is the first header read from the journal, truncate the
1319     ** database file back to it's original size.
1320     */
1321     if( pPager->state>=PAGER_EXCLUSIVE &&
1322         pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
1323       assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
1324       rc = pager_truncate(pPager, mxPg);
1325       if( rc!=SQLITE_OK ){
1326         goto end_playback;
1327       }
1328       pPager->dbSize = mxPg;
1329     }
1330 
1331     /* rc = sqlite3OsSeek(&pPager->jfd, JOURNAL_HDR_SZ(pPager)); */
1332     if( rc!=SQLITE_OK ) goto end_playback;
1333 
1334     /* Copy original pages out of the journal and back into the database file.
1335     */
1336     for(i=0; i<nRec; i++){
1337       rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
1338       if( rc!=SQLITE_OK ){
1339         if( rc==SQLITE_DONE ){
1340           rc = SQLITE_OK;
1341           pPager->journalOff = szJ;
1342           break;
1343         }else{
1344           goto end_playback;
1345         }
1346       }
1347     }
1348   }
1349 
1350   /* Pages that have been written to the journal but never synced
1351   ** where not restored by the loop above.  We have to restore those
1352   ** pages by reading them back from the original database.
1353   */
1354   assert( rc==SQLITE_OK );
1355   pager_reload_cache(pPager);
1356 
1357 end_playback:
1358   if( rc==SQLITE_OK ){
1359     rc = pager_unwritelock(pPager);
1360   }
1361   if( zMaster ){
1362     /* If there was a master journal and this routine will return true,
1363     ** see if it is possible to delete the master journal.
1364     */
1365     if( rc==SQLITE_OK ){
1366       rc = pager_delmaster(zMaster);
1367     }
1368     sqliteFree(zMaster);
1369   }
1370 
1371   /* The Pager.sectorSize variable may have been updated while rolling
1372   ** back a journal created by a process with a different PAGER_SECTOR_SIZE
1373   ** value. Reset it to the correct value for this process.
1374   */
1375   pPager->sectorSize = PAGER_SECTOR_SIZE;
1376   return rc;
1377 }
1378 
1379 /*
1380 ** Playback the statement journal.
1381 **
1382 ** This is similar to playing back the transaction journal but with
1383 ** a few extra twists.
1384 **
1385 **    (1)  The number of pages in the database file at the start of
1386 **         the statement is stored in pPager->stmtSize, not in the
1387 **         journal file itself.
1388 **
1389 **    (2)  In addition to playing back the statement journal, also
1390 **         playback all pages of the transaction journal beginning
1391 **         at offset pPager->stmtJSize.
1392 */
1393 static int pager_stmt_playback(Pager *pPager){
1394   i64 szJ;                 /* Size of the full journal */
1395   i64 hdrOff;
1396   int nRec;                /* Number of Records */
1397   int i;                   /* Loop counter */
1398   int rc;
1399 
1400   szJ = pPager->journalOff;
1401 #ifndef NDEBUG
1402   {
1403     i64 os_szJ;
1404     rc = sqlite3OsFileSize(&pPager->jfd, &os_szJ);
1405     if( rc!=SQLITE_OK ) return rc;
1406     assert( szJ==os_szJ );
1407   }
1408 #endif
1409 
1410   /* Set hdrOff to be the offset to the first journal header written
1411   ** this statement transaction, or the end of the file if no journal
1412   ** header was written.
1413   */
1414   hdrOff = pPager->stmtHdrOff;
1415   assert( pPager->fullSync || !hdrOff );
1416   if( !hdrOff ){
1417     hdrOff = szJ;
1418   }
1419 
1420   /* Truncate the database back to its original size.
1421   */
1422   if( pPager->state>=PAGER_EXCLUSIVE ){
1423     rc = pager_truncate(pPager, pPager->stmtSize);
1424   }
1425   pPager->dbSize = pPager->stmtSize;
1426 
1427   /* Figure out how many records are in the statement journal.
1428   */
1429   assert( pPager->stmtInUse && pPager->journalOpen );
1430   sqlite3OsSeek(&pPager->stfd, 0);
1431   nRec = pPager->stmtNRec;
1432 
1433   /* Copy original pages out of the statement journal and back into the
1434   ** database file.  Note that the statement journal omits checksums from
1435   ** each record since power-failure recovery is not important to statement
1436   ** journals.
1437   */
1438   for(i=nRec-1; i>=0; i--){
1439     rc = pager_playback_one_page(pPager, &pPager->stfd, 0);
1440     assert( rc!=SQLITE_DONE );
1441     if( rc!=SQLITE_OK ) goto end_stmt_playback;
1442   }
1443 
1444   /* Now roll some pages back from the transaction journal. Pager.stmtJSize
1445   ** was the size of the journal file when this statement was started, so
1446   ** everything after that needs to be rolled back, either into the
1447   ** database, the memory cache, or both.
1448   **
1449   ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
1450   ** of the first journal header written during this statement transaction.
1451   */
1452   rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize);
1453   if( rc!=SQLITE_OK ){
1454     goto end_stmt_playback;
1455   }
1456   pPager->journalOff = pPager->stmtJSize;
1457   pPager->cksumInit = pPager->stmtCksum;
1458   assert( JOURNAL_HDR_SZ(pPager)<(pPager->pageSize+8) );
1459   while( pPager->journalOff <= (hdrOff-(pPager->pageSize+8)) ){
1460     rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
1461     assert( rc!=SQLITE_DONE );
1462     if( rc!=SQLITE_OK ) goto end_stmt_playback;
1463   }
1464 
1465   while( pPager->journalOff < szJ ){
1466     u32 nRec;
1467     u32 dummy;
1468     rc = readJournalHdr(pPager, szJ, &nRec, &dummy);
1469     if( rc!=SQLITE_OK ){
1470       assert( rc!=SQLITE_DONE );
1471       goto end_stmt_playback;
1472     }
1473     if( nRec==0 ){
1474       nRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
1475     }
1476     for(i=nRec-1; i>=0 && pPager->journalOff < szJ; i--){
1477       rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
1478       assert( rc!=SQLITE_DONE );
1479       if( rc!=SQLITE_OK ) goto end_stmt_playback;
1480     }
1481   }
1482 
1483   pPager->journalOff = szJ;
1484 
1485 end_stmt_playback:
1486   if( rc!=SQLITE_OK ){
1487     pPager->errMask |= PAGER_ERR_CORRUPT;
1488     rc = SQLITE_CORRUPT;  /* bkpt-CORRUPT */
1489   }else{
1490     pPager->journalOff = szJ;
1491     /* pager_reload_cache(pPager); */
1492   }
1493   return rc;
1494 }
1495 
1496 /*
1497 ** Change the maximum number of in-memory pages that are allowed.
1498 */
1499 void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){
1500   if( mxPage>10 ){
1501     pPager->mxPage = mxPage;
1502   }else{
1503     pPager->mxPage = 10;
1504   }
1505 }
1506 
1507 /*
1508 ** Adjust the robustness of the database to damage due to OS crashes
1509 ** or power failures by changing the number of syncs()s when writing
1510 ** the rollback journal.  There are three levels:
1511 **
1512 **    OFF       sqlite3OsSync() is never called.  This is the default
1513 **              for temporary and transient files.
1514 **
1515 **    NORMAL    The journal is synced once before writes begin on the
1516 **              database.  This is normally adequate protection, but
1517 **              it is theoretically possible, though very unlikely,
**              that an inopportune power failure could leave the journal
1519 **              in a state which would cause damage to the database
1520 **              when it is rolled back.
1521 **
1522 **    FULL      The journal is synced twice before writes begin on the
1523 **              database (with some additional information - the nRec field
1524 **              of the journal header - being written in between the two
1525 **              syncs).  If we assume that writing a
1526 **              single disk sector is atomic, then this mode provides
1527 **              assurance that the journal will not be corrupted to the
1528 **              point of causing damage to the database during rollback.
1529 **
1530 ** Numeric values associated with these states are OFF==1, NORMAL=2,
1531 ** and FULL=3.
1532 */
1533 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
1534 void sqlite3pager_set_safety_level(Pager *pPager, int level){
1535   pPager->noSync =  level==1 || pPager->tempFile;
1536   pPager->fullSync = level==3 && !pPager->tempFile;
1537   if( pPager->noSync ) pPager->needSync = 0;
1538 }
1539 #endif
1540 
1541 /*
1542 ** The following global variable is incremented whenever the library
1543 ** attempts to open a temporary file.  This information is used for
1544 ** testing and analysis only.
1545 */
int sqlite3_opentemp_count = 0;  /* Incremented once per call to sqlite3pager_opentemp() */
1547 
1548 /*
1549 ** Open a temporary file.  Write the name of the file into zFile
1550 ** (zFile must be at least SQLITE_TEMPNAME_SIZE bytes long.)  Write
1551 ** the file descriptor into *fd.  Return SQLITE_OK on success or some
1552 ** other error code if we fail.
1553 **
1554 ** The OS will automatically delete the temporary file when it is
1555 ** closed.
1556 */
1557 static int sqlite3pager_opentemp(char *zFile, OsFile *fd){
1558   int cnt = 8;
1559   int rc;
1560   sqlite3_opentemp_count++;  /* Used for testing and analysis only */
1561   do{
1562     cnt--;
1563     sqlite3OsTempFileName(zFile);
1564     rc = sqlite3OsOpenExclusive(zFile, fd, 1);
1565   }while( cnt>0 && rc!=SQLITE_OK && rc!=SQLITE_NOMEM );
1566   return rc;
1567 }
1568 
1569 /*
1570 ** Create a new page cache and put a pointer to the page cache in *ppPager.
1571 ** The file to be cached need not exist.  The file is not locked until
1572 ** the first call to sqlite3pager_get() and is only held open until the
1573 ** last page is released using sqlite3pager_unref().
1574 **
1575 ** If zFilename is NULL then a randomly-named temporary file is created
1576 ** and used as the file to be cached.  The file will be deleted
1577 ** automatically when it is closed.
1578 **
1579 ** If zFilename is ":memory:" then all information is held in cache.
1580 ** It is never written to disk.  This can be used to implement an
1581 ** in-memory database.
1582 */
1583 int sqlite3pager_open(
1584   Pager **ppPager,         /* Return the Pager structure here */
1585   const char *zFilename,   /* Name of the database file to open */
1586   int nExtra,              /* Extra bytes append to each in-memory page */
1587   int flags                /* flags controlling this file */
1588 ){
1589   Pager *pPager;
1590   char *zFullPathname = 0;
1591   int nameLen;
1592   OsFile fd;
1593   int rc = SQLITE_OK;
1594   int i;
1595   int tempFile = 0;
1596   int memDb = 0;
1597   int readOnly = 0;
1598   int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
1599   int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
1600   char zTemp[SQLITE_TEMPNAME_SIZE];
1601 
1602   *ppPager = 0;
1603   memset(&fd, 0, sizeof(fd));
1604   if( sqlite3_malloc_failed ){
1605     return SQLITE_NOMEM;
1606   }
1607   if( zFilename && zFilename[0] ){
1608 #ifndef SQLITE_OMIT_MEMORYDB
1609     if( strcmp(zFilename,":memory:")==0 ){
1610       memDb = 1;
1611       zFullPathname = sqliteStrDup("");
1612       rc = SQLITE_OK;
1613     }else
1614 #endif
1615     {
1616       zFullPathname = sqlite3OsFullPathname(zFilename);
1617       if( zFullPathname ){
1618         rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly);
1619       }
1620     }
1621   }else{
1622     rc = sqlite3pager_opentemp(zTemp, &fd);
1623     zFilename = zTemp;
1624     zFullPathname = sqlite3OsFullPathname(zFilename);
1625     if( rc==SQLITE_OK ){
1626       tempFile = 1;
1627     }
1628   }
1629   if( !zFullPathname ){
1630     sqlite3OsClose(&fd);
1631     return SQLITE_NOMEM;
1632   }
1633   if( rc!=SQLITE_OK ){
1634     sqlite3OsClose(&fd);
1635     sqliteFree(zFullPathname);
1636     return rc;
1637   }
1638   nameLen = strlen(zFullPathname);
1639   pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
1640   if( pPager==0 ){
1641     sqlite3OsClose(&fd);
1642     sqliteFree(zFullPathname);
1643     return SQLITE_NOMEM;
1644   }
1645   TRACE3("OPEN %d %s\n", FILEHANDLEID(fd), zFullPathname);
1646   pPager->zFilename = (char*)&pPager[1];
1647   pPager->zDirectory = &pPager->zFilename[nameLen+1];
1648   pPager->zJournal = &pPager->zDirectory[nameLen+1];
1649   strcpy(pPager->zFilename, zFullPathname);
1650   strcpy(pPager->zDirectory, zFullPathname);
1651   for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
1652   if( i>0 ) pPager->zDirectory[i-1] = 0;
1653   strcpy(pPager->zJournal, zFullPathname);
1654   sqliteFree(zFullPathname);
1655   strcpy(&pPager->zJournal[nameLen], "-journal");
1656   pPager->fd = fd;
1657 #if OS_UNIX
1658   pPager->fd.pPager = pPager;
1659 #endif
1660   pPager->journalOpen = 0;
1661   pPager->useJournal = useJournal && !memDb;
1662   pPager->noReadlock = noReadlock && readOnly;
1663   pPager->stmtOpen = 0;
1664   pPager->stmtInUse = 0;
1665   pPager->nRef = 0;
1666   pPager->dbSize = memDb-1;
1667   pPager->pageSize = SQLITE_DEFAULT_PAGE_SIZE;
1668   pPager->stmtSize = 0;
1669   pPager->stmtJSize = 0;
1670   pPager->nPage = 0;
1671   pPager->nMaxPage = 0;
1672   pPager->mxPage = 100;
1673   pPager->state = PAGER_UNLOCK;
1674   pPager->errMask = 0;
1675   pPager->tempFile = tempFile;
1676   pPager->memDb = memDb;
1677   pPager->readOnly = readOnly;
1678   pPager->needSync = 0;
1679   pPager->noSync = pPager->tempFile || !useJournal;
1680   pPager->fullSync = (pPager->noSync?0:1);
1681   pPager->pFirst = 0;
1682   pPager->pFirstSynced = 0;
1683   pPager->pLast = 0;
1684   pPager->nExtra = FORCE_ALIGNMENT(nExtra);
1685   pPager->sectorSize = PAGER_SECTOR_SIZE;
1686   pPager->pBusyHandler = 0;
1687   memset(pPager->aHash, 0, sizeof(pPager->aHash));
1688   *ppPager = pPager;
1689   return SQLITE_OK;
1690 }
1691 
1692 /*
1693 ** Set the busy handler function.
1694 */
1695 void sqlite3pager_set_busyhandler(Pager *pPager, BusyHandler *pBusyHandler){
1696   pPager->pBusyHandler = pBusyHandler;
1697 }
1698 
1699 /*
1700 ** Set the destructor for this pager.  If not NULL, the destructor is called
1701 ** when the reference count on each page reaches zero.  The destructor can
1702 ** be used to clean up information in the extra segment appended to each page.
1703 **
1704 ** The destructor is not called as a result sqlite3pager_close().
1705 ** Destructors are only called by sqlite3pager_unref().
1706 */
1707 void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){
1708   pPager->xDestructor = xDesc;
1709 }
1710 
1711 /*
1712 ** Set the reinitializer for this pager.  If not NULL, the reinitializer
1713 ** is called when the content of a page in cache is restored to its original
1714 ** value as a result of a rollback.  The callback gives higher-level code
1715 ** an opportunity to restore the EXTRA section to agree with the restored
1716 ** page data.
1717 */
1718 void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){
1719   pPager->xReiniter = xReinit;
1720 }
1721 
1722 /*
1723 ** Set the page size.  Return the new size.  If the suggest new page
1724 ** size is inappropriate, then an alternative page size is selected
1725 ** and returned.
1726 */
1727 int sqlite3pager_set_pagesize(Pager *pPager, int pageSize){
1728   assert( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE );
1729   if( !pPager->memDb ){
1730     pPager->pageSize = pageSize;
1731   }
1732   return pPager->pageSize;
1733 }
1734 
1735 /*
1736 ** Read the first N bytes from the beginning of the file into memory
1737 ** that pDest points to.  No error checking is done.
1738 */
1739 void sqlite3pager_read_fileheader(Pager *pPager, int N, unsigned char *pDest){
1740   memset(pDest, 0, N);
1741   if( MEMDB==0 ){
1742     sqlite3OsSeek(&pPager->fd, 0);
1743     sqlite3OsRead(&pPager->fd, pDest, N);
1744   }
1745 }
1746 
1747 /*
1748 ** Return the total number of pages in the disk file associated with
1749 ** pPager.
1750 */
1751 int sqlite3pager_pagecount(Pager *pPager){
1752   i64 n;
1753   assert( pPager!=0 );
1754   if( pPager->dbSize>=0 ){
1755     return pPager->dbSize;
1756   }
1757   if( sqlite3OsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
1758     pPager->errMask |= PAGER_ERR_DISK;
1759     return 0;
1760   }
1761   n /= pPager->pageSize;
1762   if( !MEMDB && n==PENDING_BYTE/pPager->pageSize ){
1763     n++;
1764   }
1765   if( pPager->state!=PAGER_UNLOCK ){
1766     pPager->dbSize = n;
1767   }
1768   return n;
1769 }
1770 
/*
** Forward declaration.  syncJournal() is defined further down in this
** file but is called earlier, from sqlite3pager_truncate().
*/
static int syncJournal(Pager*);
1775 
1776 
1777 /*
1778 ** Unlink pPg from it's hash chain. Also set the page number to 0 to indicate
1779 ** that the page is not part of any hash chain. This is required because the
1780 ** sqlite3pager_movepage() routine can leave a page in the
1781 ** pNextFree/pPrevFree list that is not a part of any hash-chain.
1782 */
1783 static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
1784   if( pPg->pgno==0 ){
1785     /* If the page number is zero, then this page is not in any hash chain. */
1786     return;
1787   }
1788   if( pPg->pNextHash ){
1789     pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1790   }
1791   if( pPg->pPrevHash ){
1792     assert( pPager->aHash[pager_hash(pPg->pgno)]!=pPg );
1793     pPg->pPrevHash->pNextHash = pPg->pNextHash;
1794   }else{
1795     int h = pager_hash(pPg->pgno);
1796     assert( pPager->aHash[h]==pPg );
1797     pPager->aHash[h] = pPg->pNextHash;
1798   }
1799 
1800   pPg->pgno = 0;
1801   pPg->pNextHash = pPg->pPrevHash = 0;
1802 }
1803 
1804 /*
1805 ** Unlink a page from the free list (the list of all pages where nRef==0)
1806 ** and from its hash collision chain.
1807 */
1808 static void unlinkPage(PgHdr *pPg){
1809   Pager *pPager = pPg->pPager;
1810 
1811   /* Keep the pFirstSynced pointer pointing at the first synchronized page */
1812   if( pPg==pPager->pFirstSynced ){
1813     PgHdr *p = pPg->pNextFree;
1814     while( p && p->needSync ){ p = p->pNextFree; }
1815     pPager->pFirstSynced = p;
1816   }
1817 
1818   /* Unlink from the freelist */
1819   if( pPg->pPrevFree ){
1820     pPg->pPrevFree->pNextFree = pPg->pNextFree;
1821   }else{
1822     assert( pPager->pFirst==pPg );
1823     pPager->pFirst = pPg->pNextFree;
1824   }
1825   if( pPg->pNextFree ){
1826     pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1827   }else{
1828     assert( pPager->pLast==pPg );
1829     pPager->pLast = pPg->pPrevFree;
1830   }
1831   pPg->pNextFree = pPg->pPrevFree = 0;
1832 
1833   /* Unlink from the pgno hash table */
1834   unlinkHashChain(pPager, pPg);
1835 }
1836 
1837 #ifndef SQLITE_OMIT_MEMORYDB
1838 /*
1839 ** This routine is used to truncate an in-memory database.  Delete
1840 ** all pages whose pgno is larger than pPager->dbSize and is unreferenced.
1841 ** Referenced pages larger than pPager->dbSize are zeroed.
1842 */
1843 static void memoryTruncate(Pager *pPager){
1844   PgHdr *pPg;
1845   PgHdr **ppPg;
1846   int dbSize = pPager->dbSize;
1847 
1848   ppPg = &pPager->pAll;
1849   while( (pPg = *ppPg)!=0 ){
1850     if( pPg->pgno<=dbSize ){
1851       ppPg = &pPg->pNextAll;
1852     }else if( pPg->nRef>0 ){
1853       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
1854       ppPg = &pPg->pNextAll;
1855     }else{
1856       *ppPg = pPg->pNextAll;
1857       unlinkPage(pPg);
1858       sqliteFree(pPg);
1859       pPager->nPage--;
1860     }
1861   }
1862 }
1863 #else
1864 #define memoryTruncate(p)
1865 #endif
1866 
1867 /*
1868 ** Try to obtain a lock on a file.  Invoke the busy callback if the lock
1869 ** is currently not available.  Repeat until the busy callback returns
1870 ** false or until the lock succeeds.
1871 **
1872 ** Return SQLITE_OK on success and an error code if we cannot obtain
1873 ** the lock.
1874 */
1875 static int pager_wait_on_lock(Pager *pPager, int locktype){
1876   int rc;
1877   assert( PAGER_SHARED==SHARED_LOCK );
1878   assert( PAGER_RESERVED==RESERVED_LOCK );
1879   assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
1880   if( pPager->state>=locktype ){
1881     rc = SQLITE_OK;
1882   }else{
1883     do {
1884       rc = sqlite3OsLock(&pPager->fd, locktype);
1885     }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
1886     if( rc==SQLITE_OK ){
1887       pPager->state = locktype;
1888     }
1889   }
1890   return rc;
1891 }
1892 
1893 /*
1894 ** Truncate the file to the number of pages specified.
1895 */
1896 int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
1897   int rc;
1898   sqlite3pager_pagecount(pPager);
1899   if( pPager->errMask!=0 ){
1900     rc = pager_errcode(pPager);
1901     return rc;
1902   }
1903   if( nPage>=(unsigned)pPager->dbSize ){
1904     return SQLITE_OK;
1905   }
1906   if( MEMDB ){
1907     pPager->dbSize = nPage;
1908     memoryTruncate(pPager);
1909     return SQLITE_OK;
1910   }
1911   rc = syncJournal(pPager);
1912   if( rc!=SQLITE_OK ){
1913     return rc;
1914   }
1915 
1916   /* Get an exclusive lock on the database before truncating. */
1917   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
1918   if( rc!=SQLITE_OK ){
1919     return rc;
1920   }
1921 
1922   rc = pager_truncate(pPager, nPage);
1923   if( rc==SQLITE_OK ){
1924     pPager->dbSize = nPage;
1925   }
1926   return rc;
1927 }
1928 
1929 /*
1930 ** Shutdown the page cache.  Free all memory and close all files.
1931 **
1932 ** If a transaction was in progress when this routine is called, that
1933 ** transaction is rolled back.  All outstanding pages are invalidated
1934 ** and their memory is freed.  Any attempt to use a page associated
1935 ** with this page cache after this function returns will likely
1936 ** result in a coredump.
1937 */
1938 int sqlite3pager_close(Pager *pPager){
1939   PgHdr *pPg, *pNext;
1940   switch( pPager->state ){
1941     case PAGER_RESERVED:
1942     case PAGER_SYNCED:
1943     case PAGER_EXCLUSIVE: {
1944       /* We ignore any IO errors that occur during the rollback
1945       ** operation. So disable IO error simulation so that testing
1946       ** works more easily.
1947       */
1948 #if defined(SQLITE_TEST) && (defined(OS_UNIX) || defined(OS_WIN))
1949       extern int sqlite3_io_error_pending;
1950       int ioerr_cnt = sqlite3_io_error_pending;
1951       sqlite3_io_error_pending = -1;
1952 #endif
1953       sqlite3pager_rollback(pPager);
1954 #if defined(SQLITE_TEST) && (defined(OS_UNIX) || defined(OS_WIN))
1955       sqlite3_io_error_pending = ioerr_cnt;
1956 #endif
1957       if( !MEMDB ){
1958         sqlite3OsUnlock(&pPager->fd, NO_LOCK);
1959       }
1960       assert( pPager->errMask || pPager->journalOpen==0 );
1961       break;
1962     }
1963     case PAGER_SHARED: {
1964       if( !MEMDB ){
1965         sqlite3OsUnlock(&pPager->fd, NO_LOCK);
1966       }
1967       break;
1968     }
1969     default: {
1970       /* Do nothing */
1971       break;
1972     }
1973   }
1974   for(pPg=pPager->pAll; pPg; pPg=pNext){
1975 #ifndef NDEBUG
1976     if( MEMDB ){
1977       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
1978       assert( !pPg->alwaysRollback );
1979       assert( !pHist->pOrig );
1980       assert( !pHist->pStmt );
1981     }
1982 #endif
1983     pNext = pPg->pNextAll;
1984     sqliteFree(pPg);
1985   }
1986   TRACE2("CLOSE %d\n", PAGERID(pPager));
1987   assert( pPager->errMask || (pPager->journalOpen==0 && pPager->stmtOpen==0) );
1988   if( pPager->journalOpen ){
1989     sqlite3OsClose(&pPager->jfd);
1990   }
1991   sqliteFree(pPager->aInJournal);
1992   if( pPager->stmtOpen ){
1993     sqlite3OsClose(&pPager->stfd);
1994   }
1995   sqlite3OsClose(&pPager->fd);
1996   /* Temp files are automatically deleted by the OS
1997   ** if( pPager->tempFile ){
1998   **   sqlite3OsDelete(pPager->zFilename);
1999   ** }
2000   */
2001 
2002   sqliteFree(pPager);
2003   return SQLITE_OK;
2004 }
2005 
2006 /*
2007 ** Return the page number for the given page data.
2008 */
2009 Pgno sqlite3pager_pagenumber(void *pData){
2010   PgHdr *p = DATA_TO_PGHDR(pData);
2011   return p->pgno;
2012 }
2013 
2014 /*
2015 ** The page_ref() function increments the reference count for a page.
2016 ** If the page is currently on the freelist (the reference count is zero) then
2017 ** remove it from the freelist.
2018 **
2019 ** For non-test systems, page_ref() is a macro that calls _page_ref()
2020 ** online of the reference count is zero.  For test systems, page_ref()
2021 ** is a real function so that we can set breakpoints and trace it.
2022 */
2023 static void _page_ref(PgHdr *pPg){
2024   if( pPg->nRef==0 ){
2025     /* The page is currently on the freelist.  Remove it. */
2026     if( pPg==pPg->pPager->pFirstSynced ){
2027       PgHdr *p = pPg->pNextFree;
2028       while( p && p->needSync ){ p = p->pNextFree; }
2029       pPg->pPager->pFirstSynced = p;
2030     }
2031     if( pPg->pPrevFree ){
2032       pPg->pPrevFree->pNextFree = pPg->pNextFree;
2033     }else{
2034       pPg->pPager->pFirst = pPg->pNextFree;
2035     }
2036     if( pPg->pNextFree ){
2037       pPg->pNextFree->pPrevFree = pPg->pPrevFree;
2038     }else{
2039       pPg->pPager->pLast = pPg->pPrevFree;
2040     }
2041     pPg->pPager->nRef++;
2042   }
2043   pPg->nRef++;
2044   REFINFO(pPg);
2045 }
#ifdef SQLITE_DEBUG
  /* Debug builds: a real function, so it can be traced and breakpointed */
  static void page_ref(PgHdr *pPg){
    if( pPg->nRef!=0 ){
      /* Already referenced, hence not on the freelist: just count it */
      pPg->nRef++;
      REFINFO(pPg);
      return;
    }
    _page_ref(pPg);
  }
#else
  /* Other builds: call _page_ref() only when the count is zero */
# define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
#endif
2058 
2059 /*
2060 ** Increment the reference count for a page.  The input pointer is
2061 ** a reference to the page data.
2062 */
2063 int sqlite3pager_ref(void *pData){
2064   PgHdr *pPg = DATA_TO_PGHDR(pData);
2065   page_ref(pPg);
2066   return SQLITE_OK;
2067 }
2068 
2069 /*
2070 ** Sync the journal.  In other words, make sure all the pages that have
2071 ** been written to the journal have actually reached the surface of the
2072 ** disk.  It is not safe to modify the original database file until after
2073 ** the journal has been synced.  If the original database is modified before
2074 ** the journal is synced and a power failure occurs, the unsynced journal
2075 ** data would be lost and we would be unable to completely rollback the
2076 ** database changes.  Database corruption would occur.
2077 **
2078 ** This routine also updates the nRec field in the header of the journal.
2079 ** (See comments on the pager_playback() routine for additional information.)
2080 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
2081 ** is synced, then the nRec field is updated, then a second sync occurs.
2082 **
2083 ** For temporary databases, we do not care if we are able to rollback
2084 ** after a power failure, so sync occurs.
2085 **
2086 ** This routine clears the needSync field of every page current held in
2087 ** memory.
2088 */
2089 static int syncJournal(Pager *pPager){
2090   PgHdr *pPg;
2091   int rc = SQLITE_OK;
2092 
2093   /* Sync the journal before modifying the main database
2094   ** (assuming there is a journal and it needs to be synced.)
2095   */
2096   if( pPager->needSync ){
2097     if( !pPager->tempFile ){
2098       assert( pPager->journalOpen );
2099       /* assert( !pPager->noSync ); // noSync might be set if synchronous
2100       ** was turned off after the transaction was started.  Ticket #615 */
2101 #ifndef NDEBUG
2102       {
2103         /* Make sure the pPager->nRec counter we are keeping agrees
2104         ** with the nRec computed from the size of the journal file.
2105         */
2106         i64 jSz;
2107         rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
2108         if( rc!=0 ) return rc;
2109         assert( pPager->journalOff==jSz );
2110       }
2111 #endif
2112       {
2113         /* Write the nRec value into the journal file header. If in
2114         ** full-synchronous mode, sync the journal first. This ensures that
2115         ** all data has really hit the disk before nRec is updated to mark
2116         ** it as a candidate for rollback.
2117         */
2118         if( pPager->fullSync ){
2119           TRACE2("SYNC journal of %d\n", PAGERID(pPager));
2120           rc = sqlite3OsSync(&pPager->jfd);
2121           if( rc!=0 ) return rc;
2122         }
2123         sqlite3OsSeek(&pPager->jfd, pPager->journalHdr + sizeof(aJournalMagic));
2124         rc = write32bits(&pPager->jfd, pPager->nRec);
2125         if( rc ) return rc;
2126 
2127         sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
2128       }
2129       TRACE2("SYNC journal of %d\n", PAGERID(pPager));
2130       rc = sqlite3OsSync(&pPager->jfd);
2131       if( rc!=0 ) return rc;
2132       pPager->journalStarted = 1;
2133     }
2134     pPager->needSync = 0;
2135 
2136     /* Erase the needSync flag from every page.
2137     */
2138     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
2139       pPg->needSync = 0;
2140     }
2141     pPager->pFirstSynced = pPager->pFirst;
2142   }
2143 
2144 #ifndef NDEBUG
2145   /* If the Pager.needSync flag is clear then the PgHdr.needSync
2146   ** flag must also be clear for all pages.  Verify that this
2147   ** invariant is true.
2148   */
2149   else{
2150     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
2151       assert( pPg->needSync==0 );
2152     }
2153     assert( pPager->pFirstSynced==pPager->pFirst );
2154   }
2155 #endif
2156 
2157   return rc;
2158 }
2159 
2160 /*
2161 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
2162 ** every one of those pages out to the database file and mark them all
2163 ** as clean.
2164 */
2165 static int pager_write_pagelist(PgHdr *pList){
2166   Pager *pPager;
2167   int rc;
2168 
2169   if( pList==0 ) return SQLITE_OK;
2170   pPager = pList->pPager;
2171 
2172   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
2173   ** database file. If there is already an EXCLUSIVE lock, the following
2174   ** calls to sqlite3OsLock() are no-ops.
2175   **
2176   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
2177   ** through an intermediate state PENDING.   A PENDING lock prevents new
2178   ** readers from attaching to the database but is unsufficient for us to
2179   ** write.  The idea of a PENDING lock is to prevent new readers from
2180   ** coming in while we wait for existing readers to clear.
2181   **
2182   ** While the pager is in the RESERVED state, the original database file
2183   ** is unchanged and we can rollback without having to playback the
2184   ** journal into the original database file.  Once we transition to
2185   ** EXCLUSIVE, it means the database file has been changed and any rollback
2186   ** will require a journal playback.
2187   */
2188   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
2189   if( rc!=SQLITE_OK ){
2190     return rc;
2191   }
2192 
2193   while( pList ){
2194     assert( pList->dirty );
2195     sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(i64)pPager->pageSize);
2196     /* If there are dirty pages in the page cache with page numbers greater
2197     ** than Pager.dbSize, this means sqlite3pager_truncate() was called to
2198     ** make the file smaller (presumably by auto-vacuum code). Do not write
2199     ** any such pages to the file.
2200     */
2201     if( pList->pgno<=pPager->dbSize ){
2202       CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
2203       TRACE3("STORE %d page %d\n", PAGERID(pPager), pList->pgno);
2204       rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), pPager->pageSize);
2205       CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
2206       TEST_INCR(pPager->nWrite);
2207     }
2208 #ifndef NDEBUG
2209     else{
2210       TRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
2211     }
2212 #endif
2213     if( rc ) return rc;
2214     pList->dirty = 0;
2215 #ifdef SQLITE_CHECK_PAGES
2216     pList->pageHash = pager_pagehash(pList);
2217 #endif
2218     pList = pList->pDirty;
2219   }
2220   return SQLITE_OK;
2221 }
2222 
2223 /*
2224 ** Collect every dirty page into a dirty list and
2225 ** return a pointer to the head of that list.  All pages are
2226 ** collected even if they are still in use.
2227 */
2228 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
2229   PgHdr *p, *pList;
2230   pList = 0;
2231   for(p=pPager->pAll; p; p=p->pNextAll){
2232     if( p->dirty ){
2233       p->pDirty = pList;
2234       pList = p;
2235     }
2236   }
2237   return pList;
2238 }
2239 
2240 /*
2241 ** Return TRUE if there is a hot journal on the given pager.
2242 ** A hot journal is one that needs to be played back.
2243 **
2244 ** If the current size of the database file is 0 but a journal file
2245 ** exists, that is probably an old journal left over from a prior
2246 ** database with the same name.  Just delete the journal.
2247 */
2248 static int hasHotJournal(Pager *pPager){
2249   if( !pPager->useJournal ) return 0;
2250   if( !sqlite3OsFileExists(pPager->zJournal) ) return 0;
2251   if( sqlite3OsCheckReservedLock(&pPager->fd) ) return 0;
2252   if( sqlite3pager_pagecount(pPager)==0 ){
2253     sqlite3OsDelete(pPager->zJournal);
2254     return 0;
2255   }else{
2256     return 1;
2257   }
2258 }
2259 
2260 /*
2261 ** Acquire a page.
2262 **
2263 ** A read lock on the disk file is obtained when the first page is acquired.
2264 ** This read lock is dropped when the last page is released.
2265 **
2266 ** A _get works for any page number greater than 0.  If the database
2267 ** file is smaller than the requested page, then no actual disk
2268 ** read occurs and the memory image of the page is initialized to
2269 ** all zeros.  The extra data appended to a page is always initialized
2270 ** to zeros the first time a page is loaded into memory.
2271 **
2272 ** The acquisition might fail for several reasons.  In all cases,
2273 ** an appropriate error code is returned and *ppPage is set to NULL.
2274 **
2275 ** See also sqlite3pager_lookup().  Both this routine and _lookup() attempt
2276 ** to find a page in the in-memory cache first.  If the page is not already
2277 ** in memory, this routine goes to disk to read it in whereas _lookup()
2278 ** just returns 0.  This routine acquires a read-lock the first time it
2279 ** has to go to disk, and could also playback an old journal if necessary.
2280 ** Since _lookup() never goes to disk, it never has to deal with locks
2281 ** or journal files.
2282 */
2283 int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
2284   PgHdr *pPg;
2285   int rc;
2286 
2287   /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
2288   ** number greater than this, or zero, is requested.
2289   */
2290   if( pgno>PAGER_MAX_PGNO || pgno==0 ){
2291     return SQLITE_CORRUPT;
2292   }
2293 
2294   /* Make sure we have not hit any critical errors.
2295   */
2296   assert( pPager!=0 );
2297   *ppPage = 0;
2298   if( pPager->errMask & ~(PAGER_ERR_FULL) ){
2299     return pager_errcode(pPager);
2300   }
2301 
2302   /* If this is the first page accessed, then get a SHARED lock
2303   ** on the database file.
2304   */
2305   if( pPager->nRef==0 && !MEMDB ){
2306     if( !pPager->noReadlock ){
2307       rc = pager_wait_on_lock(pPager, SHARED_LOCK);
2308       if( rc!=SQLITE_OK ){
2309         return rc;
2310       }
2311     }
2312 
2313     /* If a journal file exists, and there is no RESERVED lock on the
2314     ** database file, then it either needs to be played back or deleted.
2315     */
2316     if( hasHotJournal(pPager) ){
2317        int rc;
2318 
2319        /* Get an EXCLUSIVE lock on the database file. At this point it is
2320        ** important that a RESERVED lock is not obtained on the way to the
2321        ** EXCLUSIVE lock. If it were, another process might open the
2322        ** database file, detect the RESERVED lock, and conclude that the
2323        ** database is safe to read while this process is still rolling it
2324        ** back.
2325        **
2326        ** Because the intermediate RESERVED lock is not requested, the
2327        ** second process will get to this point in the code and fail to
2328        ** obtain it's own EXCLUSIVE lock on the database file.
2329        */
2330        rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
2331        if( rc!=SQLITE_OK ){
2332          sqlite3OsUnlock(&pPager->fd, NO_LOCK);
2333          pPager->state = PAGER_UNLOCK;
2334          return rc;
2335        }
2336        pPager->state = PAGER_EXCLUSIVE;
2337 
2338        /* Open the journal for reading only.  Return SQLITE_BUSY if
2339        ** we are unable to open the journal file.
2340        **
2341        ** The journal file does not need to be locked itself.  The
2342        ** journal file is never open unless the main database file holds
2343        ** a write lock, so there is never any chance of two or more
2344        ** processes opening the journal at the same time.
2345        */
2346        rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd);
2347        if( rc!=SQLITE_OK ){
2348          sqlite3OsUnlock(&pPager->fd, NO_LOCK);
2349          pPager->state = PAGER_UNLOCK;
2350          return SQLITE_BUSY;
2351        }
2352        pPager->journalOpen = 1;
2353        pPager->journalStarted = 0;
2354        pPager->journalOff = 0;
2355        pPager->setMaster = 0;
2356        pPager->journalHdr = 0;
2357 
2358        /* Playback and delete the journal.  Drop the database write
2359        ** lock and reacquire the read lock.
2360        */
2361        rc = pager_playback(pPager);
2362        if( rc!=SQLITE_OK ){
2363          return rc;
2364        }
2365     }
2366     pPg = 0;
2367   }else{
2368     /* Search for page in cache */
2369     pPg = pager_lookup(pPager, pgno);
2370     if( MEMDB && pPager->state==PAGER_UNLOCK ){
2371       pPager->state = PAGER_SHARED;
2372     }
2373   }
2374   if( pPg==0 ){
2375     /* The requested page is not in the page cache. */
2376     int h;
2377     TEST_INCR(pPager->nMiss);
2378     if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || MEMDB ){
2379       /* Create a new page */
2380       pPg = sqliteMallocRaw( sizeof(*pPg) + pPager->pageSize
2381                               + sizeof(u32) + pPager->nExtra
2382                               + MEMDB*sizeof(PgHistory) );
2383       if( pPg==0 ){
2384         pPager->errMask |= PAGER_ERR_MEM;
2385         return SQLITE_NOMEM;
2386       }
2387       memset(pPg, 0, sizeof(*pPg));
2388       if( MEMDB ){
2389         memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
2390       }
2391       pPg->pPager = pPager;
2392       pPg->pNextAll = pPager->pAll;
2393       pPager->pAll = pPg;
2394       pPager->nPage++;
2395       if( pPager->nPage>pPager->nMaxPage ){
2396         assert( pPager->nMaxPage==(pPager->nPage-1) );
2397         pPager->nMaxPage++;
2398       }
2399     }else{
2400       /* Find a page to recycle.  Try to locate a page that does not
2401       ** require us to do an fsync() on the journal.
2402       */
2403       pPg = pPager->pFirstSynced;
2404 
2405       /* If we could not find a page that does not require an fsync()
2406       ** on the journal file then fsync the journal file.  This is a
2407       ** very slow operation, so we work hard to avoid it.  But sometimes
2408       ** it can't be helped.
2409       */
2410       if( pPg==0 ){
2411         int rc = syncJournal(pPager);
2412         if( rc!=0 ){
2413           sqlite3pager_rollback(pPager);
2414           return SQLITE_IOERR;
2415         }
2416         if( pPager->fullSync ){
2417           /* If in full-sync mode, write a new journal header into the
2418 	  ** journal file. This is done to avoid ever modifying a journal
2419 	  ** header that is involved in the rollback of pages that have
2420 	  ** already been written to the database (in case the header is
2421 	  ** trashed when the nRec field is updated).
2422           */
2423           pPager->nRec = 0;
2424           assert( pPager->journalOff > 0 );
2425           rc = writeJournalHdr(pPager);
2426           if( rc!=0 ){
2427             sqlite3pager_rollback(pPager);
2428             return SQLITE_IOERR;
2429           }
2430         }
2431         pPg = pPager->pFirst;
2432       }
2433       assert( pPg->nRef==0 );
2434 
2435       /* Write the page to the database file if it is dirty.
2436       */
2437       if( pPg->dirty ){
2438         assert( pPg->needSync==0 );
2439         pPg->pDirty = 0;
2440         rc = pager_write_pagelist( pPg );
2441         if( rc!=SQLITE_OK ){
2442           sqlite3pager_rollback(pPager);
2443           return SQLITE_IOERR;
2444         }
2445       }
2446       assert( pPg->dirty==0 );
2447 
2448       /* If the page we are recycling is marked as alwaysRollback, then
2449       ** set the global alwaysRollback flag, thus disabling the
2450       ** sqlite_dont_rollback() optimization for the rest of this transaction.
2451       ** It is necessary to do this because the page marked alwaysRollback
2452       ** might be reloaded at a later time but at that point we won't remember
2453       ** that is was marked alwaysRollback.  This means that all pages must
2454       ** be marked as alwaysRollback from here on out.
2455       */
2456       if( pPg->alwaysRollback ){
2457         pPager->alwaysRollback = 1;
2458       }
2459 
2460       /* Unlink the old page from the free list and the hash table
2461       */
2462       unlinkPage(pPg);
2463       TEST_INCR(pPager->nOvfl);
2464     }
2465     pPg->pgno = pgno;
2466     if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
2467       sqlite3CheckMemory(pPager->aInJournal, pgno/8);
2468       assert( pPager->journalOpen );
2469       pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
2470       pPg->needSync = 0;
2471     }else{
2472       pPg->inJournal = 0;
2473       pPg->needSync = 0;
2474     }
2475     if( pPager->aInStmt && (int)pgno<=pPager->stmtSize
2476              && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){
2477       page_add_to_stmt_list(pPg);
2478     }else{
2479       page_remove_from_stmt_list(pPg);
2480     }
2481     pPg->dirty = 0;
2482     pPg->nRef = 1;
2483     REFINFO(pPg);
2484     pPager->nRef++;
2485     h = pager_hash(pgno);
2486     pPg->pNextHash = pPager->aHash[h];
2487     pPager->aHash[h] = pPg;
2488     if( pPg->pNextHash ){
2489       assert( pPg->pNextHash->pPrevHash==0 );
2490       pPg->pNextHash->pPrevHash = pPg;
2491     }
2492     if( pPager->nExtra>0 ){
2493       memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
2494     }
2495     if( pPager->errMask!=0 ){
2496       sqlite3pager_unref(PGHDR_TO_DATA(pPg));
2497       rc = pager_errcode(pPager);
2498       return rc;
2499     }
2500     if( sqlite3pager_pagecount(pPager)<(int)pgno ){
2501       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
2502     }else{
2503       int rc;
2504       assert( MEMDB==0 );
2505       sqlite3OsSeek(&pPager->fd, (pgno-1)*(i64)pPager->pageSize);
2506       rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize);
2507       TRACE3("FETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
2508       CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
2509       if( rc!=SQLITE_OK ){
2510         i64 fileSize;
2511         if( sqlite3OsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
2512                || fileSize>=pgno*pPager->pageSize ){
2513           sqlite3pager_unref(PGHDR_TO_DATA(pPg));
2514           return rc;
2515         }else{
2516           memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
2517         }
2518       }else{
2519         TEST_INCR(pPager->nRead);
2520       }
2521     }
2522 #ifdef SQLITE_CHECK_PAGES
2523     pPg->pageHash = pager_pagehash(pPg);
2524 #endif
2525   }else{
2526     /* The requested page is in the page cache. */
2527     TEST_INCR(pPager->nHit);
2528     page_ref(pPg);
2529   }
2530   *ppPage = PGHDR_TO_DATA(pPg);
2531   return SQLITE_OK;
2532 }
2533 
2534 /*
2535 ** Acquire a page if it is already in the in-memory cache.  Do
2536 ** not read the page from disk.  Return a pointer to the page,
2537 ** or 0 if the page is not in cache.
2538 **
2539 ** See also sqlite3pager_get().  The difference between this routine
2540 ** and sqlite3pager_get() is that _get() will go to the disk and read
2541 ** in the page if the page is not already in cache.  This routine
2542 ** returns NULL if the page is not in cache or if a disk I/O error
2543 ** has ever happened.
2544 */
2545 void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){
2546   PgHdr *pPg;
2547 
2548   assert( pPager!=0 );
2549   assert( pgno!=0 );
2550   if( pPager->errMask & ~(PAGER_ERR_FULL) ){
2551     return 0;
2552   }
2553   pPg = pager_lookup(pPager, pgno);
2554   if( pPg==0 ) return 0;
2555   page_ref(pPg);
2556   return PGHDR_TO_DATA(pPg);
2557 }
2558 
2559 /*
2560 ** Release a page.
2561 **
2562 ** If the number of references to the page drop to zero, then the
2563 ** page is added to the LRU list.  When all references to all pages
2564 ** are released, a rollback occurs and the lock on the database is
2565 ** removed.
2566 */
2567 int sqlite3pager_unref(void *pData){
2568   PgHdr *pPg;
2569 
2570   /* Decrement the reference count for this page
2571   */
2572   pPg = DATA_TO_PGHDR(pData);
2573   assert( pPg->nRef>0 );
2574   pPg->nRef--;
2575   REFINFO(pPg);
2576 
2577   CHECK_PAGE(pPg);
2578 
2579   /* When the number of references to a page reach 0, call the
2580   ** destructor and add the page to the freelist.
2581   */
2582   if( pPg->nRef==0 ){
2583     Pager *pPager;
2584     pPager = pPg->pPager;
2585     pPg->pNextFree = 0;
2586     pPg->pPrevFree = pPager->pLast;
2587     pPager->pLast = pPg;
2588     if( pPg->pPrevFree ){
2589       pPg->pPrevFree->pNextFree = pPg;
2590     }else{
2591       pPager->pFirst = pPg;
2592     }
2593     if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
2594       pPager->pFirstSynced = pPg;
2595     }
2596     if( pPager->xDestructor ){
2597       pPager->xDestructor(pData, pPager->pageSize);
2598     }
2599 
2600     /* When all pages reach the freelist, drop the read lock from
2601     ** the database file.
2602     */
2603     pPager->nRef--;
2604     assert( pPager->nRef>=0 );
2605     if( pPager->nRef==0 && !MEMDB ){
2606       pager_reset(pPager);
2607     }
2608   }
2609   return SQLITE_OK;
2610 }
2611 
2612 /*
2613 ** Create a journal file for pPager.  There should already be a RESERVED
2614 ** or EXCLUSIVE lock on the database file when this routine is called.
2615 **
** Return SQLITE_OK if everything works.  Return an error code and release
** the write lock if anything goes wrong.
2618 */
2619 static int pager_open_journal(Pager *pPager){
2620   int rc;
2621   assert( !MEMDB );
2622   assert( pPager->state>=PAGER_RESERVED );
2623   assert( pPager->journalOpen==0 );
2624   assert( pPager->useJournal );
2625   assert( pPager->aInJournal==0 );
2626   sqlite3pager_pagecount(pPager);
2627   pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
2628   if( pPager->aInJournal==0 ){
2629     rc = SQLITE_NOMEM;
2630     goto failed_to_open_journal;
2631   }
2632   rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
2633   pPager->journalOff = 0;
2634   pPager->setMaster = 0;
2635   pPager->journalHdr = 0;
2636   if( rc!=SQLITE_OK ){
2637     goto failed_to_open_journal;
2638   }
2639   SET_FULLSYNC(pPager->jfd, pPager->fullSync);
2640   SET_FULLSYNC(pPager->fd, pPager->fullSync);
2641   sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd);
2642   pPager->journalOpen = 1;
2643   pPager->journalStarted = 0;
2644   pPager->needSync = 0;
2645   pPager->alwaysRollback = 0;
2646   pPager->nRec = 0;
2647   if( pPager->errMask!=0 ){
2648     rc = pager_errcode(pPager);
2649     goto failed_to_open_journal;
2650   }
2651   pPager->origDbSize = pPager->dbSize;
2652 
2653   rc = writeJournalHdr(pPager);
2654 
2655   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
2656     rc = sqlite3pager_stmt_begin(pPager);
2657   }
2658   if( rc!=SQLITE_OK ){
2659     rc = pager_unwritelock(pPager);
2660     if( rc==SQLITE_OK ){
2661       rc = SQLITE_FULL;
2662     }
2663   }
2664   return rc;
2665 
2666 failed_to_open_journal:
2667   sqliteFree(pPager->aInJournal);
2668   pPager->aInJournal = 0;
2669   sqlite3OsUnlock(&pPager->fd, NO_LOCK);
2670   pPager->state = PAGER_UNLOCK;
2671   return rc;
2672 }
2673 
2674 /*
** Acquire a write-lock on the database.  The lock is removed when
** any of the following happen:
2677 **
2678 **   *  sqlite3pager_commit() is called.
2679 **   *  sqlite3pager_rollback() is called.
2680 **   *  sqlite3pager_close() is called.
**   *  sqlite3pager_unref() is called on every outstanding page.
2682 **
2683 ** The first parameter to this routine is a pointer to any open page of the
2684 ** database file.  Nothing changes about the page - it is used merely to
2685 ** acquire a pointer to the Pager structure and as proof that there is
2686 ** already a read-lock on the database.
2687 **
2688 ** The second parameter indicates how much space in bytes to reserve for a
2689 ** master journal file-name at the start of the journal when it is created.
2690 **
2691 ** A journal file is opened if this is not a temporary file.  For temporary
2692 ** files, the opening of the journal file is deferred until there is an
2693 ** actual need to write to the journal.
2694 **
2695 ** If the database is already reserved for writing, this routine is a no-op.
2696 **
2697 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
2698 ** immediately instead of waiting until we try to flush the cache.  The
2699 ** exFlag is ignored if a transaction is already active.
2700 */
2701 int sqlite3pager_begin(void *pData, int exFlag){
2702   PgHdr *pPg = DATA_TO_PGHDR(pData);
2703   Pager *pPager = pPg->pPager;
2704   int rc = SQLITE_OK;
2705   assert( pPg->nRef>0 );
2706   assert( pPager->state!=PAGER_UNLOCK );
2707   if( pPager->state==PAGER_SHARED ){
2708     assert( pPager->aInJournal==0 );
2709     if( MEMDB ){
2710       pPager->state = PAGER_EXCLUSIVE;
2711       pPager->origDbSize = pPager->dbSize;
2712     }else{
2713       rc = sqlite3OsLock(&pPager->fd, RESERVED_LOCK);
2714       if( rc==SQLITE_OK ){
2715         pPager->state = PAGER_RESERVED;
2716         if( exFlag ){
2717           rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
2718         }
2719       }
2720       if( rc!=SQLITE_OK ){
2721         return rc;
2722       }
2723       pPager->dirtyCache = 0;
2724       TRACE2("TRANSACTION %d\n", PAGERID(pPager));
2725       if( pPager->useJournal && !pPager->tempFile ){
2726         rc = pager_open_journal(pPager);
2727       }
2728     }
2729   }
2730   return rc;
2731 }
2732 
2733 /*
2734 ** Mark a data page as writeable.  The page is written into the journal
2735 ** if it is not there already.  This routine must be called before making
2736 ** changes to a page.
2737 **
2738 ** The first time this routine is called, the pager creates a new
2739 ** journal and acquires a RESERVED lock on the database.  If the RESERVED
2740 ** lock could not be acquired, this routine returns SQLITE_BUSY.  The
2741 ** calling routine must check for that return value and be careful not to
2742 ** change any page data until this routine returns SQLITE_OK.
2743 **
2744 ** If the journal file could not be written because the disk is full,
2745 ** then this routine returns SQLITE_FULL and does an immediate rollback.
2746 ** All subsequent write attempts also return SQLITE_FULL until there
2747 ** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
2748 ** reset.
2749 */
2750 int sqlite3pager_write(void *pData){
2751   PgHdr *pPg = DATA_TO_PGHDR(pData);
2752   Pager *pPager = pPg->pPager;
2753   int rc = SQLITE_OK;
2754 
2755   /* Check for errors
2756   */
2757   if( pPager->errMask ){
2758     return pager_errcode(pPager);
2759   }
2760   if( pPager->readOnly ){
2761     return SQLITE_PERM;
2762   }
2763 
2764   assert( !pPager->setMaster );
2765 
2766   CHECK_PAGE(pPg);
2767 
2768   /* Mark the page as dirty.  If the page has already been written
2769   ** to the journal then we can return right away.
2770   */
2771   pPg->dirty = 1;
2772   if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
2773     pPager->dirtyCache = 1;
2774   }else{
2775 
2776     /* If we get this far, it means that the page needs to be
2777     ** written to the transaction journal or the ckeckpoint journal
2778     ** or both.
2779     **
2780     ** First check to see that the transaction journal exists and
2781     ** create it if it does not.
2782     */
2783     assert( pPager->state!=PAGER_UNLOCK );
2784     rc = sqlite3pager_begin(pData, 0);
2785     if( rc!=SQLITE_OK ){
2786       return rc;
2787     }
2788     assert( pPager->state>=PAGER_RESERVED );
2789     if( !pPager->journalOpen && pPager->useJournal ){
2790       rc = pager_open_journal(pPager);
2791       if( rc!=SQLITE_OK ) return rc;
2792     }
2793     assert( pPager->journalOpen || !pPager->useJournal );
2794     pPager->dirtyCache = 1;
2795 
2796     /* The transaction journal now exists and we have a RESERVED or an
2797     ** EXCLUSIVE lock on the main database file.  Write the current page to
2798     ** the transaction journal if it is not there already.
2799     */
2800     if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
2801       if( (int)pPg->pgno <= pPager->origDbSize ){
2802         int szPg;
2803         u32 saved;
2804         if( MEMDB ){
2805           PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2806           TRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
2807           assert( pHist->pOrig==0 );
2808           pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
2809           if( pHist->pOrig ){
2810             memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
2811           }
2812         }else{
2813           u32 cksum;
2814           CODEC(pPager, pData, pPg->pgno, 7);
2815           cksum = pager_cksum(pPager, pPg->pgno, pData);
2816           saved = *(u32*)PGHDR_TO_EXTRA(pPg, pPager);
2817           store32bits(cksum, pPg, pPager->pageSize);
2818           szPg = pPager->pageSize+8;
2819           store32bits(pPg->pgno, pPg, -4);
2820           rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
2821           pPager->journalOff += szPg;
2822           TRACE4("JOURNAL %d page %d needSync=%d\n",
2823                   PAGERID(pPager), pPg->pgno, pPg->needSync);
2824           CODEC(pPager, pData, pPg->pgno, 0);
2825           *(u32*)PGHDR_TO_EXTRA(pPg, pPager) = saved;
2826           if( rc!=SQLITE_OK ){
2827             sqlite3pager_rollback(pPager);
2828             pPager->errMask |= PAGER_ERR_FULL;
2829             return rc;
2830           }
2831           pPager->nRec++;
2832           assert( pPager->aInJournal!=0 );
2833           pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2834           pPg->needSync = !pPager->noSync;
2835           if( pPager->stmtInUse ){
2836             pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2837             page_add_to_stmt_list(pPg);
2838           }
2839         }
2840       }else{
2841         pPg->needSync = !pPager->journalStarted && !pPager->noSync;
2842         TRACE4("APPEND %d page %d needSync=%d\n",
2843                 PAGERID(pPager), pPg->pgno, pPg->needSync);
2844       }
2845       if( pPg->needSync ){
2846         pPager->needSync = 1;
2847       }
2848       pPg->inJournal = 1;
2849     }
2850 
2851     /* If the statement journal is open and the page is not in it,
2852     ** then write the current page to the statement journal.  Note that
2853     ** the statement journal format differs from the standard journal format
2854     ** in that it omits the checksums and the header.
2855     */
2856     if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
2857       assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
2858       if( MEMDB ){
2859         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2860         assert( pHist->pStmt==0 );
2861         pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
2862         if( pHist->pStmt ){
2863           memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
2864         }
2865         TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
2866       }else{
2867         store32bits(pPg->pgno, pPg, -4);
2868         CODEC(pPager, pData, pPg->pgno, 7);
2869         rc = sqlite3OsWrite(&pPager->stfd,((char*)pData)-4, pPager->pageSize+4);
2870         TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
2871         CODEC(pPager, pData, pPg->pgno, 0);
2872         if( rc!=SQLITE_OK ){
2873           sqlite3pager_rollback(pPager);
2874           pPager->errMask |= PAGER_ERR_FULL;
2875           return rc;
2876         }
2877         pPager->stmtNRec++;
2878         assert( pPager->aInStmt!=0 );
2879         pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2880       }
2881       page_add_to_stmt_list(pPg);
2882     }
2883   }
2884 
2885   /* Update the database size and return.
2886   */
2887   if( pPager->dbSize<(int)pPg->pgno ){
2888     pPager->dbSize = pPg->pgno;
2889     if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
2890       pPager->dbSize++;
2891     }
2892   }
2893   return rc;
2894 }
2895 
2896 /*
2897 ** Return TRUE if the page given in the argument was previously passed
2898 ** to sqlite3pager_write().  In other words, return TRUE if it is ok
2899 ** to change the content of the page.
2900 */
2901 int sqlite3pager_iswriteable(void *pData){
2902   PgHdr *pPg = DATA_TO_PGHDR(pData);
2903   return pPg->dirty;
2904 }
2905 
2906 #ifndef SQLITE_OMIT_VACUUM
2907 /*
2908 ** Replace the content of a single page with the information in the third
2909 ** argument.
2910 */
2911 int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
2912   void *pPage;
2913   int rc;
2914 
2915   rc = sqlite3pager_get(pPager, pgno, &pPage);
2916   if( rc==SQLITE_OK ){
2917     rc = sqlite3pager_write(pPage);
2918     if( rc==SQLITE_OK ){
2919       memcpy(pPage, pData, pPager->pageSize);
2920     }
2921     sqlite3pager_unref(pPage);
2922   }
2923   return rc;
2924 }
2925 #endif
2926 
2927 /*
2928 ** A call to this routine tells the pager that it is not necessary to
2929 ** write the information on page "pgno" back to the disk, even though
2930 ** that page might be marked as dirty.
2931 **
2932 ** The overlying software layer calls this routine when all of the data
2933 ** on the given page is unused.  The pager marks the page as clean so
2934 ** that it does not get written to disk.
2935 **
2936 ** Tests show that this optimization, together with the
2937 ** sqlite3pager_dont_rollback() below, more than double the speed
2938 ** of large INSERT operations and quadruple the speed of large DELETEs.
2939 **
2940 ** When this routine is called, set the alwaysRollback flag to true.
2941 ** Subsequent calls to sqlite3pager_dont_rollback() for the same page
2942 ** will thereafter be ignored.  This is necessary to avoid a problem
2943 ** where a page with data is added to the freelist during one part of
2944 ** a transaction then removed from the freelist during a later part
2945 ** of the same transaction and reused for some other purpose.  When it
2946 ** is first added to the freelist, this routine is called.  When reused,
2947 ** the dont_rollback() routine is called.  But because the page contains
2948 ** critical data, we still need to be sure it gets rolled back in spite
2949 ** of the dont_rollback() call.
2950 */
2951 void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){
2952   PgHdr *pPg;
2953 
2954   if( MEMDB ) return;
2955 
2956   pPg = pager_lookup(pPager, pgno);
2957   pPg->alwaysRollback = 1;
2958   if( pPg && pPg->dirty ){
2959     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
2960       /* If this pages is the last page in the file and the file has grown
2961       ** during the current transaction, then do NOT mark the page as clean.
2962       ** When the database file grows, we must make sure that the last page
2963       ** gets written at least once so that the disk file will be the correct
2964       ** size. If you do not write this page and the size of the file
2965       ** on the disk ends up being too small, that can lead to database
2966       ** corruption during the next transaction.
2967       */
2968     }else{
2969       TRACE3("DONT_WRITE page %d of %d\n", pgno, PAGERID(pPager));
2970       pPg->dirty = 0;
2971 #ifdef SQLITE_CHECK_PAGES
2972       pPg->pageHash = pager_pagehash(pPg);
2973 #endif
2974     }
2975   }
2976 }
2977 
2978 /*
2979 ** A call to this routine tells the pager that if a rollback occurs,
2980 ** it is not necessary to restore the data on the given page.  This
2981 ** means that the pager does not have to record the given page in the
2982 ** rollback journal.
2983 */
2984 void sqlite3pager_dont_rollback(void *pData){
2985   PgHdr *pPg = DATA_TO_PGHDR(pData);
2986   Pager *pPager = pPg->pPager;
2987 
2988   if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return;
2989   if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return;
2990   if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
2991     assert( pPager->aInJournal!=0 );
2992     pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2993     pPg->inJournal = 1;
2994     if( pPager->stmtInUse ){
2995       pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2996       page_add_to_stmt_list(pPg);
2997     }
2998     TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
2999   }
3000   if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
3001     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
3002     assert( pPager->aInStmt!=0 );
3003     pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
3004     page_add_to_stmt_list(pPg);
3005   }
3006 }
3007 
3008 
3009 #ifndef SQLITE_OMIT_MEMORYDB
3010 /*
3011 ** Clear a PgHistory block
3012 */
3013 static void clearHistory(PgHistory *pHist){
3014   sqliteFree(pHist->pOrig);
3015   sqliteFree(pHist->pStmt);
3016   pHist->pOrig = 0;
3017   pHist->pStmt = 0;
3018 }
3019 #else
3020 #define clearHistory(x)
3021 #endif
3022 
3023 /*
3024 ** Commit all changes to the database and release the write lock.
3025 **
3026 ** If the commit fails for any reason, a rollback attempt is made
3027 ** and an error code is returned.  If the commit worked, SQLITE_OK
3028 ** is returned.
3029 */
3030 int sqlite3pager_commit(Pager *pPager){
3031   int rc;
3032   PgHdr *pPg;
3033 
3034   if( pPager->errMask==PAGER_ERR_FULL ){
3035     rc = sqlite3pager_rollback(pPager);
3036     if( rc==SQLITE_OK ){
3037       rc = SQLITE_FULL;
3038     }
3039     return rc;
3040   }
3041   if( pPager->errMask!=0 ){
3042     rc = pager_errcode(pPager);
3043     return rc;
3044   }
3045   if( pPager->state<PAGER_RESERVED ){
3046     return SQLITE_ERROR;
3047   }
3048   TRACE2("COMMIT %d\n", PAGERID(pPager));
3049   if( MEMDB ){
3050     pPg = pager_get_all_dirty_pages(pPager);
3051     while( pPg ){
3052       clearHistory(PGHDR_TO_HIST(pPg, pPager));
3053       pPg->dirty = 0;
3054       pPg->inJournal = 0;
3055       pPg->inStmt = 0;
3056       pPg->pPrevStmt = pPg->pNextStmt = 0;
3057       pPg = pPg->pDirty;
3058     }
3059 #ifndef NDEBUG
3060     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
3061       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
3062       assert( !pPg->alwaysRollback );
3063       assert( !pHist->pOrig );
3064       assert( !pHist->pStmt );
3065     }
3066 #endif
3067     pPager->pStmt = 0;
3068     pPager->state = PAGER_SHARED;
3069     return SQLITE_OK;
3070   }
3071   if( pPager->dirtyCache==0 ){
3072     /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
3073     ** if there have been no changes to the database file. */
3074     assert( pPager->needSync==0 );
3075     rc = pager_unwritelock(pPager);
3076     pPager->dbSize = -1;
3077     return rc;
3078   }
3079   assert( pPager->journalOpen );
3080   rc = sqlite3pager_sync(pPager, 0, 0);
3081   if( rc!=SQLITE_OK ){
3082     goto commit_abort;
3083   }
3084   rc = pager_unwritelock(pPager);
3085   pPager->dbSize = -1;
3086   return rc;
3087 
3088   /* Jump here if anything goes wrong during the commit process.
3089   */
3090 commit_abort:
3091   sqlite3pager_rollback(pPager);
3092   return rc;
3093 }
3094 
3095 /*
3096 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
3097 ** All in-memory cache pages revert to their original data contents.
3098 ** The journal is deleted.
3099 **
3100 ** This routine cannot fail unless some other process is not following
3101 ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
3102 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
3103 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
3104 ** codes are returned for all these occasions.  Otherwise,
3105 ** SQLITE_OK is returned.
3106 */
3107 int sqlite3pager_rollback(Pager *pPager){
3108   int rc;
3109   TRACE2("ROLLBACK %d\n", PAGERID(pPager));
3110   if( MEMDB ){
3111     PgHdr *p;
3112     for(p=pPager->pAll; p; p=p->pNextAll){
3113       PgHistory *pHist;
3114       assert( !p->alwaysRollback );
3115       if( !p->dirty ){
3116         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
3117         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
3118         continue;
3119       }
3120 
3121       pHist = PGHDR_TO_HIST(p, pPager);
3122       if( pHist->pOrig ){
3123         memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
3124         TRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
3125       }else{
3126         TRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
3127       }
3128       clearHistory(pHist);
3129       p->dirty = 0;
3130       p->inJournal = 0;
3131       p->inStmt = 0;
3132       p->pPrevStmt = p->pNextStmt = 0;
3133 
3134       if( pPager->xReiniter ){
3135         pPager->xReiniter(PGHDR_TO_DATA(p), pPager->pageSize);
3136       }
3137 
3138     }
3139     pPager->pStmt = 0;
3140     pPager->dbSize = pPager->origDbSize;
3141     memoryTruncate(pPager);
3142     pPager->stmtInUse = 0;
3143     pPager->state = PAGER_SHARED;
3144     return SQLITE_OK;
3145   }
3146 
3147   if( !pPager->dirtyCache || !pPager->journalOpen ){
3148     rc = pager_unwritelock(pPager);
3149     pPager->dbSize = -1;
3150     return rc;
3151   }
3152 
3153   if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
3154     if( pPager->state>=PAGER_EXCLUSIVE ){
3155       pager_playback(pPager);
3156     }
3157     return pager_errcode(pPager);
3158   }
3159   if( pPager->state==PAGER_RESERVED ){
3160     int rc2;
3161     rc = pager_reload_cache(pPager);
3162     rc2 = pager_unwritelock(pPager);
3163     if( rc==SQLITE_OK ){
3164       rc = rc2;
3165     }
3166   }else{
3167     rc = pager_playback(pPager);
3168   }
3169   if( rc!=SQLITE_OK ){
3170     rc = SQLITE_CORRUPT;  /* bkpt-CORRUPT */
3171     pPager->errMask |= PAGER_ERR_CORRUPT;
3172   }
3173   pPager->dbSize = -1;
3174   return rc;
3175 }
3176 
3177 /*
3178 ** Return TRUE if the database file is opened read-only.  Return FALSE
3179 ** if the database is (in theory) writable.
3180 */
3181 int sqlite3pager_isreadonly(Pager *pPager){
3182   return pPager->readOnly;
3183 }
3184 
3185 /*
3186 ** This routine is used for testing and analysis only.
3187 */
3188 int *sqlite3pager_stats(Pager *pPager){
3189   static int a[11];
3190   a[0] = pPager->nRef;
3191   a[1] = pPager->nPage;
3192   a[2] = pPager->mxPage;
3193   a[3] = pPager->dbSize;
3194   a[4] = pPager->state;
3195   a[5] = pPager->errMask;
3196 #ifdef SQLITE_TEST
3197   a[6] = pPager->nHit;
3198   a[7] = pPager->nMiss;
3199   a[8] = pPager->nOvfl;
3200   a[9] = pPager->nRead;
3201   a[10] = pPager->nWrite;
3202 #endif
3203   return a;
3204 }
3205 
3206 /*
3207 ** Set the statement rollback point.
3208 **
3209 ** This routine should be called with the transaction journal already
3210 ** open.  A new statement journal is created that can be used to rollback
3211 ** changes of a single SQL command within a larger transaction.
3212 */
3213 int sqlite3pager_stmt_begin(Pager *pPager){
3214   int rc;
3215   char zTemp[SQLITE_TEMPNAME_SIZE];
3216   assert( !pPager->stmtInUse );
3217   assert( pPager->dbSize>=0 );
3218   TRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
3219   if( MEMDB ){
3220     pPager->stmtInUse = 1;
3221     pPager->stmtSize = pPager->dbSize;
3222     return SQLITE_OK;
3223   }
3224   if( !pPager->journalOpen ){
3225     pPager->stmtAutoopen = 1;
3226     return SQLITE_OK;
3227   }
3228   assert( pPager->journalOpen );
3229   pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 );
3230   if( pPager->aInStmt==0 ){
3231     sqlite3OsLock(&pPager->fd, SHARED_LOCK);
3232     return SQLITE_NOMEM;
3233   }
3234 #ifndef NDEBUG
3235   rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
3236   if( rc ) goto stmt_begin_failed;
3237   assert( pPager->stmtJSize == pPager->journalOff );
3238 #endif
3239   pPager->stmtJSize = pPager->journalOff;
3240   pPager->stmtSize = pPager->dbSize;
3241   pPager->stmtHdrOff = 0;
3242   pPager->stmtCksum = pPager->cksumInit;
3243   if( !pPager->stmtOpen ){
3244     rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
3245     if( rc ) goto stmt_begin_failed;
3246     pPager->stmtOpen = 1;
3247     pPager->stmtNRec = 0;
3248   }
3249   pPager->stmtInUse = 1;
3250   return SQLITE_OK;
3251 
3252 stmt_begin_failed:
3253   if( pPager->aInStmt ){
3254     sqliteFree(pPager->aInStmt);
3255     pPager->aInStmt = 0;
3256   }
3257   return rc;
3258 }
3259 
3260 /*
3261 ** Commit a statement.
3262 */
3263 int sqlite3pager_stmt_commit(Pager *pPager){
3264   if( pPager->stmtInUse ){
3265     PgHdr *pPg, *pNext;
3266     TRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
3267     if( !MEMDB ){
3268       sqlite3OsSeek(&pPager->stfd, 0);
3269       /* sqlite3OsTruncate(&pPager->stfd, 0); */
3270       sqliteFree( pPager->aInStmt );
3271       pPager->aInStmt = 0;
3272     }
3273     for(pPg=pPager->pStmt; pPg; pPg=pNext){
3274       pNext = pPg->pNextStmt;
3275       assert( pPg->inStmt );
3276       pPg->inStmt = 0;
3277       pPg->pPrevStmt = pPg->pNextStmt = 0;
3278       if( MEMDB ){
3279         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
3280         sqliteFree(pHist->pStmt);
3281         pHist->pStmt = 0;
3282       }
3283     }
3284     pPager->stmtNRec = 0;
3285     pPager->stmtInUse = 0;
3286     pPager->pStmt = 0;
3287   }
3288   pPager->stmtAutoopen = 0;
3289   return SQLITE_OK;
3290 }
3291 
3292 /*
3293 ** Rollback a statement.
3294 */
3295 int sqlite3pager_stmt_rollback(Pager *pPager){
3296   int rc;
3297   if( pPager->stmtInUse ){
3298     TRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
3299     if( MEMDB ){
3300       PgHdr *pPg;
3301       for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){
3302         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
3303         if( pHist->pStmt ){
3304           memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
3305           sqliteFree(pHist->pStmt);
3306           pHist->pStmt = 0;
3307         }
3308       }
3309       pPager->dbSize = pPager->stmtSize;
3310       memoryTruncate(pPager);
3311       rc = SQLITE_OK;
3312     }else{
3313       rc = pager_stmt_playback(pPager);
3314     }
3315     sqlite3pager_stmt_commit(pPager);
3316   }else{
3317     rc = SQLITE_OK;
3318   }
3319   pPager->stmtAutoopen = 0;
3320   return rc;
3321 }
3322 
3323 /*
3324 ** Return the full pathname of the database file.
3325 */
3326 const char *sqlite3pager_filename(Pager *pPager){
3327   return pPager->zFilename;
3328 }
3329 
3330 /*
3331 ** Return the directory of the database file.
3332 */
3333 const char *sqlite3pager_dirname(Pager *pPager){
3334   return pPager->zDirectory;
3335 }
3336 
3337 /*
3338 ** Return the full pathname of the journal file.
3339 */
3340 const char *sqlite3pager_journalname(Pager *pPager){
3341   return pPager->zJournal;
3342 }
3343 
3344 /*
3345 ** Set the codec for this pager
3346 */
3347 void sqlite3pager_set_codec(
3348   Pager *pPager,
3349   void (*xCodec)(void*,void*,Pgno,int),
3350   void *pCodecArg
3351 ){
3352   pPager->xCodec = xCodec;
3353   pPager->pCodecArg = pCodecArg;
3354 }
3355 
3356 /*
3357 ** This routine is called to increment the database file change-counter,
3358 ** stored at byte 24 of the pager file.
3359 */
3360 static int pager_incr_changecounter(Pager *pPager){
3361   void *pPage;
3362   PgHdr *pPgHdr;
3363   u32 change_counter;
3364   int rc;
3365 
3366   /* Open page 1 of the file for writing. */
3367   rc = sqlite3pager_get(pPager, 1, &pPage);
3368   if( rc!=SQLITE_OK ) return rc;
3369   rc = sqlite3pager_write(pPage);
3370   if( rc!=SQLITE_OK ) return rc;
3371 
3372   /* Read the current value at byte 24. */
3373   pPgHdr = DATA_TO_PGHDR(pPage);
3374   change_counter = retrieve32bits(pPgHdr, 24);
3375 
3376   /* Increment the value just read and write it back to byte 24. */
3377   change_counter++;
3378   store32bits(change_counter, pPgHdr, 24);
3379 
3380   /* Release the page reference. */
3381   sqlite3pager_unref(pPage);
3382   return SQLITE_OK;
3383 }
3384 
3385 /*
3386 ** Sync the database file for the pager pPager. zMaster points to the name
3387 ** of a master journal file that should be written into the individual
3388 ** journal file. zMaster may be NULL, which is interpreted as no master
3389 ** journal (a single database transaction).
3390 **
3391 ** This routine ensures that the journal is synced, all dirty pages written
3392 ** to the database file and the database file synced. The only thing that
3393 ** remains to commit the transaction is to delete the journal file (or
3394 ** master journal file if specified).
3395 **
3396 ** Note that if zMaster==NULL, this does not overwrite a previous value
3397 ** passed to an sqlite3pager_sync() call.
3398 **
3399 ** If parameter nTrunc is non-zero, then the pager file is truncated to
3400 ** nTrunc pages (this is used by auto-vacuum databases).
3401 */
3402 int sqlite3pager_sync(Pager *pPager, const char *zMaster, Pgno nTrunc){
3403   int rc = SQLITE_OK;
3404 
3405   TRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
3406       pPager->zFilename, zMaster, nTrunc);
3407 
3408   /* If this is an in-memory db, or no pages have been written to, or this
3409   ** function has already been called, it is a no-op.
3410   */
3411   if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
3412     PgHdr *pPg;
3413     assert( pPager->journalOpen );
3414 
3415     /* If a master journal file name has already been written to the
3416     ** journal file, then no sync is required. This happens when it is
3417     ** written, then the process fails to upgrade from a RESERVED to an
3418     ** EXCLUSIVE lock. The next time the process tries to commit the
3419     ** transaction the m-j name will have already been written.
3420     */
3421     if( !pPager->setMaster ){
3422       rc = pager_incr_changecounter(pPager);
3423       if( rc!=SQLITE_OK ) goto sync_exit;
3424 #ifndef SQLITE_OMIT_AUTOVACUUM
3425       if( nTrunc!=0 ){
3426         /* If this transaction has made the database smaller, then all pages
3427         ** being discarded by the truncation must be written to the journal
3428         ** file.
3429         */
3430         Pgno i;
3431         void *pPage;
3432         for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
3433           if( !(pPager->aInJournal[i/8] & (1<<(i&7))) ){
3434             rc = sqlite3pager_get(pPager, i, &pPage);
3435             if( rc!=SQLITE_OK ) goto sync_exit;
3436             rc = sqlite3pager_write(pPage);
3437             sqlite3pager_unref(pPage);
3438             if( rc!=SQLITE_OK ) goto sync_exit;
3439           }
3440         }
3441       }
3442 #endif
3443       rc = writeMasterJournal(pPager, zMaster);
3444       if( rc!=SQLITE_OK ) goto sync_exit;
3445       rc = syncJournal(pPager);
3446       if( rc!=SQLITE_OK ) goto sync_exit;
3447     }
3448 
3449 #ifndef SQLITE_OMIT_AUTOVACUUM
3450     if( nTrunc!=0 ){
3451       rc = sqlite3pager_truncate(pPager, nTrunc);
3452       if( rc!=SQLITE_OK ) goto sync_exit;
3453     }
3454 #endif
3455 
3456     /* Write all dirty pages to the database file */
3457     pPg = pager_get_all_dirty_pages(pPager);
3458     rc = pager_write_pagelist(pPg);
3459     if( rc!=SQLITE_OK ) goto sync_exit;
3460 
3461     /* Sync the database file. */
3462     if( !pPager->noSync ){
3463       rc = sqlite3OsSync(&pPager->fd);
3464     }
3465 
3466     pPager->state = PAGER_SYNCED;
3467   }
3468 
3469 sync_exit:
3470   return rc;
3471 }
3472 
3473 #ifndef SQLITE_OMIT_AUTOVACUUM
3474 /*
3475 ** Move the page identified by pData to location pgno in the file.
3476 **
3477 ** There must be no references to the current page pgno. If current page
3478 ** pgno is not already in the rollback journal, it is not written there by
3479 ** by this routine. The same applies to the page pData refers to on entry to
3480 ** this routine.
3481 **
3482 ** References to the page refered to by pData remain valid. Updating any
3483 ** meta-data associated with page pData (i.e. data stored in the nExtra bytes
3484 ** allocated along with the page) is the responsibility of the caller.
3485 **
3486 ** A transaction must be active when this routine is called. It used to be
3487 ** required that a statement transaction was not active, but this restriction
3488 ** has been removed (CREATE INDEX needs to move a page when a statement
3489 ** transaction is active).
3490 */
3491 int sqlite3pager_movepage(Pager *pPager, void *pData, Pgno pgno){
3492   PgHdr *pPg = DATA_TO_PGHDR(pData);
3493   PgHdr *pPgOld;
3494   int h;
3495   Pgno needSyncPgno = 0;
3496 
3497   assert( pPg->nRef>0 );
3498 
3499   TRACE5("MOVE %d page %d (needSync=%d) moves to %d\n",
3500       PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
3501 
3502   if( pPg->needSync ){
3503     needSyncPgno = pPg->pgno;
3504     assert( pPg->inJournal );
3505     assert( pPg->dirty );
3506     assert( pPager->needSync );
3507   }
3508 
3509   /* Unlink pPg from it's hash-chain */
3510   unlinkHashChain(pPager, pPg);
3511 
3512   /* If the cache contains a page with page-number pgno, remove it
3513   ** from it's hash chain. Also, if the PgHdr.needSync was set for
3514   ** page pgno before the 'move' operation, it needs to be retained
3515   ** for the page moved there.
3516   */
3517   pPgOld = pager_lookup(pPager, pgno);
3518   if( pPgOld ){
3519     assert( pPgOld->nRef==0 );
3520     unlinkHashChain(pPager, pPgOld);
3521     pPgOld->dirty = 0;
3522     if( pPgOld->needSync ){
3523       assert( pPgOld->inJournal );
3524       pPg->inJournal = 1;
3525       pPg->needSync = 1;
3526       assert( pPager->needSync );
3527     }
3528   }
3529 
3530   /* Change the page number for pPg and insert it into the new hash-chain. */
3531   pPg->pgno = pgno;
3532   h = pager_hash(pgno);
3533   if( pPager->aHash[h] ){
3534     assert( pPager->aHash[h]->pPrevHash==0 );
3535     pPager->aHash[h]->pPrevHash = pPg;
3536   }
3537   pPg->pNextHash = pPager->aHash[h];
3538   pPager->aHash[h] = pPg;
3539   pPg->pPrevHash = 0;
3540 
3541   pPg->dirty = 1;
3542   pPager->dirtyCache = 1;
3543 
3544   if( needSyncPgno ){
3545     /* If needSyncPgno is non-zero, then the journal file needs to be
3546     ** sync()ed before any data is written to database file page needSyncPgno.
3547     ** Currently, no such page exists in the page-cache and the
3548     ** Pager.aInJournal bit has been set. This needs to be remedied by loading
3549     ** the page into the pager-cache and setting the PgHdr.needSync flag.
3550     **
3551     ** The sqlite3pager_get() call may cause the journal to sync. So make
3552     ** sure the Pager.needSync flag is set too.
3553     */
3554     int rc;
3555     void *pNeedSync;
3556     assert( pPager->needSync );
3557     rc = sqlite3pager_get(pPager, needSyncPgno, &pNeedSync);
3558     if( rc!=SQLITE_OK ) return rc;
3559     pPager->needSync = 1;
3560     DATA_TO_PGHDR(pNeedSync)->needSync = 1;
3561     DATA_TO_PGHDR(pNeedSync)->inJournal = 1;
3562     DATA_TO_PGHDR(pNeedSync)->dirty = 1;
3563     sqlite3pager_unref(pNeedSync);
3564   }
3565 
3566   return SQLITE_OK;
3567 }
3568 #endif
3569 
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
/*
** Report the file-lock level currently recorded for the given pager.
** The result is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK,
** PENDING_LOCK, or EXCLUSIVE_LOCK.
**
** Only compiled for debug and test builds. With OS_TEST defined, the
** pager's file handle is reached through one extra level of indirection.
*/
int sqlite3pager_lockstate(Pager *pPager){
  int eLock;
#ifdef OS_TEST
  eLock = pPager->fd->fd.locktype;
#else
  eLock = pPager->fd.locktype;
#endif
  return eLock;
}
#endif
#ifdef SQLITE_DEBUG
/*
** Debugging aid: walk the pager's list of all pages and print one line
** (page number, data address, reference count) for every page whose
** reference count is greater than zero.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pEntry = pPager->pAll;
  while( pEntry ){
    if( pEntry->nRef>0 ){
      sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n",
         pEntry->pgno, PGHDR_TO_DATA(pEntry), pEntry->nRef);
    }
    pEntry = pEntry->pNextAll;
  }
}
#endif
3599 #endif /* SQLITE_OMIT_DISKIO */
3600