/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.208 2005/07/09 02:16:03 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "os.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

/*
** Macros for troubleshooting.  Normally turned off.  Change the "#if 0"
** below to "#if 1" to route the TRACEn() calls to sqlite3DebugPrintf();
** otherwise they expand to nothing.
*/
#if 0
#define TRACE1(X)         sqlite3DebugPrintf(X)
#define TRACE2(X,Y)       sqlite3DebugPrintf(X,Y)
#define TRACE3(X,Y,Z)     sqlite3DebugPrintf(X,Y,Z)
#define TRACE4(X,Y,Z,W)   sqlite3DebugPrintf(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
#else
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V)
#endif

/*
** The following two macros are used within the TRACEX() macros above
** to print out file-descriptors. They are required so that tracing
** can be turned on when using both the regular os_unix.c and os_test.c
** backends.
**
** PAGERID() takes a pointer to a Pager struct as its argument. The
** associated file-descriptor is returned. FILEHANDLEID() takes an OsFile
** struct as its argument.
*/
#ifdef OS_TEST
#define PAGERID(p) (p->fd->fd.h)
#define FILEHANDLEID(fd) (fd->fd.h)
#else
#define PAGERID(p) (p->fd.h)
#define FILEHANDLEID(fd) (fd.h)
#endif

/*
** The page cache as a whole is always in one of the following
** states:
**
**   PAGER_UNLOCK        The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   PAGER_SHARED        The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   PAGER_RESERVED      This process has reserved the database for writing
**                       but has not yet made any changes.  Only one process
**                       at a time can reserve the database.  The original
**                       database file has not been modified so other
**                       processes may still be reading the on-disk
**                       database file.
**
**   PAGER_EXCLUSIVE     The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
**   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
**                       after all dirty pages have been written to the
**                       database file and the file has been synced to
**                       disk. All that remains to do is to remove the
**                       journal file and the transaction will be
**                       committed.
**
** The page cache comes up in PAGER_UNLOCK.  The first time a
** sqlite3pager_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to PAGER_UNLOCK.  The first time
** that sqlite3pager_write() is called, the state transitions to
** PAGER_RESERVED.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file.  After an sqlite3pager_rollback()
** or sqlite_pager_commit(), the state goes back to PAGER_SHARED.
*/
#define PAGER_UNLOCK      0
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5

/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default.  To see why, consider the following scenario:
**
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock.  If
** both threads are using their busy callbacks, it might be a long time
** before one of the threads gives up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif

/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
*/
#define FORCE_ALIGNMENT(X)   (((X)+7)&~7)

/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call sqlite3pager_write() on a page prior to making
** any modifications to that page.
** The first time sqlite3pager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared.  The modified page cannot be written back into the original
** database file until the journal pages have been synced to disk and the
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll;               /* A list of all pages */
  PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inStmt;                     /* TRUE if in the statement subjournal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  short int nRef;                /* Number of users of this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
#ifdef SQLITE_CHECK_PAGES
  u32 pageHash;                  /* Hash of the page data; see pager_pagehash() */
#endif
  /* pPager->pageSize bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};

/*
** For an in-memory only database, some extra information is recorded about
** each page so that changes can be rolled back.  (Journal files are not
** used for in-memory databases.)  The following information is added to
** the end of every EXTRA block for in-memory databases.
**
** This information could have been added directly to the PgHdr structure.
** But then it would take up an extra 8 bytes of storage on every PgHdr
** even for disk-based databases.  Splitting it out saves 8 bytes.  This
** is only a savings of 0.8% but those percentages add up.
*/
typedef struct PgHistory PgHistory;
struct PgHistory {
  u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
  u8 *pStmt;     /* Text as it was at the beginning of the current statement */
};

/*
** A macro used for invoking the codec if there is one.  When
** SQLITE_HAS_CODEC is not defined the macro expands to nothing.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) if( P->xCodec ){ P->xCodec(P->pCodecArg,D,N,X); }
#else
# define CODEC(P,D,N,X)
#endif

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
** The memory layout these macros rely on is: the PgHdr, immediately
** followed by pageSize bytes of page data, then nExtra bytes of extra
** data, then the PgHistory.  PGHDR_TO_EXTRA(G,P) takes the PgHdr
** pointer as G and the Pager pointer as P; PGHDR_TO_HIST(P,PGR) takes
** the PgHdr pointer as P and the Pager pointer as PGR.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize])
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])

/*
** How big to make the hash table used for locating in-memory pages
** by page number. This macro looks a little silly, but is evaluated
** at compile-time, not run-time (at least for gcc this is true).
** Every possible value is a power of two, which pager_hash() relies on.
*/
#define N_PG_HASH (\
  (MAX_PAGES>1024)?2048: \
  (MAX_PAGES>512)?1024: \
  (MAX_PAGES>256)?512: \
  (MAX_PAGES>128)?256: \
  (MAX_PAGES>64)?128:64 \
)

/*
** Hash a page number: keep the low-order bits that index aHash[].
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))

/*
** An open page cache is an instance of the following structure.
*/
struct Pager {
  u8 journalOpen;             /* True if journal file descriptor is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 stmtOpen;                /* True if the statement subjournal is open */
  u8 stmtInUse;               /* True if we are in a statement subtransaction */
  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
  u8 errMask;                 /* One of several kinds of errors */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyCache;              /* True if cached pages have changed */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 memDb;                   /* True to inhibit all file I/O */
  u8 setMaster;               /* True if a m-j name has been written to jrnl */
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int stmtNRec;               /* Number of records in stmt subjournal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  int pageSize;               /* Number of bytes in a page */
  int nPage;                  /* Total number of in-memory pages */
  int nMaxPage;               /* High water mark of nPage */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory holding database and journal files */
  OsFile fd, jfd;             /* File descriptors for database and journal */
  OsFile stfd;                /* File descriptor for the statement subjournal*/
  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  i64 journalOff;             /* Current byte offset in the journal file */
  i64 journalHdr;             /* Byte offset to previous journal header */
  i64 stmtHdrOff;             /* First journal header written this statement */
  i64 stmtCksum;              /* cksumInit when statement was started */
  i64 stmtJSize;              /* Size of journal at stmt_begin() */
  int sectorSize;             /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  int nRead,nWrite;           /* Database pages read/written */
#endif
  void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
  void (*xReiniter)(void*,int);   /* Call this routine when reloading pages */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number to PgHdr */
};

/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument.  Otherwise the macro expands to nothing.
*/
#ifdef SQLITE_TEST
# define TEST_INCR(x) x++
#else
# define TEST_INCR(x)
#endif

/*
** These are bits that can be set in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** Since version 2.8.0, the journal format contains additional sanity
** checking information.  If the power fails while the journal is being
** written, semi-random garbage data might appear in the journal
** file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the new journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the pPager->pageSize bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to random value which
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
};

/*
** The size of the header and of each page in the journal is determined
** by the following macros.
*/
#define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)

/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)

/*
** The macro MEMDB is true if we are dealing with an in-memory database.
** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
** the value of MEMDB will be a constant and the compiler will optimize
** out code that would never execute.
**
** Note that the non-constant form refers to a variable named pPager, so
** MEMDB can only be used where such a variable is in scope.
*/
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif

/*
** The default size of a disk sector
*/
#define PAGER_SECTOR_SIZE 512

/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file
** is devoted to storing a master journal name - there are no more pages to
** roll back. See comments for function writeMasterJournal() for details.
*/
/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)

/*
** The maximum legal page number is (2^31 - 1).
384 */ 385 #define PAGER_MAX_PGNO 2147483647 386 387 /* 388 ** Enable reference count tracking (for debugging) here: 389 */ 390 #ifdef SQLITE_DEBUG 391 int pager3_refinfo_enable = 0; 392 static void pager_refinfo(PgHdr *p){ 393 static int cnt = 0; 394 if( !pager3_refinfo_enable ) return; 395 sqlite3DebugPrintf( 396 "REFCNT: %4d addr=%p nRef=%d\n", 397 p->pgno, PGHDR_TO_DATA(p), p->nRef 398 ); 399 cnt++; /* Something to set a breakpoint on */ 400 } 401 # define REFINFO(X) pager_refinfo(X) 402 #else 403 # define REFINFO(X) 404 #endif 405 406 /* 407 ** Read a 32-bit integer from the given file descriptor. Store the integer 408 ** that is read in *pRes. Return SQLITE_OK if everything worked, or an 409 ** error code is something goes wrong. 410 ** 411 ** All values are stored on disk as big-endian. 412 */ 413 static int read32bits(OsFile *fd, u32 *pRes){ 414 u32 res; 415 int rc; 416 rc = sqlite3OsRead(fd, &res, sizeof(res)); 417 if( rc==SQLITE_OK ){ 418 unsigned char ac[4]; 419 memcpy(ac, &res, 4); 420 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3]; 421 } 422 *pRes = res; 423 return rc; 424 } 425 426 /* 427 ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK 428 ** on success or an error code is something goes wrong. 429 */ 430 static int write32bits(OsFile *fd, u32 val){ 431 unsigned char ac[4]; 432 ac[0] = (val>>24) & 0xff; 433 ac[1] = (val>>16) & 0xff; 434 ac[2] = (val>>8) & 0xff; 435 ac[3] = val & 0xff; 436 return sqlite3OsWrite(fd, ac, 4); 437 } 438 439 /* 440 ** Write the 32-bit integer 'val' into the page identified by page header 441 ** 'p' at offset 'offset'. 442 */ 443 static void store32bits(u32 val, PgHdr *p, int offset){ 444 unsigned char *ac; 445 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset]; 446 ac[0] = (val>>24) & 0xff; 447 ac[1] = (val>>16) & 0xff; 448 ac[2] = (val>>8) & 0xff; 449 ac[3] = val & 0xff; 450 } 451 452 /* 453 ** Read a 32-bit integer at offset 'offset' from the page identified by 454 ** page header 'p'. 
455 */ 456 static u32 retrieve32bits(PgHdr *p, int offset){ 457 unsigned char *ac; 458 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset]; 459 return (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3]; 460 } 461 462 463 /* 464 ** Convert the bits in the pPager->errMask into an approprate 465 ** return code. 466 */ 467 static int pager_errcode(Pager *pPager){ 468 int rc = SQLITE_OK; 469 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL; 470 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR; 471 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL; 472 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM; 473 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT; 474 return rc; 475 } 476 477 #ifdef SQLITE_CHECK_PAGES 478 /* 479 ** Return a 32-bit hash of the page data for pPage. 480 */ 481 static u32 pager_pagehash(PgHdr *pPage){ 482 u32 hash = 0; 483 int i; 484 unsigned char *pData = (unsigned char *)PGHDR_TO_DATA(pPage); 485 for(i=0; i<pPage->pPager->pageSize; i++){ 486 hash = (hash+i)^pData[i]; 487 } 488 return hash; 489 } 490 491 /* 492 ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES 493 ** is defined, and NDEBUG is not defined, an assert() statement checks 494 ** that the page is either dirty or still matches the calculated page-hash. 495 */ 496 #define CHECK_PAGE(x) checkPage(x) 497 static void checkPage(PgHdr *pPg){ 498 Pager *pPager = pPg->pPager; 499 assert( !pPg->pageHash || pPager->errMask || MEMDB || pPg->dirty || 500 pPg->pageHash==pager_pagehash(pPg) ); 501 } 502 503 #else 504 #define CHECK_PAGE(x) 505 #endif 506 507 /* 508 ** When this is called the journal file for pager pPager must be open. 509 ** The master journal file name is read from the end of the file and 510 ** written into memory obtained from sqliteMalloc(). *pzMaster is 511 ** set to point at the memory and SQLITE_OK returned. The caller must 512 ** sqliteFree() *pzMaster. 
513 ** 514 ** If no master journal file name is present *pzMaster is set to 0 and 515 ** SQLITE_OK returned. 516 */ 517 static int readMasterJournal(OsFile *pJrnl, char **pzMaster){ 518 int rc; 519 u32 len; 520 i64 szJ; 521 u32 cksum; 522 int i; 523 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 524 525 *pzMaster = 0; 526 527 rc = sqlite3OsFileSize(pJrnl, &szJ); 528 if( rc!=SQLITE_OK || szJ<16 ) return rc; 529 530 rc = sqlite3OsSeek(pJrnl, szJ-16); 531 if( rc!=SQLITE_OK ) return rc; 532 533 rc = read32bits(pJrnl, &len); 534 if( rc!=SQLITE_OK ) return rc; 535 536 rc = read32bits(pJrnl, &cksum); 537 if( rc!=SQLITE_OK ) return rc; 538 539 rc = sqlite3OsRead(pJrnl, aMagic, 8); 540 if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc; 541 542 rc = sqlite3OsSeek(pJrnl, szJ-16-len); 543 if( rc!=SQLITE_OK ) return rc; 544 545 *pzMaster = (char *)sqliteMalloc(len+1); 546 if( !*pzMaster ){ 547 return SQLITE_NOMEM; 548 } 549 rc = sqlite3OsRead(pJrnl, *pzMaster, len); 550 if( rc!=SQLITE_OK ){ 551 sqliteFree(*pzMaster); 552 *pzMaster = 0; 553 return rc; 554 } 555 556 /* See if the checksum matches the master journal name */ 557 for(i=0; i<len; i++){ 558 cksum -= (*pzMaster)[i]; 559 } 560 if( cksum ){ 561 /* If the checksum doesn't add up, then one or more of the disk sectors 562 ** containing the master journal filename is corrupted. This means 563 ** definitely roll back, so just return SQLITE_OK and report a (nul) 564 ** master-journal filename. 565 */ 566 sqliteFree(*pzMaster); 567 *pzMaster = 0; 568 }else{ 569 (*pzMaster)[len] = '\0'; 570 } 571 572 return SQLITE_OK; 573 } 574 575 /* 576 ** Seek the journal file descriptor to the next sector boundary where a 577 ** journal header may be read or written. Pager.journalOff is updated with 578 ** the new seek offset. 
579 ** 580 ** i.e for a sector size of 512: 581 ** 582 ** Input Offset Output Offset 583 ** --------------------------------------- 584 ** 0 0 585 ** 512 512 586 ** 100 512 587 ** 2000 2048 588 ** 589 */ 590 static int seekJournalHdr(Pager *pPager){ 591 i64 offset = 0; 592 i64 c = pPager->journalOff; 593 if( c ){ 594 offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); 595 } 596 assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); 597 assert( offset>=c ); 598 assert( (offset-c)<JOURNAL_HDR_SZ(pPager) ); 599 pPager->journalOff = offset; 600 return sqlite3OsSeek(&pPager->jfd, pPager->journalOff); 601 } 602 603 /* 604 ** The journal file must be open when this routine is called. A journal 605 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the 606 ** current location. 607 ** 608 ** The format for the journal header is as follows: 609 ** - 8 bytes: Magic identifying journal format. 610 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. 611 ** - 4 bytes: Random number used for page hash. 612 ** - 4 bytes: Initial database page count. 613 ** - 4 bytes: Sector size used by the process that wrote this journal. 614 ** 615 ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space. 616 */ 617 static int writeJournalHdr(Pager *pPager){ 618 619 int rc = seekJournalHdr(pPager); 620 if( rc ) return rc; 621 622 pPager->journalHdr = pPager->journalOff; 623 if( pPager->stmtHdrOff==0 ){ 624 pPager->stmtHdrOff = pPager->journalHdr; 625 } 626 pPager->journalOff += JOURNAL_HDR_SZ(pPager); 627 628 /* FIX ME: 629 ** 630 ** Possibly for a pager not in no-sync mode, the journal magic should not 631 ** be written until nRec is filled in as part of next syncJournal(). 632 ** 633 ** Actually maybe the whole journal header should be delayed until that 634 ** point. Think about this. 635 */ 636 rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic)); 637 638 if( rc==SQLITE_OK ){ 639 /* The nRec Field. 
0xFFFFFFFF for no-sync journals. */ 640 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0); 641 } 642 if( rc==SQLITE_OK ){ 643 /* The random check-hash initialiser */ 644 sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); 645 rc = write32bits(&pPager->jfd, pPager->cksumInit); 646 } 647 if( rc==SQLITE_OK ){ 648 /* The initial database size */ 649 rc = write32bits(&pPager->jfd, pPager->dbSize); 650 } 651 if( rc==SQLITE_OK ){ 652 /* The assumed sector size for this process */ 653 rc = write32bits(&pPager->jfd, pPager->sectorSize); 654 } 655 656 /* The journal header has been written successfully. Seek the journal 657 ** file descriptor to the end of the journal header sector. 658 */ 659 if( rc==SQLITE_OK ){ 660 sqlite3OsSeek(&pPager->jfd, pPager->journalOff-1); 661 rc = sqlite3OsWrite(&pPager->jfd, "\000", 1); 662 } 663 return rc; 664 } 665 666 /* 667 ** The journal file must be open when this is called. A journal header file 668 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal 669 ** file. See comments above function writeJournalHdr() for a description of 670 ** the journal header format. 671 ** 672 ** If the header is read successfully, *nRec is set to the number of 673 ** page records following this header and *dbSize is set to the size of the 674 ** database before the transaction began, in pages. Also, pPager->cksumInit 675 ** is set to the value read from the journal header. SQLITE_OK is returned 676 ** in this case. 677 ** 678 ** If the journal header file appears to be corrupted, SQLITE_DONE is 679 ** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes 680 ** cannot be read from the journal file an error code is returned. 
681 */ 682 static int readJournalHdr( 683 Pager *pPager, 684 i64 journalSize, 685 u32 *pNRec, 686 u32 *pDbSize 687 ){ 688 int rc; 689 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 690 691 rc = seekJournalHdr(pPager); 692 if( rc ) return rc; 693 694 if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ 695 return SQLITE_DONE; 696 } 697 698 rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic)); 699 if( rc ) return rc; 700 701 if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ 702 return SQLITE_DONE; 703 } 704 705 rc = read32bits(&pPager->jfd, pNRec); 706 if( rc ) return rc; 707 708 rc = read32bits(&pPager->jfd, &pPager->cksumInit); 709 if( rc ) return rc; 710 711 rc = read32bits(&pPager->jfd, pDbSize); 712 if( rc ) return rc; 713 714 /* Update the assumed sector-size to match the value used by 715 ** the process that created this journal. If this journal was 716 ** created by a process other than this one, then this routine 717 ** is being called from within pager_playback(). The local value 718 ** of Pager.sectorSize is restored at the end of that routine. 719 */ 720 rc = read32bits(&pPager->jfd, (u32 *)&pPager->sectorSize); 721 if( rc ) return rc; 722 723 pPager->journalOff += JOURNAL_HDR_SZ(pPager); 724 rc = sqlite3OsSeek(&pPager->jfd, pPager->journalOff); 725 return rc; 726 } 727 728 729 /* 730 ** Write the supplied master journal name into the journal file for pager 731 ** pPager at the current location. The master journal name must be the last 732 ** thing written to a journal file. If the pager is in full-sync mode, the 733 ** journal file descriptor is advanced to the next sector boundary before 734 ** anything is written. The format is: 735 ** 736 ** + 4 bytes: PAGER_MJ_PGNO. 737 ** + N bytes: length of master journal name. 738 ** + 4 bytes: N 739 ** + 4 bytes: Master journal name checksum. 740 ** + 8 bytes: aJournalMagic[]. 
741 ** 742 ** The master journal page checksum is the sum of the bytes in the master 743 ** journal name. 744 */ 745 static int writeMasterJournal(Pager *pPager, const char *zMaster){ 746 int rc; 747 int len; 748 int i; 749 u32 cksum = 0; 750 751 if( !zMaster || pPager->setMaster) return SQLITE_OK; 752 pPager->setMaster = 1; 753 754 len = strlen(zMaster); 755 for(i=0; i<len; i++){ 756 cksum += zMaster[i]; 757 } 758 759 /* If in full-sync mode, advance to the next disk sector before writing 760 ** the master journal name. This is in case the previous page written to 761 ** the journal has already been synced. 762 */ 763 if( pPager->fullSync ){ 764 rc = seekJournalHdr(pPager); 765 if( rc!=SQLITE_OK ) return rc; 766 } 767 pPager->journalOff += (len+20); 768 769 rc = write32bits(&pPager->jfd, PAGER_MJ_PGNO(pPager)); 770 if( rc!=SQLITE_OK ) return rc; 771 772 rc = sqlite3OsWrite(&pPager->jfd, zMaster, len); 773 if( rc!=SQLITE_OK ) return rc; 774 775 rc = write32bits(&pPager->jfd, len); 776 if( rc!=SQLITE_OK ) return rc; 777 778 rc = write32bits(&pPager->jfd, cksum); 779 if( rc!=SQLITE_OK ) return rc; 780 781 rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic)); 782 pPager->needSync = 1; 783 return rc; 784 } 785 786 /* 787 ** Add or remove a page from the list of all pages that are in the 788 ** statement journal. 789 ** 790 ** The Pager keeps a separate list of pages that are currently in 791 ** the statement journal. This helps the sqlite3pager_stmt_commit() 792 ** routine run MUCH faster for the common case where there are many 793 ** pages in memory but only a few are in the statement journal. 
794 */ 795 static void page_add_to_stmt_list(PgHdr *pPg){ 796 Pager *pPager = pPg->pPager; 797 if( pPg->inStmt ) return; 798 assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 ); 799 pPg->pPrevStmt = 0; 800 if( pPager->pStmt ){ 801 pPager->pStmt->pPrevStmt = pPg; 802 } 803 pPg->pNextStmt = pPager->pStmt; 804 pPager->pStmt = pPg; 805 pPg->inStmt = 1; 806 } 807 static void page_remove_from_stmt_list(PgHdr *pPg){ 808 if( !pPg->inStmt ) return; 809 if( pPg->pPrevStmt ){ 810 assert( pPg->pPrevStmt->pNextStmt==pPg ); 811 pPg->pPrevStmt->pNextStmt = pPg->pNextStmt; 812 }else{ 813 assert( pPg->pPager->pStmt==pPg ); 814 pPg->pPager->pStmt = pPg->pNextStmt; 815 } 816 if( pPg->pNextStmt ){ 817 assert( pPg->pNextStmt->pPrevStmt==pPg ); 818 pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt; 819 } 820 pPg->pNextStmt = 0; 821 pPg->pPrevStmt = 0; 822 pPg->inStmt = 0; 823 } 824 825 /* 826 ** Find a page in the hash table given its page number. Return 827 ** a pointer to the page or NULL if not found. 828 */ 829 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ 830 PgHdr *p = pPager->aHash[pager_hash(pgno)]; 831 while( p && p->pgno!=pgno ){ 832 p = p->pNextHash; 833 } 834 return p; 835 } 836 837 /* 838 ** Unlock the database and clear the in-memory cache. This routine 839 ** sets the state of the pager back to what it was when it was first 840 ** opened. Any outstanding pages are invalidated and subsequent attempts 841 ** to access those pages will likely result in a coredump. 
**
** Nothing is done if any bit is set in Pager.errMask.
*/
static void pager_reset(Pager *pPager){
  PgHdr *pPg, *pNext;
  if( pPager->errMask ) return;
  /* Free every page; fetch the next pointer before the page is released */
  for(pPg=pPager->pAll; pPg; pPg=pNext){
    pNext = pPg->pNextAll;
    sqliteFree(pPg);
  }
  pPager->pFirst = 0;
  pPager->pFirstSynced = 0;
  pPager->pLast = 0;
  pPager->pAll = 0;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  pPager->nPage = 0;
  /* A write transaction may still be open: roll it back before unlocking */
  if( pPager->state>=PAGER_RESERVED ){
    sqlite3pager_rollback(pPager);
  }
  sqlite3OsUnlock(&pPager->fd, NO_LOCK);
  pPager->state = PAGER_UNLOCK;
  pPager->dbSize = -1;
  pPager->nRef = 0;
  assert( pPager->journalOpen==0 );
}

/*
** This function is used to reset the pager after a malloc() failure. This
** doesn't work with in-memory databases. If a malloc() fails when an
** in-memory database is in use it is not possible to recover.
**
** If a transaction or statement transaction is active, it is rolled back.
**
** It is an error to call this function if any pages are in use.
**
** Returns SQLITE_ERROR if pages are still referenced or the database is
** in-memory, otherwise SQLITE_OK.  A NULL pPager is accepted and treated
** as success.  Only the PAGER_ERR_MEM bit is cleared from Pager.errMask;
** pager_reset() does nothing if other error bits remain set.
*/
#ifndef SQLITE_OMIT_GLOBALRECOVER
int sqlite3pager_reset(Pager *pPager){
  if( pPager ){
    if( pPager->nRef || MEMDB ){
      return SQLITE_ERROR;
    }
    pPager->errMask &= ~(PAGER_ERR_MEM);
    pager_reset(pPager);
  }
  return SQLITE_OK;
}
#endif


/*
** When this routine is called, the pager has the journal file open and
** a RESERVED or EXCLUSIVE lock on the database.  This routine releases
** the database lock and acquires a SHARED lock in its place.  The journal
** file is deleted and closed.
**
** TODO: Consider keeping the journal file open for temporary databases.
** This might give a performance improvement on windows where opening
** a file is an expensive operation.
898 */ 899 static int pager_unwritelock(Pager *pPager){ 900 PgHdr *pPg; 901 int rc; 902 assert( !MEMDB ); 903 if( pPager->state<PAGER_RESERVED ){ 904 return SQLITE_OK; 905 } 906 sqlite3pager_stmt_commit(pPager); 907 if( pPager->stmtOpen ){ 908 sqlite3OsClose(&pPager->stfd); 909 pPager->stmtOpen = 0; 910 } 911 if( pPager->journalOpen ){ 912 sqlite3OsClose(&pPager->jfd); 913 pPager->journalOpen = 0; 914 sqlite3OsDelete(pPager->zJournal); 915 sqliteFree( pPager->aInJournal ); 916 pPager->aInJournal = 0; 917 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 918 pPg->inJournal = 0; 919 pPg->dirty = 0; 920 pPg->needSync = 0; 921 #ifdef SQLITE_CHECK_PAGES 922 pPg->pageHash = pager_pagehash(pPg); 923 #endif 924 } 925 pPager->dirtyCache = 0; 926 pPager->nRec = 0; 927 }else{ 928 assert( pPager->aInJournal==0 ); 929 assert( pPager->dirtyCache==0 || pPager->useJournal==0 ); 930 } 931 rc = sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); 932 pPager->state = PAGER_SHARED; 933 pPager->origDbSize = 0; 934 pPager->setMaster = 0; 935 return rc; 936 } 937 938 /* 939 ** Compute and return a checksum for the page of data. 940 ** 941 ** This is not a real checksum. It is really just the sum of the 942 ** random initial value and the page number. We experimented with 943 ** a checksum of the entire data, but that was found to be too slow. 944 ** 945 ** Note that the page number is stored at the beginning of data and 946 ** the checksum is stored at the end. This is important. If journal 947 ** corruption occurs due to a power failure, the most likely scenario 948 ** is that one end or the other of the record will be changed. It is 949 ** much less likely that the two ends of the journal record will be 950 ** correct and the middle be corrupt. Thus, this "checksum" scheme, 951 ** though fast and simple, catches the mostly likely kind of corruption. 952 ** 953 ** FIX ME: Consider adding every 200th (or so) byte of the data to the 954 ** checksum. 
That way if a single page spans 3 or more disk sectors and 955 ** only the middle sector is corrupt, we will still have a reasonable 956 ** chance of failing the checksum and thus detecting the problem. 957 */ 958 static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){ 959 u32 cksum = pPager->cksumInit; 960 int i = pPager->pageSize-200; 961 while( i>0 ){ 962 cksum += aData[i]; 963 i -= 200; 964 } 965 return cksum; 966 } 967 968 /* 969 ** Read a single page from the journal file opened on file descriptor 970 ** jfd. Playback this one page. 971 ** 972 ** If useCksum==0 it means this journal does not use checksums. Checksums 973 ** are not used in statement journals because statement journals do not 974 ** need to survive power failures. 975 */ 976 static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){ 977 int rc; 978 PgHdr *pPg; /* An existing page in the cache */ 979 Pgno pgno; /* The page number of a page in journal */ 980 u32 cksum; /* Checksum used for sanity checking */ 981 u8 aData[SQLITE_MAX_PAGE_SIZE]; /* Temp storage for a page */ 982 983 /* useCksum should be true for the main journal and false for 984 ** statement journals. Verify that this is always the case 985 */ 986 assert( jfd == (useCksum ? &pPager->jfd : &pPager->stfd) ); 987 988 989 rc = read32bits(jfd, &pgno); 990 if( rc!=SQLITE_OK ) return rc; 991 rc = sqlite3OsRead(jfd, &aData, pPager->pageSize); 992 if( rc!=SQLITE_OK ) return rc; 993 pPager->journalOff += pPager->pageSize + 4; 994 995 /* Sanity checking on the page. This is more important that I originally 996 ** thought. If a power failure occurs while the journal is being written, 997 ** it could cause invalid data to be written into the journal. We need to 998 ** detect this invalid data (with high probability) and ignore it. 
999 */ 1000 if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ 1001 return SQLITE_DONE; 1002 } 1003 if( pgno>(unsigned)pPager->dbSize ){ 1004 return SQLITE_OK; 1005 } 1006 if( useCksum ){ 1007 rc = read32bits(jfd, &cksum); 1008 if( rc ) return rc; 1009 pPager->journalOff += 4; 1010 if( pager_cksum(pPager, pgno, aData)!=cksum ){ 1011 return SQLITE_DONE; 1012 } 1013 } 1014 1015 assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE ); 1016 1017 /* If the pager is in RESERVED state, then there must be a copy of this 1018 ** page in the pager cache. In this case just update the pager cache, 1019 ** not the database file. The page is left marked dirty in this case. 1020 ** 1021 ** If in EXCLUSIVE state, then we update the pager cache if it exists 1022 ** and the main file. The page is then marked not dirty. 1023 ** 1024 ** Ticket #1171: The statement journal might contain page content that is 1025 ** different from the page content at the start of the transaction. 1026 ** This occurs when a page is changed prior to the start of a statement 1027 ** then changed again within the statement. When rolling back such a 1028 ** statement we must not write to the original database unless we know 1029 ** for certain that original page contents are in the main rollback 1030 ** journal. Otherwise, if a full ROLLBACK occurs after the statement 1031 ** rollback the full ROLLBACK will not restore the page to its original 1032 ** content. Two conditions must be met before writing to the database 1033 ** files. (1) the database must be locked. (2) we know that the original 1034 ** page content is in the main journal either because the page is not in 1035 ** cache or else it is marked as needSync==0. 
1036 */ 1037 pPg = pager_lookup(pPager, pgno); 1038 assert( pPager->state>=PAGER_EXCLUSIVE || pPg!=0 ); 1039 TRACE3("PLAYBACK %d page %d\n", PAGERID(pPager), pgno); 1040 if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){ 1041 sqlite3OsSeek(&pPager->fd, (pgno-1)*(i64)pPager->pageSize); 1042 rc = sqlite3OsWrite(&pPager->fd, aData, pPager->pageSize); 1043 if( pPg ) pPg->dirty = 0; 1044 } 1045 if( pPg ){ 1046 /* No page should ever be explicitly rolled back that is in use, except 1047 ** for page 1 which is held in use in order to keep the lock on the 1048 ** database active. However such a page may be rolled back as a result 1049 ** of an internal error resulting in an automatic call to 1050 ** sqlite3pager_rollback(). 1051 */ 1052 void *pData; 1053 /* assert( pPg->nRef==0 || pPg->pgno==1 ); */ 1054 pData = PGHDR_TO_DATA(pPg); 1055 memcpy(pData, aData, pPager->pageSize); 1056 if( pPager->xDestructor ){ /*** FIX ME: Should this be xReinit? ***/ 1057 pPager->xDestructor(pData, pPager->pageSize); 1058 } 1059 #ifdef SQLITE_CHECK_PAGES 1060 pPg->pageHash = pager_pagehash(pPg); 1061 #endif 1062 CODEC(pPager, pData, pPg->pgno, 3); 1063 } 1064 return rc; 1065 } 1066 1067 /* 1068 ** Parameter zMaster is the name of a master journal file. A single journal 1069 ** file that referred to the master journal file has just been rolled back. 1070 ** This routine checks if it is possible to delete the master journal file, 1071 ** and does so if it is. 1072 ** 1073 ** The master journal file contains the names of all child journals. 1074 ** To tell if a master journal can be deleted, check to each of the 1075 ** children. If all children are either missing or do not refer to 1076 ** a different master journal, then this master journal can be deleted. 
1077 */ 1078 static int pager_delmaster(const char *zMaster){ 1079 int rc; 1080 int master_open = 0; 1081 OsFile master; 1082 char *zMasterJournal = 0; /* Contents of master journal file */ 1083 i64 nMasterJournal; /* Size of master journal file */ 1084 1085 /* Open the master journal file exclusively in case some other process 1086 ** is running this routine also. Not that it makes too much difference. 1087 */ 1088 memset(&master, 0, sizeof(master)); 1089 rc = sqlite3OsOpenReadOnly(zMaster, &master); 1090 if( rc!=SQLITE_OK ) goto delmaster_out; 1091 master_open = 1; 1092 rc = sqlite3OsFileSize(&master, &nMasterJournal); 1093 if( rc!=SQLITE_OK ) goto delmaster_out; 1094 1095 if( nMasterJournal>0 ){ 1096 char *zJournal; 1097 char *zMasterPtr = 0; 1098 1099 /* Load the entire master journal file into space obtained from 1100 ** sqliteMalloc() and pointed to by zMasterJournal. 1101 */ 1102 zMasterJournal = (char *)sqliteMalloc(nMasterJournal); 1103 if( !zMasterJournal ){ 1104 rc = SQLITE_NOMEM; 1105 goto delmaster_out; 1106 } 1107 rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal); 1108 if( rc!=SQLITE_OK ) goto delmaster_out; 1109 1110 zJournal = zMasterJournal; 1111 while( (zJournal-zMasterJournal)<nMasterJournal ){ 1112 if( sqlite3OsFileExists(zJournal) ){ 1113 /* One of the journals pointed to by the master journal exists. 1114 ** Open it and check if it points at the master journal. If 1115 ** so, return without deleting the master journal file. 1116 */ 1117 OsFile journal; 1118 int c; 1119 1120 memset(&journal, 0, sizeof(journal)); 1121 rc = sqlite3OsOpenReadOnly(zJournal, &journal); 1122 if( rc!=SQLITE_OK ){ 1123 goto delmaster_out; 1124 } 1125 1126 rc = readMasterJournal(&journal, &zMasterPtr); 1127 sqlite3OsClose(&journal); 1128 if( rc!=SQLITE_OK ){ 1129 goto delmaster_out; 1130 } 1131 1132 c = zMasterPtr!=0 && strcmp(zMasterPtr, zMaster)==0; 1133 sqliteFree(zMasterPtr); 1134 if( c ){ 1135 /* We have a match. Do not delete the master journal file. 
*/ 1136 goto delmaster_out; 1137 } 1138 } 1139 zJournal += (strlen(zJournal)+1); 1140 } 1141 } 1142 1143 sqlite3OsDelete(zMaster); 1144 1145 delmaster_out: 1146 if( zMasterJournal ){ 1147 sqliteFree(zMasterJournal); 1148 } 1149 if( master_open ){ 1150 sqlite3OsClose(&master); 1151 } 1152 return rc; 1153 } 1154 1155 /* 1156 ** Make every page in the cache agree with what is on disk. In other words, 1157 ** reread the disk to reset the state of the cache. 1158 ** 1159 ** This routine is called after a rollback in which some of the dirty cache 1160 ** pages had never been written out to disk. We need to roll back the 1161 ** cache content and the easiest way to do that is to reread the old content 1162 ** back from the disk. 1163 */ 1164 static int pager_reload_cache(Pager *pPager){ 1165 PgHdr *pPg; 1166 int rc = SQLITE_OK; 1167 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 1168 char zBuf[SQLITE_MAX_PAGE_SIZE]; 1169 if( !pPg->dirty ) continue; 1170 if( (int)pPg->pgno <= pPager->origDbSize ){ 1171 sqlite3OsSeek(&pPager->fd, pPager->pageSize*(i64)(pPg->pgno-1)); 1172 rc = sqlite3OsRead(&pPager->fd, zBuf, pPager->pageSize); 1173 TRACE3("REFETCH %d page %d\n", PAGERID(pPager), pPg->pgno); 1174 if( rc ) break; 1175 CODEC(pPager, zBuf, pPg->pgno, 2); 1176 }else{ 1177 memset(zBuf, 0, pPager->pageSize); 1178 } 1179 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), pPager->pageSize) ){ 1180 memcpy(PGHDR_TO_DATA(pPg), zBuf, pPager->pageSize); 1181 if( pPager->xReiniter ){ 1182 pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize); 1183 }else{ 1184 memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra); 1185 } 1186 } 1187 pPg->needSync = 0; 1188 pPg->dirty = 0; 1189 #ifdef SQLITE_CHECK_PAGES 1190 pPg->pageHash = pager_pagehash(pPg); 1191 #endif 1192 } 1193 return rc; 1194 } 1195 1196 /* 1197 ** Truncate the main file of the given pager to the number of pages 1198 ** indicated. 
1199 */ 1200 static int pager_truncate(Pager *pPager, int nPage){ 1201 assert( pPager->state>=PAGER_EXCLUSIVE ); 1202 return sqlite3OsTruncate(&pPager->fd, pPager->pageSize*(i64)nPage); 1203 } 1204 1205 /* 1206 ** Playback the journal and thus restore the database file to 1207 ** the state it was in before we started making changes. 1208 ** 1209 ** The journal file format is as follows: 1210 ** 1211 ** (1) 8 byte prefix. A copy of aJournalMagic[]. 1212 ** (2) 4 byte big-endian integer which is the number of valid page records 1213 ** in the journal. If this value is 0xffffffff, then compute the 1214 ** number of page records from the journal size. 1215 ** (3) 4 byte big-endian integer which is the initial value for the 1216 ** sanity checksum. 1217 ** (4) 4 byte integer which is the number of pages to truncate the 1218 ** database to during a rollback. 1219 ** (5) 4 byte integer which is the number of bytes in the master journal 1220 ** name. The value may be zero (indicate that there is no master 1221 ** journal.) 1222 ** (6) N bytes of the master journal name. The name will be nul-terminated 1223 ** and might be shorter than the value read from (5). If the first byte 1224 ** of the name is \000 then there is no master journal. The master 1225 ** journal name is stored in UTF-8. 1226 ** (7) Zero or more pages instances, each as follows: 1227 ** + 4 byte page number. 1228 ** + pPager->pageSize bytes of data. 1229 ** + 4 byte checksum 1230 ** 1231 ** When we speak of the journal header, we mean the first 6 items above. 1232 ** Each entry in the journal is an instance of the 7th item. 1233 ** 1234 ** Call the value from the second bullet "nRec". nRec is the number of 1235 ** valid page entries in the journal. In most cases, you can compute the 1236 ** value of nRec from the size of the journal file. 
But if a power 1237 ** failure occurred while the journal was being written, it could be the 1238 ** case that the size of the journal file had already been increased but 1239 ** the extra entries had not yet made it safely to disk. In such a case, 1240 ** the value of nRec computed from the file size would be too large. For 1241 ** that reason, we always use the nRec value in the header. 1242 ** 1243 ** If the nRec value is 0xffffffff it means that nRec should be computed 1244 ** from the file size. This value is used when the user selects the 1245 ** no-sync option for the journal. A power failure could lead to corruption 1246 ** in this case. But for things like temporary table (which will be 1247 ** deleted when the power is restored) we don't care. 1248 ** 1249 ** If the file opened as the journal file is not a well-formed 1250 ** journal file then all pages up to the first corrupted page are rolled 1251 ** back (or no pages if the journal header is corrupted). The journal file 1252 ** is then deleted and SQLITE_OK returned, just as if no corruption had 1253 ** been encountered. 1254 ** 1255 ** If an I/O or malloc() error occurs, the journal-file is not deleted 1256 ** and an error code is returned. 1257 */ 1258 static int pager_playback(Pager *pPager){ 1259 i64 szJ; /* Size of the journal file in bytes */ 1260 u32 nRec; /* Number of Records in the journal */ 1261 int i; /* Loop counter */ 1262 Pgno mxPg = 0; /* Size of the original file in pages */ 1263 int rc; /* Result code of a subroutine */ 1264 char *zMaster = 0; /* Name of master journal file if any */ 1265 1266 /* Figure out how many records are in the journal. Abort early if 1267 ** the journal is empty. 1268 */ 1269 assert( pPager->journalOpen ); 1270 rc = sqlite3OsFileSize(&pPager->jfd, &szJ); 1271 if( rc!=SQLITE_OK ){ 1272 goto end_playback; 1273 } 1274 1275 /* Read the master journal name from the journal, if it is present. 
1276 ** If a master journal file name is specified, but the file is not 1277 ** present on disk, then the journal is not hot and does not need to be 1278 ** played back. 1279 */ 1280 rc = readMasterJournal(&pPager->jfd, &zMaster); 1281 assert( rc!=SQLITE_DONE ); 1282 if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){ 1283 sqliteFree(zMaster); 1284 zMaster = 0; 1285 if( rc==SQLITE_DONE ) rc = SQLITE_OK; 1286 goto end_playback; 1287 } 1288 sqlite3OsSeek(&pPager->jfd, 0); 1289 pPager->journalOff = 0; 1290 1291 /* This loop terminates either when the readJournalHdr() call returns 1292 ** SQLITE_DONE or an IO error occurs. */ 1293 while( 1 ){ 1294 1295 /* Read the next journal header from the journal file. If there are 1296 ** not enough bytes left in the journal file for a complete header, or 1297 ** it is corrupted, then a process must of failed while writing it. 1298 ** This indicates nothing more needs to be rolled back. 1299 */ 1300 rc = readJournalHdr(pPager, szJ, &nRec, &mxPg); 1301 if( rc!=SQLITE_OK ){ 1302 if( rc==SQLITE_DONE ){ 1303 rc = SQLITE_OK; 1304 } 1305 goto end_playback; 1306 } 1307 1308 /* If nRec is 0xffffffff, then this journal was created by a process 1309 ** working in no-sync mode. This means that the rest of the journal 1310 ** file consists of pages, there are no more journal headers. Compute 1311 ** the value of nRec based on this assumption. 1312 */ 1313 if( nRec==0xffffffff ){ 1314 assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); 1315 nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager); 1316 } 1317 1318 /* If this is the first header read from the journal, truncate the 1319 ** database file back to it's original size. 
1320 */ 1321 if( pPager->state>=PAGER_EXCLUSIVE && 1322 pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ 1323 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg ); 1324 rc = pager_truncate(pPager, mxPg); 1325 if( rc!=SQLITE_OK ){ 1326 goto end_playback; 1327 } 1328 pPager->dbSize = mxPg; 1329 } 1330 1331 /* rc = sqlite3OsSeek(&pPager->jfd, JOURNAL_HDR_SZ(pPager)); */ 1332 if( rc!=SQLITE_OK ) goto end_playback; 1333 1334 /* Copy original pages out of the journal and back into the database file. 1335 */ 1336 for(i=0; i<nRec; i++){ 1337 rc = pager_playback_one_page(pPager, &pPager->jfd, 1); 1338 if( rc!=SQLITE_OK ){ 1339 if( rc==SQLITE_DONE ){ 1340 rc = SQLITE_OK; 1341 pPager->journalOff = szJ; 1342 break; 1343 }else{ 1344 goto end_playback; 1345 } 1346 } 1347 } 1348 } 1349 1350 /* Pages that have been written to the journal but never synced 1351 ** where not restored by the loop above. We have to restore those 1352 ** pages by reading them back from the original database. 1353 */ 1354 assert( rc==SQLITE_OK ); 1355 pager_reload_cache(pPager); 1356 1357 end_playback: 1358 if( rc==SQLITE_OK ){ 1359 rc = pager_unwritelock(pPager); 1360 } 1361 if( zMaster ){ 1362 /* If there was a master journal and this routine will return true, 1363 ** see if it is possible to delete the master journal. 1364 */ 1365 if( rc==SQLITE_OK ){ 1366 rc = pager_delmaster(zMaster); 1367 } 1368 sqliteFree(zMaster); 1369 } 1370 1371 /* The Pager.sectorSize variable may have been updated while rolling 1372 ** back a journal created by a process with a different PAGER_SECTOR_SIZE 1373 ** value. Reset it to the correct value for this process. 1374 */ 1375 pPager->sectorSize = PAGER_SECTOR_SIZE; 1376 return rc; 1377 } 1378 1379 /* 1380 ** Playback the statement journal. 1381 ** 1382 ** This is similar to playing back the transaction journal but with 1383 ** a few extra twists. 
1384 ** 1385 ** (1) The number of pages in the database file at the start of 1386 ** the statement is stored in pPager->stmtSize, not in the 1387 ** journal file itself. 1388 ** 1389 ** (2) In addition to playing back the statement journal, also 1390 ** playback all pages of the transaction journal beginning 1391 ** at offset pPager->stmtJSize. 1392 */ 1393 static int pager_stmt_playback(Pager *pPager){ 1394 i64 szJ; /* Size of the full journal */ 1395 i64 hdrOff; 1396 int nRec; /* Number of Records */ 1397 int i; /* Loop counter */ 1398 int rc; 1399 1400 szJ = pPager->journalOff; 1401 #ifndef NDEBUG 1402 { 1403 i64 os_szJ; 1404 rc = sqlite3OsFileSize(&pPager->jfd, &os_szJ); 1405 if( rc!=SQLITE_OK ) return rc; 1406 assert( szJ==os_szJ ); 1407 } 1408 #endif 1409 1410 /* Set hdrOff to be the offset to the first journal header written 1411 ** this statement transaction, or the end of the file if no journal 1412 ** header was written. 1413 */ 1414 hdrOff = pPager->stmtHdrOff; 1415 assert( pPager->fullSync || !hdrOff ); 1416 if( !hdrOff ){ 1417 hdrOff = szJ; 1418 } 1419 1420 /* Truncate the database back to its original size. 1421 */ 1422 if( pPager->state>=PAGER_EXCLUSIVE ){ 1423 rc = pager_truncate(pPager, pPager->stmtSize); 1424 } 1425 pPager->dbSize = pPager->stmtSize; 1426 1427 /* Figure out how many records are in the statement journal. 1428 */ 1429 assert( pPager->stmtInUse && pPager->journalOpen ); 1430 sqlite3OsSeek(&pPager->stfd, 0); 1431 nRec = pPager->stmtNRec; 1432 1433 /* Copy original pages out of the statement journal and back into the 1434 ** database file. Note that the statement journal omits checksums from 1435 ** each record since power-failure recovery is not important to statement 1436 ** journals. 
1437 */ 1438 for(i=nRec-1; i>=0; i--){ 1439 rc = pager_playback_one_page(pPager, &pPager->stfd, 0); 1440 assert( rc!=SQLITE_DONE ); 1441 if( rc!=SQLITE_OK ) goto end_stmt_playback; 1442 } 1443 1444 /* Now roll some pages back from the transaction journal. Pager.stmtJSize 1445 ** was the size of the journal file when this statement was started, so 1446 ** everything after that needs to be rolled back, either into the 1447 ** database, the memory cache, or both. 1448 ** 1449 ** If it is not zero, then Pager.stmtHdrOff is the offset to the start 1450 ** of the first journal header written during this statement transaction. 1451 */ 1452 rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize); 1453 if( rc!=SQLITE_OK ){ 1454 goto end_stmt_playback; 1455 } 1456 pPager->journalOff = pPager->stmtJSize; 1457 pPager->cksumInit = pPager->stmtCksum; 1458 assert( JOURNAL_HDR_SZ(pPager)<(pPager->pageSize+8) ); 1459 while( pPager->journalOff <= (hdrOff-(pPager->pageSize+8)) ){ 1460 rc = pager_playback_one_page(pPager, &pPager->jfd, 1); 1461 assert( rc!=SQLITE_DONE ); 1462 if( rc!=SQLITE_OK ) goto end_stmt_playback; 1463 } 1464 1465 while( pPager->journalOff < szJ ){ 1466 u32 nRec; 1467 u32 dummy; 1468 rc = readJournalHdr(pPager, szJ, &nRec, &dummy); 1469 if( rc!=SQLITE_OK ){ 1470 assert( rc!=SQLITE_DONE ); 1471 goto end_stmt_playback; 1472 } 1473 if( nRec==0 ){ 1474 nRec = (szJ - pPager->journalOff) / (pPager->pageSize+8); 1475 } 1476 for(i=nRec-1; i>=0 && pPager->journalOff < szJ; i--){ 1477 rc = pager_playback_one_page(pPager, &pPager->jfd, 1); 1478 assert( rc!=SQLITE_DONE ); 1479 if( rc!=SQLITE_OK ) goto end_stmt_playback; 1480 } 1481 } 1482 1483 pPager->journalOff = szJ; 1484 1485 end_stmt_playback: 1486 if( rc!=SQLITE_OK ){ 1487 pPager->errMask |= PAGER_ERR_CORRUPT; 1488 rc = SQLITE_CORRUPT; /* bkpt-CORRUPT */ 1489 }else{ 1490 pPager->journalOff = szJ; 1491 /* pager_reload_cache(pPager); */ 1492 } 1493 return rc; 1494 } 1495 1496 /* 1497 ** Change the maximum number of in-memory 
pages that are allowed. 1498 */ 1499 void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){ 1500 if( mxPage>10 ){ 1501 pPager->mxPage = mxPage; 1502 }else{ 1503 pPager->mxPage = 10; 1504 } 1505 } 1506 1507 /* 1508 ** Adjust the robustness of the database to damage due to OS crashes 1509 ** or power failures by changing the number of syncs()s when writing 1510 ** the rollback journal. There are three levels: 1511 ** 1512 ** OFF sqlite3OsSync() is never called. This is the default 1513 ** for temporary and transient files. 1514 ** 1515 ** NORMAL The journal is synced once before writes begin on the 1516 ** database. This is normally adequate protection, but 1517 ** it is theoretically possible, though very unlikely, 1518 ** that an inopertune power failure could leave the journal 1519 ** in a state which would cause damage to the database 1520 ** when it is rolled back. 1521 ** 1522 ** FULL The journal is synced twice before writes begin on the 1523 ** database (with some additional information - the nRec field 1524 ** of the journal header - being written in between the two 1525 ** syncs). If we assume that writing a 1526 ** single disk sector is atomic, then this mode provides 1527 ** assurance that the journal will not be corrupted to the 1528 ** point of causing damage to the database during rollback. 1529 ** 1530 ** Numeric values associated with these states are OFF==1, NORMAL=2, 1531 ** and FULL=3. 1532 */ 1533 #ifndef SQLITE_OMIT_PAGER_PRAGMAS 1534 void sqlite3pager_set_safety_level(Pager *pPager, int level){ 1535 pPager->noSync = level==1 || pPager->tempFile; 1536 pPager->fullSync = level==3 && !pPager->tempFile; 1537 if( pPager->noSync ) pPager->needSync = 0; 1538 } 1539 #endif 1540 1541 /* 1542 ** The following global variable is incremented whenever the library 1543 ** attempts to open a temporary file. This information is used for 1544 ** testing and analysis only. 
1545 */ 1546 int sqlite3_opentemp_count = 0; 1547 1548 /* 1549 ** Open a temporary file. Write the name of the file into zFile 1550 ** (zFile must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write 1551 ** the file descriptor into *fd. Return SQLITE_OK on success or some 1552 ** other error code if we fail. 1553 ** 1554 ** The OS will automatically delete the temporary file when it is 1555 ** closed. 1556 */ 1557 static int sqlite3pager_opentemp(char *zFile, OsFile *fd){ 1558 int cnt = 8; 1559 int rc; 1560 sqlite3_opentemp_count++; /* Used for testing and analysis only */ 1561 do{ 1562 cnt--; 1563 sqlite3OsTempFileName(zFile); 1564 rc = sqlite3OsOpenExclusive(zFile, fd, 1); 1565 }while( cnt>0 && rc!=SQLITE_OK && rc!=SQLITE_NOMEM ); 1566 return rc; 1567 } 1568 1569 /* 1570 ** Create a new page cache and put a pointer to the page cache in *ppPager. 1571 ** The file to be cached need not exist. The file is not locked until 1572 ** the first call to sqlite3pager_get() and is only held open until the 1573 ** last page is released using sqlite3pager_unref(). 1574 ** 1575 ** If zFilename is NULL then a randomly-named temporary file is created 1576 ** and used as the file to be cached. The file will be deleted 1577 ** automatically when it is closed. 1578 ** 1579 ** If zFilename is ":memory:" then all information is held in cache. 1580 ** It is never written to disk. This can be used to implement an 1581 ** in-memory database. 
1582 */ 1583 int sqlite3pager_open( 1584 Pager **ppPager, /* Return the Pager structure here */ 1585 const char *zFilename, /* Name of the database file to open */ 1586 int nExtra, /* Extra bytes append to each in-memory page */ 1587 int flags /* flags controlling this file */ 1588 ){ 1589 Pager *pPager; 1590 char *zFullPathname = 0; 1591 int nameLen; 1592 OsFile fd; 1593 int rc = SQLITE_OK; 1594 int i; 1595 int tempFile = 0; 1596 int memDb = 0; 1597 int readOnly = 0; 1598 int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; 1599 int noReadlock = (flags & PAGER_NO_READLOCK)!=0; 1600 char zTemp[SQLITE_TEMPNAME_SIZE]; 1601 1602 *ppPager = 0; 1603 memset(&fd, 0, sizeof(fd)); 1604 if( sqlite3_malloc_failed ){ 1605 return SQLITE_NOMEM; 1606 } 1607 if( zFilename && zFilename[0] ){ 1608 #ifndef SQLITE_OMIT_MEMORYDB 1609 if( strcmp(zFilename,":memory:")==0 ){ 1610 memDb = 1; 1611 zFullPathname = sqliteStrDup(""); 1612 rc = SQLITE_OK; 1613 }else 1614 #endif 1615 { 1616 zFullPathname = sqlite3OsFullPathname(zFilename); 1617 if( zFullPathname ){ 1618 rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly); 1619 } 1620 } 1621 }else{ 1622 rc = sqlite3pager_opentemp(zTemp, &fd); 1623 zFilename = zTemp; 1624 zFullPathname = sqlite3OsFullPathname(zFilename); 1625 if( rc==SQLITE_OK ){ 1626 tempFile = 1; 1627 } 1628 } 1629 if( !zFullPathname ){ 1630 sqlite3OsClose(&fd); 1631 return SQLITE_NOMEM; 1632 } 1633 if( rc!=SQLITE_OK ){ 1634 sqlite3OsClose(&fd); 1635 sqliteFree(zFullPathname); 1636 return rc; 1637 } 1638 nameLen = strlen(zFullPathname); 1639 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 ); 1640 if( pPager==0 ){ 1641 sqlite3OsClose(&fd); 1642 sqliteFree(zFullPathname); 1643 return SQLITE_NOMEM; 1644 } 1645 TRACE3("OPEN %d %s\n", FILEHANDLEID(fd), zFullPathname); 1646 pPager->zFilename = (char*)&pPager[1]; 1647 pPager->zDirectory = &pPager->zFilename[nameLen+1]; 1648 pPager->zJournal = &pPager->zDirectory[nameLen+1]; 1649 strcpy(pPager->zFilename, zFullPathname); 
1650 strcpy(pPager->zDirectory, zFullPathname); 1651 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){} 1652 if( i>0 ) pPager->zDirectory[i-1] = 0; 1653 strcpy(pPager->zJournal, zFullPathname); 1654 sqliteFree(zFullPathname); 1655 strcpy(&pPager->zJournal[nameLen], "-journal"); 1656 pPager->fd = fd; 1657 #if OS_UNIX 1658 pPager->fd.pPager = pPager; 1659 #endif 1660 pPager->journalOpen = 0; 1661 pPager->useJournal = useJournal && !memDb; 1662 pPager->noReadlock = noReadlock && readOnly; 1663 pPager->stmtOpen = 0; 1664 pPager->stmtInUse = 0; 1665 pPager->nRef = 0; 1666 pPager->dbSize = memDb-1; 1667 pPager->pageSize = SQLITE_DEFAULT_PAGE_SIZE; 1668 pPager->stmtSize = 0; 1669 pPager->stmtJSize = 0; 1670 pPager->nPage = 0; 1671 pPager->nMaxPage = 0; 1672 pPager->mxPage = 100; 1673 pPager->state = PAGER_UNLOCK; 1674 pPager->errMask = 0; 1675 pPager->tempFile = tempFile; 1676 pPager->memDb = memDb; 1677 pPager->readOnly = readOnly; 1678 pPager->needSync = 0; 1679 pPager->noSync = pPager->tempFile || !useJournal; 1680 pPager->fullSync = (pPager->noSync?0:1); 1681 pPager->pFirst = 0; 1682 pPager->pFirstSynced = 0; 1683 pPager->pLast = 0; 1684 pPager->nExtra = FORCE_ALIGNMENT(nExtra); 1685 pPager->sectorSize = PAGER_SECTOR_SIZE; 1686 pPager->pBusyHandler = 0; 1687 memset(pPager->aHash, 0, sizeof(pPager->aHash)); 1688 *ppPager = pPager; 1689 return SQLITE_OK; 1690 } 1691 1692 /* 1693 ** Set the busy handler function. 1694 */ 1695 void sqlite3pager_set_busyhandler(Pager *pPager, BusyHandler *pBusyHandler){ 1696 pPager->pBusyHandler = pBusyHandler; 1697 } 1698 1699 /* 1700 ** Set the destructor for this pager. If not NULL, the destructor is called 1701 ** when the reference count on each page reaches zero. The destructor can 1702 ** be used to clean up information in the extra segment appended to each page. 1703 ** 1704 ** The destructor is not called as a result sqlite3pager_close(). 1705 ** Destructors are only called by sqlite3pager_unref(). 
1706 */ 1707 void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){ 1708 pPager->xDestructor = xDesc; 1709 } 1710 1711 /* 1712 ** Set the reinitializer for this pager. If not NULL, the reinitializer 1713 ** is called when the content of a page in cache is restored to its original 1714 ** value as a result of a rollback. The callback gives higher-level code 1715 ** an opportunity to restore the EXTRA section to agree with the restored 1716 ** page data. 1717 */ 1718 void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){ 1719 pPager->xReiniter = xReinit; 1720 } 1721 1722 /* 1723 ** Set the page size. Return the new size. If the suggest new page 1724 ** size is inappropriate, then an alternative page size is selected 1725 ** and returned. 1726 */ 1727 int sqlite3pager_set_pagesize(Pager *pPager, int pageSize){ 1728 assert( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE ); 1729 if( !pPager->memDb ){ 1730 pPager->pageSize = pageSize; 1731 } 1732 return pPager->pageSize; 1733 } 1734 1735 /* 1736 ** Read the first N bytes from the beginning of the file into memory 1737 ** that pDest points to. No error checking is done. 1738 */ 1739 void sqlite3pager_read_fileheader(Pager *pPager, int N, unsigned char *pDest){ 1740 memset(pDest, 0, N); 1741 if( MEMDB==0 ){ 1742 sqlite3OsSeek(&pPager->fd, 0); 1743 sqlite3OsRead(&pPager->fd, pDest, N); 1744 } 1745 } 1746 1747 /* 1748 ** Return the total number of pages in the disk file associated with 1749 ** pPager. 
1750 */ 1751 int sqlite3pager_pagecount(Pager *pPager){ 1752 i64 n; 1753 assert( pPager!=0 ); 1754 if( pPager->dbSize>=0 ){ 1755 return pPager->dbSize; 1756 } 1757 if( sqlite3OsFileSize(&pPager->fd, &n)!=SQLITE_OK ){ 1758 pPager->errMask |= PAGER_ERR_DISK; 1759 return 0; 1760 } 1761 n /= pPager->pageSize; 1762 if( !MEMDB && n==PENDING_BYTE/pPager->pageSize ){ 1763 n++; 1764 } 1765 if( pPager->state!=PAGER_UNLOCK ){ 1766 pPager->dbSize = n; 1767 } 1768 return n; 1769 } 1770 1771 /* 1772 ** Forward declaration 1773 */ 1774 static int syncJournal(Pager*); 1775 1776 1777 /* 1778 ** Unlink pPg from it's hash chain. Also set the page number to 0 to indicate 1779 ** that the page is not part of any hash chain. This is required because the 1780 ** sqlite3pager_movepage() routine can leave a page in the 1781 ** pNextFree/pPrevFree list that is not a part of any hash-chain. 1782 */ 1783 static void unlinkHashChain(Pager *pPager, PgHdr *pPg){ 1784 if( pPg->pgno==0 ){ 1785 /* If the page number is zero, then this page is not in any hash chain. */ 1786 return; 1787 } 1788 if( pPg->pNextHash ){ 1789 pPg->pNextHash->pPrevHash = pPg->pPrevHash; 1790 } 1791 if( pPg->pPrevHash ){ 1792 assert( pPager->aHash[pager_hash(pPg->pgno)]!=pPg ); 1793 pPg->pPrevHash->pNextHash = pPg->pNextHash; 1794 }else{ 1795 int h = pager_hash(pPg->pgno); 1796 assert( pPager->aHash[h]==pPg ); 1797 pPager->aHash[h] = pPg->pNextHash; 1798 } 1799 1800 pPg->pgno = 0; 1801 pPg->pNextHash = pPg->pPrevHash = 0; 1802 } 1803 1804 /* 1805 ** Unlink a page from the free list (the list of all pages where nRef==0) 1806 ** and from its hash collision chain. 
1807 */ 1808 static void unlinkPage(PgHdr *pPg){ 1809 Pager *pPager = pPg->pPager; 1810 1811 /* Keep the pFirstSynced pointer pointing at the first synchronized page */ 1812 if( pPg==pPager->pFirstSynced ){ 1813 PgHdr *p = pPg->pNextFree; 1814 while( p && p->needSync ){ p = p->pNextFree; } 1815 pPager->pFirstSynced = p; 1816 } 1817 1818 /* Unlink from the freelist */ 1819 if( pPg->pPrevFree ){ 1820 pPg->pPrevFree->pNextFree = pPg->pNextFree; 1821 }else{ 1822 assert( pPager->pFirst==pPg ); 1823 pPager->pFirst = pPg->pNextFree; 1824 } 1825 if( pPg->pNextFree ){ 1826 pPg->pNextFree->pPrevFree = pPg->pPrevFree; 1827 }else{ 1828 assert( pPager->pLast==pPg ); 1829 pPager->pLast = pPg->pPrevFree; 1830 } 1831 pPg->pNextFree = pPg->pPrevFree = 0; 1832 1833 /* Unlink from the pgno hash table */ 1834 unlinkHashChain(pPager, pPg); 1835 } 1836 1837 #ifndef SQLITE_OMIT_MEMORYDB 1838 /* 1839 ** This routine is used to truncate an in-memory database. Delete 1840 ** all pages whose pgno is larger than pPager->dbSize and is unreferenced. 1841 ** Referenced pages larger than pPager->dbSize are zeroed. 1842 */ 1843 static void memoryTruncate(Pager *pPager){ 1844 PgHdr *pPg; 1845 PgHdr **ppPg; 1846 int dbSize = pPager->dbSize; 1847 1848 ppPg = &pPager->pAll; 1849 while( (pPg = *ppPg)!=0 ){ 1850 if( pPg->pgno<=dbSize ){ 1851 ppPg = &pPg->pNextAll; 1852 }else if( pPg->nRef>0 ){ 1853 memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); 1854 ppPg = &pPg->pNextAll; 1855 }else{ 1856 *ppPg = pPg->pNextAll; 1857 unlinkPage(pPg); 1858 sqliteFree(pPg); 1859 pPager->nPage--; 1860 } 1861 } 1862 } 1863 #else 1864 #define memoryTruncate(p) 1865 #endif 1866 1867 /* 1868 ** Try to obtain a lock on a file. Invoke the busy callback if the lock 1869 ** is currently not available. Repeat until the busy callback returns 1870 ** false or until the lock succeeds. 1871 ** 1872 ** Return SQLITE_OK on success and an error code if we cannot obtain 1873 ** the lock. 
1874 */ 1875 static int pager_wait_on_lock(Pager *pPager, int locktype){ 1876 int rc; 1877 assert( PAGER_SHARED==SHARED_LOCK ); 1878 assert( PAGER_RESERVED==RESERVED_LOCK ); 1879 assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK ); 1880 if( pPager->state>=locktype ){ 1881 rc = SQLITE_OK; 1882 }else{ 1883 do { 1884 rc = sqlite3OsLock(&pPager->fd, locktype); 1885 }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) ); 1886 if( rc==SQLITE_OK ){ 1887 pPager->state = locktype; 1888 } 1889 } 1890 return rc; 1891 } 1892 1893 /* 1894 ** Truncate the file to the number of pages specified. 1895 */ 1896 int sqlite3pager_truncate(Pager *pPager, Pgno nPage){ 1897 int rc; 1898 sqlite3pager_pagecount(pPager); 1899 if( pPager->errMask!=0 ){ 1900 rc = pager_errcode(pPager); 1901 return rc; 1902 } 1903 if( nPage>=(unsigned)pPager->dbSize ){ 1904 return SQLITE_OK; 1905 } 1906 if( MEMDB ){ 1907 pPager->dbSize = nPage; 1908 memoryTruncate(pPager); 1909 return SQLITE_OK; 1910 } 1911 rc = syncJournal(pPager); 1912 if( rc!=SQLITE_OK ){ 1913 return rc; 1914 } 1915 1916 /* Get an exclusive lock on the database before truncating. */ 1917 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 1918 if( rc!=SQLITE_OK ){ 1919 return rc; 1920 } 1921 1922 rc = pager_truncate(pPager, nPage); 1923 if( rc==SQLITE_OK ){ 1924 pPager->dbSize = nPage; 1925 } 1926 return rc; 1927 } 1928 1929 /* 1930 ** Shutdown the page cache. Free all memory and close all files. 1931 ** 1932 ** If a transaction was in progress when this routine is called, that 1933 ** transaction is rolled back. All outstanding pages are invalidated 1934 ** and their memory is freed. Any attempt to use a page associated 1935 ** with this page cache after this function returns will likely 1936 ** result in a coredump. 
1937 */ 1938 int sqlite3pager_close(Pager *pPager){ 1939 PgHdr *pPg, *pNext; 1940 switch( pPager->state ){ 1941 case PAGER_RESERVED: 1942 case PAGER_SYNCED: 1943 case PAGER_EXCLUSIVE: { 1944 /* We ignore any IO errors that occur during the rollback 1945 ** operation. So disable IO error simulation so that testing 1946 ** works more easily. 1947 */ 1948 #if defined(SQLITE_TEST) && (defined(OS_UNIX) || defined(OS_WIN)) 1949 extern int sqlite3_io_error_pending; 1950 int ioerr_cnt = sqlite3_io_error_pending; 1951 sqlite3_io_error_pending = -1; 1952 #endif 1953 sqlite3pager_rollback(pPager); 1954 #if defined(SQLITE_TEST) && (defined(OS_UNIX) || defined(OS_WIN)) 1955 sqlite3_io_error_pending = ioerr_cnt; 1956 #endif 1957 if( !MEMDB ){ 1958 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 1959 } 1960 assert( pPager->errMask || pPager->journalOpen==0 ); 1961 break; 1962 } 1963 case PAGER_SHARED: { 1964 if( !MEMDB ){ 1965 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 1966 } 1967 break; 1968 } 1969 default: { 1970 /* Do nothing */ 1971 break; 1972 } 1973 } 1974 for(pPg=pPager->pAll; pPg; pPg=pNext){ 1975 #ifndef NDEBUG 1976 if( MEMDB ){ 1977 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 1978 assert( !pPg->alwaysRollback ); 1979 assert( !pHist->pOrig ); 1980 assert( !pHist->pStmt ); 1981 } 1982 #endif 1983 pNext = pPg->pNextAll; 1984 sqliteFree(pPg); 1985 } 1986 TRACE2("CLOSE %d\n", PAGERID(pPager)); 1987 assert( pPager->errMask || (pPager->journalOpen==0 && pPager->stmtOpen==0) ); 1988 if( pPager->journalOpen ){ 1989 sqlite3OsClose(&pPager->jfd); 1990 } 1991 sqliteFree(pPager->aInJournal); 1992 if( pPager->stmtOpen ){ 1993 sqlite3OsClose(&pPager->stfd); 1994 } 1995 sqlite3OsClose(&pPager->fd); 1996 /* Temp files are automatically deleted by the OS 1997 ** if( pPager->tempFile ){ 1998 ** sqlite3OsDelete(pPager->zFilename); 1999 ** } 2000 */ 2001 2002 sqliteFree(pPager); 2003 return SQLITE_OK; 2004 } 2005 2006 /* 2007 ** Return the page number for the given page data. 
2008 */ 2009 Pgno sqlite3pager_pagenumber(void *pData){ 2010 PgHdr *p = DATA_TO_PGHDR(pData); 2011 return p->pgno; 2012 } 2013 2014 /* 2015 ** The page_ref() function increments the reference count for a page. 2016 ** If the page is currently on the freelist (the reference count is zero) then 2017 ** remove it from the freelist. 2018 ** 2019 ** For non-test systems, page_ref() is a macro that calls _page_ref() 2020 ** online of the reference count is zero. For test systems, page_ref() 2021 ** is a real function so that we can set breakpoints and trace it. 2022 */ 2023 static void _page_ref(PgHdr *pPg){ 2024 if( pPg->nRef==0 ){ 2025 /* The page is currently on the freelist. Remove it. */ 2026 if( pPg==pPg->pPager->pFirstSynced ){ 2027 PgHdr *p = pPg->pNextFree; 2028 while( p && p->needSync ){ p = p->pNextFree; } 2029 pPg->pPager->pFirstSynced = p; 2030 } 2031 if( pPg->pPrevFree ){ 2032 pPg->pPrevFree->pNextFree = pPg->pNextFree; 2033 }else{ 2034 pPg->pPager->pFirst = pPg->pNextFree; 2035 } 2036 if( pPg->pNextFree ){ 2037 pPg->pNextFree->pPrevFree = pPg->pPrevFree; 2038 }else{ 2039 pPg->pPager->pLast = pPg->pPrevFree; 2040 } 2041 pPg->pPager->nRef++; 2042 } 2043 pPg->nRef++; 2044 REFINFO(pPg); 2045 } 2046 #ifdef SQLITE_DEBUG 2047 static void page_ref(PgHdr *pPg){ 2048 if( pPg->nRef==0 ){ 2049 _page_ref(pPg); 2050 }else{ 2051 pPg->nRef++; 2052 REFINFO(pPg); 2053 } 2054 } 2055 #else 2056 # define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++) 2057 #endif 2058 2059 /* 2060 ** Increment the reference count for a page. The input pointer is 2061 ** a reference to the page data. 2062 */ 2063 int sqlite3pager_ref(void *pData){ 2064 PgHdr *pPg = DATA_TO_PGHDR(pData); 2065 page_ref(pPg); 2066 return SQLITE_OK; 2067 } 2068 2069 /* 2070 ** Sync the journal. In other words, make sure all the pages that have 2071 ** been written to the journal have actually reached the surface of the 2072 ** disk. 
It is not safe to modify the original database file until after 2073 ** the journal has been synced. If the original database is modified before 2074 ** the journal is synced and a power failure occurs, the unsynced journal 2075 ** data would be lost and we would be unable to completely rollback the 2076 ** database changes. Database corruption would occur. 2077 ** 2078 ** This routine also updates the nRec field in the header of the journal. 2079 ** (See comments on the pager_playback() routine for additional information.) 2080 ** If the sync mode is FULL, two syncs will occur. First the whole journal 2081 ** is synced, then the nRec field is updated, then a second sync occurs. 2082 ** 2083 ** For temporary databases, we do not care if we are able to rollback 2084 ** after a power failure, so sync occurs. 2085 ** 2086 ** This routine clears the needSync field of every page current held in 2087 ** memory. 2088 */ 2089 static int syncJournal(Pager *pPager){ 2090 PgHdr *pPg; 2091 int rc = SQLITE_OK; 2092 2093 /* Sync the journal before modifying the main database 2094 ** (assuming there is a journal and it needs to be synced.) 2095 */ 2096 if( pPager->needSync ){ 2097 if( !pPager->tempFile ){ 2098 assert( pPager->journalOpen ); 2099 /* assert( !pPager->noSync ); // noSync might be set if synchronous 2100 ** was turned off after the transaction was started. Ticket #615 */ 2101 #ifndef NDEBUG 2102 { 2103 /* Make sure the pPager->nRec counter we are keeping agrees 2104 ** with the nRec computed from the size of the journal file. 2105 */ 2106 i64 jSz; 2107 rc = sqlite3OsFileSize(&pPager->jfd, &jSz); 2108 if( rc!=0 ) return rc; 2109 assert( pPager->journalOff==jSz ); 2110 } 2111 #endif 2112 { 2113 /* Write the nRec value into the journal file header. If in 2114 ** full-synchronous mode, sync the journal first. This ensures that 2115 ** all data has really hit the disk before nRec is updated to mark 2116 ** it as a candidate for rollback. 
2117 */ 2118 if( pPager->fullSync ){ 2119 TRACE2("SYNC journal of %d\n", PAGERID(pPager)); 2120 rc = sqlite3OsSync(&pPager->jfd); 2121 if( rc!=0 ) return rc; 2122 } 2123 sqlite3OsSeek(&pPager->jfd, pPager->journalHdr + sizeof(aJournalMagic)); 2124 rc = write32bits(&pPager->jfd, pPager->nRec); 2125 if( rc ) return rc; 2126 2127 sqlite3OsSeek(&pPager->jfd, pPager->journalOff); 2128 } 2129 TRACE2("SYNC journal of %d\n", PAGERID(pPager)); 2130 rc = sqlite3OsSync(&pPager->jfd); 2131 if( rc!=0 ) return rc; 2132 pPager->journalStarted = 1; 2133 } 2134 pPager->needSync = 0; 2135 2136 /* Erase the needSync flag from every page. 2137 */ 2138 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 2139 pPg->needSync = 0; 2140 } 2141 pPager->pFirstSynced = pPager->pFirst; 2142 } 2143 2144 #ifndef NDEBUG 2145 /* If the Pager.needSync flag is clear then the PgHdr.needSync 2146 ** flag must also be clear for all pages. Verify that this 2147 ** invariant is true. 2148 */ 2149 else{ 2150 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 2151 assert( pPg->needSync==0 ); 2152 } 2153 assert( pPager->pFirstSynced==pPager->pFirst ); 2154 } 2155 #endif 2156 2157 return rc; 2158 } 2159 2160 /* 2161 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write 2162 ** every one of those pages out to the database file and mark them all 2163 ** as clean. 2164 */ 2165 static int pager_write_pagelist(PgHdr *pList){ 2166 Pager *pPager; 2167 int rc; 2168 2169 if( pList==0 ) return SQLITE_OK; 2170 pPager = pList->pPager; 2171 2172 /* At this point there may be either a RESERVED or EXCLUSIVE lock on the 2173 ** database file. If there is already an EXCLUSIVE lock, the following 2174 ** calls to sqlite3OsLock() are no-ops. 2175 ** 2176 ** Moving the lock from RESERVED to EXCLUSIVE actually involves going 2177 ** through an intermediate state PENDING. A PENDING lock prevents new 2178 ** readers from attaching to the database but is unsufficient for us to 2179 ** write. 
The idea of a PENDING lock is to prevent new readers from 2180 ** coming in while we wait for existing readers to clear. 2181 ** 2182 ** While the pager is in the RESERVED state, the original database file 2183 ** is unchanged and we can rollback without having to playback the 2184 ** journal into the original database file. Once we transition to 2185 ** EXCLUSIVE, it means the database file has been changed and any rollback 2186 ** will require a journal playback. 2187 */ 2188 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 2189 if( rc!=SQLITE_OK ){ 2190 return rc; 2191 } 2192 2193 while( pList ){ 2194 assert( pList->dirty ); 2195 sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(i64)pPager->pageSize); 2196 /* If there are dirty pages in the page cache with page numbers greater 2197 ** than Pager.dbSize, this means sqlite3pager_truncate() was called to 2198 ** make the file smaller (presumably by auto-vacuum code). Do not write 2199 ** any such pages to the file. 2200 */ 2201 if( pList->pgno<=pPager->dbSize ){ 2202 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6); 2203 TRACE3("STORE %d page %d\n", PAGERID(pPager), pList->pgno); 2204 rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), pPager->pageSize); 2205 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0); 2206 TEST_INCR(pPager->nWrite); 2207 } 2208 #ifndef NDEBUG 2209 else{ 2210 TRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno); 2211 } 2212 #endif 2213 if( rc ) return rc; 2214 pList->dirty = 0; 2215 #ifdef SQLITE_CHECK_PAGES 2216 pList->pageHash = pager_pagehash(pList); 2217 #endif 2218 pList = pList->pDirty; 2219 } 2220 return SQLITE_OK; 2221 } 2222 2223 /* 2224 ** Collect every dirty page into a dirty list and 2225 ** return a pointer to the head of that list. All pages are 2226 ** collected even if they are still in use. 
2227 */ 2228 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){ 2229 PgHdr *p, *pList; 2230 pList = 0; 2231 for(p=pPager->pAll; p; p=p->pNextAll){ 2232 if( p->dirty ){ 2233 p->pDirty = pList; 2234 pList = p; 2235 } 2236 } 2237 return pList; 2238 } 2239 2240 /* 2241 ** Return TRUE if there is a hot journal on the given pager. 2242 ** A hot journal is one that needs to be played back. 2243 ** 2244 ** If the current size of the database file is 0 but a journal file 2245 ** exists, that is probably an old journal left over from a prior 2246 ** database with the same name. Just delete the journal. 2247 */ 2248 static int hasHotJournal(Pager *pPager){ 2249 if( !pPager->useJournal ) return 0; 2250 if( !sqlite3OsFileExists(pPager->zJournal) ) return 0; 2251 if( sqlite3OsCheckReservedLock(&pPager->fd) ) return 0; 2252 if( sqlite3pager_pagecount(pPager)==0 ){ 2253 sqlite3OsDelete(pPager->zJournal); 2254 return 0; 2255 }else{ 2256 return 1; 2257 } 2258 } 2259 2260 /* 2261 ** Acquire a page. 2262 ** 2263 ** A read lock on the disk file is obtained when the first page is acquired. 2264 ** This read lock is dropped when the last page is released. 2265 ** 2266 ** A _get works for any page number greater than 0. If the database 2267 ** file is smaller than the requested page, then no actual disk 2268 ** read occurs and the memory image of the page is initialized to 2269 ** all zeros. The extra data appended to a page is always initialized 2270 ** to zeros the first time a page is loaded into memory. 2271 ** 2272 ** The acquisition might fail for several reasons. In all cases, 2273 ** an appropriate error code is returned and *ppPage is set to NULL. 2274 ** 2275 ** See also sqlite3pager_lookup(). Both this routine and _lookup() attempt 2276 ** to find a page in the in-memory cache first. If the page is not already 2277 ** in memory, this routine goes to disk to read it in whereas _lookup() 2278 ** just returns 0. 
This routine acquires a read-lock the first time it 2279 ** has to go to disk, and could also playback an old journal if necessary. 2280 ** Since _lookup() never goes to disk, it never has to deal with locks 2281 ** or journal files. 2282 */ 2283 int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){ 2284 PgHdr *pPg; 2285 int rc; 2286 2287 /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page 2288 ** number greater than this, or zero, is requested. 2289 */ 2290 if( pgno>PAGER_MAX_PGNO || pgno==0 ){ 2291 return SQLITE_CORRUPT; 2292 } 2293 2294 /* Make sure we have not hit any critical errors. 2295 */ 2296 assert( pPager!=0 ); 2297 *ppPage = 0; 2298 if( pPager->errMask & ~(PAGER_ERR_FULL) ){ 2299 return pager_errcode(pPager); 2300 } 2301 2302 /* If this is the first page accessed, then get a SHARED lock 2303 ** on the database file. 2304 */ 2305 if( pPager->nRef==0 && !MEMDB ){ 2306 if( !pPager->noReadlock ){ 2307 rc = pager_wait_on_lock(pPager, SHARED_LOCK); 2308 if( rc!=SQLITE_OK ){ 2309 return rc; 2310 } 2311 } 2312 2313 /* If a journal file exists, and there is no RESERVED lock on the 2314 ** database file, then it either needs to be played back or deleted. 2315 */ 2316 if( hasHotJournal(pPager) ){ 2317 int rc; 2318 2319 /* Get an EXCLUSIVE lock on the database file. At this point it is 2320 ** important that a RESERVED lock is not obtained on the way to the 2321 ** EXCLUSIVE lock. If it were, another process might open the 2322 ** database file, detect the RESERVED lock, and conclude that the 2323 ** database is safe to read while this process is still rolling it 2324 ** back. 2325 ** 2326 ** Because the intermediate RESERVED lock is not requested, the 2327 ** second process will get to this point in the code and fail to 2328 ** obtain it's own EXCLUSIVE lock on the database file. 
2329 */ 2330 rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); 2331 if( rc!=SQLITE_OK ){ 2332 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 2333 pPager->state = PAGER_UNLOCK; 2334 return rc; 2335 } 2336 pPager->state = PAGER_EXCLUSIVE; 2337 2338 /* Open the journal for reading only. Return SQLITE_BUSY if 2339 ** we are unable to open the journal file. 2340 ** 2341 ** The journal file does not need to be locked itself. The 2342 ** journal file is never open unless the main database file holds 2343 ** a write lock, so there is never any chance of two or more 2344 ** processes opening the journal at the same time. 2345 */ 2346 rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd); 2347 if( rc!=SQLITE_OK ){ 2348 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 2349 pPager->state = PAGER_UNLOCK; 2350 return SQLITE_BUSY; 2351 } 2352 pPager->journalOpen = 1; 2353 pPager->journalStarted = 0; 2354 pPager->journalOff = 0; 2355 pPager->setMaster = 0; 2356 pPager->journalHdr = 0; 2357 2358 /* Playback and delete the journal. Drop the database write 2359 ** lock and reacquire the read lock. 2360 */ 2361 rc = pager_playback(pPager); 2362 if( rc!=SQLITE_OK ){ 2363 return rc; 2364 } 2365 } 2366 pPg = 0; 2367 }else{ 2368 /* Search for page in cache */ 2369 pPg = pager_lookup(pPager, pgno); 2370 if( MEMDB && pPager->state==PAGER_UNLOCK ){ 2371 pPager->state = PAGER_SHARED; 2372 } 2373 } 2374 if( pPg==0 ){ 2375 /* The requested page is not in the page cache. 
*/ 2376 int h; 2377 TEST_INCR(pPager->nMiss); 2378 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || MEMDB ){ 2379 /* Create a new page */ 2380 pPg = sqliteMallocRaw( sizeof(*pPg) + pPager->pageSize 2381 + sizeof(u32) + pPager->nExtra 2382 + MEMDB*sizeof(PgHistory) ); 2383 if( pPg==0 ){ 2384 pPager->errMask |= PAGER_ERR_MEM; 2385 return SQLITE_NOMEM; 2386 } 2387 memset(pPg, 0, sizeof(*pPg)); 2388 if( MEMDB ){ 2389 memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory)); 2390 } 2391 pPg->pPager = pPager; 2392 pPg->pNextAll = pPager->pAll; 2393 pPager->pAll = pPg; 2394 pPager->nPage++; 2395 if( pPager->nPage>pPager->nMaxPage ){ 2396 assert( pPager->nMaxPage==(pPager->nPage-1) ); 2397 pPager->nMaxPage++; 2398 } 2399 }else{ 2400 /* Find a page to recycle. Try to locate a page that does not 2401 ** require us to do an fsync() on the journal. 2402 */ 2403 pPg = pPager->pFirstSynced; 2404 2405 /* If we could not find a page that does not require an fsync() 2406 ** on the journal file then fsync the journal file. This is a 2407 ** very slow operation, so we work hard to avoid it. But sometimes 2408 ** it can't be helped. 2409 */ 2410 if( pPg==0 ){ 2411 int rc = syncJournal(pPager); 2412 if( rc!=0 ){ 2413 sqlite3pager_rollback(pPager); 2414 return SQLITE_IOERR; 2415 } 2416 if( pPager->fullSync ){ 2417 /* If in full-sync mode, write a new journal header into the 2418 ** journal file. This is done to avoid ever modifying a journal 2419 ** header that is involved in the rollback of pages that have 2420 ** already been written to the database (in case the header is 2421 ** trashed when the nRec field is updated). 2422 */ 2423 pPager->nRec = 0; 2424 assert( pPager->journalOff > 0 ); 2425 rc = writeJournalHdr(pPager); 2426 if( rc!=0 ){ 2427 sqlite3pager_rollback(pPager); 2428 return SQLITE_IOERR; 2429 } 2430 } 2431 pPg = pPager->pFirst; 2432 } 2433 assert( pPg->nRef==0 ); 2434 2435 /* Write the page to the database file if it is dirty. 
2436 */ 2437 if( pPg->dirty ){ 2438 assert( pPg->needSync==0 ); 2439 pPg->pDirty = 0; 2440 rc = pager_write_pagelist( pPg ); 2441 if( rc!=SQLITE_OK ){ 2442 sqlite3pager_rollback(pPager); 2443 return SQLITE_IOERR; 2444 } 2445 } 2446 assert( pPg->dirty==0 ); 2447 2448 /* If the page we are recycling is marked as alwaysRollback, then 2449 ** set the global alwaysRollback flag, thus disabling the 2450 ** sqlite_dont_rollback() optimization for the rest of this transaction. 2451 ** It is necessary to do this because the page marked alwaysRollback 2452 ** might be reloaded at a later time but at that point we won't remember 2453 ** that is was marked alwaysRollback. This means that all pages must 2454 ** be marked as alwaysRollback from here on out. 2455 */ 2456 if( pPg->alwaysRollback ){ 2457 pPager->alwaysRollback = 1; 2458 } 2459 2460 /* Unlink the old page from the free list and the hash table 2461 */ 2462 unlinkPage(pPg); 2463 TEST_INCR(pPager->nOvfl); 2464 } 2465 pPg->pgno = pgno; 2466 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){ 2467 sqlite3CheckMemory(pPager->aInJournal, pgno/8); 2468 assert( pPager->journalOpen ); 2469 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0; 2470 pPg->needSync = 0; 2471 }else{ 2472 pPg->inJournal = 0; 2473 pPg->needSync = 0; 2474 } 2475 if( pPager->aInStmt && (int)pgno<=pPager->stmtSize 2476 && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){ 2477 page_add_to_stmt_list(pPg); 2478 }else{ 2479 page_remove_from_stmt_list(pPg); 2480 } 2481 pPg->dirty = 0; 2482 pPg->nRef = 1; 2483 REFINFO(pPg); 2484 pPager->nRef++; 2485 h = pager_hash(pgno); 2486 pPg->pNextHash = pPager->aHash[h]; 2487 pPager->aHash[h] = pPg; 2488 if( pPg->pNextHash ){ 2489 assert( pPg->pNextHash->pPrevHash==0 ); 2490 pPg->pNextHash->pPrevHash = pPg; 2491 } 2492 if( pPager->nExtra>0 ){ 2493 memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra); 2494 } 2495 if( pPager->errMask!=0 ){ 2496 sqlite3pager_unref(PGHDR_TO_DATA(pPg)); 2497 rc = 
pager_errcode(pPager); 2498 return rc; 2499 } 2500 if( sqlite3pager_pagecount(pPager)<(int)pgno ){ 2501 memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); 2502 }else{ 2503 int rc; 2504 assert( MEMDB==0 ); 2505 sqlite3OsSeek(&pPager->fd, (pgno-1)*(i64)pPager->pageSize); 2506 rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize); 2507 TRACE3("FETCH %d page %d\n", PAGERID(pPager), pPg->pgno); 2508 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3); 2509 if( rc!=SQLITE_OK ){ 2510 i64 fileSize; 2511 if( sqlite3OsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK 2512 || fileSize>=pgno*pPager->pageSize ){ 2513 sqlite3pager_unref(PGHDR_TO_DATA(pPg)); 2514 return rc; 2515 }else{ 2516 memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); 2517 } 2518 }else{ 2519 TEST_INCR(pPager->nRead); 2520 } 2521 } 2522 #ifdef SQLITE_CHECK_PAGES 2523 pPg->pageHash = pager_pagehash(pPg); 2524 #endif 2525 }else{ 2526 /* The requested page is in the page cache. */ 2527 TEST_INCR(pPager->nHit); 2528 page_ref(pPg); 2529 } 2530 *ppPage = PGHDR_TO_DATA(pPg); 2531 return SQLITE_OK; 2532 } 2533 2534 /* 2535 ** Acquire a page if it is already in the in-memory cache. Do 2536 ** not read the page from disk. Return a pointer to the page, 2537 ** or 0 if the page is not in cache. 2538 ** 2539 ** See also sqlite3pager_get(). The difference between this routine 2540 ** and sqlite3pager_get() is that _get() will go to the disk and read 2541 ** in the page if the page is not already in cache. This routine 2542 ** returns NULL if the page is not in cache or if a disk I/O error 2543 ** has ever happened. 2544 */ 2545 void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){ 2546 PgHdr *pPg; 2547 2548 assert( pPager!=0 ); 2549 assert( pgno!=0 ); 2550 if( pPager->errMask & ~(PAGER_ERR_FULL) ){ 2551 return 0; 2552 } 2553 pPg = pager_lookup(pPager, pgno); 2554 if( pPg==0 ) return 0; 2555 page_ref(pPg); 2556 return PGHDR_TO_DATA(pPg); 2557 } 2558 2559 /* 2560 ** Release a page. 
2561 ** 2562 ** If the number of references to the page drop to zero, then the 2563 ** page is added to the LRU list. When all references to all pages 2564 ** are released, a rollback occurs and the lock on the database is 2565 ** removed. 2566 */ 2567 int sqlite3pager_unref(void *pData){ 2568 PgHdr *pPg; 2569 2570 /* Decrement the reference count for this page 2571 */ 2572 pPg = DATA_TO_PGHDR(pData); 2573 assert( pPg->nRef>0 ); 2574 pPg->nRef--; 2575 REFINFO(pPg); 2576 2577 CHECK_PAGE(pPg); 2578 2579 /* When the number of references to a page reach 0, call the 2580 ** destructor and add the page to the freelist. 2581 */ 2582 if( pPg->nRef==0 ){ 2583 Pager *pPager; 2584 pPager = pPg->pPager; 2585 pPg->pNextFree = 0; 2586 pPg->pPrevFree = pPager->pLast; 2587 pPager->pLast = pPg; 2588 if( pPg->pPrevFree ){ 2589 pPg->pPrevFree->pNextFree = pPg; 2590 }else{ 2591 pPager->pFirst = pPg; 2592 } 2593 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){ 2594 pPager->pFirstSynced = pPg; 2595 } 2596 if( pPager->xDestructor ){ 2597 pPager->xDestructor(pData, pPager->pageSize); 2598 } 2599 2600 /* When all pages reach the freelist, drop the read lock from 2601 ** the database file. 2602 */ 2603 pPager->nRef--; 2604 assert( pPager->nRef>=0 ); 2605 if( pPager->nRef==0 && !MEMDB ){ 2606 pager_reset(pPager); 2607 } 2608 } 2609 return SQLITE_OK; 2610 } 2611 2612 /* 2613 ** Create a journal file for pPager. There should already be a RESERVED 2614 ** or EXCLUSIVE lock on the database file when this routine is called. 2615 ** 2616 ** Return SQLITE_OK if everything. Return an error code and release the 2617 ** write lock if anything goes wrong. 
2618 */ 2619 static int pager_open_journal(Pager *pPager){ 2620 int rc; 2621 assert( !MEMDB ); 2622 assert( pPager->state>=PAGER_RESERVED ); 2623 assert( pPager->journalOpen==0 ); 2624 assert( pPager->useJournal ); 2625 assert( pPager->aInJournal==0 ); 2626 sqlite3pager_pagecount(pPager); 2627 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 ); 2628 if( pPager->aInJournal==0 ){ 2629 rc = SQLITE_NOMEM; 2630 goto failed_to_open_journal; 2631 } 2632 rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile); 2633 pPager->journalOff = 0; 2634 pPager->setMaster = 0; 2635 pPager->journalHdr = 0; 2636 if( rc!=SQLITE_OK ){ 2637 goto failed_to_open_journal; 2638 } 2639 SET_FULLSYNC(pPager->jfd, pPager->fullSync); 2640 SET_FULLSYNC(pPager->fd, pPager->fullSync); 2641 sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd); 2642 pPager->journalOpen = 1; 2643 pPager->journalStarted = 0; 2644 pPager->needSync = 0; 2645 pPager->alwaysRollback = 0; 2646 pPager->nRec = 0; 2647 if( pPager->errMask!=0 ){ 2648 rc = pager_errcode(pPager); 2649 goto failed_to_open_journal; 2650 } 2651 pPager->origDbSize = pPager->dbSize; 2652 2653 rc = writeJournalHdr(pPager); 2654 2655 if( pPager->stmtAutoopen && rc==SQLITE_OK ){ 2656 rc = sqlite3pager_stmt_begin(pPager); 2657 } 2658 if( rc!=SQLITE_OK ){ 2659 rc = pager_unwritelock(pPager); 2660 if( rc==SQLITE_OK ){ 2661 rc = SQLITE_FULL; 2662 } 2663 } 2664 return rc; 2665 2666 failed_to_open_journal: 2667 sqliteFree(pPager->aInJournal); 2668 pPager->aInJournal = 0; 2669 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 2670 pPager->state = PAGER_UNLOCK; 2671 return rc; 2672 } 2673 2674 /* 2675 ** Acquire a write-lock on the database. The lock is removed when 2676 ** the any of the following happen: 2677 ** 2678 ** * sqlite3pager_commit() is called. 2679 ** * sqlite3pager_rollback() is called. 2680 ** * sqlite3pager_close() is called. 2681 ** * sqlite3pager_unref() is called to on every outstanding page. 
2682 ** 2683 ** The first parameter to this routine is a pointer to any open page of the 2684 ** database file. Nothing changes about the page - it is used merely to 2685 ** acquire a pointer to the Pager structure and as proof that there is 2686 ** already a read-lock on the database. 2687 ** 2688 ** The second parameter indicates how much space in bytes to reserve for a 2689 ** master journal file-name at the start of the journal when it is created. 2690 ** 2691 ** A journal file is opened if this is not a temporary file. For temporary 2692 ** files, the opening of the journal file is deferred until there is an 2693 ** actual need to write to the journal. 2694 ** 2695 ** If the database is already reserved for writing, this routine is a no-op. 2696 ** 2697 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file 2698 ** immediately instead of waiting until we try to flush the cache. The 2699 ** exFlag is ignored if a transaction is already active. 2700 */ 2701 int sqlite3pager_begin(void *pData, int exFlag){ 2702 PgHdr *pPg = DATA_TO_PGHDR(pData); 2703 Pager *pPager = pPg->pPager; 2704 int rc = SQLITE_OK; 2705 assert( pPg->nRef>0 ); 2706 assert( pPager->state!=PAGER_UNLOCK ); 2707 if( pPager->state==PAGER_SHARED ){ 2708 assert( pPager->aInJournal==0 ); 2709 if( MEMDB ){ 2710 pPager->state = PAGER_EXCLUSIVE; 2711 pPager->origDbSize = pPager->dbSize; 2712 }else{ 2713 rc = sqlite3OsLock(&pPager->fd, RESERVED_LOCK); 2714 if( rc==SQLITE_OK ){ 2715 pPager->state = PAGER_RESERVED; 2716 if( exFlag ){ 2717 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 2718 } 2719 } 2720 if( rc!=SQLITE_OK ){ 2721 return rc; 2722 } 2723 pPager->dirtyCache = 0; 2724 TRACE2("TRANSACTION %d\n", PAGERID(pPager)); 2725 if( pPager->useJournal && !pPager->tempFile ){ 2726 rc = pager_open_journal(pPager); 2727 } 2728 } 2729 } 2730 return rc; 2731 } 2732 2733 /* 2734 ** Mark a data page as writeable. The page is written into the journal 2735 ** if it is not there already. 
** This routine must be called before making
** changes to a page.
**
** The first time this routine is called, the pager creates a new
** journal and acquires a RESERVED lock on the database.  If the RESERVED
** lock could not be acquired, this routine returns SQLITE_BUSY.  The
** calling routine must check for that return value and be careful not to
** change any page data until this routine returns SQLITE_OK.
**
** If a write to the journal or to the statement journal fails (for
** example because the disk is full), this routine does an immediate
** rollback, sets PAGER_ERR_FULL in the pager error mask and returns the
** error code of the failed write.  All subsequent write attempts then
** return the pager's error code until the error mask is cleared
** (presumably by sqlite3pager_commit() or sqlite3pager_rollback()).
**
** SQLITE_PERM is returned if the pager is read-only.
*/
int sqlite3pager_write(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;

  /* Check for errors
  */
  if( pPager->errMask ){
    return pager_errcode(pPager);
  }
  if( pPager->readOnly ){
    return SQLITE_PERM;
  }

  assert( !pPager->setMaster );

  CHECK_PAGE(pPg);

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal (and to the statement journal, when one is in use)
  ** then there is nothing left to journal.
  */
  pPg->dirty = 1;
  if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
    pPager->dirtyCache = 1;
  }else{

    /* If we get this far, it means that the page needs to be
    ** written to the transaction journal or the statement journal
    ** or both.
    **
    ** First check to see that the transaction journal exists and
    ** create it if it does not.
    */
    assert( pPager->state!=PAGER_UNLOCK );
    rc = sqlite3pager_begin(pData, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( pPager->state>=PAGER_RESERVED );
    if( !pPager->journalOpen && pPager->useJournal ){
      rc = pager_open_journal(pPager);
      if( rc!=SQLITE_OK ) return rc;
    }
    assert( pPager->journalOpen || !pPager->useJournal );
    pPager->dirtyCache = 1;

    /* The transaction journal now exists and we have a RESERVED or an
    ** EXCLUSIVE lock on the main database file.  Write the current page to
    ** the transaction journal if it is not there already.
    */
    if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
      if( (int)pPg->pgno <= pPager->origDbSize ){
        /* The page existed when the transaction started, so its
        ** original content has to be saved.
        */
        int szPg;
        u32 saved;
        if( MEMDB ){
          /* In-memory database: keep a copy of the original content in
          ** the page history.  NOTE(review): if the allocation fails no
          ** copy is kept and no error is reported from here.
          */
          PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
          TRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
          assert( pHist->pOrig==0 );
          pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
          if( pHist->pOrig ){
            memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
          }
        }else{
          /* Build the journal record in place around the page data:
          ** the page number in the 4 bytes before the data and the
          ** checksum in the 4 bytes after it.  The word that the
          ** checksum overwrites is saved first and restored once the
          ** record has been written.
          */
          u32 cksum;
          CODEC(pPager, pData, pPg->pgno, 7);
          cksum = pager_cksum(pPager, pPg->pgno, pData);
          saved = *(u32*)PGHDR_TO_EXTRA(pPg, pPager);
          store32bits(cksum, pPg, pPager->pageSize);
          szPg = pPager->pageSize+8;
          store32bits(pPg->pgno, pPg, -4);
          rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
          pPager->journalOff += szPg;
          TRACE4("JOURNAL %d page %d needSync=%d\n",
                  PAGERID(pPager), pPg->pgno, pPg->needSync);
          CODEC(pPager, pData, pPg->pgno, 0);
          *(u32*)PGHDR_TO_EXTRA(pPg, pPager) = saved;
          if( rc!=SQLITE_OK ){
            sqlite3pager_rollback(pPager);
            pPager->errMask |= PAGER_ERR_FULL;
            return rc;
          }
          /* Record that the page is now in the journal. */
          pPager->nRec++;
          assert( pPager->aInJournal!=0 );
          pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
          pPg->needSync = !pPager->noSync;
          if( pPager->stmtInUse ){
            pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
            page_add_to_stmt_list(pPg);
          }
        }
      }else{
        /* The page lies beyond the original end of the database, so
        ** there is no original content to save.
        */
        pPg->needSync = !pPager->journalStarted && !pPager->noSync;
        TRACE4("APPEND %d page %d needSync=%d\n",
                PAGERID(pPager), pPg->pgno, pPg->needSync);
      }
      if( pPg->needSync ){
        pPager->needSync = 1;
      }
      pPg->inJournal = 1;
    }

    /* If the statement journal is open and the page is not in it,
    ** then write the current page to the statement journal.  Note that
    ** the statement journal format differs from the standard journal format
    ** in that it omits the checksums and the header.
    */
    if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
      assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
      if( MEMDB ){
        /* NOTE(review): as above, a failed allocation is not reported. */
        PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
        assert( pHist->pStmt==0 );
        pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
        if( pHist->pStmt ){
          memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
        }
        TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
      }else{
        store32bits(pPg->pgno, pPg, -4);
        CODEC(pPager, pData, pPg->pgno, 7);
        rc = sqlite3OsWrite(&pPager->stfd,((char*)pData)-4, pPager->pageSize+4);
        TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
        CODEC(pPager, pData, pPg->pgno, 0);
        if( rc!=SQLITE_OK ){
          sqlite3pager_rollback(pPager);
          pPager->errMask |= PAGER_ERR_FULL;
          return rc;
        }
        pPager->stmtNRec++;
        assert( pPager->aInStmt!=0 );
        pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
      }
      page_add_to_stmt_list(pPg);
    }
  }

  /* Update the database size and return.  The size is bumped by one
  ** more page when it lands exactly on the page holding PENDING_BYTE.
  */
  if( pPager->dbSize<(int)pPg->pgno ){
    pPager->dbSize = pPg->pgno;
    if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
      pPager->dbSize++;
    }
  }
  return rc;
}

/*
** Return TRUE if the page given in the argument was previously passed
** to sqlite3pager_write().  In other words, return TRUE if it is ok
** to change the content of the page.  The value returned is the dirty
** flag of the page.
*/
int sqlite3pager_iswriteable(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  return pPg->dirty;
}

#ifndef SQLITE_OMIT_VACUUM
/*
** Replace the content of a single page with the information in the third
** argument.  pData must point to at least pPager->pageSize bytes.
**
** The page is fetched, marked writeable, overwritten and released again.
** Returns SQLITE_OK on success or the error code from
** sqlite3pager_get() or sqlite3pager_write(); in the latter case the
** page content is left unchanged.
*/
int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
  void *pPage;
  int rc;

  rc = sqlite3pager_get(pPager, pgno, &pPage);
  if( rc==SQLITE_OK ){
    rc = sqlite3pager_write(pPage);
    if( rc==SQLITE_OK ){
      memcpy(pPage, pData, pPager->pageSize);
    }
    sqlite3pager_unref(pPage);
  }
  return rc;
}
#endif

/*
** A call to this routine tells the pager that it is not necessary to
** write the information on page "pgno" back to the disk, even though
** that page might be marked as dirty.
**
** The overlying software layer calls this routine when all of the data
** on the given page is unused.  The pager marks the page as clean so
** that it does not get written to disk.
**
** Tests show that this optimization, together with the
** sqlite3pager_dont_rollback() below, more than double the speed
** of large INSERT operations and quadruple the speed of large DELETEs.
**
** When this routine is called, set the alwaysRollback flag to true.
** Subsequent calls to sqlite3pager_dont_rollback() for the same page
** will thereafter be ignored.
This is necessary to avoid a problem 2943 ** where a page with data is added to the freelist during one part of 2944 ** a transaction then removed from the freelist during a later part 2945 ** of the same transaction and reused for some other purpose. When it 2946 ** is first added to the freelist, this routine is called. When reused, 2947 ** the dont_rollback() routine is called. But because the page contains 2948 ** critical data, we still need to be sure it gets rolled back in spite 2949 ** of the dont_rollback() call. 2950 */ 2951 void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){ 2952 PgHdr *pPg; 2953 2954 if( MEMDB ) return; 2955 2956 pPg = pager_lookup(pPager, pgno); 2957 pPg->alwaysRollback = 1; 2958 if( pPg && pPg->dirty ){ 2959 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){ 2960 /* If this pages is the last page in the file and the file has grown 2961 ** during the current transaction, then do NOT mark the page as clean. 2962 ** When the database file grows, we must make sure that the last page 2963 ** gets written at least once so that the disk file will be the correct 2964 ** size. If you do not write this page and the size of the file 2965 ** on the disk ends up being too small, that can lead to database 2966 ** corruption during the next transaction. 2967 */ 2968 }else{ 2969 TRACE3("DONT_WRITE page %d of %d\n", pgno, PAGERID(pPager)); 2970 pPg->dirty = 0; 2971 #ifdef SQLITE_CHECK_PAGES 2972 pPg->pageHash = pager_pagehash(pPg); 2973 #endif 2974 } 2975 } 2976 } 2977 2978 /* 2979 ** A call to this routine tells the pager that if a rollback occurs, 2980 ** it is not necessary to restore the data on the given page. This 2981 ** means that the pager does not have to record the given page in the 2982 ** rollback journal. 
2983 */ 2984 void sqlite3pager_dont_rollback(void *pData){ 2985 PgHdr *pPg = DATA_TO_PGHDR(pData); 2986 Pager *pPager = pPg->pPager; 2987 2988 if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return; 2989 if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return; 2990 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){ 2991 assert( pPager->aInJournal!=0 ); 2992 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2993 pPg->inJournal = 1; 2994 if( pPager->stmtInUse ){ 2995 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2996 page_add_to_stmt_list(pPg); 2997 } 2998 TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager)); 2999 } 3000 if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){ 3001 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); 3002 assert( pPager->aInStmt!=0 ); 3003 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); 3004 page_add_to_stmt_list(pPg); 3005 } 3006 } 3007 3008 3009 #ifndef SQLITE_OMIT_MEMORYDB 3010 /* 3011 ** Clear a PgHistory block 3012 */ 3013 static void clearHistory(PgHistory *pHist){ 3014 sqliteFree(pHist->pOrig); 3015 sqliteFree(pHist->pStmt); 3016 pHist->pOrig = 0; 3017 pHist->pStmt = 0; 3018 } 3019 #else 3020 #define clearHistory(x) 3021 #endif 3022 3023 /* 3024 ** Commit all changes to the database and release the write lock. 3025 ** 3026 ** If the commit fails for any reason, a rollback attempt is made 3027 ** and an error code is returned. If the commit worked, SQLITE_OK 3028 ** is returned. 
3029 */ 3030 int sqlite3pager_commit(Pager *pPager){ 3031 int rc; 3032 PgHdr *pPg; 3033 3034 if( pPager->errMask==PAGER_ERR_FULL ){ 3035 rc = sqlite3pager_rollback(pPager); 3036 if( rc==SQLITE_OK ){ 3037 rc = SQLITE_FULL; 3038 } 3039 return rc; 3040 } 3041 if( pPager->errMask!=0 ){ 3042 rc = pager_errcode(pPager); 3043 return rc; 3044 } 3045 if( pPager->state<PAGER_RESERVED ){ 3046 return SQLITE_ERROR; 3047 } 3048 TRACE2("COMMIT %d\n", PAGERID(pPager)); 3049 if( MEMDB ){ 3050 pPg = pager_get_all_dirty_pages(pPager); 3051 while( pPg ){ 3052 clearHistory(PGHDR_TO_HIST(pPg, pPager)); 3053 pPg->dirty = 0; 3054 pPg->inJournal = 0; 3055 pPg->inStmt = 0; 3056 pPg->pPrevStmt = pPg->pNextStmt = 0; 3057 pPg = pPg->pDirty; 3058 } 3059 #ifndef NDEBUG 3060 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 3061 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 3062 assert( !pPg->alwaysRollback ); 3063 assert( !pHist->pOrig ); 3064 assert( !pHist->pStmt ); 3065 } 3066 #endif 3067 pPager->pStmt = 0; 3068 pPager->state = PAGER_SHARED; 3069 return SQLITE_OK; 3070 } 3071 if( pPager->dirtyCache==0 ){ 3072 /* Exit early (without doing the time-consuming sqlite3OsSync() calls) 3073 ** if there have been no changes to the database file. */ 3074 assert( pPager->needSync==0 ); 3075 rc = pager_unwritelock(pPager); 3076 pPager->dbSize = -1; 3077 return rc; 3078 } 3079 assert( pPager->journalOpen ); 3080 rc = sqlite3pager_sync(pPager, 0, 0); 3081 if( rc!=SQLITE_OK ){ 3082 goto commit_abort; 3083 } 3084 rc = pager_unwritelock(pPager); 3085 pPager->dbSize = -1; 3086 return rc; 3087 3088 /* Jump here if anything goes wrong during the commit process. 3089 */ 3090 commit_abort: 3091 sqlite3pager_rollback(pPager); 3092 return rc; 3093 } 3094 3095 /* 3096 ** Rollback all changes. The database falls back to PAGER_SHARED mode. 3097 ** All in-memory cache pages revert to their original data contents. 3098 ** The journal is deleted. 
3099 ** 3100 ** This routine cannot fail unless some other process is not following 3101 ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other 3102 ** process is writing trash into the journal file (SQLITE_CORRUPT) or 3103 ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error 3104 ** codes are returned for all these occasions. Otherwise, 3105 ** SQLITE_OK is returned. 3106 */ 3107 int sqlite3pager_rollback(Pager *pPager){ 3108 int rc; 3109 TRACE2("ROLLBACK %d\n", PAGERID(pPager)); 3110 if( MEMDB ){ 3111 PgHdr *p; 3112 for(p=pPager->pAll; p; p=p->pNextAll){ 3113 PgHistory *pHist; 3114 assert( !p->alwaysRollback ); 3115 if( !p->dirty ){ 3116 assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig ); 3117 assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt ); 3118 continue; 3119 } 3120 3121 pHist = PGHDR_TO_HIST(p, pPager); 3122 if( pHist->pOrig ){ 3123 memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize); 3124 TRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager)); 3125 }else{ 3126 TRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager)); 3127 } 3128 clearHistory(pHist); 3129 p->dirty = 0; 3130 p->inJournal = 0; 3131 p->inStmt = 0; 3132 p->pPrevStmt = p->pNextStmt = 0; 3133 3134 if( pPager->xReiniter ){ 3135 pPager->xReiniter(PGHDR_TO_DATA(p), pPager->pageSize); 3136 } 3137 3138 } 3139 pPager->pStmt = 0; 3140 pPager->dbSize = pPager->origDbSize; 3141 memoryTruncate(pPager); 3142 pPager->stmtInUse = 0; 3143 pPager->state = PAGER_SHARED; 3144 return SQLITE_OK; 3145 } 3146 3147 if( !pPager->dirtyCache || !pPager->journalOpen ){ 3148 rc = pager_unwritelock(pPager); 3149 pPager->dbSize = -1; 3150 return rc; 3151 } 3152 3153 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){ 3154 if( pPager->state>=PAGER_EXCLUSIVE ){ 3155 pager_playback(pPager); 3156 } 3157 return pager_errcode(pPager); 3158 } 3159 if( pPager->state==PAGER_RESERVED ){ 3160 int rc2; 3161 rc = pager_reload_cache(pPager); 3162 rc2 = 
pager_unwritelock(pPager); 3163 if( rc==SQLITE_OK ){ 3164 rc = rc2; 3165 } 3166 }else{ 3167 rc = pager_playback(pPager); 3168 } 3169 if( rc!=SQLITE_OK ){ 3170 rc = SQLITE_CORRUPT; /* bkpt-CORRUPT */ 3171 pPager->errMask |= PAGER_ERR_CORRUPT; 3172 } 3173 pPager->dbSize = -1; 3174 return rc; 3175 } 3176 3177 /* 3178 ** Return TRUE if the database file is opened read-only. Return FALSE 3179 ** if the database is (in theory) writable. 3180 */ 3181 int sqlite3pager_isreadonly(Pager *pPager){ 3182 return pPager->readOnly; 3183 } 3184 3185 /* 3186 ** This routine is used for testing and analysis only. 3187 */ 3188 int *sqlite3pager_stats(Pager *pPager){ 3189 static int a[11]; 3190 a[0] = pPager->nRef; 3191 a[1] = pPager->nPage; 3192 a[2] = pPager->mxPage; 3193 a[3] = pPager->dbSize; 3194 a[4] = pPager->state; 3195 a[5] = pPager->errMask; 3196 #ifdef SQLITE_TEST 3197 a[6] = pPager->nHit; 3198 a[7] = pPager->nMiss; 3199 a[8] = pPager->nOvfl; 3200 a[9] = pPager->nRead; 3201 a[10] = pPager->nWrite; 3202 #endif 3203 return a; 3204 } 3205 3206 /* 3207 ** Set the statement rollback point. 3208 ** 3209 ** This routine should be called with the transaction journal already 3210 ** open. A new statement journal is created that can be used to rollback 3211 ** changes of a single SQL command within a larger transaction. 
3212 */ 3213 int sqlite3pager_stmt_begin(Pager *pPager){ 3214 int rc; 3215 char zTemp[SQLITE_TEMPNAME_SIZE]; 3216 assert( !pPager->stmtInUse ); 3217 assert( pPager->dbSize>=0 ); 3218 TRACE2("STMT-BEGIN %d\n", PAGERID(pPager)); 3219 if( MEMDB ){ 3220 pPager->stmtInUse = 1; 3221 pPager->stmtSize = pPager->dbSize; 3222 return SQLITE_OK; 3223 } 3224 if( !pPager->journalOpen ){ 3225 pPager->stmtAutoopen = 1; 3226 return SQLITE_OK; 3227 } 3228 assert( pPager->journalOpen ); 3229 pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 ); 3230 if( pPager->aInStmt==0 ){ 3231 sqlite3OsLock(&pPager->fd, SHARED_LOCK); 3232 return SQLITE_NOMEM; 3233 } 3234 #ifndef NDEBUG 3235 rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize); 3236 if( rc ) goto stmt_begin_failed; 3237 assert( pPager->stmtJSize == pPager->journalOff ); 3238 #endif 3239 pPager->stmtJSize = pPager->journalOff; 3240 pPager->stmtSize = pPager->dbSize; 3241 pPager->stmtHdrOff = 0; 3242 pPager->stmtCksum = pPager->cksumInit; 3243 if( !pPager->stmtOpen ){ 3244 rc = sqlite3pager_opentemp(zTemp, &pPager->stfd); 3245 if( rc ) goto stmt_begin_failed; 3246 pPager->stmtOpen = 1; 3247 pPager->stmtNRec = 0; 3248 } 3249 pPager->stmtInUse = 1; 3250 return SQLITE_OK; 3251 3252 stmt_begin_failed: 3253 if( pPager->aInStmt ){ 3254 sqliteFree(pPager->aInStmt); 3255 pPager->aInStmt = 0; 3256 } 3257 return rc; 3258 } 3259 3260 /* 3261 ** Commit a statement. 
3262 */ 3263 int sqlite3pager_stmt_commit(Pager *pPager){ 3264 if( pPager->stmtInUse ){ 3265 PgHdr *pPg, *pNext; 3266 TRACE2("STMT-COMMIT %d\n", PAGERID(pPager)); 3267 if( !MEMDB ){ 3268 sqlite3OsSeek(&pPager->stfd, 0); 3269 /* sqlite3OsTruncate(&pPager->stfd, 0); */ 3270 sqliteFree( pPager->aInStmt ); 3271 pPager->aInStmt = 0; 3272 } 3273 for(pPg=pPager->pStmt; pPg; pPg=pNext){ 3274 pNext = pPg->pNextStmt; 3275 assert( pPg->inStmt ); 3276 pPg->inStmt = 0; 3277 pPg->pPrevStmt = pPg->pNextStmt = 0; 3278 if( MEMDB ){ 3279 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 3280 sqliteFree(pHist->pStmt); 3281 pHist->pStmt = 0; 3282 } 3283 } 3284 pPager->stmtNRec = 0; 3285 pPager->stmtInUse = 0; 3286 pPager->pStmt = 0; 3287 } 3288 pPager->stmtAutoopen = 0; 3289 return SQLITE_OK; 3290 } 3291 3292 /* 3293 ** Rollback a statement. 3294 */ 3295 int sqlite3pager_stmt_rollback(Pager *pPager){ 3296 int rc; 3297 if( pPager->stmtInUse ){ 3298 TRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager)); 3299 if( MEMDB ){ 3300 PgHdr *pPg; 3301 for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){ 3302 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 3303 if( pHist->pStmt ){ 3304 memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize); 3305 sqliteFree(pHist->pStmt); 3306 pHist->pStmt = 0; 3307 } 3308 } 3309 pPager->dbSize = pPager->stmtSize; 3310 memoryTruncate(pPager); 3311 rc = SQLITE_OK; 3312 }else{ 3313 rc = pager_stmt_playback(pPager); 3314 } 3315 sqlite3pager_stmt_commit(pPager); 3316 }else{ 3317 rc = SQLITE_OK; 3318 } 3319 pPager->stmtAutoopen = 0; 3320 return rc; 3321 } 3322 3323 /* 3324 ** Return the full pathname of the database file. 3325 */ 3326 const char *sqlite3pager_filename(Pager *pPager){ 3327 return pPager->zFilename; 3328 } 3329 3330 /* 3331 ** Return the directory of the database file. 3332 */ 3333 const char *sqlite3pager_dirname(Pager *pPager){ 3334 return pPager->zDirectory; 3335 } 3336 3337 /* 3338 ** Return the full pathname of the journal file. 
3339 */ 3340 const char *sqlite3pager_journalname(Pager *pPager){ 3341 return pPager->zJournal; 3342 } 3343 3344 /* 3345 ** Set the codec for this pager 3346 */ 3347 void sqlite3pager_set_codec( 3348 Pager *pPager, 3349 void (*xCodec)(void*,void*,Pgno,int), 3350 void *pCodecArg 3351 ){ 3352 pPager->xCodec = xCodec; 3353 pPager->pCodecArg = pCodecArg; 3354 } 3355 3356 /* 3357 ** This routine is called to increment the database file change-counter, 3358 ** stored at byte 24 of the pager file. 3359 */ 3360 static int pager_incr_changecounter(Pager *pPager){ 3361 void *pPage; 3362 PgHdr *pPgHdr; 3363 u32 change_counter; 3364 int rc; 3365 3366 /* Open page 1 of the file for writing. */ 3367 rc = sqlite3pager_get(pPager, 1, &pPage); 3368 if( rc!=SQLITE_OK ) return rc; 3369 rc = sqlite3pager_write(pPage); 3370 if( rc!=SQLITE_OK ) return rc; 3371 3372 /* Read the current value at byte 24. */ 3373 pPgHdr = DATA_TO_PGHDR(pPage); 3374 change_counter = retrieve32bits(pPgHdr, 24); 3375 3376 /* Increment the value just read and write it back to byte 24. */ 3377 change_counter++; 3378 store32bits(change_counter, pPgHdr, 24); 3379 3380 /* Release the page reference. */ 3381 sqlite3pager_unref(pPage); 3382 return SQLITE_OK; 3383 } 3384 3385 /* 3386 ** Sync the database file for the pager pPager. zMaster points to the name 3387 ** of a master journal file that should be written into the individual 3388 ** journal file. zMaster may be NULL, which is interpreted as no master 3389 ** journal (a single database transaction). 3390 ** 3391 ** This routine ensures that the journal is synced, all dirty pages written 3392 ** to the database file and the database file synced. The only thing that 3393 ** remains to commit the transaction is to delete the journal file (or 3394 ** master journal file if specified). 3395 ** 3396 ** Note that if zMaster==NULL, this does not overwrite a previous value 3397 ** passed to an sqlite3pager_sync() call. 
3398 ** 3399 ** If parameter nTrunc is non-zero, then the pager file is truncated to 3400 ** nTrunc pages (this is used by auto-vacuum databases). 3401 */ 3402 int sqlite3pager_sync(Pager *pPager, const char *zMaster, Pgno nTrunc){ 3403 int rc = SQLITE_OK; 3404 3405 TRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 3406 pPager->zFilename, zMaster, nTrunc); 3407 3408 /* If this is an in-memory db, or no pages have been written to, or this 3409 ** function has already been called, it is a no-op. 3410 */ 3411 if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){ 3412 PgHdr *pPg; 3413 assert( pPager->journalOpen ); 3414 3415 /* If a master journal file name has already been written to the 3416 ** journal file, then no sync is required. This happens when it is 3417 ** written, then the process fails to upgrade from a RESERVED to an 3418 ** EXCLUSIVE lock. The next time the process tries to commit the 3419 ** transaction the m-j name will have already been written. 3420 */ 3421 if( !pPager->setMaster ){ 3422 rc = pager_incr_changecounter(pPager); 3423 if( rc!=SQLITE_OK ) goto sync_exit; 3424 #ifndef SQLITE_OMIT_AUTOVACUUM 3425 if( nTrunc!=0 ){ 3426 /* If this transaction has made the database smaller, then all pages 3427 ** being discarded by the truncation must be written to the journal 3428 ** file. 
3429 */ 3430 Pgno i; 3431 void *pPage; 3432 for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){ 3433 if( !(pPager->aInJournal[i/8] & (1<<(i&7))) ){ 3434 rc = sqlite3pager_get(pPager, i, &pPage); 3435 if( rc!=SQLITE_OK ) goto sync_exit; 3436 rc = sqlite3pager_write(pPage); 3437 sqlite3pager_unref(pPage); 3438 if( rc!=SQLITE_OK ) goto sync_exit; 3439 } 3440 } 3441 } 3442 #endif 3443 rc = writeMasterJournal(pPager, zMaster); 3444 if( rc!=SQLITE_OK ) goto sync_exit; 3445 rc = syncJournal(pPager); 3446 if( rc!=SQLITE_OK ) goto sync_exit; 3447 } 3448 3449 #ifndef SQLITE_OMIT_AUTOVACUUM 3450 if( nTrunc!=0 ){ 3451 rc = sqlite3pager_truncate(pPager, nTrunc); 3452 if( rc!=SQLITE_OK ) goto sync_exit; 3453 } 3454 #endif 3455 3456 /* Write all dirty pages to the database file */ 3457 pPg = pager_get_all_dirty_pages(pPager); 3458 rc = pager_write_pagelist(pPg); 3459 if( rc!=SQLITE_OK ) goto sync_exit; 3460 3461 /* Sync the database file. */ 3462 if( !pPager->noSync ){ 3463 rc = sqlite3OsSync(&pPager->fd); 3464 } 3465 3466 pPager->state = PAGER_SYNCED; 3467 } 3468 3469 sync_exit: 3470 return rc; 3471 } 3472 3473 #ifndef SQLITE_OMIT_AUTOVACUUM 3474 /* 3475 ** Move the page identified by pData to location pgno in the file. 3476 ** 3477 ** There must be no references to the current page pgno. If current page 3478 ** pgno is not already in the rollback journal, it is not written there by 3479 ** by this routine. The same applies to the page pData refers to on entry to 3480 ** this routine. 3481 ** 3482 ** References to the page refered to by pData remain valid. Updating any 3483 ** meta-data associated with page pData (i.e. data stored in the nExtra bytes 3484 ** allocated along with the page) is the responsibility of the caller. 3485 ** 3486 ** A transaction must be active when this routine is called. 
It used to be 3487 ** required that a statement transaction was not active, but this restriction 3488 ** has been removed (CREATE INDEX needs to move a page when a statement 3489 ** transaction is active). 3490 */ 3491 int sqlite3pager_movepage(Pager *pPager, void *pData, Pgno pgno){ 3492 PgHdr *pPg = DATA_TO_PGHDR(pData); 3493 PgHdr *pPgOld; 3494 int h; 3495 Pgno needSyncPgno = 0; 3496 3497 assert( pPg->nRef>0 ); 3498 3499 TRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 3500 PAGERID(pPager), pPg->pgno, pPg->needSync, pgno); 3501 3502 if( pPg->needSync ){ 3503 needSyncPgno = pPg->pgno; 3504 assert( pPg->inJournal ); 3505 assert( pPg->dirty ); 3506 assert( pPager->needSync ); 3507 } 3508 3509 /* Unlink pPg from it's hash-chain */ 3510 unlinkHashChain(pPager, pPg); 3511 3512 /* If the cache contains a page with page-number pgno, remove it 3513 ** from it's hash chain. Also, if the PgHdr.needSync was set for 3514 ** page pgno before the 'move' operation, it needs to be retained 3515 ** for the page moved there. 3516 */ 3517 pPgOld = pager_lookup(pPager, pgno); 3518 if( pPgOld ){ 3519 assert( pPgOld->nRef==0 ); 3520 unlinkHashChain(pPager, pPgOld); 3521 pPgOld->dirty = 0; 3522 if( pPgOld->needSync ){ 3523 assert( pPgOld->inJournal ); 3524 pPg->inJournal = 1; 3525 pPg->needSync = 1; 3526 assert( pPager->needSync ); 3527 } 3528 } 3529 3530 /* Change the page number for pPg and insert it into the new hash-chain. */ 3531 pPg->pgno = pgno; 3532 h = pager_hash(pgno); 3533 if( pPager->aHash[h] ){ 3534 assert( pPager->aHash[h]->pPrevHash==0 ); 3535 pPager->aHash[h]->pPrevHash = pPg; 3536 } 3537 pPg->pNextHash = pPager->aHash[h]; 3538 pPager->aHash[h] = pPg; 3539 pPg->pPrevHash = 0; 3540 3541 pPg->dirty = 1; 3542 pPager->dirtyCache = 1; 3543 3544 if( needSyncPgno ){ 3545 /* If needSyncPgno is non-zero, then the journal file needs to be 3546 ** sync()ed before any data is written to database file page needSyncPgno. 
3547 ** Currently, no such page exists in the page-cache and the 3548 ** Pager.aInJournal bit has been set. This needs to be remedied by loading 3549 ** the page into the pager-cache and setting the PgHdr.needSync flag. 3550 ** 3551 ** The sqlite3pager_get() call may cause the journal to sync. So make 3552 ** sure the Pager.needSync flag is set too. 3553 */ 3554 int rc; 3555 void *pNeedSync; 3556 assert( pPager->needSync ); 3557 rc = sqlite3pager_get(pPager, needSyncPgno, &pNeedSync); 3558 if( rc!=SQLITE_OK ) return rc; 3559 pPager->needSync = 1; 3560 DATA_TO_PGHDR(pNeedSync)->needSync = 1; 3561 DATA_TO_PGHDR(pNeedSync)->inJournal = 1; 3562 DATA_TO_PGHDR(pNeedSync)->dirty = 1; 3563 sqlite3pager_unref(pNeedSync); 3564 } 3565 3566 return SQLITE_OK; 3567 } 3568 #endif 3569 3570 #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) 3571 /* 3572 ** Return the current state of the file lock for the given pager. 3573 ** The return value is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK, 3574 ** PENDING_LOCK, or EXCLUSIVE_LOCK. 3575 */ 3576 int sqlite3pager_lockstate(Pager *pPager){ 3577 #ifdef OS_TEST 3578 return pPager->fd->fd.locktype; 3579 #else 3580 return pPager->fd.locktype; 3581 #endif 3582 } 3583 #endif 3584 3585 #ifdef SQLITE_DEBUG 3586 /* 3587 ** Print a listing of all referenced pages and their ref count. 3588 */ 3589 void sqlite3pager_refdump(Pager *pPager){ 3590 PgHdr *pPg; 3591 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 3592 if( pPg->nRef<=0 ) continue; 3593 sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n", 3594 pPg->pgno, PGHDR_TO_DATA(pPg), pPg->nRef); 3595 } 3596 } 3597 #endif 3598 3599 #endif /* SQLITE_OMIT_DISKIO */ 3600