1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** This is the implementation of the page cache subsystem or "pager". 13 ** 14 ** The pager is used to access a database disk file. It implements 15 ** atomic commit and rollback through the use of a journal file that 16 ** is separate from the database file. The pager also implements file 17 ** locking to prevent two processes from writing the same database 18 ** file simultaneously, or one process from reading the database while 19 ** another is writing. 20 ** 21 ** @(#) $Id: pager.c,v 1.122 2004/06/10 05:59:25 danielk1977 Exp $ 22 */ 23 #include "os.h" /* Must be first to enable large file support */ 24 #include "sqliteInt.h" 25 #include "pager.h" 26 #include <assert.h> 27 #include <string.h> 28 29 /* 30 ** Macros for troubleshooting. Normally turned off 31 */ 32 #if 0 33 static Pager *mainPager = 0; 34 #define SET_PAGER(X) if( mainPager==0 ) mainPager = (X) 35 #define CLR_PAGER(X) if( mainPager==(X) ) mainPager = 0 36 #define TRACE1(X) if( pPager==mainPager ) sqlite3DebugPrintf(X) 37 #define TRACE2(X,Y) if( pPager==mainPager ) sqlite3DebugPrintf(X,Y) 38 #define TRACE3(X,Y,Z) if( pPager==mainPager ) sqlite3DebugPrintf(X,Y,Z) 39 #else 40 #define SET_PAGER(X) 41 #define CLR_PAGER(X) 42 #define TRACE1(X) 43 #define TRACE2(X,Y) 44 #define TRACE3(X,Y,Z) 45 #endif 46 47 48 /* 49 ** The page cache as a whole is always in one of the following 50 ** states: 51 ** 52 ** PAGER_UNLOCK The page cache is not currently reading or 53 ** writing the database file. There is no 54 ** data held in memory. This is the initial 55 ** state. 
56 ** 57 ** PAGER_SHARED The page cache is reading the database. 58 ** Writing is not permitted. There can be 59 ** multiple readers accessing the same database 60 ** file at the same time. 61 ** 62 ** PAGER_RESERVED Writing is permitted to the page cache only. 63 ** The original database file has not been modified. 64 ** Other processes may still be reading the on-disk 65 ** database file. 66 ** 67 ** PAGER_EXCLUSIVE The page cache is writing the database. 68 ** Access is exclusive. No other processes or 69 ** threads can be reading or writing while one 70 ** process is writing. 71 ** 72 ** The page cache comes up in PAGER_UNLOCK. The first time a 73 ** sqlite_page_get() occurs, the state transitions to PAGER_SHARED. 74 ** After all pages have been released using sqlite_page_unref(), 75 ** the state transitions back to PAGER_UNLOCK. The first time 76 ** that sqlite_page_write() is called, the state transitions to 77 ** PAGER_RESERVED. (Note that sqlite_page_write() can only be 78 ** called on an outstanding page which means that the pager must 79 ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.) 80 ** The sqlite_page_rollback() and sqlite_page_commit() functions 81 ** transition the state from PAGER_RESERVED to PAGER_EXCLUSIVE to 82 ** PAGER_SHARED. 83 */ 84 #define PAGER_UNLOCK 0 85 #define PAGER_SHARED 1 86 #define PAGER_RESERVED 2 87 #define PAGER_EXCLUSIVE 3 88 89 90 /* 91 ** Each in-memory image of a page begins with the following header. 92 ** This header is only visible to this pager module. The client 93 ** code that calls pager sees only the data that follows the header. 94 ** 95 ** Client code should call sqlite3pager_write() on a page prior to making 96 ** any modifications to that page. The first time sqlite3pager_write() 97 ** is called, the original page contents are written into the rollback 98 ** journal and PgHdr.inJournal and PgHdr.needSync are set. 
Later, once 99 ** the journal page has made it onto the disk surface, PgHdr.needSync 100 ** is cleared. The modified page cannot be written back into the original 101 ** database file until the journal pages has been synced to disk and the 102 ** PgHdr.needSync has been cleared. 103 ** 104 ** The PgHdr.dirty flag is set when sqlite3pager_write() is called and 105 ** is cleared again when the page content is written back to the original 106 ** database file. 107 */ 108 typedef struct PgHdr PgHdr; 109 struct PgHdr { 110 Pager *pPager; /* The pager to which this page belongs */ 111 Pgno pgno; /* The page number for this page */ 112 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */ 113 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */ 114 PgHdr *pNextAll; /* A list of all pages */ 115 PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */ 116 u8 inJournal; /* TRUE if has been written to journal */ 117 u8 inStmt; /* TRUE if in the statement subjournal */ 118 u8 dirty; /* TRUE if we need to write back changes */ 119 u8 needSync; /* Sync journal before writing this page */ 120 u8 alwaysRollback; /* Disable dont_rollback() for this page */ 121 short int nRef; /* Number of users of this page */ 122 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */ 123 /* SQLITE_PAGE_SIZE bytes of page data follow this header */ 124 /* Pager.nExtra bytes of local data follow the page data */ 125 }; 126 127 /* 128 ** For an in-memory only database, some extra information is recorded about 129 ** each page so that changes can be rolled back. (Journal files are not 130 ** used for in-memory databases.) The following information is added to 131 ** the end of every EXTRA block for in-memory databases. 132 ** 133 ** This information could have been added directly to the PgHdr structure. 134 ** But then it would take up an extra 8 bytes of storage on every PgHdr 135 ** even for disk-based databases. Splitting it out saves 8 bytes. 
This 136 ** is only a savings of 0.8% but those percentages add up. 137 */ 138 typedef struct PgHistory PgHistory; 139 struct PgHistory { 140 u8 *pOrig; /* Original page text. Restore to this on a full rollback */ 141 u8 *pStmt; /* Text as it was at the beginning of the current statement */ 142 }; 143 144 /* 145 ** A macro used for invoking the codec if there is one 146 */ 147 #ifdef SQLITE_HAS_CODEC 148 # define CODEC(P,D,N,X) if( P->xCodec ){ P->xCodec(P->pCodecArg,D,N,X); } 149 #else 150 # define CODEC(P,D,N,X) 151 #endif 152 153 /* 154 ** Convert a pointer to a PgHdr into a pointer to its data 155 ** and back again. 156 */ 157 #define PGHDR_TO_DATA(P) ((void*)(&(P)[1])) 158 #define DATA_TO_PGHDR(D) (&((PgHdr*)(D))[-1]) 159 #define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE]) 160 #define PGHDR_TO_HIST(P,PGR) \ 161 ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra]) 162 163 /* 164 ** How big to make the hash table used for locating in-memory pages 165 ** by page number. 166 */ 167 #define N_PG_HASH 2048 168 169 /* 170 ** Hash a page number 171 */ 172 #define pager_hash(PN) ((PN)&(N_PG_HASH-1)) 173 174 /* 175 ** A open page cache is an instance of the following structure. 
176 */ 177 struct Pager { 178 char *zFilename; /* Name of the database file */ 179 char *zJournal; /* Name of the journal file */ 180 char *zDirectory; /* Directory hold database and journal files */ 181 OsFile fd, jfd; /* File descriptors for database and journal */ 182 OsFile stfd; /* File descriptor for the statement subjournal*/ 183 int dbSize; /* Number of pages in the file */ 184 int origDbSize; /* dbSize before the current change */ 185 int stmtSize; /* Size of database (in pages) at stmt_begin() */ 186 off_t stmtJSize; /* Size of journal at stmt_begin() */ 187 int nRec; /* Number of pages written to the journal */ 188 u32 cksumInit; /* Quasi-random value added to every checksum */ 189 int stmtNRec; /* Number of records in stmt subjournal */ 190 int nExtra; /* Add this many bytes to each in-memory page */ 191 void (*xDestructor)(void*,int); /* Call this routine when freeing pages */ 192 void (*xReiniter)(void*,int); /* Call this routine when reloading pages */ 193 int pageSize; /* Number of bytes in a page */ 194 int nPage; /* Total number of in-memory pages */ 195 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */ 196 int mxPage; /* Maximum number of pages to hold in cache */ 197 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */ 198 void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */ 199 void *pCodecArg; /* First argument to xCodec() */ 200 u8 journalOpen; /* True if journal file descriptors is valid */ 201 u8 journalStarted; /* True if header of journal is synced */ 202 u8 useJournal; /* Use a rollback journal on this file */ 203 u8 stmtOpen; /* True if the statement subjournal is open */ 204 u8 stmtInUse; /* True we are in a statement subtransaction */ 205 u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/ 206 u8 noSync; /* Do not sync the journal if true */ 207 u8 fullSync; /* Do extra syncs of the journal for robustness */ 208 u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. 
*/ 209 u8 errMask; /* One of several kinds of errors */ 210 u8 tempFile; /* zFilename is a temporary file */ 211 u8 readOnly; /* True for a read-only database */ 212 u8 needSync; /* True if an fsync() is needed on the journal */ 213 u8 dirtyCache; /* True if cached pages have changed */ 214 u8 alwaysRollback; /* Disable dont_rollback() for all pages */ 215 u8 memDb; /* True to inhibit all file I/O */ 216 u8 *aInJournal; /* One bit for each page in the database file */ 217 u8 *aInStmt; /* One bit for each page in the database */ 218 PgHdr *pFirst, *pLast; /* List of free pages */ 219 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */ 220 PgHdr *pAll; /* List of all pages */ 221 PgHdr *pStmt; /* List of pages in the statement subjournal */ 222 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */ 223 int nMaster; /* Number of bytes to reserve for master j.p */ 224 BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */ 225 }; 226 227 /* 228 ** These are bits that can be set in Pager.errMask. 229 */ 230 #define PAGER_ERR_FULL 0x01 /* a write() failed */ 231 #define PAGER_ERR_MEM 0x02 /* malloc() failed */ 232 #define PAGER_ERR_LOCK 0x04 /* error in the locking protocol */ 233 #define PAGER_ERR_CORRUPT 0x08 /* database or journal corruption */ 234 #define PAGER_ERR_DISK 0x10 /* general disk I/O error - bad hard drive? */ 235 236 /* 237 ** Journal files begin with the following magic string. The data 238 ** was obtained from /dev/random. It is used only as a sanity check. 239 ** 240 ** Since version 2.8.0, the journal format contains additional sanity 241 ** checking information. If the power fails while the journal is begin 242 ** written, semi-random garbage data might appear in the journal 243 ** file after power is restored. If an attempt is then made 244 ** to roll the journal back, the database could be corrupted. 
The additional 245 ** sanity checking data is an attempt to discover the garbage in the 246 ** journal and ignore it. 247 ** 248 ** The sanity checking information for the new journal format consists 249 ** of a 32-bit checksum on each page of data. The checksum covers both 250 ** the page number and the SQLITE_PAGE_SIZE bytes of data for the page. 251 ** This cksum is initialized to a 32-bit random value that appears in the 252 ** journal file right after the header. The random initializer is important, 253 ** because garbage data that appears at the end of a journal is likely 254 ** data that was once in other files that have now been deleted. If the 255 ** garbage data came from an obsolete journal file, the checksums might 256 ** be correct. But by initializing the checksum to random value which 257 ** is different for every journal, we minimize that risk. 258 */ 259 static const unsigned char aJournalMagic[] = { 260 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, 261 }; 262 263 /* 264 ** The size of the header and of each page in the journal varies according 265 ** to which journal format is being used. The following macros figure out 266 ** the sizes based on format numbers. 267 */ 268 #define JOURNAL_HDR_SZ(pPager) (24 + (pPager)->nMaster) 269 #define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) 270 271 272 /* 273 ** Enable reference count tracking here: 274 */ 275 #ifdef SQLITE_TEST 276 int pager3_refinfo_enable = 0; 277 static void pager_refinfo(PgHdr *p){ 278 static int cnt = 0; 279 if( !pager3_refinfo_enable ) return; 280 printf( 281 "REFCNT: %4d addr=0x%08x nRef=%d\n", 282 p->pgno, (int)PGHDR_TO_DATA(p), p->nRef 283 ); 284 cnt++; /* Something to set a breakpoint on */ 285 } 286 # define REFINFO(X) pager_refinfo(X) 287 #else 288 # define REFINFO(X) 289 #endif 290 291 /* 292 ** Read a 32-bit integer from the given file descriptor. Store the integer 293 ** that is read in *pRes. 
Return SQLITE_OK if everything worked, or an 294 ** error code is something goes wrong. 295 */ 296 static int read32bits(OsFile *fd, u32 *pRes){ 297 u32 res; 298 int rc; 299 rc = sqlite3OsRead(fd, &res, sizeof(res)); 300 if( rc==SQLITE_OK ){ 301 unsigned char ac[4]; 302 memcpy(ac, &res, 4); 303 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3]; 304 } 305 *pRes = res; 306 return rc; 307 } 308 309 /* 310 ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK 311 ** on success or an error code is something goes wrong. 312 */ 313 static int write32bits(OsFile *fd, u32 val){ 314 unsigned char ac[4]; 315 ac[0] = (val>>24) & 0xff; 316 ac[1] = (val>>16) & 0xff; 317 ac[2] = (val>>8) & 0xff; 318 ac[3] = val & 0xff; 319 return sqlite3OsWrite(fd, ac, 4); 320 } 321 322 /* 323 ** Write a 32-bit integer into a page header right before the 324 ** page data. This will overwrite the PgHdr.pDirty pointer. 325 */ 326 static void store32bits(u32 val, PgHdr *p, int offset){ 327 unsigned char *ac; 328 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset]; 329 ac[0] = (val>>24) & 0xff; 330 ac[1] = (val>>16) & 0xff; 331 ac[2] = (val>>8) & 0xff; 332 ac[3] = val & 0xff; 333 } 334 335 336 /* 337 ** Convert the bits in the pPager->errMask into an approprate 338 ** return code. 339 */ 340 static int pager_errcode(Pager *pPager){ 341 int rc = SQLITE_OK; 342 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL; 343 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR; 344 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL; 345 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM; 346 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT; 347 return rc; 348 } 349 350 /* 351 ** Add or remove a page from the list of all pages that are in the 352 ** statement journal. 353 ** 354 ** The Pager keeps a separate list of pages that are currently in 355 ** the statement journal. 
This helps the sqlite3pager_stmt_commit() 356 ** routine run MUCH faster for the common case where there are many 357 ** pages in memory but only a few are in the statement journal. 358 */ 359 static void page_add_to_stmt_list(PgHdr *pPg){ 360 Pager *pPager = pPg->pPager; 361 if( pPg->inStmt ) return; 362 assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 ); 363 pPg->pPrevStmt = 0; 364 if( pPager->pStmt ){ 365 pPager->pStmt->pPrevStmt = pPg; 366 } 367 pPg->pNextStmt = pPager->pStmt; 368 pPager->pStmt = pPg; 369 pPg->inStmt = 1; 370 } 371 static void page_remove_from_stmt_list(PgHdr *pPg){ 372 if( !pPg->inStmt ) return; 373 if( pPg->pPrevStmt ){ 374 assert( pPg->pPrevStmt->pNextStmt==pPg ); 375 pPg->pPrevStmt->pNextStmt = pPg->pNextStmt; 376 }else{ 377 assert( pPg->pPager->pStmt==pPg ); 378 pPg->pPager->pStmt = pPg->pNextStmt; 379 } 380 if( pPg->pNextStmt ){ 381 assert( pPg->pNextStmt->pPrevStmt==pPg ); 382 pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt; 383 } 384 pPg->pNextStmt = 0; 385 pPg->pPrevStmt = 0; 386 pPg->inStmt = 0; 387 } 388 389 /* 390 ** Find a page in the hash table given its page number. Return 391 ** a pointer to the page or NULL if not found. 392 */ 393 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ 394 PgHdr *p = pPager->aHash[pager_hash(pgno)]; 395 while( p && p->pgno!=pgno ){ 396 p = p->pNextHash; 397 } 398 return p; 399 } 400 401 /* 402 ** Unlock the database and clear the in-memory cache. This routine 403 ** sets the state of the pager back to what it was when it was first 404 ** opened. Any outstanding pages are invalidated and subsequent attempts 405 ** to access those pages will likely result in a coredump. 
406 */ 407 static void pager_reset(Pager *pPager){ 408 PgHdr *pPg, *pNext; 409 for(pPg=pPager->pAll; pPg; pPg=pNext){ 410 pNext = pPg->pNextAll; 411 sqliteFree(pPg); 412 } 413 pPager->pFirst = 0; 414 pPager->pFirstSynced = 0; 415 pPager->pLast = 0; 416 pPager->pAll = 0; 417 memset(pPager->aHash, 0, sizeof(pPager->aHash)); 418 pPager->nPage = 0; 419 if( pPager->state>=PAGER_RESERVED ){ 420 sqlite3pager_rollback(pPager); 421 } 422 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 423 pPager->state = PAGER_UNLOCK; 424 pPager->dbSize = -1; 425 pPager->nRef = 0; 426 assert( pPager->journalOpen==0 ); 427 } 428 429 /* 430 ** When this routine is called, the pager has the journal file open and 431 ** a RESERVED or EXCLUSIVE lock on the database. This routine releases 432 ** the database lock and acquires a SHARED lock in its place. The journal 433 ** file is deleted and closed. 434 ** 435 ** TODO: Consider keeping the journal file open for temporary databases. 436 ** This might give a performance improvement on windows where opening 437 ** a file is an expensive operation. 438 */ 439 static int pager_unwritelock(Pager *pPager){ 440 PgHdr *pPg; 441 if( pPager->state<PAGER_RESERVED ){ 442 return SQLITE_OK; 443 } 444 sqlite3pager_stmt_commit(pPager); 445 if( pPager->stmtOpen ){ 446 sqlite3OsClose(&pPager->stfd); 447 pPager->stmtOpen = 0; 448 } 449 if( pPager->journalOpen ){ 450 sqlite3OsClose(&pPager->jfd); 451 pPager->journalOpen = 0; 452 sqlite3OsDelete(pPager->zJournal); 453 sqliteFree( pPager->aInJournal ); 454 pPager->aInJournal = 0; 455 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 456 pPg->inJournal = 0; 457 pPg->dirty = 0; 458 pPg->needSync = 0; 459 } 460 }else{ 461 assert( pPager->dirtyCache==0 || pPager->useJournal==0 ); 462 } 463 sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); 464 pPager->state = PAGER_SHARED; 465 return SQLITE_OK; 466 } 467 468 /* 469 ** Compute and return a checksum for the page of data. 470 ** 471 ** This is not a real checksum. 
It is really just the sum of the 472 ** random initial value and the page number. We considered do a checksum 473 ** of the database, but that was found to be too slow. 474 */ 475 static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){ 476 u32 cksum = pPager->cksumInit + pgno; 477 return cksum; 478 } 479 480 /* 481 ** Read a single page from the journal file opened on file descriptor 482 ** jfd. Playback this one page. 483 ** 484 ** 485 ** 486 ** There are three different journal formats. The format parameter determines 487 ** which format is used by the journal that is played back. 488 */ 489 static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){ 490 int rc; 491 PgHdr *pPg; /* An existing page in the cache */ 492 Pgno pgno; /* The page number of a page in journal */ 493 u32 cksum; /* Checksum used for sanity checking */ 494 u8 aData[SQLITE_PAGE_SIZE]; /* Store data here */ 495 496 rc = read32bits(jfd, &pgno); 497 if( rc!=SQLITE_OK ) return rc; 498 rc = sqlite3OsRead(jfd, &aData, pPager->pageSize); 499 if( rc!=SQLITE_OK ) return rc; 500 501 /* Sanity checking on the page. This is more important that I originally 502 ** thought. If a power failure occurs while the journal is being written, 503 ** it could cause invalid data to be written into the journal. We need to 504 ** detect this invalid data (with high probability) and ignore it. 505 */ 506 if( pgno==0 ){ 507 return SQLITE_DONE; 508 } 509 if( pgno>(unsigned)pPager->dbSize ){ 510 return SQLITE_OK; 511 } 512 if( useCksum ){ 513 rc = read32bits(jfd, &cksum); 514 if( rc ) return rc; 515 if( pager_cksum(pPager, pgno, aData)!=cksum ){ 516 return SQLITE_DONE; 517 } 518 } 519 520 assert( pPager->state==PAGER_RESERVED || pPager->state==PAGER_EXCLUSIVE ); 521 522 /* If the pager is in RESERVED state, then there must be a copy of this 523 ** page in the pager cache. In this case just update the pager cache, 524 ** not the database file. The page is left marked dirty in this case. 
525 ** 526 ** FIX ME: Ideally the page would only be left marked dirty when the 527 ** pager is in RESERVED state if it was dirty when this statement 528 ** transaction was started. 529 ** 530 ** If in EXCLUSIVE state, then we update the pager cache if it exists 531 ** and the main file. The page is then marked not dirty. 532 */ 533 pPg = pager_lookup(pPager, pgno); 534 assert( pPager->state==PAGER_EXCLUSIVE || pPg ); 535 TRACE2("PLAYBACK page %d\n", pgno); 536 if( pPager->state==PAGER_EXCLUSIVE ){ 537 sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE); 538 rc = sqlite3OsWrite(&pPager->fd, aData, SQLITE_PAGE_SIZE); 539 } 540 if( pPg ){ 541 /* No page should ever be rolled back that is in use, except for page 542 ** 1 which is held in use in order to keep the lock on the database 543 ** active. 544 */ 545 void *pData; 546 assert( pPg->nRef==0 || pPg->pgno==1 ); 547 pData = PGHDR_TO_DATA(pPg); 548 memcpy(pData, aData, pPager->pageSize); 549 if( pPager->xDestructor ){ 550 pPager->xDestructor(pData, pPager->pageSize); 551 } 552 if( pPager->state==PAGER_EXCLUSIVE ){ 553 pPg->dirty = 0; 554 pPg->needSync = 0; 555 } 556 557 CODEC(pPager, pData, pPg->pgno, 3); 558 } 559 return rc; 560 } 561 562 /* 563 ** Parameter zMaster is the name of a master journal file. A single journal 564 ** file that referred to the master journal file has just been rolled back. 565 ** This routine checks if it is possible to delete the master journal file, 566 ** and does so if it is. 567 */ 568 static int pager_delmaster(const char *zMaster){ 569 int rc; 570 int master_open = 0; 571 OsFile master; 572 char *zMasterJournal = 0; /* Contents of master journal file */ 573 off_t nMasterJournal; /* Size of master journal file */ 574 575 /* Open the master journal file exclusively in case some other process 576 ** is running this routine also. Not that it makes too much difference. 
577 */ 578 rc = sqlite3OsOpenExclusive(zMaster, &master, 0); 579 if( rc!=SQLITE_OK ) goto delmaster_out; 580 master_open = 1; 581 582 rc = sqlite3OsFileSize(&master, &nMasterJournal); 583 if( rc!=SQLITE_OK ) goto delmaster_out; 584 585 if( nMasterJournal>0 ){ 586 char *zDb; 587 zMasterJournal = (char *)sqliteMalloc(nMasterJournal); 588 if( !zMasterJournal ){ 589 rc = SQLITE_NOMEM; 590 goto delmaster_out; 591 } 592 rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal); 593 if( rc!=SQLITE_OK ) goto delmaster_out; 594 595 zDb = zMasterJournal; 596 while( (zDb-zMasterJournal)<nMasterJournal ){ 597 char *zJournal = 0; 598 sqlite3SetString(&zJournal, zDb, "-journal", 0); 599 if( !zJournal ){ 600 rc = SQLITE_NOMEM; 601 goto delmaster_out; 602 } 603 if( sqlite3OsFileExists(zJournal) ){ 604 /* One of the journals pointed to by the master journal exists. 605 ** Open it and check if it points at the master journal. If 606 ** so, return without deleting the master journal file. 607 */ 608 OsFile journal; 609 int nMaster; 610 off_t jsz; 611 612 rc = sqlite3OsOpenReadOnly(zJournal, &journal); 613 sqliteFree(zJournal); 614 if( rc!=SQLITE_OK ){ 615 sqlite3OsClose(&journal); 616 goto delmaster_out; 617 } 618 619 /* Check if the file is big enough to be a journal file 620 ** with the required master journal name. If not, ignore it. 621 */ 622 rc = sqlite3OsFileSize(&journal, &jsz); 623 if( rc!=SQLITE_OK ){ 624 sqlite3OsClose(&journal); 625 goto delmaster_out; 626 } 627 if( jsz<(25+strlen(zMaster)) ){ 628 sqlite3OsClose(&journal); 629 continue; 630 } 631 632 /* Seek to the point in the journal where the master journal name 633 ** is stored. Read the master journal name into memory obtained 634 ** from malloc. 
635 */ 636 rc = sqlite3OsSeek(&journal, 20); 637 if( rc!=SQLITE_OK ) goto delmaster_out; 638 rc = read32bits(&journal, (u32*)&nMaster); 639 if( rc!=SQLITE_OK ) goto delmaster_out; 640 if( nMaster>0 && nMaster>=strlen(zMaster)+1 ){ 641 char *zMasterPtr = (char *)sqliteMalloc(nMaster); 642 if( !zMasterPtr ){ 643 rc = SQLITE_NOMEM; 644 } 645 rc = sqlite3OsRead(&journal, zMasterPtr, nMaster); 646 if( rc!=SQLITE_OK ){ 647 sqliteFree(zMasterPtr); 648 goto delmaster_out; 649 } 650 if( 0==strncmp(zMasterPtr, zMaster, nMaster) ){ 651 /* We have a match. Do not delete the master journal file. */ 652 sqliteFree(zMasterPtr); 653 goto delmaster_out; 654 } 655 } 656 } 657 zDb += (strlen(zDb)+1); 658 } 659 } 660 661 sqlite3OsDelete(zMaster); 662 663 delmaster_out: 664 if( zMasterJournal ){ 665 sqliteFree(zMasterJournal); 666 } 667 if( master_open ){ 668 sqlite3OsClose(&master); 669 } 670 return rc; 671 } 672 673 /* 674 ** Make every page in the cache agree with what is on disk. In other words, 675 ** reread the disk to reset the state of the cache. 676 ** 677 ** This routine is called after a rollback in which some of the dirty cache 678 ** pages had never been written out to disk. We need to roll back the 679 ** cache content and the easiest way to do that is to reread the old content 680 ** back from the disk. 
681 */ 682 static int pager_reload_cache(Pager *pPager){ 683 PgHdr *pPg; 684 int rc = SQLITE_OK; 685 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 686 char zBuf[SQLITE_PAGE_SIZE]; 687 if( !pPg->dirty ) continue; 688 if( (int)pPg->pgno <= pPager->origDbSize ){ 689 sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1)); 690 rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE); 691 TRACE2("REFETCH page %d\n", pPg->pgno); 692 CODEC(pPager, zBuf, pPg->pgno, 2); 693 if( rc ) break; 694 }else{ 695 memset(zBuf, 0, SQLITE_PAGE_SIZE); 696 } 697 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){ 698 memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE); 699 if( pPager->xReiniter ){ 700 pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize); 701 }else{ 702 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra); 703 } 704 } 705 pPg->needSync = 0; 706 pPg->dirty = 0; 707 } 708 return rc; 709 } 710 711 712 /* 713 ** Playback the journal and thus restore the database file to 714 ** the state it was in before we started making changes. 715 ** 716 ** The journal file format is as follows: 717 ** 718 ** (1) 8 byte prefix. A copy of aJournalMagic[]. 719 ** (2) 4 byte big-endian integer which is the number of valid page records 720 ** in the journal. If this value is 0xffffffff, then compute the 721 ** number of page records from the journal size. 722 ** (3) 4 byte big-endian integer which is the initial value for the 723 ** sanity checksum. 724 ** (4) 4 byte integer which is the number of pages to truncate the 725 ** database to during a rollback. 726 ** (5) 4 byte integer which is the number of bytes in the master journal 727 ** name. The value may be zero (indicate that there is no master 728 ** journal.) 729 ** (6) N bytes of the master journal name. The name will be nul-terminated 730 ** and might be shorter than the value read from (5). If the first byte 731 ** of the name is \000 then there is no master journal. 
**      The master journal name is stored in UTF-8.
**  (7) Zero or more page instances, each as follows:
**        +  4 byte page number.
**        +  pPager->pageSize bytes of data.
**        +  4 byte checksum
**
** When we speak of the journal header, we mean the first 6 items above.
** Each entry in the journal is an instance of the 7th item.
**
** Call the value from the second bullet "nRec".  nRec is the number of
** valid page entries in the journal.  In most cases, you can compute the
** value of nRec from the size of the journal file.  But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we normally use the nRec value in the header; the
** exceptions are described next.
**
** If the nRec value is 0xffffffff, or if the useJournalSize parameter is
** true, nRec is computed from the file size instead.  The value
** 0xffffffff is used when the user selects the no-sync option for the
** journal.  A power failure could lead to corruption in this case.  But
** for things like temporary tables (which will be deleted when the power
** is restored) we don't care.
**
** If the file opened as the journal file is not a well-formed
** journal file then the database will likely already be
** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
** and SQLITE_CORRUPT is returned.  If it all works, then this routine
** returns SQLITE_OK.
761 */ 762 static int pager_playback(Pager *pPager, int useJournalSize){ 763 off_t szJ; /* Size of the journal file in bytes */ 764 int nRec; /* Number of Records in the journal */ 765 int i; /* Loop counter */ 766 Pgno mxPg = 0; /* Size of the original file in pages */ 767 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 768 int rc; /* Result code of a subroutine */ 769 int nMaster; /* Number of bytes in the name of master journal */ 770 char *zMaster = 0; /* Name of master journal file if any */ 771 772 /* Figure out how many records are in the journal. Abort early if 773 ** the journal is empty. 774 */ 775 assert( pPager->journalOpen ); 776 sqlite3OsSeek(&pPager->jfd, 0); 777 rc = sqlite3OsFileSize(&pPager->jfd, &szJ); 778 if( rc!=SQLITE_OK ){ 779 goto end_playback; 780 } 781 782 /* If the journal file is too small to contain a complete header, 783 ** it must mean that the process that created the journal was just 784 ** beginning to write the journal file when it died. In that case, 785 ** the database file should have still been completely unchanged. 786 ** Nothing needs to be rolled back. We can safely ignore this journal. 787 */ 788 if( szJ < 24 ){ 789 goto end_playback; 790 } 791 792 /* (1) Read the beginning of the journal and verify the magic string 793 ** at the beginning of the journal. */ 794 rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic)); 795 if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ 796 rc = SQLITE_PROTOCOL; 797 goto end_playback; 798 } 799 800 /* (2) Read the number of pages stored in the journal. 
*/ 801 rc = read32bits(&pPager->jfd, (u32*)&nRec); 802 if( rc ) goto end_playback; 803 if( nRec==0xffffffff || useJournalSize ){ 804 nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager); 805 } 806 807 /* (3) Read the initial value for the sanity checksum */ 808 rc = read32bits(&pPager->jfd, &pPager->cksumInit); 809 if( rc ) goto end_playback; 810 811 /* (4) Read the number of pages in the database file prior to the 812 ** start of the transaction */ 813 rc = read32bits(&pPager->jfd, &mxPg); 814 if( rc!=SQLITE_OK ){ 815 goto end_playback; 816 } 817 818 /* (5) and (6): Check if a master journal file is specified. If one is 819 ** specified, only proceed with the playback if it still exists. */ 820 rc = read32bits(&pPager->jfd, &nMaster); 821 if( rc ) goto end_playback; 822 if( nMaster>0 ){ 823 zMaster = sqliteMalloc(nMaster); 824 if( !zMaster ){ 825 rc = SQLITE_NOMEM; 826 goto end_playback; 827 } 828 rc = sqlite3OsRead(&pPager->jfd, zMaster, nMaster); 829 if( rc!=SQLITE_OK || (zMaster[0] && !sqlite3OsFileExists(zMaster)) ){ 830 goto end_playback; 831 } 832 } 833 834 /* Truncate the database file back to it's original size */ 835 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg ); 836 rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg); 837 if( rc!=SQLITE_OK ){ 838 goto end_playback; 839 } 840 pPager->dbSize = mxPg; 841 842 /* Copy original pages out of the journal and back into the database file. 843 */ 844 for(i=0; i<nRec; i++){ 845 rc = pager_playback_one_page(pPager, &pPager->jfd, 1); 846 if( rc!=SQLITE_OK ){ 847 if( rc==SQLITE_DONE ){ 848 rc = SQLITE_OK; 849 } 850 break; 851 } 852 } 853 854 /* Pages that have been written to the journal but never synced 855 ** where not restored by the loop above. We have to restore those 856 ** pages by reading them back from the original database. 
857 */ 858 if( rc==SQLITE_OK ){ 859 pager_reload_cache(pPager); 860 } 861 862 end_playback: 863 if( zMaster ){ 864 /* If there was a master journal and this routine will return true, 865 ** see if it is possible to delete the master journal. If errors 866 ** occur during this process, ignore them. 867 */ 868 if( rc==SQLITE_OK ){ 869 pager_delmaster(zMaster); 870 } 871 sqliteFree(zMaster); 872 } 873 if( rc!=SQLITE_OK ){ 874 /* FIX ME: We shouldn't delete the journal if an error occured during 875 ** rollback. It may have been a transient error and the rollback may 876 ** succeed next time it is attempted. 877 */ 878 pager_unwritelock(pPager); 879 pPager->errMask |= PAGER_ERR_CORRUPT; 880 rc = SQLITE_CORRUPT; 881 }else{ 882 rc = pager_unwritelock(pPager); 883 } 884 return rc; 885 } 886 887 /* 888 ** Playback the statement journal. 889 ** 890 ** This is similar to playing back the transaction journal but with 891 ** a few extra twists. 892 ** 893 ** (1) The number of pages in the database file at the start of 894 ** the statement is stored in pPager->stmtSize, not in the 895 ** journal file itself. 896 ** 897 ** (2) In addition to playing back the statement journal, also 898 ** playback all pages of the transaction journal beginning 899 ** at offset pPager->stmtJSize. 900 */ 901 static int pager_stmt_playback(Pager *pPager){ 902 off_t szJ; /* Size of the full journal */ 903 int nRec; /* Number of Records */ 904 int i; /* Loop counter */ 905 int rc; 906 907 /* Truncate the database back to its original size. 908 */ 909 rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->stmtSize); 910 pPager->dbSize = pPager->stmtSize; 911 912 /* Figure out how many records are in the statement journal. 913 */ 914 assert( pPager->stmtInUse && pPager->journalOpen ); 915 sqlite3OsSeek(&pPager->stfd, 0); 916 nRec = pPager->stmtNRec; 917 918 /* Copy original pages out of the statement journal and back into the 919 ** database file. 
Note that the statement journal omits checksums from 920 ** each record since power-failure recovery is not important to statement 921 ** journals. 922 */ 923 for(i=nRec-1; i>=0; i--){ 924 rc = pager_playback_one_page(pPager, &pPager->stfd, 0); 925 assert( rc!=SQLITE_DONE ); 926 if( rc!=SQLITE_OK ) goto end_stmt_playback; 927 } 928 929 /* Figure out how many pages need to be copied out of the transaction 930 ** journal. 931 */ 932 rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize); 933 if( rc!=SQLITE_OK ){ 934 goto end_stmt_playback; 935 } 936 rc = sqlite3OsFileSize(&pPager->jfd, &szJ); 937 if( rc!=SQLITE_OK ){ 938 goto end_stmt_playback; 939 } 940 nRec = (szJ - pPager->stmtJSize)/JOURNAL_PG_SZ(pPager); 941 for(i=nRec-1; i>=0; i--){ 942 rc = pager_playback_one_page(pPager, &pPager->jfd, 1); 943 if( rc!=SQLITE_OK ){ 944 assert( rc!=SQLITE_DONE ); 945 goto end_stmt_playback; 946 } 947 } 948 949 end_stmt_playback: 950 if( rc!=SQLITE_OK ){ 951 pPager->errMask |= PAGER_ERR_CORRUPT; 952 rc = SQLITE_CORRUPT; 953 } 954 return rc; 955 } 956 957 /* 958 ** Change the maximum number of in-memory pages that are allowed. 959 ** 960 ** The maximum number is the absolute value of the mxPage parameter. 961 ** If mxPage is negative, the noSync flag is also set. noSync bypasses 962 ** calls to sqlite3OsSync(). The pager runs much faster with noSync on, 963 ** but if the operating system crashes or there is an abrupt power 964 ** failure, the database file might be left in an inconsistent and 965 ** unrepairable state. 
966 */ 967 void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){ 968 if( mxPage>=0 ){ 969 pPager->noSync = pPager->tempFile; 970 if( pPager->noSync ) pPager->needSync = 0; 971 }else{ 972 pPager->noSync = 1; 973 mxPage = -mxPage; 974 } 975 if( mxPage>10 ){ 976 pPager->mxPage = mxPage; 977 } 978 } 979 980 /* 981 ** Adjust the robustness of the database to damage due to OS crashes 982 ** or power failures by changing the number of syncs()s when writing 983 ** the rollback journal. There are three levels: 984 ** 985 ** OFF sqlite3OsSync() is never called. This is the default 986 ** for temporary and transient files. 987 ** 988 ** NORMAL The journal is synced once before writes begin on the 989 ** database. This is normally adequate protection, but 990 ** it is theoretically possible, though very unlikely, 991 ** that an inopertune power failure could leave the journal 992 ** in a state which would cause damage to the database 993 ** when it is rolled back. 994 ** 995 ** FULL The journal is synced twice before writes begin on the 996 ** database (with some additional information - the nRec field 997 ** of the journal header - being written in between the two 998 ** syncs). If we assume that writing a 999 ** single disk sector is atomic, then this mode provides 1000 ** assurance that the journal will not be corrupted to the 1001 ** point of causing damage to the database during rollback. 1002 ** 1003 ** Numeric values associated with these states are OFF==1, NORMAL=2, 1004 ** and FULL=3. 1005 */ 1006 void sqlite3pager_set_safety_level(Pager *pPager, int level){ 1007 pPager->noSync = level==1 || pPager->tempFile; 1008 pPager->fullSync = level==3 && !pPager->tempFile; 1009 if( pPager->noSync ) pPager->needSync = 0; 1010 } 1011 1012 /* 1013 ** Open a temporary file. Write the name of the file into zName 1014 ** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write 1015 ** the file descriptor into *fd. 
Return SQLITE_OK on success or some 1016 ** other error code if we fail. 1017 ** 1018 ** The OS will automatically delete the temporary file when it is 1019 ** closed. 1020 */ 1021 static int sqlite3pager_opentemp(char *zFile, OsFile *fd){ 1022 int cnt = 8; 1023 int rc; 1024 do{ 1025 cnt--; 1026 sqlite3OsTempFileName(zFile); 1027 rc = sqlite3OsOpenExclusive(zFile, fd, 1); 1028 }while( cnt>0 && rc!=SQLITE_OK ); 1029 return rc; 1030 } 1031 1032 /* 1033 ** Create a new page cache and put a pointer to the page cache in *ppPager. 1034 ** The file to be cached need not exist. The file is not locked until 1035 ** the first call to sqlite3pager_get() and is only held open until the 1036 ** last page is released using sqlite3pager_unref(). 1037 ** 1038 ** If zFilename is NULL then a randomly-named temporary file is created 1039 ** and used as the file to be cached. The file will be deleted 1040 ** automatically when it is closed. 1041 */ 1042 int sqlite3pager_open( 1043 Pager **ppPager, /* Return the Pager structure here */ 1044 const char *zFilename, /* Name of the database file to open */ 1045 int mxPage, /* Max number of in-memory cache pages */ 1046 int nExtra, /* Extra bytes append to each in-memory page */ 1047 int useJournal, /* TRUE to use a rollback journal on this file */ 1048 void *pBusyHandler /* Busy callback */ 1049 ){ 1050 Pager *pPager; 1051 char *zFullPathname; 1052 int nameLen; 1053 OsFile fd; 1054 int rc, i; 1055 int tempFile; 1056 int memDb = 0; 1057 int readOnly = 0; 1058 char zTemp[SQLITE_TEMPNAME_SIZE]; 1059 1060 *ppPager = 0; 1061 if( sqlite3_malloc_failed ){ 1062 return SQLITE_NOMEM; 1063 } 1064 if( zFilename && zFilename[0] ){ 1065 if( strcmp(zFilename,":memory:")==0 ){ 1066 memDb = 1; 1067 zFullPathname = sqliteMalloc(4); 1068 if( zFullPathname ) strcpy(zFullPathname, ""); 1069 rc = SQLITE_OK; 1070 }else{ 1071 zFullPathname = sqlite3OsFullPathname(zFilename); 1072 rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly); 1073 tempFile = 0; 1074 
} 1075 }else{ 1076 rc = sqlite3pager_opentemp(zTemp, &fd); 1077 zFilename = zTemp; 1078 zFullPathname = sqlite3OsFullPathname(zFilename); 1079 tempFile = 1; 1080 } 1081 if( sqlite3_malloc_failed ){ 1082 return SQLITE_NOMEM; 1083 } 1084 if( rc!=SQLITE_OK ){ 1085 sqliteFree(zFullPathname); 1086 return SQLITE_CANTOPEN; 1087 } 1088 nameLen = strlen(zFullPathname); 1089 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 ); 1090 if( pPager==0 ){ 1091 sqlite3OsClose(&fd); 1092 sqliteFree(zFullPathname); 1093 return SQLITE_NOMEM; 1094 } 1095 SET_PAGER(pPager); 1096 pPager->zFilename = (char*)&pPager[1]; 1097 pPager->zDirectory = &pPager->zFilename[nameLen+1]; 1098 pPager->zJournal = &pPager->zDirectory[nameLen+1]; 1099 strcpy(pPager->zFilename, zFullPathname); 1100 strcpy(pPager->zDirectory, zFullPathname); 1101 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){} 1102 if( i>0 ) pPager->zDirectory[i-1] = 0; 1103 strcpy(pPager->zJournal, zFullPathname); 1104 sqliteFree(zFullPathname); 1105 strcpy(&pPager->zJournal[nameLen], "-journal"); 1106 pPager->fd = fd; 1107 pPager->journalOpen = 0; 1108 pPager->useJournal = useJournal && !memDb; 1109 pPager->stmtOpen = 0; 1110 pPager->stmtInUse = 0; 1111 pPager->nRef = 0; 1112 pPager->dbSize = memDb-1; 1113 pPager->pageSize = SQLITE_PAGE_SIZE; 1114 pPager->stmtSize = 0; 1115 pPager->stmtJSize = 0; 1116 pPager->nPage = 0; 1117 pPager->mxPage = mxPage>5 ? 
mxPage : 10; 1118 pPager->state = PAGER_UNLOCK; 1119 pPager->errMask = 0; 1120 pPager->tempFile = tempFile; 1121 pPager->memDb = memDb; 1122 pPager->readOnly = readOnly; 1123 pPager->needSync = 0; 1124 pPager->noSync = pPager->tempFile || !useJournal; 1125 pPager->pFirst = 0; 1126 pPager->pFirstSynced = 0; 1127 pPager->pLast = 0; 1128 pPager->nExtra = nExtra; 1129 pPager->pBusyHandler = (BusyHandler *)pBusyHandler; 1130 memset(pPager->aHash, 0, sizeof(pPager->aHash)); 1131 *ppPager = pPager; 1132 return SQLITE_OK; 1133 } 1134 1135 /* 1136 ** Set the destructor for this pager. If not NULL, the destructor is called 1137 ** when the reference count on each page reaches zero. The destructor can 1138 ** be used to clean up information in the extra segment appended to each page. 1139 ** 1140 ** The destructor is not called as a result sqlite3pager_close(). 1141 ** Destructors are only called by sqlite3pager_unref(). 1142 */ 1143 void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){ 1144 pPager->xDestructor = xDesc; 1145 } 1146 1147 /* 1148 ** Set the reinitializer for this pager. If not NULL, the reinitializer 1149 ** is called when the content of a page in cache is restored to its original 1150 ** value as a result of a rollback. The callback gives higher-level code 1151 ** an opportunity to restore the EXTRA section to agree with the restored 1152 ** page data. 1153 */ 1154 void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){ 1155 pPager->xReiniter = xReinit; 1156 } 1157 1158 /* 1159 ** Return the total number of pages in the disk file associated with 1160 ** pPager. 
1161 */ 1162 int sqlite3pager_pagecount(Pager *pPager){ 1163 off_t n; 1164 assert( pPager!=0 ); 1165 if( pPager->dbSize>=0 ){ 1166 return pPager->dbSize; 1167 } 1168 if( sqlite3OsFileSize(&pPager->fd, &n)!=SQLITE_OK ){ 1169 pPager->errMask |= PAGER_ERR_DISK; 1170 return 0; 1171 } 1172 n /= SQLITE_PAGE_SIZE; 1173 if( pPager->state!=PAGER_UNLOCK ){ 1174 pPager->dbSize = n; 1175 } 1176 return n; 1177 } 1178 1179 /* 1180 ** Forward declaration 1181 */ 1182 static int syncJournal(Pager*, const char*); 1183 1184 1185 /* 1186 ** Unlink a page from the free list (the list of all pages where nRef==0) 1187 ** and from its hash collision chain. 1188 */ 1189 static void unlinkPage(PgHdr *pPg){ 1190 Pager *pPager = pPg->pPager; 1191 1192 /* Keep the pFirstSynced pointer pointing at the first synchronized page */ 1193 if( pPg==pPager->pFirstSynced ){ 1194 PgHdr *p = pPg->pNextFree; 1195 while( p && p->needSync ){ p = p->pNextFree; } 1196 pPager->pFirstSynced = p; 1197 } 1198 1199 /* Unlink from the freelist */ 1200 if( pPg->pPrevFree ){ 1201 pPg->pPrevFree->pNextFree = pPg->pNextFree; 1202 }else{ 1203 assert( pPager->pFirst==pPg ); 1204 pPager->pFirst = pPg->pNextFree; 1205 } 1206 if( pPg->pNextFree ){ 1207 pPg->pNextFree->pPrevFree = pPg->pPrevFree; 1208 }else{ 1209 assert( pPager->pLast==pPg ); 1210 pPager->pLast = pPg->pPrevFree; 1211 } 1212 pPg->pNextFree = pPg->pPrevFree = 0; 1213 1214 /* Unlink from the pgno hash table */ 1215 if( pPg->pNextHash ){ 1216 pPg->pNextHash->pPrevHash = pPg->pPrevHash; 1217 } 1218 if( pPg->pPrevHash ){ 1219 pPg->pPrevHash->pNextHash = pPg->pNextHash; 1220 }else{ 1221 int h = pager_hash(pPg->pgno); 1222 assert( pPager->aHash[h]==pPg ); 1223 pPager->aHash[h] = pPg->pNextHash; 1224 } 1225 pPg->pNextHash = pPg->pPrevHash = 0; 1226 } 1227 1228 /* 1229 ** This routine is used to truncate an in-memory database. Delete 1230 ** every pages whose pgno is larger than pPager->dbSize and is unreferenced. 
1231 ** Referenced pages larger than pPager->dbSize are zeroed. 1232 */ 1233 static void memoryTruncate(Pager *pPager){ 1234 PgHdr *pPg; 1235 PgHdr **ppPg; 1236 int dbSize = pPager->dbSize; 1237 1238 ppPg = &pPager->pAll; 1239 while( (pPg = *ppPg)!=0 ){ 1240 if( pPg->pgno<=dbSize ){ 1241 ppPg = &pPg->pNextAll; 1242 }else if( pPg->nRef>0 ){ 1243 memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); 1244 ppPg = &pPg->pNextAll; 1245 }else{ 1246 *ppPg = pPg->pNextAll; 1247 unlinkPage(pPg); 1248 sqliteFree(pPg); 1249 pPager->nPage--; 1250 } 1251 } 1252 } 1253 1254 /* 1255 ** Truncate the file to the number of pages specified. 1256 */ 1257 int sqlite3pager_truncate(Pager *pPager, Pgno nPage){ 1258 int rc; 1259 if( pPager->dbSize<0 ){ 1260 sqlite3pager_pagecount(pPager); 1261 } 1262 if( pPager->errMask!=0 ){ 1263 rc = pager_errcode(pPager); 1264 return rc; 1265 } 1266 if( nPage>=(unsigned)pPager->dbSize ){ 1267 return SQLITE_OK; 1268 } 1269 if( pPager->memDb ){ 1270 pPager->dbSize = nPage; 1271 memoryTruncate(pPager); 1272 return SQLITE_OK; 1273 } 1274 syncJournal(pPager, 0); 1275 rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage); 1276 if( rc==SQLITE_OK ){ 1277 pPager->dbSize = nPage; 1278 } 1279 return rc; 1280 } 1281 1282 /* 1283 ** Shutdown the page cache. Free all memory and close all files. 1284 ** 1285 ** If a transaction was in progress when this routine is called, that 1286 ** transaction is rolled back. All outstanding pages are invalidated 1287 ** and their memory is freed. Any attempt to use a page associated 1288 ** with this page cache after this function returns will likely 1289 ** result in a coredump. 
1290 */ 1291 int sqlite3pager_close(Pager *pPager){ 1292 PgHdr *pPg, *pNext; 1293 switch( pPager->state ){ 1294 case PAGER_RESERVED: 1295 case PAGER_EXCLUSIVE: { 1296 sqlite3pager_rollback(pPager); 1297 if( !pPager->memDb ){ 1298 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 1299 } 1300 assert( pPager->journalOpen==0 ); 1301 break; 1302 } 1303 case PAGER_SHARED: { 1304 if( !pPager->memDb ){ 1305 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 1306 } 1307 break; 1308 } 1309 default: { 1310 /* Do nothing */ 1311 break; 1312 } 1313 } 1314 for(pPg=pPager->pAll; pPg; pPg=pNext){ 1315 pNext = pPg->pNextAll; 1316 sqliteFree(pPg); 1317 } 1318 if( !pPager->memDb ){ 1319 sqlite3OsClose(&pPager->fd); 1320 } 1321 assert( pPager->journalOpen==0 ); 1322 /* Temp files are automatically deleted by the OS 1323 ** if( pPager->tempFile ){ 1324 ** sqlite3OsDelete(pPager->zFilename); 1325 ** } 1326 */ 1327 CLR_PAGER(pPager); 1328 if( pPager->zFilename!=(char*)&pPager[1] ){ 1329 assert( 0 ); /* Cannot happen */ 1330 sqliteFree(pPager->zFilename); 1331 sqliteFree(pPager->zJournal); 1332 sqliteFree(pPager->zDirectory); 1333 } 1334 sqliteFree(pPager); 1335 return SQLITE_OK; 1336 } 1337 1338 /* 1339 ** Return the page number for the given page data. 1340 */ 1341 Pgno sqlite3pager_pagenumber(void *pData){ 1342 PgHdr *p = DATA_TO_PGHDR(pData); 1343 return p->pgno; 1344 } 1345 1346 /* 1347 ** The page_ref() function increments the reference count for a page. 1348 ** If the page is currently on the freelist (the reference count is zero) then 1349 ** remove it from the freelist. 1350 ** 1351 ** For non-test systems, page_ref() is a macro that calls _page_ref() 1352 ** online of the reference count is zero. For test systems, page_ref() 1353 ** is a real function so that we can set breakpoints and trace it. 1354 */ 1355 static void _page_ref(PgHdr *pPg){ 1356 if( pPg->nRef==0 ){ 1357 /* The page is currently on the freelist. Remove it. 
*/ 1358 if( pPg==pPg->pPager->pFirstSynced ){ 1359 PgHdr *p = pPg->pNextFree; 1360 while( p && p->needSync ){ p = p->pNextFree; } 1361 pPg->pPager->pFirstSynced = p; 1362 } 1363 if( pPg->pPrevFree ){ 1364 pPg->pPrevFree->pNextFree = pPg->pNextFree; 1365 }else{ 1366 pPg->pPager->pFirst = pPg->pNextFree; 1367 } 1368 if( pPg->pNextFree ){ 1369 pPg->pNextFree->pPrevFree = pPg->pPrevFree; 1370 }else{ 1371 pPg->pPager->pLast = pPg->pPrevFree; 1372 } 1373 pPg->pPager->nRef++; 1374 } 1375 pPg->nRef++; 1376 REFINFO(pPg); 1377 } 1378 #ifdef SQLITE_TEST 1379 static void page_ref(PgHdr *pPg){ 1380 if( pPg->nRef==0 ){ 1381 _page_ref(pPg); 1382 }else{ 1383 pPg->nRef++; 1384 REFINFO(pPg); 1385 } 1386 } 1387 #else 1388 # define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++) 1389 #endif 1390 1391 /* 1392 ** Increment the reference count for a page. The input pointer is 1393 ** a reference to the page data. 1394 */ 1395 int sqlite3pager_ref(void *pData){ 1396 PgHdr *pPg = DATA_TO_PGHDR(pData); 1397 page_ref(pPg); 1398 return SQLITE_OK; 1399 } 1400 1401 /* 1402 ** Sync the journal. In other words, make sure all the pages that have 1403 ** been written to the journal have actually reached the surface of the 1404 ** disk. It is not safe to modify the original database file until after 1405 ** the journal has been synced. If the original database is modified before 1406 ** the journal is synced and a power failure occurs, the unsynced journal 1407 ** data would be lost and we would be unable to completely rollback the 1408 ** database changes. Database corruption would occur. 1409 ** 1410 ** This routine also updates the nRec field in the header of the journal. 1411 ** (See comments on the pager_playback() routine for additional information.) 1412 ** If the sync mode is FULL, two syncs will occur. First the whole journal 1413 ** is synced, then the nRec field is updated, then a second sync occurs. 
1414 ** 1415 ** For temporary databases, we do not care if we are able to rollback 1416 ** after a power failure, so sync occurs. 1417 ** 1418 ** This routine clears the needSync field of every page current held in 1419 ** memory. 1420 */ 1421 static int syncJournal(Pager *pPager, const char *zMaster){ 1422 PgHdr *pPg; 1423 int rc = SQLITE_OK; 1424 1425 /* Sync the journal before modifying the main database 1426 ** (assuming there is a journal and it needs to be synced.) 1427 */ 1428 if( pPager->needSync || zMaster ){ 1429 if( !pPager->tempFile ){ 1430 assert( pPager->journalOpen ); 1431 /* assert( !pPager->noSync ); // noSync might be set if synchronous 1432 ** was turned off after the transaction was started. Ticket #615 */ 1433 #ifndef NDEBUG 1434 { 1435 /* Make sure the pPager->nRec counter we are keeping agrees 1436 ** with the nRec computed from the size of the journal file. 1437 */ 1438 off_t hdrSz, pgSz, jSz; 1439 hdrSz = JOURNAL_HDR_SZ(pPager); 1440 pgSz = JOURNAL_PG_SZ(pPager); 1441 rc = sqlite3OsFileSize(&pPager->jfd, &jSz); 1442 if( rc!=0 ) return rc; 1443 assert( pPager->nRec*pgSz+hdrSz==jSz ); 1444 } 1445 #endif 1446 { 1447 /* Write the nRec value into the journal file header */ 1448 off_t szJ; 1449 if( pPager->fullSync ){ 1450 TRACE2("SYNC journal of %d\n", pPager->fd.h); 1451 rc = sqlite3OsSync(&pPager->jfd); 1452 if( rc!=0 ) return rc; 1453 } 1454 sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic)); 1455 rc = write32bits(&pPager->jfd, pPager->nRec); 1456 if( rc ) return rc; 1457 1458 /* Write the name of the master journal file if one is specified */ 1459 if( zMaster ){ 1460 assert( strlen(zMaster)<pPager->nMaster ); 1461 rc = sqlite3OsSeek(&pPager->jfd, 20); 1462 if( rc ) return rc; 1463 rc = sqlite3OsWrite(&pPager->jfd, zMaster, strlen(zMaster)+1); 1464 if( rc ) return rc; 1465 } 1466 1467 szJ = JOURNAL_HDR_SZ(pPager) + pPager->nRec*JOURNAL_PG_SZ(pPager); 1468 sqlite3OsSeek(&pPager->jfd, szJ); 1469 } 1470 TRACE2("SYNC journal of %d\n", 
pPager->fd.h); 1471 rc = sqlite3OsSync(&pPager->jfd); 1472 if( rc!=0 ) return rc; 1473 pPager->journalStarted = 1; 1474 } 1475 pPager->needSync = 0; 1476 1477 /* Erase the needSync flag from every page. 1478 */ 1479 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 1480 pPg->needSync = 0; 1481 } 1482 pPager->pFirstSynced = pPager->pFirst; 1483 } 1484 1485 #ifndef NDEBUG 1486 /* If the Pager.needSync flag is clear then the PgHdr.needSync 1487 ** flag must also be clear for all pages. Verify that this 1488 ** invariant is true. 1489 */ 1490 else{ 1491 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 1492 assert( pPg->needSync==0 ); 1493 } 1494 assert( pPager->pFirstSynced==pPager->pFirst ); 1495 } 1496 #endif 1497 1498 return rc; 1499 } 1500 1501 /* 1502 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write 1503 ** every one of those pages out to the database file and mark them all 1504 ** as clean. 1505 */ 1506 static int pager_write_pagelist(PgHdr *pList){ 1507 Pager *pPager; 1508 int rc; 1509 int busy = 1; 1510 1511 if( pList==0 ) return SQLITE_OK; 1512 pPager = pList->pPager; 1513 1514 /* At this point there may be either a RESERVED or EXCLUSIVE lock on the 1515 ** database file. If there is already an EXCLUSIVE lock, the following 1516 ** calls to sqlite3OsLock() are no-ops. 1517 ** 1518 ** Moving the lock from RESERVED to EXCLUSIVE actually involves going 1519 ** through an intermediate state PENDING. A PENDING lock prevents new 1520 ** readers from attaching to the database but is unsufficient for us to 1521 ** write. The idea of a PENDING lock is to prevent new readers from 1522 ** coming in while we wait for existing readers to clear. 1523 ** 1524 ** While the pager is in the RESERVED state, the original database file 1525 ** is unchanged and we can rollback without having to playback the 1526 ** journal into the original database file. 
Once we transition to 1527 ** EXCLUSIVE, it means the database file has been changed and any rollback 1528 ** will require a journal playback. 1529 */ 1530 do { 1531 rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); 1532 }while( rc==SQLITE_BUSY && 1533 pPager->pBusyHandler && 1534 pPager->pBusyHandler->xFunc && 1535 pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++) 1536 ); 1537 if( rc!=SQLITE_OK ){ 1538 return rc; 1539 } 1540 pPager->state = PAGER_EXCLUSIVE; 1541 1542 while( pList ){ 1543 assert( pList->dirty ); 1544 sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE); 1545 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6); 1546 TRACE2("STORE page %d\n", pList->pgno); 1547 rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE); 1548 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0); 1549 if( rc ) return rc; 1550 pList->dirty = 0; 1551 pList = pList->pDirty; 1552 } 1553 return SQLITE_OK; 1554 } 1555 1556 /* 1557 ** Collect every dirty page into a dirty list and 1558 ** return a pointer to the head of that list. All pages are 1559 ** collected even if they are still in use. 1560 */ 1561 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){ 1562 PgHdr *p, *pList; 1563 pList = 0; 1564 for(p=pPager->pAll; p; p=p->pNextAll){ 1565 if( p->dirty ){ 1566 p->pDirty = pList; 1567 pList = p; 1568 } 1569 } 1570 return pList; 1571 } 1572 1573 /* 1574 ** Acquire a page. 1575 ** 1576 ** A read lock on the disk file is obtained when the first page is acquired. 1577 ** This read lock is dropped when the last page is released. 1578 ** 1579 ** A _get works for any page number greater than 0. If the database 1580 ** file is smaller than the requested page, then no actual disk 1581 ** read occurs and the memory image of the page is initialized to 1582 ** all zeros. The extra data appended to a page is always initialized 1583 ** to zeros the first time a page is loaded into memory. 
1584 ** 1585 ** The acquisition might fail for several reasons. In all cases, 1586 ** an appropriate error code is returned and *ppPage is set to NULL. 1587 ** 1588 ** See also sqlite3pager_lookup(). Both this routine and _lookup() attempt 1589 ** to find a page in the in-memory cache first. If the page is not already 1590 ** in memory, this routine goes to disk to read it in whereas _lookup() 1591 ** just returns 0. This routine acquires a read-lock the first time it 1592 ** has to go to disk, and could also playback an old journal if necessary. 1593 ** Since _lookup() never goes to disk, it never has to deal with locks 1594 ** or journal files. 1595 */ 1596 int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){ 1597 PgHdr *pPg; 1598 int rc; 1599 1600 /* Make sure we have not hit any critical errors. 1601 */ 1602 assert( pPager!=0 ); 1603 assert( pgno!=0 ); 1604 *ppPage = 0; 1605 if( pPager->errMask & ~(PAGER_ERR_FULL) ){ 1606 return pager_errcode(pPager); 1607 } 1608 1609 /* If this is the first page accessed, then get a SHARED lock 1610 ** on the database file. 1611 */ 1612 if( pPager->nRef==0 && !pPager->memDb ){ 1613 int busy = 1; 1614 do { 1615 rc = sqlite3OsLock(&pPager->fd, SHARED_LOCK); 1616 }while( rc==SQLITE_BUSY && 1617 pPager->pBusyHandler && 1618 pPager->pBusyHandler->xFunc && 1619 pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++) 1620 ); 1621 if( rc!=SQLITE_OK ){ 1622 return rc; 1623 } 1624 pPager->state = PAGER_SHARED; 1625 1626 /* If a journal file exists, and there is no RESERVED lock on the 1627 ** database file, then it either needs to be played back or deleted. 1628 */ 1629 if( pPager->useJournal && 1630 sqlite3OsFileExists(pPager->zJournal) && 1631 !sqlite3OsCheckReservedLock(&pPager->fd) 1632 ){ 1633 int rc; 1634 1635 /* Get an EXCLUSIVE lock on the database file. 
*/ 1636 rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); 1637 if( rc!=SQLITE_OK ){ 1638 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 1639 pPager->state = PAGER_UNLOCK; 1640 return rc; 1641 } 1642 pPager->state = PAGER_EXCLUSIVE; 1643 1644 /* Open the journal for reading only. Return SQLITE_BUSY if 1645 ** we are unable to open the journal file. 1646 ** 1647 ** The journal file does not need to be locked itself. The 1648 ** journal file is never open unless the main database file holds 1649 ** a write lock, so there is never any chance of two or more 1650 ** processes opening the journal at the same time. 1651 */ 1652 rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd); 1653 if( rc!=SQLITE_OK ){ 1654 sqlite3OsUnlock(&pPager->fd, NO_LOCK); 1655 pPager->state = PAGER_UNLOCK; 1656 return SQLITE_BUSY; 1657 } 1658 pPager->journalOpen = 1; 1659 pPager->journalStarted = 0; 1660 1661 /* Playback and delete the journal. Drop the database write 1662 ** lock and reacquire the read lock. 1663 */ 1664 rc = pager_playback(pPager, 0); 1665 if( rc!=SQLITE_OK ){ 1666 return rc; 1667 } 1668 } 1669 pPg = 0; 1670 }else{ 1671 /* Search for page in cache */ 1672 pPg = pager_lookup(pPager, pgno); 1673 if( pPager->memDb && pPager->state==PAGER_UNLOCK ){ 1674 pPager->state = PAGER_SHARED; 1675 } 1676 } 1677 if( pPg==0 ){ 1678 /* The requested page is not in the page cache. 
*/ 1679 int h; 1680 pPager->nMiss++; 1681 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || pPager->memDb ){ 1682 /* Create a new page */ 1683 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE 1684 + sizeof(u32) + pPager->nExtra 1685 + pPager->memDb*sizeof(PgHistory) ); 1686 if( pPg==0 ){ 1687 pager_unwritelock(pPager); 1688 pPager->errMask |= PAGER_ERR_MEM; 1689 return SQLITE_NOMEM; 1690 } 1691 memset(pPg, 0, sizeof(*pPg)); 1692 if( pPager->memDb ){ 1693 memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory)); 1694 } 1695 pPg->pPager = pPager; 1696 pPg->pNextAll = pPager->pAll; 1697 pPager->pAll = pPg; 1698 pPager->nPage++; 1699 }else{ 1700 /* Find a page to recycle. Try to locate a page that does not 1701 ** require us to do an fsync() on the journal. 1702 */ 1703 pPg = pPager->pFirstSynced; 1704 1705 /* If we could not find a page that does not require an fsync() 1706 ** on the journal file then fsync the journal file. This is a 1707 ** very slow operation, so we work hard to avoid it. But sometimes 1708 ** it can't be helped. 1709 */ 1710 if( pPg==0 ){ 1711 int rc = syncJournal(pPager, 0); 1712 if( rc!=0 ){ 1713 sqlite3pager_rollback(pPager); 1714 return SQLITE_IOERR; 1715 } 1716 pPg = pPager->pFirst; 1717 } 1718 assert( pPg->nRef==0 ); 1719 1720 /* Write the page to the database file if it is dirty. 1721 */ 1722 if( pPg->dirty ){ 1723 assert( pPg->needSync==0 ); 1724 pPg->pDirty = 0; 1725 rc = pager_write_pagelist( pPg ); 1726 if( rc!=SQLITE_OK ){ 1727 sqlite3pager_rollback(pPager); 1728 return SQLITE_IOERR; 1729 } 1730 } 1731 assert( pPg->dirty==0 ); 1732 1733 /* If the page we are recycling is marked as alwaysRollback, then 1734 ** set the global alwaysRollback flag, thus disabling the 1735 ** sqlite_dont_rollback() optimization for the rest of this transaction. 
1736 ** It is necessary to do this because the page marked alwaysRollback 1737 ** might be reloaded at a later time but at that point we won't remember 1738 ** that is was marked alwaysRollback. This means that all pages must 1739 ** be marked as alwaysRollback from here on out. 1740 */ 1741 if( pPg->alwaysRollback ){ 1742 pPager->alwaysRollback = 1; 1743 } 1744 1745 /* Unlink the old page from the free list and the hash table 1746 */ 1747 unlinkPage(pPg); 1748 pPager->nOvfl++; 1749 } 1750 pPg->pgno = pgno; 1751 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){ 1752 sqlite3CheckMemory(pPager->aInJournal, pgno/8); 1753 assert( pPager->journalOpen ); 1754 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0; 1755 pPg->needSync = 0; 1756 }else{ 1757 pPg->inJournal = 0; 1758 pPg->needSync = 0; 1759 } 1760 if( pPager->aInStmt && (int)pgno<=pPager->stmtSize 1761 && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){ 1762 page_add_to_stmt_list(pPg); 1763 }else{ 1764 page_remove_from_stmt_list(pPg); 1765 } 1766 pPg->dirty = 0; 1767 pPg->nRef = 1; 1768 REFINFO(pPg); 1769 pPager->nRef++; 1770 h = pager_hash(pgno); 1771 pPg->pNextHash = pPager->aHash[h]; 1772 pPager->aHash[h] = pPg; 1773 if( pPg->pNextHash ){ 1774 assert( pPg->pNextHash->pPrevHash==0 ); 1775 pPg->pNextHash->pPrevHash = pPg; 1776 } 1777 if( pPager->nExtra>0 ){ 1778 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra); 1779 } 1780 if( pPager->dbSize<0 ) sqlite3pager_pagecount(pPager); 1781 if( pPager->errMask!=0 ){ 1782 sqlite3pager_unref(PGHDR_TO_DATA(pPg)); 1783 rc = pager_errcode(pPager); 1784 return rc; 1785 } 1786 if( pPager->dbSize<(int)pgno ){ 1787 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE); 1788 }else{ 1789 int rc; 1790 assert( pPager->memDb==0 ); 1791 sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE); 1792 rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE); 1793 TRACE2("FETCH page %d\n", pPg->pgno); 1794 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3); 1795 
if( rc!=SQLITE_OK ){ 1796 off_t fileSize; 1797 if( sqlite3OsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK 1798 || fileSize>=pgno*SQLITE_PAGE_SIZE ){ 1799 sqlite3pager_unref(PGHDR_TO_DATA(pPg)); 1800 return rc; 1801 }else{ 1802 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE); 1803 } 1804 } 1805 } 1806 }else{ 1807 /* The requested page is in the page cache. */ 1808 pPager->nHit++; 1809 page_ref(pPg); 1810 } 1811 *ppPage = PGHDR_TO_DATA(pPg); 1812 return SQLITE_OK; 1813 } 1814 1815 /* 1816 ** Acquire a page if it is already in the in-memory cache. Do 1817 ** not read the page from disk. Return a pointer to the page, 1818 ** or 0 if the page is not in cache. 1819 ** 1820 ** See also sqlite3pager_get(). The difference between this routine 1821 ** and sqlite3pager_get() is that _get() will go to the disk and read 1822 ** in the page if the page is not already in cache. This routine 1823 ** returns NULL if the page is not in cache or if a disk I/O error 1824 ** has ever happened. 1825 */ 1826 void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){ 1827 PgHdr *pPg; 1828 1829 assert( pPager!=0 ); 1830 assert( pgno!=0 ); 1831 if( pPager->errMask & ~(PAGER_ERR_FULL) ){ 1832 return 0; 1833 } 1834 pPg = pager_lookup(pPager, pgno); 1835 if( pPg==0 ) return 0; 1836 page_ref(pPg); 1837 return PGHDR_TO_DATA(pPg); 1838 } 1839 1840 /* 1841 ** Release a page. 1842 ** 1843 ** If the number of references to the page drop to zero, then the 1844 ** page is added to the LRU list. When all references to all pages 1845 ** are released, a rollback occurs and the lock on the database is 1846 ** removed. 1847 */ 1848 int sqlite3pager_unref(void *pData){ 1849 PgHdr *pPg; 1850 1851 /* Decrement the reference count for this page 1852 */ 1853 pPg = DATA_TO_PGHDR(pData); 1854 assert( pPg->nRef>0 ); 1855 pPg->nRef--; 1856 REFINFO(pPg); 1857 1858 /* When the number of references to a page reach 0, call the 1859 ** destructor and add the page to the freelist. 
1860 */ 1861 if( pPg->nRef==0 ){ 1862 Pager *pPager; 1863 pPager = pPg->pPager; 1864 pPg->pNextFree = 0; 1865 pPg->pPrevFree = pPager->pLast; 1866 pPager->pLast = pPg; 1867 if( pPg->pPrevFree ){ 1868 pPg->pPrevFree->pNextFree = pPg; 1869 }else{ 1870 pPager->pFirst = pPg; 1871 } 1872 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){ 1873 pPager->pFirstSynced = pPg; 1874 } 1875 if( pPager->xDestructor ){ 1876 pPager->xDestructor(pData, pPager->pageSize); 1877 } 1878 1879 /* When all pages reach the freelist, drop the read lock from 1880 ** the database file. 1881 */ 1882 pPager->nRef--; 1883 assert( pPager->nRef>=0 ); 1884 if( pPager->nRef==0 && !pPager->memDb ){ 1885 pager_reset(pPager); 1886 } 1887 } 1888 return SQLITE_OK; 1889 } 1890 1891 /* 1892 ** Create a journal file for pPager. There should already be a RESERVED 1893 ** or EXCLUSIVE lock on the database file when this routine is called. 1894 ** 1895 ** Return SQLITE_OK if everything. Return an error code and release the 1896 ** write lock if anything goes wrong. 
1897 */ 1898 static int pager_open_journal(Pager *pPager){ 1899 int rc; 1900 assert( pPager->state>=PAGER_RESERVED ); 1901 assert( pPager->journalOpen==0 ); 1902 assert( pPager->useJournal ); 1903 sqlite3pager_pagecount(pPager); 1904 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 ); 1905 if( pPager->aInJournal==0 ){ 1906 sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); 1907 pPager->state = PAGER_SHARED; 1908 return SQLITE_NOMEM; 1909 } 1910 rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile); 1911 if( rc!=SQLITE_OK ){ 1912 sqliteFree(pPager->aInJournal); 1913 pPager->aInJournal = 0; 1914 sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); 1915 pPager->state = PAGER_SHARED; 1916 return SQLITE_CANTOPEN; 1917 } 1918 sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd); 1919 pPager->journalOpen = 1; 1920 pPager->journalStarted = 0; 1921 pPager->needSync = 0; 1922 pPager->alwaysRollback = 0; 1923 pPager->nRec = 0; 1924 if( pPager->errMask!=0 ){ 1925 rc = pager_errcode(pPager); 1926 return rc; 1927 } 1928 pPager->origDbSize = pPager->dbSize; 1929 1930 /* Create the header for the journal: 1931 ** - 8 bytes: Magic identifying journal format. 1932 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. 1933 ** - 4 bytes: Magic used for page checksums. 1934 ** - 4 bytes: Initial database page count. 1935 ** - 4 bytes: Number of bytes reserved for master journal ptr (nMaster) 1936 ** - nMaster bytes: Space for a master journal pointer. 1937 */ 1938 rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic)); 1939 if( rc==SQLITE_OK ){ 1940 rc = write32bits(&pPager->jfd, pPager->noSync ? 
0xffffffff : 0); 1941 } 1942 if( rc==SQLITE_OK ){ 1943 sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); 1944 rc = write32bits(&pPager->jfd, pPager->cksumInit); 1945 } 1946 if( rc==SQLITE_OK ){ 1947 rc = write32bits(&pPager->jfd, pPager->dbSize); 1948 } 1949 if( rc==SQLITE_OK ){ 1950 rc = write32bits(&pPager->jfd, pPager->nMaster); 1951 } 1952 if( rc==SQLITE_OK ){ 1953 sqlite3OsSeek(&pPager->jfd, 24 + pPager->nMaster - 1); 1954 rc = sqlite3OsWrite(&pPager->jfd, "\000", 1); 1955 } 1956 if( pPager->stmtAutoopen && rc==SQLITE_OK ){ 1957 rc = sqlite3pager_stmt_begin(pPager); 1958 } 1959 if( rc!=SQLITE_OK ){ 1960 rc = pager_unwritelock(pPager); 1961 if( rc==SQLITE_OK ){ 1962 rc = SQLITE_FULL; 1963 } 1964 } 1965 return rc; 1966 } 1967 1968 /* 1969 ** Acquire a write-lock on the database. The lock is removed when 1970 ** the any of the following happen: 1971 ** 1972 ** * sqlite3pager_commit() is called. 1973 ** * sqlite3pager_rollback() is called. 1974 ** * sqlite3pager_close() is called. 1975 ** * sqlite3pager_unref() is called to on every outstanding page. 1976 ** 1977 ** The first parameter to this routine is a pointer to any open page of the 1978 ** database file. Nothing changes about the page - it is used merely to 1979 ** acquire a pointer to the Pager structure and as proof that there is 1980 ** already a read-lock on the database. 1981 ** 1982 ** The second parameter indicates how much space in bytes to reserve for a 1983 ** master journal file-name at the start of the journal when it is created. 1984 ** 1985 ** A journal file is opened if this is not a temporary file. For temporary 1986 ** files, the opening of the journal file is deferred until there is an 1987 ** actual need to write to the journal. 1988 ** 1989 ** If the database is already reserved for writing, this routine is a no-op. 
1990 */ 1991 int sqlite3pager_begin(void *pData, int nMaster){ 1992 PgHdr *pPg = DATA_TO_PGHDR(pData); 1993 Pager *pPager = pPg->pPager; 1994 int rc = SQLITE_OK; 1995 assert( pPg->nRef>0 ); 1996 assert( nMaster>=0 ); 1997 assert( pPager->state!=PAGER_UNLOCK ); 1998 if( pPager->state==PAGER_SHARED ){ 1999 assert( pPager->aInJournal==0 ); 2000 if( pPager->memDb ){ 2001 pPager->state = PAGER_EXCLUSIVE; 2002 pPager->origDbSize = pPager->dbSize; 2003 }else{ 2004 int busy = 1; 2005 do { 2006 rc = sqlite3OsLock(&pPager->fd, RESERVED_LOCK); 2007 }while( rc==SQLITE_BUSY && 2008 pPager->pBusyHandler && 2009 pPager->pBusyHandler->xFunc && 2010 pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++) 2011 ); 2012 if( rc!=SQLITE_OK ){ 2013 return rc; 2014 } 2015 pPager->nMaster = nMaster; 2016 pPager->state = PAGER_RESERVED; 2017 pPager->dirtyCache = 0; 2018 TRACE3("TRANSACTION %d nMaster=%d\n", pPager->fd.h, nMaster); 2019 if( pPager->useJournal && !pPager->tempFile ){ 2020 rc = pager_open_journal(pPager); 2021 } 2022 } 2023 } 2024 return rc; 2025 } 2026 2027 /* 2028 ** Mark a data page as writeable. The page is written into the journal 2029 ** if it is not there already. This routine must be called before making 2030 ** changes to a page. 2031 ** 2032 ** The first time this routine is called, the pager creates a new 2033 ** journal and acquires a RESERVED lock on the database. If the RESERVED 2034 ** lock could not be acquired, this routine returns SQLITE_BUSY. The 2035 ** calling routine must check for that return value and be careful not to 2036 ** change any page data until this routine returns SQLITE_OK. 2037 ** 2038 ** If the journal file could not be written because the disk is full, 2039 ** then this routine returns SQLITE_FULL and does an immediate rollback. 2040 ** All subsequent write attempts also return SQLITE_FULL until there 2041 ** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to 2042 ** reset. 
2043 */ 2044 int sqlite3pager_write(void *pData){ 2045 PgHdr *pPg = DATA_TO_PGHDR(pData); 2046 Pager *pPager = pPg->pPager; 2047 int rc = SQLITE_OK; 2048 2049 /* Check for errors 2050 */ 2051 if( pPager->errMask ){ 2052 return pager_errcode(pPager); 2053 } 2054 if( pPager->readOnly ){ 2055 return SQLITE_PERM; 2056 } 2057 2058 /* Mark the page as dirty. If the page has already been written 2059 ** to the journal then we can return right away. 2060 */ 2061 pPg->dirty = 1; 2062 if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){ 2063 pPager->dirtyCache = 1; 2064 return SQLITE_OK; 2065 } 2066 2067 /* If we get this far, it means that the page needs to be 2068 ** written to the transaction journal or the ckeckpoint journal 2069 ** or both. 2070 ** 2071 ** First check to see that the transaction journal exists and 2072 ** create it if it does not. 2073 */ 2074 assert( pPager->state!=PAGER_UNLOCK ); 2075 rc = sqlite3pager_begin(pData, 0); 2076 if( rc!=SQLITE_OK ){ 2077 return rc; 2078 } 2079 assert( pPager->state>=PAGER_RESERVED ); 2080 if( !pPager->journalOpen && pPager->useJournal ){ 2081 rc = pager_open_journal(pPager); 2082 if( rc!=SQLITE_OK ) return rc; 2083 } 2084 assert( pPager->journalOpen || !pPager->useJournal ); 2085 pPager->dirtyCache = 1; 2086 2087 /* The transaction journal now exists and we have a RESERVED or an 2088 ** EXCLUSIVE lock on the main database file. Write the current page to 2089 ** the transaction journal if it is not there already. 
2090 */ 2091 if( !pPg->inJournal && (pPager->useJournal || pPager->memDb) ){ 2092 if( (int)pPg->pgno <= pPager->origDbSize ){ 2093 int szPg; 2094 u32 saved; 2095 if( pPager->memDb ){ 2096 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 2097 TRACE2("JOURNAL page %d\n", pPg->pgno); 2098 assert( pHist->pOrig==0 ); 2099 pHist->pOrig = sqliteMallocRaw( pPager->pageSize ); 2100 if( pHist->pOrig ){ 2101 memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize); 2102 } 2103 pPg->inJournal = 1; 2104 }else{ 2105 u32 cksum = pager_cksum(pPager, pPg->pgno, pData); 2106 saved = *(u32*)PGHDR_TO_EXTRA(pPg); 2107 store32bits(cksum, pPg, SQLITE_PAGE_SIZE); 2108 szPg = SQLITE_PAGE_SIZE+8; 2109 store32bits(pPg->pgno, pPg, -4); 2110 CODEC(pPager, pData, pPg->pgno, 7); 2111 rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg); 2112 TRACE3("JOURNAL page %d needSync=%d\n", pPg->pgno, pPg->needSync); 2113 CODEC(pPager, pData, pPg->pgno, 0); 2114 *(u32*)PGHDR_TO_EXTRA(pPg) = saved; 2115 if( rc!=SQLITE_OK ){ 2116 sqlite3pager_rollback(pPager); 2117 pPager->errMask |= PAGER_ERR_FULL; 2118 return rc; 2119 } 2120 pPager->nRec++; 2121 assert( pPager->aInJournal!=0 ); 2122 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2123 pPg->needSync = !pPager->noSync; 2124 pPg->inJournal = 1; 2125 if( pPager->stmtInUse ){ 2126 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2127 page_add_to_stmt_list(pPg); 2128 } 2129 } 2130 }else{ 2131 pPg->needSync = !pPager->journalStarted && !pPager->noSync; 2132 TRACE3("APPEND page %d needSync=%d\n", pPg->pgno, pPg->needSync); 2133 } 2134 if( pPg->needSync ){ 2135 pPager->needSync = 1; 2136 } 2137 } 2138 2139 /* If the statement journal is open and the page is not in it, 2140 ** then write the current page to the statement journal. Note that 2141 ** the statement journal format differs from the standard journal format 2142 ** in that it omits the checksums and the header. 
2143 */ 2144 if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){ 2145 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); 2146 if( pPager->memDb ){ 2147 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 2148 assert( pHist->pStmt==0 ); 2149 pHist->pStmt = sqliteMallocRaw( pPager->pageSize ); 2150 if( pHist->pStmt ){ 2151 memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize); 2152 } 2153 TRACE2("STMT-JOURNAL page %d\n", pPg->pgno); 2154 }else{ 2155 store32bits(pPg->pgno, pPg, -4); 2156 CODEC(pPager, pData, pPg->pgno, 7); 2157 rc = sqlite3OsWrite(&pPager->stfd, ((char*)pData)-4, SQLITE_PAGE_SIZE+4); 2158 TRACE2("STMT-JOURNAL page %d\n", pPg->pgno); 2159 CODEC(pPager, pData, pPg->pgno, 0); 2160 if( rc!=SQLITE_OK ){ 2161 sqlite3pager_rollback(pPager); 2162 pPager->errMask |= PAGER_ERR_FULL; 2163 return rc; 2164 } 2165 pPager->stmtNRec++; 2166 assert( pPager->aInStmt!=0 ); 2167 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2168 } 2169 page_add_to_stmt_list(pPg); 2170 } 2171 2172 /* Update the database size and return. 2173 */ 2174 if( pPager->dbSize<(int)pPg->pgno ){ 2175 pPager->dbSize = pPg->pgno; 2176 } 2177 return rc; 2178 } 2179 2180 /* 2181 ** Return TRUE if the page given in the argument was previously passed 2182 ** to sqlite3pager_write(). In other words, return TRUE if it is ok 2183 ** to change the content of the page. 2184 */ 2185 int sqlite3pager_iswriteable(void *pData){ 2186 PgHdr *pPg = DATA_TO_PGHDR(pData); 2187 return pPg->dirty; 2188 } 2189 2190 /* 2191 ** Replace the content of a single page with the information in the third 2192 ** argument. 
2193 */ 2194 int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){ 2195 void *pPage; 2196 int rc; 2197 2198 rc = sqlite3pager_get(pPager, pgno, &pPage); 2199 if( rc==SQLITE_OK ){ 2200 rc = sqlite3pager_write(pPage); 2201 if( rc==SQLITE_OK ){ 2202 memcpy(pPage, pData, SQLITE_PAGE_SIZE); 2203 } 2204 sqlite3pager_unref(pPage); 2205 } 2206 return rc; 2207 } 2208 2209 /* 2210 ** A call to this routine tells the pager that it is not necessary to 2211 ** write the information on page "pgno" back to the disk, even though 2212 ** that page might be marked as dirty. 2213 ** 2214 ** The overlying software layer calls this routine when all of the data 2215 ** on the given page is unused. The pager marks the page as clean so 2216 ** that it does not get written to disk. 2217 ** 2218 ** Tests show that this optimization, together with the 2219 ** sqlite3pager_dont_rollback() below, more than double the speed 2220 ** of large INSERT operations and quadruple the speed of large DELETEs. 2221 ** 2222 ** When this routine is called, set the alwaysRollback flag to true. 2223 ** Subsequent calls to sqlite3pager_dont_rollback() for the same page 2224 ** will thereafter be ignored. This is necessary to avoid a problem 2225 ** where a page with data is added to the freelist during one part of 2226 ** a transaction then removed from the freelist during a later part 2227 ** of the same transaction and reused for some other purpose. When it 2228 ** is first added to the freelist, this routine is called. When reused, 2229 ** the dont_rollback() routine is called. But because the page contains 2230 ** critical data, we still need to be sure it gets rolled back in spite 2231 ** of the dont_rollback() call. 
2232 */ 2233 void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){ 2234 PgHdr *pPg; 2235 2236 pPg = pager_lookup(pPager, pgno); 2237 pPg->alwaysRollback = 1; 2238 if( pPg && pPg->dirty ){ 2239 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){ 2240 /* If this pages is the last page in the file and the file has grown 2241 ** during the current transaction, then do NOT mark the page as clean. 2242 ** When the database file grows, we must make sure that the last page 2243 ** gets written at least once so that the disk file will be the correct 2244 ** size. If you do not write this page and the size of the file 2245 ** on the disk ends up being too small, that can lead to database 2246 ** corruption during the next transaction. 2247 */ 2248 }else{ 2249 TRACE3("DONT_WRITE page %d of %d\n", pgno, pPager->fd.h); 2250 pPg->dirty = 0; 2251 } 2252 } 2253 } 2254 2255 /* 2256 ** A call to this routine tells the pager that if a rollback occurs, 2257 ** it is not necessary to restore the data on the given page. This 2258 ** means that the pager does not have to record the given page in the 2259 ** rollback journal. 
2260 */ 2261 void sqlite3pager_dont_rollback(void *pData){ 2262 PgHdr *pPg = DATA_TO_PGHDR(pData); 2263 Pager *pPager = pPg->pPager; 2264 2265 if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return; 2266 if( pPg->alwaysRollback || pPager->alwaysRollback || pPager->memDb ) return; 2267 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){ 2268 assert( pPager->aInJournal!=0 ); 2269 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2270 pPg->inJournal = 1; 2271 if( pPager->stmtInUse ){ 2272 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2273 page_add_to_stmt_list(pPg); 2274 } 2275 TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, pPager->fd.h); 2276 } 2277 if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){ 2278 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); 2279 assert( pPager->aInStmt!=0 ); 2280 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); 2281 page_add_to_stmt_list(pPg); 2282 } 2283 } 2284 2285 2286 /* 2287 ** Clear a PgHistory block 2288 */ 2289 static void clearHistory(PgHistory *pHist){ 2290 sqliteFree(pHist->pOrig); 2291 sqliteFree(pHist->pStmt); 2292 pHist->pOrig = 0; 2293 pHist->pStmt = 0; 2294 } 2295 2296 /* 2297 ** Commit all changes to the database and release the write lock. 2298 ** 2299 ** If the commit fails for any reason, a rollback attempt is made 2300 ** and an error code is returned. If the commit worked, SQLITE_OK 2301 ** is returned. 
2302 */ 2303 int sqlite3pager_commit(Pager *pPager){ 2304 int rc; 2305 PgHdr *pPg; 2306 2307 if( pPager->errMask==PAGER_ERR_FULL ){ 2308 rc = sqlite3pager_rollback(pPager); 2309 if( rc==SQLITE_OK ){ 2310 rc = SQLITE_FULL; 2311 } 2312 return rc; 2313 } 2314 if( pPager->errMask!=0 ){ 2315 rc = pager_errcode(pPager); 2316 return rc; 2317 } 2318 if( pPager->state<PAGER_RESERVED ){ 2319 return SQLITE_ERROR; 2320 } 2321 TRACE2("COMMIT %d\n", pPager->fd.h); 2322 if( pPager->memDb ){ 2323 pPg = pager_get_all_dirty_pages(pPager); 2324 while( pPg ){ 2325 clearHistory(PGHDR_TO_HIST(pPg, pPager)); 2326 pPg->dirty = 0; 2327 pPg->inJournal = 0; 2328 pPg->inStmt = 0; 2329 pPg->pPrevStmt = pPg->pNextStmt = 0; 2330 pPg = pPg->pDirty; 2331 } 2332 pPager->pStmt = 0; 2333 pPager->state = PAGER_SHARED; 2334 return SQLITE_OK; 2335 } 2336 if( pPager->dirtyCache==0 ){ 2337 /* Exit early (without doing the time-consuming sqlite3OsSync() calls) 2338 ** if there have been no changes to the database file. */ 2339 assert( pPager->needSync==0 ); 2340 rc = pager_unwritelock(pPager); 2341 pPager->dbSize = -1; 2342 return rc; 2343 } 2344 assert( pPager->journalOpen ); 2345 #if 0 2346 rc = syncJournal(pPager, 0); 2347 if( rc!=SQLITE_OK ){ 2348 goto commit_abort; 2349 } 2350 pPg = pager_get_all_dirty_pages(pPager); 2351 if( pPg ){ 2352 rc = pager_write_pagelist(pPg); 2353 if( rc || (!pPager->noSync && sqlite3OsSync(&pPager->fd)!=SQLITE_OK) ){ 2354 goto commit_abort; 2355 } 2356 } 2357 #endif 2358 rc = sqlite3pager_sync(pPager, 0); 2359 if( rc!=SQLITE_OK ){ 2360 goto commit_abort; 2361 } 2362 rc = pager_unwritelock(pPager); 2363 pPager->dbSize = -1; 2364 return rc; 2365 2366 /* Jump here if anything goes wrong during the commit process. 2367 */ 2368 commit_abort: 2369 rc = sqlite3pager_rollback(pPager); 2370 if( rc==SQLITE_OK ){ 2371 rc = SQLITE_FULL; 2372 } 2373 return rc; 2374 } 2375 2376 /* 2377 ** Rollback all changes. The database falls back to PAGER_SHARED mode. 
2378 ** All in-memory cache pages revert to their original data contents. 2379 ** The journal is deleted. 2380 ** 2381 ** This routine cannot fail unless some other process is not following 2382 ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other 2383 ** process is writing trash into the journal file (SQLITE_CORRUPT) or 2384 ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error 2385 ** codes are returned for all these occasions. Otherwise, 2386 ** SQLITE_OK is returned. 2387 */ 2388 int sqlite3pager_rollback(Pager *pPager){ 2389 int rc; 2390 TRACE2("ROLLBACK %d\n", pPager->fd.h); 2391 if( pPager->memDb ){ 2392 PgHdr *p; 2393 for(p=pPager->pAll; p; p=p->pNextAll){ 2394 PgHistory *pHist; 2395 if( !p->dirty ) continue; 2396 pHist = PGHDR_TO_HIST(p, pPager); 2397 if( pHist->pOrig ){ 2398 memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize); 2399 TRACE2("ROLLBACK-PAGE %d\n", p->pgno); 2400 }else{ 2401 TRACE2("PAGE %d is clean\n", p->pgno); 2402 } 2403 clearHistory(pHist); 2404 p->dirty = 0; 2405 p->inJournal = 0; 2406 p->inStmt = 0; 2407 p->pPrevStmt = p->pNextStmt = 0; 2408 } 2409 pPager->pStmt = 0; 2410 pPager->dbSize = pPager->origDbSize; 2411 memoryTruncate(pPager); 2412 pPager->stmtInUse = 0; 2413 pPager->state = PAGER_SHARED; 2414 return SQLITE_OK; 2415 } 2416 2417 if( !pPager->dirtyCache || !pPager->journalOpen ){ 2418 rc = pager_unwritelock(pPager); 2419 pPager->dbSize = -1; 2420 return rc; 2421 } 2422 2423 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){ 2424 if( pPager->state>=PAGER_EXCLUSIVE ){ 2425 pager_playback(pPager, 1); 2426 } 2427 return pager_errcode(pPager); 2428 } 2429 if( pPager->state==PAGER_RESERVED ){ 2430 int rc2; 2431 rc = pager_reload_cache(pPager); 2432 rc2 = pager_unwritelock(pPager); 2433 if( rc==SQLITE_OK ){ 2434 rc = rc2; 2435 } 2436 }else{ 2437 rc = pager_playback(pPager, 1); 2438 } 2439 if( rc!=SQLITE_OK ){ 2440 rc = SQLITE_CORRUPT; 2441 pPager->errMask |= PAGER_ERR_CORRUPT; 2442 } 2443 
pPager->dbSize = -1; 2444 return rc; 2445 } 2446 2447 /* 2448 ** Return TRUE if the database file is opened read-only. Return FALSE 2449 ** if the database is (in theory) writable. 2450 */ 2451 int sqlite3pager_isreadonly(Pager *pPager){ 2452 return pPager->readOnly; 2453 } 2454 2455 /* 2456 ** This routine is used for testing and analysis only. 2457 */ 2458 int *sqlite3pager_stats(Pager *pPager){ 2459 static int a[9]; 2460 a[0] = pPager->nRef; 2461 a[1] = pPager->nPage; 2462 a[2] = pPager->mxPage; 2463 a[3] = pPager->dbSize; 2464 a[4] = pPager->state; 2465 a[5] = pPager->errMask; 2466 a[6] = pPager->nHit; 2467 a[7] = pPager->nMiss; 2468 a[8] = pPager->nOvfl; 2469 return a; 2470 } 2471 2472 /* 2473 ** Set the statement rollback point. 2474 ** 2475 ** This routine should be called with the transaction journal already 2476 ** open. A new statement journal is created that can be used to rollback 2477 ** changes of a single SQL command within a larger transaction. 2478 */ 2479 int sqlite3pager_stmt_begin(Pager *pPager){ 2480 int rc; 2481 char zTemp[SQLITE_TEMPNAME_SIZE]; 2482 assert( !pPager->stmtInUse ); 2483 TRACE2("STMT-BEGIN %d\n", pPager->fd.h); 2484 if( pPager->memDb ){ 2485 pPager->stmtInUse = 1; 2486 pPager->stmtSize = pPager->dbSize; 2487 return SQLITE_OK; 2488 } 2489 if( !pPager->journalOpen ){ 2490 pPager->stmtAutoopen = 1; 2491 return SQLITE_OK; 2492 } 2493 assert( pPager->journalOpen ); 2494 pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 ); 2495 if( pPager->aInStmt==0 ){ 2496 sqlite3OsLock(&pPager->fd, SHARED_LOCK); 2497 return SQLITE_NOMEM; 2498 } 2499 #ifndef NDEBUG 2500 rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize); 2501 if( rc ) goto stmt_begin_failed; 2502 assert( pPager->stmtJSize == 2503 pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager) ); 2504 #endif 2505 pPager->stmtJSize = 2506 pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager); 2507 pPager->stmtSize = pPager->dbSize; 2508 if( !pPager->stmtOpen ){ 2509 rc = 
sqlite3pager_opentemp(zTemp, &pPager->stfd); 2510 if( rc ) goto stmt_begin_failed; 2511 pPager->stmtOpen = 1; 2512 pPager->stmtNRec = 0; 2513 } 2514 pPager->stmtInUse = 1; 2515 return SQLITE_OK; 2516 2517 stmt_begin_failed: 2518 if( pPager->aInStmt ){ 2519 sqliteFree(pPager->aInStmt); 2520 pPager->aInStmt = 0; 2521 } 2522 return rc; 2523 } 2524 2525 /* 2526 ** Commit a statement. 2527 */ 2528 int sqlite3pager_stmt_commit(Pager *pPager){ 2529 if( pPager->stmtInUse ){ 2530 PgHdr *pPg, *pNext; 2531 TRACE2("STMT-COMMIT %d\n", pPager->fd.h); 2532 if( !pPager->memDb ){ 2533 sqlite3OsSeek(&pPager->stfd, 0); 2534 /* sqlite3OsTruncate(&pPager->stfd, 0); */ 2535 sqliteFree( pPager->aInStmt ); 2536 pPager->aInStmt = 0; 2537 } 2538 for(pPg=pPager->pStmt; pPg; pPg=pNext){ 2539 pNext = pPg->pNextStmt; 2540 assert( pPg->inStmt ); 2541 pPg->inStmt = 0; 2542 pPg->pPrevStmt = pPg->pNextStmt = 0; 2543 if( pPager->memDb ){ 2544 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 2545 sqliteFree(pHist->pStmt); 2546 pHist->pStmt = 0; 2547 } 2548 } 2549 pPager->stmtNRec = 0; 2550 pPager->stmtInUse = 0; 2551 pPager->pStmt = 0; 2552 } 2553 pPager->stmtAutoopen = 0; 2554 return SQLITE_OK; 2555 } 2556 2557 /* 2558 ** Rollback a statement. 
2559 */ 2560 int sqlite3pager_stmt_rollback(Pager *pPager){ 2561 int rc; 2562 if( pPager->stmtInUse ){ 2563 TRACE2("STMT-ROLLBACK %d\n", pPager->fd.h); 2564 if( pPager->memDb ){ 2565 PgHdr *pPg; 2566 for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){ 2567 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); 2568 if( pHist->pStmt ){ 2569 memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize); 2570 sqliteFree(pHist->pStmt); 2571 pHist->pStmt = 0; 2572 } 2573 } 2574 pPager->dbSize = pPager->stmtSize; 2575 memoryTruncate(pPager); 2576 rc = SQLITE_OK; 2577 }else{ 2578 rc = pager_stmt_playback(pPager); 2579 } 2580 sqlite3pager_stmt_commit(pPager); 2581 }else{ 2582 rc = SQLITE_OK; 2583 } 2584 pPager->stmtAutoopen = 0; 2585 return rc; 2586 } 2587 2588 /* 2589 ** Return the full pathname of the database file. 2590 */ 2591 const char *sqlite3pager_filename(Pager *pPager){ 2592 return pPager->zFilename; 2593 } 2594 2595 /* 2596 ** Set the codec for this pager 2597 */ 2598 void sqlite3pager_set_codec( 2599 Pager *pPager, 2600 void (*xCodec)(void*,void*,Pgno,int), 2601 void *pCodecArg 2602 ){ 2603 pPager->xCodec = xCodec; 2604 pPager->pCodecArg = pCodecArg; 2605 } 2606 2607 /* 2608 ** Sync the database file for the pager pPager. zMaster points to the name 2609 ** of a master journal file that should be written into the individual 2610 ** journal file. zMaster may be NULL, which is interpreted as no master 2611 ** journal (a single database transaction). 2612 ** 2613 ** This routine ensures that the journal is synced, all dirty pages written 2614 ** to the database file and the database file synced. The only thing that 2615 ** remains to commit the transaction is to delete the journal file (or 2616 ** master journal file if specified). 2617 ** 2618 ** Note that if zMaster==NULL, this does not overwrite a previous value 2619 ** passed to an sqlite3pager_sync() call. 
2620 */ 2621 int sqlite3pager_sync(Pager *pPager, const char *zMaster){ 2622 int rc = SQLITE_OK; 2623 2624 /* If this is an in-memory db, or no pages have been written to, this 2625 ** function is a no-op. 2626 */ 2627 if( !pPager->memDb && pPager->dirtyCache ){ 2628 PgHdr *pPg; 2629 assert( pPager->journalOpen ); 2630 2631 /* Sync the journal file */ 2632 rc = syncJournal(pPager, zMaster); 2633 if( rc!=SQLITE_OK ) goto sync_exit; 2634 2635 /* Write all dirty pages to the database file */ 2636 pPg = pager_get_all_dirty_pages(pPager); 2637 rc = pager_write_pagelist(pPg); 2638 if( rc!=SQLITE_OK ) goto sync_exit; 2639 2640 /* If any pages were actually written, sync the database file */ 2641 if( pPg && !pPager->noSync ){ 2642 rc = sqlite3OsSync(&pPager->fd); 2643 } 2644 } 2645 2646 sync_exit: 2647 return rc; 2648 } 2649 2650 #ifdef SQLITE_DEBUG 2651 /* 2652 ** Return the current state of the file lock for the given pager. 2653 ** The return value is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK, 2654 ** PENDING_LOCK, or EXCLUSIVE_LOCK. 2655 */ 2656 int sqlite3pager_lockstate(Pager *pPager){ 2657 return pPager->fd.locktype; 2658 } 2659 #endif 2660 2661 #ifdef SQLITE_TEST 2662 /* 2663 ** Print a listing of all referenced pages and their ref count. 2664 */ 2665 void sqlite3pager_refdump(Pager *pPager){ 2666 PgHdr *pPg; 2667 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ 2668 if( pPg->nRef<=0 ) continue; 2669 printf("PAGE %3d addr=0x%08x nRef=%d\n", 2670 pPg->pgno, (int)PGHDR_TO_DATA(pPg), pPg->nRef); 2671 } 2672 } 2673 #endif 2674