1ed7c855cSdrh /* 2b19a2bc6Sdrh ** 2001 September 15 3ed7c855cSdrh ** 4b19a2bc6Sdrh ** The author disclaims copyright to this source code. In place of 5b19a2bc6Sdrh ** a legal notice, here is a blessing: 6ed7c855cSdrh ** 7b19a2bc6Sdrh ** May you do good and not evil. 8b19a2bc6Sdrh ** May you find forgiveness for yourself and forgive others. 9b19a2bc6Sdrh ** May you share freely, never taking more than you give. 10ed7c855cSdrh ** 11ed7c855cSdrh ************************************************************************* 12b19a2bc6Sdrh ** This is the implementation of the page cache subsystem or "pager". 13ed7c855cSdrh ** 14b19a2bc6Sdrh ** The pager is used to access a database disk file. It implements 15b19a2bc6Sdrh ** atomic commit and rollback through the use of a journal file that 16b19a2bc6Sdrh ** is separate from the database file. The pager also implements file 17b19a2bc6Sdrh ** locking to prevent two processes from writing the same database 18b19a2bc6Sdrh ** file simultaneously, or one process from reading the database while 19b19a2bc6Sdrh ** another is writing. 20ed7c855cSdrh ** 21*104f1fefSdanielk1977 ** @(#) $Id: pager.c,v 1.550 2009/01/14 17:45:58 danielk1977 Exp $ 22ed7c855cSdrh */ 232e66f0b9Sdrh #ifndef SQLITE_OMIT_DISKIO 24d9b0257aSdrh #include "sqliteInt.h" 25ed7c855cSdrh 26ed7c855cSdrh /* 27db48ee02Sdrh ** Macros for troubleshooting. Normally turned off 28db48ee02Sdrh */ 29466be56bSdanielk1977 #if 0 30f2c31ad8Sdanielk1977 int sqlite3PagerTrace=1; /* True to enable tracing */ 31d3627afcSdrh #define sqlite3DebugPrintf printf 3230d53701Sdrh #define PAGERTRACE(X) if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; } 33db48ee02Sdrh #else 3430d53701Sdrh #define PAGERTRACE(X) 35db48ee02Sdrh #endif 36db48ee02Sdrh 37599fcbaeSdanielk1977 /* 3830d53701Sdrh ** The following two macros are used within the PAGERTRACE() macros above 39d86959f5Sdrh ** to print out file-descriptors. 
40599fcbaeSdanielk1977 ** 4185b623f2Sdrh ** PAGERID() takes a pointer to a Pager struct as its argument. The 4262079060Sdanielk1977 ** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file 4385b623f2Sdrh ** struct as its argument. 44599fcbaeSdanielk1977 */ 45c001c58aSdrh #define PAGERID(p) ((int)(p->fd)) 46c001c58aSdrh #define FILEHANDLEID(fd) ((int)fd) 47db48ee02Sdrh 48db48ee02Sdrh /* 49ed7c855cSdrh ** The page cache as a whole is always in one of the following 50ed7c855cSdrh ** states: 51ed7c855cSdrh ** 52a6abd041Sdrh ** PAGER_UNLOCK The page cache is not currently reading or 53ed7c855cSdrh ** writing the database file. There is no 54ed7c855cSdrh ** data held in memory. This is the initial 55ed7c855cSdrh ** state. 56ed7c855cSdrh ** 57a6abd041Sdrh ** PAGER_SHARED The page cache is reading the database. 58ed7c855cSdrh ** Writing is not permitted. There can be 59ed7c855cSdrh ** multiple readers accessing the same database 6069688d5fSdrh ** file at the same time. 61ed7c855cSdrh ** 62726de599Sdrh ** PAGER_RESERVED This process has reserved the database for writing 63726de599Sdrh ** but has not yet made any changes. Only one process 64726de599Sdrh ** at a time can reserve the database. The original 65726de599Sdrh ** database file has not been modified so other 66726de599Sdrh ** processes may still be reading the on-disk 67a6abd041Sdrh ** database file. 68a6abd041Sdrh ** 69a6abd041Sdrh ** PAGER_EXCLUSIVE The page cache is writing the database. 70ed7c855cSdrh ** Access is exclusive. No other processes or 71ed7c855cSdrh ** threads can be reading or writing while one 72ed7c855cSdrh ** process is writing. 73ed7c855cSdrh ** 74aa5ccdf5Sdanielk1977 ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE 75aa5ccdf5Sdanielk1977 ** after all dirty pages have been written to the 76aa5ccdf5Sdanielk1977 ** database file and the file has been synced to 77369339dbSdrh ** disk. 
All that remains to do is to remove or 78369339dbSdrh ** truncate the journal file and the transaction 79369339dbSdrh ** will be committed. 80aa5ccdf5Sdanielk1977 ** 81a6abd041Sdrh ** The page cache comes up in PAGER_UNLOCK. The first time a 823b8a05f6Sdanielk1977 ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED. 83ed7c855cSdrh ** After all pages have been released using sqlite_page_unref(), 84a6abd041Sdrh ** the state transitions back to PAGER_UNLOCK. The first time 853b8a05f6Sdanielk1977 ** that sqlite3PagerWrite() is called, the state transitions to 86369339dbSdrh ** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be 87306dc213Sdrh ** called on an outstanding page which means that the pager must 88a6abd041Sdrh ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.) 89369339dbSdrh ** PAGER_RESERVED means that there is an open rollback journal. 90369339dbSdrh ** The transition to PAGER_EXCLUSIVE occurs before any changes 91369339dbSdrh ** are made to the database file, though writes to the rollback 92369339dbSdrh ** journal occurs with just PAGER_RESERVED. After an sqlite3PagerRollback() 93369339dbSdrh ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED, 94369339dbSdrh ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode. 95ed7c855cSdrh */ 96a6abd041Sdrh #define PAGER_UNLOCK 0 97684917c2Sdrh #define PAGER_SHARED 1 /* same as SHARED_LOCK */ 98684917c2Sdrh #define PAGER_RESERVED 2 /* same as RESERVED_LOCK */ 99684917c2Sdrh #define PAGER_EXCLUSIVE 4 /* same as EXCLUSIVE_LOCK */ 100684917c2Sdrh #define PAGER_SYNCED 5 101ed7c855cSdrh 102684917c2Sdrh /* 103887dc4c2Sdrh ** This macro rounds values up so that if the value is an address it 104887dc4c2Sdrh ** is guaranteed to be an address that is aligned to an 8-byte boundary. 
105887dc4c2Sdrh */ 106887dc4c2Sdrh #define FORCE_ALIGNMENT(X) (((X)+7)&~7) 107887dc4c2Sdrh 1089eb9e26bSdrh /* 1099eb9e26bSdrh ** A macro used for invoking the codec if there is one 1109eb9e26bSdrh */ 1119eb9e26bSdrh #ifdef SQLITE_HAS_CODEC 112c001c58aSdrh # define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); } 113c001c58aSdrh # define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D)) 1149eb9e26bSdrh #else 115c001c58aSdrh # define CODEC1(P,D,N,X) /* NO-OP */ 116c001c58aSdrh # define CODEC2(P,D,N,X) ((char*)D) 1179eb9e26bSdrh #endif 1189eb9e26bSdrh 119ed7c855cSdrh /* 1207cbd589dSdanielk1977 ** The maximum allowed sector size. 16MB. If the xSectorsize() method 1217cbd589dSdanielk1977 ** returns a value larger than this, then MAX_SECTOR_SIZE is used instead. 1227cbd589dSdanielk1977 ** This could conceivably cause corruption following a power failure on 1237cbd589dSdanielk1977 ** such a system. This is currently an undocumented limit. 1247cbd589dSdanielk1977 */ 1257cbd589dSdanielk1977 #define MAX_SECTOR_SIZE 0x0100000 1267cbd589dSdanielk1977 1277cbd589dSdanielk1977 /* 128fd7f0452Sdanielk1977 ** An instance of the following structure is allocated for each active 129fd7f0452Sdanielk1977 ** savepoint and statement transaction in the system. All such structures 130fd7f0452Sdanielk1977 ** are stored in the Pager.aSavepoint[] array, which is allocated and 131fd7f0452Sdanielk1977 ** resized using sqlite3Realloc(). 132fd7f0452Sdanielk1977 ** 133fd7f0452Sdanielk1977 ** When a savepoint is created, the PagerSavepoint.iHdrOffset field is 134fd7f0452Sdanielk1977 ** set to 0. If a journal-header is written into the main journal while 135fd7f0452Sdanielk1977 ** the savepoint is active, then iHdrOffset is set to the byte offset 136fd7f0452Sdanielk1977 ** immediately following the last journal record written into the main 137fd7f0452Sdanielk1977 ** journal before the journal-header. 
This is required during savepoint 138fd7f0452Sdanielk1977 ** rollback (see pagerPlaybackSavepoint()). 139fd7f0452Sdanielk1977 */ 140fd7f0452Sdanielk1977 typedef struct PagerSavepoint PagerSavepoint; 141fd7f0452Sdanielk1977 struct PagerSavepoint { 142fd7f0452Sdanielk1977 i64 iOffset; /* Starting offset in main journal */ 143fd7f0452Sdanielk1977 i64 iHdrOffset; /* See above */ 144fd7f0452Sdanielk1977 Bitvec *pInSavepoint; /* Set of pages in this savepoint */ 145fd7f0452Sdanielk1977 Pgno nOrig; /* Original number of pages in file */ 146fd7f0452Sdanielk1977 Pgno iSubRec; /* Index of first record in sub-journal */ 147fd7f0452Sdanielk1977 }; 148fd7f0452Sdanielk1977 149fd7f0452Sdanielk1977 /* 150ed7c855cSdrh ** A open page cache is an instance of the following structure. 151efaaf579Sdanielk1977 ** 1524f0ee686Sdrh ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or 153efaaf579Sdanielk1977 ** or SQLITE_FULL. Once one of the first three errors occurs, it persists 154efaaf579Sdanielk1977 ** and is returned as the result of every major pager API call. The 155efaaf579Sdanielk1977 ** SQLITE_FULL return code is slightly different. It persists only until the 156efaaf579Sdanielk1977 ** next successful rollback is performed on the pager cache. Also, 1573b8a05f6Sdanielk1977 ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup() 158efaaf579Sdanielk1977 ** APIs, they may still be used successfully. 1593460d19cSdanielk1977 ** 1603460d19cSdanielk1977 ** Managing the size of the database file in pages is a little complicated. 1613460d19cSdanielk1977 ** The variable Pager.dbSize contains the number of pages that the database 1623460d19cSdanielk1977 ** image currently contains. As the database image grows or shrinks this 1633460d19cSdanielk1977 ** variable is updated. The variable Pager.dbFileSize contains the number 1643460d19cSdanielk1977 ** of pages in the database file. 
This may be different from Pager.dbSize 1653460d19cSdanielk1977 ** if some pages have been appended to the database image but not yet written 1663460d19cSdanielk1977 ** out from the cache to the actual file on disk. Or if the image has been 1673460d19cSdanielk1977 ** truncated by an incremental-vacuum operation. The Pager.dbOrigSize variable 1683460d19cSdanielk1977 ** contains the number of pages in the database image when the current 1693460d19cSdanielk1977 ** transaction was opened. The contents of all three of these variables is 1703460d19cSdanielk1977 ** only guaranteed to be correct if the boolean Pager.dbSizeValid is true. 171ed7c855cSdrh */ 172ed7c855cSdrh struct Pager { 173b4b47411Sdanielk1977 sqlite3_vfs *pVfs; /* OS functions to use for IO */ 174603240cfSdrh u8 journalOpen; /* True if journal file descriptors is valid */ 17534e79ceeSdrh u8 journalStarted; /* True if header of journal is synced */ 17634e79ceeSdrh u8 useJournal; /* Use a rollback journal on this file */ 1777bec505eSdrh u8 noReadlock; /* Do not bother to obtain readlocks */ 178603240cfSdrh u8 noSync; /* Do not sync the journal if true */ 179968af52aSdrh u8 fullSync; /* Do extra syncs of the journal for robustness */ 180f036aef0Sdanielk1977 u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */ 181a6abd041Sdrh u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */ 182603240cfSdrh u8 tempFile; /* zFilename is a temporary file */ 183603240cfSdrh u8 readOnly; /* True for a read-only database */ 184603240cfSdrh u8 needSync; /* True if an fsync() is needed on the journal */ 185a6abd041Sdrh u8 dirtyCache; /* True if cached pages have changed */ 186ac69b05eSdrh u8 memDb; /* True to inhibit all file I/O */ 1876d156e46Sdrh u8 setMaster; /* True if a m-j name has been written to jrnl */ 18880e35f46Sdrh u8 doNotSync; /* Boolean. While true, do not spill the cache */ 18980e35f46Sdrh u8 exclusiveMode; /* Boolean. 
True if locking_mode==EXCLUSIVE */ 190fdc40e91Sdrh u8 journalMode; /* On of the PAGER_JOURNALMODE_* values */ 191d138c016Sdrh u8 dbModified; /* True if there are any changes to the Db */ 19280e35f46Sdrh u8 changeCountDone; /* Set after incrementing the change-counter */ 193d92db531Sdanielk1977 u8 dbSizeValid; /* Set when dbSize is correct */ 1943460d19cSdanielk1977 Pgno dbSize; /* Number of pages in the database */ 1953460d19cSdanielk1977 Pgno dbOrigSize; /* dbSize before the current transaction */ 1963460d19cSdanielk1977 Pgno dbFileSize; /* Number of pages in the database file */ 19733f4e02aSdrh u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ 198e49f9827Sdrh int errCode; /* One of several kinds of errors */ 199fcd35c7bSdrh int nRec; /* Number of pages written to the journal */ 200fcd35c7bSdrh u32 cksumInit; /* Quasi-random value added to every checksum */ 201fcd35c7bSdrh int stmtNRec; /* Number of records in stmt subjournal */ 202fcd35c7bSdrh int nExtra; /* Add this many bytes to each in-memory page */ 203fcd35c7bSdrh int pageSize; /* Number of bytes in a page */ 204fcd35c7bSdrh int nPage; /* Total number of in-memory pages */ 205fcd35c7bSdrh int mxPage; /* Maximum number of pages to hold in cache */ 206f8e632b6Sdrh Pgno mxPgno; /* Maximum allowed size of the database */ 207f5e7bb51Sdrh Bitvec *pInJournal; /* One bit for each page in the database file */ 208a1fa00d9Sdanielk1977 Bitvec *pAlwaysRollback; /* One bit for each page marked always-rollback */ 209fcd35c7bSdrh char *zFilename; /* Name of the database file */ 210fcd35c7bSdrh char *zJournal; /* Name of the journal file */ 211fcd35c7bSdrh char *zDirectory; /* Directory hold database and journal files */ 21262079060Sdanielk1977 sqlite3_file *fd, *jfd; /* File descriptors for database and journal */ 213fd7f0452Sdanielk1977 sqlite3_file *sjfd; /* File descriptor for the sub-journal*/ 2141ceedd37Sdanielk1977 int (*xBusyHandler)(void*); /* Function to call when busy */ 2151ceedd37Sdanielk1977 void 
*pBusyHandlerArg; /* Context argument for xBusyHandler */ 216eb206256Sdrh i64 journalOff; /* Current byte offset in the journal file */ 217eb206256Sdrh i64 journalHdr; /* Byte offset to previous journal header */ 21898c58356Sdrh u32 sectorSize; /* Assumed sector size during rollback */ 219fcd35c7bSdrh #ifdef SQLITE_TEST 2207c4ac0c5Sdrh int nHit, nMiss; /* Cache hits and missing */ 2216d156e46Sdrh int nRead, nWrite; /* Database pages read/written */ 222fcd35c7bSdrh #endif 223eaa06f69Sdanielk1977 void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */ 2247c4ac0c5Sdrh #ifdef SQLITE_HAS_CODEC 225c001c58aSdrh void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */ 2266d156e46Sdrh void *pCodecArg; /* First argument to xCodec() */ 2277c4ac0c5Sdrh #endif 2288186df86Sdanielk1977 char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ 22986a88114Sdrh char dbFileVers[16]; /* Changes whenever database file changes */ 230b53e4960Sdanielk1977 i64 journalSizeLimit; /* Size limit for persistent journal files */ 2318c0a791aSdanielk1977 PCache *pPCache; /* Pointer to page cache object */ 2329f0bbf9cSdrh PagerSavepoint *aSavepoint; /* Array of active savepoints */ 2339f0bbf9cSdrh int nSavepoint; /* Number of elements in aSavepoint[] */ 234d9b0257aSdrh }; 235d9b0257aSdrh 236d9b0257aSdrh /* 237538f570cSdrh ** The following global variables hold counters used for 238538f570cSdrh ** testing purposes only. These variables do not exist in 239538f570cSdrh ** a non-testing build. These variables are not thread-safe. 
240fcd35c7bSdrh */ 241fcd35c7bSdrh #ifdef SQLITE_TEST 242538f570cSdrh int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */ 243538f570cSdrh int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */ 244538f570cSdrh int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */ 245538f570cSdrh # define PAGER_INCR(v) v++ 246fcd35c7bSdrh #else 247538f570cSdrh # define PAGER_INCR(v) 248fcd35c7bSdrh #endif 249fcd35c7bSdrh 250538f570cSdrh 251538f570cSdrh 252fcd35c7bSdrh /* 2535e00f6c7Sdrh ** Journal files begin with the following magic string. The data 2545e00f6c7Sdrh ** was obtained from /dev/random. It is used only as a sanity check. 25594f3331aSdrh ** 256ae2b40c4Sdrh ** Since version 2.8.0, the journal format contains additional sanity 25730d53701Sdrh ** checking information. If the power fails while the journal is being 258ae2b40c4Sdrh ** written, semi-random garbage data might appear in the journal 259ae2b40c4Sdrh ** file after power is restored. If an attempt is then made 260968af52aSdrh ** to roll the journal back, the database could be corrupted. The additional 261968af52aSdrh ** sanity checking data is an attempt to discover the garbage in the 262968af52aSdrh ** journal and ignore it. 263968af52aSdrh ** 264ae2b40c4Sdrh ** The sanity checking information for the new journal format consists 265968af52aSdrh ** of a 32-bit checksum on each page of data. The checksum covers both 26690f5ecb3Sdrh ** the page number and the pPager->pageSize bytes of data for the page. 267968af52aSdrh ** This cksum is initialized to a 32-bit random value that appears in the 268968af52aSdrh ** journal file right after the header. The random initializer is important, 269968af52aSdrh ** because garbage data that appears at the end of a journal is likely 270968af52aSdrh ** data that was once in other files that have now been deleted. 
If the 271968af52aSdrh ** garbage data came from an obsolete journal file, the checksums might 272968af52aSdrh ** be correct. But by initializing the checksum to random value which 273968af52aSdrh ** is different for every journal, we minimize that risk. 274d9b0257aSdrh */ 275ae2b40c4Sdrh static const unsigned char aJournalMagic[] = { 276ae2b40c4Sdrh 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, 277ed7c855cSdrh }; 278ed7c855cSdrh 279ed7c855cSdrh /* 280726de599Sdrh ** The size of the header and of each page in the journal is determined 281726de599Sdrh ** by the following macros. 282968af52aSdrh */ 283ae2b40c4Sdrh #define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) 284968af52aSdrh 2857657240aSdanielk1977 /* 2867657240aSdanielk1977 ** The journal header size for this pager. In the future, this could be 2877657240aSdanielk1977 ** set to some value read from the disk controller. The important 2887657240aSdanielk1977 ** characteristic is that it is the same size as a disk sector. 2897657240aSdanielk1977 */ 2907657240aSdanielk1977 #define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) 2917657240aSdanielk1977 292b7f9164eSdrh /* 293b7f9164eSdrh ** The macro MEMDB is true if we are dealing with an in-memory database. 294b7f9164eSdrh ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set, 295b7f9164eSdrh ** the value of MEMDB will be a constant and the compiler will optimize 296b7f9164eSdrh ** out code that would never execute. 297b7f9164eSdrh */ 298b7f9164eSdrh #ifdef SQLITE_OMIT_MEMORYDB 299b7f9164eSdrh # define MEMDB 0 300b7f9164eSdrh #else 301b7f9164eSdrh # define MEMDB pPager->memDb 302b7f9164eSdrh #endif 303b7f9164eSdrh 304b7f9164eSdrh /* 3057657240aSdanielk1977 ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is 3067657240aSdanielk1977 ** reserved for working around a windows/posix incompatibility). 
It is 3077657240aSdanielk1977 ** used in the journal to signify that the remainder of the journal file 3087657240aSdanielk1977 ** is devoted to storing a master journal name - there are no more pages to 3097657240aSdanielk1977 ** roll back. See comments for function writeMasterJournal() for details. 3107657240aSdanielk1977 */ 311599fcbaeSdanielk1977 /* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */ 312d92db531Sdanielk1977 #define PAGER_MJ_PGNO(x) ((Pgno)((PENDING_BYTE/((x)->pageSize))+1)) 31313adf8a0Sdanielk1977 314968af52aSdrh /* 31526836654Sdanielk1977 ** The maximum legal page number is (2^31 - 1). 31626836654Sdanielk1977 */ 31726836654Sdanielk1977 #define PAGER_MAX_PGNO 2147483647 31826836654Sdanielk1977 31926836654Sdanielk1977 /* 3203460d19cSdanielk1977 ** Return true if it is necessary to write page *pPg into the sub-journal. 3213460d19cSdanielk1977 ** A page needs to be written into the sub-journal if there exists one 3223460d19cSdanielk1977 ** or more open savepoints for which: 323fd7f0452Sdanielk1977 ** 3243460d19cSdanielk1977 ** * The page-number is less than or equal to PagerSavepoint.nOrig, and 3253460d19cSdanielk1977 ** * The bit corresponding to the page-number is not set in 3263460d19cSdanielk1977 ** PagerSavepoint.pInSavepoint. 327f35843b5Sdanielk1977 */ 3283460d19cSdanielk1977 static int subjRequiresPage(PgHdr *pPg){ 3293460d19cSdanielk1977 Pgno pgno = pPg->pgno; 330f35843b5Sdanielk1977 Pager *pPager = pPg->pPager; 3313460d19cSdanielk1977 int i; 3323460d19cSdanielk1977 for(i=0; i<pPager->nSavepoint; i++){ 3333460d19cSdanielk1977 PagerSavepoint *p = &pPager->aSavepoint[i]; 3343460d19cSdanielk1977 if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){ 335fd7f0452Sdanielk1977 return 1; 336fd7f0452Sdanielk1977 } 3373460d19cSdanielk1977 } 3383460d19cSdanielk1977 return 0; 339f35843b5Sdanielk1977 } 3408ca0c724Sdrh 3413460d19cSdanielk1977 /* 3423460d19cSdanielk1977 ** Return true if the page is already in the journal file. 
3433460d19cSdanielk1977 */ 344bc2ca9ebSdanielk1977 static int pageInJournal(PgHdr *pPg){ 345bc2ca9ebSdanielk1977 return sqlite3BitvecTest(pPg->pPager->pInJournal, pPg->pgno); 346bc2ca9ebSdanielk1977 } 347bc2ca9ebSdanielk1977 3488ca0c724Sdrh /* 34934e79ceeSdrh ** Read a 32-bit integer from the given file descriptor. Store the integer 35034e79ceeSdrh ** that is read in *pRes. Return SQLITE_OK if everything worked, or an 35134e79ceeSdrh ** error code is something goes wrong. 352726de599Sdrh ** 353726de599Sdrh ** All values are stored on disk as big-endian. 35494f3331aSdrh */ 35562079060Sdanielk1977 static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ 35694f3331aSdrh unsigned char ac[4]; 35762079060Sdanielk1977 int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); 3583b59a5ccSdrh if( rc==SQLITE_OK ){ 359a3152895Sdrh *pRes = sqlite3Get4byte(ac); 36094f3331aSdrh } 36194f3331aSdrh return rc; 36294f3331aSdrh } 36394f3331aSdrh 36494f3331aSdrh /* 36597b57484Sdrh ** Write a 32-bit integer into a string buffer in big-endian byte order. 36697b57484Sdrh */ 367a3152895Sdrh #define put32bits(A,B) sqlite3Put4byte((u8*)A,B) 36897b57484Sdrh 36997b57484Sdrh /* 37034e79ceeSdrh ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK 37134e79ceeSdrh ** on success or an error code is something goes wrong. 37294f3331aSdrh */ 37362079060Sdanielk1977 static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ 374bab45c64Sdanielk1977 char ac[4]; 37597b57484Sdrh put32bits(ac, val); 37662079060Sdanielk1977 return sqlite3OsWrite(fd, ac, 4, offset); 37794f3331aSdrh } 37894f3331aSdrh 3792554f8b0Sdrh /* 3807a2b1eebSdanielk1977 ** If file pFd is open, call sqlite3OsUnlock() on it. 
3817a2b1eebSdanielk1977 */ 3827a2b1eebSdanielk1977 static int osUnlock(sqlite3_file *pFd, int eLock){ 3837a2b1eebSdanielk1977 if( !pFd->pMethods ){ 3847a2b1eebSdanielk1977 return SQLITE_OK; 3857a2b1eebSdanielk1977 } 3867a2b1eebSdanielk1977 return sqlite3OsUnlock(pFd, eLock); 3877a2b1eebSdanielk1977 } 3887a2b1eebSdanielk1977 3897a2b1eebSdanielk1977 /* 390c7b6017cSdanielk1977 ** This function determines whether or not the atomic-write optimization 391c7b6017cSdanielk1977 ** can be used with this pager. The optimization can be used if: 392c7b6017cSdanielk1977 ** 393c7b6017cSdanielk1977 ** (a) the value returned by OsDeviceCharacteristics() indicates that 394c7b6017cSdanielk1977 ** a database page may be written atomically, and 395c7b6017cSdanielk1977 ** (b) the value returned by OsSectorSize() is less than or equal 396c7b6017cSdanielk1977 ** to the page size. 397c7b6017cSdanielk1977 ** 398c7b6017cSdanielk1977 ** If the optimization cannot be used, 0 is returned. If it can be used, 399c7b6017cSdanielk1977 ** then the value returned is the size of the journal file when it 400c7b6017cSdanielk1977 ** contains rollback data for exactly one page. 
401c7b6017cSdanielk1977 */ 402c7b6017cSdanielk1977 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 403c7b6017cSdanielk1977 static int jrnlBufferSize(Pager *pPager){ 404c7b6017cSdanielk1977 int dc; /* Device characteristics */ 405c7b6017cSdanielk1977 int nSector; /* Sector size */ 406facf0307Sdrh int szPage; /* Page size */ 407c7b6017cSdanielk1977 sqlite3_file *fd = pPager->fd; 408c7b6017cSdanielk1977 409c7b6017cSdanielk1977 if( fd->pMethods ){ 410c7b6017cSdanielk1977 dc = sqlite3OsDeviceCharacteristics(fd); 4117cbd589dSdanielk1977 nSector = pPager->sectorSize; 412facf0307Sdrh szPage = pPager->pageSize; 413c7b6017cSdanielk1977 } 414c7b6017cSdanielk1977 415c7b6017cSdanielk1977 assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); 416c7b6017cSdanielk1977 assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); 417c7b6017cSdanielk1977 418facf0307Sdrh if( !fd->pMethods || 419facf0307Sdrh (dc & (SQLITE_IOCAP_ATOMIC|(szPage>>8)) && nSector<=szPage) ){ 420c7b6017cSdanielk1977 return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); 421c7b6017cSdanielk1977 } 422c7b6017cSdanielk1977 return 0; 423c7b6017cSdanielk1977 } 424c7b6017cSdanielk1977 #endif 425c7b6017cSdanielk1977 426c7b6017cSdanielk1977 /* 427aef0bf64Sdanielk1977 ** This function should be called when an error occurs within the pager 428a96a7103Sdanielk1977 ** code. The first argument is a pointer to the pager structure, the 429a96a7103Sdanielk1977 ** second the error-code about to be returned by a pager API function. 430a96a7103Sdanielk1977 ** The value returned is a copy of the second argument to this function. 431a96a7103Sdanielk1977 ** 4324f0ee686Sdrh ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL 433ae72d982Sdanielk1977 ** the error becomes persistent. Until the persisten error is cleared, 434ae72d982Sdanielk1977 ** subsequent API calls on this Pager will immediately return the same 435ae72d982Sdanielk1977 ** error code. 
436ae72d982Sdanielk1977 ** 437ae72d982Sdanielk1977 ** A persistent error indicates that the contents of the pager-cache 438ae72d982Sdanielk1977 ** cannot be trusted. This state can be cleared by completely discarding 439ae72d982Sdanielk1977 ** the contents of the pager-cache. If a transaction was active when 440ae72d982Sdanielk1977 ** the persistent error occured, then the rollback journal may need 441ae72d982Sdanielk1977 ** to be replayed. 442aef0bf64Sdanielk1977 */ 443ae72d982Sdanielk1977 static void pager_unlock(Pager *pPager); 444aef0bf64Sdanielk1977 static int pager_error(Pager *pPager, int rc){ 4454ac285a1Sdrh int rc2 = rc & 0xff; 44634f5621fSdrh assert( 44734f5621fSdrh pPager->errCode==SQLITE_FULL || 44834f5621fSdrh pPager->errCode==SQLITE_OK || 44934f5621fSdrh (pPager->errCode & 0xff)==SQLITE_IOERR 45034f5621fSdrh ); 451efaaf579Sdanielk1977 if( 4524ac285a1Sdrh rc2==SQLITE_FULL || 4534ac285a1Sdrh rc2==SQLITE_IOERR || 4544f0ee686Sdrh rc2==SQLITE_CORRUPT 455efaaf579Sdanielk1977 ){ 456efaaf579Sdanielk1977 pPager->errCode = rc; 4578c0a791aSdanielk1977 if( pPager->state==PAGER_UNLOCK 4588c0a791aSdanielk1977 && sqlite3PcacheRefCount(pPager->pPCache)==0 4598c0a791aSdanielk1977 ){ 460ae72d982Sdanielk1977 /* If the pager is already unlocked, call pager_unlock() now to 461ae72d982Sdanielk1977 ** clear the error state and ensure that the pager-cache is 462ae72d982Sdanielk1977 ** completely empty. 463ae72d982Sdanielk1977 */ 464ae72d982Sdanielk1977 pager_unlock(pPager); 465ae72d982Sdanielk1977 } 466aef0bf64Sdanielk1977 } 467aef0bf64Sdanielk1977 return rc; 468aef0bf64Sdanielk1977 } 469aef0bf64Sdanielk1977 470477731b5Sdrh /* 471477731b5Sdrh ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking 472477731b5Sdrh ** on the cache using a hash function. This is used for testing 473477731b5Sdrh ** and debugging only. 
474477731b5Sdrh */ 4753c407374Sdanielk1977 #ifdef SQLITE_CHECK_PAGES 4763c407374Sdanielk1977 /* 4773c407374Sdanielk1977 ** Return a 32-bit hash of the page data for pPage. 4783c407374Sdanielk1977 */ 479477731b5Sdrh static u32 pager_datahash(int nByte, unsigned char *pData){ 4803c407374Sdanielk1977 u32 hash = 0; 4813c407374Sdanielk1977 int i; 482477731b5Sdrh for(i=0; i<nByte; i++){ 483477731b5Sdrh hash = (hash*1039) + pData[i]; 4843c407374Sdanielk1977 } 4853c407374Sdanielk1977 return hash; 4863c407374Sdanielk1977 } 487477731b5Sdrh static u32 pager_pagehash(PgHdr *pPage){ 4888c0a791aSdanielk1977 return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData); 4898c0a791aSdanielk1977 } 490bc2ca9ebSdanielk1977 static void pager_set_pagehash(PgHdr *pPage){ 4918c0a791aSdanielk1977 pPage->pageHash = pager_pagehash(pPage); 492477731b5Sdrh } 4933c407374Sdanielk1977 4943c407374Sdanielk1977 /* 4953c407374Sdanielk1977 ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES 4963c407374Sdanielk1977 ** is defined, and NDEBUG is not defined, an assert() statement checks 4973c407374Sdanielk1977 ** that the page is either dirty or still matches the calculated page-hash. 4983c407374Sdanielk1977 */ 4993c407374Sdanielk1977 #define CHECK_PAGE(x) checkPage(x) 5003c407374Sdanielk1977 static void checkPage(PgHdr *pPg){ 5013c407374Sdanielk1977 Pager *pPager = pPg->pPager; 502b3175389Sdanielk1977 assert( !pPg->pageHash || pPager->errCode 5038c0a791aSdanielk1977 || (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) ); 5043c407374Sdanielk1977 } 5053c407374Sdanielk1977 5063c407374Sdanielk1977 #else 5078ffa8173Sdrh #define pager_datahash(X,Y) 0 508477731b5Sdrh #define pager_pagehash(X) 0 5093c407374Sdanielk1977 #define CHECK_PAGE(x) 51041d3027cSdrh #endif /* SQLITE_CHECK_PAGES */ 5113c407374Sdanielk1977 512ed7c855cSdrh /* 5137657240aSdanielk1977 ** When this is called the journal file for pager pPager must be open. 
5147657240aSdanielk1977 ** The master journal file name is read from the end of the file and 51565839c6aSdanielk1977 ** written into memory supplied by the caller. 5167657240aSdanielk1977 ** 51765839c6aSdanielk1977 ** zMaster must point to a buffer of at least nMaster bytes allocated by 51865839c6aSdanielk1977 ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is 51965839c6aSdanielk1977 ** enough space to write the master journal name). If the master journal 52065839c6aSdanielk1977 ** name in the journal is longer than nMaster bytes (including a 52165839c6aSdanielk1977 ** nul-terminator), then this is handled as if no master journal name 52265839c6aSdanielk1977 ** were present in the journal. 52365839c6aSdanielk1977 ** 52465839c6aSdanielk1977 ** If no master journal file name is present zMaster[0] is set to 0 and 5257657240aSdanielk1977 ** SQLITE_OK returned. 5267657240aSdanielk1977 */ 527d92db531Sdanielk1977 static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){ 5287657240aSdanielk1977 int rc; 5297657240aSdanielk1977 u32 len; 530eb206256Sdrh i64 szJ; 531c3e8f5efSdanielk1977 u32 cksum; 5320b8d2766Sshane u32 u; /* Unsigned loop counter */ 5337657240aSdanielk1977 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 5347657240aSdanielk1977 53565839c6aSdanielk1977 zMaster[0] = '\0'; 5367657240aSdanielk1977 537054889ecSdrh rc = sqlite3OsFileSize(pJrnl, &szJ); 538cafadbacSdanielk1977 if( rc!=SQLITE_OK || szJ<16 ) return rc; 5397657240aSdanielk1977 54062079060Sdanielk1977 rc = read32bits(pJrnl, szJ-16, &len); 5417657240aSdanielk1977 if( rc!=SQLITE_OK ) return rc; 5427657240aSdanielk1977 54365839c6aSdanielk1977 if( len>=nMaster ){ 54465839c6aSdanielk1977 return SQLITE_OK; 54565839c6aSdanielk1977 } 54665839c6aSdanielk1977 54762079060Sdanielk1977 rc = read32bits(pJrnl, szJ-12, &cksum); 5487657240aSdanielk1977 if( rc!=SQLITE_OK ) return rc; 5497657240aSdanielk1977 55062079060Sdanielk1977 rc = sqlite3OsRead(pJrnl, 
aMagic, 8, szJ-8); 5517657240aSdanielk1977 if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc; 5527657240aSdanielk1977 55365839c6aSdanielk1977 rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len); 5547657240aSdanielk1977 if( rc!=SQLITE_OK ){ 5557657240aSdanielk1977 return rc; 5567657240aSdanielk1977 } 55765839c6aSdanielk1977 zMaster[len] = '\0'; 5587657240aSdanielk1977 559cafadbacSdanielk1977 /* See if the checksum matches the master journal name */ 5600b8d2766Sshane for(u=0; u<len; u++){ 5610b8d2766Sshane cksum -= zMaster[u]; 562cafadbacSdanielk1977 } 5638191bff0Sdanielk1977 if( cksum ){ 5648191bff0Sdanielk1977 /* If the checksum doesn't add up, then one or more of the disk sectors 5658191bff0Sdanielk1977 ** containing the master journal filename is corrupted. This means 5668191bff0Sdanielk1977 ** definitely roll back, so just return SQLITE_OK and report a (nul) 5678191bff0Sdanielk1977 ** master-journal filename. 5688191bff0Sdanielk1977 */ 56965839c6aSdanielk1977 zMaster[0] = '\0'; 570aca790acSdanielk1977 } 571cafadbacSdanielk1977 5727657240aSdanielk1977 return SQLITE_OK; 5737657240aSdanielk1977 } 5747657240aSdanielk1977 5757657240aSdanielk1977 /* 5767657240aSdanielk1977 ** Seek the journal file descriptor to the next sector boundary where a 5777657240aSdanielk1977 ** journal header may be read or written. Pager.journalOff is updated with 5787657240aSdanielk1977 ** the new seek offset. 
5797657240aSdanielk1977 ** 5807657240aSdanielk1977 ** i.e for a sector size of 512: 5817657240aSdanielk1977 ** 5827657240aSdanielk1977 ** Input Offset Output Offset 5837657240aSdanielk1977 ** --------------------------------------- 5847657240aSdanielk1977 ** 0 0 5857657240aSdanielk1977 ** 512 512 5867657240aSdanielk1977 ** 100 512 5877657240aSdanielk1977 ** 2000 2048 5887657240aSdanielk1977 ** 5897657240aSdanielk1977 */ 590112f752bSdanielk1977 static i64 journalHdrOffset(Pager *pPager){ 591eb206256Sdrh i64 offset = 0; 592eb206256Sdrh i64 c = pPager->journalOff; 5937657240aSdanielk1977 if( c ){ 5947657240aSdanielk1977 offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); 5957657240aSdanielk1977 } 5967657240aSdanielk1977 assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); 5977657240aSdanielk1977 assert( offset>=c ); 5987657240aSdanielk1977 assert( (offset-c)<JOURNAL_HDR_SZ(pPager) ); 599112f752bSdanielk1977 return offset; 600112f752bSdanielk1977 } 601112f752bSdanielk1977 static void seekJournalHdr(Pager *pPager){ 602112f752bSdanielk1977 pPager->journalOff = journalHdrOffset(pPager); 6037657240aSdanielk1977 } 6047657240aSdanielk1977 6057657240aSdanielk1977 /* 606f3a87624Sdrh ** Write zeros over the header of the journal file. This has the 607f3a87624Sdrh ** effect of invalidating the journal file and committing the 608f3a87624Sdrh ** transaction. 
609f3a87624Sdrh */ 610df2566a3Sdanielk1977 static int zeroJournalHdr(Pager *pPager, int doTruncate){ 611df2566a3Sdanielk1977 int rc = SQLITE_OK; 61255a25a12Sdanielk1977 static const char zeroHdr[28] = {0}; 613f3a87624Sdrh 614df2566a3Sdanielk1977 if( pPager->journalOff ){ 615b53e4960Sdanielk1977 i64 iLimit = pPager->journalSizeLimit; 616b53e4960Sdanielk1977 617f3a87624Sdrh IOTRACE(("JZEROHDR %p\n", pPager)) 618b53e4960Sdanielk1977 if( doTruncate || iLimit==0 ){ 619df2566a3Sdanielk1977 rc = sqlite3OsTruncate(pPager->jfd, 0); 620df2566a3Sdanielk1977 }else{ 621f3a87624Sdrh rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0); 622df2566a3Sdanielk1977 } 6238162054bSdanielk1977 if( rc==SQLITE_OK && !pPager->noSync ){ 624a06ecba2Sdrh rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags); 625a06ecba2Sdrh } 626b53e4960Sdanielk1977 627b53e4960Sdanielk1977 /* At this point the transaction is committed but the write lock 628b53e4960Sdanielk1977 ** is still held on the file. If there is a size limit configured for 629b53e4960Sdanielk1977 ** the persistent journal and the journal file currently consumes more 630b53e4960Sdanielk1977 ** space than that limit allows for, truncate it now. There is no need 631b53e4960Sdanielk1977 ** to sync the file following this operation. 632b53e4960Sdanielk1977 */ 633b53e4960Sdanielk1977 if( rc==SQLITE_OK && iLimit>0 ){ 634b53e4960Sdanielk1977 i64 sz; 635b53e4960Sdanielk1977 rc = sqlite3OsFileSize(pPager->jfd, &sz); 636b53e4960Sdanielk1977 if( rc==SQLITE_OK && sz>iLimit ){ 637b53e4960Sdanielk1977 rc = sqlite3OsTruncate(pPager->jfd, iLimit); 638b53e4960Sdanielk1977 } 639b53e4960Sdanielk1977 } 640df2566a3Sdanielk1977 } 641f3a87624Sdrh return rc; 642f3a87624Sdrh } 643f3a87624Sdrh 644f3a87624Sdrh /* 6457657240aSdanielk1977 ** The journal file must be open when this routine is called. 
A journal 6467657240aSdanielk1977 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the 6477657240aSdanielk1977 ** current location. 6487657240aSdanielk1977 ** 6497657240aSdanielk1977 ** The format for the journal header is as follows: 6507657240aSdanielk1977 ** - 8 bytes: Magic identifying journal format. 6517657240aSdanielk1977 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. 6527657240aSdanielk1977 ** - 4 bytes: Random number used for page hash. 6537657240aSdanielk1977 ** - 4 bytes: Initial database page count. 6547657240aSdanielk1977 ** - 4 bytes: Sector size used by the process that wrote this journal. 65567c007bfSdanielk1977 ** - 4 bytes: Database page size. 6567657240aSdanielk1977 ** 65767c007bfSdanielk1977 ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space. 6587657240aSdanielk1977 */ 6597657240aSdanielk1977 static int writeJournalHdr(Pager *pPager){ 660a664f8ebSdanielk1977 int rc = SQLITE_OK; 661a664f8ebSdanielk1977 char *zHeader = pPager->pTmpSpace; 662d92db531Sdanielk1977 u32 nHeader = pPager->pageSize; 663d92db531Sdanielk1977 u32 nWrite; 664fd7f0452Sdanielk1977 int ii; 665a664f8ebSdanielk1977 666a664f8ebSdanielk1977 if( nHeader>JOURNAL_HDR_SZ(pPager) ){ 667a664f8ebSdanielk1977 nHeader = JOURNAL_HDR_SZ(pPager); 668a664f8ebSdanielk1977 } 6697657240aSdanielk1977 670fd7f0452Sdanielk1977 /* If there are active savepoints and any of them were created since the 671fd7f0452Sdanielk1977 ** most recent journal header was written, update the PagerSavepoint.iHdrOff 672fd7f0452Sdanielk1977 ** fields now. 
673fd7f0452Sdanielk1977 */ 674fd7f0452Sdanielk1977 for(ii=0; ii<pPager->nSavepoint; ii++){ 675fd7f0452Sdanielk1977 if( pPager->aSavepoint[ii].iHdrOffset==0 ){ 676fd7f0452Sdanielk1977 pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff; 677fd7f0452Sdanielk1977 } 6784099f6e1Sdanielk1977 } 6794099f6e1Sdanielk1977 68062079060Sdanielk1977 seekJournalHdr(pPager); 6817657240aSdanielk1977 pPager->journalHdr = pPager->journalOff; 6827657240aSdanielk1977 68397b57484Sdrh memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); 6844cd2cd5cSdanielk1977 6854cd2cd5cSdanielk1977 /* 6864cd2cd5cSdanielk1977 ** Write the nRec Field - the number of page records that follow this 6874cd2cd5cSdanielk1977 ** journal header. Normally, zero is written to this value at this time. 6884cd2cd5cSdanielk1977 ** After the records are added to the journal (and the journal synced, 6894cd2cd5cSdanielk1977 ** if in full-sync mode), the zero is overwritten with the true number 6904cd2cd5cSdanielk1977 ** of records (see syncJournal()). 6914cd2cd5cSdanielk1977 ** 6924cd2cd5cSdanielk1977 ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When 6934cd2cd5cSdanielk1977 ** reading the journal this value tells SQLite to assume that the 6944cd2cd5cSdanielk1977 ** rest of the journal file contains valid page records. This assumption 6954cd2cd5cSdanielk1977 ** is dangerous, as if a failure occured whilst writing to the journal 6964cd2cd5cSdanielk1977 ** file it may contain some garbage data. There are two scenarios 6974cd2cd5cSdanielk1977 ** where this risk can be ignored: 6984cd2cd5cSdanielk1977 ** 6994cd2cd5cSdanielk1977 ** * When the pager is in no-sync mode. Corruption can follow a 7004cd2cd5cSdanielk1977 ** power failure in this case anyway. 7014cd2cd5cSdanielk1977 ** 7024cd2cd5cSdanielk1977 ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees 7034cd2cd5cSdanielk1977 ** that garbage data is never appended to the journal file. 
7044cd2cd5cSdanielk1977 */ 7054cd2cd5cSdanielk1977 assert(pPager->fd->pMethods||pPager->noSync); 706b3175389Sdanielk1977 if( (pPager->noSync) || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY) 7074cd2cd5cSdanielk1977 || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 7084cd2cd5cSdanielk1977 ){ 7094cd2cd5cSdanielk1977 put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); 7104cd2cd5cSdanielk1977 }else{ 7114cd2cd5cSdanielk1977 put32bits(&zHeader[sizeof(aJournalMagic)], 0); 7124cd2cd5cSdanielk1977 } 7134cd2cd5cSdanielk1977 7147657240aSdanielk1977 /* The random check-hash initialiser */ 7152fa1868fSdrh sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); 71697b57484Sdrh put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); 7177657240aSdanielk1977 /* The initial database size */ 7183460d19cSdanielk1977 put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize); 7197657240aSdanielk1977 /* The assumed sector size for this process */ 72097b57484Sdrh put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); 72108609ce7Sdrh 72208609ce7Sdrh /* Initializing the tail of the buffer is not necessary. Everything 72308609ce7Sdrh ** works find if the following memset() is omitted. But initializing 72408609ce7Sdrh ** the memory prevents valgrind from complaining, so we are willing to 72508609ce7Sdrh ** take the performance hit. 
72608609ce7Sdrh */ 72708609ce7Sdrh memset(&zHeader[sizeof(aJournalMagic)+16], 0, 72808609ce7Sdrh nHeader-(sizeof(aJournalMagic)+16)); 72908609ce7Sdrh 73067c007bfSdanielk1977 if( pPager->journalHdr==0 ){ 73167c007bfSdanielk1977 /* The page size */ 73267c007bfSdanielk1977 put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize); 73367c007bfSdanielk1977 } 7347657240aSdanielk1977 735a664f8ebSdanielk1977 for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){ 736a664f8ebSdanielk1977 IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader)) 737a664f8ebSdanielk1977 rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff); 738a664f8ebSdanielk1977 pPager->journalOff += nHeader; 739b4746b9eSdrh } 740a664f8ebSdanielk1977 7417657240aSdanielk1977 return rc; 7427657240aSdanielk1977 } 7437657240aSdanielk1977 7447657240aSdanielk1977 /* 7457657240aSdanielk1977 ** The journal file must be open when this is called. A journal header file 7467657240aSdanielk1977 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal 747d6e5e098Sdrh ** file. The current location in the journal file is given by 748d6e5e098Sdrh ** pPager->journalOff. See comments above function writeJournalHdr() for 749d6e5e098Sdrh ** a description of the journal header format. 7507657240aSdanielk1977 ** 7517657240aSdanielk1977 ** If the header is read successfully, *nRec is set to the number of 7527657240aSdanielk1977 ** page records following this header and *dbSize is set to the size of the 7537657240aSdanielk1977 ** database before the transaction began, in pages. Also, pPager->cksumInit 7547657240aSdanielk1977 ** is set to the value read from the journal header. SQLITE_OK is returned 7557657240aSdanielk1977 ** in this case. 7567657240aSdanielk1977 ** 7577657240aSdanielk1977 ** If the journal header file appears to be corrupted, SQLITE_DONE is 758d6e5e098Sdrh ** returned and *nRec and *dbSize are undefined. 
If JOURNAL_HDR_SZ bytes 7597657240aSdanielk1977 ** cannot be read from the journal file an error code is returned. 7607657240aSdanielk1977 */ 7617657240aSdanielk1977 static int readJournalHdr( 7627657240aSdanielk1977 Pager *pPager, 763eb206256Sdrh i64 journalSize, 7647657240aSdanielk1977 u32 *pNRec, 7657657240aSdanielk1977 u32 *pDbSize 7667657240aSdanielk1977 ){ 7677657240aSdanielk1977 int rc; 7687657240aSdanielk1977 unsigned char aMagic[8]; /* A buffer to hold the magic header */ 76962079060Sdanielk1977 i64 jrnlOff; 7707cbd589dSdanielk1977 u32 iPageSize; 7717cbd589dSdanielk1977 u32 iSectorSize; 7727657240aSdanielk1977 77362079060Sdanielk1977 seekJournalHdr(pPager); 7747657240aSdanielk1977 if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ 7757657240aSdanielk1977 return SQLITE_DONE; 7767657240aSdanielk1977 } 77762079060Sdanielk1977 jrnlOff = pPager->journalOff; 7787657240aSdanielk1977 77962079060Sdanielk1977 rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff); 7807657240aSdanielk1977 if( rc ) return rc; 78162079060Sdanielk1977 jrnlOff += sizeof(aMagic); 7827657240aSdanielk1977 7837657240aSdanielk1977 if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ 7847657240aSdanielk1977 return SQLITE_DONE; 7857657240aSdanielk1977 } 7867657240aSdanielk1977 78762079060Sdanielk1977 rc = read32bits(pPager->jfd, jrnlOff, pNRec); 7887657240aSdanielk1977 if( rc ) return rc; 7897657240aSdanielk1977 79062079060Sdanielk1977 rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit); 7917657240aSdanielk1977 if( rc ) return rc; 7927657240aSdanielk1977 79362079060Sdanielk1977 rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize); 7947657240aSdanielk1977 if( rc ) return rc; 7957657240aSdanielk1977 7967cbd589dSdanielk1977 if( pPager->journalOff==0 ){ 7977cbd589dSdanielk1977 rc = read32bits(pPager->jfd, jrnlOff+16, &iPageSize); 7987cbd589dSdanielk1977 if( rc ) return rc; 7997cbd589dSdanielk1977 8007cbd589dSdanielk1977 if( iPageSize<512 8017cbd589dSdanielk1977 || 
iPageSize>SQLITE_MAX_PAGE_SIZE 8027cbd589dSdanielk1977 || ((iPageSize-1)&iPageSize)!=0 80367c007bfSdanielk1977 ){ 8047cbd589dSdanielk1977 /* If the page-size in the journal-header is invalid, then the process 8057cbd589dSdanielk1977 ** that wrote the journal-header must have crashed before the header 8067cbd589dSdanielk1977 ** was synced. In this case stop reading the journal file here. 8077cbd589dSdanielk1977 */ 8087cbd589dSdanielk1977 rc = SQLITE_DONE; 8097cbd589dSdanielk1977 }else{ 8104f21c4afSdrh u16 pagesize = (u16)iPageSize; 81167c007bfSdanielk1977 rc = sqlite3PagerSetPagesize(pPager, &pagesize); 8127cbd589dSdanielk1977 assert( rc!=SQLITE_OK || pagesize==(u16)iPageSize ); 81367c007bfSdanielk1977 } 81467c007bfSdanielk1977 if( rc ) return rc; 81567c007bfSdanielk1977 8167657240aSdanielk1977 /* Update the assumed sector-size to match the value used by 8177657240aSdanielk1977 ** the process that created this journal. If this journal was 8187657240aSdanielk1977 ** created by a process other than this one, then this routine 8197657240aSdanielk1977 ** is being called from within pager_playback(). The local value 8207657240aSdanielk1977 ** of Pager.sectorSize is restored at the end of that routine. 
8217657240aSdanielk1977 */ 8227cbd589dSdanielk1977 rc = read32bits(pPager->jfd, jrnlOff+12, &iSectorSize); 8237657240aSdanielk1977 if( rc ) return rc; 8247cbd589dSdanielk1977 if( (iSectorSize&(iSectorSize-1)) 8257cbd589dSdanielk1977 || iSectorSize<512 8267cbd589dSdanielk1977 || iSectorSize>MAX_SECTOR_SIZE 8277cbd589dSdanielk1977 ){ 82898c58356Sdrh return SQLITE_DONE; 82998c58356Sdrh } 8307cbd589dSdanielk1977 pPager->sectorSize = iSectorSize; 8317cbd589dSdanielk1977 } 8327657240aSdanielk1977 8337657240aSdanielk1977 pPager->journalOff += JOURNAL_HDR_SZ(pPager); 83462079060Sdanielk1977 return SQLITE_OK; 8357657240aSdanielk1977 } 8367657240aSdanielk1977 8377657240aSdanielk1977 8387657240aSdanielk1977 /* 8397657240aSdanielk1977 ** Write the supplied master journal name into the journal file for pager 840cafadbacSdanielk1977 ** pPager at the current location. The master journal name must be the last 841cafadbacSdanielk1977 ** thing written to a journal file. If the pager is in full-sync mode, the 842cafadbacSdanielk1977 ** journal file descriptor is advanced to the next sector boundary before 843cafadbacSdanielk1977 ** anything is written. The format is: 844cafadbacSdanielk1977 ** 845cafadbacSdanielk1977 ** + 4 bytes: PAGER_MJ_PGNO. 846cafadbacSdanielk1977 ** + N bytes: length of master journal name. 847cafadbacSdanielk1977 ** + 4 bytes: N 848cafadbacSdanielk1977 ** + 4 bytes: Master journal name checksum. 849cafadbacSdanielk1977 ** + 8 bytes: aJournalMagic[]. 850cafadbacSdanielk1977 ** 851cafadbacSdanielk1977 ** The master journal page checksum is the sum of the bytes in the master 852cafadbacSdanielk1977 ** journal name. 853aef0bf64Sdanielk1977 ** 854aef0bf64Sdanielk1977 ** If zMaster is a NULL pointer (occurs for a single database transaction), 855aef0bf64Sdanielk1977 ** this call is a no-op. 
8567657240aSdanielk1977 */ 8577657240aSdanielk1977 static int writeMasterJournal(Pager *pPager, const char *zMaster){ 8587657240aSdanielk1977 int rc; 8597657240aSdanielk1977 int len; 860cafadbacSdanielk1977 int i; 86162079060Sdanielk1977 i64 jrnlOff; 862df2566a3Sdanielk1977 i64 jrnlSize; 863c3e8f5efSdanielk1977 u32 cksum = 0; 86497b57484Sdrh char zBuf[sizeof(aJournalMagic)+2*4]; 8657657240aSdanielk1977 8667657240aSdanielk1977 if( !zMaster || pPager->setMaster ) return SQLITE_OK; 867b3175389Sdanielk1977 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ) return SQLITE_OK; 8687657240aSdanielk1977 pPager->setMaster = 1; 8697657240aSdanielk1977 870ea678832Sdrh len = sqlite3Strlen30(zMaster); 871cafadbacSdanielk1977 for(i=0; i<len; i++){ 872cafadbacSdanielk1977 cksum += zMaster[i]; 873cafadbacSdanielk1977 } 8747657240aSdanielk1977 8757657240aSdanielk1977 /* If in full-sync mode, advance to the next disk sector before writing 8767657240aSdanielk1977 ** the master journal name. This is in case the previous page written to 8777657240aSdanielk1977 ** the journal has already been synced. 
8787657240aSdanielk1977 */ 8797657240aSdanielk1977 if( pPager->fullSync ){ 88062079060Sdanielk1977 seekJournalHdr(pPager); 8817657240aSdanielk1977 } 88262079060Sdanielk1977 jrnlOff = pPager->journalOff; 883cafadbacSdanielk1977 pPager->journalOff += (len+20); 8847657240aSdanielk1977 88562079060Sdanielk1977 rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager)); 8867657240aSdanielk1977 if( rc!=SQLITE_OK ) return rc; 88762079060Sdanielk1977 jrnlOff += 4; 8887657240aSdanielk1977 88962079060Sdanielk1977 rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff); 8907657240aSdanielk1977 if( rc!=SQLITE_OK ) return rc; 89162079060Sdanielk1977 jrnlOff += len; 8927657240aSdanielk1977 89397b57484Sdrh put32bits(zBuf, len); 89497b57484Sdrh put32bits(&zBuf[4], cksum); 89597b57484Sdrh memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic)); 89662079060Sdanielk1977 rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff); 897df2566a3Sdanielk1977 jrnlOff += 8+sizeof(aJournalMagic); 8982c8997b9Sdrh pPager->needSync = !pPager->noSync; 899df2566a3Sdanielk1977 900df2566a3Sdanielk1977 /* If the pager is in peristent-journal mode, then the physical 901df2566a3Sdanielk1977 ** journal-file may extend past the end of the master-journal name 902df2566a3Sdanielk1977 ** and 8 bytes of magic data just written to the file. This is 903df2566a3Sdanielk1977 ** dangerous because the code to rollback a hot-journal file 904df2566a3Sdanielk1977 ** will not be able to find the master-journal name to determine 905df2566a3Sdanielk1977 ** whether or not the journal is hot. 906df2566a3Sdanielk1977 ** 907df2566a3Sdanielk1977 ** Easiest thing to do in this scenario is to truncate the journal 908df2566a3Sdanielk1977 ** file to the required size. 
909df2566a3Sdanielk1977 */ 910df2566a3Sdanielk1977 if( (rc==SQLITE_OK) 911df2566a3Sdanielk1977 && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK 912df2566a3Sdanielk1977 && jrnlSize>jrnlOff 913df2566a3Sdanielk1977 ){ 914df2566a3Sdanielk1977 rc = sqlite3OsTruncate(pPager->jfd, jrnlOff); 915df2566a3Sdanielk1977 } 9167657240aSdanielk1977 return rc; 9177657240aSdanielk1977 } 9187657240aSdanielk1977 9197657240aSdanielk1977 /* 920ed7c855cSdrh ** Find a page in the hash table given its page number. Return 921ed7c855cSdrh ** a pointer to the page or NULL if not found. 922ed7c855cSdrh */ 923d9b0257aSdrh static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ 9248ca0c724Sdrh PgHdr *p; 9258c0a791aSdanielk1977 sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p); 926ed7c855cSdrh return p; 927ed7c855cSdrh } 928ed7c855cSdrh 929ed7c855cSdrh /* 930e180dd93Sdanielk1977 ** Clear the in-memory cache. This routine 931ed7c855cSdrh ** sets the state of the pager back to what it was when it was first 932ed7c855cSdrh ** opened. Any outstanding pages are invalidated and subsequent attempts 933ed7c855cSdrh ** to access those pages will likely result in a coredump. 934ed7c855cSdrh */ 935d9b0257aSdrh static void pager_reset(Pager *pPager){ 936efaaf579Sdanielk1977 if( pPager->errCode ) return; 9378c0a791aSdanielk1977 sqlite3PcacheClear(pPager->pPCache); 938e277be05Sdanielk1977 } 939e277be05Sdanielk1977 94034cf35daSdanielk1977 /* 94134cf35daSdanielk1977 ** Free all structures in the Pager.aSavepoint[] array and set both 94234cf35daSdanielk1977 ** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal 94334cf35daSdanielk1977 ** if it is open and the pager is not in exclusive mode. 
94434cf35daSdanielk1977 */ 945fd7f0452Sdanielk1977 static void releaseAllSavepoint(Pager *pPager){ 946fd7f0452Sdanielk1977 int ii; 947fd7f0452Sdanielk1977 for(ii=0; ii<pPager->nSavepoint; ii++){ 948fd7f0452Sdanielk1977 sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); 949fd7f0452Sdanielk1977 } 950fd7f0452Sdanielk1977 if( !pPager->exclusiveMode ){ 951fd7f0452Sdanielk1977 sqlite3OsClose(pPager->sjfd); 952fd7f0452Sdanielk1977 } 953fd7f0452Sdanielk1977 sqlite3_free(pPager->aSavepoint); 954fd7f0452Sdanielk1977 pPager->aSavepoint = 0; 955fd7f0452Sdanielk1977 pPager->nSavepoint = 0; 95667ddef69Sdanielk1977 pPager->stmtNRec = 0; 957fd7f0452Sdanielk1977 } 958fd7f0452Sdanielk1977 95934cf35daSdanielk1977 /* 96034cf35daSdanielk1977 ** Set the bit number pgno in the PagerSavepoint.pInSavepoint bitvecs of 96134cf35daSdanielk1977 ** all open savepoints. 96234cf35daSdanielk1977 */ 963fd7f0452Sdanielk1977 static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ 9647539b6b8Sdrh int ii; /* Loop counter */ 9657539b6b8Sdrh int rc = SQLITE_OK; /* Result code */ 9667539b6b8Sdrh 967fd7f0452Sdanielk1977 for(ii=0; ii<pPager->nSavepoint; ii++){ 968fd7f0452Sdanielk1977 PagerSavepoint *p = &pPager->aSavepoint[ii]; 969fd7f0452Sdanielk1977 if( pgno<=p->nOrig ){ 9707539b6b8Sdrh rc |= sqlite3BitvecSet(p->pInSavepoint, pgno); 9717539b6b8Sdrh assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); 972fd7f0452Sdanielk1977 } 973fd7f0452Sdanielk1977 } 9747539b6b8Sdrh return rc; 975fd7f0452Sdanielk1977 } 976fd7f0452Sdanielk1977 977e277be05Sdanielk1977 /* 978ae72d982Sdanielk1977 ** Unlock the database file. 979ae72d982Sdanielk1977 ** 980ae72d982Sdanielk1977 ** If the pager is currently in error state, discard the contents of 981ae72d982Sdanielk1977 ** the cache and reset the Pager structure internal state. 
If there is 982ae72d982Sdanielk1977 ** an open journal-file, then the next time a shared-lock is obtained 983ae72d982Sdanielk1977 ** on the pager file (by this or any other process), it will be 984ae72d982Sdanielk1977 ** treated as a hot-journal and rolled back. 985ae72d982Sdanielk1977 */ 986ae72d982Sdanielk1977 static void pager_unlock(Pager *pPager){ 987ae72d982Sdanielk1977 if( !pPager->exclusiveMode ){ 9885f2d46b3Sdanielk1977 int rc; 989ae72d982Sdanielk1977 99016e45a43Sdrh /* Always close the journal file when dropping the database lock. 99116e45a43Sdrh ** Otherwise, another connection with journal_mode=delete might 99216e45a43Sdrh ** delete the file out from under us. 99316e45a43Sdrh */ 99416e45a43Sdrh if( pPager->journalOpen ){ 99516e45a43Sdrh sqlite3OsClose(pPager->jfd); 99616e45a43Sdrh pPager->journalOpen = 0; 99716e45a43Sdrh sqlite3BitvecDestroy(pPager->pInJournal); 99816e45a43Sdrh pPager->pInJournal = 0; 999a1fa00d9Sdanielk1977 sqlite3BitvecDestroy(pPager->pAlwaysRollback); 1000a1fa00d9Sdanielk1977 pPager->pAlwaysRollback = 0; 100116e45a43Sdrh } 100216e45a43Sdrh 10035f2d46b3Sdanielk1977 rc = osUnlock(pPager->fd, NO_LOCK); 10045f2d46b3Sdanielk1977 if( rc ) pPager->errCode = rc; 10055f2d46b3Sdanielk1977 pPager->dbSizeValid = 0; 10065f2d46b3Sdanielk1977 IOTRACE(("UNLOCK %p\n", pPager)) 10075f2d46b3Sdanielk1977 1008ae72d982Sdanielk1977 /* If Pager.errCode is set, the contents of the pager cache cannot be 1009ae72d982Sdanielk1977 ** trusted. Now that the pager file is unlocked, the contents of the 1010ae72d982Sdanielk1977 ** cache can be discarded and the error code safely cleared. 
1011ae72d982Sdanielk1977 */ 1012ae72d982Sdanielk1977 if( pPager->errCode ){ 10131aa5af11Sdrh if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK; 1014ae72d982Sdanielk1977 pager_reset(pPager); 1015fd7f0452Sdanielk1977 releaseAllSavepoint(pPager); 1016ae72d982Sdanielk1977 pPager->journalOff = 0; 1017ae72d982Sdanielk1977 pPager->journalStarted = 0; 10183460d19cSdanielk1977 pPager->dbOrigSize = 0; 1019ae72d982Sdanielk1977 } 1020ae72d982Sdanielk1977 1021ae72d982Sdanielk1977 pPager->state = PAGER_UNLOCK; 1022ae72d982Sdanielk1977 pPager->changeCountDone = 0; 1023ae72d982Sdanielk1977 } 1024ae72d982Sdanielk1977 } 1025ae72d982Sdanielk1977 1026ae72d982Sdanielk1977 /* 1027ae72d982Sdanielk1977 ** Execute a rollback if a transaction is active and unlock the 1028ae72d982Sdanielk1977 ** database file. If the pager has already entered the error state, 1029ae72d982Sdanielk1977 ** do not attempt the rollback. 1030ae72d982Sdanielk1977 */ 1031ae72d982Sdanielk1977 static void pagerUnlockAndRollback(Pager *p){ 1032ae72d982Sdanielk1977 if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){ 10332d1d86fbSdanielk1977 sqlite3BeginBenignMalloc(); 1034ae72d982Sdanielk1977 sqlite3PagerRollback(p); 10352d1d86fbSdanielk1977 sqlite3EndBenignMalloc(); 1036ae72d982Sdanielk1977 } 1037ae72d982Sdanielk1977 pager_unlock(p); 1038ae72d982Sdanielk1977 } 1039ae72d982Sdanielk1977 1040ae72d982Sdanielk1977 /* 104180e35f46Sdrh ** This routine ends a transaction. A transaction is ended by either 104280e35f46Sdrh ** a COMMIT or a ROLLBACK. 104380e35f46Sdrh ** 1044ed7c855cSdrh ** When this routine is called, the pager has the journal file open and 104580e35f46Sdrh ** a RESERVED or EXCLUSIVE lock on the database. This routine will release 104680e35f46Sdrh ** the database lock and acquires a SHARED lock in its place if that is 104780e35f46Sdrh ** the appropriate thing to do. 
Release locks usually is appropriate, 104880e35f46Sdrh ** unless we are in exclusive access mode or unless this is a 104980e35f46Sdrh ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation. 105080e35f46Sdrh ** 105180e35f46Sdrh ** The journal file is either deleted or truncated. 105250457896Sdrh ** 105350457896Sdrh ** TODO: Consider keeping the journal file open for temporary databases. 105450457896Sdrh ** This might give a performance improvement on windows where opening 105550457896Sdrh ** a file is an expensive operation. 1056ed7c855cSdrh */ 1057df2566a3Sdanielk1977 static int pager_end_transaction(Pager *pPager, int hasMaster){ 105841483468Sdanielk1977 int rc = SQLITE_OK; 1059979f38e5Sdanielk1977 int rc2 = SQLITE_OK; 1060a6abd041Sdrh if( pPager->state<PAGER_RESERVED ){ 1061a6abd041Sdrh return SQLITE_OK; 1062a6abd041Sdrh } 1063fd7f0452Sdanielk1977 releaseAllSavepoint(pPager); 1064da47d774Sdrh if( pPager->journalOpen ){ 1065b3175389Sdanielk1977 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ 1066b3175389Sdanielk1977 int isMemoryJournal = sqlite3IsMemJournal(pPager->jfd); 1067b3175389Sdanielk1977 sqlite3OsClose(pPager->jfd); 1068b3175389Sdanielk1977 pPager->journalOpen = 0; 1069b3175389Sdanielk1977 if( !isMemoryJournal ){ 1070b3175389Sdanielk1977 rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); 1071b3175389Sdanielk1977 } 1072b3175389Sdanielk1977 }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE 107304335886Sdrh && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){ 107404335886Sdrh pPager->journalOff = 0; 107504335886Sdrh pPager->journalStarted = 0; 107604335886Sdrh }else if( pPager->exclusiveMode 107793f7af97Sdanielk1977 || pPager->journalMode==PAGER_JOURNALMODE_PERSIST 107893f7af97Sdanielk1977 ){ 107993f7af97Sdanielk1977 rc = zeroJournalHdr(pPager, hasMaster); 108093f7af97Sdanielk1977 pager_error(pPager, rc); 108141483468Sdanielk1977 pPager->journalOff = 0; 1082334cdb63Sdanielk1977 pPager->journalStarted = 0; 108341483468Sdanielk1977 }else{ 
108404335886Sdrh assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || rc ); 1085b4b47411Sdanielk1977 sqlite3OsClose(pPager->jfd); 10868cfbf08fSdrh pPager->journalOpen = 0; 10870f01fdaeSdanielk1977 if( rc==SQLITE_OK && !pPager->tempFile ){ 1088fee2d25aSdanielk1977 rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); 108941483468Sdanielk1977 } 10907152de8dSdanielk1977 } 1091f5e7bb51Sdrh sqlite3BitvecDestroy(pPager->pInJournal); 1092f5e7bb51Sdrh pPager->pInJournal = 0; 1093a1fa00d9Sdanielk1977 sqlite3BitvecDestroy(pPager->pAlwaysRollback); 1094a1fa00d9Sdanielk1977 pPager->pAlwaysRollback = 0; 10953c407374Sdanielk1977 #ifdef SQLITE_CHECK_PAGES 1096bc2ca9ebSdanielk1977 sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash); 10973c407374Sdanielk1977 #endif 1098bc2ca9ebSdanielk1977 sqlite3PcacheCleanAll(pPager->pPCache); 1099ef317ab5Sdanielk1977 pPager->dirtyCache = 0; 1100ef317ab5Sdanielk1977 pPager->nRec = 0; 1101da47d774Sdrh }else{ 1102f5e7bb51Sdrh assert( pPager->pInJournal==0 ); 1103da47d774Sdrh } 1104979f38e5Sdanielk1977 110541483468Sdanielk1977 if( !pPager->exclusiveMode ){ 11067a2b1eebSdanielk1977 rc2 = osUnlock(pPager->fd, SHARED_LOCK); 1107a6abd041Sdrh pPager->state = PAGER_SHARED; 1108*104f1fefSdanielk1977 pPager->changeCountDone = 0; 1109334cdb63Sdanielk1977 }else if( pPager->state==PAGER_SYNCED ){ 1110334cdb63Sdanielk1977 pPager->state = PAGER_EXCLUSIVE; 1111334cdb63Sdanielk1977 } 11123460d19cSdanielk1977 pPager->dbOrigSize = 0; 11137657240aSdanielk1977 pPager->setMaster = 0; 1114c4da5b9fSdanielk1977 pPager->needSync = 0; 11158c0a791aSdanielk1977 /* lruListSetFirstSynced(pPager); */ 1116f90b7260Sdanielk1977 sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize); 1117b3175389Sdanielk1977 if( !MEMDB ){ 1118d92db531Sdanielk1977 pPager->dbSizeValid = 0; 1119b3175389Sdanielk1977 } 1120d138c016Sdrh pPager->dbModified = 0; 1121979f38e5Sdanielk1977 1122979f38e5Sdanielk1977 return (rc==SQLITE_OK?rc2:rc); 1123ed7c855cSdrh } 1124ed7c855cSdrh 
1125ed7c855cSdrh /* 1126968af52aSdrh ** Compute and return a checksum for the page of data. 112734e79ceeSdrh ** 112834e79ceeSdrh ** This is not a real checksum. It is really just the sum of the 1129726de599Sdrh ** random initial value and the page number. We experimented with 1130726de599Sdrh ** a checksum of the entire data, but that was found to be too slow. 1131726de599Sdrh ** 1132726de599Sdrh ** Note that the page number is stored at the beginning of data and 1133726de599Sdrh ** the checksum is stored at the end. This is important. If journal 1134726de599Sdrh ** corruption occurs due to a power failure, the most likely scenario 1135726de599Sdrh ** is that one end or the other of the record will be changed. It is 1136726de599Sdrh ** much less likely that the two ends of the journal record will be 1137726de599Sdrh ** correct and the middle be corrupt. Thus, this "checksum" scheme, 1138726de599Sdrh ** though fast and simple, catches the mostly likely kind of corruption. 1139726de599Sdrh ** 1140726de599Sdrh ** FIX ME: Consider adding every 200th (or so) byte of the data to the 1141726de599Sdrh ** checksum. That way if a single page spans 3 or more disk sectors and 1142726de599Sdrh ** only the middle sector is corrupt, we will still have a reasonable 1143726de599Sdrh ** chance of failing the checksum and thus detecting the problem. 1144968af52aSdrh */ 114574161705Sdrh static u32 pager_cksum(Pager *pPager, const u8 *aData){ 1146ef317ab5Sdanielk1977 u32 cksum = pPager->cksumInit; 1147ef317ab5Sdanielk1977 int i = pPager->pageSize-200; 1148ef317ab5Sdanielk1977 while( i>0 ){ 1149ef317ab5Sdanielk1977 cksum += aData[i]; 1150ef317ab5Sdanielk1977 i -= 200; 1151ef317ab5Sdanielk1977 } 1152968af52aSdrh return cksum; 1153968af52aSdrh } 1154968af52aSdrh 1155968af52aSdrh /* 1156d6e5e098Sdrh ** Read a single page from either the journal file (if isMainJrnl==1) or 1157d6e5e098Sdrh ** from the sub-journal (if isMainJrnl==0) and playback that page. 
** The page begins at offset *pOffset into the file.  The *pOffset
** value is increased to the start of the next page in the journal.
**
** The isMainJrnl flag is true if this is the main rollback journal and
** false for the statement journal.  The main rollback journal uses
** checksums - the statement journal does not.
**
** If pDone is not NULL, then it is a record of pages that have already
** been played back.  If the page at *pOffset has already been played back
** (if the corresponding pDone bit is set) then skip the playback.
** Make sure the pDone bit corresponding to the *pOffset page is set
** prior to returning.
**
** Return values:
**
**   SQLITE_DONE   The record's page number is 0 or is the page reserved
**                 by PAGER_MJ_PGNO(), or (main journal, non-savepoint
**                 rollback only) the stored checksum does not match the
**                 page content.  Nothing is played back.
**
**   SQLITE_OK     The page was played back, or was skipped because its
**                 page number is larger than pPager->dbSize or because
**                 its bit is already set in pDone.
**
**   other         An error code returned by read32bits(), sqlite3OsRead(),
**                 sqlite3BitvecSet(), sqlite3PagerAcquire() or
**                 sqlite3OsWrite() is passed back to the caller.
*/
static int pager_playback_one_page(
  Pager *pPager,                /* The pager being played back */
  int isMainJrnl,               /* 1 -> main journal. 0 -> sub-journal. */
  i64 *pOffset,                 /* Offset of record to playback */
  int isSavepnt,                /* True for a savepoint rollback */
  Bitvec *pDone                 /* Bitvec of pages already played back */
){
  int rc;
  PgHdr *pPg;                   /* An existing page in the cache */
  Pgno pgno;                    /* The page number of a page in journal */
  u32 cksum;                    /* Checksum used for sanity checking */
  u8 *aData;                    /* Temporary storage for the page */
  sqlite3_file *jfd;            /* The file descriptor for the journal file */

  assert( (isMainJrnl&~1)==0 );      /* isMainJrnl is 0 or 1 */
  assert( (isSavepnt&~1)==0 );       /* isSavepnt is 0 or 1 */
  assert( isMainJrnl || pDone );     /* pDone always used on sub-journals */
  assert( isSavepnt || pDone==0 );   /* pDone never used on non-savepoint */

  aData = (u8*)pPager->pTmpSpace;
  assert( aData );         /* Temp storage must have already been allocated */

  jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;

  /* Read the 4-byte page number followed by the page content. */
  rc = read32bits(jfd, *pOffset, &pgno);
  if( rc!=SQLITE_OK ) return rc;
  rc = sqlite3OsRead(jfd, aData, pPager->pageSize, (*pOffset)+4);
  if( rc!=SQLITE_OK ) return rc;

  /* Step over the whole record: page number (4 bytes), page content, and,
  ** for the main journal only, the trailing 4-byte checksum.
  */
  *pOffset += pPager->pageSize + 4 + isMainJrnl*4;

  /* Sanity checking on the page.  This is more important than I originally
  ** thought.  If a power failure occurs while the journal is being written,
  ** it could cause invalid data to be written into the journal.  We need to
  ** detect this invalid data (with high probability) and ignore it.
  */
  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
    return SQLITE_DONE;
  }
  if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
    return SQLITE_OK;
  }
  if( isMainJrnl ){
    /* *pOffset has already been advanced, so the checksum is the last
    ** four bytes before it.  The checksum is only enforced when this is
    ** not a savepoint rollback.
    */
    rc = read32bits(jfd, (*pOffset)-4, &cksum);
    if( rc ) return rc;
    if( !isSavepnt && pager_cksum(pPager, aData)!=cksum ){
      return SQLITE_DONE;
    }
  }
  if( pDone && (rc = sqlite3BitvecSet(pDone, pgno)) ){
    return rc;
  }

  assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );

  /* If the pager is in RESERVED state, then there must be a copy of this
  ** page in the pager cache. In this case just update the pager cache,
  ** not the database file. The page is left marked dirty in this case.
  **
  ** An exception to the above rule: If the database is in no-sync mode
  ** and a page is moved during an incremental vacuum then the page may
  ** not be in the pager cache. Later: if a malloc() or IO error occurs
  ** during a Movepage() call, then the page may not be in the cache
  ** either. So the condition described in the above paragraph is not
  ** assert()able.
  **
  ** If in EXCLUSIVE state, then we update the pager cache if it exists
  ** and the main file. The page is then marked not dirty.
  **
  ** Ticket #1171:  The statement journal might contain page content that is
  ** different from the page content at the start of the transaction.
  ** This occurs when a page is changed prior to the start of a statement
  ** then changed again within the statement.  When rolling back such a
  ** statement we must not write to the original database unless we know
  ** for certain that original page contents are synced into the main rollback
  ** journal.  Otherwise, a power loss might leave modified data in the
  ** database file without an entry in the rollback journal that can
  ** restore the database to its original form.  Two conditions must be
  ** met before writing to the database files. (1) the database must be
  ** locked.  (2) we know that the original page content is fully synced
  ** in the main journal either because the page is not in cache or else
  ** the page is marked as needSync==0.
  **
  ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
  ** is possible to fail a statement on a database that does not yet exist.
  ** Do not attempt to write if database file has never been opened.
  */
  pPg = pager_lookup(pPager, pgno);
  PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n",
               PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData),
               (isMainJrnl?"main-journal":"sub-journal")
  ));
  if( (pPager->state>=PAGER_EXCLUSIVE)
   && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
   && (pPager->fd->pMethods)
  ){
    /* Page numbers are 1-based, so page N starts at byte (N-1)*pageSize. */
    i64 ofst = (pgno-1)*(i64)pPager->pageSize;
    rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
    if( pgno>pPager->dbFileSize ){
      pPager->dbFileSize = pgno;
    }
  }else if( !isMainJrnl && pPg==0 ){
    /* If this is a rollback of a savepoint and data was not written to
    ** the database and the page is not in-memory, there is a potential
    ** problem. When the page is next fetched by the b-tree layer, it
    ** will be read from the database file, which may or may not be
    ** current.
    **
    ** There are a couple of different ways this can happen. All are quite
    ** obscure. When running in synchronous mode, this can only happen
    ** if the page is on the free-list at the start of the transaction, then
    ** populated, then moved using sqlite3PagerMovepage().
    **
    ** The solution is to add an in-memory page to the cache containing
    ** the data just read from the sub-journal. Mark the page as dirty
    ** and if the pager requires a journal-sync, then mark the page as
    ** requiring a journal-sync before it is written.
    */
    assert( isSavepnt );
    if( (rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1)) ){
      return rc;
    }
    pPg->flags &= ~PGHDR_NEED_READ;
    sqlite3PcacheMakeDirty(pPg);
  }
  if( pPg ){
    /* No page should ever be explicitly rolled back that is in use, except
    ** for page 1 which is held in use in order to keep the lock on the
    ** database active. However such a page may be rolled back as a result
    ** of an internal error resulting in an automatic call to
    ** sqlite3PagerRollback().
    */
    void *pData;
    pData = pPg->pData;
    memcpy(pData, aData, pPager->pageSize);
    if( pPager->xReiniter ){
      pPager->xReiniter(pPg);
    }
    if( isMainJrnl && (!isSavepnt || pPager->journalOff<=pPager->journalHdr) ){
      /* If the contents of this page were just restored from the main
      ** journal file, then its content must be as they were when the
      ** transaction was first opened. In this case we can mark the page
      ** as clean, since there will be no need to write it out to the
      ** database file.
      **
      ** There is one exception to this rule. If the page is being rolled
      ** back as part of a savepoint (or statement) rollback from an
      ** unsynced portion of the main journal file, then it is not safe
      ** to mark the page as clean. This is because marking the page as
      ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is
      ** already in the journal file (recorded in Pager.pInJournal) and
      ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to
      ** again within this transaction, it will be marked as dirty but
      ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially
      ** be written out into the database file before its journal file
      ** segment is synced. If a crash occurs during or following this,
      ** database corruption may ensue.
      */
      sqlite3PcacheMakeClean(pPg);
    }
#ifdef SQLITE_CHECK_PAGES
    pPg->pageHash = pager_pagehash(pPg);
#endif
    /* If this was page 1, then restore the value of Pager.dbFileVers.
    ** Do this before any decoding. */
    if( pgno==1 ){
      memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
    }

    /* Decode the page just read from disk */
    CODEC1(pPager, pData, pPg->pgno, 3);
    sqlite3PcacheRelease(pPg);
  }
  return rc;
}

#if !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST)
/*
** This routine looks ahead into the main journal file and determines
** whether or not the next record (the record that begins at file
** offset pPager->journalOff) is a well-formed page record consisting
** of a valid page number, pPager->pageSize bytes of content, followed
** by a valid checksum.
**
** The pager never needs to know this in order to do its job.   This
** routine is only used from within assert() and testcase() macros.
1355d6e5e098Sdrh */ 1356d6e5e098Sdrh static int pagerNextJournalPageIsValid(Pager *pPager){ 1357d6e5e098Sdrh Pgno pgno; /* The page number of the page */ 1358d6e5e098Sdrh u32 cksum; /* The page checksum */ 1359d6e5e098Sdrh int rc; /* Return code from read operations */ 1360d6e5e098Sdrh sqlite3_file *fd; /* The file descriptor from which we are reading */ 1361d6e5e098Sdrh u8 *aData; /* Content of the page */ 1362d6e5e098Sdrh 1363d6e5e098Sdrh /* Read the page number header */ 1364d6e5e098Sdrh fd = pPager->jfd; 1365d6e5e098Sdrh rc = read32bits(fd, pPager->journalOff, &pgno); 1366d6e5e098Sdrh if( rc!=SQLITE_OK ){ return 0; } /*NO_TEST*/ 1367d6e5e098Sdrh if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ return 0; } /*NO_TEST*/ 1368d6e5e098Sdrh if( pgno>(Pgno)pPager->dbSize ){ return 0; } /*NO_TEST*/ 1369d6e5e098Sdrh 1370d6e5e098Sdrh /* Read the checksum */ 1371d6e5e098Sdrh rc = read32bits(fd, pPager->journalOff+pPager->pageSize+4, &cksum); 1372d6e5e098Sdrh if( rc!=SQLITE_OK ){ return 0; } /*NO_TEST*/ 1373d6e5e098Sdrh 1374d6e5e098Sdrh /* Read the data and verify the checksum */ 1375d6e5e098Sdrh aData = (u8*)pPager->pTmpSpace; 1376d6e5e098Sdrh rc = sqlite3OsRead(fd, aData, pPager->pageSize, pPager->journalOff+4); 1377d6e5e098Sdrh if( rc!=SQLITE_OK ){ return 0; } /*NO_TEST*/ 1378d6e5e098Sdrh if( pager_cksum(pPager, aData)!=cksum ){ return 0; } /*NO_TEST*/ 1379d6e5e098Sdrh 1380d6e5e098Sdrh /* Reach this point only if the page is valid */ 1381d6e5e098Sdrh return 1; 1382d6e5e098Sdrh } 1383d6e5e098Sdrh #endif /* !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST) */ 1384d6e5e098Sdrh 1385fa86c412Sdrh /* 138613adf8a0Sdanielk1977 ** Parameter zMaster is the name of a master journal file. A single journal 138713adf8a0Sdanielk1977 ** file that referred to the master journal file has just been rolled back. 138813adf8a0Sdanielk1977 ** This routine checks if it is possible to delete the master journal file, 138913adf8a0Sdanielk1977 ** and does so if it is. 
1390726de599Sdrh ** 139165839c6aSdanielk1977 ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 139265839c6aSdanielk1977 ** available for use within this function. 139365839c6aSdanielk1977 ** 139465839c6aSdanielk1977 ** 1395726de599Sdrh ** The master journal file contains the names of all child journals. 1396726de599Sdrh ** To tell if a master journal can be deleted, check to each of the 1397726de599Sdrh ** children. If all children are either missing or do not refer to 1398726de599Sdrh ** a different master journal, then this master journal can be deleted. 139913adf8a0Sdanielk1977 */ 1400b4b47411Sdanielk1977 static int pager_delmaster(Pager *pPager, const char *zMaster){ 1401b4b47411Sdanielk1977 sqlite3_vfs *pVfs = pPager->pVfs; 140213adf8a0Sdanielk1977 int rc; 140313adf8a0Sdanielk1977 int master_open = 0; 1404b4b47411Sdanielk1977 sqlite3_file *pMaster; 1405b4b47411Sdanielk1977 sqlite3_file *pJournal; 140613adf8a0Sdanielk1977 char *zMasterJournal = 0; /* Contents of master journal file */ 1407eb206256Sdrh i64 nMasterJournal; /* Size of master journal file */ 140813adf8a0Sdanielk1977 140913adf8a0Sdanielk1977 /* Open the master journal file exclusively in case some other process 141013adf8a0Sdanielk1977 ** is running this routine also. Not that it makes too much difference. 
141113adf8a0Sdanielk1977 */ 1412e5ae5735Sdrh pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2); 1413fee2d25aSdanielk1977 pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile); 1414b4b47411Sdanielk1977 if( !pMaster ){ 1415b4b47411Sdanielk1977 rc = SQLITE_NOMEM; 1416b4b47411Sdanielk1977 }else{ 1417fee2d25aSdanielk1977 int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL); 1418fee2d25aSdanielk1977 rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0); 1419b4b47411Sdanielk1977 } 142013adf8a0Sdanielk1977 if( rc!=SQLITE_OK ) goto delmaster_out; 142113adf8a0Sdanielk1977 master_open = 1; 1422b4b47411Sdanielk1977 1423b4b47411Sdanielk1977 rc = sqlite3OsFileSize(pMaster, &nMasterJournal); 142413adf8a0Sdanielk1977 if( rc!=SQLITE_OK ) goto delmaster_out; 142513adf8a0Sdanielk1977 142613adf8a0Sdanielk1977 if( nMasterJournal>0 ){ 14275865e3d5Sdanielk1977 char *zJournal; 14287657240aSdanielk1977 char *zMasterPtr = 0; 142965839c6aSdanielk1977 int nMasterPtr = pPager->pVfs->mxPathname+1; 14305865e3d5Sdanielk1977 14315865e3d5Sdanielk1977 /* Load the entire master journal file into space obtained from 143217435752Sdrh ** sqlite3_malloc() and pointed to by zMasterJournal. 
14335865e3d5Sdanielk1977 */ 14344f21c4afSdrh zMasterJournal = (char *)sqlite3Malloc((int)nMasterJournal + nMasterPtr); 143513adf8a0Sdanielk1977 if( !zMasterJournal ){ 143613adf8a0Sdanielk1977 rc = SQLITE_NOMEM; 143713adf8a0Sdanielk1977 goto delmaster_out; 143813adf8a0Sdanielk1977 } 143965839c6aSdanielk1977 zMasterPtr = &zMasterJournal[nMasterJournal]; 14404f21c4afSdrh rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0); 144113adf8a0Sdanielk1977 if( rc!=SQLITE_OK ) goto delmaster_out; 144213adf8a0Sdanielk1977 14435865e3d5Sdanielk1977 zJournal = zMasterJournal; 14445865e3d5Sdanielk1977 while( (zJournal-zMasterJournal)<nMasterJournal ){ 1445861f7456Sdanielk1977 int exists; 1446861f7456Sdanielk1977 rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists); 1447861f7456Sdanielk1977 if( rc!=SQLITE_OK ){ 144819db9352Sdrh goto delmaster_out; 144919db9352Sdrh } 1450861f7456Sdanielk1977 if( exists ){ 145113adf8a0Sdanielk1977 /* One of the journals pointed to by the master journal exists. 145213adf8a0Sdanielk1977 ** Open it and check if it points at the master journal. If 145313adf8a0Sdanielk1977 ** so, return without deleting the master journal file. 145413adf8a0Sdanielk1977 */ 14553b7b78b3Sdrh int c; 1456fee2d25aSdanielk1977 int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL); 1457fee2d25aSdanielk1977 rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0); 145813adf8a0Sdanielk1977 if( rc!=SQLITE_OK ){ 145913adf8a0Sdanielk1977 goto delmaster_out; 146013adf8a0Sdanielk1977 } 14619eed5057Sdanielk1977 146265839c6aSdanielk1977 rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr); 1463b4b47411Sdanielk1977 sqlite3OsClose(pJournal); 14649eed5057Sdanielk1977 if( rc!=SQLITE_OK ){ 14659eed5057Sdanielk1977 goto delmaster_out; 14669eed5057Sdanielk1977 } 146713adf8a0Sdanielk1977 146865839c6aSdanielk1977 c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0; 14693b7b78b3Sdrh if( c ){ 147013adf8a0Sdanielk1977 /* We have a match. 
Do not delete the master journal file. */ 147113adf8a0Sdanielk1977 goto delmaster_out; 147213adf8a0Sdanielk1977 } 147313adf8a0Sdanielk1977 } 1474ea678832Sdrh zJournal += (sqlite3Strlen30(zJournal)+1); 147513adf8a0Sdanielk1977 } 147613adf8a0Sdanielk1977 } 147713adf8a0Sdanielk1977 1478fee2d25aSdanielk1977 rc = sqlite3OsDelete(pVfs, zMaster, 0); 147913adf8a0Sdanielk1977 148013adf8a0Sdanielk1977 delmaster_out: 148113adf8a0Sdanielk1977 if( zMasterJournal ){ 148217435752Sdrh sqlite3_free(zMasterJournal); 148313adf8a0Sdanielk1977 } 148413adf8a0Sdanielk1977 if( master_open ){ 1485b4b47411Sdanielk1977 sqlite3OsClose(pMaster); 148613adf8a0Sdanielk1977 } 1487b4b47411Sdanielk1977 sqlite3_free(pMaster); 148813adf8a0Sdanielk1977 return rc; 148913adf8a0Sdanielk1977 } 149013adf8a0Sdanielk1977 1491a6abd041Sdrh 1492a6abd041Sdrh /* 1493f90b7260Sdanielk1977 ** If the main database file is open and an exclusive lock is held, 1494f90b7260Sdanielk1977 ** truncate the main file of the given pager to the specified number 1495f90b7260Sdanielk1977 ** of pages. 14967fe3f7e9Sdrh ** 1497f90b7260Sdanielk1977 ** It might might be the case that the file on disk is smaller than nPage. 14987fe3f7e9Sdrh ** This can happen, for example, if we are in the middle of a transaction 14997fe3f7e9Sdrh ** which has extended the file size and the new pages are still all held 15007fe3f7e9Sdrh ** in cache, then an INSERT or UPDATE does a statement rollback. Some 15017fe3f7e9Sdrh ** operating system implementations can get confused if you try to 15027fe3f7e9Sdrh ** truncate a file to some size that is larger than it currently is, 150306e11af9Sdanielk1977 ** so detect this case and write a single zero byte to the end of the new 150406e11af9Sdanielk1977 ** file instead. 
1505cb4c40baSdrh */ 1506d92db531Sdanielk1977 static int pager_truncate(Pager *pPager, Pgno nPage){ 1507e180dd93Sdanielk1977 int rc = SQLITE_OK; 15087a2b1eebSdanielk1977 if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){ 15097fe3f7e9Sdrh i64 currentSize, newSize; 15107fe3f7e9Sdrh rc = sqlite3OsFileSize(pPager->fd, ¤tSize); 15117fe3f7e9Sdrh newSize = pPager->pageSize*(i64)nPage; 151206e11af9Sdanielk1977 if( rc==SQLITE_OK && currentSize!=newSize ){ 151306e11af9Sdanielk1977 if( currentSize>newSize ){ 15147fe3f7e9Sdrh rc = sqlite3OsTruncate(pPager->fd, newSize); 151506e11af9Sdanielk1977 }else{ 151606e11af9Sdanielk1977 rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1); 151706e11af9Sdanielk1977 } 15183460d19cSdanielk1977 if( rc==SQLITE_OK ){ 15193460d19cSdanielk1977 pPager->dbFileSize = nPage; 15203460d19cSdanielk1977 } 15217fe3f7e9Sdrh } 1522e180dd93Sdanielk1977 } 1523e180dd93Sdanielk1977 return rc; 1524cb4c40baSdrh } 1525cb4c40baSdrh 1526cb4c40baSdrh /* 1527c80f058dSdrh ** Set the sectorSize for the given pager. 1528c80f058dSdrh ** 1529334c80d6Sdrh ** The sector size is at least as big as the sector size reported 1530334c80d6Sdrh ** by sqlite3OsSectorSize(). The minimum sector size is 512. 1531c80f058dSdrh */ 1532c80f058dSdrh static void setSectorSize(Pager *pPager){ 15337a2b1eebSdanielk1977 assert(pPager->fd->pMethods||pPager->tempFile); 15347a2b1eebSdanielk1977 if( !pPager->tempFile ){ 15357a2b1eebSdanielk1977 /* Sector size doesn't matter for temporary files. Also, the file 15367a2b1eebSdanielk1977 ** may not have been opened yet, in whcih case the OsSectorSize() 15377a2b1eebSdanielk1977 ** call will segfault. 
15387a2b1eebSdanielk1977 */ 1539c80f058dSdrh pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); 15407a2b1eebSdanielk1977 } 1541334c80d6Sdrh if( pPager->sectorSize<512 ){ 1542334c80d6Sdrh pPager->sectorSize = 512; 1543c80f058dSdrh } 15447cbd589dSdanielk1977 if( pPager->sectorSize>MAX_SECTOR_SIZE ){ 15457cbd589dSdanielk1977 pPager->sectorSize = MAX_SECTOR_SIZE; 15467cbd589dSdanielk1977 } 1547c80f058dSdrh } 1548c80f058dSdrh 1549c80f058dSdrh /* 1550ed7c855cSdrh ** Playback the journal and thus restore the database file to 1551ed7c855cSdrh ** the state it was in before we started making changes. 1552ed7c855cSdrh ** 155334e79ceeSdrh ** The journal file format is as follows: 155434e79ceeSdrh ** 1555ae2b40c4Sdrh ** (1) 8 byte prefix. A copy of aJournalMagic[]. 1556ae2b40c4Sdrh ** (2) 4 byte big-endian integer which is the number of valid page records 155734e79ceeSdrh ** in the journal. If this value is 0xffffffff, then compute the 1558ae2b40c4Sdrh ** number of page records from the journal size. 1559ae2b40c4Sdrh ** (3) 4 byte big-endian integer which is the initial value for the 1560ae2b40c4Sdrh ** sanity checksum. 1561ae2b40c4Sdrh ** (4) 4 byte integer which is the number of pages to truncate the 156234e79ceeSdrh ** database to during a rollback. 1563334c80d6Sdrh ** (5) 4 byte big-endian integer which is the sector size. The header 1564334c80d6Sdrh ** is this many bytes in size. 1565334c80d6Sdrh ** (6) 4 byte big-endian integer which is the page case. 1566334c80d6Sdrh ** (7) 4 byte integer which is the number of bytes in the master journal 1567ae2b40c4Sdrh ** name. The value may be zero (indicate that there is no master 1568ae2b40c4Sdrh ** journal.) 1569334c80d6Sdrh ** (8) N bytes of the master journal name. The name will be nul-terminated 1570ae2b40c4Sdrh ** and might be shorter than the value read from (5). If the first byte 1571ae2b40c4Sdrh ** of the name is \000 then there is no master journal. The master 1572ae2b40c4Sdrh ** journal name is stored in UTF-8. 
**  (9)  Zero or more page instances, each as follows:
**        +  4 byte page number.
**        +  pPager->pageSize bytes of data.
**        +  4 byte checksum
**
** When we speak of the journal header, we mean the first 8 items above.
** Each entry in the journal is an instance of the 9th item.
**
** Call the value from the second bullet "nRec".  nRec is the number of
** valid page entries in the journal.  In most cases, you can compute the
** value of nRec from the size of the journal file.  But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we always use the nRec value in the header.
**
** If the nRec value is 0xffffffff it means that nRec should be computed
** from the file size.  This value is used when the user selects the
** no-sync option for the journal.  A power failure could lead to corruption
** in this case.  But for things like temporary table (which will be
** deleted when the power is restored) we don't care.
**
** If the file opened as the journal file is not a well-formed
** journal file then all pages up to the first corrupted page are rolled
** back (or no pages if the journal header is corrupted). The journal file
** is then deleted and SQLITE_OK returned, just as if no corruption had
** been encountered.
**
** If an I/O or malloc() error occurs, the journal-file is not deleted
** and an error code is returned.
**
** The isHot argument is consulted in one place only: when it is zero, a
** final journal header with nRec==0 causes the record count to be derived
** from the remaining size of the journal file (see ticket #2565 below).
*/
static int pager_playback(Pager *pPager, int isHot){
  sqlite3_vfs *pVfs = pPager->pVfs;
  i64 szJ;                 /* Size of the journal file in bytes */
  u32 nRec;                /* Number of Records in the journal */
  u32 u;                   /* Unsigned loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int rc;                  /* Result code of a subroutine */
  int res = 1;             /* Value returned by sqlite3OsAccess() */
  char *zMaster = 0;       /* Name of master journal file if any */

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  rc = sqlite3OsFileSize(pPager->jfd, &szJ);
  if( rc!=SQLITE_OK || szJ==0 ){
    goto end_playback;
  }

  /* Read the master journal name from the journal, if it is present.
  ** If a master journal file name is specified, but the file is not
  ** present on disk, then the journal is not hot and does not need to be
  ** played back.
  */
  zMaster = pPager->pTmpSpace;
  rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  if( rc==SQLITE_OK && zMaster[0] ){
    rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
  }
  /* Pager.pTmpSpace is reused as the page buffer by
  ** pager_playback_one_page(), so the name is not kept.  It is read
  ** again at end_playback.
  */
  zMaster = 0;
  if( rc!=SQLITE_OK || !res ){
    goto end_playback;
  }
  pPager->journalOff = 0;

  /* This loop terminates either when the readJournalHdr() call returns
  ** SQLITE_DONE or an IO error occurs. */
  while( 1 ){

    /* Read the next journal header from the journal file.  If there are
    ** not enough bytes left in the journal file for a complete header, or
    ** it is corrupted, then a process must have failed while writing it.
    ** This indicates nothing more needs to be rolled back.
    */
    rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){
        rc = SQLITE_OK;
      }
      goto end_playback;
    }

    /* If nRec is 0xffffffff, then this journal was created by a process
    ** working in no-sync mode. This means that the rest of the journal
    ** file consists of pages, there are no more journal headers. Compute
    ** the value of nRec based on this assumption.
    */
    if( nRec==0xffffffff ){
      assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
      nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager));
    }

    /* If nRec is 0 and this rollback is of a transaction created by this
    ** process and if this is the final header in the journal, then it means
    ** that this part of the journal was being filled but has not yet been
    ** synced to disk.  Compute the number of pages based on the remaining
    ** size of the file.
    **
    ** The third term of the test was added to fix ticket #2565.
    ** When rolling back a hot journal, nRec==0 always means that the next
    ** chunk of the journal contains zero pages to be rolled back.  But
    ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
    ** the journal, it means that the journal might contain additional
    ** pages that need to be rolled back and that the number of pages
    ** should be computed based on the journal file size.
    */
    testcase( nRec==0 && !isHot
         && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)!=pPager->journalOff
         && ((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager))>0
         && pagerNextJournalPageIsValid(pPager)
    );
    if( nRec==0 && !isHot &&
        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
      nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
    }

    /* If this is the first header read from the journal, truncate the
    ** database file back to its original size.
    */
    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
      rc = pager_truncate(pPager, mxPg);
      if( rc!=SQLITE_OK ){
        goto end_playback;
      }
      pPager->dbSize = mxPg;
    }

    /* Copy original pages out of the journal and back into the database file.
    */
    for(u=0; u<nRec; u++){
      rc = pager_playback_one_page(pPager, 1, &pPager->journalOff, 0, 0);
      if( rc!=SQLITE_OK ){
        if( rc==SQLITE_DONE ){
          /* An invalid record ends this segment.  Moving journalOff to
          ** the end of the file makes the next readJournalHdr() call
          ** finish the playback.
          */
          rc = SQLITE_OK;
          pPager->journalOff = szJ;
          break;
        }else{
          /* If we are unable to rollback, then the database is probably
          ** going to end up being corrupt.  It is corrupt to us, anyhow.
          ** Perhaps the next process to come along can fix it....
          */
          rc = SQLITE_CORRUPT_BKPT;
          goto end_playback;
        }
      }
    }
  }
  /*NOTREACHED*/
  assert( 0 );

end_playback:
  /* Read the master journal name again.  The copy read at the top of this
  ** routine was held in Pager.pTmpSpace, which the page playback above
  ** uses as its page buffer.
  */
  if( rc==SQLITE_OK ){
    zMaster = pPager->pTmpSpace;
    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  }
  if( rc==SQLITE_OK ){
    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
  }
  if( rc==SQLITE_OK && zMaster[0] && res ){
    /* If there was a master journal and this routine will return success,
    ** see if it is possible to delete the master journal.
    */
    rc = pager_delmaster(pPager, zMaster);
  }

  /* The Pager.sectorSize variable may have been updated while rolling
  ** back a journal created by a process with a different sector size
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}

/*
** Playback savepoint pSavepoint.  Or, if pSavepoint==NULL, then playback
** the entire main journal file.
**
** The case pSavepoint==NULL occurs when a ROLLBACK TO command is invoked
** on a SAVEPOINT that is a transaction savepoint.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the bitvec of played-back
** pages cannot be allocated, or the error code from readJournalHdr() or
** pager_playback_one_page().  On success Pager.journalOff is restored to
** the value it had when this routine was called.
*/
static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
  i64 szJ;                 /* Effective size of the main journal */
  i64 iHdrOff;             /* End of first segment of main-journal records */
  Pgno ii;                 /* Loop counter */
  int rc = SQLITE_OK;      /* Return code */
  Bitvec *pDone = 0;       /* Bitvec to ensure pages played back only once */

  /* Allocate a bitvec to use to store the set of pages rolled back */
  if( pSavepoint ){
    pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
    if( !pDone ){
      return SQLITE_NOMEM;
    }
  }

  /* Set the in-memory database size back to the size it was before the
  ** savepoint being reverted was opened.  Only Pager.dbSize is changed
  ** here; this routine does not itself truncate the database file.
  */
  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
  assert( pPager->state>=PAGER_SHARED );

  /* Use pPager->journalOff as the effective size of the main rollback
  ** journal.  The actual file might be larger than this in
  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything
  ** past pPager->journalOff is off-limits to us.
  */
  szJ = pPager->journalOff;

  /* Begin by rolling back records from the main journal starting at
  ** PagerSavepoint.iOffset and continuing to the next journal header.
  ** There might be records in the main journal that have a page number
  ** greater than the current database size (pPager->dbSize) but those
  ** will be skipped automatically.  Pages are added to pDone as they
  ** are played back.
  */
  if( pSavepoint ){
    /* An iHdrOffset of zero means the first segment runs to the effective
    ** end of the journal (szJ). */
    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
    pPager->journalOff = pSavepoint->iOffset;
    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
      rc = pager_playback_one_page(pPager, 1, &pPager->journalOff, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }else{
    pPager->journalOff = 0;
  }

  /* Continue rolling back records out of the main journal starting at
  ** the first journal header seen and continuing until the effective end
  ** of the main journal file.  Continue to skip out-of-range pages and
  ** continue adding pages rolled back to pDone.
  */
  while( rc==SQLITE_OK && pPager->journalOff<szJ ){
    u32 nJRec = 0;     /* Number of Journal Records */
    u32 dummy;
    rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
    assert( rc!=SQLITE_DONE );

    /*
    ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
    ** test is related to ticket #2565.  See the discussion in the
    ** pager_playback() function for additional information.
    */
    assert( !(nJRec==0
         && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)!=pPager->journalOff
         && ((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager))>0
         && pagerNextJournalPageIsValid(pPager))
    );
    if( nJRec==0 
     && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
    ){
      nJRec = (szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager);
    }
    for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
      rc = pager_playback_one_page(pPager, 1, &pPager->journalOff, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }
  assert( rc!=SQLITE_OK || pPager->journalOff==szJ );

  /* Finally,  rollback pages from the sub-journal.  Page that were
  ** previously rolled back out of the main journal (and are hence in pDone)
  ** will be skipped.  Out-of-range pages are also skipped.
  */
  if( pSavepoint ){
    /* Each sub-journal record is a 4-byte page number followed by the
    ** page content; there is no checksum. */
    i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
    for(ii=pSavepoint->iSubRec; rc==SQLITE_OK&&ii<(u32)pPager->stmtNRec; ii++){
      assert( offset == ii*(4+pPager->pageSize) );
      rc = pager_playback_one_page(pPager, 0, &offset, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }

  sqlite3BitvecDestroy(pDone);
  if( rc==SQLITE_OK ){
    pPager->journalOff = szJ;
  }
  return rc;
}

/*
** Change the maximum number of in-memory pages that are allowed.
1856f57b14a6Sdrh */ 18573b8a05f6Sdanielk1977 void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ 18588c0a791aSdanielk1977 sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); 1859f57b14a6Sdrh } 1860f57b14a6Sdrh 1861f57b14a6Sdrh /* 1862973b6e33Sdrh ** Adjust the robustness of the database to damage due to OS crashes 1863973b6e33Sdrh ** or power failures by changing the number of syncs()s when writing 1864973b6e33Sdrh ** the rollback journal. There are three levels: 1865973b6e33Sdrh ** 1866054889ecSdrh ** OFF sqlite3OsSync() is never called. This is the default 1867973b6e33Sdrh ** for temporary and transient files. 1868973b6e33Sdrh ** 1869973b6e33Sdrh ** NORMAL The journal is synced once before writes begin on the 1870973b6e33Sdrh ** database. This is normally adequate protection, but 1871973b6e33Sdrh ** it is theoretically possible, though very unlikely, 1872973b6e33Sdrh ** that an inopertune power failure could leave the journal 1873973b6e33Sdrh ** in a state which would cause damage to the database 1874973b6e33Sdrh ** when it is rolled back. 1875973b6e33Sdrh ** 1876973b6e33Sdrh ** FULL The journal is synced twice before writes begin on the 187734e79ceeSdrh ** database (with some additional information - the nRec field 187834e79ceeSdrh ** of the journal header - being written in between the two 187934e79ceeSdrh ** syncs). If we assume that writing a 1880973b6e33Sdrh ** single disk sector is atomic, then this mode provides 1881973b6e33Sdrh ** assurance that the journal will not be corrupted to the 1882973b6e33Sdrh ** point of causing damage to the database during rollback. 1883973b6e33Sdrh ** 1884973b6e33Sdrh ** Numeric values associated with these states are OFF==1, NORMAL=2, 1885973b6e33Sdrh ** and FULL=3. 
1886973b6e33Sdrh */ 188793758c8dSdanielk1977 #ifndef SQLITE_OMIT_PAGER_PRAGMAS 1888281b21daSdrh void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){ 18894f21c4afSdrh pPager->noSync = (level==1 || pPager->tempFile) ?1:0; 18904f21c4afSdrh pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0; 1891281b21daSdrh pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL); 18921d850a72Sdanielk1977 if( pPager->noSync ) pPager->needSync = 0; 1893973b6e33Sdrh } 189493758c8dSdanielk1977 #endif 1895973b6e33Sdrh 1896973b6e33Sdrh /* 1897af6df11fSdrh ** The following global variable is incremented whenever the library 1898af6df11fSdrh ** attempts to open a temporary file. This information is used for 1899af6df11fSdrh ** testing and analysis only. 1900af6df11fSdrh */ 19010f7eb611Sdrh #ifdef SQLITE_TEST 1902af6df11fSdrh int sqlite3_opentemp_count = 0; 19030f7eb611Sdrh #endif 1904af6df11fSdrh 1905af6df11fSdrh /* 19063f56e6ebSdrh ** Open a temporary file. 19073f56e6ebSdrh ** 19083f56e6ebSdrh ** Write the file descriptor into *fd. Return SQLITE_OK on success or some 1909fee2d25aSdanielk1977 ** other error code if we fail. The OS will automatically delete the temporary 1910fee2d25aSdanielk1977 ** file when it is closed. 
1911fa86c412Sdrh */ 1912b4b47411Sdanielk1977 static int sqlite3PagerOpentemp( 191317b90b53Sdanielk1977 Pager *pPager, /* The pager object */ 191433f4e02aSdrh sqlite3_file *pFile, /* Write the file descriptor here */ 191533f4e02aSdrh int vfsFlags /* Flags passed through to the VFS */ 1916b4b47411Sdanielk1977 ){ 1917fa86c412Sdrh int rc; 19183f56e6ebSdrh 19190f7eb611Sdrh #ifdef SQLITE_TEST 1920af6df11fSdrh sqlite3_opentemp_count++; /* Used for testing and analysis only */ 19210f7eb611Sdrh #endif 1922b4b47411Sdanielk1977 192333f4e02aSdrh vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | 192433f4e02aSdrh SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; 192517b90b53Sdanielk1977 rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); 1926b4b47411Sdanielk1977 assert( rc!=SQLITE_OK || pFile->pMethods ); 1927fa86c412Sdrh return rc; 1928fa86c412Sdrh } 1929fa86c412Sdrh 1930a858aa2eSdanielk1977 static int pagerStress(void *,PgHdr *); 19318c0a791aSdanielk1977 1932fa86c412Sdrh /* 1933ed7c855cSdrh ** Create a new page cache and put a pointer to the page cache in *ppPager. 19345e00f6c7Sdrh ** The file to be cached need not exist. The file is not locked until 19353b8a05f6Sdanielk1977 ** the first call to sqlite3PagerGet() and is only held open until the 19363b8a05f6Sdanielk1977 ** last page is released using sqlite3PagerUnref(). 1937382c0247Sdrh ** 19386446c4dcSdrh ** If zFilename is NULL then a randomly-named temporary file is created 19391cc8c448Sdrh ** and used as the file to be cached. The file will be deleted 19406446c4dcSdrh ** automatically when it is closed. 194190f5ecb3Sdrh ** 194290f5ecb3Sdrh ** If zFilename is ":memory:" then all information is held in cache. 194390f5ecb3Sdrh ** It is never written to disk. This can be used to implement an 194490f5ecb3Sdrh ** in-memory database. 
1945ed7c855cSdrh */ 19463b8a05f6Sdanielk1977 int sqlite3PagerOpen( 194786f8c197Sdrh sqlite3_vfs *pVfs, /* The virtual file system to use */ 19487e3b0a07Sdrh Pager **ppPager, /* Return the Pager structure here */ 19497e3b0a07Sdrh const char *zFilename, /* Name of the database file to open */ 1950da47d774Sdrh int nExtra, /* Extra bytes append to each in-memory page */ 195133f4e02aSdrh int flags, /* flags controlling this file */ 195233f4e02aSdrh int vfsFlags /* flags passed through to sqlite3_vfs.xOpen() */ 19537e3b0a07Sdrh ){ 1954b4b47411Sdanielk1977 u8 *pPtr; 1955aef0bf64Sdanielk1977 Pager *pPager = 0; 1956cfe9a69fSdanielk1977 int rc = SQLITE_OK; 1957cfe9a69fSdanielk1977 int i; 19588def5ea2Sdanielk1977 int tempFile = 0; 1959ac69b05eSdrh int memDb = 0; 19605e00f6c7Sdrh int readOnly = 0; 19617bec505eSdrh int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; 19627bec505eSdrh int noReadlock = (flags & PAGER_NO_READLOCK)!=0; 1963b3175389Sdanielk1977 int journalFileSize; 19648c0a791aSdanielk1977 int pcacheSize = sqlite3PcacheSize(); 1965facf0307Sdrh int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; 196617b90b53Sdanielk1977 char *zPathname = 0; 196717b90b53Sdanielk1977 int nPathname = 0; 1968b4b47411Sdanielk1977 1969b3175389Sdanielk1977 if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){ 1970b3175389Sdanielk1977 journalFileSize = sqlite3JournalSize(pVfs); 1971b3175389Sdanielk1977 }else{ 1972b3175389Sdanielk1977 journalFileSize = sqlite3MemJournalSize(); 1973b3175389Sdanielk1977 } 1974b3175389Sdanielk1977 197586f8c197Sdrh /* The default return is a NULL pointer */ 1976d9b0257aSdrh *ppPager = 0; 1977aef0bf64Sdanielk1977 197817b90b53Sdanielk1977 /* Compute and store the full pathname in an allocated buffer pointed 197917b90b53Sdanielk1977 ** to by zPathname, length nPathname. Or, if this is a temporary file, 198017b90b53Sdanielk1977 ** leave both nPathname and zPathname set to 0. 
198117b90b53Sdanielk1977 */ 198217b90b53Sdanielk1977 if( zFilename && zFilename[0] ){ 1983adfb9b05Sdanielk1977 nPathname = pVfs->mxPathname+1; 1984e5ae5735Sdrh zPathname = sqlite3Malloc(nPathname*2); 19851cc8c448Sdrh if( zPathname==0 ){ 19861cc8c448Sdrh return SQLITE_NOMEM; 19871cc8c448Sdrh } 19881cc8c448Sdrh #ifndef SQLITE_OMIT_MEMORYDB 19891cc8c448Sdrh if( strcmp(zFilename,":memory:")==0 ){ 19901cc8c448Sdrh memDb = 1; 19911cc8c448Sdrh zPathname[0] = 0; 19921cc8c448Sdrh }else 19931cc8c448Sdrh #endif 19941cc8c448Sdrh { 1995adfb9b05Sdanielk1977 rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname); 1996ae28c01aSdrh } 19971cc8c448Sdrh if( rc!=SQLITE_OK ){ 19981cc8c448Sdrh sqlite3_free(zPathname); 19991cc8c448Sdrh return rc; 20001cc8c448Sdrh } 2001ea678832Sdrh nPathname = sqlite3Strlen30(zPathname); 200299b90c3fSdrh } 200399b90c3fSdrh 2004b4b47411Sdanielk1977 /* Allocate memory for the pager structure */ 2005b4b47411Sdanielk1977 pPager = sqlite3MallocZero( 2006b4b47411Sdanielk1977 sizeof(*pPager) + /* Pager structure */ 20078c0a791aSdanielk1977 pcacheSize + /* PCache object */ 2008c7b6017cSdanielk1977 journalFileSize + /* The journal file structure */ 2009b3175389Sdanielk1977 pVfs->szOsFile + /* The main db file */ 2010b3175389Sdanielk1977 journalFileSize * 2 + /* The two journal files */ 201117b90b53Sdanielk1977 3*nPathname + 40 /* zFilename, zDirectory, zJournal */ 2012b4b47411Sdanielk1977 ); 2013b4b47411Sdanielk1977 if( !pPager ){ 20141cc8c448Sdrh sqlite3_free(zPathname); 2015b4b47411Sdanielk1977 return SQLITE_NOMEM; 2016b4b47411Sdanielk1977 } 20178c0a791aSdanielk1977 pPager->pPCache = (PCache *)&pPager[1]; 20188c0a791aSdanielk1977 pPtr = ((u8 *)&pPager[1]) + pcacheSize; 201933f4e02aSdrh pPager->vfsFlags = vfsFlags; 2020b4b47411Sdanielk1977 pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0]; 2021fd7f0452Sdanielk1977 pPager->sjfd = (sqlite3_file*)&pPtr[pVfs->szOsFile]; 2022b3175389Sdanielk1977 pPager->jfd = 
(sqlite3_file*)&pPtr[pVfs->szOsFile+journalFileSize]; 2023b3175389Sdanielk1977 pPager->zFilename = (char*)&pPtr[pVfs->szOsFile+2*journalFileSize]; 20241cc8c448Sdrh pPager->zDirectory = &pPager->zFilename[nPathname+1]; 20251cc8c448Sdrh pPager->zJournal = &pPager->zDirectory[nPathname+1]; 2026b4b47411Sdanielk1977 pPager->pVfs = pVfs; 202717b90b53Sdanielk1977 if( zPathname ){ 20281cc8c448Sdrh memcpy(pPager->zFilename, zPathname, nPathname+1); 20291cc8c448Sdrh sqlite3_free(zPathname); 203017b90b53Sdanielk1977 } 2031b4b47411Sdanielk1977 2032153c62c4Sdrh /* Open the pager file. 2033aef0bf64Sdanielk1977 */ 2034ae28c01aSdrh if( zFilename && zFilename[0] && !memDb ){ 2035d92db531Sdanielk1977 if( nPathname>(pVfs->mxPathname - (int)sizeof("-journal")) ){ 2036b4b47411Sdanielk1977 rc = SQLITE_CANTOPEN; 2037b4b47411Sdanielk1977 }else{ 2038b4b47411Sdanielk1977 int fout = 0; 203933f4e02aSdrh rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, 204033f4e02aSdrh pPager->vfsFlags, &fout); 2041b4b47411Sdanielk1977 readOnly = (fout&SQLITE_OPEN_READONLY); 20429663b8f9Sdanielk1977 20439663b8f9Sdanielk1977 /* If the file was successfully opened for read/write access, 20449663b8f9Sdanielk1977 ** choose a default page size in case we have to create the 20459663b8f9Sdanielk1977 ** database file. The default page size is the maximum of: 20469663b8f9Sdanielk1977 ** 20479663b8f9Sdanielk1977 ** + SQLITE_DEFAULT_PAGE_SIZE, 20489663b8f9Sdanielk1977 ** + The value returned by sqlite3OsSectorSize() 20499663b8f9Sdanielk1977 ** + The largest page size that can be written atomically. 
20509663b8f9Sdanielk1977 */ 20519663b8f9Sdanielk1977 if( rc==SQLITE_OK && !readOnly ){ 20527cbd589dSdanielk1977 setSectorSize(pPager); 20537cbd589dSdanielk1977 if( szPageDflt<pPager->sectorSize ){ 20547cbd589dSdanielk1977 szPageDflt = pPager->sectorSize; 20559663b8f9Sdanielk1977 } 20569663b8f9Sdanielk1977 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 20579663b8f9Sdanielk1977 { 20589663b8f9Sdanielk1977 int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); 20599663b8f9Sdanielk1977 int ii; 20609663b8f9Sdanielk1977 assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); 20619663b8f9Sdanielk1977 assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); 20629663b8f9Sdanielk1977 assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536); 2063facf0307Sdrh for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){ 2064facf0307Sdrh if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii; 20659663b8f9Sdanielk1977 } 20669663b8f9Sdanielk1977 } 20679663b8f9Sdanielk1977 #endif 2068facf0307Sdrh if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){ 2069facf0307Sdrh szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE; 20709663b8f9Sdanielk1977 } 20719663b8f9Sdanielk1977 } 2072b4b47411Sdanielk1977 } 2073b3175389Sdanielk1977 }else{ 20747a2b1eebSdanielk1977 /* If a temporary file is requested, it is not opened immediately. 20757a2b1eebSdanielk1977 ** In this case we accept the default page size and delay actually 20767a2b1eebSdanielk1977 ** opening the file until the first call to OsWrite(). 2077b3175389Sdanielk1977 ** 2078b3175389Sdanielk1977 ** This branch is also run for an in-memory database. An in-memory 2079b3175389Sdanielk1977 ** database is the same as a temp-file that is never written out to 2080b3175389Sdanielk1977 ** disk and uses an in-memory rollback journal. 
20817a2b1eebSdanielk1977 */ 20825e00f6c7Sdrh tempFile = 1; 20837a2b1eebSdanielk1977 pPager->state = PAGER_EXCLUSIVE; 20848def5ea2Sdanielk1977 } 2085aef0bf64Sdanielk1977 20868186df86Sdanielk1977 if( pPager && rc==SQLITE_OK ){ 2087facf0307Sdrh pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt); 2088aef0bf64Sdanielk1977 } 2089aef0bf64Sdanielk1977 2090153c62c4Sdrh /* If an error occured in either of the blocks above. 2091153c62c4Sdrh ** Free the Pager structure and close the file. 2092153c62c4Sdrh ** Since the pager is not allocated there is no need to set 2093aef0bf64Sdanielk1977 ** any Pager.errMask variables. 2094aef0bf64Sdanielk1977 */ 2095b4b47411Sdanielk1977 if( !pPager || !pPager->pTmpSpace ){ 2096b4b47411Sdanielk1977 sqlite3OsClose(pPager->fd); 209717435752Sdrh sqlite3_free(pPager); 2098aef0bf64Sdanielk1977 return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc); 2099d9b0257aSdrh } 21008c0a791aSdanielk1977 nExtra = FORCE_ALIGNMENT(nExtra); 210171d5d2cdSdanielk1977 sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, 210241d3027cSdrh !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); 2103aef0bf64Sdanielk1977 210430d53701Sdrh PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename)); 2105153c62c4Sdrh IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) 2106aef0bf64Sdanielk1977 2107b4b47411Sdanielk1977 /* Fill in Pager.zDirectory[] */ 21081cc8c448Sdrh memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1); 2109ea678832Sdrh for(i=sqlite3Strlen30(pPager->zDirectory); 2110ea678832Sdrh i>0 && pPager->zDirectory[i-1]!='/'; i--){} 2111a76c82ebSdrh if( i>0 ) pPager->zDirectory[i-1] = 0; 2112b4b47411Sdanielk1977 211399b90c3fSdrh /* Fill in Pager.zJournal[] */ 211417b90b53Sdanielk1977 if( zPathname ){ 21151cc8c448Sdrh memcpy(pPager->zJournal, pPager->zFilename, nPathname); 21161cc8c448Sdrh memcpy(&pPager->zJournal[nPathname], "-journal", 9); 211717b90b53Sdanielk1977 }else{ 211817b90b53Sdanielk1977 pPager->zJournal = 0; 211917b90b53Sdanielk1977 } 2120b4b47411Sdanielk1977 
21213b59a5ccSdrh /* pPager->journalOpen = 0; */ 21224f21c4afSdrh pPager->useJournal = (u8)useJournal; 21234f21c4afSdrh pPager->noReadlock = (noReadlock && readOnly) ?1:0; 21243b59a5ccSdrh /* pPager->stmtOpen = 0; */ 21253b59a5ccSdrh /* pPager->stmtInUse = 0; */ 21263b59a5ccSdrh /* pPager->nRef = 0; */ 21274f21c4afSdrh pPager->dbSizeValid = (u8)memDb; 2128facf0307Sdrh pPager->pageSize = szPageDflt; 21293b59a5ccSdrh /* pPager->stmtSize = 0; */ 21303b59a5ccSdrh /* pPager->stmtJSize = 0; */ 21313b59a5ccSdrh /* pPager->nPage = 0; */ 213290f5ecb3Sdrh pPager->mxPage = 100; 2133f8e632b6Sdrh pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; 21343b59a5ccSdrh /* pPager->state = PAGER_UNLOCK; */ 21351cc8c448Sdrh assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) ); 21363b59a5ccSdrh /* pPager->errMask = 0; */ 21374f21c4afSdrh pPager->tempFile = (u8)tempFile; 2138369339dbSdrh assert( tempFile==PAGER_LOCKINGMODE_NORMAL 2139369339dbSdrh || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE ); 2140369339dbSdrh assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 ); 21414f21c4afSdrh pPager->exclusiveMode = (u8)tempFile; 21424f21c4afSdrh pPager->memDb = (u8)memDb; 21434f21c4afSdrh pPager->readOnly = (u8)readOnly; 21443b59a5ccSdrh /* pPager->needSync = 0; */ 21454f21c4afSdrh pPager->noSync = (pPager->tempFile || !useJournal) ?1:0; 21464f21c4afSdrh pPager->fullSync = pPager->noSync ?0:1; 2147f036aef0Sdanielk1977 pPager->sync_flags = SQLITE_SYNC_NORMAL; 21483b59a5ccSdrh /* pPager->pFirst = 0; */ 21493b59a5ccSdrh /* pPager->pFirstSynced = 0; */ 21503b59a5ccSdrh /* pPager->pLast = 0; */ 21518c0a791aSdanielk1977 pPager->nExtra = nExtra; 2152b53e4960Sdanielk1977 pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT; 2153b3175389Sdanielk1977 assert(pPager->fd->pMethods||tempFile); 2154c80f058dSdrh setSectorSize(pPager); 2155b3175389Sdanielk1977 if( memDb ){ 2156b3175389Sdanielk1977 pPager->journalMode = PAGER_JOURNALMODE_MEMORY; 2157b472117cSdanielk1977 } 21581ceedd37Sdanielk1977 /* pPager->xBusyHandler 
= 0; */ 21591ceedd37Sdanielk1977 /* pPager->pBusyHandlerArg = 0; */ 21603b59a5ccSdrh /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ 2161ed7c855cSdrh *ppPager = pPager; 2162ed7c855cSdrh return SQLITE_OK; 2163ed7c855cSdrh } 2164ed7c855cSdrh 2165ed7c855cSdrh /* 216690f5ecb3Sdrh ** Set the busy handler function. 216790f5ecb3Sdrh */ 21681ceedd37Sdanielk1977 void sqlite3PagerSetBusyhandler( 21691ceedd37Sdanielk1977 Pager *pPager, 21701ceedd37Sdanielk1977 int (*xBusyHandler)(void *), 21711ceedd37Sdanielk1977 void *pBusyHandlerArg 21721ceedd37Sdanielk1977 ){ 21731ceedd37Sdanielk1977 pPager->xBusyHandler = xBusyHandler; 21741ceedd37Sdanielk1977 pPager->pBusyHandlerArg = pBusyHandlerArg; 217590f5ecb3Sdrh } 217690f5ecb3Sdrh 217790f5ecb3Sdrh /* 2178a6abd041Sdrh ** Set the reinitializer for this pager. If not NULL, the reinitializer 2179a6abd041Sdrh ** is called when the content of a page in cache is restored to its original 2180a6abd041Sdrh ** value as a result of a rollback. The callback gives higher-level code 2181a6abd041Sdrh ** an opportunity to restore the EXTRA section to agree with the restored 2182a6abd041Sdrh ** page data. 2183a6abd041Sdrh */ 2184eaa06f69Sdanielk1977 void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){ 2185a6abd041Sdrh pPager->xReiniter = xReinit; 2186a6abd041Sdrh } 2187a6abd041Sdrh 2188a6abd041Sdrh /* 2189a1644fd8Sdanielk1977 ** Set the page size to *pPageSize. If the suggest new page size is 2190a1644fd8Sdanielk1977 ** inappropriate, then an alternative page size is set to that 2191a1644fd8Sdanielk1977 ** value before returning. 
219290f5ecb3Sdrh */ 2193a1644fd8Sdanielk1977 int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){ 21941357d9f5Sdanielk1977 int rc = pPager->errCode; 21951357d9f5Sdanielk1977 if( rc==SQLITE_OK ){ 2196a1644fd8Sdanielk1977 u16 pageSize = *pPageSize; 21979663b8f9Sdanielk1977 assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); 2198a1644fd8Sdanielk1977 if( pageSize && pageSize!=pPager->pageSize 21997426f864Sdrh && (pPager->memDb==0 || pPager->dbSize==0) 22007426f864Sdrh && sqlite3PcacheRefCount(pPager->pPCache)==0 2201a1644fd8Sdanielk1977 ){ 2202facf0307Sdrh char *pNew = (char *)sqlite3PageMalloc(pageSize); 2203a1644fd8Sdanielk1977 if( !pNew ){ 2204a1644fd8Sdanielk1977 rc = SQLITE_NOMEM; 2205a1644fd8Sdanielk1977 }else{ 2206c7c7e623Sdanielk1977 pager_reset(pPager); 220790f5ecb3Sdrh pPager->pageSize = pageSize; 22087426f864Sdrh if( !pPager->memDb ) setSectorSize(pPager); 2209facf0307Sdrh sqlite3PageFree(pPager->pTmpSpace); 2210a1644fd8Sdanielk1977 pPager->pTmpSpace = pNew; 22118c0a791aSdanielk1977 sqlite3PcacheSetPageSize(pPager->pPCache, pageSize); 22121c7880e5Sdrh } 2213a1644fd8Sdanielk1977 } 22144f21c4afSdrh *pPageSize = (u16)pPager->pageSize; 22151357d9f5Sdanielk1977 } 2216a1644fd8Sdanielk1977 return rc; 221790f5ecb3Sdrh } 221890f5ecb3Sdrh 221990f5ecb3Sdrh /* 222026b7994aSdrh ** Return a pointer to the "temporary page" buffer held internally 222126b7994aSdrh ** by the pager. This is a buffer that is big enough to hold the 222226b7994aSdrh ** entire content of a database page. This buffer is used internally 222326b7994aSdrh ** during rollback and will be overwritten whenever a rollback 222426b7994aSdrh ** occurs. But other modules are free to use it too, as long as 222526b7994aSdrh ** no rollbacks are happening. 
222626b7994aSdrh */ 222726b7994aSdrh void *sqlite3PagerTempSpace(Pager *pPager){ 222826b7994aSdrh return pPager->pTmpSpace; 222926b7994aSdrh } 223026b7994aSdrh 223126b7994aSdrh /* 2232f8e632b6Sdrh ** Attempt to set the maximum database page count if mxPage is positive. 2233f8e632b6Sdrh ** Make no changes if mxPage is zero or negative. And never reduce the 2234f8e632b6Sdrh ** maximum page count below the current size of the database. 2235f8e632b6Sdrh ** 2236f8e632b6Sdrh ** Regardless of mxPage, return the current maximum page count. 2237f8e632b6Sdrh */ 2238f8e632b6Sdrh int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){ 2239f8e632b6Sdrh if( mxPage>0 ){ 2240f8e632b6Sdrh pPager->mxPgno = mxPage; 2241f8e632b6Sdrh } 2242ad0132dfSdanielk1977 sqlite3PagerPagecount(pPager, 0); 2243f8e632b6Sdrh return pPager->mxPgno; 2244f8e632b6Sdrh } 2245f8e632b6Sdrh 2246f8e632b6Sdrh /* 2247c9ac5caaSdrh ** The following set of routines are used to disable the simulated 2248c9ac5caaSdrh ** I/O error mechanism. These routines are used to avoid simulated 2249c9ac5caaSdrh ** errors in places where we do not care about errors. 2250c9ac5caaSdrh ** 2251c9ac5caaSdrh ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops 2252c9ac5caaSdrh ** and generate no code. 
2253c9ac5caaSdrh */ 2254c9ac5caaSdrh #ifdef SQLITE_TEST 2255c9ac5caaSdrh extern int sqlite3_io_error_pending; 2256c9ac5caaSdrh extern int sqlite3_io_error_hit; 2257c9ac5caaSdrh static int saved_cnt; 2258c9ac5caaSdrh void disable_simulated_io_errors(void){ 2259c9ac5caaSdrh saved_cnt = sqlite3_io_error_pending; 2260c9ac5caaSdrh sqlite3_io_error_pending = -1; 2261c9ac5caaSdrh } 2262c9ac5caaSdrh void enable_simulated_io_errors(void){ 2263c9ac5caaSdrh sqlite3_io_error_pending = saved_cnt; 2264c9ac5caaSdrh } 2265c9ac5caaSdrh #else 2266152410faSdrh # define disable_simulated_io_errors() 2267152410faSdrh # define enable_simulated_io_errors() 2268c9ac5caaSdrh #endif 2269c9ac5caaSdrh 2270c9ac5caaSdrh /* 227190f5ecb3Sdrh ** Read the first N bytes from the beginning of the file into memory 2272aef0bf64Sdanielk1977 ** that pDest points to. 2273aef0bf64Sdanielk1977 ** 2274aef0bf64Sdanielk1977 ** No error checking is done. The rational for this is that this function 2275aef0bf64Sdanielk1977 ** may be called even if the file does not exist or contain a header. In 2276aef0bf64Sdanielk1977 ** these cases sqlite3OsRead() will return an error, to which the correct 2277aef0bf64Sdanielk1977 ** response is to zero the memory at pDest and continue. A real IO error 2278aef0bf64Sdanielk1977 ** will presumably recur and be picked up later (Todo: Think about this). 
227990f5ecb3Sdrh */ 22803b8a05f6Sdanielk1977 int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ 2281551b7736Sdrh int rc = SQLITE_OK; 228290f5ecb3Sdrh memset(pDest, 0, N); 2283b3175389Sdanielk1977 assert(pPager->fd->pMethods||pPager->tempFile); 22847a2b1eebSdanielk1977 if( pPager->fd->pMethods ){ 2285b0603416Sdrh IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) 228662079060Sdanielk1977 rc = sqlite3OsRead(pPager->fd, pDest, N, 0); 2287551b7736Sdrh if( rc==SQLITE_IOERR_SHORT_READ ){ 2288551b7736Sdrh rc = SQLITE_OK; 228990f5ecb3Sdrh } 229090f5ecb3Sdrh } 2291551b7736Sdrh return rc; 2292551b7736Sdrh } 229390f5ecb3Sdrh 229490f5ecb3Sdrh /* 22955e00f6c7Sdrh ** Return the total number of pages in the disk file associated with 22965e00f6c7Sdrh ** pPager. 229715f411dbSdanielk1977 ** 229815f411dbSdanielk1977 ** If the PENDING_BYTE lies on the page directly after the end of the 229915f411dbSdanielk1977 ** file, then consider this page part of the file too. For example, if 230015f411dbSdanielk1977 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the 230115f411dbSdanielk1977 ** file is 4096 bytes, 5 is returned instead of 4. 
2302ed7c855cSdrh */ 2303ad0132dfSdanielk1977 int sqlite3PagerPagecount(Pager *pPager, int *pnPage){ 23047a2b1eebSdanielk1977 i64 n = 0; 2305e49f9827Sdrh int rc; 2306d9b0257aSdrh assert( pPager!=0 ); 2307a7aea3ddSdrh if( pPager->errCode ){ 23088c0a791aSdanielk1977 rc = pPager->errCode; 23098c0a791aSdanielk1977 return rc; 2310a7aea3ddSdrh } 2311d92db531Sdanielk1977 if( pPager->dbSizeValid ){ 231215f411dbSdanielk1977 n = pPager->dbSize; 231315f411dbSdanielk1977 } else { 23147a2b1eebSdanielk1977 assert(pPager->fd->pMethods||pPager->tempFile); 23157a2b1eebSdanielk1977 if( (pPager->fd->pMethods) 23167a2b1eebSdanielk1977 && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){ 2317e49f9827Sdrh pager_error(pPager, rc); 2318ad0132dfSdanielk1977 return rc; 2319ed7c855cSdrh } 2320992f2d78Sdrh if( n>0 && n<pPager->pageSize ){ 2321992f2d78Sdrh n = 1; 2322992f2d78Sdrh }else{ 232390f5ecb3Sdrh n /= pPager->pageSize; 2324992f2d78Sdrh } 2325a6abd041Sdrh if( pPager->state!=PAGER_UNLOCK ){ 23263460d19cSdanielk1977 pPager->dbSize = (Pgno)n; 23273460d19cSdanielk1977 pPager->dbFileSize = (Pgno)n; 2328d92db531Sdanielk1977 pPager->dbSizeValid = 1; 2329ed7c855cSdrh } 233015f411dbSdanielk1977 } 233115f411dbSdanielk1977 if( n==(PENDING_BYTE/pPager->pageSize) ){ 233215f411dbSdanielk1977 n++; 233315f411dbSdanielk1977 } 2334f8e632b6Sdrh if( n>pPager->mxPgno ){ 23354f21c4afSdrh pPager->mxPgno = (Pgno)n; 2336f8e632b6Sdrh } 2337ad0132dfSdanielk1977 if( pnPage ){ 23384f21c4afSdrh *pnPage = (int)n; 2339ad0132dfSdanielk1977 } 2340ad0132dfSdanielk1977 return SQLITE_OK; 2341ed7c855cSdrh } 2342ed7c855cSdrh 2343ed7c855cSdrh /* 2344f7c57531Sdrh ** Forward declaration 2345f7c57531Sdrh */ 23467657240aSdanielk1977 static int syncJournal(Pager*); 2347ac69b05eSdrh 2348ac69b05eSdrh /* 234917221813Sdanielk1977 ** Try to obtain a lock on a file. Invoke the busy callback if the lock 2350a4afb65cSdrh ** is currently not available. 
Repeat until the busy callback returns 235117221813Sdanielk1977 ** false or until the lock succeeds. 235217221813Sdanielk1977 ** 235317221813Sdanielk1977 ** Return SQLITE_OK on success and an error code if we cannot obtain 235417221813Sdanielk1977 ** the lock. 235517221813Sdanielk1977 */ 235617221813Sdanielk1977 static int pager_wait_on_lock(Pager *pPager, int locktype){ 235717221813Sdanielk1977 int rc; 23581aa2d8b5Sdrh 23591aa2d8b5Sdrh /* The OS lock values must be the same as the Pager lock values */ 236017221813Sdanielk1977 assert( PAGER_SHARED==SHARED_LOCK ); 236117221813Sdanielk1977 assert( PAGER_RESERVED==RESERVED_LOCK ); 236217221813Sdanielk1977 assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK ); 23631aa2d8b5Sdrh 23641aa2d8b5Sdrh /* If the file is currently unlocked then the size must be unknown */ 2365d92db531Sdanielk1977 assert( pPager->state>=PAGER_SHARED || pPager->dbSizeValid==0 ); 23661aa2d8b5Sdrh 236717221813Sdanielk1977 if( pPager->state>=locktype ){ 236817221813Sdanielk1977 rc = SQLITE_OK; 236917221813Sdanielk1977 }else{ 237017221813Sdanielk1977 do { 2371054889ecSdrh rc = sqlite3OsLock(pPager->fd, locktype); 23721ceedd37Sdanielk1977 }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) ); 237317221813Sdanielk1977 if( rc==SQLITE_OK ){ 23744f21c4afSdrh pPager->state = (u8)locktype; 2375b0603416Sdrh IOTRACE(("LOCK %p %d\n", pPager, locktype)) 237617221813Sdanielk1977 } 237717221813Sdanielk1977 } 237817221813Sdanielk1977 return rc; 237917221813Sdanielk1977 } 238017221813Sdanielk1977 23813460d19cSdanielk1977 #ifndef SQLITE_OMIT_AUTOVACUUM 23823460d19cSdanielk1977 /* 2383f90b7260Sdanielk1977 ** Truncate the in-memory database file image to nPage pages. This 2384f90b7260Sdanielk1977 ** function does not actually modify the database file on disk. It 2385f90b7260Sdanielk1977 ** just sets the internal state of the pager object so that the 2386f90b7260Sdanielk1977 ** truncation will be done when the current transaction is committed. 
23873460d19cSdanielk1977 */ 23883460d19cSdanielk1977 void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ 23893460d19cSdanielk1977 assert( pPager->dbSizeValid ); 23903460d19cSdanielk1977 assert( pPager->dbSize>=nPage ); 23913460d19cSdanielk1977 pPager->dbSize = nPage; 23923460d19cSdanielk1977 } 23933460d19cSdanielk1977 23943460d19cSdanielk1977 /* 23953460d19cSdanielk1977 ** Return the current size of the database file image in pages. This 23963460d19cSdanielk1977 ** function differs from sqlite3PagerPagecount() in two ways: 23973460d19cSdanielk1977 ** 23983460d19cSdanielk1977 ** a) It may only be called when at least one reference to a database 23993460d19cSdanielk1977 ** page is held. This guarantees that the database size is already 24003460d19cSdanielk1977 ** known and a call to sqlite3OsFileSize() is not required. 24013460d19cSdanielk1977 ** 24023460d19cSdanielk1977 ** b) The return value is not adjusted for the locking page. 24033460d19cSdanielk1977 */ 24043460d19cSdanielk1977 Pgno sqlite3PagerImageSize(Pager *pPager){ 24053460d19cSdanielk1977 assert( pPager->dbSizeValid ); 24063460d19cSdanielk1977 return pPager->dbSize; 24073460d19cSdanielk1977 } 24083460d19cSdanielk1977 #endif /* ifndef SQLITE_OMIT_AUTOVACUUM */ 24093460d19cSdanielk1977 2410f7c57531Sdrh /* 2411ed7c855cSdrh ** Shutdown the page cache. Free all memory and close all files. 2412ed7c855cSdrh ** 2413ed7c855cSdrh ** If a transaction was in progress when this routine is called, that 2414ed7c855cSdrh ** transaction is rolled back. All outstanding pages are invalidated 2415ed7c855cSdrh ** and their memory is freed. Any attempt to use a page associated 2416ed7c855cSdrh ** with this page cache after this function returns will likely 2417ed7c855cSdrh ** result in a coredump. 2418aef0bf64Sdanielk1977 ** 2419aef0bf64Sdanielk1977 ** This function always succeeds. If a transaction is active an attempt 2420aef0bf64Sdanielk1977 ** is made to roll it back. 
If an error occurs during the rollback 2421aef0bf64Sdanielk1977 ** a hot journal may be left in the filesystem but no error is returned 2422aef0bf64Sdanielk1977 ** to the caller. 2423ed7c855cSdrh */ 24243b8a05f6Sdanielk1977 int sqlite3PagerClose(Pager *pPager){ 242513f7299bSdanielk1977 2426c9ac5caaSdrh disable_simulated_io_errors(); 24272d1d86fbSdanielk1977 sqlite3BeginBenignMalloc(); 2428c2ee76cbSdrh pPager->errCode = 0; 242941483468Sdanielk1977 pPager->exclusiveMode = 0; 2430bafda096Sdrh pager_reset(pPager); 2431b3175389Sdanielk1977 if( !MEMDB ){ 2432f2c31ad8Sdanielk1977 /* Set Pager.journalHdr to -1 for the benefit of the pager_playback() 2433f2c31ad8Sdanielk1977 ** call which may be made from within pagerUnlockAndRollback(). If it 2434f2c31ad8Sdanielk1977 ** is not -1, then the unsynced portion of an open journal file may 2435f2c31ad8Sdanielk1977 ** be played back into the database. If a power failure occurs while 2436f2c31ad8Sdanielk1977 ** this is happening, the database may become corrupt. 
2437f2c31ad8Sdanielk1977 */ 2438f2c31ad8Sdanielk1977 pPager->journalHdr = -1; 2439e277be05Sdanielk1977 pagerUnlockAndRollback(pPager); 2440b3175389Sdanielk1977 } 2441c9ac5caaSdrh enable_simulated_io_errors(); 24422d1d86fbSdanielk1977 sqlite3EndBenignMalloc(); 244330d53701Sdrh PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); 2444b0603416Sdrh IOTRACE(("CLOSE %p\n", pPager)) 2445e94ddc9eSdanielk1977 if( pPager->journalOpen ){ 2446b4b47411Sdanielk1977 sqlite3OsClose(pPager->jfd); 2447e94ddc9eSdanielk1977 } 2448f5e7bb51Sdrh sqlite3BitvecDestroy(pPager->pInJournal); 2449a1fa00d9Sdanielk1977 sqlite3BitvecDestroy(pPager->pAlwaysRollback); 2450fd7f0452Sdanielk1977 releaseAllSavepoint(pPager); 2451b4b47411Sdanielk1977 sqlite3OsClose(pPager->fd); 24520f89253eSdrh /* Temp files are automatically deleted by the OS 24530f89253eSdrh ** if( pPager->tempFile ){ 245466560adaSdrh ** sqlite3OsDelete(pPager->zFilename); 24550f89253eSdrh ** } 24560f89253eSdrh */ 2457aca790acSdanielk1977 2458facf0307Sdrh sqlite3PageFree(pPager->pTmpSpace); 24598c0a791aSdanielk1977 sqlite3PcacheClose(pPager->pPCache); 246017435752Sdrh sqlite3_free(pPager); 2461ed7c855cSdrh return SQLITE_OK; 2462ed7c855cSdrh } 2463ed7c855cSdrh 246487cc3b31Sdrh #if !defined(NDEBUG) || defined(SQLITE_TEST) 2465ed7c855cSdrh /* 24665e00f6c7Sdrh ** Return the page number for the given page data. 2467ed7c855cSdrh */ 24683b8a05f6Sdanielk1977 Pgno sqlite3PagerPagenumber(DbPage *p){ 2469ed7c855cSdrh return p->pgno; 2470ed7c855cSdrh } 247187cc3b31Sdrh #endif 2472ed7c855cSdrh 2473ed7c855cSdrh /* 2474df0b3b09Sdrh ** Increment the reference count for a page. The input pointer is 2475df0b3b09Sdrh ** a reference to the page data. 2476df0b3b09Sdrh */ 24773b8a05f6Sdanielk1977 int sqlite3PagerRef(DbPage *pPg){ 24788c0a791aSdanielk1977 sqlite3PcacheRef(pPg); 24798c42ca93Sdrh return SQLITE_OK; 24807e3b0a07Sdrh } 24817e3b0a07Sdrh 24827e3b0a07Sdrh /* 248334e79ceeSdrh ** Sync the journal. 
In other words, make sure all the pages that have 248434e79ceeSdrh ** been written to the journal have actually reached the surface of the 248534e79ceeSdrh ** disk. It is not safe to modify the original database file until after 248634e79ceeSdrh ** the journal has been synced. If the original database is modified before 248734e79ceeSdrh ** the journal is synced and a power failure occurs, the unsynced journal 248834e79ceeSdrh ** data would be lost and we would be unable to completely rollback the 248934e79ceeSdrh ** database changes. Database corruption would occur. 2490b19a2bc6Sdrh ** 249134e79ceeSdrh ** This routine also updates the nRec field in the header of the journal. 249234e79ceeSdrh ** (See comments on the pager_playback() routine for additional information.) 249334e79ceeSdrh ** If the sync mode is FULL, two syncs will occur. First the whole journal 249434e79ceeSdrh ** is synced, then the nRec field is updated, then a second sync occurs. 2495fa86c412Sdrh ** 249634e79ceeSdrh ** For temporary databases, we do not care if we are able to rollback 24974cd2cd5cSdanielk1977 ** after a power failure, so no sync occurs. 24984cd2cd5cSdanielk1977 ** 24994cd2cd5cSdanielk1977 ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which 25004cd2cd5cSdanielk1977 ** the database is stored, then OsSync() is never called on the journal 25014cd2cd5cSdanielk1977 ** file. In this case all that is required is to update the nRec field in 25024cd2cd5cSdanielk1977 ** the journal header. 250334e79ceeSdrh ** 250434e79ceeSdrh ** This routine clears the needSync field of every page current held in 250534e79ceeSdrh ** memory. 250650e5dadfSdrh */ 25077657240aSdanielk1977 static int syncJournal(Pager *pPager){ 250850e5dadfSdrh int rc = SQLITE_OK; 250903eb96a7Sdrh 251003eb96a7Sdrh /* Sync the journal before modifying the main database 251103eb96a7Sdrh ** (assuming there is a journal and it needs to be synced.) 
251203eb96a7Sdrh */ 25137657240aSdanielk1977 if( pPager->needSync ){ 2514b3175389Sdanielk1977 assert( !pPager->tempFile ); 2515b3175389Sdanielk1977 if( pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ 25164cd2cd5cSdanielk1977 int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); 2517db48ee02Sdrh assert( pPager->journalOpen ); 25184cd2cd5cSdanielk1977 25194cd2cd5cSdanielk1977 if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ 2520112f752bSdanielk1977 i64 jrnlOff = journalHdrOffset(pPager); 2521112f752bSdanielk1977 u8 zMagic[8]; 2522112f752bSdanielk1977 2523112f752bSdanielk1977 /* This block deals with an obscure problem. If the last connection 2524112f752bSdanielk1977 ** that wrote to this database was operating in persistent-journal 2525112f752bSdanielk1977 ** mode, then the journal file may at this point actually be larger 2526112f752bSdanielk1977 ** than Pager.journalOff bytes. If the next thing in the journal 2527112f752bSdanielk1977 ** file happens to be a journal-header (written as part of the 2528112f752bSdanielk1977 ** previous connections transaction), and a crash or power-failure 2529112f752bSdanielk1977 ** occurs after nRec is updated but before this connection writes 2530112f752bSdanielk1977 ** anything else to the journal file (or commits/rolls back its 2531112f752bSdanielk1977 ** transaction), then SQLite may become confused when doing the 2532112f752bSdanielk1977 ** hot-journal rollback following recovery. It may roll back all 2533112f752bSdanielk1977 ** of this connections data, then proceed to rolling back the old, 2534112f752bSdanielk1977 ** out-of-date data that follows it. Database corruption. 2535112f752bSdanielk1977 ** 2536112f752bSdanielk1977 ** To work around this, if the journal file does appear to contain 2537112f752bSdanielk1977 ** a valid header following Pager.journalOff, then write a 0x00 2538112f752bSdanielk1977 ** byte to the start of it to prevent it from being recognized. 
2539112f752bSdanielk1977 */ 2540112f752bSdanielk1977 rc = sqlite3OsRead(pPager->jfd, zMagic, 8, jrnlOff); 2541112f752bSdanielk1977 if( rc==SQLITE_OK && 0==memcmp(zMagic, aJournalMagic, 8) ){ 2542112f752bSdanielk1977 static const u8 zerobyte = 0; 2543112f752bSdanielk1977 rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, jrnlOff); 2544112f752bSdanielk1977 } 2545112f752bSdanielk1977 if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ 2546112f752bSdanielk1977 return rc; 2547112f752bSdanielk1977 } 2548112f752bSdanielk1977 25497657240aSdanielk1977 /* Write the nRec value into the journal file header. If in 25507657240aSdanielk1977 ** full-synchronous mode, sync the journal first. This ensures that 25517657240aSdanielk1977 ** all data has really hit the disk before nRec is updated to mark 25527657240aSdanielk1977 ** it as a candidate for rollback. 25534cd2cd5cSdanielk1977 ** 25544cd2cd5cSdanielk1977 ** This is not required if the persistent media supports the 25554cd2cd5cSdanielk1977 ** SAFE_APPEND property. Because in this case it is not possible 25564cd2cd5cSdanielk1977 ** for garbage data to be appended to the file, the nRec field 25574cd2cd5cSdanielk1977 ** is populated with 0xFFFFFFFF when the journal header is written 25584cd2cd5cSdanielk1977 ** and never needs to be updated. 
25597657240aSdanielk1977 */ 25604cd2cd5cSdanielk1977 if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ 256130d53701Sdrh PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); 2562b0603416Sdrh IOTRACE(("JSYNC %p\n", pPager)) 2563f036aef0Sdanielk1977 rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags); 256450e5dadfSdrh if( rc!=0 ) return rc; 2565968af52aSdrh } 256613adf8a0Sdanielk1977 256762079060Sdanielk1977 jrnlOff = pPager->journalHdr + sizeof(aJournalMagic); 256862079060Sdanielk1977 IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4)); 256962079060Sdanielk1977 rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec); 2570b4746b9eSdrh if( rc ) return rc; 2571d8d66e8cSdrh } 25724cd2cd5cSdanielk1977 if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ 257330d53701Sdrh PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); 2574126afe6bSdrh IOTRACE(("JSYNC %p\n", pPager)) 2575f036aef0Sdanielk1977 rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 2576f036aef0Sdanielk1977 (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) 2577f036aef0Sdanielk1977 ); 2578db48ee02Sdrh if( rc!=0 ) return rc; 25794cd2cd5cSdanielk1977 } 2580db48ee02Sdrh pPager->journalStarted = 1; 2581fa86c412Sdrh } 258250e5dadfSdrh pPager->needSync = 0; 258303eb96a7Sdrh 2584db48ee02Sdrh /* Erase the needSync flag from every page. 258503eb96a7Sdrh */ 2586bc2ca9ebSdanielk1977 sqlite3PcacheClearSyncFlags(pPager->pPCache); 2587341eae8dSdrh } 2588341eae8dSdrh 258981a20f21Sdrh return rc; 259050e5dadfSdrh } 259150e5dadfSdrh 259250e5dadfSdrh /* 25932554f8b0Sdrh ** Given a list of pages (connected by the PgHdr.pDirty pointer) write 2594a858aa2eSdanielk1977 ** every one of those pages out to the database file. No calls are made 2595a858aa2eSdanielk1977 ** to the page-cache to mark the pages as clean. It is the responsibility 2596a858aa2eSdanielk1977 ** of the caller to use PcacheCleanAll() or PcacheMakeClean() to mark 2597a858aa2eSdanielk1977 ** the pages as clean. 
25982554f8b0Sdrh */ 25992554f8b0Sdrh static int pager_write_pagelist(PgHdr *pList){ 26002554f8b0Sdrh Pager *pPager; 26012554f8b0Sdrh int rc; 26022554f8b0Sdrh 26032554f8b0Sdrh if( pList==0 ) return SQLITE_OK; 26042554f8b0Sdrh pPager = pList->pPager; 26059eed5057Sdanielk1977 26069eed5057Sdanielk1977 /* At this point there may be either a RESERVED or EXCLUSIVE lock on the 26079eed5057Sdanielk1977 ** database file. If there is already an EXCLUSIVE lock, the following 2608054889ecSdrh ** calls to sqlite3OsLock() are no-ops. 26099eed5057Sdanielk1977 ** 2610a6abd041Sdrh ** Moving the lock from RESERVED to EXCLUSIVE actually involves going 2611a6abd041Sdrh ** through an intermediate state PENDING. A PENDING lock prevents new 2612a6abd041Sdrh ** readers from attaching to the database but is unsufficient for us to 2613a6abd041Sdrh ** write. The idea of a PENDING lock is to prevent new readers from 2614a6abd041Sdrh ** coming in while we wait for existing readers to clear. 26159eed5057Sdanielk1977 ** 2616a6abd041Sdrh ** While the pager is in the RESERVED state, the original database file 2617a6abd041Sdrh ** is unchanged and we can rollback without having to playback the 2618a6abd041Sdrh ** journal into the original database file. Once we transition to 2619a6abd041Sdrh ** EXCLUSIVE, it means the database file has been changed and any rollback 2620a6abd041Sdrh ** will require a journal playback. 26219eed5057Sdanielk1977 */ 2622684917c2Sdrh rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 26239eed5057Sdanielk1977 if( rc!=SQLITE_OK ){ 26249eed5057Sdanielk1977 return rc; 26259eed5057Sdanielk1977 } 26269eed5057Sdanielk1977 26272554f8b0Sdrh while( pList ){ 26287a2b1eebSdanielk1977 26297a2b1eebSdanielk1977 /* If the file has not yet been opened, open it now. 
*/ 26307a2b1eebSdanielk1977 if( !pPager->fd->pMethods ){ 26317a2b1eebSdanielk1977 assert(pPager->tempFile); 263217b90b53Sdanielk1977 rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags); 26337a2b1eebSdanielk1977 if( rc ) return rc; 26347a2b1eebSdanielk1977 } 26357a2b1eebSdanielk1977 2636687566d7Sdanielk1977 /* If there are dirty pages in the page cache with page numbers greater 2637f90b7260Sdanielk1977 ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to 2638687566d7Sdanielk1977 ** make the file smaller (presumably by auto-vacuum code). Do not write 2639687566d7Sdanielk1977 ** any such pages to the file. 2640687566d7Sdanielk1977 */ 264133e3216aSdanielk1977 if( pList->pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ 264262079060Sdanielk1977 i64 offset = (pList->pgno-1)*(i64)pPager->pageSize; 26438c0a791aSdanielk1977 char *pData = CODEC2(pPager, pList->pData, pList->pgno, 6); 264412dd5496Sdanielk1977 264530d53701Sdrh PAGERTRACE(("STORE %d page %d hash(%08x)\n", 264630d53701Sdrh PAGERID(pPager), pList->pgno, pager_pagehash(pList))); 2647538f570cSdrh IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno)); 264862079060Sdanielk1977 rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); 2649538f570cSdrh PAGER_INCR(sqlite3_pager_writedb_count); 2650538f570cSdrh PAGER_INCR(pPager->nWrite); 265186a88114Sdrh if( pList->pgno==1 ){ 265286a88114Sdrh memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); 265386a88114Sdrh } 26543460d19cSdanielk1977 if( pList->pgno>pPager->dbFileSize ){ 26553460d19cSdanielk1977 pPager->dbFileSize = pList->pgno; 26563460d19cSdanielk1977 } 2657687566d7Sdanielk1977 } 2658687566d7Sdanielk1977 #ifndef NDEBUG 2659687566d7Sdanielk1977 else{ 266030d53701Sdrh PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno)); 2661687566d7Sdanielk1977 } 2662687566d7Sdanielk1977 #endif 26632554f8b0Sdrh if( rc ) return rc; 26643c407374Sdanielk1977 #ifdef SQLITE_CHECK_PAGES 26653c407374Sdanielk1977 
pList->pageHash = pager_pagehash(pList); 26663c407374Sdanielk1977 #endif 26672554f8b0Sdrh pList = pList->pDirty; 26682554f8b0Sdrh } 26698c0a791aSdanielk1977 26702554f8b0Sdrh return SQLITE_OK; 26712554f8b0Sdrh } 26722554f8b0Sdrh 26732554f8b0Sdrh /* 2674f2c31ad8Sdanielk1977 ** Add the page to the sub-journal. It is the callers responsibility to 2675f2c31ad8Sdanielk1977 ** use subjRequiresPage() to check that it is really required before 2676f2c31ad8Sdanielk1977 ** calling this function. 2677f2c31ad8Sdanielk1977 */ 2678f2c31ad8Sdanielk1977 static int subjournalPage(PgHdr *pPg){ 2679f2c31ad8Sdanielk1977 int rc; 2680f2c31ad8Sdanielk1977 void *pData = pPg->pData; 2681f2c31ad8Sdanielk1977 Pager *pPager = pPg->pPager; 2682f2c31ad8Sdanielk1977 i64 offset = pPager->stmtNRec*(4+pPager->pageSize); 2683f2c31ad8Sdanielk1977 char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7); 2684f2c31ad8Sdanielk1977 268530d53701Sdrh PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); 2686f2c31ad8Sdanielk1977 2687f2c31ad8Sdanielk1977 assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize ); 2688f2c31ad8Sdanielk1977 rc = write32bits(pPager->sjfd, offset, pPg->pgno); 2689f2c31ad8Sdanielk1977 if( rc==SQLITE_OK ){ 2690f2c31ad8Sdanielk1977 rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); 2691f2c31ad8Sdanielk1977 } 2692f2c31ad8Sdanielk1977 if( rc==SQLITE_OK ){ 2693f2c31ad8Sdanielk1977 pPager->stmtNRec++; 2694f2c31ad8Sdanielk1977 assert( pPager->nSavepoint>0 ); 2695f2c31ad8Sdanielk1977 rc = addToSavepointBitvecs(pPager, pPg->pgno); 2696f2c31ad8Sdanielk1977 } 2697f2c31ad8Sdanielk1977 return rc; 2698f2c31ad8Sdanielk1977 } 2699f2c31ad8Sdanielk1977 2700f2c31ad8Sdanielk1977 2701f2c31ad8Sdanielk1977 /* 27028c0a791aSdanielk1977 ** This function is called by the pcache layer when it has reached some 27038c0a791aSdanielk1977 ** soft memory limit. The argument is a pointer to a purgeable Pager 27048c0a791aSdanielk1977 ** object. 
This function attempts to make a single dirty page that has no 27058c0a791aSdanielk1977 ** outstanding references (if one exists) clean so that it can be recycled 27068c0a791aSdanielk1977 ** by the pcache layer. 27072554f8b0Sdrh */ 2708a858aa2eSdanielk1977 static int pagerStress(void *p, PgHdr *pPg){ 27098c0a791aSdanielk1977 Pager *pPager = (Pager *)p; 27108c0a791aSdanielk1977 int rc = SQLITE_OK; 27118f2e9a1aSdrh 27128c20014aSdanielk1977 if( pPager->doNotSync ){ 27138c20014aSdanielk1977 return SQLITE_OK; 27148c20014aSdanielk1977 } 27158c20014aSdanielk1977 27168c0a791aSdanielk1977 assert( pPg->flags&PGHDR_DIRTY ); 271767e3da7aSdanielk1977 if( pPager->errCode==SQLITE_OK ){ 27188c0a791aSdanielk1977 if( pPg->flags&PGHDR_NEED_SYNC ){ 27198c0a791aSdanielk1977 rc = syncJournal(pPager); 272067e3da7aSdanielk1977 if( rc==SQLITE_OK && pPager->fullSync && 2721b3175389Sdanielk1977 !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) && 272267e3da7aSdanielk1977 !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 27238c0a791aSdanielk1977 ){ 27248c0a791aSdanielk1977 pPager->nRec = 0; 27258c0a791aSdanielk1977 rc = writeJournalHdr(pPager); 27262554f8b0Sdrh } 27278c0a791aSdanielk1977 } 27288c0a791aSdanielk1977 if( rc==SQLITE_OK ){ 2729a858aa2eSdanielk1977 pPg->pDirty = 0; 2730f2c31ad8Sdanielk1977 if( pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) ){ 2731f2c31ad8Sdanielk1977 rc = subjournalPage(pPg); 2732f2c31ad8Sdanielk1977 } 2733f2c31ad8Sdanielk1977 if( rc==SQLITE_OK ){ 27348c0a791aSdanielk1977 rc = pager_write_pagelist(pPg); 27358c0a791aSdanielk1977 } 2736f2c31ad8Sdanielk1977 } 27378c0a791aSdanielk1977 if( rc!=SQLITE_OK ){ 27388c0a791aSdanielk1977 pager_error(pPager, rc); 27398c0a791aSdanielk1977 } 274067e3da7aSdanielk1977 } 2741a858aa2eSdanielk1977 2742a858aa2eSdanielk1977 if( rc==SQLITE_OK ){ 274330d53701Sdrh PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); 2744a858aa2eSdanielk1977 sqlite3PcacheMakeClean(pPg); 27458c0a791aSdanielk1977 } 
27468c0a791aSdanielk1977 return rc; 27478c0a791aSdanielk1977 } 27488c0a791aSdanielk1977 27492554f8b0Sdrh 27502554f8b0Sdrh /* 275119db9352Sdrh ** Return 1 if there is a hot journal on the given pager. 2752165ffe97Sdrh ** A hot journal is one that needs to be played back. 2753165ffe97Sdrh ** 2754165ffe97Sdrh ** If the current size of the database file is 0 but a journal file 2755165ffe97Sdrh ** exists, that is probably an old journal left over from a prior 2756165ffe97Sdrh ** database with the same name. Just delete the journal. 275719db9352Sdrh ** 275819db9352Sdrh ** Return negative if unable to determine the status of the journal. 275982ed1e5bSdrh ** 276082ed1e5bSdrh ** This routine does not open the journal file to examine its 276182ed1e5bSdrh ** content. Hence, the journal might contain the name of a master 276282ed1e5bSdrh ** journal file that has been deleted, and hence not be hot. Or 276382ed1e5bSdrh ** the header of the journal might be zeroed out. This routine 276482ed1e5bSdrh ** does not discover these cases of a non-hot journal - if the 276582ed1e5bSdrh ** journal file exists and is not empty this routine assumes it 276682ed1e5bSdrh ** is hot. The pager_playback() routine will discover that the 276782ed1e5bSdrh ** journal file is not really hot and will no-op. 
2768165ffe97Sdrh */ 2769d300b8a3Sdanielk1977 static int hasHotJournal(Pager *pPager, int *pExists){ 2770b4b47411Sdanielk1977 sqlite3_vfs *pVfs = pPager->pVfs; 2771d300b8a3Sdanielk1977 int rc = SQLITE_OK; 2772ea678832Sdrh int exists = 0; 2773ea678832Sdrh int locked = 0; 27740a846f96Sdrh assert( pPager!=0 ); 27750a846f96Sdrh assert( pPager->useJournal ); 27760a846f96Sdrh assert( pPager->fd->pMethods ); 27770a846f96Sdrh *pExists = 0; 2778861f7456Sdanielk1977 rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists); 2779861f7456Sdanielk1977 if( rc==SQLITE_OK && exists ){ 2780861f7456Sdanielk1977 rc = sqlite3OsCheckReservedLock(pPager->fd, &locked); 2781bb5f18d2Sdrh } 2782861f7456Sdanielk1977 if( rc==SQLITE_OK && exists && !locked ){ 2783ad0132dfSdanielk1977 int nPage; 2784ad0132dfSdanielk1977 rc = sqlite3PagerPagecount(pPager, &nPage); 2785d300b8a3Sdanielk1977 if( rc==SQLITE_OK ){ 2786d300b8a3Sdanielk1977 if( nPage==0 ){ 2787fee2d25aSdanielk1977 sqlite3OsDelete(pVfs, pPager->zJournal, 0); 2788d300b8a3Sdanielk1977 }else{ 2789d300b8a3Sdanielk1977 *pExists = 1; 2790d300b8a3Sdanielk1977 } 2791d300b8a3Sdanielk1977 } 2792165ffe97Sdrh } 2793d300b8a3Sdanielk1977 return rc; 2794861f7456Sdanielk1977 } 2795861f7456Sdanielk1977 2796165ffe97Sdrh /* 2797e180dd93Sdanielk1977 ** Read the content of page pPg out of the database file. 
2798e180dd93Sdanielk1977 */ 2799e180dd93Sdanielk1977 static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){ 2800e180dd93Sdanielk1977 int rc; 280162079060Sdanielk1977 i64 offset; 2802e180dd93Sdanielk1977 assert( MEMDB==0 ); 28037a2b1eebSdanielk1977 assert(pPager->fd->pMethods||pPager->tempFile); 28047a2b1eebSdanielk1977 if( !pPager->fd->pMethods ){ 28057a2b1eebSdanielk1977 return SQLITE_IOERR_SHORT_READ; 28067a2b1eebSdanielk1977 } 280762079060Sdanielk1977 offset = (pgno-1)*(i64)pPager->pageSize; 28088c0a791aSdanielk1977 rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, offset); 2809538f570cSdrh PAGER_INCR(sqlite3_pager_readdb_count); 2810538f570cSdrh PAGER_INCR(pPager->nRead); 2811538f570cSdrh IOTRACE(("PGIN %p %d\n", pPager, pgno)); 281286a88114Sdrh if( pgno==1 ){ 28138c0a791aSdanielk1977 memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24], 281486a88114Sdrh sizeof(pPager->dbFileVers)); 281586a88114Sdrh } 28163084952aSdanielk1977 CODEC1(pPager, pPg->pData, pPg->pgno, 3); 281730d53701Sdrh PAGERTRACE(("FETCH %d page %d hash(%08x)\n", 281830d53701Sdrh PAGERID(pPager), pPg->pgno, pager_pagehash(pPg))); 2819e180dd93Sdanielk1977 return rc; 2820e180dd93Sdanielk1977 } 2821e180dd93Sdanielk1977 2822e180dd93Sdanielk1977 2823e180dd93Sdanielk1977 /* 2824e277be05Sdanielk1977 ** This function is called to obtain the shared lock required before 2825e277be05Sdanielk1977 ** data may be read from the pager cache. If the shared lock has already 2826e277be05Sdanielk1977 ** been obtained, this function is a no-op. 2827393f0689Sdanielk1977 ** 2828393f0689Sdanielk1977 ** Immediately after obtaining the shared lock (if required), this function 2829393f0689Sdanielk1977 ** checks for a hot-journal file. If one is found, an emergency rollback 2830393f0689Sdanielk1977 ** is performed immediately. 
2831ed7c855cSdrh */ 2832e277be05Sdanielk1977 static int pagerSharedLock(Pager *pPager){ 2833e277be05Sdanielk1977 int rc = SQLITE_OK; 2834d300b8a3Sdanielk1977 int isErrorReset = 0; 2835ed7c855cSdrh 2836ae72d982Sdanielk1977 /* If this database is opened for exclusive access, has no outstanding 2837ae72d982Sdanielk1977 ** page references and is in an error-state, now is the chance to clear 2838ae72d982Sdanielk1977 ** the error. Discard the contents of the pager-cache and treat any 2839ae72d982Sdanielk1977 ** open journal file as a hot-journal. 2840ae72d982Sdanielk1977 */ 28418c0a791aSdanielk1977 if( !MEMDB && pPager->exclusiveMode 28428c0a791aSdanielk1977 && sqlite3PcacheRefCount(pPager->pPCache)==0 && pPager->errCode 28438c0a791aSdanielk1977 ){ 2844ae72d982Sdanielk1977 if( pPager->journalOpen ){ 2845d300b8a3Sdanielk1977 isErrorReset = 1; 2846ae72d982Sdanielk1977 } 2847ae72d982Sdanielk1977 pPager->errCode = SQLITE_OK; 284893f7af97Sdanielk1977 pager_reset(pPager); 2849ae72d982Sdanielk1977 } 2850ae72d982Sdanielk1977 2851ae72d982Sdanielk1977 /* If the pager is still in an error state, do not proceed. The error 2852ae72d982Sdanielk1977 ** state will be cleared at some point in the future when all page 2853ae72d982Sdanielk1977 ** references are dropped and the cache can be discarded. 
2854ae72d982Sdanielk1977 */ 2855ae72d982Sdanielk1977 if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ 2856ae72d982Sdanielk1977 return pPager->errCode; 2857ae72d982Sdanielk1977 } 2858ae72d982Sdanielk1977 2859d300b8a3Sdanielk1977 if( pPager->state==PAGER_UNLOCK || isErrorReset ){ 2860b4b47411Sdanielk1977 sqlite3_vfs *pVfs = pPager->pVfs; 28614f21c4afSdrh int isHotJournal = 0; 2862049fc21dSshane assert( !MEMDB ); 28638c0a791aSdanielk1977 assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); 28647bec505eSdrh if( !pPager->noReadlock ){ 2865684917c2Sdrh rc = pager_wait_on_lock(pPager, SHARED_LOCK); 28668766c343Sdrh if( rc!=SQLITE_OK ){ 286752b472aeSdanielk1977 assert( pPager->state==PAGER_UNLOCK ); 2868aef0bf64Sdanielk1977 return pager_error(pPager, rc); 2869ed7c855cSdrh } 28700371f1b2Sdanielk1977 }else if( pPager->state==PAGER_UNLOCK ){ 28710371f1b2Sdanielk1977 pPager->state = PAGER_SHARED; 28727bec505eSdrh } 28730371f1b2Sdanielk1977 assert( pPager->state>=SHARED_LOCK ); 2874ed7c855cSdrh 287513adf8a0Sdanielk1977 /* If a journal file exists, and there is no RESERVED lock on the 287613adf8a0Sdanielk1977 ** database file, then it either needs to be played back or deleted. 2877ed7c855cSdrh */ 2878d300b8a3Sdanielk1977 if( !isErrorReset ){ 2879d300b8a3Sdanielk1977 rc = hasHotJournal(pPager, &isHotJournal); 2880d300b8a3Sdanielk1977 if( rc!=SQLITE_OK ){ 288152b472aeSdanielk1977 goto failed; 288219db9352Sdrh } 2883d300b8a3Sdanielk1977 } 2884d300b8a3Sdanielk1977 if( isErrorReset || isHotJournal ){ 288590ba3bd0Sdanielk1977 /* Get an EXCLUSIVE lock on the database file. At this point it is 288690ba3bd0Sdanielk1977 ** important that a RESERVED lock is not obtained on the way to the 288790ba3bd0Sdanielk1977 ** EXCLUSIVE lock. 
If it were, another process might open the 288890ba3bd0Sdanielk1977 ** database file, detect the RESERVED lock, and conclude that the 288990ba3bd0Sdanielk1977 ** database is safe to read while this process is still rolling it 289090ba3bd0Sdanielk1977 ** back. 289190ba3bd0Sdanielk1977 ** 289290ba3bd0Sdanielk1977 ** Because the intermediate RESERVED lock is not requested, the 289390ba3bd0Sdanielk1977 ** second process will get to this point in the code and fail to 289485b623f2Sdrh ** obtain its own EXCLUSIVE lock on the database file. 289590ba3bd0Sdanielk1977 */ 2896ae72d982Sdanielk1977 if( pPager->state<EXCLUSIVE_LOCK ){ 2897054889ecSdrh rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK); 2898a7fcb059Sdrh if( rc!=SQLITE_OK ){ 289952b472aeSdanielk1977 rc = pager_error(pPager, rc); 290052b472aeSdanielk1977 goto failed; 2901a7fcb059Sdrh } 2902a6abd041Sdrh pPager->state = PAGER_EXCLUSIVE; 2903ae72d982Sdanielk1977 } 2904a7fcb059Sdrh 290516e45a43Sdrh /* Open the journal for read/write access. This is because in 2906979f38e5Sdanielk1977 ** exclusive-access mode the file descriptor will be kept open and 2907979f38e5Sdanielk1977 ** possibly used for a transaction later on. On some systems, the 2908979f38e5Sdanielk1977 ** OsTruncate() call used in exclusive-access mode also requires 2909979f38e5Sdanielk1977 ** a read/write file handle. 
2910ed7c855cSdrh */ 2911d300b8a3Sdanielk1977 if( !isErrorReset && pPager->journalOpen==0 ){ 2912861f7456Sdanielk1977 int res; 2913861f7456Sdanielk1977 rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res); 2914861f7456Sdanielk1977 if( rc==SQLITE_OK ){ 2915861f7456Sdanielk1977 if( res ){ 2916b4b47411Sdanielk1977 int fout = 0; 2917ae72d982Sdanielk1977 int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; 29187152de8dSdanielk1977 assert( !pPager->tempFile ); 2919ae72d982Sdanielk1977 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); 2920b4b47411Sdanielk1977 assert( rc!=SQLITE_OK || pPager->jfd->pMethods ); 2921281d8bd3Sdanielk1977 if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){ 2922281d8bd3Sdanielk1977 rc = SQLITE_CANTOPEN; 2923b4b47411Sdanielk1977 sqlite3OsClose(pPager->jfd); 2924979f38e5Sdanielk1977 } 2925861f7456Sdanielk1977 }else{ 292616e45a43Sdrh /* If the journal does not exist, that means some other process 292716e45a43Sdrh ** has already rolled it back */ 292816e45a43Sdrh rc = SQLITE_BUSY; 2929861f7456Sdanielk1977 } 2930979f38e5Sdanielk1977 } 2931ae72d982Sdanielk1977 } 2932a7fcb059Sdrh if( rc!=SQLITE_OK ){ 293352b472aeSdanielk1977 goto failed; 2934ed7c855cSdrh } 2935a7fcb059Sdrh pPager->journalOpen = 1; 2936db48ee02Sdrh pPager->journalStarted = 0; 29377657240aSdanielk1977 pPager->journalOff = 0; 29387657240aSdanielk1977 pPager->setMaster = 0; 29397657240aSdanielk1977 pPager->journalHdr = 0; 2940ed7c855cSdrh 2941ed7c855cSdrh /* Playback and delete the journal. Drop the database write 2942112f752bSdanielk1977 ** lock and reacquire the read lock. Purge the cache before 2943112f752bSdanielk1977 ** playing back the hot-journal so that we don't end up with 2944ad0ea228Sdanielk1977 ** an inconsistent cache. 
2945ed7c855cSdrh */ 2946112f752bSdanielk1977 sqlite3PcacheClear(pPager->pPCache); 2947e277be05Sdanielk1977 rc = pager_playback(pPager, 1); 2948ed7c855cSdrh if( rc!=SQLITE_OK ){ 294952b472aeSdanielk1977 rc = pager_error(pPager, rc); 295052b472aeSdanielk1977 goto failed; 2951ed7c855cSdrh } 2952c5859718Sdanielk1977 assert(pPager->state==PAGER_SHARED || 2953c5859718Sdanielk1977 (pPager->exclusiveMode && pPager->state>PAGER_SHARED) 2954c5859718Sdanielk1977 ); 2955ed7c855cSdrh } 2956e277be05Sdanielk1977 29578c0a791aSdanielk1977 if( sqlite3PcachePagecount(pPager->pPCache)>0 ){ 295824168728Sdanielk1977 /* The shared-lock has just been acquired on the database file 295924168728Sdanielk1977 ** and there are already pages in the cache (from a previous 296086a88114Sdrh ** read or write transaction). Check to see if the database 296186a88114Sdrh ** has been modified. If the database has changed, flush the 296286a88114Sdrh ** cache. 296386a88114Sdrh ** 296486a88114Sdrh ** Database changes is detected by looking at 15 bytes beginning 296586a88114Sdrh ** at offset 24 into the file. The first 4 of these 16 bytes are 296686a88114Sdrh ** a 32-bit counter that is incremented with each change. The 296786a88114Sdrh ** other bytes change randomly with each file change when 296886a88114Sdrh ** a codec is in use. 296986a88114Sdrh ** 297086a88114Sdrh ** There is a vanishingly small chance that a change will not be 29716fa51035Sdrh ** detected. The chance of an undetected change is so small that 297286a88114Sdrh ** it can be neglected. 
297324168728Sdanielk1977 */ 297486a88114Sdrh char dbFileVers[sizeof(pPager->dbFileVers)]; 2975ad0132dfSdanielk1977 sqlite3PagerPagecount(pPager, 0); 297624168728Sdanielk1977 2977e180dd93Sdanielk1977 if( pPager->errCode ){ 297852b472aeSdanielk1977 rc = pPager->errCode; 297952b472aeSdanielk1977 goto failed; 2980e277be05Sdanielk1977 } 2981e277be05Sdanielk1977 2982d92db531Sdanielk1977 assert( pPager->dbSizeValid ); 2983e180dd93Sdanielk1977 if( pPager->dbSize>0 ){ 2984ae5e445bSdrh IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); 298562079060Sdanielk1977 rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); 2986e180dd93Sdanielk1977 if( rc!=SQLITE_OK ){ 298752b472aeSdanielk1977 goto failed; 2988e180dd93Sdanielk1977 } 298986a88114Sdrh }else{ 299086a88114Sdrh memset(dbFileVers, 0, sizeof(dbFileVers)); 2991e180dd93Sdanielk1977 } 2992e180dd93Sdanielk1977 299386a88114Sdrh if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ 2994e277be05Sdanielk1977 pager_reset(pPager); 2995e277be05Sdanielk1977 } 2996e277be05Sdanielk1977 } 29970371f1b2Sdanielk1977 assert( pPager->exclusiveMode || pPager->state==PAGER_SHARED ); 2998c5859718Sdanielk1977 } 2999e277be05Sdanielk1977 300052b472aeSdanielk1977 failed: 300152b472aeSdanielk1977 if( rc!=SQLITE_OK ){ 300252b472aeSdanielk1977 /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */ 300352b472aeSdanielk1977 pager_unlock(pPager); 300452b472aeSdanielk1977 } 3005e277be05Sdanielk1977 return rc; 3006d9b0257aSdrh } 3007e277be05Sdanielk1977 3008e277be05Sdanielk1977 /* 3009d33d5a89Sdrh ** Make sure we have the content for a page. If the page was 3010d33d5a89Sdrh ** previously acquired with noContent==1, then the content was 3011d33d5a89Sdrh ** just initialized to zeros instead of being read from disk. 3012d33d5a89Sdrh ** But now we need the real data off of disk. So make sure we 3013d33d5a89Sdrh ** have it. Read it in if we do not have it already. 
3014d33d5a89Sdrh */ 3015d33d5a89Sdrh static int pager_get_content(PgHdr *pPg){ 30168c0a791aSdanielk1977 if( pPg->flags&PGHDR_NEED_READ ){ 3017d33d5a89Sdrh int rc = readDbPage(pPg->pPager, pPg, pPg->pgno); 3018d33d5a89Sdrh if( rc==SQLITE_OK ){ 30198c0a791aSdanielk1977 pPg->flags &= ~PGHDR_NEED_READ; 3020d33d5a89Sdrh }else{ 3021d33d5a89Sdrh return rc; 3022d33d5a89Sdrh } 3023d33d5a89Sdrh } 3024d33d5a89Sdrh return SQLITE_OK; 3025d33d5a89Sdrh } 3026d33d5a89Sdrh 3027d33d5a89Sdrh /* 30288c0a791aSdanielk1977 ** If the reference count has reached zero, and the pager is not in the 30298c0a791aSdanielk1977 ** middle of a write transaction or opened in exclusive mode, unlock it. 30308c0a791aSdanielk1977 */ 30318c0a791aSdanielk1977 static void pagerUnlockIfUnused(Pager *pPager){ 30328c0a791aSdanielk1977 if( (sqlite3PcacheRefCount(pPager->pPCache)==0) 30338c0a791aSdanielk1977 && (!pPager->exclusiveMode || pPager->journalOff>0) 30348c0a791aSdanielk1977 ){ 30358c0a791aSdanielk1977 pagerUnlockAndRollback(pPager); 30368c0a791aSdanielk1977 } 30378c0a791aSdanielk1977 } 30388c0a791aSdanielk1977 30398c0a791aSdanielk1977 /* 30408c0a791aSdanielk1977 ** Drop a page from the cache using sqlite3PcacheDrop(). 30418c0a791aSdanielk1977 ** 30428c0a791aSdanielk1977 ** If this means there are now no pages with references to them, a rollback 30438c0a791aSdanielk1977 ** occurs and the lock on the database is removed. 30448c0a791aSdanielk1977 */ 30458c0a791aSdanielk1977 static void pagerDropPage(DbPage *pPg){ 30468c0a791aSdanielk1977 Pager *pPager = pPg->pPager; 30478c0a791aSdanielk1977 sqlite3PcacheDrop(pPg); 30488c0a791aSdanielk1977 pagerUnlockIfUnused(pPager); 30498c0a791aSdanielk1977 } 30508c0a791aSdanielk1977 30518c0a791aSdanielk1977 /* 3052e277be05Sdanielk1977 ** Acquire a page. 3053e277be05Sdanielk1977 ** 3054e277be05Sdanielk1977 ** A read lock on the disk file is obtained when the first page is acquired. 3055e277be05Sdanielk1977 ** This read lock is dropped when the last page is released. 
**
** This routine works for any page number greater than 0.  If the database
** file is smaller than the requested page, then no actual disk
** read occurs and the memory image of the page is initialized to
** all zeros.  The extra data appended to a page is always initialized
** to zeros the first time a page is loaded into memory.
**
** The acquisition might fail for several reasons.  In all cases,
** an appropriate error code is returned and *ppPage is set to NULL.
**
** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
** to find a page in the in-memory cache first.  If the page is not already
** in memory, this routine goes to disk to read it in whereas Lookup()
** just returns 0.  This routine acquires a read-lock the first time it
** has to go to disk, and could also play back an old journal if necessary.
** Since Lookup() never goes to disk, it never has to deal with locks
** or journal files.
**
** If noContent is false, the page contents are actually read from disk.
** If noContent is true, it means that we do not care about the contents
** of the page at this time, so do not do a disk read.  Just fill in the
** page content with zeros.  But mark the fact that we have not read the
** content by setting the PGHDR_NEED_READ bit in PgHdr.flags.
Later on, if 3079d33d5a89Sdrh ** sqlite3PagerWrite() is called on this page or if this routine is 3080d33d5a89Sdrh ** called again with noContent==0, that means that the content is needed 3081d33d5a89Sdrh ** and the disk read should occur at that point. 3082e277be05Sdanielk1977 */ 308365e0ff32Sdanielk1977 int sqlite3PagerAcquire( 3084538f570cSdrh Pager *pPager, /* The pager open on the database file */ 3085538f570cSdrh Pgno pgno, /* Page number to fetch */ 3086538f570cSdrh DbPage **ppPage, /* Write a pointer to the page here */ 3087538f570cSdrh int noContent /* Do not bother reading content from disk if true */ 3088538f570cSdrh ){ 30898c0a791aSdanielk1977 PgHdr *pPg = 0; 3090e277be05Sdanielk1977 int rc; 3091e277be05Sdanielk1977 30928c0a791aSdanielk1977 assert( pPager->state==PAGER_UNLOCK 30938c0a791aSdanielk1977 || sqlite3PcacheRefCount(pPager->pPCache)>0 30948c0a791aSdanielk1977 || pgno==1 30958c0a791aSdanielk1977 ); 3096e277be05Sdanielk1977 3097e277be05Sdanielk1977 /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page 3098e277be05Sdanielk1977 ** number greater than this, or zero, is requested. 3099e277be05Sdanielk1977 */ 3100e277be05Sdanielk1977 if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ 3101e277be05Sdanielk1977 return SQLITE_CORRUPT_BKPT; 3102e277be05Sdanielk1977 } 3103e277be05Sdanielk1977 3104e277be05Sdanielk1977 /* Make sure we have not hit any critical errors. 3105e277be05Sdanielk1977 */ 3106e277be05Sdanielk1977 assert( pPager!=0 ); 3107e277be05Sdanielk1977 *ppPage = 0; 3108e277be05Sdanielk1977 3109e277be05Sdanielk1977 /* If this is the first page accessed, then get a SHARED lock 3110334cdb63Sdanielk1977 ** on the database file. pagerSharedLock() is a no-op if 3111334cdb63Sdanielk1977 ** a database lock is already held. 
3112e277be05Sdanielk1977 */ 3113e277be05Sdanielk1977 rc = pagerSharedLock(pPager); 3114e277be05Sdanielk1977 if( rc!=SQLITE_OK ){ 3115e277be05Sdanielk1977 return rc; 3116e277be05Sdanielk1977 } 3117e277be05Sdanielk1977 assert( pPager->state!=PAGER_UNLOCK ); 3118e277be05Sdanielk1977 31198c0a791aSdanielk1977 rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, &pPg); 3120db48ee02Sdrh if( rc!=SQLITE_OK ){ 312175bab7d6Sdanielk1977 return rc; 3122db48ee02Sdrh } 31238c0a791aSdanielk1977 if( pPg->pPager==0 ){ 31248c0a791aSdanielk1977 /* The pager cache has created a new page. Its content needs to 31258c0a791aSdanielk1977 ** be initialized. 31268c0a791aSdanielk1977 */ 31278c0a791aSdanielk1977 int nMax; 31288c0a791aSdanielk1977 PAGER_INCR(pPager->nMiss); 31298c0a791aSdanielk1977 pPg->pPager = pPager; 31308c0a791aSdanielk1977 memset(pPg->pExtra, 0, pPager->nExtra); 31318c0a791aSdanielk1977 3132ad0132dfSdanielk1977 rc = sqlite3PagerPagecount(pPager, &nMax); 3133ad0132dfSdanielk1977 if( rc!=SQLITE_OK ){ 3134ae72d982Sdanielk1977 sqlite3PagerUnref(pPg); 31352e6d11bcSdrh return rc; 31362e6d11bcSdrh } 313775bab7d6Sdanielk1977 3138a1fa00d9Sdanielk1977 if( nMax<(int)pgno || MEMDB || noContent ){ 3139f8e632b6Sdrh if( pgno>pPager->mxPgno ){ 3140de3bea7bSdanielk1977 sqlite3PagerUnref(pPg); 3141f8e632b6Sdrh return SQLITE_FULL; 3142f8e632b6Sdrh } 31438c0a791aSdanielk1977 memset(pPg->pData, 0, pPager->pageSize); 3144a1fa00d9Sdanielk1977 if( noContent ){ 31458c0a791aSdanielk1977 pPg->flags |= PGHDR_NEED_READ; 31468c0a791aSdanielk1977 } 3147538f570cSdrh IOTRACE(("ZERO %p %d\n", pPager, pgno)); 3148306dc213Sdrh }else{ 3149e180dd93Sdanielk1977 rc = readDbPage(pPager, pPg, pgno); 3150551b7736Sdrh if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ 31518c0a791aSdanielk1977 /* sqlite3PagerUnref(pPg); */ 31528c0a791aSdanielk1977 pagerDropPage(pPg); 315397a227c9Sdanielk1977 return rc; 315481a20f21Sdrh } 3155306dc213Sdrh } 31563c407374Sdanielk1977 #ifdef SQLITE_CHECK_PAGES 31573c407374Sdanielk1977 
pPg->pageHash = pager_pagehash(pPg); 31583c407374Sdanielk1977 #endif 3159ed7c855cSdrh }else{ 3160d9b0257aSdrh /* The requested page is in the page cache. */ 31618c0a791aSdanielk1977 assert(sqlite3PcacheRefCount(pPager->pPCache)>0 || pgno==1); 3162538f570cSdrh PAGER_INCR(pPager->nHit); 3163d33d5a89Sdrh if( !noContent ){ 3164d33d5a89Sdrh rc = pager_get_content(pPg); 3165d33d5a89Sdrh if( rc ){ 31668c0a791aSdanielk1977 sqlite3PagerUnref(pPg); 3167d33d5a89Sdrh return rc; 3168d33d5a89Sdrh } 3169d33d5a89Sdrh } 3170ed7c855cSdrh } 31718c0a791aSdanielk1977 31723b8a05f6Sdanielk1977 *ppPage = pPg; 3173ed7c855cSdrh return SQLITE_OK; 3174ed7c855cSdrh } 31758c0a791aSdanielk1977 3176ed7c855cSdrh /* 31777e3b0a07Sdrh ** Acquire a page if it is already in the in-memory cache. Do 31787e3b0a07Sdrh ** not read the page from disk. Return a pointer to the page, 31797e3b0a07Sdrh ** or 0 if the page is not in cache. 31807e3b0a07Sdrh ** 31813b8a05f6Sdanielk1977 ** See also sqlite3PagerGet(). The difference between this routine 31823b8a05f6Sdanielk1977 ** and sqlite3PagerGet() is that _get() will go to the disk and read 31837e3b0a07Sdrh ** in the page if the page is not already in cache. This routine 31845e00f6c7Sdrh ** returns NULL if the page is not in cache or if a disk I/O error 31855e00f6c7Sdrh ** has ever happened. 31867e3b0a07Sdrh */ 31873b8a05f6Sdanielk1977 DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ 318886f8c197Sdrh PgHdr *pPg = 0; 3189836faa48Sdrh assert( pPager!=0 ); 3190836faa48Sdrh assert( pgno!=0 ); 3191e277be05Sdanielk1977 31928c0a791aSdanielk1977 if( (pPager->state!=PAGER_UNLOCK) 31938c0a791aSdanielk1977 && (pPager->errCode==SQLITE_OK || pPager->errCode==SQLITE_FULL) 31948c0a791aSdanielk1977 ){ 31958c0a791aSdanielk1977 sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg); 319686f8c197Sdrh } 31978c0a791aSdanielk1977 31983b8a05f6Sdanielk1977 return pPg; 31997e3b0a07Sdrh } 32007e3b0a07Sdrh 32017e3b0a07Sdrh /* 3202ed7c855cSdrh ** Release a page. 
3203ed7c855cSdrh ** 3204ed7c855cSdrh ** If the number of references to the page drop to zero, then the 3205ed7c855cSdrh ** page is added to the LRU list. When all references to all pages 3206d9b0257aSdrh ** are released, a rollback occurs and the lock on the database is 3207ed7c855cSdrh ** removed. 3208ed7c855cSdrh */ 32093b8a05f6Sdanielk1977 int sqlite3PagerUnref(DbPage *pPg){ 32108c0a791aSdanielk1977 if( pPg ){ 32118c0a791aSdanielk1977 Pager *pPager = pPg->pPager; 32128c0a791aSdanielk1977 sqlite3PcacheRelease(pPg); 32138c0a791aSdanielk1977 pagerUnlockIfUnused(pPager); 32148c0a791aSdanielk1977 } 3215d9b0257aSdrh return SQLITE_OK; 3216d9b0257aSdrh } 3217ed7c855cSdrh 32189153d850Sdanielk1977 /* 32199153d850Sdanielk1977 ** If the main journal file has already been opened, ensure that the 32209153d850Sdanielk1977 ** sub-journal file is open too. If the main journal is not open, 32219153d850Sdanielk1977 ** this function is a no-op. 32229153d850Sdanielk1977 ** 32239153d850Sdanielk1977 ** SQLITE_OK is returned if everything goes according to plan. An 32249153d850Sdanielk1977 ** SQLITE_IOERR_XXX error code is returned if the call to 32259153d850Sdanielk1977 ** sqlite3OsOpen() fails. 32269153d850Sdanielk1977 */ 3227fd7f0452Sdanielk1977 static int openSubJournal(Pager *pPager){ 3228fd7f0452Sdanielk1977 int rc = SQLITE_OK; 3229fd7f0452Sdanielk1977 if( pPager->journalOpen && !pPager->sjfd->pMethods ){ 3230fd7f0452Sdanielk1977 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ 3231fd7f0452Sdanielk1977 sqlite3MemJournalOpen(pPager->sjfd); 3232fd7f0452Sdanielk1977 }else{ 3233fd7f0452Sdanielk1977 rc = sqlite3PagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL); 3234fd7f0452Sdanielk1977 } 3235fd7f0452Sdanielk1977 } 3236fd7f0452Sdanielk1977 return rc; 3237fd7f0452Sdanielk1977 } 3238fd7f0452Sdanielk1977 3239ed7c855cSdrh /* 3240a6abd041Sdrh ** Create a journal file for pPager. 
There should already be a RESERVED 3241a6abd041Sdrh ** or EXCLUSIVE lock on the database file when this routine is called. 3242da47d774Sdrh ** 3243da47d774Sdrh ** Return SQLITE_OK if everything. Return an error code and release the 3244da47d774Sdrh ** write lock if anything goes wrong. 3245da47d774Sdrh */ 3246da47d774Sdrh static int pager_open_journal(Pager *pPager){ 3247b4b47411Sdanielk1977 sqlite3_vfs *pVfs = pPager->pVfs; 3248b4b47411Sdanielk1977 int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE); 3249b4b47411Sdanielk1977 3250da47d774Sdrh int rc; 3251a6abd041Sdrh assert( pPager->state>=PAGER_RESERVED ); 3252da47d774Sdrh assert( pPager->useJournal ); 3253f5e7bb51Sdrh assert( pPager->pInJournal==0 ); 3254ad0132dfSdanielk1977 sqlite3PagerPagecount(pPager, 0); 3255f5e7bb51Sdrh pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); 3256f5e7bb51Sdrh if( pPager->pInJournal==0 ){ 32579c105bb9Sdrh rc = SQLITE_NOMEM; 32589c105bb9Sdrh goto failed_to_open_journal; 3259da47d774Sdrh } 3260b4b47411Sdanielk1977 3261fdc40e91Sdrh if( pPager->journalOpen==0 ){ 3262b4b47411Sdanielk1977 if( pPager->tempFile ){ 3263fee2d25aSdanielk1977 flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL); 3264fee2d25aSdanielk1977 }else{ 3265fee2d25aSdanielk1977 flags |= (SQLITE_OPEN_MAIN_JOURNAL); 3266b4b47411Sdanielk1977 } 3267b3175389Sdanielk1977 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ 3268b3175389Sdanielk1977 sqlite3MemJournalOpen(pPager->jfd); 3269b3175389Sdanielk1977 rc = SQLITE_OK; 3270b3175389Sdanielk1977 }else{ 3271c7b6017cSdanielk1977 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 3272c7b6017cSdanielk1977 rc = sqlite3JournalOpen( 3273c7b6017cSdanielk1977 pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager) 3274c7b6017cSdanielk1977 ); 3275c7b6017cSdanielk1977 #else 3276b4b47411Sdanielk1977 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0); 3277c7b6017cSdanielk1977 #endif 3278b3175389Sdanielk1977 } 3279b4b47411Sdanielk1977 
assert( rc!=SQLITE_OK || pPager->jfd->pMethods ); 32807657240aSdanielk1977 pPager->journalOff = 0; 32817657240aSdanielk1977 pPager->setMaster = 0; 32827657240aSdanielk1977 pPager->journalHdr = 0; 3283da47d774Sdrh if( rc!=SQLITE_OK ){ 3284600e46a0Sdrh if( rc==SQLITE_NOMEM ){ 3285fee2d25aSdanielk1977 sqlite3OsDelete(pVfs, pPager->zJournal, 0); 3286600e46a0Sdrh } 32879c105bb9Sdrh goto failed_to_open_journal; 3288da47d774Sdrh } 3289fdc40e91Sdrh } 3290da47d774Sdrh pPager->journalOpen = 1; 3291db48ee02Sdrh pPager->journalStarted = 0; 3292da47d774Sdrh pPager->needSync = 0; 3293968af52aSdrh pPager->nRec = 0; 3294efaaf579Sdanielk1977 if( pPager->errCode ){ 3295efaaf579Sdanielk1977 rc = pPager->errCode; 3296dd5b2fa5Sdrh goto failed_to_open_journal; 32972e6d11bcSdrh } 32983460d19cSdanielk1977 pPager->dbOrigSize = pPager->dbSize; 3299ae2b40c4Sdrh 33007657240aSdanielk1977 rc = writeJournalHdr(pPager); 33017657240aSdanielk1977 3302fd7f0452Sdanielk1977 if( pPager->nSavepoint && rc==SQLITE_OK ){ 3303fd7f0452Sdanielk1977 rc = openSubJournal(pPager); 3304da47d774Sdrh } 3305ae72d982Sdanielk1977 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){ 3306df2566a3Sdanielk1977 rc = pager_end_transaction(pPager, 0); 3307da47d774Sdrh if( rc==SQLITE_OK ){ 3308da47d774Sdrh rc = SQLITE_FULL; 3309da47d774Sdrh } 3310da47d774Sdrh } 3311da47d774Sdrh return rc; 33129c105bb9Sdrh 33139c105bb9Sdrh failed_to_open_journal: 3314f5e7bb51Sdrh sqlite3BitvecDestroy(pPager->pInJournal); 3315f5e7bb51Sdrh pPager->pInJournal = 0; 33169c105bb9Sdrh return rc; 3317da47d774Sdrh } 3318da47d774Sdrh 3319da47d774Sdrh /* 33204b845d7eSdrh ** Acquire a write-lock on the database. The lock is removed when 33214b845d7eSdrh ** the any of the following happen: 33224b845d7eSdrh ** 332380e35f46Sdrh ** * sqlite3PagerCommitPhaseTwo() is called. 33243b8a05f6Sdanielk1977 ** * sqlite3PagerRollback() is called. 33253b8a05f6Sdanielk1977 ** * sqlite3PagerClose() is called. 
**   *  sqlite3PagerUnref() is called on every outstanding page.
**
** The first parameter to this routine is a pointer to any open page of the
** database file.  Nothing changes about the page - it is used merely to
** acquire a pointer to the Pager structure and as proof that there is
** already a read-lock on the database.
**
** The second parameter is exFlag, described below.  (An earlier revision
** of this comment described it as the number of bytes to reserve for a
** master journal file-name at the start of the journal; the routine takes
** no such parameter.)
**
** A journal file is opened if this is not a temporary file.  For temporary
** files, the opening of the journal file is deferred until there is an
** actual need to write to the journal.
**
** If the database is already reserved for writing, this routine is a no-op.
**
** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
** immediately instead of waiting until we try to flush the cache.  The
** exFlag is ignored if a transaction is already active.
33454b845d7eSdrh */ 33463b8a05f6Sdanielk1977 int sqlite3PagerBegin(DbPage *pPg, int exFlag){ 33474b845d7eSdrh Pager *pPager = pPg->pPager; 33484b845d7eSdrh int rc = SQLITE_OK; 33494b845d7eSdrh assert( pPg->nRef>0 ); 3350a6abd041Sdrh assert( pPager->state!=PAGER_UNLOCK ); 3351a6abd041Sdrh if( pPager->state==PAGER_SHARED ){ 3352f5e7bb51Sdrh assert( pPager->pInJournal==0 ); 3353b3175389Sdanielk1977 assert( !MEMDB ); 3354054889ecSdrh rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); 3355684917c2Sdrh if( rc==SQLITE_OK ){ 3356684917c2Sdrh pPager->state = PAGER_RESERVED; 3357684917c2Sdrh if( exFlag ){ 3358684917c2Sdrh rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); 3359684917c2Sdrh } 3360684917c2Sdrh } 33614b845d7eSdrh if( rc!=SQLITE_OK ){ 33624b845d7eSdrh return rc; 33634b845d7eSdrh } 3364a6abd041Sdrh pPager->dirtyCache = 0; 336530d53701Sdrh PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager))); 3366fdc40e91Sdrh if( pPager->useJournal && !pPager->tempFile 3367fdc40e91Sdrh && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ 3368da47d774Sdrh rc = pager_open_journal(pPager); 33694b845d7eSdrh } 3370334cdb63Sdanielk1977 }else if( pPager->journalOpen && pPager->journalOff==0 ){ 3371d138c016Sdrh /* This happens when the pager was in exclusive-access mode the last 3372334cdb63Sdanielk1977 ** time a (read or write) transaction was successfully concluded 3373334cdb63Sdanielk1977 ** by this connection. Instead of deleting the journal file it was 3374d138c016Sdrh ** kept open and either was truncated to 0 bytes or its header was 3375d138c016Sdrh ** overwritten with zeros. 
3376334cdb63Sdanielk1977 */ 3377334cdb63Sdanielk1977 assert( pPager->nRec==0 ); 33783460d19cSdanielk1977 assert( pPager->dbOrigSize==0 ); 3379f5e7bb51Sdrh assert( pPager->pInJournal==0 ); 3380ad0132dfSdanielk1977 sqlite3PagerPagecount(pPager, 0); 3381f5e7bb51Sdrh pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize ); 3382f5e7bb51Sdrh if( !pPager->pInJournal ){ 3383334cdb63Sdanielk1977 rc = SQLITE_NOMEM; 3384334cdb63Sdanielk1977 }else{ 33853460d19cSdanielk1977 pPager->dbOrigSize = pPager->dbSize; 3386334cdb63Sdanielk1977 rc = writeJournalHdr(pPager); 3387ac69b05eSdrh } 3388334cdb63Sdanielk1977 } 3389334cdb63Sdanielk1977 assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK ); 33904b845d7eSdrh return rc; 33914b845d7eSdrh } 33924b845d7eSdrh 33934b845d7eSdrh /* 3394ed7c855cSdrh ** Mark a data page as writeable. The page is written into the journal 3395ed7c855cSdrh ** if it is not there already. This routine must be called before making 3396ed7c855cSdrh ** changes to a page. 3397ed7c855cSdrh ** 3398ed7c855cSdrh ** The first time this routine is called, the pager creates a new 3399a6abd041Sdrh ** journal and acquires a RESERVED lock on the database. If the RESERVED 3400ed7c855cSdrh ** lock could not be acquired, this routine returns SQLITE_BUSY. The 3401306dc213Sdrh ** calling routine must check for that return value and be careful not to 3402ed7c855cSdrh ** change any page data until this routine returns SQLITE_OK. 3403d9b0257aSdrh ** 3404d9b0257aSdrh ** If the journal file could not be written because the disk is full, 3405d9b0257aSdrh ** then this routine returns SQLITE_FULL and does an immediate rollback. 3406d9b0257aSdrh ** All subsequent write attempts also return SQLITE_FULL until there 34073b8a05f6Sdanielk1977 ** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to 3408d9b0257aSdrh ** reset. 
3409ed7c855cSdrh */ 34103b8a05f6Sdanielk1977 static int pager_write(PgHdr *pPg){ 34118c0a791aSdanielk1977 void *pData = pPg->pData; 341269688d5fSdrh Pager *pPager = pPg->pPager; 3413d79caebaSdrh int rc = SQLITE_OK; 341469688d5fSdrh 34156446c4dcSdrh /* Check for errors 34166446c4dcSdrh */ 3417efaaf579Sdanielk1977 if( pPager->errCode ){ 3418efaaf579Sdanielk1977 return pPager->errCode; 3419d9b0257aSdrh } 34205e00f6c7Sdrh if( pPager->readOnly ){ 34215e00f6c7Sdrh return SQLITE_PERM; 34225e00f6c7Sdrh } 34236446c4dcSdrh 34247657240aSdanielk1977 assert( !pPager->setMaster ); 34257657240aSdanielk1977 34263c407374Sdanielk1977 CHECK_PAGE(pPg); 34273c407374Sdanielk1977 3428538f570cSdrh /* If this page was previously acquired with noContent==1, that means 3429538f570cSdrh ** we didn't really read in the content of the page. This can happen 3430538f570cSdrh ** (for example) when the page is being moved to the freelist. But 3431538f570cSdrh ** now we are (perhaps) moving the page off of the freelist for 3432538f570cSdrh ** reuse and we need to know its original content so that content 3433538f570cSdrh ** can be stored in the rollback journal. So do the read at this 3434538f570cSdrh ** time. 3435538f570cSdrh */ 3436d33d5a89Sdrh rc = pager_get_content(pPg); 3437d33d5a89Sdrh if( rc ){ 3438538f570cSdrh return rc; 3439538f570cSdrh } 3440538f570cSdrh 34416446c4dcSdrh /* Mark the page as dirty. If the page has already been written 34426446c4dcSdrh ** to the journal then we can return right away. 34436446c4dcSdrh */ 3444c047b9f7Sdrh sqlite3PcacheMakeDirty(pPg); 34453460d19cSdanielk1977 if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){ 3446a6abd041Sdrh pPager->dirtyCache = 1; 3447d138c016Sdrh pPager->dbModified = 1; 3448a0bf2652Sdanielk1977 }else{ 34496446c4dcSdrh 34506446c4dcSdrh /* If we get this far, it means that the page needs to be 3451fa86c412Sdrh ** written to the transaction journal or the ckeckpoint journal 3452fa86c412Sdrh ** or both. 
3453fa86c412Sdrh ** 3454fa86c412Sdrh ** First check to see that the transaction journal exists and 3455fa86c412Sdrh ** create it if it does not. 34566446c4dcSdrh */ 3457a6abd041Sdrh assert( pPager->state!=PAGER_UNLOCK ); 34583b8a05f6Sdanielk1977 rc = sqlite3PagerBegin(pPg, 0); 3459da47d774Sdrh if( rc!=SQLITE_OK ){ 3460da47d774Sdrh return rc; 3461da47d774Sdrh } 3462a6abd041Sdrh assert( pPager->state>=PAGER_RESERVED ); 3463fdc40e91Sdrh if( !pPager->journalOpen && pPager->useJournal 3464fdc40e91Sdrh && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ 3465da47d774Sdrh rc = pager_open_journal(pPager); 3466da47d774Sdrh if( rc!=SQLITE_OK ) return rc; 3467da47d774Sdrh } 3468a6abd041Sdrh pPager->dirtyCache = 1; 3469d138c016Sdrh pPager->dbModified = 1; 34706446c4dcSdrh 3471a6abd041Sdrh /* The transaction journal now exists and we have a RESERVED or an 3472a6abd041Sdrh ** EXCLUSIVE lock on the main database file. Write the current page to 3473a6abd041Sdrh ** the transaction journal if it is not there already. 34746446c4dcSdrh */ 3475bc2ca9ebSdanielk1977 if( !pageInJournal(pPg) && pPager->journalOpen ){ 34763460d19cSdanielk1977 if( pPg->pgno<=pPager->dbOrigSize ){ 3477bf4bca54Sdrh u32 cksum; 3478bf4bca54Sdrh char *pData2; 3479dd97a49cSdanielk1977 3480267cb326Sdrh /* We should never write to the journal file the page that 3481267cb326Sdrh ** contains the database locks. The following assert verifies 3482267cb326Sdrh ** that we do not. 
*/ 3483267cb326Sdrh assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); 3484c001c58aSdrh pData2 = CODEC2(pPager, pData, pPg->pgno, 7); 34853752785fSdrh cksum = pager_cksum(pPager, (u8*)pData2); 3486bf4bca54Sdrh rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno); 3487bf4bca54Sdrh if( rc==SQLITE_OK ){ 3488bf4bca54Sdrh rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, 3489bf4bca54Sdrh pPager->journalOff + 4); 3490bf4bca54Sdrh pPager->journalOff += pPager->pageSize+4; 3491bf4bca54Sdrh } 3492bf4bca54Sdrh if( rc==SQLITE_OK ){ 3493bf4bca54Sdrh rc = write32bits(pPager->jfd, pPager->journalOff, cksum); 3494bf4bca54Sdrh pPager->journalOff += 4; 3495bf4bca54Sdrh } 3496b0603416Sdrh IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 3497667a6c98Sdanielk1977 pPager->journalOff, pPager->pageSize)); 3498538f570cSdrh PAGER_INCR(sqlite3_pager_writej_count); 349930d53701Sdrh PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", 35008c0a791aSdanielk1977 PAGERID(pPager), pPg->pgno, 350130d53701Sdrh ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); 350207cb560bSdanielk1977 3503f3107512Sdanielk1977 /* Even if an IO or diskfull error occurred while journalling the 3504f3107512Sdanielk1977 ** page in the block above, set the need-sync flag for the page. 3505f3107512Sdanielk1977 ** Otherwise, when the transaction is rolled back, the logic in 3506f3107512Sdanielk1977 ** playback_one_page() will think that the page needs to be restored 3507f3107512Sdanielk1977 ** in the database file. And if an IO error occurs while doing so, 3508f3107512Sdanielk1977 ** then corruption may follow. 3509f3107512Sdanielk1977 */ 3510f3107512Sdanielk1977 if( !pPager->noSync ){ 3511f3107512Sdanielk1977 pPg->flags |= PGHDR_NEED_SYNC; 3512a4124bdfSdanielk1977 pPager->needSync = 1; 3513f3107512Sdanielk1977 } 3514f3107512Sdanielk1977 351507cb560bSdanielk1977 /* An error has occured writing to the journal file. The 351607cb560bSdanielk1977 ** transaction will be rolled back by the layer above. 
351707cb560bSdanielk1977 */ 3518d9b0257aSdrh if( rc!=SQLITE_OK ){ 3519d9b0257aSdrh return rc; 3520d9b0257aSdrh } 352107cb560bSdanielk1977 352299ee3600Sdrh pPager->nRec++; 3523f5e7bb51Sdrh assert( pPager->pInJournal!=0 ); 35247539b6b8Sdrh rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); 35257539b6b8Sdrh testcase( rc==SQLITE_NOMEM ); 35267539b6b8Sdrh assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); 35277539b6b8Sdrh rc |= addToSavepointBitvecs(pPager, pPg->pgno); 35287539b6b8Sdrh if( rc!=SQLITE_OK ){ 35297539b6b8Sdrh assert( rc==SQLITE_NOMEM ); 35307539b6b8Sdrh return rc; 35317539b6b8Sdrh } 3532db48ee02Sdrh }else{ 35338c0a791aSdanielk1977 if( !pPager->journalStarted && !pPager->noSync ){ 35348c0a791aSdanielk1977 pPg->flags |= PGHDR_NEED_SYNC; 3535a4124bdfSdanielk1977 pPager->needSync = 1; 3536db48ee02Sdrh } 353730d53701Sdrh PAGERTRACE(("APPEND %d page %d needSync=%d\n", 35388c0a791aSdanielk1977 PAGERID(pPager), pPg->pgno, 353930d53701Sdrh ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); 35408c0a791aSdanielk1977 } 3541d9b0257aSdrh } 35426446c4dcSdrh 3543ac69b05eSdrh /* If the statement journal is open and the page is not in it, 3544ac69b05eSdrh ** then write the current page to the statement journal. Note that 3545ae2b40c4Sdrh ** the statement journal format differs from the standard journal format 3546ae2b40c4Sdrh ** in that it omits the checksums and the header. 35476446c4dcSdrh */ 35483460d19cSdanielk1977 if( subjRequiresPage(pPg) ){ 3549f2c31ad8Sdanielk1977 rc = subjournalPage(pPg); 3550ac69b05eSdrh } 3551fa86c412Sdrh } 3552fa86c412Sdrh 3553fa86c412Sdrh /* Update the database size and return. 
3554fa86c412Sdrh */ 35551aa2d8b5Sdrh assert( pPager->state>=PAGER_SHARED ); 3556d92db531Sdanielk1977 if( pPager->dbSize<pPg->pgno ){ 3557306dc213Sdrh pPager->dbSize = pPg->pgno; 3558d92db531Sdanielk1977 if( pPager->dbSize==(PAGER_MJ_PGNO(pPager)-1) ){ 35591f595716Sdrh pPager->dbSize++; 35601f595716Sdrh } 3561306dc213Sdrh } 356269688d5fSdrh return rc; 3563ed7c855cSdrh } 3564ed7c855cSdrh 3565ed7c855cSdrh /* 35664099f6e1Sdanielk1977 ** This function is used to mark a data-page as writable. It uses 35674099f6e1Sdanielk1977 ** pager_write() to open a journal file (if it is not already open) 35684099f6e1Sdanielk1977 ** and write the page *pData to the journal. 35694099f6e1Sdanielk1977 ** 35704099f6e1Sdanielk1977 ** The difference between this function and pager_write() is that this 35714099f6e1Sdanielk1977 ** function also deals with the special case where 2 or more pages 35724099f6e1Sdanielk1977 ** fit on a single disk sector. In this case all co-resident pages 35734099f6e1Sdanielk1977 ** must have been written to the journal file before returning. 35744099f6e1Sdanielk1977 */ 35753b8a05f6Sdanielk1977 int sqlite3PagerWrite(DbPage *pDbPage){ 35764099f6e1Sdanielk1977 int rc = SQLITE_OK; 35774099f6e1Sdanielk1977 35783b8a05f6Sdanielk1977 PgHdr *pPg = pDbPage; 35794099f6e1Sdanielk1977 Pager *pPager = pPg->pPager; 35804099f6e1Sdanielk1977 Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); 35814099f6e1Sdanielk1977 3582b3175389Sdanielk1977 if( nPagePerSector>1 ){ 35834099f6e1Sdanielk1977 Pgno nPageCount; /* Total number of pages in database file */ 35844099f6e1Sdanielk1977 Pgno pg1; /* First page of the sector pPg is located on. */ 35854099f6e1Sdanielk1977 int nPage; /* Number of pages starting at pg1 to journal */ 35864099f6e1Sdanielk1977 int ii; 3587dd97a49cSdanielk1977 int needSync = 0; 35884099f6e1Sdanielk1977 35894099f6e1Sdanielk1977 /* Set the doNotSync flag to 1. 
This is because we cannot allow a journal 35904099f6e1Sdanielk1977 ** header to be written between the pages journaled by this function. 35914099f6e1Sdanielk1977 */ 3592b3175389Sdanielk1977 assert( !MEMDB ); 35934099f6e1Sdanielk1977 assert( pPager->doNotSync==0 ); 35944099f6e1Sdanielk1977 pPager->doNotSync = 1; 35954099f6e1Sdanielk1977 35964099f6e1Sdanielk1977 /* This trick assumes that both the page-size and sector-size are 35974099f6e1Sdanielk1977 ** an integer power of 2. It sets variable pg1 to the identifier 35984099f6e1Sdanielk1977 ** of the first page of the sector pPg is located on. 35994099f6e1Sdanielk1977 */ 36004099f6e1Sdanielk1977 pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; 36014099f6e1Sdanielk1977 3602ad0132dfSdanielk1977 sqlite3PagerPagecount(pPager, (int *)&nPageCount); 36034099f6e1Sdanielk1977 if( pPg->pgno>nPageCount ){ 36044099f6e1Sdanielk1977 nPage = (pPg->pgno - pg1)+1; 36054099f6e1Sdanielk1977 }else if( (pg1+nPagePerSector-1)>nPageCount ){ 36064099f6e1Sdanielk1977 nPage = nPageCount+1-pg1; 36074099f6e1Sdanielk1977 }else{ 36084099f6e1Sdanielk1977 nPage = nPagePerSector; 36094099f6e1Sdanielk1977 } 36104099f6e1Sdanielk1977 assert(nPage>0); 36114099f6e1Sdanielk1977 assert(pg1<=pPg->pgno); 36124099f6e1Sdanielk1977 assert((pg1+nPage)>pPg->pgno); 36134099f6e1Sdanielk1977 36144099f6e1Sdanielk1977 for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){ 36154099f6e1Sdanielk1977 Pgno pg = pg1+ii; 3616dd97a49cSdanielk1977 PgHdr *pPage; 3617f5e7bb51Sdrh if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){ 36184099f6e1Sdanielk1977 if( pg!=PAGER_MJ_PGNO(pPager) ){ 36193b8a05f6Sdanielk1977 rc = sqlite3PagerGet(pPager, pg, &pPage); 36204099f6e1Sdanielk1977 if( rc==SQLITE_OK ){ 36214099f6e1Sdanielk1977 rc = pager_write(pPage); 36228c0a791aSdanielk1977 if( pPage->flags&PGHDR_NEED_SYNC ){ 3623dd97a49cSdanielk1977 needSync = 1; 3624a4124bdfSdanielk1977 assert(pPager->needSync); 3625dd97a49cSdanielk1977 } 36263b8a05f6Sdanielk1977 sqlite3PagerUnref(pPage); 
36274099f6e1Sdanielk1977 } 36284099f6e1Sdanielk1977 } 3629c81945e4Sdrh }else if( (pPage = pager_lookup(pPager, pg))!=0 ){ 36308c0a791aSdanielk1977 if( pPage->flags&PGHDR_NEED_SYNC ){ 3631dd97a49cSdanielk1977 needSync = 1; 36324099f6e1Sdanielk1977 } 36338c0a791aSdanielk1977 sqlite3PagerUnref(pPage); 36344099f6e1Sdanielk1977 } 3635dd97a49cSdanielk1977 } 3636dd97a49cSdanielk1977 3637ee03d629Sdrh /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages 3638dd97a49cSdanielk1977 ** starting at pg1, then it needs to be set for all of them. Because 3639dd97a49cSdanielk1977 ** writing to any of these nPage pages may damage the others, the 3640dd97a49cSdanielk1977 ** journal file must contain sync()ed copies of all of them 3641dd97a49cSdanielk1977 ** before any of them can be written out to the database file. 3642dd97a49cSdanielk1977 */ 3643dd97a49cSdanielk1977 if( needSync ){ 3644b3df2e1cSdrh assert( !MEMDB && pPager->noSync==0 ); 3645dd97a49cSdanielk1977 for(ii=0; ii<nPage && needSync; ii++){ 3646dd97a49cSdanielk1977 PgHdr *pPage = pager_lookup(pPager, pg1+ii); 3647ee03d629Sdrh if( pPage ){ 3648ee03d629Sdrh pPage->flags |= PGHDR_NEED_SYNC; 36498c0a791aSdanielk1977 sqlite3PagerUnref(pPage); 3650dd97a49cSdanielk1977 } 3651ee03d629Sdrh } 3652dd97a49cSdanielk1977 assert(pPager->needSync); 3653dd97a49cSdanielk1977 } 36544099f6e1Sdanielk1977 36554099f6e1Sdanielk1977 assert( pPager->doNotSync==1 ); 36564099f6e1Sdanielk1977 pPager->doNotSync = 0; 36574099f6e1Sdanielk1977 }else{ 36583b8a05f6Sdanielk1977 rc = pager_write(pDbPage); 36594099f6e1Sdanielk1977 } 36604099f6e1Sdanielk1977 return rc; 36614099f6e1Sdanielk1977 } 36624099f6e1Sdanielk1977 36634099f6e1Sdanielk1977 /* 3664aacc543eSdrh ** Return TRUE if the page given in the argument was previously passed 36653b8a05f6Sdanielk1977 ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok 36666019e168Sdrh ** to change the content of the page. 
36676019e168Sdrh */ 36687d3a666fSdanielk1977 #ifndef NDEBUG 36693b8a05f6Sdanielk1977 int sqlite3PagerIswriteable(DbPage *pPg){ 36708c0a791aSdanielk1977 return pPg->flags&PGHDR_DIRTY; 36716019e168Sdrh } 36727d3a666fSdanielk1977 #endif 36736019e168Sdrh 3674001bbcbbSdrh /* 367530e58750Sdrh ** A call to this routine tells the pager that it is not necessary to 3676538f570cSdrh ** write the information on page pPg back to the disk, even though 3677dfe88eceSdrh ** that page might be marked as dirty. This happens, for example, when 3678dfe88eceSdrh ** the page has been added as a leaf of the freelist and so its 3679dfe88eceSdrh ** content no longer matters. 368030e58750Sdrh ** 368130e58750Sdrh ** The overlying software layer calls this routine when all of the data 368230e58750Sdrh ** on the given page is unused. The pager marks the page as clean so 368330e58750Sdrh ** that it does not get written to disk. 368430e58750Sdrh ** 368530e58750Sdrh ** Tests show that this optimization, together with the 36863b8a05f6Sdanielk1977 ** sqlite3PagerDontRollback() below, more than double the speed 368730e58750Sdrh ** of large INSERT operations and quadruple the speed of large DELETEs. 36888e298f92Sdrh ** 368902983931Sdanielk1977 ** When this routine is called, set the bit corresponding to pDbPage in 369002983931Sdanielk1977 ** the Pager.pAlwaysRollback bitvec. Subsequent calls to 369102983931Sdanielk1977 ** sqlite3PagerDontRollback() for the same page will thereafter be ignored. 369202983931Sdanielk1977 ** This is necessary to avoid a problem where a page with data is added to 369302983931Sdanielk1977 ** the freelist during one part of a transaction then removed from the 369402983931Sdanielk1977 ** freelist during a later part of the same transaction and reused for some 369502983931Sdanielk1977 ** other purpose. When it is first added to the freelist, this routine is 369602983931Sdanielk1977 ** called. When reused, the sqlite3PagerDontRollback() routine is called. 
369702983931Sdanielk1977 ** But because the page contains critical data, we still need to be sure it 369802983931Sdanielk1977 ** gets rolled back in spite of the sqlite3PagerDontRollback() call. 369930e58750Sdrh */ 3700a1fa00d9Sdanielk1977 int sqlite3PagerDontWrite(DbPage *pDbPage){ 3701538f570cSdrh PgHdr *pPg = pDbPage; 3702538f570cSdrh Pager *pPager = pPg->pPager; 3703a1fa00d9Sdanielk1977 int rc; 37048e298f92Sdrh 37053460d19cSdanielk1977 if( pPg->pgno>pPager->dbOrigSize ){ 3706a1fa00d9Sdanielk1977 return SQLITE_OK; 3707a1fa00d9Sdanielk1977 } 3708a1fa00d9Sdanielk1977 if( pPager->pAlwaysRollback==0 ){ 3709a1fa00d9Sdanielk1977 assert( pPager->pInJournal ); 37103460d19cSdanielk1977 pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->dbOrigSize); 3711a1fa00d9Sdanielk1977 if( !pPager->pAlwaysRollback ){ 3712a1fa00d9Sdanielk1977 return SQLITE_NOMEM; 3713a1fa00d9Sdanielk1977 } 3714a1fa00d9Sdanielk1977 } 3715a1fa00d9Sdanielk1977 rc = sqlite3BitvecSet(pPager->pAlwaysRollback, pPg->pgno); 3716a1fa00d9Sdanielk1977 3717fd7f0452Sdanielk1977 if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){ 37181aa2d8b5Sdrh assert( pPager->state>=PAGER_SHARED ); 37193460d19cSdanielk1977 if( pPager->dbSize==pPg->pgno && pPager->dbOrigSize<pPager->dbSize ){ 37208124a30fSdrh /* If this pages is the last page in the file and the file has grown 37218124a30fSdrh ** during the current transaction, then do NOT mark the page as clean. 37228124a30fSdrh ** When the database file grows, we must make sure that the last page 37238124a30fSdrh ** gets written at least once so that the disk file will be the correct 37248124a30fSdrh ** size. If you do not write this page and the size of the file 37258124a30fSdrh ** on the disk ends up being too small, that can lead to database 37268124a30fSdrh ** corruption during the next transaction. 
37278124a30fSdrh */ 37288124a30fSdrh }else{ 372930d53701Sdrh PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); 3730538f570cSdrh IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) 373133e3216aSdanielk1977 pPg->flags |= PGHDR_DONT_WRITE; 37323c407374Sdanielk1977 #ifdef SQLITE_CHECK_PAGES 37333c407374Sdanielk1977 pPg->pageHash = pager_pagehash(pPg); 37343c407374Sdanielk1977 #endif 373530e58750Sdrh } 373630e58750Sdrh } 3737a1fa00d9Sdanielk1977 return rc; 37388124a30fSdrh } 373930e58750Sdrh 374030e58750Sdrh /* 374130e58750Sdrh ** A call to this routine tells the pager that if a rollback occurs, 374230e58750Sdrh ** it is not necessary to restore the data on the given page. This 374330e58750Sdrh ** means that the pager does not have to record the given page in the 374430e58750Sdrh ** rollback journal. 3745538f570cSdrh ** 3746538f570cSdrh ** If we have not yet actually read the content of this page (if 3747538f570cSdrh ** the PgHdr.needRead flag is set) then this routine acts as a promise 3748538f570cSdrh ** that we will never need to read the page content in the future. 3749538f570cSdrh ** so the needRead flag can be cleared at this point. 375030e58750Sdrh */ 37513b8a05f6Sdanielk1977 void sqlite3PagerDontRollback(DbPage *pPg){ 375230e58750Sdrh Pager *pPager = pPg->pPager; 37537539b6b8Sdrh TESTONLY( int rc; ) /* Return value from sqlite3BitvecSet() */ 375430e58750Sdrh 3755d3627afcSdrh assert( pPager->state>=PAGER_RESERVED ); 3756a55e9355Sdanielk1977 3757a55e9355Sdanielk1977 /* If the journal file is not open, or DontWrite() has been called on 375802983931Sdanielk1977 ** this page (DontWrite() sets the Pager.pAlwaysRollback bit), then this 3759a55e9355Sdanielk1977 ** function is a no-op. 
3760a55e9355Sdanielk1977 */ 3761a1fa00d9Sdanielk1977 if( pPager->journalOpen==0 3762a1fa00d9Sdanielk1977 || sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno) 37633460d19cSdanielk1977 || pPg->pgno>pPager->dbOrigSize 37648c0a791aSdanielk1977 ){ 376587c29a94Sdanielk1977 return; 376687c29a94Sdanielk1977 } 3767a55e9355Sdanielk1977 3768c5d0bd90Sdrh #ifdef SQLITE_SECURE_DELETE 37691feb7dd3Sdrh if( sqlite3BitvecTest(pPager->pInJournal, pPg->pgno)!=0 37703460d19cSdanielk1977 || pPg->pgno>pPager->dbOrigSize ){ 3771c5d0bd90Sdrh return; 3772c5d0bd90Sdrh } 3773c5d0bd90Sdrh #endif 3774c5d0bd90Sdrh 3775c5d0bd90Sdrh /* If SECURE_DELETE is disabled, then there is no way that this 3776c5d0bd90Sdrh ** routine can be called on a page for which sqlite3PagerDontWrite() 3777c5d0bd90Sdrh ** has not been previously called during the same transaction. 3778c5d0bd90Sdrh ** And if DontWrite() has previously been called, the following 3779c5d0bd90Sdrh ** conditions must be met. 37801013148bSdrh ** 37811013148bSdrh ** (Later:) Not true. If the database is corrupted by having duplicate 37821013148bSdrh ** pages on the freelist (ex: corrupt9.test) then the following is not 37831013148bSdrh ** necessarily true: 3784a55e9355Sdanielk1977 */ 37853460d19cSdanielk1977 /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->dbOrigSize ); */ 3786a55e9355Sdanielk1977 3787f5e7bb51Sdrh assert( pPager->pInJournal!=0 ); 37888c0a791aSdanielk1977 pPg->flags &= ~PGHDR_NEED_READ; 37897539b6b8Sdrh 37907539b6b8Sdrh /* Failure to set the bits in the InJournal bit-vectors is benign. 37917539b6b8Sdrh ** It merely means that we might do some extra work to journal a page 3792a8a71bacSdrh ** that does not need to be journaled. Nevertheless, be sure to test the 37937539b6b8Sdrh ** case where a malloc error occurs while trying to set a bit in a 37947539b6b8Sdrh ** bit vector. 
37957539b6b8Sdrh */ 37967539b6b8Sdrh sqlite3BeginBenignMalloc(); 37977539b6b8Sdrh TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); 37987539b6b8Sdrh testcase( rc==SQLITE_NOMEM ); 37997539b6b8Sdrh TESTONLY( rc = ) addToSavepointBitvecs(pPager, pPg->pgno); 38007539b6b8Sdrh testcase( rc==SQLITE_NOMEM ); 38017539b6b8Sdrh sqlite3EndBenignMalloc(); 38027539b6b8Sdrh 38037539b6b8Sdrh 380430d53701Sdrh PAGERTRACE(("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager))); 3805b0603416Sdrh IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno)) 380630e58750Sdrh } 380730e58750Sdrh 3808ac69b05eSdrh 380930e58750Sdrh /* 381080e35f46Sdrh ** This routine is called to increment the database file change-counter, 381180e35f46Sdrh ** stored at byte 24 of the pager file. 381280e35f46Sdrh */ 3813c7b6017cSdanielk1977 static int pager_incr_changecounter(Pager *pPager, int isDirect){ 381480e35f46Sdrh PgHdr *pPgHdr; 381580e35f46Sdrh u32 change_counter; 3816c7b6017cSdanielk1977 int rc = SQLITE_OK; 381780e35f46Sdrh 3818701bb3b4Sdrh #ifndef SQLITE_ENABLE_ATOMIC_WRITE 3819701bb3b4Sdrh assert( isDirect==0 ); /* isDirect is only true for atomic writes */ 3820701bb3b4Sdrh #endif 382112dd5496Sdanielk1977 if( !pPager->changeCountDone && pPager->dbSize>0 ){ 382280e35f46Sdrh /* Open page 1 of the file for writing. */ 382380e35f46Sdrh rc = sqlite3PagerGet(pPager, 1, &pPgHdr); 382480e35f46Sdrh if( rc!=SQLITE_OK ) return rc; 3825c7b6017cSdanielk1977 3826c7b6017cSdanielk1977 if( !isDirect ){ 382780e35f46Sdrh rc = sqlite3PagerWrite(pPgHdr); 3828ae72d982Sdanielk1977 if( rc!=SQLITE_OK ){ 3829ae72d982Sdanielk1977 sqlite3PagerUnref(pPgHdr); 3830ae72d982Sdanielk1977 return rc; 3831ae72d982Sdanielk1977 } 3832c7b6017cSdanielk1977 } 383380e35f46Sdrh 383480e35f46Sdrh /* Increment the value just read and write it back to byte 24. 
*/ 3835b1003913Sdrh change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers); 383680e35f46Sdrh change_counter++; 38378c0a791aSdanielk1977 put32bits(((char*)pPgHdr->pData)+24, change_counter); 3838c7b6017cSdanielk1977 3839701bb3b4Sdrh #ifdef SQLITE_ENABLE_ATOMIC_WRITE 3840c7b6017cSdanielk1977 if( isDirect && pPager->fd->pMethods ){ 38418c0a791aSdanielk1977 const void *zBuf = pPgHdr->pData; 38423460d19cSdanielk1977 assert( pPager->dbFileSize>0 ); 3843c7b6017cSdanielk1977 rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); 3844c7b6017cSdanielk1977 } 3845701bb3b4Sdrh #endif 3846c7b6017cSdanielk1977 384780e35f46Sdrh /* Release the page reference. */ 384880e35f46Sdrh sqlite3PagerUnref(pPgHdr); 384980e35f46Sdrh pPager->changeCountDone = 1; 385080e35f46Sdrh } 3851c7b6017cSdanielk1977 return rc; 385280e35f46Sdrh } 385380e35f46Sdrh 385480e35f46Sdrh /* 3855f653d782Sdanielk1977 ** Sync the pager file to disk. 3856f653d782Sdanielk1977 */ 3857f653d782Sdanielk1977 int sqlite3PagerSync(Pager *pPager){ 3858f653d782Sdanielk1977 int rc; 38597426f864Sdrh if( MEMDB ){ 38607426f864Sdrh rc = SQLITE_OK; 38617426f864Sdrh }else{ 3862f653d782Sdanielk1977 rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); 38637426f864Sdrh } 3864f653d782Sdanielk1977 return rc; 3865f653d782Sdanielk1977 } 3866f653d782Sdanielk1977 3867f653d782Sdanielk1977 /* 386880e35f46Sdrh ** Sync the database file for the pager pPager. zMaster points to the name 386980e35f46Sdrh ** of a master journal file that should be written into the individual 387080e35f46Sdrh ** journal file. zMaster may be NULL, which is interpreted as no master 387180e35f46Sdrh ** journal (a single database transaction). 387280e35f46Sdrh ** 387380e35f46Sdrh ** This routine ensures that the journal is synced, all dirty pages written 387480e35f46Sdrh ** to the database file and the database file synced. 
The only thing that 387580e35f46Sdrh ** remains to commit the transaction is to delete the journal file (or 387680e35f46Sdrh ** master journal file if specified). 387780e35f46Sdrh ** 387880e35f46Sdrh ** Note that if zMaster==NULL, this does not overwrite a previous value 387980e35f46Sdrh ** passed to an sqlite3PagerCommitPhaseOne() call. 388080e35f46Sdrh ** 3881f653d782Sdanielk1977 ** If the final parameter - noSync - is true, then the database file itself 3882f653d782Sdanielk1977 ** is not synced. The caller must call sqlite3PagerSync() directly to 3883f653d782Sdanielk1977 ** sync the database file before calling CommitPhaseTwo() to delete the 3884f653d782Sdanielk1977 ** journal file in this case. 388580e35f46Sdrh */ 3886f653d782Sdanielk1977 int sqlite3PagerCommitPhaseOne( 3887f653d782Sdanielk1977 Pager *pPager, 3888f653d782Sdanielk1977 const char *zMaster, 3889f653d782Sdanielk1977 int noSync 3890f653d782Sdanielk1977 ){ 389180e35f46Sdrh int rc = SQLITE_OK; 389280e35f46Sdrh 3893dad31b5eSdanielk1977 if( pPager->errCode ){ 3894dad31b5eSdanielk1977 return pPager->errCode; 3895dad31b5eSdanielk1977 } 3896dad31b5eSdanielk1977 3897d138c016Sdrh /* If no changes have been made, we can leave the transaction early. 3898d138c016Sdrh */ 3899d138c016Sdrh if( pPager->dbModified==0 && 3900d138c016Sdrh (pPager->journalMode!=PAGER_JOURNALMODE_DELETE || 3901d138c016Sdrh pPager->exclusiveMode!=0) ){ 3902d138c016Sdrh assert( pPager->dirtyCache==0 || pPager->journalOpen==0 ); 3903d138c016Sdrh return SQLITE_OK; 3904d138c016Sdrh } 3905d138c016Sdrh 390630d53701Sdrh PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n", 390730d53701Sdrh pPager->zFilename, zMaster, pPager->dbSize)); 390880e35f46Sdrh 390980e35f46Sdrh /* If this is an in-memory db, or no pages have been written to, or this 391080e35f46Sdrh ** function has already been called, it is a no-op. 
391180e35f46Sdrh */ 391280e35f46Sdrh if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){ 391380e35f46Sdrh PgHdr *pPg; 391480e35f46Sdrh 3915c7b6017cSdanielk1977 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 3916c7b6017cSdanielk1977 /* The atomic-write optimization can be used if all of the 3917c7b6017cSdanielk1977 ** following are true: 3918c7b6017cSdanielk1977 ** 3919c7b6017cSdanielk1977 ** + The file-system supports the atomic-write property for 3920c7b6017cSdanielk1977 ** blocks of size page-size, and 3921c7b6017cSdanielk1977 ** + This commit is not part of a multi-file transaction, and 3922c7b6017cSdanielk1977 ** + Exactly one page has been modified and store in the journal file. 3923c7b6017cSdanielk1977 ** 3924c7b6017cSdanielk1977 ** If the optimization can be used, then the journal file will never 3925c7b6017cSdanielk1977 ** be created for this transaction. 3926c7b6017cSdanielk1977 */ 39274d60af9bSdanielk1977 int useAtomicWrite; 39288c0a791aSdanielk1977 pPg = sqlite3PcacheDirtyList(pPager->pPCache); 39294d60af9bSdanielk1977 useAtomicWrite = ( 3930f55b8998Sdanielk1977 !zMaster && 3931700b9c5aSdanielk1977 pPager->journalOpen && 3932f55b8998Sdanielk1977 pPager->journalOff==jrnlBufferSize(pPager) && 39333460d19cSdanielk1977 pPager->dbSize>=pPager->dbFileSize && 39348c0a791aSdanielk1977 (pPg==0 || pPg->pDirty==0) 3935f55b8998Sdanielk1977 ); 3936700b9c5aSdanielk1977 assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF ); 3937f55b8998Sdanielk1977 if( useAtomicWrite ){ 3938c7b6017cSdanielk1977 /* Update the nRec field in the journal file. */ 3939c7b6017cSdanielk1977 int offset = pPager->journalHdr + sizeof(aJournalMagic); 3940c7b6017cSdanielk1977 assert(pPager->nRec==1); 3941c7b6017cSdanielk1977 rc = write32bits(pPager->jfd, offset, pPager->nRec); 3942c7b6017cSdanielk1977 3943c7b6017cSdanielk1977 /* Update the db file change counter. 
The following call will modify 3944c7b6017cSdanielk1977 ** the in-memory representation of page 1 to include the updated 3945c7b6017cSdanielk1977 ** change counter and then write page 1 directly to the database 3946c7b6017cSdanielk1977 ** file. Because of the atomic-write property of the host file-system, 3947c7b6017cSdanielk1977 ** this is safe. 3948c7b6017cSdanielk1977 */ 3949ae72d982Sdanielk1977 if( rc==SQLITE_OK ){ 3950c7b6017cSdanielk1977 rc = pager_incr_changecounter(pPager, 1); 3951ae72d982Sdanielk1977 } 3952f55b8998Sdanielk1977 }else{ 3953f55b8998Sdanielk1977 rc = sqlite3JournalCreate(pPager->jfd); 3954f55b8998Sdanielk1977 } 3955f55b8998Sdanielk1977 3956ae72d982Sdanielk1977 if( !useAtomicWrite && rc==SQLITE_OK ) 3957c7b6017cSdanielk1977 #endif 3958c7b6017cSdanielk1977 395980e35f46Sdrh /* If a master journal file name has already been written to the 396080e35f46Sdrh ** journal file, then no sync is required. This happens when it is 396180e35f46Sdrh ** written, then the process fails to upgrade from a RESERVED to an 396280e35f46Sdrh ** EXCLUSIVE lock. The next time the process tries to commit the 396380e35f46Sdrh ** transaction the m-j name will have already been written. 396480e35f46Sdrh */ 396580e35f46Sdrh if( !pPager->setMaster ){ 3966c7b6017cSdanielk1977 rc = pager_incr_changecounter(pPager, 0); 396780e35f46Sdrh if( rc!=SQLITE_OK ) goto sync_exit; 396871aa7fffSdanielk1977 if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ 396980e35f46Sdrh #ifndef SQLITE_OMIT_AUTOVACUUM 39703460d19cSdanielk1977 if( pPager->dbSize<pPager->dbOrigSize ){ 397180e35f46Sdrh /* If this transaction has made the database smaller, then all pages 397280e35f46Sdrh ** being discarded by the truncation must be written to the journal 397380e35f46Sdrh ** file. 
397480e35f46Sdrh */ 397580e35f46Sdrh Pgno i; 3976d92db531Sdanielk1977 Pgno iSkip = PAGER_MJ_PGNO(pPager); 39773460d19cSdanielk1977 Pgno dbSize = pPager->dbSize; 3978f9bce3c5Sdanielk1977 pPager->dbSize = pPager->dbOrigSize; 3979f70c1feeSdanielk1977 for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ 3980f5e7bb51Sdrh if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ 398180e35f46Sdrh rc = sqlite3PagerGet(pPager, i, &pPg); 398280e35f46Sdrh if( rc!=SQLITE_OK ) goto sync_exit; 398380e35f46Sdrh rc = sqlite3PagerWrite(pPg); 398480e35f46Sdrh sqlite3PagerUnref(pPg); 398580e35f46Sdrh if( rc!=SQLITE_OK ) goto sync_exit; 398680e35f46Sdrh } 398780e35f46Sdrh } 39883460d19cSdanielk1977 pPager->dbSize = dbSize; 398980e35f46Sdrh } 399080e35f46Sdrh #endif 399180e35f46Sdrh rc = writeMasterJournal(pPager, zMaster); 399280e35f46Sdrh if( rc!=SQLITE_OK ) goto sync_exit; 399380e35f46Sdrh rc = syncJournal(pPager); 399480e35f46Sdrh } 399571aa7fffSdanielk1977 } 3996c7b6017cSdanielk1977 if( rc!=SQLITE_OK ) goto sync_exit; 399780e35f46Sdrh 399880e35f46Sdrh /* Write all dirty pages to the database file */ 39998c0a791aSdanielk1977 pPg = sqlite3PcacheDirtyList(pPager->pPCache); 400080e35f46Sdrh rc = pager_write_pagelist(pPg); 4001153c62c4Sdrh if( rc!=SQLITE_OK ){ 400204c3a46eSdrh assert( rc!=SQLITE_IOERR_BLOCKED ); 400304c3a46eSdrh /* The error might have left the dirty list all fouled up here, 400404c3a46eSdrh ** but that does not matter because if the if the dirty list did 400504c3a46eSdrh ** get corrupted, then the transaction will roll back and 400604c3a46eSdrh ** discard the dirty list. There is an assert in 400704c3a46eSdrh ** pager_get_all_dirty_pages() that verifies that no attempt 400804c3a46eSdrh ** is made to use an invalid dirty list. 
400904c3a46eSdrh */ 4010153c62c4Sdrh goto sync_exit; 4011153c62c4Sdrh } 40128c0a791aSdanielk1977 sqlite3PcacheCleanAll(pPager->pPCache); 401380e35f46Sdrh 4014f90b7260Sdanielk1977 if( pPager->dbSize<pPager->dbFileSize ){ 4015f90b7260Sdanielk1977 assert( pPager->state>=PAGER_EXCLUSIVE ); 4016f90b7260Sdanielk1977 rc = pager_truncate(pPager, pPager->dbSize); 4017f90b7260Sdanielk1977 if( rc!=SQLITE_OK ) goto sync_exit; 4018f90b7260Sdanielk1977 } 4019f90b7260Sdanielk1977 402080e35f46Sdrh /* Sync the database file. */ 4021f653d782Sdanielk1977 if( !pPager->noSync && !noSync ){ 4022f036aef0Sdanielk1977 rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); 402380e35f46Sdrh } 402480e35f46Sdrh IOTRACE(("DBSYNC %p\n", pPager)) 402580e35f46Sdrh 402680e35f46Sdrh pPager->state = PAGER_SYNCED; 402780e35f46Sdrh } 402880e35f46Sdrh 402980e35f46Sdrh sync_exit: 4030e965ac77Sdanielk1977 if( rc==SQLITE_IOERR_BLOCKED ){ 4031e965ac77Sdanielk1977 /* pager_incr_changecounter() may attempt to obtain an exclusive 4032e965ac77Sdanielk1977 * lock to spill the cache and return IOERR_BLOCKED. But since 403385b623f2Sdrh * there is no chance the cache is inconsistent, it is 4034e965ac77Sdanielk1977 * better to return SQLITE_BUSY. 4035e965ac77Sdanielk1977 */ 4036e965ac77Sdanielk1977 rc = SQLITE_BUSY; 4037e965ac77Sdanielk1977 } 403880e35f46Sdrh return rc; 403980e35f46Sdrh } 404080e35f46Sdrh 404180e35f46Sdrh 404280e35f46Sdrh /* 4043ed7c855cSdrh ** Commit all changes to the database and release the write lock. 4044d9b0257aSdrh ** 4045d9b0257aSdrh ** If the commit fails for any reason, a rollback attempt is made 4046d9b0257aSdrh ** and an error code is returned. If the commit worked, SQLITE_OK 4047d9b0257aSdrh ** is returned. 
4048ed7c855cSdrh */ 404980e35f46Sdrh int sqlite3PagerCommitPhaseTwo(Pager *pPager){ 40508c0a791aSdanielk1977 int rc = SQLITE_OK; 4051d9b0257aSdrh 4052efaaf579Sdanielk1977 if( pPager->errCode ){ 40537f7bc66eSdanielk1977 return pPager->errCode; 4054d9b0257aSdrh } 4055a6abd041Sdrh if( pPager->state<PAGER_RESERVED ){ 4056d9b0257aSdrh return SQLITE_ERROR; 4057d9b0257aSdrh } 4058d138c016Sdrh if( pPager->dbModified==0 && 4059d138c016Sdrh (pPager->journalMode!=PAGER_JOURNALMODE_DELETE || 4060d138c016Sdrh pPager->exclusiveMode!=0) ){ 4061d138c016Sdrh assert( pPager->dirtyCache==0 || pPager->journalOpen==0 ); 4062d138c016Sdrh return SQLITE_OK; 4063d138c016Sdrh } 406430d53701Sdrh PAGERTRACE(("COMMIT %d\n", PAGERID(pPager))); 4065b3175389Sdanielk1977 assert( pPager->state==PAGER_SYNCED || MEMDB || !pPager->dirtyCache ); 4066df2566a3Sdanielk1977 rc = pager_end_transaction(pPager, pPager->setMaster); 406786f8c197Sdrh rc = pager_error(pPager, rc); 406886f8c197Sdrh return rc; 4069ed7c855cSdrh } 4070ed7c855cSdrh 4071ed7c855cSdrh /* 4072a6abd041Sdrh ** Rollback all changes. The database falls back to PAGER_SHARED mode. 4073ed7c855cSdrh ** All in-memory cache pages revert to their original data contents. 4074ed7c855cSdrh ** The journal is deleted. 4075d9b0257aSdrh ** 4076d9b0257aSdrh ** This routine cannot fail unless some other process is not following 40774f0ee686Sdrh ** the correct locking protocol or unless some other 4078d9b0257aSdrh ** process is writing trash into the journal file (SQLITE_CORRUPT) or 4079d9b0257aSdrh ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error 4080d9b0257aSdrh ** codes are returned for all these occasions. Otherwise, 4081d9b0257aSdrh ** SQLITE_OK is returned. 
4082ed7c855cSdrh */ 40833b8a05f6Sdanielk1977 int sqlite3PagerRollback(Pager *pPager){ 40848c0a791aSdanielk1977 int rc = SQLITE_OK; 408530d53701Sdrh PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); 4086b3175389Sdanielk1977 if( !pPager->dirtyCache || !pPager->journalOpen ){ 4087df2566a3Sdanielk1977 rc = pager_end_transaction(pPager, pPager->setMaster); 40888c0a791aSdanielk1977 }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ 4089a6abd041Sdrh if( pPager->state>=PAGER_EXCLUSIVE ){ 4090e277be05Sdanielk1977 pager_playback(pPager, 0); 40914b845d7eSdrh } 40928c0a791aSdanielk1977 rc = pPager->errCode; 40938c0a791aSdanielk1977 }else{ 4094a6abd041Sdrh if( pPager->state==PAGER_RESERVED ){ 409517221813Sdanielk1977 int rc2; 4096e277be05Sdanielk1977 rc = pager_playback(pPager, 0); 4097df2566a3Sdanielk1977 rc2 = pager_end_transaction(pPager, pPager->setMaster); 4098a6abd041Sdrh if( rc==SQLITE_OK ){ 4099a6abd041Sdrh rc = rc2; 4100d9b0257aSdrh } 4101a6abd041Sdrh }else{ 4102e277be05Sdanielk1977 rc = pager_playback(pPager, 0); 4103a6abd041Sdrh } 41048c0a791aSdanielk1977 4105b3175389Sdanielk1977 if( !MEMDB ){ 4106d92db531Sdanielk1977 pPager->dbSizeValid = 0; 4107b3175389Sdanielk1977 } 410807cb560bSdanielk1977 410907cb560bSdanielk1977 /* If an error occurs during a ROLLBACK, we can no longer trust the pager 411007cb560bSdanielk1977 ** cache. So call pager_error() on the way out to make any error 411107cb560bSdanielk1977 ** persistent. 411207cb560bSdanielk1977 */ 411386f8c197Sdrh rc = pager_error(pPager, rc); 41148c0a791aSdanielk1977 } 411586f8c197Sdrh return rc; 411698808babSdrh } 4117d9b0257aSdrh 4118d9b0257aSdrh /* 41195e00f6c7Sdrh ** Return TRUE if the database file is opened read-only. Return FALSE 41205e00f6c7Sdrh ** if the database is (in theory) writable. 
41215e00f6c7Sdrh */ 4122f49661a4Sdrh u8 sqlite3PagerIsreadonly(Pager *pPager){ 4123be0072d2Sdrh return pPager->readOnly; 41245e00f6c7Sdrh } 41255e00f6c7Sdrh 41265e00f6c7Sdrh /* 41270f7eb611Sdrh ** Return the number of references to the pager. 41280f7eb611Sdrh */ 41293b8a05f6Sdanielk1977 int sqlite3PagerRefcount(Pager *pPager){ 41308c0a791aSdanielk1977 return sqlite3PcacheRefCount(pPager->pPCache); 41310f7eb611Sdrh } 41320f7eb611Sdrh 413371d5d2cdSdanielk1977 /* 413471d5d2cdSdanielk1977 ** Return the number of references to the specified page. 413571d5d2cdSdanielk1977 */ 413671d5d2cdSdanielk1977 int sqlite3PagerPageRefcount(DbPage *pPage){ 413771d5d2cdSdanielk1977 return sqlite3PcachePageRefcount(pPage); 413871d5d2cdSdanielk1977 } 413971d5d2cdSdanielk1977 41400f7eb611Sdrh #ifdef SQLITE_TEST 41410f7eb611Sdrh /* 4142d9b0257aSdrh ** This routine is used for testing and analysis only. 4143d9b0257aSdrh */ 41443b8a05f6Sdanielk1977 int *sqlite3PagerStats(Pager *pPager){ 414542741be9Sdanielk1977 static int a[11]; 41468c0a791aSdanielk1977 a[0] = sqlite3PcacheRefCount(pPager->pPCache); 41478c0a791aSdanielk1977 a[1] = sqlite3PcachePagecount(pPager->pPCache); 41488c0a791aSdanielk1977 a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); 4149d92db531Sdanielk1977 a[3] = pPager->dbSizeValid ? (int) pPager->dbSize : -1; 4150d9b0257aSdrh a[4] = pPager->state; 4151efaaf579Sdanielk1977 a[5] = pPager->errCode; 4152d9b0257aSdrh a[6] = pPager->nHit; 4153d9b0257aSdrh a[7] = pPager->nMiss; 41547c4ac0c5Sdrh a[8] = 0; /* Used to be pPager->nOvfl */ 415542741be9Sdanielk1977 a[9] = pPager->nRead; 415642741be9Sdanielk1977 a[10] = pPager->nWrite; 4157d9b0257aSdrh return a; 4158d9b0257aSdrh } 415917b90b53Sdanielk1977 int sqlite3PagerIsMemdb(Pager *pPager){ 416017b90b53Sdanielk1977 return MEMDB; 416117b90b53Sdanielk1977 } 41620f7eb611Sdrh #endif 4163dd79342eSdrh 4164fa86c412Sdrh /* 4165fd7f0452Sdanielk1977 ** Ensure that there are at least nSavepoint savepoints open. 
4166fa86c412Sdrh */ 4167fd7f0452Sdanielk1977 int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ 4168fd7f0452Sdanielk1977 int rc = SQLITE_OK; 4169fd7f0452Sdanielk1977 417012dd5496Sdanielk1977 if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ 4171fd7f0452Sdanielk1977 int ii; 417249b9d338Sdrh PagerSavepoint *aNew; 4173fd7f0452Sdanielk1977 4174c0731c9dSdrh /* Either there is no active journal or the sub-journal is open or 4175c0731c9dSdrh ** the journal is always stored in memory */ 4176c0731c9dSdrh assert( pPager->nSavepoint==0 || pPager->sjfd->pMethods || 4177c0731c9dSdrh pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); 4178fd7f0452Sdanielk1977 4179fd7f0452Sdanielk1977 /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM 4180fd7f0452Sdanielk1977 ** if the allocation fails. Otherwise, zero the new portion in case a 4181fd7f0452Sdanielk1977 ** malloc failure occurs while populating it in the for(...) loop below. 4182fd7f0452Sdanielk1977 */ 418349b9d338Sdrh aNew = (PagerSavepoint *)sqlite3Realloc( 4184fd7f0452Sdanielk1977 pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint 4185fd7f0452Sdanielk1977 ); 4186fd7f0452Sdanielk1977 if( !aNew ){ 4187fa86c412Sdrh return SQLITE_NOMEM; 4188fa86c412Sdrh } 4189fd7f0452Sdanielk1977 memset(&aNew[pPager->nSavepoint], 0, 4190fd7f0452Sdanielk1977 (nSavepoint - pPager->nSavepoint) * sizeof(PagerSavepoint) 4191fd7f0452Sdanielk1977 ); 4192fd7f0452Sdanielk1977 pPager->aSavepoint = aNew; 4193fd7f0452Sdanielk1977 ii = pPager->nSavepoint; 4194fd7f0452Sdanielk1977 pPager->nSavepoint = nSavepoint; 4195fa86c412Sdrh 4196fd7f0452Sdanielk1977 /* Populate the PagerSavepoint structures just allocated. 
*/ 4197fd7f0452Sdanielk1977 for(/* no-op */; ii<nSavepoint; ii++){ 419812dd5496Sdanielk1977 assert( pPager->dbSizeValid ); 4199fd7f0452Sdanielk1977 aNew[ii].nOrig = pPager->dbSize; 420067ddef69Sdanielk1977 if( pPager->journalOpen && pPager->journalOff>0 ){ 420167ddef69Sdanielk1977 aNew[ii].iOffset = pPager->journalOff; 420267ddef69Sdanielk1977 }else{ 420367ddef69Sdanielk1977 aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); 420467ddef69Sdanielk1977 } 4205fd7f0452Sdanielk1977 aNew[ii].iSubRec = pPager->stmtNRec; 4206fd7f0452Sdanielk1977 aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); 4207fd7f0452Sdanielk1977 if( !aNew[ii].pInSavepoint ){ 4208fd7f0452Sdanielk1977 return SQLITE_NOMEM; 4209fa86c412Sdrh } 4210fa86c412Sdrh } 4211fd7f0452Sdanielk1977 4212fd7f0452Sdanielk1977 /* Open the sub-journal, if it is not already opened. */ 4213fd7f0452Sdanielk1977 rc = openSubJournal(pPager); 4214fd7f0452Sdanielk1977 } 4215fd7f0452Sdanielk1977 421686f8c197Sdrh return rc; 421786f8c197Sdrh } 4218fa86c412Sdrh 4219fa86c412Sdrh /* 4220fd7f0452Sdanielk1977 ** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE. 4221fd7f0452Sdanielk1977 ** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with 4222fd7f0452Sdanielk1977 ** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes 4223fd7f0452Sdanielk1977 ** that have occured since savepoint iSavepoint was created. 4224fd7f0452Sdanielk1977 ** 4225fd7f0452Sdanielk1977 ** In either case, all savepoints with an index greater than iSavepoint 4226fd7f0452Sdanielk1977 ** are destroyed. 4227fd7f0452Sdanielk1977 ** 4228fd7f0452Sdanielk1977 ** If there are less than (iSavepoint+1) active savepoints when this 4229fd7f0452Sdanielk1977 ** function is called it is a no-op. 
4230fa86c412Sdrh */ 4231fd7f0452Sdanielk1977 int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ 4232fd7f0452Sdanielk1977 int rc = SQLITE_OK; 4233fd7f0452Sdanielk1977 4234fd7f0452Sdanielk1977 assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); 4235fd7f0452Sdanielk1977 4236fd7f0452Sdanielk1977 if( iSavepoint<pPager->nSavepoint ){ 4237fd7f0452Sdanielk1977 int ii; 4238fd7f0452Sdanielk1977 int nNew = iSavepoint + (op==SAVEPOINT_ROLLBACK); 4239fd7f0452Sdanielk1977 for(ii=nNew; ii<pPager->nSavepoint; ii++){ 4240fd7f0452Sdanielk1977 sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); 4241b3175389Sdanielk1977 } 4242fd7f0452Sdanielk1977 pPager->nSavepoint = nNew; 4243fd7f0452Sdanielk1977 42448a7adb0dSdanielk1977 if( op==SAVEPOINT_ROLLBACK && pPager->jfd->pMethods ){ 4245fd7f0452Sdanielk1977 PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; 4246fd7f0452Sdanielk1977 rc = pagerPlaybackSavepoint(pPager, pSavepoint); 4247fd7f0452Sdanielk1977 assert(rc!=SQLITE_DONE); 4248fa86c412Sdrh } 4249fa86c412Sdrh 4250fd7f0452Sdanielk1977 /* If this is a release of the outermost savepoint, truncate 4251fd7f0452Sdanielk1977 ** the sub-journal. */ 4252fd7f0452Sdanielk1977 if( nNew==0 && op==SAVEPOINT_RELEASE && pPager->sjfd->pMethods ){ 4253fd7f0452Sdanielk1977 assert( rc==SQLITE_OK ); 4254fd7f0452Sdanielk1977 rc = sqlite3OsTruncate(pPager->sjfd, 0); 4255fd7f0452Sdanielk1977 pPager->stmtNRec = 0; 4256663fc63aSdrh } 4257fd7f0452Sdanielk1977 } 4258fa86c412Sdrh return rc; 4259fa86c412Sdrh } 4260fa86c412Sdrh 426173509eeeSdrh /* 426273509eeeSdrh ** Return the full pathname of the database file. 426373509eeeSdrh */ 42643b8a05f6Sdanielk1977 const char *sqlite3PagerFilename(Pager *pPager){ 426573509eeeSdrh return pPager->zFilename; 426673509eeeSdrh } 426773509eeeSdrh 4268b20ea9d2Sdrh /* 4269d0679edcSdrh ** Return the VFS structure for the pager. 
4270d0679edcSdrh */ 4271d0679edcSdrh const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ 4272d0679edcSdrh return pPager->pVfs; 4273d0679edcSdrh } 4274d0679edcSdrh 4275d0679edcSdrh /* 4276cc6bb3eaSdrh ** Return the file handle for the database file associated 4277cc6bb3eaSdrh ** with the pager. This might return NULL if the file has 4278cc6bb3eaSdrh ** not yet been opened. 4279cc6bb3eaSdrh */ 4280cc6bb3eaSdrh sqlite3_file *sqlite3PagerFile(Pager *pPager){ 4281cc6bb3eaSdrh return pPager->fd; 4282cc6bb3eaSdrh } 4283cc6bb3eaSdrh 4284cc6bb3eaSdrh /* 42855865e3d5Sdanielk1977 ** Return the directory of the database file. 42865865e3d5Sdanielk1977 */ 42873b8a05f6Sdanielk1977 const char *sqlite3PagerDirname(Pager *pPager){ 42885865e3d5Sdanielk1977 return pPager->zDirectory; 42895865e3d5Sdanielk1977 } 42905865e3d5Sdanielk1977 42915865e3d5Sdanielk1977 /* 42925865e3d5Sdanielk1977 ** Return the full pathname of the journal file. 42935865e3d5Sdanielk1977 */ 42943b8a05f6Sdanielk1977 const char *sqlite3PagerJournalname(Pager *pPager){ 42955865e3d5Sdanielk1977 return pPager->zJournal; 42965865e3d5Sdanielk1977 } 42975865e3d5Sdanielk1977 42985865e3d5Sdanielk1977 /* 42992c8997b9Sdrh ** Return true if fsync() calls are disabled for this pager. Return FALSE 43002c8997b9Sdrh ** if fsync()s are executed normally. 
43012c8997b9Sdrh */ 43023b8a05f6Sdanielk1977 int sqlite3PagerNosync(Pager *pPager){ 43032c8997b9Sdrh return pPager->noSync; 43042c8997b9Sdrh } 43052c8997b9Sdrh 43067c4ac0c5Sdrh #ifdef SQLITE_HAS_CODEC 43072c8997b9Sdrh /* 4308b20ea9d2Sdrh ** Set the codec for this pager 4309b20ea9d2Sdrh */ 43103b8a05f6Sdanielk1977 void sqlite3PagerSetCodec( 4311b20ea9d2Sdrh Pager *pPager, 4312c001c58aSdrh void *(*xCodec)(void*,void*,Pgno,int), 4313b20ea9d2Sdrh void *pCodecArg 4314b20ea9d2Sdrh ){ 4315b20ea9d2Sdrh pPager->xCodec = xCodec; 4316b20ea9d2Sdrh pPager->pCodecArg = pCodecArg; 4317b20ea9d2Sdrh } 43187c4ac0c5Sdrh #endif 4319b20ea9d2Sdrh 4320687566d7Sdanielk1977 #ifndef SQLITE_OMIT_AUTOVACUUM 4321687566d7Sdanielk1977 /* 43225e385311Sdrh ** Move the page pPg to location pgno in the file. 4323687566d7Sdanielk1977 ** 43245e385311Sdrh ** There must be no references to the page previously located at 43255e385311Sdrh ** pgno (which we call pPgOld) though that page is allowed to be 4326b3df2e1cSdrh ** in cache. If the page previously located at pgno is not already 43275e385311Sdrh ** in the rollback journal, it is not put there by by this routine. 4328687566d7Sdanielk1977 ** 43295e385311Sdrh ** References to the page pPg remain valid. Updating any 43305e385311Sdrh ** meta-data associated with pPg (i.e. data stored in the nExtra bytes 4331687566d7Sdanielk1977 ** allocated along with the page) is the responsibility of the caller. 4332687566d7Sdanielk1977 ** 43335fd057afSdanielk1977 ** A transaction must be active when this routine is called. It used to be 43345fd057afSdanielk1977 ** required that a statement transaction was not active, but this restriction 43355fd057afSdanielk1977 ** has been removed (CREATE INDEX needs to move a page when a statement 43365fd057afSdanielk1977 ** transaction is active). 
43374c999999Sdanielk1977 ** 43384c999999Sdanielk1977 ** If the fourth argument, isCommit, is non-zero, then this page is being 43394c999999Sdanielk1977 ** moved as part of a database reorganization just before the transaction 43404c999999Sdanielk1977 ** is being committed. In this case, it is guaranteed that the database page 43414c999999Sdanielk1977 ** pPg refers to will not be written to again within this transaction. 4342687566d7Sdanielk1977 */ 43434c999999Sdanielk1977 int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){ 43445e385311Sdrh PgHdr *pPgOld; /* The page being overwritten. */ 434594daf7fdSdanielk1977 Pgno needSyncPgno = 0; 43461fab7b66Sdanielk1977 int rc; 4347687566d7Sdanielk1977 4348687566d7Sdanielk1977 assert( pPg->nRef>0 ); 4349687566d7Sdanielk1977 43501fab7b66Sdanielk1977 /* If the page being moved is dirty and has not been saved by the latest 43511fab7b66Sdanielk1977 ** savepoint, then save the current contents of the page into the 43521fab7b66Sdanielk1977 ** sub-journal now. This is required to handle the following scenario: 43531fab7b66Sdanielk1977 ** 43541fab7b66Sdanielk1977 ** BEGIN; 43551fab7b66Sdanielk1977 ** <journal page X, then modify it in memory> 43561fab7b66Sdanielk1977 ** SAVEPOINT one; 43571fab7b66Sdanielk1977 ** <Move page X to location Y> 43581fab7b66Sdanielk1977 ** ROLLBACK TO one; 43591fab7b66Sdanielk1977 ** 43601fab7b66Sdanielk1977 ** If page X were not written to the sub-journal here, it would not 43611fab7b66Sdanielk1977 ** be possible to restore its contents when the "ROLLBACK TO one" 43621fab7b66Sdanielk1977 ** statement were processed. 
43631fab7b66Sdanielk1977 */ 43641fab7b66Sdanielk1977 if( pPg->flags&PGHDR_DIRTY 43651fab7b66Sdanielk1977 && subjRequiresPage(pPg) 43661fab7b66Sdanielk1977 && SQLITE_OK!=(rc = subjournalPage(pPg)) 43671fab7b66Sdanielk1977 ){ 43681fab7b66Sdanielk1977 return rc; 43691fab7b66Sdanielk1977 } 43701fab7b66Sdanielk1977 437130d53701Sdrh PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", 437230d53701Sdrh PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno)); 4373b0603416Sdrh IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno)) 4374ef73ee9aSdanielk1977 4375b4626a3eSdanielk1977 pager_get_content(pPg); 43764c999999Sdanielk1977 43774c999999Sdanielk1977 /* If the journal needs to be sync()ed before page pPg->pgno can 43784c999999Sdanielk1977 ** be written to, store pPg->pgno in local variable needSyncPgno. 43794c999999Sdanielk1977 ** 43804c999999Sdanielk1977 ** If the isCommit flag is set, there is no need to remember that 43814c999999Sdanielk1977 ** the journal needs to be sync()ed before database page pPg->pgno 43824c999999Sdanielk1977 ** can be written to. The caller has already promised not to write to it. 43834c999999Sdanielk1977 */ 43848c0a791aSdanielk1977 if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){ 438594daf7fdSdanielk1977 needSyncPgno = pPg->pgno; 43863460d19cSdanielk1977 assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize ); 43878c0a791aSdanielk1977 assert( pPg->flags&PGHDR_DIRTY ); 4388ae82558bSdanielk1977 assert( pPager->needSync ); 438994daf7fdSdanielk1977 } 439094daf7fdSdanielk1977 4391ef73ee9aSdanielk1977 /* If the cache contains a page with page-number pgno, remove it 439285b623f2Sdrh ** from its hash chain. Also, if the PgHdr.needSync was set for 4393599fcbaeSdanielk1977 ** page pgno before the 'move' operation, it needs to be retained 4394599fcbaeSdanielk1977 ** for the page moved there. 
4395f5fdda82Sdanielk1977 */ 4396bc2ca9ebSdanielk1977 pPg->flags &= ~PGHDR_NEED_SYNC; 4397687566d7Sdanielk1977 pPgOld = pager_lookup(pPager, pgno); 43988c0a791aSdanielk1977 assert( !pPgOld || pPgOld->nRef==1 ); 4399687566d7Sdanielk1977 if( pPgOld ){ 44008c0a791aSdanielk1977 pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC); 4401ef73ee9aSdanielk1977 } 4402687566d7Sdanielk1977 44038c0a791aSdanielk1977 sqlite3PcacheMove(pPg, pgno); 44048c0a791aSdanielk1977 if( pPgOld ){ 4405bc2ca9ebSdanielk1977 sqlite3PcacheDrop(pPgOld); 4406f5fdda82Sdanielk1977 } 4407f5fdda82Sdanielk1977 4408c047b9f7Sdrh sqlite3PcacheMakeDirty(pPg); 4409687566d7Sdanielk1977 pPager->dirtyCache = 1; 4410d138c016Sdrh pPager->dbModified = 1; 4411687566d7Sdanielk1977 441294daf7fdSdanielk1977 if( needSyncPgno ){ 441394daf7fdSdanielk1977 /* If needSyncPgno is non-zero, then the journal file needs to be 441494daf7fdSdanielk1977 ** sync()ed before any data is written to database file page needSyncPgno. 441594daf7fdSdanielk1977 ** Currently, no such page exists in the page-cache and the 44164c999999Sdanielk1977 ** "is journaled" bitvec flag has been set. This needs to be remedied by 44174c999999Sdanielk1977 ** loading the page into the pager-cache and setting the PgHdr.needSync 44184c999999Sdanielk1977 ** flag. 4419ae82558bSdanielk1977 ** 4420a98d7b47Sdanielk1977 ** If the attempt to load the page into the page-cache fails, (due 4421f5e7bb51Sdrh ** to a malloc() or IO failure), clear the bit in the pInJournal[] 4422a98d7b47Sdanielk1977 ** array. Otherwise, if the page is loaded and written again in 4423a98d7b47Sdanielk1977 ** this transaction, it may be written to the database file before 4424a98d7b47Sdanielk1977 ** it is synced into the journal file. This way, it may end up in 4425a98d7b47Sdanielk1977 ** the journal file twice, but that is not a problem. 4426a98d7b47Sdanielk1977 ** 44273b8a05f6Sdanielk1977 ** The sqlite3PagerGet() call may cause the journal to sync. 
So make 4428ae82558bSdanielk1977 ** sure the Pager.needSync flag is set too. 442994daf7fdSdanielk1977 */ 44303b8a05f6Sdanielk1977 PgHdr *pPgHdr; 4431ae82558bSdanielk1977 assert( pPager->needSync ); 44323b8a05f6Sdanielk1977 rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr); 443387c29a94Sdanielk1977 if( rc!=SQLITE_OK ){ 44343460d19cSdanielk1977 if( pPager->pInJournal && needSyncPgno<=pPager->dbOrigSize ){ 4435f5e7bb51Sdrh sqlite3BitvecClear(pPager->pInJournal, needSyncPgno); 4436a98d7b47Sdanielk1977 } 443787c29a94Sdanielk1977 return rc; 443887c29a94Sdanielk1977 } 4439ae82558bSdanielk1977 pPager->needSync = 1; 4440b3df2e1cSdrh assert( pPager->noSync==0 && !MEMDB ); 44418c0a791aSdanielk1977 pPgHdr->flags |= PGHDR_NEED_SYNC; 4442c047b9f7Sdrh sqlite3PcacheMakeDirty(pPgHdr); 44433b8a05f6Sdanielk1977 sqlite3PagerUnref(pPgHdr); 444494daf7fdSdanielk1977 } 444594daf7fdSdanielk1977 4446687566d7Sdanielk1977 return SQLITE_OK; 4447687566d7Sdanielk1977 } 4448687566d7Sdanielk1977 #endif 4449687566d7Sdanielk1977 44503b8a05f6Sdanielk1977 /* 44513b8a05f6Sdanielk1977 ** Return a pointer to the data for the specified page. 44523b8a05f6Sdanielk1977 */ 44533b8a05f6Sdanielk1977 void *sqlite3PagerGetData(DbPage *pPg){ 445471d5d2cdSdanielk1977 assert( pPg->nRef>0 || pPg->pPager->memDb ); 44558c0a791aSdanielk1977 return pPg->pData; 44563b8a05f6Sdanielk1977 } 44573b8a05f6Sdanielk1977 44583b8a05f6Sdanielk1977 /* 44593b8a05f6Sdanielk1977 ** Return a pointer to the Pager.nExtra bytes of "extra" space 44603b8a05f6Sdanielk1977 ** allocated along with the specified page. 44613b8a05f6Sdanielk1977 */ 44623b8a05f6Sdanielk1977 void *sqlite3PagerGetExtra(DbPage *pPg){ 44633b8a05f6Sdanielk1977 Pager *pPager = pPg->pPager; 44648c0a791aSdanielk1977 return (pPager?pPg->pExtra:0); 44653b8a05f6Sdanielk1977 } 44663b8a05f6Sdanielk1977 446741483468Sdanielk1977 /* 446841483468Sdanielk1977 ** Get/set the locking-mode for this pager. 
Parameter eMode must be one 446941483468Sdanielk1977 ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 447041483468Sdanielk1977 ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then 447141483468Sdanielk1977 ** the locking-mode is set to the value specified. 447241483468Sdanielk1977 ** 447341483468Sdanielk1977 ** The returned value is either PAGER_LOCKINGMODE_NORMAL or 447441483468Sdanielk1977 ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated) 447541483468Sdanielk1977 ** locking-mode. 447641483468Sdanielk1977 */ 447741483468Sdanielk1977 int sqlite3PagerLockingMode(Pager *pPager, int eMode){ 4478369339dbSdrh assert( eMode==PAGER_LOCKINGMODE_QUERY 4479369339dbSdrh || eMode==PAGER_LOCKINGMODE_NORMAL 4480369339dbSdrh || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); 4481369339dbSdrh assert( PAGER_LOCKINGMODE_QUERY<0 ); 4482369339dbSdrh assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); 4483369339dbSdrh if( eMode>=0 && !pPager->tempFile ){ 44841bd10f8aSdrh pPager->exclusiveMode = (u8)eMode; 448541483468Sdanielk1977 } 448641483468Sdanielk1977 return (int)pPager->exclusiveMode; 448741483468Sdanielk1977 } 448841483468Sdanielk1977 44893b02013eSdrh /* 449004335886Sdrh ** Get/set the journal-mode for this pager. Parameter eMode must be one of: 44913b02013eSdrh ** 449204335886Sdrh ** PAGER_JOURNALMODE_QUERY 449304335886Sdrh ** PAGER_JOURNALMODE_DELETE 449404335886Sdrh ** PAGER_JOURNALMODE_TRUNCATE 449504335886Sdrh ** PAGER_JOURNALMODE_PERSIST 449604335886Sdrh ** PAGER_JOURNALMODE_OFF 449704335886Sdrh ** 449804335886Sdrh ** If the parameter is not _QUERY, then the journal-mode is set to the 449904335886Sdrh ** value specified. 450004335886Sdrh ** 450104335886Sdrh ** The returned indicate the current (possibly updated) 45023b02013eSdrh ** journal-mode. 
45033b02013eSdrh */ 45043b02013eSdrh int sqlite3PagerJournalMode(Pager *pPager, int eMode){ 4505b3175389Sdanielk1977 if( !MEMDB ){ 45063b02013eSdrh assert( eMode==PAGER_JOURNALMODE_QUERY 45073b02013eSdrh || eMode==PAGER_JOURNALMODE_DELETE 450804335886Sdrh || eMode==PAGER_JOURNALMODE_TRUNCATE 4509fdc40e91Sdrh || eMode==PAGER_JOURNALMODE_PERSIST 4510b3175389Sdanielk1977 || eMode==PAGER_JOURNALMODE_OFF 4511b3175389Sdanielk1977 || eMode==PAGER_JOURNALMODE_MEMORY ); 45123b02013eSdrh assert( PAGER_JOURNALMODE_QUERY<0 ); 4513fdc40e91Sdrh if( eMode>=0 ){ 45141bd10f8aSdrh pPager->journalMode = (u8)eMode; 451504335886Sdrh }else{ 451604335886Sdrh assert( eMode==PAGER_JOURNALMODE_QUERY ); 45173b02013eSdrh } 4518b3175389Sdanielk1977 } 4519fdc40e91Sdrh return (int)pPager->journalMode; 45203b02013eSdrh } 45213b02013eSdrh 4522b53e4960Sdanielk1977 /* 4523b53e4960Sdanielk1977 ** Get/set the size-limit used for persistent journal files. 4524b53e4960Sdanielk1977 */ 4525b53e4960Sdanielk1977 i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){ 4526b53e4960Sdanielk1977 if( iLimit>=-1 ){ 4527b53e4960Sdanielk1977 pPager->journalSizeLimit = iLimit; 4528b53e4960Sdanielk1977 } 4529b53e4960Sdanielk1977 return pPager->journalSizeLimit; 4530b53e4960Sdanielk1977 } 4531b53e4960Sdanielk1977 45322e66f0b9Sdrh #endif /* SQLITE_OMIT_DISKIO */ 4533