1 /* 2 ** 2008 October 7 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** This file contains code use to implement an in-memory rollback journal. 14 ** The in-memory rollback journal is used to journal transactions for 15 ** ":memory:" databases and when the journal_mode=MEMORY pragma is used. 16 ** 17 ** Update: The in-memory journal is also used to temporarily cache 18 ** smaller journals that are not critical for power-loss recovery. 19 ** For example, statement journals that are not too big will be held 20 ** entirely in memory, thus reducing the number of file I/O calls, and 21 ** more importantly, reducing temporary file creation events. If these 22 ** journals become too large for memory, they are spilled to disk. But 23 ** in the common case, they are usually small and no file I/O needs to 24 ** occur. 25 */ 26 #include "sqliteInt.h" 27 28 /* Forward references to internal structures */ 29 typedef struct MemJournal MemJournal; 30 typedef struct FilePoint FilePoint; 31 typedef struct FileChunk FileChunk; 32 33 /* 34 ** The rollback journal is composed of a linked list of these structures. 35 ** 36 ** The zChunk array is always at least 8 bytes in size - usually much more. 37 ** Its actual size is stored in the MemJournal.nChunkSize variable. 38 */ 39 struct FileChunk { 40 FileChunk *pNext; /* Next chunk in the journal */ 41 u8 zChunk[8]; /* Content of this chunk */ 42 }; 43 44 /* 45 ** By default, allocate this many bytes of memory for each FileChunk object. 46 */ 47 #define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024 48 49 /* 50 ** For chunk size nChunkSize, return the number of bytes that should 51 ** be allocated for each FileChunk structure. 52 */ 53 #define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8)) 54 55 /* 56 ** An instance of this object serves as a cursor into the rollback journal. 57 ** The cursor can be either for reading or writing. 58 */ 59 struct FilePoint { 60 sqlite3_int64 iOffset; /* Offset from the beginning of the file */ 61 FileChunk *pChunk; /* Specific chunk into which cursor points */ 62 }; 63 64 /* 65 ** This structure is a subclass of sqlite3_file. Each open memory-journal 66 ** is an instance of this class. 67 */ 68 struct MemJournal { 69 const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ 70 int nChunkSize; /* In-memory chunk-size */ 71 72 int nSpill; /* Bytes of data before flushing */ 73 FileChunk *pFirst; /* Head of in-memory chunk-list */ 74 FilePoint endpoint; /* Pointer to the end of the file */ 75 FilePoint readpoint; /* Pointer to the end of the last xRead() */ 76 77 int flags; /* xOpen flags */ 78 sqlite3_vfs *pVfs; /* The "real" underlying VFS */ 79 const char *zJournal; /* Name of the journal file */ 80 }; 81 82 /* 83 ** Read data from the in-memory journal file. This is the implementation 84 ** of the sqlite3_vfs.xRead method. 85 */ 86 static int memjrnlRead( 87 sqlite3_file *pJfd, /* The journal file from which to read */ 88 void *zBuf, /* Put the results here */ 89 int iAmt, /* Number of bytes to read */ 90 sqlite_int64 iOfst /* Begin reading at this offset */ 91 ){ 92 MemJournal *p = (MemJournal *)pJfd; 93 u8 *zOut = zBuf; 94 int nRead = iAmt; 95 int iChunkOffset; 96 FileChunk *pChunk; 97 98 if( (iAmt+iOfst)>p->endpoint.iOffset ){ 99 return SQLITE_IOERR_SHORT_READ; 100 } 101 assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 ); 102 if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ 103 sqlite3_int64 iOff = 0; 104 for(pChunk=p->pFirst; 105 ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst; 106 pChunk=pChunk->pNext 107 ){ 108 iOff += p->nChunkSize; 109 } 110 }else{ 111 pChunk = p->readpoint.pChunk; 112 assert( pChunk!=0 ); 113 } 114 115 iChunkOffset = (int)(iOfst%p->nChunkSize); 116 do { 117 int iSpace = p->nChunkSize - iChunkOffset; 118 int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset)); 119 memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy); 120 zOut += nCopy; 121 nRead -= iSpace; 122 iChunkOffset = 0; 123 } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); 124 p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0; 125 p->readpoint.pChunk = pChunk; 126 127 return SQLITE_OK; 128 } 129 130 /* 131 ** Free the list of FileChunk structures headed at MemJournal.pFirst. 132 */ 133 static void memjrnlFreeChunks(FileChunk *pFirst){ 134 FileChunk *pIter; 135 FileChunk *pNext; 136 for(pIter=pFirst; pIter; pIter=pNext){ 137 pNext = pIter->pNext; 138 sqlite3_free(pIter); 139 } 140 } 141 142 /* 143 ** Flush the contents of memory to a real file on disk. 144 */ 145 static int memjrnlCreateFile(MemJournal *p){ 146 int rc; 147 sqlite3_file *pReal = (sqlite3_file*)p; 148 MemJournal copy = *p; 149 150 memset(p, 0, sizeof(MemJournal)); 151 rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0); 152 if( rc==SQLITE_OK ){ 153 int nChunk = copy.nChunkSize; 154 i64 iOff = 0; 155 FileChunk *pIter; 156 for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){ 157 if( iOff + nChunk > copy.endpoint.iOffset ){ 158 nChunk = copy.endpoint.iOffset - iOff; 159 } 160 rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff); 161 if( rc ) break; 162 iOff += nChunk; 163 } 164 if( rc==SQLITE_OK ){ 165 /* No error has occurred. Free the in-memory buffers. */ 166 memjrnlFreeChunks(copy.pFirst); 167 } 168 } 169 if( rc!=SQLITE_OK ){ 170 /* If an error occurred while creating or writing to the file, restore 171 ** the original before returning. This way, SQLite uses the in-memory 172 ** journal data to roll back changes made to the internal page-cache 173 ** before this function was called. */ 174 sqlite3OsClose(pReal); 175 *p = copy; 176 } 177 return rc; 178 } 179 180 181 /* Forward reference */ 182 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size); 183 184 /* 185 ** Write data to the file. 186 */ 187 static int memjrnlWrite( 188 sqlite3_file *pJfd, /* The journal file into which to write */ 189 const void *zBuf, /* Take data to be written from here */ 190 int iAmt, /* Number of bytes to write */ 191 sqlite_int64 iOfst /* Begin writing at this offset into the file */ 192 ){ 193 MemJournal *p = (MemJournal *)pJfd; 194 int nWrite = iAmt; 195 u8 *zWrite = (u8 *)zBuf; 196 197 /* If the file should be created now, create it and write the new data 198 ** into the file on disk. */ 199 if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){ 200 int rc = memjrnlCreateFile(p); 201 if( rc==SQLITE_OK ){ 202 rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst); 203 } 204 return rc; 205 } 206 207 /* If the contents of this write should be stored in memory */ 208 else{ 209 /* An in-memory journal file should only ever be appended to. Random 210 ** access writes are not required. The only exception to this is when 211 ** the in-memory journal is being used by a connection using the 212 ** atomic-write optimization. In this case the first 28 bytes of the 213 ** journal file may be written as part of committing the transaction. */ 214 assert( iOfst<=p->endpoint.iOffset ); 215 if( iOfst>0 && iOfst!=p->endpoint.iOffset ){ 216 memjrnlTruncate(pJfd, iOfst); 217 } 218 if( iOfst==0 && p->pFirst ){ 219 assert( p->nChunkSize>iAmt ); 220 memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); 221 }else{ 222 while( nWrite>0 ){ 223 FileChunk *pChunk = p->endpoint.pChunk; 224 int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize); 225 int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset); 226 227 assert( pChunk!=0 || iChunkOffset==0 ); 228 if( iChunkOffset==0 ){ 229 /* New chunk is required to extend the file. */ 230 FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize)); 231 if( !pNew ){ 232 return SQLITE_IOERR_NOMEM_BKPT; 233 } 234 pNew->pNext = 0; 235 if( pChunk ){ 236 assert( p->pFirst ); 237 pChunk->pNext = pNew; 238 }else{ 239 assert( !p->pFirst ); 240 p->pFirst = pNew; 241 } 242 pChunk = p->endpoint.pChunk = pNew; 243 } 244 245 assert( pChunk!=0 ); 246 memcpy((u8*)pChunk->zChunk + iChunkOffset, zWrite, iSpace); 247 zWrite += iSpace; 248 nWrite -= iSpace; 249 p->endpoint.iOffset += iSpace; 250 } 251 } 252 } 253 254 return SQLITE_OK; 255 } 256 257 /* 258 ** Truncate the in-memory file. 259 */ 260 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ 261 MemJournal *p = (MemJournal *)pJfd; 262 assert( p->endpoint.pChunk==0 || p->endpoint.pChunk->pNext==0 ); 263 if( size<p->endpoint.iOffset ){ 264 FileChunk *pIter = 0; 265 if( size==0 ){ 266 memjrnlFreeChunks(p->pFirst); 267 p->pFirst = 0; 268 }else{ 269 i64 iOff = p->nChunkSize; 270 for(pIter=p->pFirst; ALWAYS(pIter) && iOff<size; pIter=pIter->pNext){ 271 iOff += p->nChunkSize; 272 } 273 if( ALWAYS(pIter) ){ 274 memjrnlFreeChunks(pIter->pNext); 275 pIter->pNext = 0; 276 } 277 } 278 279 p->endpoint.pChunk = pIter; 280 p->endpoint.iOffset = size; 281 p->readpoint.pChunk = 0; 282 p->readpoint.iOffset = 0; 283 } 284 return SQLITE_OK; 285 } 286 287 /* 288 ** Close the file. 289 */ 290 static int memjrnlClose(sqlite3_file *pJfd){ 291 MemJournal *p = (MemJournal *)pJfd; 292 memjrnlFreeChunks(p->pFirst); 293 return SQLITE_OK; 294 } 295 296 /* 297 ** Sync the file. 298 ** 299 ** If the real file has been created, call its xSync method. Otherwise, 300 ** syncing an in-memory journal is a no-op. 301 */ 302 static int memjrnlSync(sqlite3_file *pJfd, int flags){ 303 UNUSED_PARAMETER2(pJfd, flags); 304 return SQLITE_OK; 305 } 306 307 /* 308 ** Query the size of the file in bytes. 309 */ 310 static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ 311 MemJournal *p = (MemJournal *)pJfd; 312 *pSize = (sqlite_int64) p->endpoint.iOffset; 313 return SQLITE_OK; 314 } 315 316 /* 317 ** Table of methods for MemJournal sqlite3_file object. 318 */ 319 static const struct sqlite3_io_methods MemJournalMethods = { 320 1, /* iVersion */ 321 memjrnlClose, /* xClose */ 322 memjrnlRead, /* xRead */ 323 memjrnlWrite, /* xWrite */ 324 memjrnlTruncate, /* xTruncate */ 325 memjrnlSync, /* xSync */ 326 memjrnlFileSize, /* xFileSize */ 327 0, /* xLock */ 328 0, /* xUnlock */ 329 0, /* xCheckReservedLock */ 330 0, /* xFileControl */ 331 0, /* xSectorSize */ 332 0, /* xDeviceCharacteristics */ 333 0, /* xShmMap */ 334 0, /* xShmLock */ 335 0, /* xShmBarrier */ 336 0, /* xShmUnmap */ 337 0, /* xFetch */ 338 0 /* xUnfetch */ 339 }; 340 341 /* 342 ** Open a journal file. 343 ** 344 ** The behaviour of the journal file depends on the value of parameter 345 ** nSpill. If nSpill is 0, then the journal file is always create and 346 ** accessed using the underlying VFS. If nSpill is less than zero, then 347 ** all content is always stored in main-memory. Finally, if nSpill is a 348 ** positive value, then the journal file is initially created in-memory 349 ** but may be flushed to disk later on. In this case the journal file is 350 ** flushed to disk either when it grows larger than nSpill bytes in size, 351 ** or when sqlite3JournalCreate() is called. 352 */ 353 int sqlite3JournalOpen( 354 sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ 355 const char *zName, /* Name of the journal file */ 356 sqlite3_file *pJfd, /* Preallocated, blank file handle */ 357 int flags, /* Opening flags */ 358 int nSpill /* Bytes buffered before opening the file */ 359 ){ 360 MemJournal *p = (MemJournal*)pJfd; 361 362 /* Zero the file-handle object. If nSpill was passed zero, initialize 363 ** it using the sqlite3OsOpen() function of the underlying VFS. In this 364 ** case none of the code in this module is executed as a result of calls 365 ** made on the journal file-handle. */ 366 memset(p, 0, sizeof(MemJournal)); 367 if( nSpill==0 ){ 368 return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); 369 } 370 371 if( nSpill>0 ){ 372 p->nChunkSize = nSpill; 373 }else{ 374 p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk); 375 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) ); 376 } 377 378 pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods; 379 p->nSpill = nSpill; 380 p->flags = flags; 381 p->zJournal = zName; 382 p->pVfs = pVfs; 383 return SQLITE_OK; 384 } 385 386 /* 387 ** Open an in-memory journal file. 388 */ 389 void sqlite3MemJournalOpen(sqlite3_file *pJfd){ 390 sqlite3JournalOpen(0, 0, pJfd, 0, -1); 391 } 392 393 #if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ 394 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 395 /* 396 ** If the argument p points to a MemJournal structure that is not an 397 ** in-memory-only journal file (i.e. is one that was opened with a +ve 398 ** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying 399 ** file has not yet been created, create it now. 400 */ 401 int sqlite3JournalCreate(sqlite3_file *pJfd){ 402 int rc = SQLITE_OK; 403 MemJournal *p = (MemJournal*)pJfd; 404 if( pJfd->pMethods==&MemJournalMethods && ( 405 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 406 p->nSpill>0 407 #else 408 /* While this appears to not be possible without ATOMIC_WRITE, the 409 ** paths are complex, so it seems prudent to leave the test in as 410 ** a NEVER(), in case our analysis is subtly flawed. */ 411 NEVER(p->nSpill>0) 412 #endif 413 #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE 414 || (p->flags & SQLITE_OPEN_MAIN_JOURNAL) 415 #endif 416 )){ 417 rc = memjrnlCreateFile(p); 418 } 419 return rc; 420 } 421 #endif 422 423 /* 424 ** The file-handle passed as the only argument is open on a journal file. 425 ** Return true if this "journal file" is currently stored in heap memory, 426 ** or false otherwise. 427 */ 428 int sqlite3JournalIsInMemory(sqlite3_file *p){ 429 return p->pMethods==&MemJournalMethods; 430 } 431 432 /* 433 ** Return the number of bytes required to store a JournalFile that uses vfs 434 ** pVfs to create the underlying on-disk files. 435 */ 436 int sqlite3JournalSize(sqlite3_vfs *pVfs){ 437 return MAX(pVfs->szOsFile, (int)sizeof(MemJournal)); 438 } 439