1 /* 2 ** 2008 October 7 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** This file contains code use to implement an in-memory rollback journal. 14 ** The in-memory rollback journal is used to journal transactions for 15 ** ":memory:" databases and when the journal_mode=MEMORY pragma is used. 16 ** 17 ** Update: The in-memory journal is also used to temporarily cache 18 ** smaller journals that are not critical for power-loss recovery. 19 ** For example, statement journals that are not too big will be held 20 ** entirely in memory, thus reducing the number of file I/O calls, and 21 ** more importantly, reducing temporary file creation events. If these 22 ** journals become too large for memory, they are spilled to disk. But 23 ** in the common case, they are usually small and no file I/O needs to 24 ** occur. 25 */ 26 #include "sqliteInt.h" 27 28 /* Forward references to internal structures */ 29 typedef struct MemJournal MemJournal; 30 typedef struct FilePoint FilePoint; 31 typedef struct FileChunk FileChunk; 32 33 /* 34 ** The rollback journal is composed of a linked list of these structures. 35 ** 36 ** The zChunk array is always at least 8 bytes in size - usually much more. 37 ** Its actual size is stored in the MemJournal.nChunkSize variable. 38 */ 39 struct FileChunk { 40 FileChunk *pNext; /* Next chunk in the journal */ 41 u8 zChunk[8]; /* Content of this chunk */ 42 }; 43 44 /* 45 ** By default, allocate this many bytes of memory for each FileChunk object. 46 */ 47 #define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024 48 49 /* 50 ** For chunk size nChunkSize, return the number of bytes that should 51 ** be allocated for each FileChunk structure. 52 */ 53 #define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8)) 54 55 /* 56 ** An instance of this object serves as a cursor into the rollback journal. 57 ** The cursor can be either for reading or writing. 58 */ 59 struct FilePoint { 60 sqlite3_int64 iOffset; /* Offset from the beginning of the file */ 61 FileChunk *pChunk; /* Specific chunk into which cursor points */ 62 }; 63 64 /* 65 ** This structure is a subclass of sqlite3_file. Each open memory-journal 66 ** is an instance of this class. 67 */ 68 struct MemJournal { 69 const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ 70 int nChunkSize; /* In-memory chunk-size */ 71 72 int nSpill; /* Bytes of data before flushing */ 73 int nSize; /* Bytes of data currently in memory */ 74 FileChunk *pFirst; /* Head of in-memory chunk-list */ 75 FilePoint endpoint; /* Pointer to the end of the file */ 76 FilePoint readpoint; /* Pointer to the end of the last xRead() */ 77 78 int flags; /* xOpen flags */ 79 sqlite3_vfs *pVfs; /* The "real" underlying VFS */ 80 const char *zJournal; /* Name of the journal file */ 81 }; 82 83 /* 84 ** Read data from the in-memory journal file. This is the implementation 85 ** of the sqlite3_vfs.xRead method. 86 */ 87 static int memjrnlRead( 88 sqlite3_file *pJfd, /* The journal file from which to read */ 89 void *zBuf, /* Put the results here */ 90 int iAmt, /* Number of bytes to read */ 91 sqlite_int64 iOfst /* Begin reading at this offset */ 92 ){ 93 MemJournal *p = (MemJournal *)pJfd; 94 u8 *zOut = zBuf; 95 int nRead = iAmt; 96 int iChunkOffset; 97 FileChunk *pChunk; 98 99 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 100 if( (iAmt+iOfst)>p->endpoint.iOffset ){ 101 return SQLITE_IOERR_SHORT_READ; 102 } 103 #endif 104 105 assert( (iAmt+iOfst)<=p->endpoint.iOffset ); 106 assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 ); 107 if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ 108 sqlite3_int64 iOff = 0; 109 for(pChunk=p->pFirst; 110 ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst; 111 pChunk=pChunk->pNext 112 ){ 113 iOff += p->nChunkSize; 114 } 115 }else{ 116 pChunk = p->readpoint.pChunk; 117 assert( pChunk!=0 ); 118 } 119 120 iChunkOffset = (int)(iOfst%p->nChunkSize); 121 do { 122 int iSpace = p->nChunkSize - iChunkOffset; 123 int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset)); 124 memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy); 125 zOut += nCopy; 126 nRead -= iSpace; 127 iChunkOffset = 0; 128 } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); 129 p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0; 130 p->readpoint.pChunk = pChunk; 131 132 return SQLITE_OK; 133 } 134 135 /* 136 ** Free the list of FileChunk structures headed at MemJournal.pFirst. 137 */ 138 static void memjrnlFreeChunks(MemJournal *p){ 139 FileChunk *pIter; 140 FileChunk *pNext; 141 for(pIter=p->pFirst; pIter; pIter=pNext){ 142 pNext = pIter->pNext; 143 sqlite3_free(pIter); 144 } 145 p->pFirst = 0; 146 } 147 148 /* 149 ** Flush the contents of memory to a real file on disk. 150 */ 151 static int memjrnlCreateFile(MemJournal *p){ 152 int rc; 153 sqlite3_file *pReal = (sqlite3_file*)p; 154 MemJournal copy = *p; 155 156 memset(p, 0, sizeof(MemJournal)); 157 rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0); 158 if( rc==SQLITE_OK ){ 159 int nChunk = copy.nChunkSize; 160 i64 iOff = 0; 161 FileChunk *pIter; 162 for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){ 163 if( iOff + nChunk > copy.endpoint.iOffset ){ 164 nChunk = copy.endpoint.iOffset - iOff; 165 } 166 rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff); 167 if( rc ) break; 168 iOff += nChunk; 169 } 170 if( rc==SQLITE_OK ){ 171 /* No error has occurred. Free the in-memory buffers. */ 172 memjrnlFreeChunks(©); 173 } 174 } 175 if( rc!=SQLITE_OK ){ 176 /* If an error occurred while creating or writing to the file, restore 177 ** the original before returning. This way, SQLite uses the in-memory 178 ** journal data to roll back changes made to the internal page-cache 179 ** before this function was called. */ 180 sqlite3OsClose(pReal); 181 *p = copy; 182 } 183 return rc; 184 } 185 186 187 /* 188 ** Write data to the file. 189 */ 190 static int memjrnlWrite( 191 sqlite3_file *pJfd, /* The journal file into which to write */ 192 const void *zBuf, /* Take data to be written from here */ 193 int iAmt, /* Number of bytes to write */ 194 sqlite_int64 iOfst /* Begin writing at this offset into the file */ 195 ){ 196 MemJournal *p = (MemJournal *)pJfd; 197 int nWrite = iAmt; 198 u8 *zWrite = (u8 *)zBuf; 199 200 /* If the file should be created now, create it and write the new data 201 ** into the file on disk. */ 202 if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){ 203 int rc = memjrnlCreateFile(p); 204 if( rc==SQLITE_OK ){ 205 rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst); 206 } 207 return rc; 208 } 209 210 /* If the contents of this write should be stored in memory */ 211 else{ 212 /* An in-memory journal file should only ever be appended to. Random 213 ** access writes are not required. The only exception to this is when 214 ** the in-memory journal is being used by a connection using the 215 ** atomic-write optimization. In this case the first 28 bytes of the 216 ** journal file may be written as part of committing the transaction. */ 217 assert( iOfst==p->endpoint.iOffset || iOfst==0 ); 218 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 219 if( iOfst==0 && p->pFirst ){ 220 assert( p->nChunkSize>iAmt ); 221 memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); 222 }else 223 #else 224 assert( iOfst>0 || p->pFirst==0 ); 225 #endif 226 { 227 while( nWrite>0 ){ 228 FileChunk *pChunk = p->endpoint.pChunk; 229 int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize); 230 int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset); 231 232 if( iChunkOffset==0 ){ 233 /* New chunk is required to extend the file. */ 234 FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize)); 235 if( !pNew ){ 236 return SQLITE_IOERR_NOMEM_BKPT; 237 } 238 pNew->pNext = 0; 239 if( pChunk ){ 240 assert( p->pFirst ); 241 pChunk->pNext = pNew; 242 }else{ 243 assert( !p->pFirst ); 244 p->pFirst = pNew; 245 } 246 p->endpoint.pChunk = pNew; 247 } 248 249 memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace); 250 zWrite += iSpace; 251 nWrite -= iSpace; 252 p->endpoint.iOffset += iSpace; 253 } 254 p->nSize = iAmt + iOfst; 255 } 256 } 257 258 return SQLITE_OK; 259 } 260 261 /* 262 ** Truncate the file. 263 ** 264 ** If the journal file is already on disk, truncate it there. Or, if it 265 ** is still in main memory but is being truncated to zero bytes in size, 266 ** ignore 267 */ 268 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ 269 MemJournal *p = (MemJournal *)pJfd; 270 if( ALWAYS(size==0) ){ 271 memjrnlFreeChunks(p); 272 p->nSize = 0; 273 p->endpoint.pChunk = 0; 274 p->endpoint.iOffset = 0; 275 p->readpoint.pChunk = 0; 276 p->readpoint.iOffset = 0; 277 } 278 return SQLITE_OK; 279 } 280 281 /* 282 ** Close the file. 283 */ 284 static int memjrnlClose(sqlite3_file *pJfd){ 285 MemJournal *p = (MemJournal *)pJfd; 286 memjrnlFreeChunks(p); 287 return SQLITE_OK; 288 } 289 290 /* 291 ** Sync the file. 292 ** 293 ** If the real file has been created, call its xSync method. Otherwise, 294 ** syncing an in-memory journal is a no-op. 295 */ 296 static int memjrnlSync(sqlite3_file *pJfd, int flags){ 297 UNUSED_PARAMETER2(pJfd, flags); 298 return SQLITE_OK; 299 } 300 301 /* 302 ** Query the size of the file in bytes. 303 */ 304 static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ 305 MemJournal *p = (MemJournal *)pJfd; 306 *pSize = (sqlite_int64) p->endpoint.iOffset; 307 return SQLITE_OK; 308 } 309 310 /* 311 ** Table of methods for MemJournal sqlite3_file object. 312 */ 313 static const struct sqlite3_io_methods MemJournalMethods = { 314 1, /* iVersion */ 315 memjrnlClose, /* xClose */ 316 memjrnlRead, /* xRead */ 317 memjrnlWrite, /* xWrite */ 318 memjrnlTruncate, /* xTruncate */ 319 memjrnlSync, /* xSync */ 320 memjrnlFileSize, /* xFileSize */ 321 0, /* xLock */ 322 0, /* xUnlock */ 323 0, /* xCheckReservedLock */ 324 0, /* xFileControl */ 325 0, /* xSectorSize */ 326 0, /* xDeviceCharacteristics */ 327 0, /* xShmMap */ 328 0, /* xShmLock */ 329 0, /* xShmBarrier */ 330 0, /* xShmUnmap */ 331 0, /* xFetch */ 332 0 /* xUnfetch */ 333 }; 334 335 /* 336 ** Open a journal file. 337 ** 338 ** The behaviour of the journal file depends on the value of parameter 339 ** nSpill. If nSpill is 0, then the journal file is always create and 340 ** accessed using the underlying VFS. If nSpill is less than zero, then 341 ** all content is always stored in main-memory. Finally, if nSpill is a 342 ** positive value, then the journal file is initially created in-memory 343 ** but may be flushed to disk later on. In this case the journal file is 344 ** flushed to disk either when it grows larger than nSpill bytes in size, 345 ** or when sqlite3JournalCreate() is called. 346 */ 347 int sqlite3JournalOpen( 348 sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ 349 const char *zName, /* Name of the journal file */ 350 sqlite3_file *pJfd, /* Preallocated, blank file handle */ 351 int flags, /* Opening flags */ 352 int nSpill /* Bytes buffered before opening the file */ 353 ){ 354 MemJournal *p = (MemJournal*)pJfd; 355 356 /* Zero the file-handle object. If nSpill was passed zero, initialize 357 ** it using the sqlite3OsOpen() function of the underlying VFS. In this 358 ** case none of the code in this module is executed as a result of calls 359 ** made on the journal file-handle. */ 360 memset(p, 0, sizeof(MemJournal)); 361 if( nSpill==0 ){ 362 return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); 363 } 364 365 if( nSpill>0 ){ 366 p->nChunkSize = nSpill; 367 }else{ 368 p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk); 369 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) ); 370 } 371 372 p->pMethod = (const sqlite3_io_methods*)&MemJournalMethods; 373 p->nSpill = nSpill; 374 p->flags = flags; 375 p->zJournal = zName; 376 p->pVfs = pVfs; 377 return SQLITE_OK; 378 } 379 380 /* 381 ** Open an in-memory journal file. 382 */ 383 void sqlite3MemJournalOpen(sqlite3_file *pJfd){ 384 sqlite3JournalOpen(0, 0, pJfd, 0, -1); 385 } 386 387 #ifdef SQLITE_ENABLE_ATOMIC_WRITE 388 /* 389 ** If the argument p points to a MemJournal structure that is not an 390 ** in-memory-only journal file (i.e. is one that was opened with a +ve 391 ** nSpill parameter), and the underlying file has not yet been created, 392 ** create it now. 393 */ 394 int sqlite3JournalCreate(sqlite3_file *p){ 395 int rc = SQLITE_OK; 396 if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){ 397 rc = memjrnlCreateFile((MemJournal*)p); 398 } 399 return rc; 400 } 401 #endif 402 403 /* 404 ** The file-handle passed as the only argument is open on a journal file. 405 ** Return true if this "journal file" is currently stored in heap memory, 406 ** or false otherwise. 407 */ 408 int sqlite3JournalIsInMemory(sqlite3_file *p){ 409 return p->pMethods==&MemJournalMethods; 410 } 411 412 /* 413 ** Return the number of bytes required to store a JournalFile that uses vfs 414 ** pVfs to create the underlying on-disk files. 415 */ 416 int sqlite3JournalSize(sqlite3_vfs *pVfs){ 417 return MAX(pVfs->szOsFile, (int)sizeof(MemJournal)); 418 } 419