xref: /sqlite-3.40.0/src/memjournal.c (revision dedd51ae)
1 /*
2 ** 2008 October 7
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This file contains code used to implement an in-memory rollback journal.
14 ** The in-memory rollback journal is used to journal transactions for
15 ** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
16 **
17 ** Update:  The in-memory journal is also used to temporarily cache
18 ** smaller journals that are not critical for power-loss recovery.
19 ** For example, statement journals that are not too big will be held
20 ** entirely in memory, thus reducing the number of file I/O calls, and
21 ** more importantly, reducing temporary file creation events.  If these
22 ** journals become too large for memory, they are spilled to disk.  But
23 ** in the common case, they are usually small and no file I/O needs to
24 ** occur.
25 */
26 #include "sqliteInt.h"
27 
/*
** Forward typedefs, so that the structures defined below can refer to
** one another by their short names.
*/
typedef struct FileChunk FileChunk;
typedef struct FilePoint FilePoint;
typedef struct MemJournal MemJournal;
32 
33 /*
34 ** The rollback journal is composed of a linked list of these structures.
35 **
36 ** The zChunk array is always at least 8 bytes in size - usually much more.
37 ** Its actual size is stored in the MemJournal.nChunkSize variable.
38 */
39 struct FileChunk {
40   FileChunk *pNext;               /* Next chunk in the journal */
41   u8 zChunk[8];                   /* Content of this chunk */
42 };
43 
/*
** Total number of bytes allocated for each FileChunk object when the
** default chunk size is in use.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024

/*
** Number of bytes to allocate for one FileChunk whose zChunk[] array
** must hold nChunkSize bytes: the declared size of the structure, with
** the 8 placeholder bytes of zChunk[] exchanged for the real payload size.
*/
#define fileChunkSize(nChunkSize) ((sizeof(FileChunk) - 8) + (nChunkSize))
54 
55 /*
56 ** An instance of this object serves as a cursor into the rollback journal.
57 ** The cursor can be either for reading or writing.
58 */
59 struct FilePoint {
60   sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
61   FileChunk *pChunk;              /* Specific chunk into which cursor points */
62 };
63 
64 /*
65 ** This structure is a subclass of sqlite3_file. Each open memory-journal
66 ** is an instance of this class.
67 */
68 struct MemJournal {
69   const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
70   int nChunkSize;                 /* In-memory chunk-size */
71 
72   int nSpill;                     /* Bytes of data before flushing */
73   FileChunk *pFirst;              /* Head of in-memory chunk-list */
74   FilePoint endpoint;             /* Pointer to the end of the file */
75   FilePoint readpoint;            /* Pointer to the end of the last xRead() */
76 
77   int flags;                      /* xOpen flags */
78   sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
79   const char *zJournal;           /* Name of the journal file */
80 };
81 
82 /*
83 ** Read data from the in-memory journal file.  This is the implementation
84 ** of the sqlite3_vfs.xRead method.
85 */
86 static int memjrnlRead(
87   sqlite3_file *pJfd,    /* The journal file from which to read */
88   void *zBuf,            /* Put the results here */
89   int iAmt,              /* Number of bytes to read */
90   sqlite_int64 iOfst     /* Begin reading at this offset */
91 ){
92   MemJournal *p = (MemJournal *)pJfd;
93   u8 *zOut = zBuf;
94   int nRead = iAmt;
95   int iChunkOffset;
96   FileChunk *pChunk;
97 
98   if( (iAmt+iOfst)>p->endpoint.iOffset ){
99     return SQLITE_IOERR_SHORT_READ;
100   }
101   assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
102   if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
103     sqlite3_int64 iOff = 0;
104     for(pChunk=p->pFirst;
105         ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
106         pChunk=pChunk->pNext
107     ){
108       iOff += p->nChunkSize;
109     }
110   }else{
111     pChunk = p->readpoint.pChunk;
112     assert( pChunk!=0 );
113   }
114 
115   iChunkOffset = (int)(iOfst%p->nChunkSize);
116   do {
117     int iSpace = p->nChunkSize - iChunkOffset;
118     int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
119     memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
120     zOut += nCopy;
121     nRead -= iSpace;
122     iChunkOffset = 0;
123   } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
124   p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
125   p->readpoint.pChunk = pChunk;
126 
127   return SQLITE_OK;
128 }
129 
130 /*
131 ** Free the list of FileChunk structures headed at MemJournal.pFirst.
132 */
133 static void memjrnlFreeChunks(FileChunk *pFirst){
134   FileChunk *pIter;
135   FileChunk *pNext;
136   for(pIter=pFirst; pIter; pIter=pNext){
137     pNext = pIter->pNext;
138     sqlite3_free(pIter);
139   }
140 }
141 
142 /*
143 ** Flush the contents of memory to a real file on disk.
144 */
145 static int memjrnlCreateFile(MemJournal *p){
146   int rc;
147   sqlite3_file *pReal = (sqlite3_file*)p;
148   MemJournal copy = *p;
149 
150   memset(p, 0, sizeof(MemJournal));
151   rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
152   if( rc==SQLITE_OK ){
153     int nChunk = copy.nChunkSize;
154     i64 iOff = 0;
155     FileChunk *pIter;
156     for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
157       if( iOff + nChunk > copy.endpoint.iOffset ){
158         nChunk = copy.endpoint.iOffset - iOff;
159       }
160       rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
161       if( rc ) break;
162       iOff += nChunk;
163     }
164     if( rc==SQLITE_OK ){
165       /* No error has occurred. Free the in-memory buffers. */
166       memjrnlFreeChunks(copy.pFirst);
167     }
168   }
169   if( rc!=SQLITE_OK ){
170     /* If an error occurred while creating or writing to the file, restore
171     ** the original before returning. This way, SQLite uses the in-memory
172     ** journal data to roll back changes made to the internal page-cache
173     ** before this function was called.  */
174     sqlite3OsClose(pReal);
175     *p = copy;
176   }
177   return rc;
178 }
179 
180 
181 /* Forward reference */
182 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size);
183 
184 /*
185 ** Write data to the file.
186 */
187 static int memjrnlWrite(
188   sqlite3_file *pJfd,    /* The journal file into which to write */
189   const void *zBuf,      /* Take data to be written from here */
190   int iAmt,              /* Number of bytes to write */
191   sqlite_int64 iOfst     /* Begin writing at this offset into the file */
192 ){
193   MemJournal *p = (MemJournal *)pJfd;
194   int nWrite = iAmt;
195   u8 *zWrite = (u8 *)zBuf;
196 
197   /* If the file should be created now, create it and write the new data
198   ** into the file on disk. */
199   if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
200     int rc = memjrnlCreateFile(p);
201     if( rc==SQLITE_OK ){
202       rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
203     }
204     return rc;
205   }
206 
207   /* If the contents of this write should be stored in memory */
208   else{
209     /* An in-memory journal file should only ever be appended to. Random
210     ** access writes are not required. The only exception to this is when
211     ** the in-memory journal is being used by a connection using the
212     ** atomic-write optimization. In this case the first 28 bytes of the
213     ** journal file may be written as part of committing the transaction. */
214     assert( iOfst<=p->endpoint.iOffset );
215     if( iOfst>0 && iOfst!=p->endpoint.iOffset ){
216       memjrnlTruncate(pJfd, iOfst);
217     }
218     if( iOfst==0 && p->pFirst ){
219       assert( p->nChunkSize>iAmt );
220       memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
221     }else{
222       while( nWrite>0 ){
223         FileChunk *pChunk = p->endpoint.pChunk;
224         int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
225         int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
226 
227         assert( pChunk!=0 || iChunkOffset==0 );
228         if( iChunkOffset==0 ){
229           /* New chunk is required to extend the file. */
230           FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
231           if( !pNew ){
232             return SQLITE_IOERR_NOMEM_BKPT;
233           }
234           pNew->pNext = 0;
235           if( pChunk ){
236             assert( p->pFirst );
237             pChunk->pNext = pNew;
238           }else{
239             assert( !p->pFirst );
240             p->pFirst = pNew;
241           }
242           pChunk = p->endpoint.pChunk = pNew;
243         }
244 
245         assert( pChunk!=0 );
246         memcpy((u8*)pChunk->zChunk + iChunkOffset, zWrite, iSpace);
247         zWrite += iSpace;
248         nWrite -= iSpace;
249         p->endpoint.iOffset += iSpace;
250       }
251     }
252   }
253 
254   return SQLITE_OK;
255 }
256 
257 /*
258 ** Truncate the in-memory file.
259 */
260 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
261   MemJournal *p = (MemJournal *)pJfd;
262   assert( p->endpoint.pChunk==0 || p->endpoint.pChunk->pNext==0 );
263   if( size<p->endpoint.iOffset ){
264     FileChunk *pIter = 0;
265     if( size==0 ){
266       memjrnlFreeChunks(p->pFirst);
267       p->pFirst = 0;
268     }else{
269       i64 iOff = p->nChunkSize;
270       for(pIter=p->pFirst; ALWAYS(pIter) && iOff<size; pIter=pIter->pNext){
271         iOff += p->nChunkSize;
272       }
273       if( ALWAYS(pIter) ){
274         memjrnlFreeChunks(pIter->pNext);
275         pIter->pNext = 0;
276       }
277     }
278 
279     p->endpoint.pChunk = pIter;
280     p->endpoint.iOffset = size;
281     p->readpoint.pChunk = 0;
282     p->readpoint.iOffset = 0;
283   }
284   return SQLITE_OK;
285 }
286 
287 /*
288 ** Close the file.
289 */
290 static int memjrnlClose(sqlite3_file *pJfd){
291   MemJournal *p = (MemJournal *)pJfd;
292   memjrnlFreeChunks(p->pFirst);
293   return SQLITE_OK;
294 }
295 
296 /*
297 ** Sync the file.
298 **
299 ** If the real file has been created, call its xSync method. Otherwise,
300 ** syncing an in-memory journal is a no-op.
301 */
302 static int memjrnlSync(sqlite3_file *pJfd, int flags){
303   UNUSED_PARAMETER2(pJfd, flags);
304   return SQLITE_OK;
305 }
306 
307 /*
308 ** Query the size of the file in bytes.
309 */
310 static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
311   MemJournal *p = (MemJournal *)pJfd;
312   *pSize = (sqlite_int64) p->endpoint.iOffset;
313   return SQLITE_OK;
314 }
315 
316 /*
317 ** Table of methods for MemJournal sqlite3_file object.
318 */
319 static const struct sqlite3_io_methods MemJournalMethods = {
320   1,                /* iVersion */
321   memjrnlClose,     /* xClose */
322   memjrnlRead,      /* xRead */
323   memjrnlWrite,     /* xWrite */
324   memjrnlTruncate,  /* xTruncate */
325   memjrnlSync,      /* xSync */
326   memjrnlFileSize,  /* xFileSize */
327   0,                /* xLock */
328   0,                /* xUnlock */
329   0,                /* xCheckReservedLock */
330   0,                /* xFileControl */
331   0,                /* xSectorSize */
332   0,                /* xDeviceCharacteristics */
333   0,                /* xShmMap */
334   0,                /* xShmLock */
335   0,                /* xShmBarrier */
336   0,                /* xShmUnmap */
337   0,                /* xFetch */
338   0                 /* xUnfetch */
339 };
340 
341 /*
342 ** Open a journal file.
343 **
344 ** The behaviour of the journal file depends on the value of parameter
345 ** nSpill. If nSpill is 0, then the journal file is always create and
346 ** accessed using the underlying VFS. If nSpill is less than zero, then
347 ** all content is always stored in main-memory. Finally, if nSpill is a
348 ** positive value, then the journal file is initially created in-memory
349 ** but may be flushed to disk later on. In this case the journal file is
350 ** flushed to disk either when it grows larger than nSpill bytes in size,
351 ** or when sqlite3JournalCreate() is called.
352 */
353 int sqlite3JournalOpen(
354   sqlite3_vfs *pVfs,         /* The VFS to use for actual file I/O */
355   const char *zName,         /* Name of the journal file */
356   sqlite3_file *pJfd,        /* Preallocated, blank file handle */
357   int flags,                 /* Opening flags */
358   int nSpill                 /* Bytes buffered before opening the file */
359 ){
360   MemJournal *p = (MemJournal*)pJfd;
361 
362   /* Zero the file-handle object. If nSpill was passed zero, initialize
363   ** it using the sqlite3OsOpen() function of the underlying VFS. In this
364   ** case none of the code in this module is executed as a result of calls
365   ** made on the journal file-handle.  */
366   memset(p, 0, sizeof(MemJournal));
367   if( nSpill==0 ){
368     return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
369   }
370 
371   if( nSpill>0 ){
372     p->nChunkSize = nSpill;
373   }else{
374     p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
375     assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) );
376   }
377 
378   pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods;
379   p->nSpill = nSpill;
380   p->flags = flags;
381   p->zJournal = zName;
382   p->pVfs = pVfs;
383   return SQLITE_OK;
384 }
385 
386 /*
387 ** Open an in-memory journal file.
388 */
389 void sqlite3MemJournalOpen(sqlite3_file *pJfd){
390   sqlite3JournalOpen(0, 0, pJfd, 0, -1);
391 }
392 
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
/*
** If pJfd is an in-memory journal that is allowed to be backed by a real
** file (it was opened with a positive nSpill or, in batch-atomic-write
** builds, with the SQLITE_OPEN_MAIN_JOURNAL flag) and that file has not
** yet been created, create it now.
**
** Returns the result of memjrnlCreateFile() when a file is created.
** Returns SQLITE_OK when there is nothing to do.
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
  MemJournal *p = (MemJournal*)pJfd;
  int bCreate;

  /* Handles not using MemJournalMethods are left untouched.  This
  ** includes a journal that has already been moved to a real file. */
  if( pJfd->pMethods!=&MemJournalMethods ){
    return SQLITE_OK;
  }

#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  bCreate = p->nSpill>0;
#else
  /* While this appears to not be possible without ATOMIC_WRITE, the
  ** paths are complex, so it seems prudent to leave the test in as
  ** a NEVER(), in case our analysis is subtly flawed. */
  bCreate = NEVER(p->nSpill>0);
#endif
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
  if( !bCreate ){
    bCreate = (p->flags & SQLITE_OPEN_MAIN_JOURNAL)!=0;
  }
#endif

  return bCreate ? memjrnlCreateFile(p) : SQLITE_OK;
}
#endif
422 
423 /*
424 ** The file-handle passed as the only argument is open on a journal file.
425 ** Return true if this "journal file" is currently stored in heap memory,
426 ** or false otherwise.
427 */
428 int sqlite3JournalIsInMemory(sqlite3_file *p){
429   return p->pMethods==&MemJournalMethods;
430 }
431 
432 /*
433 ** Return the number of bytes required to store a JournalFile that uses vfs
434 ** pVfs to create the underlying on-disk files.
435 */
436 int sqlite3JournalSize(sqlite3_vfs *pVfs){
437   return MAX(pVfs->szOsFile, (int)sizeof(MemJournal));
438 }
439