xref: /sqlite-3.40.0/ext/async/sqlite3async.c (revision debcfd2d)
1 /*
2 ** 2005 December 14
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** $Id: sqlite3async.c,v 1.2 2009/04/24 09:27:16 danielk1977 Exp $
14 **
15 ** This file contains the implementation of an asynchronous IO backend
16 ** for SQLite.
17 */
18 
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20 
21 #include "sqlite3async.h"
22 
23 #define ENABLE_FILE_LOCKING
24 
/*
** Two-argument minimum and maximum, used in several places. Because these
** are macros, an argument may be evaluated more than once: never pass an
** expression that has side effects.
*/
#define MIN(x,y) (((x)<(y)) ? (x) : (y))
#define MAX(x,y) (((x)>(y)) ? (x) : (y))
28 
/*
** Forward references. The structures themselves are defined further down
** in this file; these typedefs allow them to refer to one another.
*/
typedef struct AsyncFile AsyncFile;
typedef struct AsyncFileData AsyncFileData;
typedef struct AsyncFileLock AsyncFileLock;
typedef struct AsyncLock AsyncLock;
typedef struct AsyncWrite AsyncWrite;
35 
/*
** Debug tracing. Set sqlite3async_trace to a non-zero value to have each
** ASYNC_TRACE() invocation print a message to stderr.
**
** ASYNC_TRACE() takes a single, parenthesized printf-style argument list:
**
**     ASYNC_TRACE(("PUSH %p\n", p));
**
** The macro is written as an if/else so that it remains a single statement
** and cannot capture an "else" that follows it in the calling code.
*/
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) if( !sqlite3async_trace ){}else asyncTrace X

/*
** Format the arguments with sqlite3_vmprintf() and write the result to
** stderr. If the message cannot be allocated (sqlite3_vmprintf() returns
** NULL), nothing is printed.
*/
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
48 
49 /*
50 ** THREAD SAFETY NOTES
51 **
52 ** Basic rules:
53 **
54 **     * Both read and write access to the global write-op queue must be
55 **       protected by the async.queueMutex. As are the async.ioError and
56 **       async.nFile variables.
57 **
58 **     * The async.pLock list and all AsyncLock and AsyncFileLock
59 **       structures must be protected by the async.lockMutex mutex.
60 **
61 **     * The file handles from the underlying system are not assumed to
62 **       be thread safe.
63 **
64 **     * See the last two paragraphs under "The Writer Thread" for
65 **       an assumption to do with file-handle synchronization by the Os.
66 **
67 ** Deadlock prevention:
68 **
69 **     There are three mutexes used by the system: the "writer" mutex,
70 **     the "queue" mutex and the "lock" mutex. Rules are:
71 **
72 **     * It is illegal to block on the writer mutex when any other mutex
73 **       are held, and
74 **
75 **     * It is illegal to block on the queue mutex when the lock mutex
76 **       is held.
77 **
78 **     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
79 **
80 ** File system operations (invoked by SQLite thread):
81 **
82 **     xOpen
83 **     xDelete
84 **     xFileExists
85 **
86 ** File handle operations (invoked by SQLite thread):
87 **
88 **         asyncWrite, asyncClose, asyncTruncate, asyncSync
89 **
90 **     The operations above add an entry to the global write-op list. They
91 **     prepare the entry, acquire the async.queueMutex momentarily while
92 **     list pointers are  manipulated to insert the new entry, then release
93 **     the mutex and signal the writer thread to wake up in case it happens
94 **     to be asleep.
95 **
96 **
97 **         asyncRead, asyncFileSize.
98 **
99 **     Read operations. Both of these read from the underlying file
100 **     first, then adjust their result based on pending writes in the
101 **     write-op queue.   So async.queueMutex is held for the duration
102 **     of these operations to prevent other threads from changing the
103 **     queue in mid operation.
104 **
105 **
106 **         asyncLock, asyncUnlock, asyncCheckReservedLock
107 **
108 **     These primitives implement in-process locking using a hash table
109 **     on the file name.  Files are locked correctly for connections coming
110 **     from the same process.  But other processes cannot see these locks
111 **     and will therefore not honor them.
112 **
113 **
114 ** The writer thread:
115 **
116 **     The async.writerMutex is used to make sure that there is only
117 **     a single writer thread running at a time.
118 **
119 **     Inside the writer thread is a loop that works like this:
120 **
121 **         WHILE (write-op list is not empty)
122 **             Do IO operation at head of write-op list
123 **             Remove entry from head of write-op list
124 **         END WHILE
125 **
126 **     The async.queueMutex is always held during the <write-op list is
127 **     not empty> test, and when the entry is removed from the head
128 **     of the write-op list. Sometimes it is held for the interim
129 **     period (while the IO is performed), and sometimes it is
130 **     relinquished. It is relinquished if (a) the IO op is an
131 **     ASYNC_CLOSE or (b) when the file handle was opened, two of
132 **     the underlying systems handles were opened on the same
133 **     file-system entry.
134 **
135 **     If condition (b) above is true, then one file-handle
136 **     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
137 **     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
138 **     threads to perform write() operations. This means that read
139 **     operations are not blocked by asynchronous writes (although
140 **     asynchronous writes may still be blocked by reads).
141 **
142 **     This assumes that the OS keeps two handles open on the same file
143 **     properly in sync. That is, any read operation that starts after a
144 **     write operation on the same file system entry has completed returns
145 **     data consistent with the write. We also assume that if one thread
146 **     reads a file while another is writing it all bytes other than the
147 **     ones actually being written contain valid data.
148 **
149 **     If the above assumptions are not true, set the preprocessor symbol
150 **     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
151 */
152 
153 
/*
** TESTONLY(X) expands to X when assert() is enabled (NDEBUG not defined)
** and to nothing otherwise. It wraps bookkeeping that exists only to
** support assert() statements.
*/
#ifdef NDEBUG
# define TESTONLY( X )
#else
# define TESTONLY( X ) X
#endif
159 
160 /*
161 ** PORTING FUNCTIONS
162 **
163 ** There are two definitions of the following functions. One for pthreads
164 ** compatible systems and one for Win32. These functions isolate the OS
165 ** specific code required by each platform.
166 **
167 ** The system uses three mutexes and a single condition variable. To
168 ** block on a mutex, async_mutex_enter() is called. The parameter passed
169 ** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
170 ** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
171 ** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
172 ** called with a parameter identifying the mutex being unlocked. Mutexes
173 ** are not recursive - it is an error to call async_mutex_enter() to
174 ** lock a mutex that is already locked, or to call async_mutex_leave()
175 ** to unlock a mutex that is not currently locked.
176 **
177 ** The async_cond_wait() and async_cond_signal() functions are modelled
178 ** on the pthreads functions with similar names. The first parameter to
179 ** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
180 ** is called the mutex identified by the second parameter must be held.
181 ** The mutex is unlocked, and the calling thread simultaneously begins
182 ** waiting for the condition variable to be signalled by another thread.
183 ** After another thread signals the condition variable, the calling
184 ** thread stops waiting, locks mutex eMutex and returns. The
185 ** async_cond_signal() function is used to signal the condition variable.
186 ** It is assumed that the mutex used by the thread calling async_cond_wait()
187 ** is held by the caller of async_cond_signal() (otherwise there would be
188 ** a race condition).
189 **
190 ** It is guaranteed that no other thread will call async_cond_wait() when
191 ** there is already a thread waiting on the condition variable.
192 **
193 ** The async_sched_yield() function is called to suggest to the operating
194 ** system that it would be a good time to shift the current thread off the
195 ** CPU. The system will still work if this function is not implemented
196 ** (it is not currently implemented for win32), but it might be marginally
197 ** more efficient if it is.
198 */
199 static void async_mutex_enter(int eMutex);
200 static void async_mutex_leave(int eMutex);
201 static void async_cond_wait(int eCond, int eMutex);
202 static void async_cond_signal(int eCond);
203 static void async_sched_yield(void);
204 
205 /*
206 ** There are also two definitions of the following. async_os_initialize()
207 ** is called when the asynchronous VFS is first installed, and
208 ** async_os_shutdown() when it is uninstalled (by sqlite3async_shutdown()).
209 **
210 ** For pthreads builds, both of these functions are no-ops. For win32,
211 ** they provide an opportunity to initialize and finalize the required
212 ** mutex and condition variables.
213 **
214 ** If async_os_initialize() returns other than zero, then the initialization
215 ** fails and SQLITE_ERROR is returned to the user.
216 */
217 static int async_os_initialize(void);
218 static void async_os_shutdown(void);
219 
220 /* Values for use as the 'eMutex' argument of the above functions. The
221 ** integer values assigned to these constants are important for assert()
222 ** statements that verify that mutexes are locked in the correct order.
223 ** Specifically, it is unsafe to try to lock mutex N while holding a lock
224 ** on mutex M if (M<=N).
225 */
#define ASYNC_MUTEX_LOCK    0   /* Guards async.pLock and the lock structures */
#define ASYNC_MUTEX_QUEUE   1   /* Guards the write-op queue, ioError, nFile */
#define ASYNC_MUTEX_WRITER  2   /* Allows only one writer thread at a time */

/* Values for use as the 'eCond' argument of the above functions. */
#define ASYNC_COND_QUEUE    0   /* Signalled when an op is added to the queue */
232 
233 /*************************************************************************
234 ** Start of OS specific code.
235 */
236 #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
237 
238 /* The following block contains the win32 specific code. */
239 
/* True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] array is only updated inside TESTONLY(), so this macro is
** only meaningful inside assert() statements. */
#define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])
241 
/* Win32 synchronization objects shared by the porting functions below. */
static struct AsyncPrimitives {
  int isInit;                  /* True after async_os_initialize() succeeds */
  DWORD aHolder[3];            /* Id of thread holding each mutex (TESTONLY) */
  CRITICAL_SECTION aMutex[3];  /* Indexed by the ASYNC_MUTEX_xxx values */
  HANDLE aCond[1];             /* Event; indexed by the ASYNC_COND_xxx values */
} primitives = { 0 };
248 
249 static int async_os_initialize(void){
250   if( !primitives.isInit ){
251     primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
252     if( primitives.aCond[0]==NULL ){
253       return 1;
254     }
255     InitializeCriticalSection(&primitives.aMutex[0]);
256     InitializeCriticalSection(&primitives.aMutex[1]);
257     InitializeCriticalSection(&primitives.aMutex[2]);
258     primitives.isInit = 1;
259   }
260   return 0;
261 }
262 static void async_os_shutdown(void){
263   if( primitives.isInit ){
264     DeleteCriticalSection(&primitives.aMutex[0]);
265     DeleteCriticalSection(&primitives.aMutex[1]);
266     DeleteCriticalSection(&primitives.aMutex[2]);
267     CloseHandle(primitives.aCond[0]);
268     primitives.isInit = 0;
269   }
270 }
271 
272 /* The following block contains the Win32 specific code. */
273 static void async_mutex_enter(int eMutex){
274   assert( eMutex==0 || eMutex==1 || eMutex==2 );
275   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
276   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
277   assert( eMutex!=0 || (!mutex_held(0)) );
278   EnterCriticalSection(&primitives.aMutex[eMutex]);
279   TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
280 }
/*
** Win32: release mutex eMutex, which must be held by the calling thread.
** The holder record is cleared before the critical section is left, i.e.
** while this thread still owns it.
*/
static void async_mutex_leave(int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( mutex_held(eMutex) );
  TESTONLY( primitives.aHolder[eMutex] = 0; )
  LeaveCriticalSection(&primitives.aMutex[eMutex]);
}
/*
** Win32: release mutex eMutex, wait for event eCond to be signalled, then
** re-acquire eMutex before returning. The caller must hold eMutex.
*/
static void async_cond_wait(int eCond, int eMutex){
  /* The event is reset while eMutex is still held. Presumably this is so
  ** that a signal sent after the mutex is released is not lost - TODO
  ** confirm against the callers of async_cond_signal(). */
  ResetEvent(primitives.aCond[eCond]);
  async_mutex_leave(eMutex);
  WaitForSingleObject(primitives.aCond[eCond], INFINITE);
  async_mutex_enter(eMutex);
}
/*
** Win32: signal event eCond. The caller must hold the queue mutex.
*/
static void async_cond_signal(int eCond){
  assert( mutex_held(ASYNC_MUTEX_QUEUE) );
  SetEvent(primitives.aCond[eCond]);
}
/*
** Win32: hint that the current thread could be moved off the CPU. This is
** currently a no-op on this platform.
*/
static void async_sched_yield(void){
  /* Todo: Find out if win32 offers anything like sched_yield() */
}
300 #else
301 
302 /* The following block contains the pthreads specific code. */
303 #include <pthread.h>
304 #include <sched.h>
305 
/* True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] array is only updated inside TESTONLY(), so this macro is
** only meaningful inside assert() statements. */
#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
307 
/* pthreads: the mutexes and condition variable are statically initialized,
** so there is nothing to set up or tear down. Initialization always
** reports success (0). */
static int  async_os_initialize(void) {return 0;}
static void async_os_shutdown(void) {}
310 
/* pthreads synchronization objects shared by the porting functions below.
** All of them are statically initialized. */
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];   /* Indexed by the ASYNC_MUTEX_xxx values */
  pthread_cond_t aCond[1];     /* Indexed by the ASYNC_COND_xxx values */
  pthread_t aHolder[3];        /* Thread holding each mutex (TESTONLY) */
} primitives = {
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER
  } , {
    PTHREAD_COND_INITIALIZER
  } , { 0, 0, 0 }
};
323 
324 static void async_mutex_enter(int eMutex){
325   assert( eMutex==0 || eMutex==1 || eMutex==2 );
326   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
327   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
328   assert( eMutex!=0 || (!mutex_held(0)) );
329   pthread_mutex_lock(&primitives.aMutex[eMutex]);
330   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
331 }
/*
** pthreads: release mutex eMutex, which must be held by the calling
** thread. The holder record is cleared before the mutex is unlocked, i.e.
** while this thread still owns it.
*/
static void async_mutex_leave(int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( mutex_held(eMutex) );
  TESTONLY( primitives.aHolder[eMutex] = 0; )
  pthread_mutex_unlock(&primitives.aMutex[eMutex]);
}
/*
** pthreads: wait on condition variable eCond. The caller must hold mutex
** eMutex, which is passed to pthread_cond_wait() along with the condition
** variable.
*/
static void async_cond_wait(int eCond, int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( mutex_held(eMutex) );
  /* The holder record is cleared for the duration of the wait and set
  ** again once pthread_cond_wait() has returned. */
  TESTONLY( primitives.aHolder[eMutex] = 0; )
  pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
  TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
}
/*
** pthreads: signal condition variable eCond. The caller must hold the
** queue mutex.
*/
static void async_cond_signal(int eCond){
  assert( mutex_held(ASYNC_MUTEX_QUEUE) );
  pthread_cond_signal(&primitives.aCond[eCond]);
}
/*
** pthreads: hint to the scheduler that the current thread can be moved
** off the CPU.
*/
static void async_sched_yield(void){
  sched_yield();
}
352 #endif
353 /*
354 ** End of OS specific code.
355 *************************************************************************/
356 
/* Assert that the calling thread holds mutex X (an ASYNC_MUTEX_xxx value). */
#define assert_mutex_is_held(X) assert( mutex_held(X) )


/* Unless overridden at compile time, SQLITE_ASYNC_TWO_FILEHANDLES is 1.
** Define it to 0 if the assumptions described under "The writer thread"
** in the thread safety notes do not hold on the target system. */
#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
#define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif
364 
/*
** State information is held in the static variable "async" defined
** as the following structure.
**
** The write-op queue (pQueueFirst/pQueueLast), async.ioError and
** async.nFile are protected by the queue mutex (ASYNC_MUTEX_QUEUE). The
** async.pLock list is protected by the lock mutex (ASYNC_MUTEX_LOCK).
*/
static struct TestAsyncStaticData {
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
  volatile int ioDelay;        /* Extra delay between write operations */
  volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
  int ioError;                 /* True if an IO error has occurred */
  int nFile;                   /* Number of open files (from sqlite pov) */
} async = { 0,0,0,0,0,0,0 };
380 
/* Possible values of AsyncWrite.op */
#define ASYNC_NOOP          0   /* No values used */
#define ASYNC_WRITE         1   /* Write nByte bytes from zBuf at iOffset */
#define ASYNC_SYNC          2   /* Sync the file; nByte holds the flags */
#define ASYNC_TRUNCATE      3   /* Truncate the file to iOffset bytes */
#define ASYNC_CLOSE         4   /* Close the file */
#define ASYNC_DELETE        5   /* zBuf is a file name; iOffset is "syncDir" */
#define ASYNC_OPENEXCLUSIVE 6   /* zBuf is a file name; iOffset is "delflag" */
#define ASYNC_UNLOCK        7   /* nByte is the argument for the unlock call */

/* Names of opcodes, indexed by the ASYNC_xxx value. Used for debugging
** only (see the ASYNC_TRACE() call in addAsyncWrite()).
** Make sure these stay in sync with the macros above!
*/
static const char *azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};
397 
398 /*
399 ** Entries on the write-op queue are instances of the AsyncWrite
400 ** structure, defined here.
401 **
402 ** The interpretation of the iOffset and nByte variables varies depending
403 ** on the value of AsyncWrite.op:
404 **
405 ** ASYNC_NOOP:
406 **     No values used.
407 **
408 ** ASYNC_WRITE:
409 **     iOffset -> Offset in file to write to.
410 **     nByte   -> Number of bytes of data to write (pointed to by zBuf).
411 **
412 ** ASYNC_SYNC:
413 **     nByte   -> flags to pass to sqlite3OsSync().
414 **
415 ** ASYNC_TRUNCATE:
416 **     iOffset -> Size to truncate file to.
417 **     nByte   -> Unused.
418 **
419 ** ASYNC_CLOSE:
420 **     iOffset -> Unused.
421 **     nByte   -> Unused.
422 **
423 ** ASYNC_DELETE:
424 **     iOffset -> Contains the "syncDir" flag.
425 **     nByte   -> Number of bytes of zBuf points to (file name).
426 **
427 ** ASYNC_OPENEXCLUSIVE:
428 **     iOffset -> Value of "delflag".
429 **     nByte   -> Number of bytes of zBuf points to (file name).
430 **
431 ** ASYNC_UNLOCK:
432 **     nByte   -> Argument to sqlite3OsUnlock().
433 **
434 **
435 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
436 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
437 ** single blob, so is deleted when sqlite3_free() is called on the parent
438 ** structure.
439 */
/* One entry on the global write-op queue. */
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  sqlite_int64 iOffset;        /* See above */
  int nByte;                   /* See above */
  char *zBuf;                  /* Data to write to file, or a file name for
                               ** ASYNC_DELETE and ASYNC_OPENEXCLUSIVE (see
                               ** above). NULL if no data was supplied. */
  AsyncWrite *pNext;           /* Next write operation (to any file) */
};
448 
449 /*
450 ** An instance of this structure is created for each distinct open file
451 ** (i.e. if two handles are opened on the one file, only one of these
452 ** structures is allocated) and stored in the linked list headed by
453 ** async.pLock. Entries in that list are identified by the full pathnames
454 ** of the opened files.
455 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock
456 ** structures, one for each handle currently open on the file.
457 **
458 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
459 ** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is
460 ** not defined at compile time, variables AsyncLock.pFile and
461 ** AsyncLock.eLock are never used. Otherwise, pFile is a file handle
462 ** opened on the file in question and used to obtain the file-system
463 ** locks required by database connections within this process.
464 **
465 ** See comments above the asyncLock() function for more details on
466 ** the implementation of database locking used by this backend.
467 */
/* Per file-system-entry lock state, shared by all handles on that entry.
** Must only be accessed while holding ASYNC_MUTEX_LOCK. */
struct AsyncLock {
  char *zFile;                /* File name; compared by findLock() */
  int nFile;                  /* Number of bytes in zFile */
  sqlite3_file *pFile;        /* Handle used for file-system locks, or NULL */
  int eLock;                  /* Lock level currently held on pFile */
  AsyncFileLock *pList;       /* List of locks of the handles on this file */
  AsyncLock *pNext;           /* Next in linked list headed by async.pLock */
};
476 
477 /*
478 ** An instance of the following structure is allocated along with each
479 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
480 ** file was opened with the SQLITE_OPEN_MAIN_DB.
481 */
/* Lock state of a single file handle. Must only be accessed while
** holding ASYNC_MUTEX_LOCK. */
struct AsyncFileLock {
  int eLock;                /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;           /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;     /* Next entry in the AsyncLock.pList list */
};
487 
488 /*
489 ** The AsyncFile structure is a subclass of sqlite3_file used for
490 ** asynchronous IO.
491 **
492 ** All of the actual data for the structure is stored in the structure
493 ** pointed to by AsyncFile.pData, which is allocated as part of the
494 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
495 ** lifetime of the AsyncFile structure is ended by the caller after OsClose()
496 ** is called, but the data in AsyncFileData may be required by the
497 ** writer thread after that point.
498 */
/* The sqlite3_file subclass handed to SQLite. All state lives in *pData. */
struct AsyncFile {
  sqlite3_io_methods *pMethod;   /* Method table (sqlite3_file base member) */
  AsyncFileData *pData;          /* Separately allocated per-file data */
};
/* Per-handle data. Kept separate from AsyncFile because the writer thread
** may still need it after the AsyncFile itself has been released. */
struct AsyncFileData {
  char *zName;               /* Underlying OS filename - used for debugging */
  int nName;                 /* Number of characters in zName */
  sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
  sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
  AsyncFileLock lock;        /* Lock state for this handle */
  AsyncLock *pLock;          /* AsyncLock object for this file system entry */
  AsyncWrite closeOp;        /* Preallocated close operation */
};
512 
513 /*
514 ** Add an entry to the end of the global write-op list. pWrite should point
515 ** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
516 ** thread will call sqlite3_free() to free the structure after the specified
517 ** operation has been completed.
518 **
519 ** Once an AsyncWrite structure has been added to the list, it becomes the
520 ** property of the writer thread and must not be read or modified by the
521 ** caller.
522 */
523 static void addAsyncWrite(AsyncWrite *pWrite){
524   /* We must hold the queue mutex in order to modify the queue pointers */
525   if( pWrite->op!=ASYNC_UNLOCK ){
526     async_mutex_enter(ASYNC_MUTEX_QUEUE);
527   }
528 
529   /* Add the record to the end of the write-op queue */
530   assert( !pWrite->pNext );
531   if( async.pQueueLast ){
532     assert( async.pQueueFirst );
533     async.pQueueLast->pNext = pWrite;
534   }else{
535     async.pQueueFirst = pWrite;
536   }
537   async.pQueueLast = pWrite;
538   ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
539          pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
540 
541   if( pWrite->op==ASYNC_CLOSE ){
542     async.nFile--;
543   }
544 
545   /* The writer thread might have been idle because there was nothing
546   ** on the write-op queue for it to do.  So wake it up. */
547   async_cond_signal(ASYNC_COND_QUEUE);
548 
549   /* Drop the queue mutex */
550   if( pWrite->op!=ASYNC_UNLOCK ){
551     async_mutex_leave(ASYNC_MUTEX_QUEUE);
552   }
553 }
554 
555 /*
556 ** Increment async.nFile in a thread-safe manner.
557 */
558 static void incrOpenFileCount(void){
559   /* We must hold the queue mutex in order to modify async.nFile */
560   async_mutex_enter(ASYNC_MUTEX_QUEUE);
561   if( async.nFile==0 ){
562     async.ioError = SQLITE_OK;
563   }
564   async.nFile++;
565   async_mutex_leave(ASYNC_MUTEX_QUEUE);
566 }
567 
568 /*
569 ** This is a utility function to allocate and populate a new AsyncWrite
570 ** structure and insert it (via addAsyncWrite() ) into the global list.
571 */
572 static int addNewAsyncWrite(
573   AsyncFileData *pFileData,
574   int op,
575   sqlite3_int64 iOffset,
576   int nByte,
577   const char *zByte
578 ){
579   AsyncWrite *p;
580   if( op!=ASYNC_CLOSE && async.ioError ){
581     return async.ioError;
582   }
583   p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
584   if( !p ){
585     /* The upper layer does not expect operations like OsWrite() to
586     ** return SQLITE_NOMEM. This is partly because under normal conditions
587     ** SQLite is required to do rollback without calling malloc(). So
588     ** if malloc() fails here, treat it as an I/O error. The above
589     ** layer knows how to handle that.
590     */
591     return SQLITE_IOERR;
592   }
593   p->op = op;
594   p->iOffset = iOffset;
595   p->nByte = nByte;
596   p->pFileData = pFileData;
597   p->pNext = 0;
598   if( zByte ){
599     p->zBuf = (char *)&p[1];
600     memcpy(p->zBuf, zByte, nByte);
601   }else{
602     p->zBuf = 0;
603   }
604   addAsyncWrite(p);
605   return SQLITE_OK;
606 }
607 
608 /*
609 ** Close the file. This just adds an entry to the write-op list, the file is
610 ** not actually closed.
611 */
612 static int asyncClose(sqlite3_file *pFile){
613   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
614 
615   /* Unlock the file, if it is locked */
616   async_mutex_enter(ASYNC_MUTEX_LOCK);
617   p->lock.eLock = 0;
618   async_mutex_leave(ASYNC_MUTEX_LOCK);
619 
620   addAsyncWrite(&p->closeOp);
621   return SQLITE_OK;
622 }
623 
624 /*
625 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
626 ** writing to the underlying file, this function adds an entry to the end of
627 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
628 ** returned.
629 */
630 static int asyncWrite(
631   sqlite3_file *pFile,
632   const void *pBuf,
633   int amt,
634   sqlite3_int64 iOff
635 ){
636   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
637   return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
638 }
639 
640 /*
641 ** Read data from the file. First we read from the filesystem, then adjust
642 ** the contents of the buffer based on ASYNC_WRITE operations in the
643 ** write-op queue.
644 **
645 ** This method holds the mutex from start to finish.
646 */
647 static int asyncRead(
648   sqlite3_file *pFile,
649   void *zOut,
650   int iAmt,
651   sqlite3_int64 iOffset
652 ){
653   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
654   int rc = SQLITE_OK;
655   sqlite3_int64 filesize;
656   int nRead;
657   sqlite3_file *pBase = p->pBaseRead;
658 
659   /* Grab the write queue mutex for the duration of the call */
660   async_mutex_enter(ASYNC_MUTEX_QUEUE);
661 
662   /* If an I/O error has previously occurred in this virtual file
663   ** system, then all subsequent operations fail.
664   */
665   if( async.ioError!=SQLITE_OK ){
666     rc = async.ioError;
667     goto asyncread_out;
668   }
669 
670   if( pBase->pMethods ){
671     rc = pBase->pMethods->xFileSize(pBase, &filesize);
672     if( rc!=SQLITE_OK ){
673       goto asyncread_out;
674     }
675     nRead = MIN(filesize - iOffset, iAmt);
676     if( nRead>0 ){
677       rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
678       ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
679     }
680   }
681 
682   if( rc==SQLITE_OK ){
683     AsyncWrite *pWrite;
684     char *zName = p->zName;
685 
686     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
687       if( pWrite->op==ASYNC_WRITE && (
688         (pWrite->pFileData==p) ||
689         (zName && pWrite->pFileData->zName==zName)
690       )){
691         int iBeginOut = (pWrite->iOffset-iOffset);
692         int iBeginIn = -iBeginOut;
693         int nCopy;
694 
695         if( iBeginIn<0 ) iBeginIn = 0;
696         if( iBeginOut<0 ) iBeginOut = 0;
697         nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
698 
699         if( nCopy>0 ){
700           memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
701           ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
702         }
703       }
704     }
705   }
706 
707 asyncread_out:
708   async_mutex_leave(ASYNC_MUTEX_QUEUE);
709   return rc;
710 }
711 
712 /*
713 ** Truncate the file to nByte bytes in length. This just adds an entry to
714 ** the write-op list, no IO actually takes place.
715 */
716 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
717   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
718   return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
719 }
720 
721 /*
722 ** Sync the file. This just adds an entry to the write-op list, the
723 ** sync() is done later by sqlite3_async_flush().
724 */
725 static int asyncSync(sqlite3_file *pFile, int flags){
726   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
727   return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
728 }
729 
730 /*
731 ** Read the size of the file. First we read the size of the file system
732 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
733 ** currently in the write-op list.
734 **
735 ** This method holds the mutex from start to finish.
736 */
737 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
738   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
739   int rc = SQLITE_OK;
740   sqlite3_int64 s = 0;
741   sqlite3_file *pBase;
742 
743   async_mutex_enter(ASYNC_MUTEX_QUEUE);
744 
745   /* Read the filesystem size from the base file. If pBaseRead is NULL, this
746   ** means the file hasn't been opened yet. In this case all relevant data
747   ** must be in the write-op queue anyway, so we can omit reading from the
748   ** file-system.
749   */
750   pBase = p->pBaseRead;
751   if( pBase->pMethods ){
752     rc = pBase->pMethods->xFileSize(pBase, &s);
753   }
754 
755   if( rc==SQLITE_OK ){
756     AsyncWrite *pWrite;
757     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
758       if( pWrite->op==ASYNC_DELETE
759        && p->zName
760        && strcmp(p->zName, pWrite->zBuf)==0
761       ){
762         s = 0;
763       }else if( pWrite->pFileData && (
764           (pWrite->pFileData==p)
765        || (p->zName && pWrite->pFileData->zName==p->zName)
766       )){
767         switch( pWrite->op ){
768           case ASYNC_WRITE:
769             s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
770             break;
771           case ASYNC_TRUNCATE:
772             s = MIN(s, pWrite->iOffset);
773             break;
774         }
775       }
776     }
777     *piSize = s;
778   }
779   async_mutex_leave(ASYNC_MUTEX_QUEUE);
780   return rc;
781 }
782 
783 /*
784 ** Lock or unlock the actual file-system entry.
785 */
786 static int getFileLock(AsyncLock *pLock){
787   int rc = SQLITE_OK;
788   AsyncFileLock *pIter;
789   int eRequired = 0;
790 
791   if( pLock->pFile ){
792     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
793       assert(pIter->eAsyncLock>=pIter->eLock);
794       if( pIter->eAsyncLock>eRequired ){
795         eRequired = pIter->eAsyncLock;
796         assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
797       }
798     }
799 
800     if( eRequired>pLock->eLock ){
801       rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
802       if( rc==SQLITE_OK ){
803         pLock->eLock = eRequired;
804       }
805     }
806     else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
807       rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
808       if( rc==SQLITE_OK ){
809         pLock->eLock = eRequired;
810       }
811     }
812   }
813 
814   return rc;
815 }
816 
817 /*
818 ** Return the AsyncLock structure from the global async.pLock list
819 ** associated with the file-system entry identified by path zName
820 ** (a string of nName bytes). If no such structure exists, return 0.
821 */
822 static AsyncLock *findLock(const char *zName, int nName){
823   AsyncLock *p = async.pLock;
824   while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
825     p = p->pNext;
826   }
827   return p;
828 }
829 
830 /*
831 ** The following two methods - asyncLock() and asyncUnlock() - are used
832 ** to obtain and release locks on database files opened with the
833 ** asynchronous backend.
834 */
835 static int asyncLock(sqlite3_file *pFile, int eLock){
836   int rc = SQLITE_OK;
837   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
838 
839   if( p->zName ){
840     async_mutex_enter(ASYNC_MUTEX_LOCK);
841     if( p->lock.eLock<eLock ){
842       AsyncLock *pLock = p->pLock;
843       AsyncFileLock *pIter;
844       assert(pLock && pLock->pList);
845       for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
846         if( pIter!=&p->lock && (
847           (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
848           (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
849           (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
850           (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
851         )){
852           rc = SQLITE_BUSY;
853         }
854       }
855       if( rc==SQLITE_OK ){
856         p->lock.eLock = eLock;
857         p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
858       }
859       assert(p->lock.eAsyncLock>=p->lock.eLock);
860       if( rc==SQLITE_OK ){
861         rc = getFileLock(pLock);
862       }
863     }
864     async_mutex_leave(ASYNC_MUTEX_LOCK);
865   }
866 
867   ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
868   return rc;
869 }
870 static int asyncUnlock(sqlite3_file *pFile, int eLock){
871   int rc = SQLITE_OK;
872   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
873   if( p->zName ){
874     AsyncFileLock *pLock = &p->lock;
875     async_mutex_enter(ASYNC_MUTEX_QUEUE);
876     async_mutex_enter(ASYNC_MUTEX_LOCK);
877     pLock->eLock = MIN(pLock->eLock, eLock);
878     rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
879     async_mutex_leave(ASYNC_MUTEX_LOCK);
880     async_mutex_leave(ASYNC_MUTEX_QUEUE);
881   }
882   return rc;
883 }
884 
885 /*
886 ** This function is called when the pager layer first opens a database file
887 ** and is checking for a hot-journal.
888 */
889 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
890   int ret = 0;
891   AsyncFileLock *pIter;
892   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
893 
894   async_mutex_enter(ASYNC_MUTEX_LOCK);
895   for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
896     if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
897       ret = 1;
898     }
899   }
900   async_mutex_leave(ASYNC_MUTEX_LOCK);
901 
902   ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
903   *pResOut = ret;
904   return SQLITE_OK;
905 }
906 
907 /*
908 ** sqlite3_file_control() implementation.
909 */
910 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
911   switch( op ){
912     case SQLITE_FCNTL_LOCKSTATE: {
913       async_mutex_enter(ASYNC_MUTEX_LOCK);
914       *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
915       async_mutex_leave(ASYNC_MUTEX_LOCK);
916       return SQLITE_OK;
917     }
918   }
919   return SQLITE_ERROR;
920 }
921 
922 /*
** Return the device characteristics and sector-size of the device. It
** is tricky to implement these correctly, as this backend might
** not have an open file handle at this point.
926 */
927 static int asyncSectorSize(sqlite3_file *pFile){
928   return 512;
929 }
930 static int asyncDeviceCharacteristics(sqlite3_file *pFile){
931   return 0;
932 }
933 
934 static int unlinkAsyncFile(AsyncFileData *pData){
935   AsyncFileLock **ppIter;
936   int rc = SQLITE_OK;
937 
938   if( pData->zName ){
939     AsyncLock *pLock = pData->pLock;
940     for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
941       if( (*ppIter)==&pData->lock ){
942         *ppIter = pData->lock.pNext;
943         break;
944       }
945     }
946     if( !pLock->pList ){
947       AsyncLock **pp;
948       if( pLock->pFile ){
949         pLock->pFile->pMethods->xClose(pLock->pFile);
950       }
951       for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
952       *pp = pLock->pNext;
953       sqlite3_free(pLock);
954     }else{
955       rc = getFileLock(pLock);
956     }
957   }
958 
959   return rc;
960 }
961 
962 /*
963 ** The parameter passed to this function is a copy of a 'flags' parameter
964 ** passed to this modules xOpen() method. This function returns true
965 ** if the file should be opened asynchronously, or false if it should
966 ** be opened immediately.
967 **
968 ** If the file is to be opened asynchronously, then asyncOpen() will add
969 ** an entry to the event queue and the file will not actually be opened
970 ** until the event is processed. Otherwise, the file is opened directly
971 ** by the caller.
972 */
973 static int doAsynchronousOpen(int flags){
974   return (flags&SQLITE_OPEN_CREATE) && (
975       (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
976       (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
977       (flags&SQLITE_OPEN_DELETEONCLOSE)
978   );
979 }
980 
981 /*
982 ** Open a file.
983 */
984 static int asyncOpen(
985   sqlite3_vfs *pAsyncVfs,
986   const char *zName,
987   sqlite3_file *pFile,
988   int flags,
989   int *pOutFlags
990 ){
991   static sqlite3_io_methods async_methods = {
992     1,                               /* iVersion */
993     asyncClose,                      /* xClose */
994     asyncRead,                       /* xRead */
995     asyncWrite,                      /* xWrite */
996     asyncTruncate,                   /* xTruncate */
997     asyncSync,                       /* xSync */
998     asyncFileSize,                   /* xFileSize */
999     asyncLock,                       /* xLock */
1000     asyncUnlock,                     /* xUnlock */
1001     asyncCheckReservedLock,          /* xCheckReservedLock */
1002     asyncFileControl,                /* xFileControl */
1003     asyncSectorSize,                 /* xSectorSize */
1004     asyncDeviceCharacteristics       /* xDeviceCharacteristics */
1005   };
1006 
1007   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1008   AsyncFile *p = (AsyncFile *)pFile;
1009   int nName = 0;
1010   int rc = SQLITE_OK;
1011   int nByte;
1012   AsyncFileData *pData;
1013   AsyncLock *pLock = 0;
1014   char *z;
1015   int isAsyncOpen = doAsynchronousOpen(flags);
1016 
1017   /* If zName is NULL, then the upper layer is requesting an anonymous file */
1018   if( zName ){
1019     nName = strlen(zName)+1;
1020   }
1021 
1022   nByte = (
1023     sizeof(AsyncFileData) +        /* AsyncFileData structure */
1024     2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
1025     nName                          /* AsyncFileData.zName */
1026   );
1027   z = sqlite3_malloc(nByte);
1028   if( !z ){
1029     return SQLITE_NOMEM;
1030   }
1031   memset(z, 0, nByte);
1032   pData = (AsyncFileData*)z;
1033   z += sizeof(pData[0]);
1034   pData->pBaseRead = (sqlite3_file*)z;
1035   z += pVfs->szOsFile;
1036   pData->pBaseWrite = (sqlite3_file*)z;
1037   pData->closeOp.pFileData = pData;
1038   pData->closeOp.op = ASYNC_CLOSE;
1039 
1040   if( zName ){
1041     z += pVfs->szOsFile;
1042     pData->zName = z;
1043     pData->nName = nName;
1044     memcpy(pData->zName, zName, nName);
1045   }
1046 
1047   if( !isAsyncOpen ){
1048     int flagsout;
1049     rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1050     if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){
1051       rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1052     }
1053     if( pOutFlags ){
1054       *pOutFlags = flagsout;
1055     }
1056   }
1057 
1058   async_mutex_enter(ASYNC_MUTEX_LOCK);
1059 
1060   if( zName && rc==SQLITE_OK ){
1061     pLock = findLock(pData->zName, pData->nName);
1062     if( !pLock ){
1063       int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1064       pLock = (AsyncLock *)sqlite3_malloc(nByte);
1065       if( pLock ){
1066         memset(pLock, 0, nByte);
1067 #ifdef ENABLE_FILE_LOCKING
1068         if( flags&SQLITE_OPEN_MAIN_DB ){
1069           pLock->pFile = (sqlite3_file *)&pLock[1];
1070           rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1071           if( rc!=SQLITE_OK ){
1072             sqlite3_free(pLock);
1073             pLock = 0;
1074           }
1075         }
1076 #endif
1077         if( pLock ){
1078           pLock->nFile = pData->nName;
1079           pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1080           memcpy(pLock->zFile, pData->zName, pLock->nFile);
1081           pLock->pNext = async.pLock;
1082           async.pLock = pLock;
1083         }
1084       }else{
1085         rc = SQLITE_NOMEM;
1086       }
1087     }
1088   }
1089 
1090   if( rc==SQLITE_OK ){
1091     p->pMethod = &async_methods;
1092     p->pData = pData;
1093 
1094     /* Link AsyncFileData.lock into the linked list of
1095     ** AsyncFileLock structures for this file.
1096     */
1097     if( zName ){
1098       pData->lock.pNext = pLock->pList;
1099       pLock->pList = &pData->lock;
1100       pData->zName = pLock->zFile;
1101     }
1102   }else{
1103     if( pData->pBaseRead->pMethods ){
1104       pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1105     }
1106     if( pData->pBaseWrite->pMethods ){
1107       pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1108     }
1109     sqlite3_free(pData);
1110   }
1111 
1112   async_mutex_leave(ASYNC_MUTEX_LOCK);
1113 
1114   if( rc==SQLITE_OK ){
1115     incrOpenFileCount();
1116     pData->pLock = pLock;
1117   }
1118 
1119   if( rc==SQLITE_OK && isAsyncOpen ){
1120     rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1121     if( rc==SQLITE_OK ){
1122       if( pOutFlags ) *pOutFlags = flags;
1123     }else{
1124       async_mutex_enter(ASYNC_MUTEX_LOCK);
1125       unlinkAsyncFile(pData);
1126       async_mutex_leave(ASYNC_MUTEX_LOCK);
1127       sqlite3_free(pData);
1128     }
1129   }
1130   if( rc!=SQLITE_OK ){
1131     p->pMethod = 0;
1132   }
1133   return rc;
1134 }
1135 
1136 /*
1137 ** Implementation of sqlite3OsDelete. Add an entry to the end of the
1138 ** write-op queue to perform the delete.
1139 */
1140 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
1141   return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
1142 }
1143 
1144 /*
1145 ** Implementation of sqlite3OsAccess. This method holds the mutex from
1146 ** start to finish.
1147 */
1148 static int asyncAccess(
1149   sqlite3_vfs *pAsyncVfs,
1150   const char *zName,
1151   int flags,
1152   int *pResOut
1153 ){
1154   int rc;
1155   int ret;
1156   AsyncWrite *p;
1157   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1158 
1159   assert(flags==SQLITE_ACCESS_READWRITE
1160       || flags==SQLITE_ACCESS_READ
1161       || flags==SQLITE_ACCESS_EXISTS
1162   );
1163 
1164   async_mutex_enter(ASYNC_MUTEX_QUEUE);
1165   rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1166   if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1167     for(p=async.pQueueFirst; p; p = p->pNext){
1168       if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1169         ret = 0;
1170       }else if( p->op==ASYNC_OPENEXCLUSIVE
1171              && p->pFileData->zName
1172              && 0==strcmp(p->pFileData->zName, zName)
1173       ){
1174         ret = 1;
1175       }
1176     }
1177   }
1178   ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1179     flags==SQLITE_ACCESS_READWRITE?"read-write":
1180     flags==SQLITE_ACCESS_READ?"read":"exists"
1181     , zName, ret)
1182   );
1183   async_mutex_leave(ASYNC_MUTEX_QUEUE);
1184   *pResOut = ret;
1185   return rc;
1186 }
1187 
1188 /*
1189 ** Fill in zPathOut with the full path to the file identified by zPath.
1190 */
1191 static int asyncFullPathname(
1192   sqlite3_vfs *pAsyncVfs,
1193   const char *zPath,
1194   int nPathOut,
1195   char *zPathOut
1196 ){
1197   int rc;
1198   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1199   rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1200 
1201   /* Because of the way intra-process file locking works, this backend
1202   ** needs to return a canonical path. The following block assumes the
1203   ** file-system uses unix style paths.
1204   */
1205   if( rc==SQLITE_OK ){
1206     int i, j;
1207     int n = nPathOut;
1208     char *z = zPathOut;
1209     while( n>1 && z[n-1]=='/' ){ n--; }
1210     for(i=j=0; i<n; i++){
1211       if( z[i]=='/' ){
1212         if( z[i+1]=='/' ) continue;
1213         if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1214           i += 1;
1215           continue;
1216         }
1217         if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1218           while( j>0 && z[j-1]!='/' ){ j--; }
1219           if( j>0 ){ j--; }
1220           i += 2;
1221           continue;
1222         }
1223       }
1224       z[j++] = z[i];
1225     }
1226     z[j] = 0;
1227   }
1228 
1229   return rc;
1230 }
1231 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1232   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1233   return pVfs->xDlOpen(pVfs, zPath);
1234 }
1235 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1236   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1237   pVfs->xDlError(pVfs, nByte, zErrMsg);
1238 }
1239 static void (*asyncDlSym(
1240   sqlite3_vfs *pAsyncVfs,
1241   void *pHandle,
1242   const char *zSymbol
1243 ))(void){
1244   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1245   return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1246 }
1247 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1248   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1249   pVfs->xDlClose(pVfs, pHandle);
1250 }
1251 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1252   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1253   return pVfs->xRandomness(pVfs, nByte, zBufOut);
1254 }
1255 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1256   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1257   return pVfs->xSleep(pVfs, nMicro);
1258 }
1259 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1260   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1261   return pVfs->xCurrentTime(pVfs, pTimeOut);
1262 }
1263 
/*
** The VFS object for the asynchronous backend. mxPathname and pAppData
** start out as 0 and are filled in by sqlite3async_initialize(), which
** stores the parent VFS in pAppData and copies the parent's mxPathname.
*/
static sqlite3_vfs async_vfs = {
  1,                    /* iVersion */
  sizeof(AsyncFile),    /* szOsFile */
  0,                    /* mxPathname */
  0,                    /* pNext */
  SQLITEASYNC_VFSNAME,  /* zName */
  0,                    /* pAppData */
  asyncOpen,            /* xOpen */
  asyncDelete,          /* xDelete */
  asyncAccess,          /* xAccess */
  asyncFullPathname,    /* xFullPathname */
  asyncDlOpen,          /* xDlOpen */
  asyncDlError,         /* xDlError */
  asyncDlSym,           /* xDlSym */
  asyncDlClose,         /* xDlClose */
  asyncRandomness,      /* xRandomness */
  asyncSleep,           /* xSleep */
  asyncCurrentTime      /* xCurrentTime */
};
1283 
1284 /*
1285 ** This procedure runs in a separate thread, reading messages off of the
1286 ** write queue and processing them one by one.
1287 **
1288 ** If async.writerHaltNow is true, then this procedure exits
1289 ** after processing a single message.
1290 **
1291 ** If async.writerHaltWhenIdle is true, then this procedure exits when
1292 ** the write queue is empty.
1293 **
1294 ** If both of the above variables are false, this procedure runs
1295 ** indefinately, waiting for operations to be added to the write queue
1296 ** and processing them in the order in which they arrive.
1297 **
1298 ** An artifical delay of async.ioDelay milliseconds is inserted before
1299 ** each write operation in order to simulate the effect of a slow disk.
1300 **
1301 ** Only one instance of this procedure may be running at a time.
1302 */
1303 static void asyncWriterThread(void){
1304   sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1305   AsyncWrite *p = 0;
1306   int rc = SQLITE_OK;
1307   int holdingMutex = 0;
1308 
1309   async_mutex_enter(ASYNC_MUTEX_WRITER);
1310 
1311   while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1312     int doNotFree = 0;
1313     sqlite3_file *pBase = 0;
1314 
1315     if( !holdingMutex ){
1316       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1317     }
1318     while( (p = async.pQueueFirst)==0 ){
1319       if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1320         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1321         break;
1322       }else{
1323         ASYNC_TRACE(("IDLE\n"));
1324         async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1325         ASYNC_TRACE(("WAKEUP\n"));
1326       }
1327     }
1328     if( p==0 ) break;
1329     holdingMutex = 1;
1330 
1331     /* Right now this thread is holding the mutex on the write-op queue.
1332     ** Variable 'p' points to the first entry in the write-op queue. In
1333     ** the general case, we hold on to the mutex for the entire body of
1334     ** the loop.
1335     **
1336     ** However in the cases enumerated below, we relinquish the mutex,
1337     ** perform the IO, and then re-request the mutex before removing 'p' from
1338     ** the head of the write-op queue. The idea is to increase concurrency with
1339     ** sqlite threads.
1340     **
1341     **     * An ASYNC_CLOSE operation.
1342     **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1343     **       the mutex, call the underlying xOpenExclusive() function, then
1344     **       re-aquire the mutex before seting the AsyncFile.pBaseRead
1345     **       variable.
1346     **     * ASYNC_SYNC and ASYNC_WRITE operations, if
1347     **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1348     **       file-handles are open for the particular file being "synced".
1349     */
1350     if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1351       p->op = ASYNC_NOOP;
1352     }
1353     if( p->pFileData ){
1354       pBase = p->pFileData->pBaseWrite;
1355       if(
1356         p->op==ASYNC_CLOSE ||
1357         p->op==ASYNC_OPENEXCLUSIVE ||
1358         (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1359       ){
1360         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1361         holdingMutex = 0;
1362       }
1363       if( !pBase->pMethods ){
1364         pBase = p->pFileData->pBaseRead;
1365       }
1366     }
1367 
1368     switch( p->op ){
1369       case ASYNC_NOOP:
1370         break;
1371 
1372       case ASYNC_WRITE:
1373         assert( pBase );
1374         ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1375                 p->pFileData->zName, p->nByte, p->iOffset));
1376         rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1377         break;
1378 
1379       case ASYNC_SYNC:
1380         assert( pBase );
1381         ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1382         rc = pBase->pMethods->xSync(pBase, p->nByte);
1383         break;
1384 
1385       case ASYNC_TRUNCATE:
1386         assert( pBase );
1387         ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1388                 p->pFileData->zName, p->iOffset));
1389         rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1390         break;
1391 
1392       case ASYNC_CLOSE: {
1393         AsyncFileData *pData = p->pFileData;
1394         ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1395         if( pData->pBaseWrite->pMethods ){
1396           pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1397         }
1398         if( pData->pBaseRead->pMethods ){
1399           pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1400         }
1401 
1402         /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1403         ** structures for this file. Obtain the async.lockMutex mutex
1404         ** before doing so.
1405         */
1406         async_mutex_enter(ASYNC_MUTEX_LOCK);
1407         rc = unlinkAsyncFile(pData);
1408         async_mutex_leave(ASYNC_MUTEX_LOCK);
1409 
1410         if( !holdingMutex ){
1411           async_mutex_enter(ASYNC_MUTEX_QUEUE);
1412           holdingMutex = 1;
1413         }
1414         assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1415         async.pQueueFirst = p->pNext;
1416         sqlite3_free(pData);
1417         doNotFree = 1;
1418         break;
1419       }
1420 
1421       case ASYNC_UNLOCK: {
1422         AsyncWrite *pIter;
1423         AsyncFileData *pData = p->pFileData;
1424         int eLock = p->nByte;
1425 
1426         /* When a file is locked by SQLite using the async backend, it is
1427         ** locked within the 'real' file-system synchronously. When it is
1428         ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1429         ** unlock the file asynchronously. The design of the async backend
1430         ** requires that the 'real' file-system file be locked from the
1431         ** time that SQLite first locks it (and probably reads from it)
1432         ** until all asynchronous write events that were scheduled before
1433         ** SQLite unlocked the file have been processed.
1434         **
1435         ** This is more complex if SQLite locks and unlocks the file multiple
1436         ** times in quick succession. For example, if SQLite does:
1437         **
1438         **   lock, write, unlock, lock, write, unlock
1439         **
1440         ** Each "lock" operation locks the file immediately. Each "write"
1441         ** and "unlock" operation adds an event to the event queue. If the
1442         ** second "lock" operation is performed before the first "unlock"
1443         ** operation has been processed asynchronously, then the first
1444         ** "unlock" cannot be safely processed as is, since this would mean
1445         ** the file was unlocked when the second "write" operation is
1446         ** processed. To work around this, when processing an ASYNC_UNLOCK
1447         ** operation, SQLite:
1448         **
1449         **   1) Unlocks the file to the minimum of the argument passed to
1450         **      the xUnlock() call and the current lock from SQLite's point
1451         **      of view, and
1452         **
1453         **   2) Only unlocks the file at all if this event is the last
1454         **      ASYNC_UNLOCK event on this file in the write-queue.
1455         */
1456         assert( holdingMutex==1 );
1457         assert( async.pQueueFirst==p );
1458         for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1459           if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1460         }
1461         if( !pIter ){
1462           async_mutex_enter(ASYNC_MUTEX_LOCK);
1463           pData->lock.eAsyncLock = MIN(
1464               pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1465           );
1466           assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1467           rc = getFileLock(pData->pLock);
1468           async_mutex_leave(ASYNC_MUTEX_LOCK);
1469         }
1470         break;
1471       }
1472 
1473       case ASYNC_DELETE:
1474         ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1475         rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1476         break;
1477 
1478       case ASYNC_OPENEXCLUSIVE: {
1479         int flags = (int)p->iOffset;
1480         AsyncFileData *pData = p->pFileData;
1481         ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1482         assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1483         rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1484         assert( holdingMutex==0 );
1485         async_mutex_enter(ASYNC_MUTEX_QUEUE);
1486         holdingMutex = 1;
1487         break;
1488       }
1489 
1490       default: assert(!"Illegal value for AsyncWrite.op");
1491     }
1492 
1493     /* If we didn't hang on to the mutex during the IO op, obtain it now
1494     ** so that the AsyncWrite structure can be safely removed from the
1495     ** global write-op queue.
1496     */
1497     if( !holdingMutex ){
1498       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1499       holdingMutex = 1;
1500     }
1501     /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1502     if( p==async.pQueueLast ){
1503       async.pQueueLast = 0;
1504     }
1505     if( !doNotFree ){
1506       assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1507       async.pQueueFirst = p->pNext;
1508       sqlite3_free(p);
1509     }
1510     assert( holdingMutex );
1511 
1512     /* An IO error has occurred. We cannot report the error back to the
1513     ** connection that requested the I/O since the error happened
1514     ** asynchronously.  The connection has already moved on.  There
1515     ** really is nobody to report the error to.
1516     **
1517     ** The file for which the error occurred may have been a database or
1518     ** journal file. Regardless, none of the currently queued operations
1519     ** associated with the same database should now be performed. Nor should
1520     ** any subsequently requested IO on either a database or journal file
1521     ** handle for the same database be accepted until the main database
1522     ** file handle has been closed and reopened.
1523     **
1524     ** Furthermore, no further IO should be queued or performed on any file
1525     ** handle associated with a database that may have been part of a
1526     ** multi-file transaction that included the database associated with
1527     ** the IO error (i.e. a database ATTACHed to the same handle at some
1528     ** point in time).
1529     */
1530     if( rc!=SQLITE_OK ){
1531       async.ioError = rc;
1532     }
1533 
1534     if( async.ioError && !async.pQueueFirst ){
1535       async_mutex_enter(ASYNC_MUTEX_LOCK);
1536       if( 0==async.pLock ){
1537         async.ioError = SQLITE_OK;
1538       }
1539       async_mutex_leave(ASYNC_MUTEX_LOCK);
1540     }
1541 
1542     /* Drop the queue mutex before continuing to the next write operation
1543     ** in order to give other threads a chance to work with the write queue.
1544     */
1545     if( !async.pQueueFirst || !async.ioError ){
1546       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1547       holdingMutex = 0;
1548       if( async.ioDelay>0 ){
1549         pVfs->xSleep(pVfs, async.ioDelay);
1550       }else{
1551         async_sched_yield();
1552       }
1553     }
1554   }
1555 
1556   async_mutex_leave(ASYNC_MUTEX_WRITER);
1557   return;
1558 }
1559 
1560 /*
1561 ** Install the asynchronous VFS.
1562 */
1563 int sqlite3async_initialize(const char *zParent, int isDefault){
1564   int rc = SQLITE_OK;
1565   if( async_vfs.pAppData==0 ){
1566     sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1567     if( !pParent || async_os_initialize() ){
1568       rc = SQLITE_ERROR;
1569     }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1570       async_os_shutdown();
1571     }else{
1572       async_vfs.pAppData = (void *)pParent;
1573       async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1574     }
1575   }
1576   return rc;
1577 }
1578 
1579 /*
1580 ** Uninstall the asynchronous VFS.
1581 */
1582 void sqlite3async_shutdown(void){
1583   if( async_vfs.pAppData ){
1584     async_os_shutdown();
1585     sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1586     async_vfs.pAppData = 0;
1587   }
1588 }
1589 
/*
** Process events on the write-queue. This is a thin public entry point
** that runs asyncWriterThread() in the calling thread and returns when
** that function returns.
*/
void sqlite3async_run(void){
  asyncWriterThread();
  return;
}
1596 
1597 /*
1598 ** Control/configure the asynchronous IO system.
1599 */
1600 int sqlite3async_control(int op, ...){
1601   va_list ap;
1602   va_start(ap, op);
1603   switch( op ){
1604     case SQLITEASYNC_HALT: {
1605       int eWhen = va_arg(ap, int);
1606       if( eWhen!=SQLITEASYNC_HALT_NEVER
1607        && eWhen!=SQLITEASYNC_HALT_NOW
1608        && eWhen!=SQLITEASYNC_HALT_IDLE
1609       ){
1610         return SQLITE_ERROR;
1611       }
1612       async.eHalt = eWhen;
1613       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1614       async_cond_signal(ASYNC_COND_QUEUE);
1615       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1616       break;
1617     }
1618 
1619     case SQLITEASYNC_DELAY: {
1620       int iDelay = va_arg(ap, int);
1621       async.ioDelay = iDelay;
1622       break;
1623     }
1624 
1625     case SQLITEASYNC_GET_HALT: {
1626       int *peWhen = va_arg(ap, int *);
1627       *peWhen = async.eHalt;
1628       break;
1629     }
1630     case SQLITEASYNC_GET_DELAY: {
1631       int *piDelay = va_arg(ap, int *);
1632       *piDelay = async.ioDelay;
1633       break;
1634     }
1635 
1636     default:
1637       return SQLITE_ERROR;
1638   }
1639   return SQLITE_OK;
1640 }
1641 
1642 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1643 
1644