xref: /sqlite-3.40.0/ext/async/sqlite3async.c (revision 6f050aa2)
1 /*
2 ** 2005 December 14
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** $Id: sqlite3async.c,v 1.4 2009/04/25 08:39:15 danielk1977 Exp $
14 **
15 ** This file contains the implementation of an asynchronous IO backend
16 ** for SQLite.
17 */
18 
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20 
21 #include "sqlite3async.h"
22 #include "sqlite3.h"
23 #include <stdarg.h>
24 #include <string.h>
25 #include <assert.h>
26 
/* Useful macros used in several places.
**
** Both macros expand each argument more than once, so arguments must not
** have side effects.
*/
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
30 
/* Forward references. The structures are defined further down this file. */
typedef struct AsyncWrite AsyncWrite;       /* Entry on the write-op queue */
typedef struct AsyncFile AsyncFile;         /* sqlite3_file subclass */
typedef struct AsyncFileData AsyncFileData; /* Data referenced by AsyncFile.pData */
typedef struct AsyncFileLock AsyncFileLock; /* Lock state of one file handle */
typedef struct AsyncLock AsyncLock;         /* Entry in the async.pLock list */
37 
/* Enable for debugging.
**
** ASYNC_TRACE((zFormat, ...)) takes a parenthesized printf-style argument
** list. When NDEBUG is not defined, the message is written to stderr if
** sqlite3async_trace is non-zero. When NDEBUG is defined the macro expands
** to nothing, so call sites compile in both configurations.
**
** The macro is wrapped in do{}while(0) so that it behaves as a single
** statement even when used as the body of an unbraced if/else.
*/
#ifndef NDEBUG
#include <stdio.h>
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) do{ if( sqlite3async_trace ) asyncTrace X; }while(0)
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  /* sqlite3_vmprintf() returns NULL if it cannot allocate memory. Passing
  ** NULL for a %s conversion is undefined, so drop the message instead. */
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
#else
# define ASYNC_TRACE(X)
#endif
53 
54 /*
55 ** THREAD SAFETY NOTES
56 **
57 ** Basic rules:
58 **
59 **     * Both read and write access to the global write-op queue must be
60 **       protected by the async.queueMutex. As are the async.ioError and
61 **       async.nFile variables.
62 **
63 **     * The async.pLock list and all AsyncLock and AsyncFileLock
64 **       structures must be protected by the async.lockMutex mutex.
65 **
66 **     * The file handles from the underlying system are not assumed to
67 **       be thread safe.
68 **
69 **     * See the last two paragraphs under "The Writer Thread" for
70 **       an assumption to do with file-handle synchronization by the Os.
71 **
72 ** Deadlock prevention:
73 **
**     There are three mutexes used by the system: the "writer" mutex,
**     the "queue" mutex and the "lock" mutex. Rules are:
**
**     * It is illegal to block on the writer mutex when any other mutex
**       is held, and
**
**     * It is illegal to block on the queue mutex when the lock mutex
**       is held.
**
**     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
84 **
85 ** File system operations (invoked by SQLite thread):
86 **
87 **     xOpen
88 **     xDelete
89 **     xFileExists
90 **
91 ** File handle operations (invoked by SQLite thread):
92 **
93 **         asyncWrite, asyncClose, asyncTruncate, asyncSync
94 **
95 **     The operations above add an entry to the global write-op list. They
96 **     prepare the entry, acquire the async.queueMutex momentarily while
97 **     list pointers are  manipulated to insert the new entry, then release
98 **     the mutex and signal the writer thread to wake up in case it happens
99 **     to be asleep.
100 **
101 **
102 **         asyncRead, asyncFileSize.
103 **
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue.   So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
109 **
110 **
111 **         asyncLock, asyncUnlock, asyncCheckReservedLock
112 **
**     These primitives implement in-process locking using a list of
**     AsyncLock structures (async.pLock) searched by file name.  Files are
**     locked correctly for connections coming from the same process.  But
**     other processes cannot see these locks and will therefore not honor
**     them.
117 **
118 **
119 ** The writer thread:
120 **
**     The async.writerMutex is used to make sure there is only
**     a single writer thread running at a time.
123 **
124 **     Inside the writer thread is a loop that works like this:
125 **
126 **         WHILE (write-op list is not empty)
127 **             Do IO operation at head of write-op list
128 **             Remove entry from head of write-op list
129 **         END WHILE
130 **
131 **     The async.queueMutex is always held during the <write-op list is
132 **     not empty> test, and when the entry is removed from the head
133 **     of the write-op list. Sometimes it is held for the interim
134 **     period (while the IO is performed), and sometimes it is
135 **     relinquished. It is relinquished if (a) the IO op is an
136 **     ASYNC_CLOSE or (b) when the file handle was opened, two of
137 **     the underlying systems handles were opened on the same
138 **     file-system entry.
139 **
140 **     If condition (b) above is true, then one file-handle
141 **     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
142 **     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
143 **     threads to perform write() operations. This means that read
144 **     operations are not blocked by asynchronous writes (although
145 **     asynchronous writes may still be blocked by reads).
146 **
147 **     This assumes that the OS keeps two handles open on the same file
148 **     properly in sync. That is, any read operation that starts after a
149 **     write operation on the same file system entry has completed returns
150 **     data consistent with the write. We also assume that if one thread
151 **     reads a file while another is writing it all bytes other than the
152 **     ones actually being written contain valid data.
153 **
154 **     If the above assumptions are not true, set the preprocessor symbol
155 **     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
156 */
157 
158 
/* TESTONLY(X) expands to X when NDEBUG is not defined and to nothing
** otherwise. It wraps bookkeeping that is only read by assert() statements.
*/
#ifndef NDEBUG
# define TESTONLY( X ) X
#else
# define TESTONLY( X )
#endif
164 
165 /*
166 ** PORTING FUNCTIONS
167 **
168 ** There are two definitions of the following functions. One for pthreads
169 ** compatible systems and one for Win32. These functions isolate the OS
170 ** specific code required by each platform.
171 **
172 ** The system uses three mutexes and a single condition variable. To
173 ** block on a mutex, async_mutex_enter() is called. The parameter passed
174 ** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
175 ** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
176 ** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
177 ** called with a parameter identifying the mutex being unlocked. Mutexes
178 ** are not recursive - it is an error to call async_mutex_enter() to
179 ** lock a mutex that is already locked, or to call async_mutex_leave()
180 ** to unlock a mutex that is not currently locked.
181 **
182 ** The async_cond_wait() and async_cond_signal() functions are modelled
183 ** on the pthreads functions with similar names. The first parameter to
184 ** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
185 ** is called the mutex identified by the second parameter must be held.
186 ** The mutex is unlocked, and the calling thread simultaneously begins
187 ** waiting for the condition variable to be signalled by another thread.
188 ** After another thread signals the condition variable, the calling
189 ** thread stops waiting, locks mutex eMutex and returns. The
190 ** async_cond_signal() function is used to signal the condition variable.
191 ** It is assumed that the mutex used by the thread calling async_cond_wait()
192 ** is held by the caller of async_cond_signal() (otherwise there would be
193 ** a race condition).
194 **
195 ** It is guaranteed that no other thread will call async_cond_wait() when
196 ** there is already a thread waiting on the condition variable.
197 **
198 ** The async_sched_yield() function is called to suggest to the operating
199 ** system that it would be a good time to shift the current thread off the
200 ** CPU. The system will still work if this function is not implemented
201 ** (it is not currently implemented for win32), but it might be marginally
202 ** more efficient if it is.
203 */
static void async_mutex_enter(int eMutex);           /* Block until eMutex is held */
static void async_mutex_leave(int eMutex);           /* Release eMutex */
static void async_cond_wait(int eCond, int eMutex);  /* Wait on eCond; eMutex held */
static void async_cond_signal(int eCond);            /* Wake the waiter on eCond */
static void async_sched_yield(void);                 /* Hint: yield the CPU */
209 
210 /*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
214 **
215 ** For pthreads builds, both of these functions are no-ops. For win32,
216 ** they provide an opportunity to initialize and finalize the required
217 ** mutex and condition variables.
218 **
219 ** If async_os_initialize() returns other than zero, then the initialization
220 ** fails and SQLITE_ERROR is returned to the user.
221 */
static int async_os_initialize(void);   /* Returns 0 on success, non-zero on failure */
static void async_os_shutdown(void);    /* Undo async_os_initialize() */
224 
225 /* Values for use as the 'eMutex' argument of the above functions. The
226 ** integer values assigned to these constants are important for assert()
227 ** statements that verify that mutexes are locked in the correct order.
228 ** Specifically, it is unsafe to try to lock mutex N while holding a lock
229 ** on mutex M if (M<=N).
230 */
#define ASYNC_MUTEX_LOCK    0   /* Protects async.pLock and the lock structures */
#define ASYNC_MUTEX_QUEUE   1   /* Protects the write-op queue, ioError and nFile */
#define ASYNC_MUTEX_WRITER  2   /* Allows only one writer thread at a time */

/* Values for use as the 'eCond' argument of the above functions. */
#define ASYNC_COND_QUEUE    0   /* The only condition variable in use */
237 
238 /*************************************************************************
239 ** Start of OS specific code.
240 */
241 #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
242 
243 /* The following block contains the win32 specific code. */
244 
245 #define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])
246 
247 static struct AsyncPrimitives {
248   int isInit;
249   DWORD aHolder[3];
250   CRITICAL_SECTION aMutex[3];
251   HANDLE aCond[1];
252 } primitives = { 0 };
253 
254 static int async_os_initialize(void){
255   if( !primitives.isInit ){
256     primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
257     if( primitives.aCond[0]==NULL ){
258       return 1;
259     }
260     InitializeCriticalSection(&primitives.aMutex[0]);
261     InitializeCriticalSection(&primitives.aMutex[1]);
262     InitializeCriticalSection(&primitives.aMutex[2]);
263     primitives.isInit = 1;
264   }
265   return 0;
266 }
267 static void async_os_shutdown(void){
268   if( primitives.isInit ){
269     DeleteCriticalSection(&primitives.aMutex[0]);
270     DeleteCriticalSection(&primitives.aMutex[1]);
271     DeleteCriticalSection(&primitives.aMutex[2]);
272     CloseHandle(primitives.aCond[0]);
273     primitives.isInit = 0;
274   }
275 }
276 
277 /* The following block contains the Win32 specific code. */
278 static void async_mutex_enter(int eMutex){
279   assert( eMutex==0 || eMutex==1 || eMutex==2 );
280   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
281   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
282   assert( eMutex!=0 || (!mutex_held(0)) );
283   EnterCriticalSection(&primitives.aMutex[eMutex]);
284   TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
285 }
286 static void async_mutex_leave(int eMutex){
287   assert( eMutex==0 || eMutex==1 || eMutex==2 );
288   assert( mutex_held(eMutex) );
289   TESTONLY( primitives.aHolder[eMutex] = 0; )
290   LeaveCriticalSection(&primitives.aMutex[eMutex]);
291 }
292 static void async_cond_wait(int eCond, int eMutex){
293   ResetEvent(primitives.aCond[eCond]);
294   async_mutex_leave(eMutex);
295   WaitForSingleObject(primitives.aCond[eCond], INFINITE);
296   async_mutex_enter(eMutex);
297 }
298 static void async_cond_signal(int eCond){
299   assert( mutex_held(ASYNC_MUTEX_QUEUE) );
300   SetEvent(primitives.aCond[eCond]);
301 }
/* Win32 version of async_sched_yield(). Currently a no-op. */
static void async_sched_yield(void){
  /* Todo: Find out if win32 offers anything like sched_yield() */
}
305 #else
306 
307 /* The following block contains the pthreads specific code. */
308 #include <pthread.h>
309 #include <sched.h>
310 
/* True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] array is only maintained through TESTONLY(), so this macro is
** only meaningful inside assert() statements.
*/
#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
312 
/* The pthreads primitives are statically initialized, so there is nothing
** to set up or tear down at run time. */
static int async_os_initialize(void){
  return 0;
}
static void async_os_shutdown(void){
}
315 
/* Mutexes, condition variable and debug bookkeeping for the pthreads port.
** Everything is statically initialized.
*/
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];   /* Indexed by the ASYNC_MUTEX_xxx values */
  pthread_cond_t aCond[1];     /* Indexed by the ASYNC_COND_xxx values */
  pthread_t aHolder[3];        /* Thread holding each mutex (see TESTONLY) */
} primitives = {
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER
  } , {
    PTHREAD_COND_INITIALIZER
  } , { 0, 0, 0 }
};
328 
329 static void async_mutex_enter(int eMutex){
330   assert( eMutex==0 || eMutex==1 || eMutex==2 );
331   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
332   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
333   assert( eMutex!=0 || (!mutex_held(0)) );
334   pthread_mutex_lock(&primitives.aMutex[eMutex]);
335   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
336 }
337 static void async_mutex_leave(int eMutex){
338   assert( eMutex==0 || eMutex==1 || eMutex==2 );
339   assert( mutex_held(eMutex) );
340   TESTONLY( primitives.aHolder[eMutex] = 0; )
341   pthread_mutex_unlock(&primitives.aMutex[eMutex]);
342 }
343 static void async_cond_wait(int eCond, int eMutex){
344   assert( eMutex==0 || eMutex==1 || eMutex==2 );
345   assert( mutex_held(eMutex) );
346   TESTONLY( primitives.aHolder[eMutex] = 0; )
347   pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
348   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
349 }
350 static void async_cond_signal(int eCond){
351   assert( mutex_held(ASYNC_MUTEX_QUEUE) );
352   pthread_cond_signal(&primitives.aCond[eCond]);
353 }
/* pthreads version of async_sched_yield(): hand the CPU to another thread.
** The return value of sched_yield() is not used. */
static void async_sched_yield(void){
  (void)sched_yield();
}
357 #endif
358 /*
359 ** End of OS specific code.
360 *************************************************************************/
361 
/* Assert that the calling thread holds mutex X (an ASYNC_MUTEX_xxx value). */
#define assert_mutex_is_held(X) assert( mutex_held(X) )
363 
364 
/* SQLITE_ASYNC_TWO_FILEHANDLES defaults to 1 unless the build defines it.
** See "The writer thread" in the THREAD SAFETY NOTES comment near the top
** of this file for the assumptions that a value of 1 relies on.
*/
#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
#define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif
369 
370 /*
371 ** State information is held in the static variable "async" defined
372 ** as the following structure.
373 **
374 ** Both async.ioError and async.nFile are protected by async.queueMutex.
375 */
static struct TestAsyncStaticData {
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
  volatile int ioDelay;        /* Extra delay between write operations */
  volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
  volatile int bLockFiles;     /* Current value of "lockfiles" parameter */
  int ioError;                 /* True if an IO error has occurred */
  int nFile;                   /* Number of open files (from sqlite pov) */
} async = { 0,0,0,0,0,1,0,0 }; /* Everything zero except bLockFiles, which is 1 */
386 
/* Possible values of AsyncWrite.op. These values are also used as indexes
** into azOpcodeName[], so the two lists must stay in the same order. */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7
396 
/* Names of opcodes.  Used for debugging only.
** Make sure these stay in sync with the macros above!
** The array is indexed by the ASYNC_xxx opcode value.
*/
static const char *azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};
403 
404 /*
405 ** Entries on the write-op queue are instances of the AsyncWrite
406 ** structure, defined here.
407 **
408 ** The interpretation of the iOffset and nByte variables varies depending
409 ** on the value of AsyncWrite.op:
410 **
411 ** ASYNC_NOOP:
412 **     No values used.
413 **
414 ** ASYNC_WRITE:
415 **     iOffset -> Offset in file to write to.
416 **     nByte   -> Number of bytes of data to write (pointed to by zBuf).
417 **
418 ** ASYNC_SYNC:
419 **     nByte   -> flags to pass to sqlite3OsSync().
420 **
421 ** ASYNC_TRUNCATE:
422 **     iOffset -> Size to truncate file to.
423 **     nByte   -> Unused.
424 **
425 ** ASYNC_CLOSE:
426 **     iOffset -> Unused.
427 **     nByte   -> Unused.
428 **
429 ** ASYNC_DELETE:
430 **     iOffset -> Contains the "syncDir" flag.
431 **     nByte   -> Number of bytes of zBuf points to (file name).
432 **
433 ** ASYNC_OPENEXCLUSIVE:
434 **     iOffset -> Value of "delflag".
435 **     nByte   -> Number of bytes of zBuf points to (file name).
436 **
437 ** ASYNC_UNLOCK:
438 **     nByte   -> Argument to sqlite3OsUnlock().
439 **
440 **
441 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
442 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
443 ** single blob, so is deleted when sqlite3_free() is called on the parent
444 ** structure.
445 */
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  sqlite_int64 iOffset;        /* See above */
  int nByte;                   /* See above */
  char *zBuf;                  /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
  AsyncWrite *pNext;           /* Next write operation (to any file) */
};
454 
455 /*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and stored in the async.pLock linked list.
** Entries are identified by the full pathname of the opened file (see
** findLock()).
460 **
461 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock
462 ** structures, one for each handle currently open on the file.
463 **
464 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
465 ** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
466 ** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
467 ** Otherwise, pFile is a file handle opened on the file in question and
468 ** used to obtain the file-system locks required by database connections
469 ** within this process.
470 **
471 ** See comments above the asyncLock() function for more details on
472 ** the implementation of database locking used by this backend.
473 */
struct AsyncLock {
  char *zFile;                /* File name; compared by findLock() */
  int nFile;                  /* Number of bytes in zFile */
  sqlite3_file *pFile;        /* Handle used for file-system locks, or NULL */
  int eLock;                  /* Lock level currently held on pFile */
  AsyncFileLock *pList;       /* List of AsyncFileLock structures for this file */
  AsyncLock *pNext;           /* Next in linked list headed by async.pLock */
};
482 
483 /*
484 ** An instance of the following structure is allocated along with each
485 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
486 ** file was opened with the SQLITE_OPEN_MAIN_DB.
487 */
struct AsyncFileLock {
  int eLock;                /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;           /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;     /* Next entry in the AsyncLock.pList list */
};
493 
494 /*
495 ** The AsyncFile structure is a subclass of sqlite3_file used for
496 ** asynchronous IO.
497 **
498 ** All of the actual data for the structure is stored in the structure
499 ** pointed to by AsyncFile.pData, which is allocated as part of the
500 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
501 ** lifetime of the AsyncFile structure is ended by the caller after OsClose()
502 ** is called, but the data in AsyncFileData may be required by the
503 ** writer thread after that point.
504 */
struct AsyncFile {
  sqlite3_io_methods *pMethod;   /* Method table; first member, as in sqlite3_file */
  AsyncFileData *pData;          /* All other state for this file handle */
};
/* Per-handle state referenced by AsyncFile.pData. The preallocated closeOp
** member is the entry that asyncClose() puts on the write-op queue. */
struct AsyncFileData {
  char *zName;               /* Underlying OS filename - used for debugging */
  int nName;                 /* Number of characters in zName */
  sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
  sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
  AsyncFileLock lock;        /* Lock state for this handle */
  AsyncLock *pLock;          /* AsyncLock object for this file system entry */
  AsyncWrite closeOp;        /* Preallocated close operation */
};
518 
519 /*
520 ** Add an entry to the end of the global write-op list. pWrite should point
521 ** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
522 ** thread will call sqlite3_free() to free the structure after the specified
523 ** operation has been completed.
524 **
525 ** Once an AsyncWrite structure has been added to the list, it becomes the
526 ** property of the writer thread and must not be read or modified by the
527 ** caller.
528 */
529 static void addAsyncWrite(AsyncWrite *pWrite){
530   /* We must hold the queue mutex in order to modify the queue pointers */
531   if( pWrite->op!=ASYNC_UNLOCK ){
532     async_mutex_enter(ASYNC_MUTEX_QUEUE);
533   }
534 
535   /* Add the record to the end of the write-op queue */
536   assert( !pWrite->pNext );
537   if( async.pQueueLast ){
538     assert( async.pQueueFirst );
539     async.pQueueLast->pNext = pWrite;
540   }else{
541     async.pQueueFirst = pWrite;
542   }
543   async.pQueueLast = pWrite;
544   ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
545          pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
546 
547   if( pWrite->op==ASYNC_CLOSE ){
548     async.nFile--;
549   }
550 
551   /* The writer thread might have been idle because there was nothing
552   ** on the write-op queue for it to do.  So wake it up. */
553   async_cond_signal(ASYNC_COND_QUEUE);
554 
555   /* Drop the queue mutex */
556   if( pWrite->op!=ASYNC_UNLOCK ){
557     async_mutex_leave(ASYNC_MUTEX_QUEUE);
558   }
559 }
560 
561 /*
562 ** Increment async.nFile in a thread-safe manner.
563 */
564 static void incrOpenFileCount(void){
565   /* We must hold the queue mutex in order to modify async.nFile */
566   async_mutex_enter(ASYNC_MUTEX_QUEUE);
567   if( async.nFile==0 ){
568     async.ioError = SQLITE_OK;
569   }
570   async.nFile++;
571   async_mutex_leave(ASYNC_MUTEX_QUEUE);
572 }
573 
574 /*
575 ** This is a utility function to allocate and populate a new AsyncWrite
576 ** structure and insert it (via addAsyncWrite() ) into the global list.
577 */
578 static int addNewAsyncWrite(
579   AsyncFileData *pFileData,
580   int op,
581   sqlite3_int64 iOffset,
582   int nByte,
583   const char *zByte
584 ){
585   AsyncWrite *p;
586   if( op!=ASYNC_CLOSE && async.ioError ){
587     return async.ioError;
588   }
589   p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
590   if( !p ){
591     /* The upper layer does not expect operations like OsWrite() to
592     ** return SQLITE_NOMEM. This is partly because under normal conditions
593     ** SQLite is required to do rollback without calling malloc(). So
594     ** if malloc() fails here, treat it as an I/O error. The above
595     ** layer knows how to handle that.
596     */
597     return SQLITE_IOERR;
598   }
599   p->op = op;
600   p->iOffset = iOffset;
601   p->nByte = nByte;
602   p->pFileData = pFileData;
603   p->pNext = 0;
604   if( zByte ){
605     p->zBuf = (char *)&p[1];
606     memcpy(p->zBuf, zByte, nByte);
607   }else{
608     p->zBuf = 0;
609   }
610   addAsyncWrite(p);
611   return SQLITE_OK;
612 }
613 
614 /*
615 ** Close the file. This just adds an entry to the write-op list, the file is
616 ** not actually closed.
617 */
618 static int asyncClose(sqlite3_file *pFile){
619   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
620 
621   /* Unlock the file, if it is locked */
622   async_mutex_enter(ASYNC_MUTEX_LOCK);
623   p->lock.eLock = 0;
624   async_mutex_leave(ASYNC_MUTEX_LOCK);
625 
626   addAsyncWrite(&p->closeOp);
627   return SQLITE_OK;
628 }
629 
630 /*
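** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
** writing to the underlying file, this function adds an entry to the end of
** the global AsyncWrite list. SQLITE_OK is returned on success. If the
** entry cannot be allocated, SQLITE_IOERR (not SQLITE_NOMEM) is returned,
** and if an earlier I/O error is recorded in async.ioError, that error
** code is returned instead.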
635 */
636 static int asyncWrite(
637   sqlite3_file *pFile,
638   const void *pBuf,
639   int amt,
640   sqlite3_int64 iOff
641 ){
642   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
643   return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
644 }
645 
646 /*
647 ** Read data from the file. First we read from the filesystem, then adjust
648 ** the contents of the buffer based on ASYNC_WRITE operations in the
649 ** write-op queue.
650 **
651 ** This method holds the mutex from start to finish.
652 */
653 static int asyncRead(
654   sqlite3_file *pFile,
655   void *zOut,
656   int iAmt,
657   sqlite3_int64 iOffset
658 ){
659   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
660   int rc = SQLITE_OK;
661   sqlite3_int64 filesize;
662   int nRead;
663   sqlite3_file *pBase = p->pBaseRead;
664 
665   /* Grab the write queue mutex for the duration of the call */
666   async_mutex_enter(ASYNC_MUTEX_QUEUE);
667 
668   /* If an I/O error has previously occurred in this virtual file
669   ** system, then all subsequent operations fail.
670   */
671   if( async.ioError!=SQLITE_OK ){
672     rc = async.ioError;
673     goto asyncread_out;
674   }
675 
676   if( pBase->pMethods ){
677     rc = pBase->pMethods->xFileSize(pBase, &filesize);
678     if( rc!=SQLITE_OK ){
679       goto asyncread_out;
680     }
681     nRead = MIN(filesize - iOffset, iAmt);
682     if( nRead>0 ){
683       rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
684       ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
685     }
686   }
687 
688   if( rc==SQLITE_OK ){
689     AsyncWrite *pWrite;
690     char *zName = p->zName;
691 
692     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
693       if( pWrite->op==ASYNC_WRITE && (
694         (pWrite->pFileData==p) ||
695         (zName && pWrite->pFileData->zName==zName)
696       )){
697         int iBeginOut = (pWrite->iOffset-iOffset);
698         int iBeginIn = -iBeginOut;
699         int nCopy;
700 
701         if( iBeginIn<0 ) iBeginIn = 0;
702         if( iBeginOut<0 ) iBeginOut = 0;
703         nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
704 
705         if( nCopy>0 ){
706           memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
707           ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
708         }
709       }
710     }
711   }
712 
713 asyncread_out:
714   async_mutex_leave(ASYNC_MUTEX_QUEUE);
715   return rc;
716 }
717 
718 /*
719 ** Truncate the file to nByte bytes in length. This just adds an entry to
720 ** the write-op list, no IO actually takes place.
721 */
722 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
723   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
724   return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
725 }
726 
727 /*
728 ** Sync the file. This just adds an entry to the write-op list, the
729 ** sync() is done later by sqlite3_async_flush().
730 */
731 static int asyncSync(sqlite3_file *pFile, int flags){
732   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
733   return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
734 }
735 
736 /*
737 ** Read the size of the file. First we read the size of the file system
738 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
739 ** currently in the write-op list.
740 **
741 ** This method holds the mutex from start to finish.
742 */
743 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
744   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
745   int rc = SQLITE_OK;
746   sqlite3_int64 s = 0;
747   sqlite3_file *pBase;
748 
749   async_mutex_enter(ASYNC_MUTEX_QUEUE);
750 
751   /* Read the filesystem size from the base file. If pBaseRead is NULL, this
752   ** means the file hasn't been opened yet. In this case all relevant data
753   ** must be in the write-op queue anyway, so we can omit reading from the
754   ** file-system.
755   */
756   pBase = p->pBaseRead;
757   if( pBase->pMethods ){
758     rc = pBase->pMethods->xFileSize(pBase, &s);
759   }
760 
761   if( rc==SQLITE_OK ){
762     AsyncWrite *pWrite;
763     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
764       if( pWrite->op==ASYNC_DELETE
765        && p->zName
766        && strcmp(p->zName, pWrite->zBuf)==0
767       ){
768         s = 0;
769       }else if( pWrite->pFileData && (
770           (pWrite->pFileData==p)
771        || (p->zName && pWrite->pFileData->zName==p->zName)
772       )){
773         switch( pWrite->op ){
774           case ASYNC_WRITE:
775             s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
776             break;
777           case ASYNC_TRUNCATE:
778             s = MIN(s, pWrite->iOffset);
779             break;
780         }
781       }
782     }
783     *piSize = s;
784   }
785   async_mutex_leave(ASYNC_MUTEX_QUEUE);
786   return rc;
787 }
788 
789 /*
790 ** Lock or unlock the actual file-system entry.
791 */
792 static int getFileLock(AsyncLock *pLock){
793   int rc = SQLITE_OK;
794   AsyncFileLock *pIter;
795   int eRequired = 0;
796 
797   if( pLock->pFile ){
798     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
799       assert(pIter->eAsyncLock>=pIter->eLock);
800       if( pIter->eAsyncLock>eRequired ){
801         eRequired = pIter->eAsyncLock;
802         assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
803       }
804     }
805 
806     if( eRequired>pLock->eLock ){
807       rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
808       if( rc==SQLITE_OK ){
809         pLock->eLock = eRequired;
810       }
811     }
812     else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
813       rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
814       if( rc==SQLITE_OK ){
815         pLock->eLock = eRequired;
816       }
817     }
818   }
819 
820   return rc;
821 }
822 
823 /*
824 ** Return the AsyncLock structure from the global async.pLock list
825 ** associated with the file-system entry identified by path zName
826 ** (a string of nName bytes). If no such structure exists, return 0.
827 */
828 static AsyncLock *findLock(const char *zName, int nName){
829   AsyncLock *p = async.pLock;
830   while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
831     p = p->pNext;
832   }
833   return p;
834 }
835 
836 /*
837 ** The following two methods - asyncLock() and asyncUnlock() - are used
838 ** to obtain and release locks on database files opened with the
839 ** asynchronous backend.
840 */
841 static int asyncLock(sqlite3_file *pFile, int eLock){
842   int rc = SQLITE_OK;
843   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
844 
845   if( p->zName ){
846     async_mutex_enter(ASYNC_MUTEX_LOCK);
847     if( p->lock.eLock<eLock ){
848       AsyncLock *pLock = p->pLock;
849       AsyncFileLock *pIter;
850       assert(pLock && pLock->pList);
851       for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
852         if( pIter!=&p->lock && (
853           (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
854           (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
855           (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
856           (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
857         )){
858           rc = SQLITE_BUSY;
859         }
860       }
861       if( rc==SQLITE_OK ){
862         p->lock.eLock = eLock;
863         p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
864       }
865       assert(p->lock.eAsyncLock>=p->lock.eLock);
866       if( rc==SQLITE_OK ){
867         rc = getFileLock(pLock);
868       }
869     }
870     async_mutex_leave(ASYNC_MUTEX_LOCK);
871   }
872 
873   ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
874   return rc;
875 }
876 static int asyncUnlock(sqlite3_file *pFile, int eLock){
877   int rc = SQLITE_OK;
878   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
879   if( p->zName ){
880     AsyncFileLock *pLock = &p->lock;
881     async_mutex_enter(ASYNC_MUTEX_QUEUE);
882     async_mutex_enter(ASYNC_MUTEX_LOCK);
883     pLock->eLock = MIN(pLock->eLock, eLock);
884     rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
885     async_mutex_leave(ASYNC_MUTEX_LOCK);
886     async_mutex_leave(ASYNC_MUTEX_QUEUE);
887   }
888   return rc;
889 }
890 
891 /*
892 ** This function is called when the pager layer first opens a database file
893 ** and is checking for a hot-journal.
894 */
895 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
896   int ret = 0;
897   AsyncFileLock *pIter;
898   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
899 
900   async_mutex_enter(ASYNC_MUTEX_LOCK);
901   for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
902     if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
903       ret = 1;
904     }
905   }
906   async_mutex_leave(ASYNC_MUTEX_LOCK);
907 
908   ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
909   *pResOut = ret;
910   return SQLITE_OK;
911 }
912 
913 /*
914 ** sqlite3_file_control() implementation.
915 */
916 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
917   switch( op ){
918     case SQLITE_FCNTL_LOCKSTATE: {
919       async_mutex_enter(ASYNC_MUTEX_LOCK);
920       *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
921       async_mutex_leave(ASYNC_MUTEX_LOCK);
922       return SQLITE_OK;
923     }
924   }
925   return SQLITE_ERROR;
926 }
927 
928 /*
929 ** Return the device characteristics and sector-size of the device. It
930 ** is not tricky to implement these correctly, as this backend might
931 ** not have an open file handle at this point.
932 */
933 static int asyncSectorSize(sqlite3_file *pFile){
934   return 512;
935 }
936 static int asyncDeviceCharacteristics(sqlite3_file *pFile){
937   return 0;
938 }
939 
940 static int unlinkAsyncFile(AsyncFileData *pData){
941   AsyncFileLock **ppIter;
942   int rc = SQLITE_OK;
943 
944   if( pData->zName ){
945     AsyncLock *pLock = pData->pLock;
946     for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
947       if( (*ppIter)==&pData->lock ){
948         *ppIter = pData->lock.pNext;
949         break;
950       }
951     }
952     if( !pLock->pList ){
953       AsyncLock **pp;
954       if( pLock->pFile ){
955         pLock->pFile->pMethods->xClose(pLock->pFile);
956       }
957       for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
958       *pp = pLock->pNext;
959       sqlite3_free(pLock);
960     }else{
961       rc = getFileLock(pLock);
962     }
963   }
964 
965   return rc;
966 }
967 
968 /*
969 ** The parameter passed to this function is a copy of a 'flags' parameter
970 ** passed to this modules xOpen() method. This function returns true
971 ** if the file should be opened asynchronously, or false if it should
972 ** be opened immediately.
973 **
974 ** If the file is to be opened asynchronously, then asyncOpen() will add
975 ** an entry to the event queue and the file will not actually be opened
976 ** until the event is processed. Otherwise, the file is opened directly
977 ** by the caller.
978 */
979 static int doAsynchronousOpen(int flags){
980   return (flags&SQLITE_OPEN_CREATE) && (
981       (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
982       (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
983       (flags&SQLITE_OPEN_DELETEONCLOSE)
984   );
985 }
986 
987 /*
988 ** Open a file.
989 */
990 static int asyncOpen(
991   sqlite3_vfs *pAsyncVfs,
992   const char *zName,
993   sqlite3_file *pFile,
994   int flags,
995   int *pOutFlags
996 ){
997   static sqlite3_io_methods async_methods = {
998     1,                               /* iVersion */
999     asyncClose,                      /* xClose */
1000     asyncRead,                       /* xRead */
1001     asyncWrite,                      /* xWrite */
1002     asyncTruncate,                   /* xTruncate */
1003     asyncSync,                       /* xSync */
1004     asyncFileSize,                   /* xFileSize */
1005     asyncLock,                       /* xLock */
1006     asyncUnlock,                     /* xUnlock */
1007     asyncCheckReservedLock,          /* xCheckReservedLock */
1008     asyncFileControl,                /* xFileControl */
1009     asyncSectorSize,                 /* xSectorSize */
1010     asyncDeviceCharacteristics       /* xDeviceCharacteristics */
1011   };
1012 
1013   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1014   AsyncFile *p = (AsyncFile *)pFile;
1015   int nName = 0;
1016   int rc = SQLITE_OK;
1017   int nByte;
1018   AsyncFileData *pData;
1019   AsyncLock *pLock = 0;
1020   char *z;
1021   int isAsyncOpen = doAsynchronousOpen(flags);
1022 
1023   /* If zName is NULL, then the upper layer is requesting an anonymous file */
1024   if( zName ){
1025     nName = strlen(zName)+1;
1026   }
1027 
1028   nByte = (
1029     sizeof(AsyncFileData) +        /* AsyncFileData structure */
1030     2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
1031     nName                          /* AsyncFileData.zName */
1032   );
1033   z = sqlite3_malloc(nByte);
1034   if( !z ){
1035     return SQLITE_NOMEM;
1036   }
1037   memset(z, 0, nByte);
1038   pData = (AsyncFileData*)z;
1039   z += sizeof(pData[0]);
1040   pData->pBaseRead = (sqlite3_file*)z;
1041   z += pVfs->szOsFile;
1042   pData->pBaseWrite = (sqlite3_file*)z;
1043   pData->closeOp.pFileData = pData;
1044   pData->closeOp.op = ASYNC_CLOSE;
1045 
1046   if( zName ){
1047     z += pVfs->szOsFile;
1048     pData->zName = z;
1049     pData->nName = nName;
1050     memcpy(pData->zName, zName, nName);
1051   }
1052 
1053   if( !isAsyncOpen ){
1054     int flagsout;
1055     rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1056     if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){
1057       rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1058     }
1059     if( pOutFlags ){
1060       *pOutFlags = flagsout;
1061     }
1062   }
1063 
1064   async_mutex_enter(ASYNC_MUTEX_LOCK);
1065 
1066   if( zName && rc==SQLITE_OK ){
1067     pLock = findLock(pData->zName, pData->nName);
1068     if( !pLock ){
1069       int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1070       pLock = (AsyncLock *)sqlite3_malloc(nByte);
1071       if( pLock ){
1072         memset(pLock, 0, nByte);
1073         if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){
1074           pLock->pFile = (sqlite3_file *)&pLock[1];
1075           rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1076           if( rc!=SQLITE_OK ){
1077             sqlite3_free(pLock);
1078             pLock = 0;
1079           }
1080         }
1081         if( pLock ){
1082           pLock->nFile = pData->nName;
1083           pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1084           memcpy(pLock->zFile, pData->zName, pLock->nFile);
1085           pLock->pNext = async.pLock;
1086           async.pLock = pLock;
1087         }
1088       }else{
1089         rc = SQLITE_NOMEM;
1090       }
1091     }
1092   }
1093 
1094   if( rc==SQLITE_OK ){
1095     p->pMethod = &async_methods;
1096     p->pData = pData;
1097 
1098     /* Link AsyncFileData.lock into the linked list of
1099     ** AsyncFileLock structures for this file.
1100     */
1101     if( zName ){
1102       pData->lock.pNext = pLock->pList;
1103       pLock->pList = &pData->lock;
1104       pData->zName = pLock->zFile;
1105     }
1106   }else{
1107     if( pData->pBaseRead->pMethods ){
1108       pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1109     }
1110     if( pData->pBaseWrite->pMethods ){
1111       pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1112     }
1113     sqlite3_free(pData);
1114   }
1115 
1116   async_mutex_leave(ASYNC_MUTEX_LOCK);
1117 
1118   if( rc==SQLITE_OK ){
1119     incrOpenFileCount();
1120     pData->pLock = pLock;
1121   }
1122 
1123   if( rc==SQLITE_OK && isAsyncOpen ){
1124     rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1125     if( rc==SQLITE_OK ){
1126       if( pOutFlags ) *pOutFlags = flags;
1127     }else{
1128       async_mutex_enter(ASYNC_MUTEX_LOCK);
1129       unlinkAsyncFile(pData);
1130       async_mutex_leave(ASYNC_MUTEX_LOCK);
1131       sqlite3_free(pData);
1132     }
1133   }
1134   if( rc!=SQLITE_OK ){
1135     p->pMethod = 0;
1136   }
1137   return rc;
1138 }
1139 
1140 /*
1141 ** Implementation of sqlite3OsDelete. Add an entry to the end of the
1142 ** write-op queue to perform the delete.
1143 */
1144 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
1145   return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
1146 }
1147 
1148 /*
1149 ** Implementation of sqlite3OsAccess. This method holds the mutex from
1150 ** start to finish.
1151 */
1152 static int asyncAccess(
1153   sqlite3_vfs *pAsyncVfs,
1154   const char *zName,
1155   int flags,
1156   int *pResOut
1157 ){
1158   int rc;
1159   int ret;
1160   AsyncWrite *p;
1161   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1162 
1163   assert(flags==SQLITE_ACCESS_READWRITE
1164       || flags==SQLITE_ACCESS_READ
1165       || flags==SQLITE_ACCESS_EXISTS
1166   );
1167 
1168   async_mutex_enter(ASYNC_MUTEX_QUEUE);
1169   rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1170   if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1171     for(p=async.pQueueFirst; p; p = p->pNext){
1172       if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1173         ret = 0;
1174       }else if( p->op==ASYNC_OPENEXCLUSIVE
1175              && p->pFileData->zName
1176              && 0==strcmp(p->pFileData->zName, zName)
1177       ){
1178         ret = 1;
1179       }
1180     }
1181   }
1182   ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1183     flags==SQLITE_ACCESS_READWRITE?"read-write":
1184     flags==SQLITE_ACCESS_READ?"read":"exists"
1185     , zName, ret)
1186   );
1187   async_mutex_leave(ASYNC_MUTEX_QUEUE);
1188   *pResOut = ret;
1189   return rc;
1190 }
1191 
1192 /*
1193 ** Fill in zPathOut with the full path to the file identified by zPath.
1194 */
1195 static int asyncFullPathname(
1196   sqlite3_vfs *pAsyncVfs,
1197   const char *zPath,
1198   int nPathOut,
1199   char *zPathOut
1200 ){
1201   int rc;
1202   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1203   rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1204 
1205   /* Because of the way intra-process file locking works, this backend
1206   ** needs to return a canonical path. The following block assumes the
1207   ** file-system uses unix style paths.
1208   */
1209   if( rc==SQLITE_OK ){
1210     int i, j;
1211     int n = nPathOut;
1212     char *z = zPathOut;
1213     while( n>1 && z[n-1]=='/' ){ n--; }
1214     for(i=j=0; i<n; i++){
1215       if( z[i]=='/' ){
1216         if( z[i+1]=='/' ) continue;
1217         if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1218           i += 1;
1219           continue;
1220         }
1221         if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1222           while( j>0 && z[j-1]!='/' ){ j--; }
1223           if( j>0 ){ j--; }
1224           i += 2;
1225           continue;
1226         }
1227       }
1228       z[j++] = z[i];
1229     }
1230     z[j] = 0;
1231   }
1232 
1233   return rc;
1234 }
1235 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1236   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1237   return pVfs->xDlOpen(pVfs, zPath);
1238 }
1239 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1240   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1241   pVfs->xDlError(pVfs, nByte, zErrMsg);
1242 }
1243 static void (*asyncDlSym(
1244   sqlite3_vfs *pAsyncVfs,
1245   void *pHandle,
1246   const char *zSymbol
1247 ))(void){
1248   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1249   return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1250 }
1251 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1252   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1253   pVfs->xDlClose(pVfs, pHandle);
1254 }
1255 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1256   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1257   return pVfs->xRandomness(pVfs, nByte, zBufOut);
1258 }
1259 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1260   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1261   return pVfs->xSleep(pVfs, nMicro);
1262 }
1263 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1264   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1265   return pVfs->xCurrentTime(pVfs, pTimeOut);
1266 }
1267 
1268 static sqlite3_vfs async_vfs = {
1269   1,                    /* iVersion */
1270   sizeof(AsyncFile),    /* szOsFile */
1271   0,                    /* mxPathname */
1272   0,                    /* pNext */
1273   SQLITEASYNC_VFSNAME,  /* zName */
1274   0,                    /* pAppData */
1275   asyncOpen,            /* xOpen */
1276   asyncDelete,          /* xDelete */
1277   asyncAccess,          /* xAccess */
1278   asyncFullPathname,    /* xFullPathname */
1279   asyncDlOpen,          /* xDlOpen */
1280   asyncDlError,         /* xDlError */
1281   asyncDlSym,           /* xDlSym */
1282   asyncDlClose,         /* xDlClose */
1283   asyncRandomness,      /* xDlError */
1284   asyncSleep,           /* xDlSym */
1285   asyncCurrentTime      /* xDlClose */
1286 };
1287 
1288 /*
1289 ** This procedure runs in a separate thread, reading messages off of the
1290 ** write queue and processing them one by one.
1291 **
1292 ** If async.writerHaltNow is true, then this procedure exits
1293 ** after processing a single message.
1294 **
1295 ** If async.writerHaltWhenIdle is true, then this procedure exits when
1296 ** the write queue is empty.
1297 **
1298 ** If both of the above variables are false, this procedure runs
1299 ** indefinately, waiting for operations to be added to the write queue
1300 ** and processing them in the order in which they arrive.
1301 **
1302 ** An artifical delay of async.ioDelay milliseconds is inserted before
1303 ** each write operation in order to simulate the effect of a slow disk.
1304 **
1305 ** Only one instance of this procedure may be running at a time.
1306 */
1307 static void asyncWriterThread(void){
1308   sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1309   AsyncWrite *p = 0;
1310   int rc = SQLITE_OK;
1311   int holdingMutex = 0;
1312 
1313   async_mutex_enter(ASYNC_MUTEX_WRITER);
1314 
1315   while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1316     int doNotFree = 0;
1317     sqlite3_file *pBase = 0;
1318 
1319     if( !holdingMutex ){
1320       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1321     }
1322     while( (p = async.pQueueFirst)==0 ){
1323       if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1324         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1325         break;
1326       }else{
1327         ASYNC_TRACE(("IDLE\n"));
1328         async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1329         ASYNC_TRACE(("WAKEUP\n"));
1330       }
1331     }
1332     if( p==0 ) break;
1333     holdingMutex = 1;
1334 
1335     /* Right now this thread is holding the mutex on the write-op queue.
1336     ** Variable 'p' points to the first entry in the write-op queue. In
1337     ** the general case, we hold on to the mutex for the entire body of
1338     ** the loop.
1339     **
1340     ** However in the cases enumerated below, we relinquish the mutex,
1341     ** perform the IO, and then re-request the mutex before removing 'p' from
1342     ** the head of the write-op queue. The idea is to increase concurrency with
1343     ** sqlite threads.
1344     **
1345     **     * An ASYNC_CLOSE operation.
1346     **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1347     **       the mutex, call the underlying xOpenExclusive() function, then
1348     **       re-aquire the mutex before seting the AsyncFile.pBaseRead
1349     **       variable.
1350     **     * ASYNC_SYNC and ASYNC_WRITE operations, if
1351     **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1352     **       file-handles are open for the particular file being "synced".
1353     */
1354     if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1355       p->op = ASYNC_NOOP;
1356     }
1357     if( p->pFileData ){
1358       pBase = p->pFileData->pBaseWrite;
1359       if(
1360         p->op==ASYNC_CLOSE ||
1361         p->op==ASYNC_OPENEXCLUSIVE ||
1362         (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1363       ){
1364         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1365         holdingMutex = 0;
1366       }
1367       if( !pBase->pMethods ){
1368         pBase = p->pFileData->pBaseRead;
1369       }
1370     }
1371 
1372     switch( p->op ){
1373       case ASYNC_NOOP:
1374         break;
1375 
1376       case ASYNC_WRITE:
1377         assert( pBase );
1378         ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1379                 p->pFileData->zName, p->nByte, p->iOffset));
1380         rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1381         break;
1382 
1383       case ASYNC_SYNC:
1384         assert( pBase );
1385         ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1386         rc = pBase->pMethods->xSync(pBase, p->nByte);
1387         break;
1388 
1389       case ASYNC_TRUNCATE:
1390         assert( pBase );
1391         ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1392                 p->pFileData->zName, p->iOffset));
1393         rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1394         break;
1395 
1396       case ASYNC_CLOSE: {
1397         AsyncFileData *pData = p->pFileData;
1398         ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1399         if( pData->pBaseWrite->pMethods ){
1400           pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1401         }
1402         if( pData->pBaseRead->pMethods ){
1403           pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1404         }
1405 
1406         /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1407         ** structures for this file. Obtain the async.lockMutex mutex
1408         ** before doing so.
1409         */
1410         async_mutex_enter(ASYNC_MUTEX_LOCK);
1411         rc = unlinkAsyncFile(pData);
1412         async_mutex_leave(ASYNC_MUTEX_LOCK);
1413 
1414         if( !holdingMutex ){
1415           async_mutex_enter(ASYNC_MUTEX_QUEUE);
1416           holdingMutex = 1;
1417         }
1418         assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1419         async.pQueueFirst = p->pNext;
1420         sqlite3_free(pData);
1421         doNotFree = 1;
1422         break;
1423       }
1424 
1425       case ASYNC_UNLOCK: {
1426         AsyncWrite *pIter;
1427         AsyncFileData *pData = p->pFileData;
1428         int eLock = p->nByte;
1429 
1430         /* When a file is locked by SQLite using the async backend, it is
1431         ** locked within the 'real' file-system synchronously. When it is
1432         ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1433         ** unlock the file asynchronously. The design of the async backend
1434         ** requires that the 'real' file-system file be locked from the
1435         ** time that SQLite first locks it (and probably reads from it)
1436         ** until all asynchronous write events that were scheduled before
1437         ** SQLite unlocked the file have been processed.
1438         **
1439         ** This is more complex if SQLite locks and unlocks the file multiple
1440         ** times in quick succession. For example, if SQLite does:
1441         **
1442         **   lock, write, unlock, lock, write, unlock
1443         **
1444         ** Each "lock" operation locks the file immediately. Each "write"
1445         ** and "unlock" operation adds an event to the event queue. If the
1446         ** second "lock" operation is performed before the first "unlock"
1447         ** operation has been processed asynchronously, then the first
1448         ** "unlock" cannot be safely processed as is, since this would mean
1449         ** the file was unlocked when the second "write" operation is
1450         ** processed. To work around this, when processing an ASYNC_UNLOCK
1451         ** operation, SQLite:
1452         **
1453         **   1) Unlocks the file to the minimum of the argument passed to
1454         **      the xUnlock() call and the current lock from SQLite's point
1455         **      of view, and
1456         **
1457         **   2) Only unlocks the file at all if this event is the last
1458         **      ASYNC_UNLOCK event on this file in the write-queue.
1459         */
1460         assert( holdingMutex==1 );
1461         assert( async.pQueueFirst==p );
1462         for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1463           if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1464         }
1465         if( !pIter ){
1466           async_mutex_enter(ASYNC_MUTEX_LOCK);
1467           pData->lock.eAsyncLock = MIN(
1468               pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1469           );
1470           assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1471           rc = getFileLock(pData->pLock);
1472           async_mutex_leave(ASYNC_MUTEX_LOCK);
1473         }
1474         break;
1475       }
1476 
1477       case ASYNC_DELETE:
1478         ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1479         rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1480         break;
1481 
1482       case ASYNC_OPENEXCLUSIVE: {
1483         int flags = (int)p->iOffset;
1484         AsyncFileData *pData = p->pFileData;
1485         ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1486         assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1487         rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1488         assert( holdingMutex==0 );
1489         async_mutex_enter(ASYNC_MUTEX_QUEUE);
1490         holdingMutex = 1;
1491         break;
1492       }
1493 
1494       default: assert(!"Illegal value for AsyncWrite.op");
1495     }
1496 
1497     /* If we didn't hang on to the mutex during the IO op, obtain it now
1498     ** so that the AsyncWrite structure can be safely removed from the
1499     ** global write-op queue.
1500     */
1501     if( !holdingMutex ){
1502       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1503       holdingMutex = 1;
1504     }
1505     /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1506     if( p==async.pQueueLast ){
1507       async.pQueueLast = 0;
1508     }
1509     if( !doNotFree ){
1510       assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1511       async.pQueueFirst = p->pNext;
1512       sqlite3_free(p);
1513     }
1514     assert( holdingMutex );
1515 
1516     /* An IO error has occurred. We cannot report the error back to the
1517     ** connection that requested the I/O since the error happened
1518     ** asynchronously.  The connection has already moved on.  There
1519     ** really is nobody to report the error to.
1520     **
1521     ** The file for which the error occurred may have been a database or
1522     ** journal file. Regardless, none of the currently queued operations
1523     ** associated with the same database should now be performed. Nor should
1524     ** any subsequently requested IO on either a database or journal file
1525     ** handle for the same database be accepted until the main database
1526     ** file handle has been closed and reopened.
1527     **
1528     ** Furthermore, no further IO should be queued or performed on any file
1529     ** handle associated with a database that may have been part of a
1530     ** multi-file transaction that included the database associated with
1531     ** the IO error (i.e. a database ATTACHed to the same handle at some
1532     ** point in time).
1533     */
1534     if( rc!=SQLITE_OK ){
1535       async.ioError = rc;
1536     }
1537 
1538     if( async.ioError && !async.pQueueFirst ){
1539       async_mutex_enter(ASYNC_MUTEX_LOCK);
1540       if( 0==async.pLock ){
1541         async.ioError = SQLITE_OK;
1542       }
1543       async_mutex_leave(ASYNC_MUTEX_LOCK);
1544     }
1545 
1546     /* Drop the queue mutex before continuing to the next write operation
1547     ** in order to give other threads a chance to work with the write queue.
1548     */
1549     if( !async.pQueueFirst || !async.ioError ){
1550       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1551       holdingMutex = 0;
1552       if( async.ioDelay>0 ){
1553         pVfs->xSleep(pVfs, async.ioDelay*1000);
1554       }else{
1555         async_sched_yield();
1556       }
1557     }
1558   }
1559 
1560   async_mutex_leave(ASYNC_MUTEX_WRITER);
1561   return;
1562 }
1563 
1564 /*
1565 ** Install the asynchronous VFS.
1566 */
1567 int sqlite3async_initialize(const char *zParent, int isDefault){
1568   int rc = SQLITE_OK;
1569   if( async_vfs.pAppData==0 ){
1570     sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1571     if( !pParent || async_os_initialize() ){
1572       rc = SQLITE_ERROR;
1573     }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1574       async_os_shutdown();
1575     }else{
1576       async_vfs.pAppData = (void *)pParent;
1577       async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1578     }
1579   }
1580   return rc;
1581 }
1582 
1583 /*
1584 ** Uninstall the asynchronous VFS.
1585 */
1586 void sqlite3async_shutdown(void){
1587   if( async_vfs.pAppData ){
1588     async_os_shutdown();
1589     sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1590     async_vfs.pAppData = 0;
1591   }
1592 }
1593 
/*
** Process events on the write-queue. This runs the writer loop,
** asyncWriterThread(), in the calling thread and returns when that
** loop exits.
*/
void sqlite3async_run(void){
  asyncWriterThread();
  return;
}
1600 
1601 /*
1602 ** Control/configure the asynchronous IO system.
1603 */
1604 int sqlite3async_control(int op, ...){
1605   va_list ap;
1606   va_start(ap, op);
1607   switch( op ){
1608     case SQLITEASYNC_HALT: {
1609       int eWhen = va_arg(ap, int);
1610       if( eWhen!=SQLITEASYNC_HALT_NEVER
1611        && eWhen!=SQLITEASYNC_HALT_NOW
1612        && eWhen!=SQLITEASYNC_HALT_IDLE
1613       ){
1614         return SQLITE_MISUSE;
1615       }
1616       async.eHalt = eWhen;
1617       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1618       async_cond_signal(ASYNC_COND_QUEUE);
1619       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1620       break;
1621     }
1622 
1623     case SQLITEASYNC_DELAY: {
1624       int iDelay = va_arg(ap, int);
1625       if( iDelay<0 ){
1626         return SQLITE_MISUSE;
1627       }
1628       async.ioDelay = iDelay;
1629       break;
1630     }
1631 
1632     case SQLITEASYNC_LOCKFILES: {
1633       int bLock = va_arg(ap, int);
1634       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1635       if( async.nFile || async.pQueueFirst ){
1636         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1637         return SQLITE_MISUSE;
1638       }
1639       async.bLockFiles = bLock;
1640       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1641       break;
1642     }
1643 
1644     case SQLITEASYNC_GET_HALT: {
1645       int *peWhen = va_arg(ap, int *);
1646       *peWhen = async.eHalt;
1647       break;
1648     }
1649     case SQLITEASYNC_GET_DELAY: {
1650       int *piDelay = va_arg(ap, int *);
1651       *piDelay = async.ioDelay;
1652       break;
1653     }
1654     case SQLITEASYNC_GET_LOCKFILES: {
1655       int *piDelay = va_arg(ap, int *);
1656       *piDelay = async.bLockFiles;
1657       break;
1658     }
1659 
1660     default:
1661       return SQLITE_ERROR;
1662   }
1663   return SQLITE_OK;
1664 }
1665 
1666 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1667 
1668