xref: /sqlite-3.40.0/src/test_async.c (revision 961303c1)
1 /*
2 ** 2005 December 14
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This file contains an example implementation of an asynchronous IO
14 ** backend for SQLite.
15 **
16 ** WHAT IS ASYNCHRONOUS I/O?
17 **
18 ** With asynchronous I/O, write requests are handled by a separate thread
19 ** running in the background.  This means that the thread that initiates
20 ** a database write does not have to wait for (sometimes slow) disk I/O
21 ** to occur.  The write seems to happen very quickly, though in reality
22 ** it is happening at its usual slow pace in the background.
23 **
24 ** Asynchronous I/O appears to give better responsiveness, but at a price.
25 ** You lose the Durable property.  With the default I/O backend of SQLite,
26 ** once a write completes, you know that the information you wrote is
27 ** safely on disk.  With the asynchronous I/O, this is not the case.  If
28 ** your program crashes or if a power loss occurs after the database
29 ** write but before the asynchronous write thread has completed, then the
30 ** database change might never make it to disk and the next user of the
31 ** database might not see your change.
32 **
33 ** You lose Durability with asynchronous I/O, but you still retain the
34 ** other parts of ACID:  Atomic,  Consistent, and Isolated.  Many
35 ** applications get along fine without the Durability.
36 **
37 ** HOW IT WORKS
38 **
39 ** Asynchronous I/O works by creating a special SQLite "vfs" structure
40 ** and registering it with sqlite3_vfs_register(). When files opened via
41 ** this vfs are written to (using sqlite3OsWrite()), the data is not
42 ** written directly to disk, but is placed in the "write-queue" to be
43 ** handled by the background thread.
44 **
45 ** When files opened with the asynchronous vfs are read from
46 ** (using sqlite3OsRead()), the data is read from the file on
47 ** disk and the write-queue, so that from the point of view of
48 ** the vfs reader the OsWrite() appears to have already completed.
49 **
50 ** The special vfs is registered (and unregistered) by calls to
51 ** function asyncEnable() (see below).
52 **
53 ** LIMITATIONS
54 **
55 ** This demonstration code is deliberately kept simple in order to keep
56 ** the main ideas clear and easy to understand.  Real applications that
57 ** want to do asynchronous I/O might want to add additional capabilities.
58 ** For example, in this demonstration if writes are happening at a steady
59 ** stream that exceeds the I/O capability of the background writer thread,
60 ** the queue of pending write operations will grow without bound until we
61 ** run out of memory.  Users of this technique may want to keep track of
62 ** the quantity of pending writes and stop accepting new write requests
63 ** when the buffer gets to be too big.
64 **
65 ** LOCKING + CONCURRENCY
66 **
67 ** Multiple connections from within a single process that use this
68 ** implementation of asynchronous IO may access a single database
69 ** file concurrently. From the point of view of the user, if all
70 ** connections are from within a single process, there is no difference
71 ** between the concurrency offered by "normal" SQLite and SQLite
72 ** using the asynchronous backend.
73 **
74 ** If connections from within multiple processes may access the
75 ** database file, the ENABLE_FILE_LOCKING symbol (see below) must be
76 ** defined. If it is not defined, then no locks are established on
77 ** the database file. In this case, if multiple processes access
78 ** the database file, corruption will quickly result.
79 **
80 ** If ENABLE_FILE_LOCKING is defined (the default), then connections
81 ** from within multiple processes may access a single database file
82 ** without risking corruption. However concurrency is reduced as
83 ** follows:
84 **
85 **   * When a connection using asynchronous IO begins a database
86 **     transaction, the database is locked immediately. However the
87 **     lock is not released until after all relevant operations
88 **     in the write-queue have been flushed to disk. This means
89 **     (for example) that the database may remain locked for some
90 **     time after a "COMMIT" or "ROLLBACK" is issued.
91 **
92 **   * If an application using asynchronous IO executes transactions
93 **     in quick succession, other database users may be effectively
94 **     locked out of the database. This is because when a BEGIN
95 **     is executed, a database lock is established immediately. But
96 **     when the corresponding COMMIT or ROLLBACK occurs, the lock
97 **     is not released until the relevant part of the write-queue
98 **     has been flushed through. As a result, if a COMMIT is followed
99 **     by a BEGIN before the write-queue is flushed through, the database
100 **     is never unlocked, preventing other processes from accessing
101 **     the database.
102 **
103 ** Defining ENABLE_FILE_LOCKING when using an NFS or other remote
104 ** file-system may slow things down, as synchronous round-trips to the
105 ** server may be required to establish database file locks.
106 */
107 #define ENABLE_FILE_LOCKING
108 
109 #include "sqliteInt.h"
110 #include <tcl.h>
111 
112 /*
113 ** This test uses pthreads and hence only works on unix and with
114 ** a threadsafe build of SQLite.
115 */
116 #if OS_UNIX && SQLITE_THREADSAFE
117 
118 /*
119 ** This demo uses pthreads.  If you do not have a pthreads implementation
120 ** for your operating system, you will need to recode the threading
121 ** logic.
122 */
123 #include <pthread.h>
124 #include <sched.h>
125 
/* Useful macros used in several places.
**
** MIN and MAX are very common macro names that a system header (for
** example <sys/param.h>) or another project header may already have
** defined, so a definition is only supplied here when none is in scope.
** Both macros evaluate their arguments more than once: do not pass
** expressions that have side effects.
*/
#ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y))
#endif
129 
130 /* Forward references */
131 typedef struct AsyncWrite AsyncWrite;
132 typedef struct AsyncFile AsyncFile;
133 typedef struct AsyncFileData AsyncFileData;
134 typedef struct AsyncFileLock AsyncFileLock;
135 typedef struct AsyncLock AsyncLock;
136 
/* Enable for debugging.  When sqlite3async_trace is non-zero, each
** ASYNC_TRACE() invocation calls asyncTrace() with its arguments.
**
** ASYNC_TRACE(X) takes a parenthesized printf-style argument list, e.g.
** ASYNC_TRACE(("PUSH %p\n", p)).  The expansion is wrapped in
** do{ ... }while(0) so that it always behaves as a single statement and
** can never capture the "else" branch of an enclosing if/else.
*/
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) do{ if( sqlite3async_trace ) asyncTrace X; }while(0)
/*
** Write a trace message to stderr, prefixed with an identifier for the
** calling thread.  zFormat and the arguments that follow it are formatted
** by sqlite3_vmprintf().
**
** If sqlite3_vmprintf() returns NULL (it could not allocate the message)
** nothing is printed: handing a NULL pointer to a "%s" conversion is
** undefined behavior.
*/
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  if( z ){
    /* NOTE(review): pthread_t is an opaque type.  The cast to int assumes
    ** it is an arithmetic type on this platform, and it may truncate. */
    fprintf(stderr, "[%d] %s", (int)pthread_self(), z);
    sqlite3_free(z);
  }
}
149 
150 /*
151 ** THREAD SAFETY NOTES
152 **
153 ** Basic rules:
154 **
155 **     * Both read and write access to the global write-op queue must be
156 **       protected by the async.queueMutex. As are the async.ioError and
157 **       async.nFile variables.
158 **
159 **     * The async.aLock hash-table and all AsyncLock and AsyncFileLock
160 **       structures must be protected by the async.lockMutex mutex.
161 **
162 **     * The file handles from the underlying system are assumed not to
163 **       be thread safe.
164 **
165 **     * See the last two paragraphs under "The Writer Thread" for
166 **       an assumption to do with file-handle synchronization by the Os.
167 **
168 ** Deadlock prevention:
169 **
170 **     There are three mutexes used by the system: the "writer" mutex,
171 **     the "queue" mutex and the "lock" mutex. Rules are:
172 **
173 **     * It is illegal to block on the writer mutex when any other mutex
174 **       is held, and
175 **
176 **     * It is illegal to block on the queue mutex when the lock mutex
177 **       is held.
178 **
179 **     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
180 **
181 ** File system operations (invoked by SQLite thread):
182 **
183 **     xOpen
184 **     xDelete
185 **     xFileExists
186 **
187 ** File handle operations (invoked by SQLite thread):
188 **
189 **         asyncWrite, asyncClose, asyncTruncate, asyncSync
190 **
191 **     The operations above add an entry to the global write-op list. They
192 **     prepare the entry, acquire the async.queueMutex momentarily while
193 **     list pointers are  manipulated to insert the new entry, then release
194 **     the mutex and signal the writer thread to wake up in case it happens
195 **     to be asleep.
196 **
197 **
198 **         asyncRead, asyncFileSize.
199 **
200 **     Read operations. Both of these read from the underlying file
201 **     first, then adjust their result based on pending writes in the
202 **     write-op queue.   So async.queueMutex is held for the duration
203 **     of these operations to prevent other threads from changing the
204 **     queue in mid operation.
205 **
206 **
207 **         asyncLock, asyncUnlock, asyncCheckReservedLock
208 **
209 **     These primitives implement in-process locking using a hash table
210 **     on the file name.  Files are locked correctly for connections coming
211 **     from the same process.  Other processes honor these locks only if
212 **     ENABLE_FILE_LOCKING is defined (see the comment at the top of the file).
213 **
214 **
215 ** The writer thread:
216 **
217 **     The async.writerMutex is used to make sure there is only
218 **     a single writer thread running at a time.
219 **
220 **     Inside the writer thread is a loop that works like this:
221 **
222 **         WHILE (write-op list is not empty)
223 **             Do IO operation at head of write-op list
224 **             Remove entry from head of write-op list
225 **         END WHILE
226 **
227 **     The async.queueMutex is always held during the <write-op list is
228 **     not empty> test, and when the entry is removed from the head
229 **     of the write-op list. Sometimes it is held for the interim
230 **     period (while the IO is performed), and sometimes it is
231 **     relinquished. It is relinquished if (a) the IO op is an
232 **     ASYNC_CLOSE or (b) when the file handle was opened, two of
233 **     the underlying systems handles were opened on the same
234 **     file-system entry.
235 **
236 **     If condition (b) above is true, then one file-handle
237 **     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
238 **     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
239 **     threads to perform write() operations. This means that read
240 **     operations are not blocked by asynchronous writes (although
241 **     asynchronous writes may still be blocked by reads).
242 **
243 **     This assumes that the OS keeps two handles open on the same file
244 **     properly in sync. That is, any read operation that starts after a
245 **     write operation on the same file system entry has completed returns
246 **     data consistent with the write. We also assume that if one thread
247 **     reads a file while another is writing it all bytes other than the
248 **     ones actually being written contain valid data.
249 **
250 **     If the above assumptions are not true, set the preprocessor symbol
251 **     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
252 */
253 
254 #ifndef SQLITE_ASYNC_TWO_FILEHANDLES
255 /* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
256 #define SQLITE_ASYNC_TWO_FILEHANDLES 1
257 #endif
258 
259 /*
260 ** State information is held in the static variable "async" defined
261 ** as the following structure.
262 **
263 ** Both async.ioError and async.nFile are protected by async.queueMutex.
264 */
265 static struct TestAsyncStaticData {
266   pthread_mutex_t queueMutex;  /* Mutex for access to write operation queue */
267   pthread_mutex_t writerMutex; /* Prevents multiple writer threads */
268   pthread_mutex_t lockMutex;   /* For access to aLock hash table */
269   pthread_cond_t queueSignal;  /* For waking up sleeping writer thread */
270   pthread_cond_t emptySignal;  /* Notify when the write queue is empty */
271   AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
272   AsyncWrite *pQueueLast;      /* Last write operation on the list */
273   Hash aLock;                  /* Files locked */
274   volatile int ioDelay;             /* Extra delay between write operations */
275   volatile int writerHaltWhenIdle;  /* Writer thread halts when queue empty */
276   volatile int writerHaltNow;       /* Writer thread halts after next op */
277   int ioError;                 /* True if an IO error has occured */
278   int nFile;                   /* Number of open files (from sqlite pov) */
279 } async = {
280   PTHREAD_MUTEX_INITIALIZER,
281   PTHREAD_MUTEX_INITIALIZER,
282   PTHREAD_MUTEX_INITIALIZER,
283   PTHREAD_COND_INITIALIZER,
284   PTHREAD_COND_INITIALIZER,
285 };
286 
/* Possible values of AsyncWrite.op */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7

/* Names of opcodes, indexed by opcode value.  Used for debugging only.
**
** The table is fully const: it is only ever read, so neither the strings
** nor the pointers to them need to be writable.
*/
static const char *const azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};

/* Compile-time check that the name table stays in sync with the opcode
** macros above: the array type has a negative size (a compile error) if
** the number of names differs from the number of opcodes.
*/
typedef char azOpcodeNameInSync[
  (sizeof(azOpcodeName)/sizeof(azOpcodeName[0])==ASYNC_UNLOCK+1) ? 1 : -1
];
303 
304 /*
305 ** Entries on the write-op queue are instances of the AsyncWrite
306 ** structure, defined here.
307 **
308 ** The interpretation of the iOffset and nByte variables varies depending
309 ** on the value of AsyncWrite.op:
310 **
311 ** ASYNC_NOOP:
312 **     No values used.
313 **
314 ** ASYNC_WRITE:
315 **     iOffset -> Offset in file to write to.
316 **     nByte   -> Number of bytes of data to write (pointed to by zBuf).
317 **
318 ** ASYNC_SYNC:
319 **     nByte   -> flags to pass to sqlite3OsSync().
320 **
321 ** ASYNC_TRUNCATE:
322 **     iOffset -> Size to truncate file to.
323 **     nByte   -> Unused.
324 **
325 ** ASYNC_CLOSE:
326 **     iOffset -> Unused.
327 **     nByte   -> Unused.
328 **
329 ** ASYNC_DELETE:
330 **     iOffset -> Contains the "syncDir" flag.
331 **     nByte   -> Number of bytes of zBuf points to (file name).
332 **
333 ** ASYNC_OPENEXCLUSIVE:
334 **     iOffset -> Value of "delflag".
335 **     nByte   -> Number of bytes of zBuf points to (file name).
336 **
337 ** ASYNC_UNLOCK:
338 **     nByte   -> Argument to sqlite3OsUnlock().
339 **
340 **
341 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
342 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
343 ** single blob, so is deleted when sqlite3_free() is called on the parent
344 ** structure.
345 */
346 struct AsyncWrite {
347   AsyncFileData *pFileData;    /* File to write data to or sync */
348   int op;                      /* One of ASYNC_xxx etc. */
349   i64 iOffset;        /* See above */
350   int nByte;          /* See above */
351   char *zBuf;         /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
352   AsyncWrite *pNext;  /* Next write operation (to any file) */
353 };
354 
355 /*
356 ** An instance of this structure is created for each distinct open file
357 ** (i.e. if two handles are opened on the one file, only one of these
358 ** structures is allocated) and stored in the async.aLock hash table. The
359 ** keys for async.aLock are the full pathnames of the opened files.
360 **
361 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock
362 ** structures, one for each handle currently open on the file.
363 **
364 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
365 ** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is
366 ** not defined at compile time, variables AsyncLock.pFile and
367 ** AsyncLock.eLock are never used. Otherwise, pFile is a file handle
368 ** opened on the file in question and used to obtain the file-system
369 ** locks required by database connections within this process.
370 **
371 ** See comments above the asyncLock() function for more details on
372 ** the implementation of database locking used by this backend.
373 */
374 struct AsyncLock {
375   sqlite3_file *pFile;
376   int eLock;
377   AsyncFileLock *pList;
378 };
379 
380 /*
381 ** An instance of the following structure is allocated along with each
382 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
383 ** file was opened with the SQLITE_OPEN_MAIN_DB.
384 */
struct AsyncFileLock {
  int eLock;                    /* Lock state as seen by the SQLite layer */
  int eAsyncLock;               /* Lock state used to choose the file-system
                                ** lock; never lower than eLock */
  struct AsyncFileLock *pNext;  /* Next handle open on the same file */
};
390 
391 /*
392 ** The AsyncFile structure is a subclass of sqlite3_file used for
393 ** asynchronous IO.
394 **
395 ** All of the actual data for the structure is stored in the structure
396 ** pointed to by AsyncFile.pData, which is allocated as part of the
397 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
398 ** lifetime of the AsyncFile structure is ended by the caller after OsClose()
399 ** is called, but the data in AsyncFileData may be required by the
400 ** writer thread after that point.
401 */
402 struct AsyncFile {
403   sqlite3_io_methods *pMethod;
404   AsyncFileData *pData;
405 };
406 struct AsyncFileData {
407   char *zName;               /* Underlying OS filename - used for debugging */
408   int nName;                 /* Number of characters in zName */
409   sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
410   sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
411   AsyncFileLock lock;
412   AsyncWrite close;
413 };
414 
415 /*
416 ** Add an entry to the end of the global write-op list. pWrite should point
417 ** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
418 ** thread will call sqlite3_free() to free the structure after the specified
419 ** operation has been completed.
420 **
421 ** Once an AsyncWrite structure has been added to the list, it becomes the
422 ** property of the writer thread and must not be read or modified by the
423 ** caller.
424 */
425 static void addAsyncWrite(AsyncWrite *pWrite){
426   /* We must hold the queue mutex in order to modify the queue pointers */
427   pthread_mutex_lock(&async.queueMutex);
428 
429   /* Add the record to the end of the write-op queue */
430   assert( !pWrite->pNext );
431   if( async.pQueueLast ){
432     assert( async.pQueueFirst );
433     async.pQueueLast->pNext = pWrite;
434   }else{
435     async.pQueueFirst = pWrite;
436   }
437   async.pQueueLast = pWrite;
438   ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
439          pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
440 
441   if( pWrite->op==ASYNC_CLOSE ){
442     async.nFile--;
443   }
444 
445   /* Drop the queue mutex */
446   pthread_mutex_unlock(&async.queueMutex);
447 
448   /* The writer thread might have been idle because there was nothing
449   ** on the write-op queue for it to do.  So wake it up. */
450   pthread_cond_signal(&async.queueSignal);
451 }
452 
453 /*
454 ** Increment async.nFile in a thread-safe manner.
455 */
456 static void incrOpenFileCount(){
457   /* We must hold the queue mutex in order to modify async.nFile */
458   pthread_mutex_lock(&async.queueMutex);
459   if( async.nFile==0 ){
460     async.ioError = SQLITE_OK;
461   }
462   async.nFile++;
463   pthread_mutex_unlock(&async.queueMutex);
464 }
465 
466 /*
467 ** This is a utility function to allocate and populate a new AsyncWrite
468 ** structure and insert it (via addAsyncWrite() ) into the global list.
469 */
470 static int addNewAsyncWrite(
471   AsyncFileData *pFileData,
472   int op,
473   i64 iOffset,
474   int nByte,
475   const char *zByte
476 ){
477   AsyncWrite *p;
478   if( op!=ASYNC_CLOSE && async.ioError ){
479     return async.ioError;
480   }
481   p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
482   if( !p ){
483     /* The upper layer does not expect operations like OsWrite() to
484     ** return SQLITE_NOMEM. This is partly because under normal conditions
485     ** SQLite is required to do rollback without calling malloc(). So
486     ** if malloc() fails here, treat it as an I/O error. The above
487     ** layer knows how to handle that.
488     */
489     return SQLITE_IOERR;
490   }
491   p->op = op;
492   p->iOffset = iOffset;
493   p->nByte = nByte;
494   p->pFileData = pFileData;
495   p->pNext = 0;
496   if( zByte ){
497     p->zBuf = (char *)&p[1];
498     memcpy(p->zBuf, zByte, nByte);
499   }else{
500     p->zBuf = 0;
501   }
502   addAsyncWrite(p);
503   return SQLITE_OK;
504 }
505 
506 /*
507 ** Close the file. This just adds an entry to the write-op list, the file is
508 ** not actually closed.
509 */
510 static int asyncClose(sqlite3_file *pFile){
511   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
512 
513   /* Unlock the file, if it is locked */
514   pthread_mutex_lock(&async.lockMutex);
515   p->lock.eLock = 0;
516   pthread_mutex_unlock(&async.lockMutex);
517 
518   addAsyncWrite(&p->close);
519   return SQLITE_OK;
520 }
521 
522 /*
523 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
524 ** writing to the underlying file, this function adds an entry to the end of
525 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
526 ** returned.
527 */
528 static int asyncWrite(sqlite3_file *pFile, const void *pBuf, int amt, i64 iOff){
529   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
530   return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
531 }
532 
533 /*
534 ** Read data from the file. First we read from the filesystem, then adjust
535 ** the contents of the buffer based on ASYNC_WRITE operations in the
536 ** write-op queue.
537 **
538 ** This method holds the mutex from start to finish.
539 */
540 static int asyncRead(sqlite3_file *pFile, void *zOut, int iAmt, i64 iOffset){
541   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
542   int rc = SQLITE_OK;
543   i64 filesize;
544   int nRead;
545   sqlite3_file *pBase = p->pBaseRead;
546 
547   /* Grab the write queue mutex for the duration of the call */
548   pthread_mutex_lock(&async.queueMutex);
549 
550   /* If an I/O error has previously occurred in this virtual file
551   ** system, then all subsequent operations fail.
552   */
553   if( async.ioError!=SQLITE_OK ){
554     rc = async.ioError;
555     goto asyncread_out;
556   }
557 
558   if( pBase->pMethods ){
559     rc = sqlite3OsFileSize(pBase, &filesize);
560     if( rc!=SQLITE_OK ){
561       goto asyncread_out;
562     }
563     nRead = MIN(filesize - iOffset, iAmt);
564     if( nRead>0 ){
565       rc = sqlite3OsRead(pBase, zOut, nRead, iOffset);
566       ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
567     }
568   }
569 
570   if( rc==SQLITE_OK ){
571     AsyncWrite *pWrite;
572     char *zName = p->zName;
573 
574     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
575       if( pWrite->op==ASYNC_WRITE && pWrite->pFileData->zName==zName ){
576         int iBeginOut = (pWrite->iOffset-iOffset);
577         int iBeginIn = -iBeginOut;
578         int nCopy;
579 
580         if( iBeginIn<0 ) iBeginIn = 0;
581         if( iBeginOut<0 ) iBeginOut = 0;
582         nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
583 
584         if( nCopy>0 ){
585           memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
586           ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
587         }
588       }
589     }
590   }
591 
592 asyncread_out:
593   pthread_mutex_unlock(&async.queueMutex);
594   return rc;
595 }
596 
597 /*
598 ** Truncate the file to nByte bytes in length. This just adds an entry to
599 ** the write-op list, no IO actually takes place.
600 */
601 static int asyncTruncate(sqlite3_file *pFile, i64 nByte){
602   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
603   return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
604 }
605 
606 /*
607 ** Sync the file. This just adds an entry to the write-op list, the
608 ** sync() is done later by sqlite3_async_flush().
609 */
610 static int asyncSync(sqlite3_file *pFile, int flags){
611   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
612   return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
613 }
614 
615 /*
616 ** Read the size of the file. First we read the size of the file system
617 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
618 ** currently in the write-op list.
619 **
620 ** This method holds the mutex from start to finish.
621 */
622 int asyncFileSize(sqlite3_file *pFile, i64 *piSize){
623   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
624   int rc = SQLITE_OK;
625   i64 s = 0;
626   sqlite3_file *pBase;
627 
628   pthread_mutex_lock(&async.queueMutex);
629 
630   /* Read the filesystem size from the base file. If pBaseRead is NULL, this
631   ** means the file hasn't been opened yet. In this case all relevant data
632   ** must be in the write-op queue anyway, so we can omit reading from the
633   ** file-system.
634   */
635   pBase = p->pBaseRead;
636   if( pBase->pMethods ){
637     rc = sqlite3OsFileSize(pBase, &s);
638   }
639 
640   if( rc==SQLITE_OK ){
641     AsyncWrite *pWrite;
642     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
643       if( pWrite->op==ASYNC_DELETE && strcmp(p->zName, pWrite->zBuf)==0 ){
644         s = 0;
645       }else if( pWrite->pFileData && pWrite->pFileData->zName==p->zName){
646         switch( pWrite->op ){
647           case ASYNC_WRITE:
648             s = MAX(pWrite->iOffset + (i64)(pWrite->nByte), s);
649             break;
650           case ASYNC_TRUNCATE:
651             s = MIN(s, pWrite->iOffset);
652             break;
653         }
654       }
655     }
656     *piSize = s;
657   }
658   pthread_mutex_unlock(&async.queueMutex);
659   return rc;
660 }
661 
662 /*
663 ** Lock or unlock the actual file-system entry.
664 */
665 static int getFileLock(AsyncLock *pLock){
666   int rc = SQLITE_OK;
667   AsyncFileLock *pIter;
668   int eRequired = 0;
669 
670   if( pLock->pFile ){
671     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
672       assert(pIter->eAsyncLock>=pIter->eLock);
673       if( pIter->eAsyncLock>eRequired ){
674         eRequired = pIter->eAsyncLock;
675         assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
676       }
677     }
678 
679     if( eRequired>pLock->eLock ){
680       rc = sqlite3OsLock(pLock->pFile, eRequired);
681       if( rc==SQLITE_OK ){
682         pLock->eLock = eRequired;
683       }
684     }
685     else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
686       rc = sqlite3OsUnlock(pLock->pFile, eRequired);
687       if( rc==SQLITE_OK ){
688         pLock->eLock = eRequired;
689       }
690     }
691   }
692 
693   return rc;
694 }
695 
696 /*
697 ** The following two methods - asyncLock() and asyncUnlock() - are used
698 ** to obtain and release locks on database files opened with the
699 ** asynchronous backend.
700 */
701 static int asyncLock(sqlite3_file *pFile, int eLock){
702   int rc = SQLITE_OK;
703   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
704 
705   pthread_mutex_lock(&async.lockMutex);
706   if( p->lock.eLock<eLock ){
707     AsyncLock *pLock;
708     AsyncFileLock *pIter;
709     pLock = (AsyncLock *)sqlite3HashFind(&async.aLock, p->zName, p->nName);
710     assert(pLock && pLock->pList);
711     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
712       if( pIter!=&p->lock && (
713         (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
714         (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
715         (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
716         (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
717       )){
718         rc = SQLITE_BUSY;
719       }
720     }
721     if( rc==SQLITE_OK ){
722       p->lock.eLock = eLock;
723       p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
724     }
725     assert(p->lock.eAsyncLock>=p->lock.eLock);
726     if( rc==SQLITE_OK ){
727       rc = getFileLock(pLock);
728     }
729   }
730   pthread_mutex_unlock(&async.lockMutex);
731 
732   ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
733   return rc;
734 }
735 static int asyncUnlock(sqlite3_file *pFile, int eLock){
736   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
737   AsyncFileLock *pLock = &p->lock;
738   pthread_mutex_lock(&async.lockMutex);
739   pLock->eLock = MIN(pLock->eLock, eLock);
740   pthread_mutex_unlock(&async.lockMutex);
741   return addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
742 }
743 
744 /*
745 ** This function is called when the pager layer first opens a database file
746 ** and is checking for a hot-journal.
747 */
748 static int asyncCheckReservedLock(sqlite3_file *pFile){
749   int ret = 0;
750   AsyncFileLock *pIter;
751   AsyncLock *pLock;
752   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
753 
754   pthread_mutex_lock(&async.lockMutex);
755   pLock = (AsyncLock *)sqlite3HashFind(&async.aLock, p->zName, p->nName);
756   for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
757     if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
758       ret = 1;
759     }
760   }
761   pthread_mutex_unlock(&async.lockMutex);
762 
763   ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
764   return ret;
765 }
766 
767 /*
768 ** This is a no-op, as the asynchronous backend does not support locking.
769 */
770 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
771   switch( op ){
772     case SQLITE_FCNTL_LOCKSTATE: {
773       pthread_mutex_lock(&async.lockMutex);
774       *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
775       pthread_mutex_unlock(&async.lockMutex);
776       return SQLITE_OK;
777     }
778   }
779   return SQLITE_ERROR;
780 }
781 
782 /*
783 ** Return the device characteristics and sector-size of the device. It
784 ** is not tricky to implement these correctly, as this backend might
785 ** not have an open file handle at this point.
786 */
787 static int asyncSectorSize(sqlite3_file *pFile){
788   return 512;
789 }
790 static int asyncDeviceCharacteristics(sqlite3_file *pFile){
791   return 0;
792 }
793 
794 static int unlinkAsyncFile(AsyncFileData *pData){
795   AsyncLock *pLock;
796   AsyncFileLock **ppIter;
797   int rc = SQLITE_OK;
798 
799   pLock = sqlite3HashFind(&async.aLock, pData->zName, pData->nName);
800   for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
801     if( (*ppIter)==&pData->lock ){
802       *ppIter = pData->lock.pNext;
803       break;
804     }
805   }
806   if( !pLock->pList ){
807     if( pLock->pFile ){
808       sqlite3OsClose(pLock->pFile);
809     }
810     sqlite3_free(pLock);
811     sqlite3HashInsert(&async.aLock, pData->zName, pData->nName, 0);
812     if( !sqliteHashFirst(&async.aLock) ){
813       sqlite3HashClear(&async.aLock);
814     }
815   }else{
816     rc = getFileLock(pLock);
817   }
818 
819   return rc;
820 }
821 
822 /*
823 ** Open a file.
824 */
825 static int asyncOpen(
826   sqlite3_vfs *pAsyncVfs,
827   const char *zName,
828   sqlite3_file *pFile,
829   int flags,
830   int *pOutFlags
831 ){
832   static sqlite3_io_methods async_methods = {
833     1,                               /* iVersion */
834     asyncClose,                      /* xClose */
835     asyncRead,                       /* xRead */
836     asyncWrite,                      /* xWrite */
837     asyncTruncate,                   /* xTruncate */
838     asyncSync,                       /* xSync */
839     asyncFileSize,                   /* xFileSize */
840     asyncLock,                       /* xLock */
841     asyncUnlock,                     /* xUnlock */
842     asyncCheckReservedLock,          /* xCheckReservedLock */
843     asyncFileControl,                /* xFileControl */
844     asyncSectorSize,                 /* xSectorSize */
845     asyncDeviceCharacteristics       /* xDeviceCharacteristics */
846   };
847 
848   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
849   AsyncFile *p = (AsyncFile *)pFile;
850   int nName = strlen(zName)+1;
851   int rc = SQLITE_OK;
852   int nByte;
853   AsyncFileData *pData;
854   AsyncLock *pLock = 0;
855   int isExclusive = (flags&SQLITE_OPEN_EXCLUSIVE);
856 
857   nByte = (
858     sizeof(AsyncFileData) +        /* AsyncFileData structure */
859     2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
860     nName                          /* AsyncFileData.zName */
861   );
862   pData = sqlite3_malloc(nByte);
863   if( !pData ){
864     return SQLITE_NOMEM;
865   }
866   memset(pData, 0, nByte);
867   pData->zName = (char *)&pData[1];
868   pData->nName = nName;
869   pData->pBaseRead = (sqlite3_file *)&pData->zName[nName];
870   pData->pBaseWrite = (sqlite3_file *)&pData->zName[nName+pVfs->szOsFile];
871   pData->close.pFileData = pData;
872   pData->close.op = ASYNC_CLOSE;
873   memcpy(pData->zName, zName, nName);
874 
875   if( !isExclusive ){
876     rc = sqlite3OsOpen(pVfs, zName, pData->pBaseRead, flags, pOutFlags);
877     if( rc==SQLITE_OK && ((*pOutFlags)&SQLITE_OPEN_READWRITE) ){
878       rc = sqlite3OsOpen(pVfs, zName, pData->pBaseWrite, flags, 0);
879     }
880   }
881 
882   pthread_mutex_lock(&async.lockMutex);
883 
884   if( rc==SQLITE_OK ){
885     pLock = sqlite3HashFind(&async.aLock, pData->zName, pData->nName);
886     if( !pLock ){
887       pLock = sqlite3MallocZero(pVfs->szOsFile + sizeof(AsyncLock));
888       if( pLock ){
889         AsyncLock *pDelete;
890 #ifdef ENABLE_FILE_LOCKING
891         if( flags&SQLITE_OPEN_MAIN_DB ){
892           pLock->pFile = (sqlite3_file *)&pLock[1];
893           rc = sqlite3OsOpen(pVfs, zName, pLock->pFile, flags, 0);
894           if( rc!=SQLITE_OK ){
895             sqlite3_free(pLock);
896             pLock = 0;
897           }
898         }
899 #endif
900         pDelete = sqlite3HashInsert(
901           &async.aLock, pData->zName, pData->nName, (void *)pLock
902         );
903         if( pDelete ){
904           rc = SQLITE_NOMEM;
905           sqlite3_free(pLock);
906         }
907       }else{
908         rc = SQLITE_NOMEM;
909       }
910     }
911   }
912 
913   if( rc==SQLITE_OK ){
914     HashElem *pElem;
915     p->pMethod = &async_methods;
916     p->pData = pData;
917     incrOpenFileCount();
918 
919     /* Link AsyncFileData.lock into the linked list of
920     ** AsyncFileLock structures for this file.
921     */
922     pData->lock.pNext = pLock->pList;
923     pLock->pList = &pData->lock;
924 
925     pElem = sqlite3HashFindElem(&async.aLock, pData->zName, pData->nName);
926     pData->zName = (char *)sqliteHashKey(pElem);
927   }else{
928     sqlite3OsClose(pData->pBaseRead);
929     sqlite3OsClose(pData->pBaseWrite);
930     sqlite3_free(pData);
931   }
932 
933   pthread_mutex_unlock(&async.lockMutex);
934 
935   if( rc==SQLITE_OK && isExclusive ){
936     rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (i64)flags, 0, 0);
937     if( rc==SQLITE_OK ){
938       if( pOutFlags ) *pOutFlags = flags;
939     }else{
940       pthread_mutex_lock(&async.lockMutex);
941       unlinkAsyncFile(pData);
942       pthread_mutex_unlock(&async.lockMutex);
943       sqlite3_free(pData);
944     }
945   }
946   return rc;
947 }
948 
949 /*
950 ** Implementation of sqlite3OsDelete. Add an entry to the end of the
951 ** write-op queue to perform the delete.
952 */
953 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
954   return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
955 }
956 
957 /*
958 ** Implementation of sqlite3OsAccess. This method holds the mutex from
959 ** start to finish.
960 */
961 static int asyncAccess(sqlite3_vfs *pAsyncVfs, const char *zName, int flags){
962   int ret;
963   AsyncWrite *p;
964   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
965 
966   assert(flags==SQLITE_ACCESS_READWRITE
967       || flags==SQLITE_ACCESS_READ
968       || flags==SQLITE_ACCESS_EXISTS
969   );
970 
971   pthread_mutex_lock(&async.queueMutex);
972   ret = sqlite3OsAccess(pVfs, zName, flags);
973   if( flags==SQLITE_ACCESS_EXISTS ){
974     for(p=async.pQueueFirst; p; p = p->pNext){
975       if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
976         ret = 0;
977       }else if( p->op==ASYNC_OPENEXCLUSIVE
978              && 0==strcmp(p->pFileData->zName, zName)
979       ){
980         ret = 1;
981       }
982     }
983   }
984   ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
985     flags==SQLITE_ACCESS_READWRITE?"read-write":
986     flags==SQLITE_ACCESS_READ?"read":"exists"
987     , zName, ret)
988   );
989   pthread_mutex_unlock(&async.queueMutex);
990   return ret;
991 }
992 
993 static int asyncGetTempName(sqlite3_vfs *pAsyncVfs, char *zBufOut){
994   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
995   return pVfs->xGetTempName(pVfs, zBufOut);
996 }
997 
998 /*
999 ** Fill in zPathOut with the full path to the file identified by zPath.
1000 */
1001 static int asyncFullPathname(
1002   sqlite3_vfs *pAsyncVfs,
1003   const char *zPath,
1004   char *zPathOut
1005 ){
1006   int rc;
1007   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1008   rc = sqlite3OsFullPathname(pVfs, zPath, zPathOut);
1009 
1010   /* Because of the way intra-process file locking works, this backend
1011   ** needs to return a canonical path. The following block assumes the
1012   ** file-system uses unix style paths.
1013   */
1014   if( rc==SQLITE_OK ){
1015     int iIn;
1016     int iOut = 0;
1017     int nPathOut = strlen(zPathOut);
1018 
1019     for(iIn=0; iIn<nPathOut; iIn++){
1020 
1021       /* Replace any occurences of "//" with "/" */
1022       if( iIn<=(nPathOut-2) && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='/'
1023       ){
1024         continue;
1025       }
1026 
1027       /* Replace any occurences of "/./" with "/" */
1028       if( iIn<=(nPathOut-3)
1029        && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' && zPathOut[iIn+2]=='/'
1030       ){
1031         iIn++;
1032         continue;
1033       }
1034 
1035       /* Replace any occurences of "<path-component>/../" with "" */
1036       if( iOut>0 && iIn<=(nPathOut-4)
1037        && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.'
1038        && zPathOut[iIn+2]=='.' && zPathOut[iIn+3]=='/'
1039       ){
1040         iIn += 3;
1041         iOut--;
1042         for( ; iOut>0 && zPathOut[iOut-1]!='/'; iOut--);
1043         continue;
1044       }
1045 
1046       zPathOut[iOut++] = zPathOut[iIn];
1047     }
1048     zPathOut[iOut] = '\0';
1049   }
1050 
1051   return rc;
1052 }
1053 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1054   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1055   return pVfs->xDlOpen(pVfs, zPath);
1056 }
1057 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1058   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1059   pVfs->xDlError(pVfs, nByte, zErrMsg);
1060 }
1061 static void *asyncDlSym(
1062   sqlite3_vfs *pAsyncVfs,
1063   void *pHandle,
1064   const char *zSymbol
1065 ){
1066   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1067   return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1068 }
1069 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1070   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1071   pVfs->xDlClose(pVfs, pHandle);
1072 }
1073 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1074   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1075   return pVfs->xRandomness(pVfs, nByte, zBufOut);
1076 }
1077 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1078   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1079   return pVfs->xSleep(pVfs, nMicro);
1080 }
1081 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1082   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1083   return pVfs->xCurrentTime(pVfs, pTimeOut);
1084 }
1085 
/*
** The VFS object for the asynchronous backend. mxPathname and pAppData
** start out as 0 and are filled in by asyncEnable(): pAppData holds the
** underlying VFS that every method in this table forwards to.
*/
static sqlite3_vfs async_vfs = {
  1,                    /* iVersion */
  sizeof(AsyncFile),    /* szOsFile */
  0,                    /* mxPathname */
  0,                    /* pNext */
  "async",              /* zName */
  0,                    /* pAppData */
  asyncOpen,            /* xOpen */
  asyncDelete,          /* xDelete */
  asyncAccess,          /* xAccess */
  asyncGetTempName,     /* xGetTempName */
  asyncFullPathname,    /* xFullPathname */
  asyncDlOpen,          /* xDlOpen */
  asyncDlError,         /* xDlError */
  asyncDlSym,           /* xDlSym */
  asyncDlClose,         /* xDlClose */
  asyncRandomness,      /* xRandomness */
  asyncSleep,           /* xSleep */
  asyncCurrentTime      /* xCurrentTime */
};
1106 
1107 /*
1108 ** Call this routine to enable or disable the
1109 ** asynchronous IO features implemented in this file.
1110 **
1111 ** This routine is not even remotely threadsafe.  Do not call
1112 ** this routine while any SQLite database connections are open.
1113 */
1114 static void asyncEnable(int enable){
1115   if( enable ){
1116     if( !async_vfs.pAppData ){
1117       static int hashTableInit = 0;
1118       async_vfs.pAppData = (void *)sqlite3_vfs_find(0);
1119       async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1120       sqlite3_vfs_register(&async_vfs, 1);
1121       if( !hashTableInit ){
1122         sqlite3HashInit(&async.aLock, SQLITE_HASH_BINARY, 1);
1123         hashTableInit = 1;
1124       }
1125     }
1126   }else{
1127     if( async_vfs.pAppData ){
1128       sqlite3_vfs_unregister(&async_vfs);
1129       async_vfs.pAppData = 0;
1130     }
1131   }
1132 }
1133 
1134 /*
1135 ** This procedure runs in a separate thread, reading messages off of the
1136 ** write queue and processing them one by one.
1137 **
1138 ** If async.writerHaltNow is true, then this procedure exits
1139 ** after processing a single message.
1140 **
1141 ** If async.writerHaltWhenIdle is true, then this procedure exits when
1142 ** the write queue is empty.
1143 **
1144 ** If both of the above variables are false, this procedure runs
1145 ** indefinately, waiting for operations to be added to the write queue
1146 ** and processing them in the order in which they arrive.
1147 **
1148 ** An artifical delay of async.ioDelay milliseconds is inserted before
1149 ** each write operation in order to simulate the effect of a slow disk.
1150 **
1151 ** Only one instance of this procedure may be running at a time.
1152 */
1153 static void *asyncWriterThread(void *NotUsed){
1154   sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1155   AsyncWrite *p = 0;
1156   int rc = SQLITE_OK;
1157   int holdingMutex = 0;
1158 
1159   if( pthread_mutex_trylock(&async.writerMutex) ){
1160     return 0;
1161   }
1162   while( async.writerHaltNow==0 ){
1163     int doNotFree = 0;
1164     sqlite3_file *pBase = 0;
1165 
1166     if( !holdingMutex ){
1167       pthread_mutex_lock(&async.queueMutex);
1168     }
1169     while( (p = async.pQueueFirst)==0 ){
1170       pthread_cond_broadcast(&async.emptySignal);
1171       if( async.writerHaltWhenIdle ){
1172         pthread_mutex_unlock(&async.queueMutex);
1173         break;
1174       }else{
1175         ASYNC_TRACE(("IDLE\n"));
1176         pthread_cond_wait(&async.queueSignal, &async.queueMutex);
1177         ASYNC_TRACE(("WAKEUP\n"));
1178       }
1179     }
1180     if( p==0 ) break;
1181     holdingMutex = 1;
1182 
1183     /* Right now this thread is holding the mutex on the write-op queue.
1184     ** Variable 'p' points to the first entry in the write-op queue. In
1185     ** the general case, we hold on to the mutex for the entire body of
1186     ** the loop.
1187     **
1188     ** However in the cases enumerated below, we relinquish the mutex,
1189     ** perform the IO, and then re-request the mutex before removing 'p' from
1190     ** the head of the write-op queue. The idea is to increase concurrency with
1191     ** sqlite threads.
1192     **
1193     **     * An ASYNC_CLOSE operation.
1194     **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1195     **       the mutex, call the underlying xOpenExclusive() function, then
1196     **       re-aquire the mutex before seting the AsyncFile.pBaseRead
1197     **       variable.
1198     **     * ASYNC_SYNC and ASYNC_WRITE operations, if
1199     **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1200     **       file-handles are open for the particular file being "synced".
1201     */
1202     if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1203       p->op = ASYNC_NOOP;
1204     }
1205     if( p->pFileData ){
1206       pBase = p->pFileData->pBaseWrite;
1207       if(
1208         p->op==ASYNC_CLOSE ||
1209         p->op==ASYNC_OPENEXCLUSIVE ||
1210         (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1211       ){
1212         pthread_mutex_unlock(&async.queueMutex);
1213         holdingMutex = 0;
1214       }
1215       if( !pBase->pMethods ){
1216         pBase = p->pFileData->pBaseRead;
1217       }
1218     }
1219 
1220     switch( p->op ){
1221       case ASYNC_NOOP:
1222         break;
1223 
1224       case ASYNC_WRITE:
1225         assert( pBase );
1226         ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1227                 p->pFileData->zName, p->nByte, p->iOffset));
1228         rc = sqlite3OsWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1229         break;
1230 
1231       case ASYNC_SYNC:
1232         assert( pBase );
1233         ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1234         rc = sqlite3OsSync(pBase, p->nByte);
1235         break;
1236 
1237       case ASYNC_TRUNCATE:
1238         assert( pBase );
1239         ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1240                 p->pFileData->zName, p->iOffset));
1241         rc = sqlite3OsTruncate(pBase, p->iOffset);
1242         break;
1243 
1244       case ASYNC_CLOSE: {
1245         AsyncFileData *pData = p->pFileData;
1246         ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1247         sqlite3OsClose(pData->pBaseWrite);
1248         sqlite3OsClose(pData->pBaseRead);
1249 
1250         /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1251         ** structures for this file. Obtain the async.lockMutex mutex
1252         ** before doing so.
1253         */
1254         pthread_mutex_lock(&async.lockMutex);
1255         rc = unlinkAsyncFile(pData);
1256         pthread_mutex_unlock(&async.lockMutex);
1257 
1258         async.pQueueFirst = p->pNext;
1259         sqlite3_free(pData);
1260         doNotFree = 1;
1261         break;
1262       }
1263 
1264       case ASYNC_UNLOCK: {
1265         AsyncLock *pLock;
1266         AsyncFileData *pData = p->pFileData;
1267         int eLock = p->nByte;
1268         pthread_mutex_lock(&async.lockMutex);
1269         pData->lock.eAsyncLock = MIN(
1270             pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1271         );
1272         assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1273         pLock = sqlite3HashFind(&async.aLock, pData->zName, pData->nName);
1274         rc = getFileLock(pLock);
1275         pthread_mutex_unlock(&async.lockMutex);
1276         break;
1277       }
1278 
1279       case ASYNC_DELETE:
1280         ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1281         rc = sqlite3OsDelete(pVfs, p->zBuf, (int)p->iOffset);
1282         break;
1283 
1284       case ASYNC_OPENEXCLUSIVE: {
1285         int flags = (int)p->iOffset;
1286         AsyncFileData *pData = p->pFileData;
1287         ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1288         assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1289         rc = sqlite3OsOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1290         assert( holdingMutex==0 );
1291         pthread_mutex_lock(&async.queueMutex);
1292         holdingMutex = 1;
1293         break;
1294       }
1295 
1296       default: assert(!"Illegal value for AsyncWrite.op");
1297     }
1298 
1299     /* If we didn't hang on to the mutex during the IO op, obtain it now
1300     ** so that the AsyncWrite structure can be safely removed from the
1301     ** global write-op queue.
1302     */
1303     if( !holdingMutex ){
1304       pthread_mutex_lock(&async.queueMutex);
1305       holdingMutex = 1;
1306     }
1307     /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1308     if( p==async.pQueueLast ){
1309       async.pQueueLast = 0;
1310     }
1311     if( !doNotFree ){
1312       async.pQueueFirst = p->pNext;
1313       sqlite3_free(p);
1314     }
1315     assert( holdingMutex );
1316 
1317     /* An IO error has occured. We cannot report the error back to the
1318     ** connection that requested the I/O since the error happened
1319     ** asynchronously.  The connection has already moved on.  There
1320     ** really is nobody to report the error to.
1321     **
1322     ** The file for which the error occured may have been a database or
1323     ** journal file. Regardless, none of the currently queued operations
1324     ** associated with the same database should now be performed. Nor should
1325     ** any subsequently requested IO on either a database or journal file
1326     ** handle for the same database be accepted until the main database
1327     ** file handle has been closed and reopened.
1328     **
1329     ** Furthermore, no further IO should be queued or performed on any file
1330     ** handle associated with a database that may have been part of a
1331     ** multi-file transaction that included the database associated with
1332     ** the IO error (i.e. a database ATTACHed to the same handle at some
1333     ** point in time).
1334     */
1335     if( rc!=SQLITE_OK ){
1336       async.ioError = rc;
1337     }
1338 
1339     if( async.ioError && !async.pQueueFirst ){
1340       pthread_mutex_lock(&async.lockMutex);
1341       if( 0==sqliteHashFirst(&async.aLock) ){
1342         async.ioError = SQLITE_OK;
1343       }
1344       pthread_mutex_unlock(&async.lockMutex);
1345     }
1346 
1347     /* Drop the queue mutex before continuing to the next write operation
1348     ** in order to give other threads a chance to work with the write queue.
1349     */
1350     if( !async.pQueueFirst || !async.ioError ){
1351       pthread_mutex_unlock(&async.queueMutex);
1352       holdingMutex = 0;
1353       if( async.ioDelay>0 ){
1354         sqlite3OsSleep(pVfs, async.ioDelay);
1355       }else{
1356         sched_yield();
1357       }
1358     }
1359   }
1360 
1361   pthread_mutex_unlock(&async.writerMutex);
1362   return 0;
1363 }
1364 
1365 /**************************************************************************
1366 ** The remaining code defines a Tcl interface for testing the asynchronous
1367 ** IO implementation in this file.
1368 **
1369 ** To adapt the code to a non-TCL environment, delete or comment out
1370 ** the code that follows.
1371 */
1372 
1373 /*
1374 ** sqlite3async_enable ?YES/NO?
1375 **
1376 ** Enable or disable the asynchronous I/O backend.  This command is
1377 ** not thread-safe.  Do not call it while any database connections
1378 ** are open.
1379 */
1380 static int testAsyncEnable(
1381   void * clientData,
1382   Tcl_Interp *interp,
1383   int objc,
1384   Tcl_Obj *CONST objv[]
1385 ){
1386   if( objc!=1 && objc!=2 ){
1387     Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?");
1388     return TCL_ERROR;
1389   }
1390   if( objc==1 ){
1391     Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0));
1392   }else{
1393     int en;
1394     if( Tcl_GetBooleanFromObj(interp, objv[1], &en) ) return TCL_ERROR;
1395     asyncEnable(en);
1396   }
1397   return TCL_OK;
1398 }
1399 
1400 /*
1401 ** sqlite3async_halt  "now"|"idle"|"never"
1402 **
1403 ** Set the conditions at which the writer thread will halt.
1404 */
1405 static int testAsyncHalt(
1406   void * clientData,
1407   Tcl_Interp *interp,
1408   int objc,
1409   Tcl_Obj *CONST objv[]
1410 ){
1411   const char *zCond;
1412   if( objc!=2 ){
1413     Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\"");
1414     return TCL_ERROR;
1415   }
1416   zCond = Tcl_GetString(objv[1]);
1417   if( strcmp(zCond, "now")==0 ){
1418     async.writerHaltNow = 1;
1419     pthread_cond_broadcast(&async.queueSignal);
1420   }else if( strcmp(zCond, "idle")==0 ){
1421     async.writerHaltWhenIdle = 1;
1422     async.writerHaltNow = 0;
1423     pthread_cond_broadcast(&async.queueSignal);
1424   }else if( strcmp(zCond, "never")==0 ){
1425     async.writerHaltWhenIdle = 0;
1426     async.writerHaltNow = 0;
1427   }else{
1428     Tcl_AppendResult(interp,
1429       "should be one of: \"now\", \"idle\", or \"never\"", (char*)0);
1430     return TCL_ERROR;
1431   }
1432   return TCL_OK;
1433 }
1434 
1435 /*
1436 ** sqlite3async_delay ?MS?
1437 **
1438 ** Query or set the number of milliseconds of delay in the writer
1439 ** thread after each write operation.  The default is 0.  By increasing
1440 ** the memory delay we can simulate the effect of slow disk I/O.
1441 */
1442 static int testAsyncDelay(
1443   void * clientData,
1444   Tcl_Interp *interp,
1445   int objc,
1446   Tcl_Obj *CONST objv[]
1447 ){
1448   if( objc!=1 && objc!=2 ){
1449     Tcl_WrongNumArgs(interp, 1, objv, "?MS?");
1450     return TCL_ERROR;
1451   }
1452   if( objc==1 ){
1453     Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay));
1454   }else{
1455     int ioDelay;
1456     if( Tcl_GetIntFromObj(interp, objv[1], &ioDelay) ) return TCL_ERROR;
1457     async.ioDelay = ioDelay;
1458   }
1459   return TCL_OK;
1460 }
1461 
1462 /*
1463 ** sqlite3async_start
1464 **
1465 ** Start a new writer thread.
1466 */
1467 static int testAsyncStart(
1468   void * clientData,
1469   Tcl_Interp *interp,
1470   int objc,
1471   Tcl_Obj *CONST objv[]
1472 ){
1473   pthread_t x;
1474   int rc;
1475   rc = pthread_create(&x, 0, asyncWriterThread, 0);
1476   if( rc ){
1477     Tcl_AppendResult(interp, "failed to create the thread", 0);
1478     return TCL_ERROR;
1479   }
1480   pthread_detach(x);
1481   return TCL_OK;
1482 }
1483 
1484 /*
1485 ** sqlite3async_wait
1486 **
1487 ** Wait for the current writer thread to terminate.
1488 **
1489 ** If the current writer thread is set to run forever then this
1490 ** command would block forever.  To prevent that, an error is returned.
1491 */
1492 static int testAsyncWait(
1493   void * clientData,
1494   Tcl_Interp *interp,
1495   int objc,
1496   Tcl_Obj *CONST objv[]
1497 ){
1498   int cnt = 10;
1499   if( async.writerHaltNow==0 && async.writerHaltWhenIdle==0 ){
1500     Tcl_AppendResult(interp, "would block forever", (char*)0);
1501     return TCL_ERROR;
1502   }
1503 
1504   while( cnt-- && !pthread_mutex_trylock(&async.writerMutex) ){
1505     pthread_mutex_unlock(&async.writerMutex);
1506     sched_yield();
1507   }
1508   if( cnt>=0 ){
1509     ASYNC_TRACE(("WAIT\n"));
1510     pthread_mutex_lock(&async.queueMutex);
1511     pthread_cond_broadcast(&async.queueSignal);
1512     pthread_mutex_unlock(&async.queueMutex);
1513     pthread_mutex_lock(&async.writerMutex);
1514     pthread_mutex_unlock(&async.writerMutex);
1515   }else{
1516     ASYNC_TRACE(("NO-WAIT\n"));
1517   }
1518   return TCL_OK;
1519 }
1520 
1521 
1522 #endif  /* OS_UNIX and SQLITE_THREADSAFE */
1523 
1524 /*
1525 ** This routine registers the custom TCL commands defined in this
1526 ** module.  This should be the only procedure visible from outside
1527 ** of this module.
1528 */
1529 int Sqlitetestasync_Init(Tcl_Interp *interp){
1530 #if OS_UNIX && SQLITE_THREADSAFE
1531   Tcl_CreateObjCommand(interp,"sqlite3async_enable",testAsyncEnable,0,0);
1532   Tcl_CreateObjCommand(interp,"sqlite3async_halt",testAsyncHalt,0,0);
1533   Tcl_CreateObjCommand(interp,"sqlite3async_delay",testAsyncDelay,0,0);
1534   Tcl_CreateObjCommand(interp,"sqlite3async_start",testAsyncStart,0,0);
1535   Tcl_CreateObjCommand(interp,"sqlite3async_wait",testAsyncWait,0,0);
1536   Tcl_LinkVar(interp, "sqlite3async_trace",
1537       (char*)&sqlite3async_trace, TCL_LINK_INT);
1538 #endif  /* OS_UNIX and SQLITE_THREADSAFE */
1539   return TCL_OK;
1540 }
1541