xref: /sqlite-3.40.0/ext/async/sqlite3async.c (revision eb4ac06f)
1a3f06598Sdanielk1977 /*
2a3f06598Sdanielk1977 ** 2005 December 14
3a3f06598Sdanielk1977 **
4a3f06598Sdanielk1977 ** The author disclaims copyright to this source code.  In place of
5a3f06598Sdanielk1977 ** a legal notice, here is a blessing:
6a3f06598Sdanielk1977 **
7a3f06598Sdanielk1977 **    May you do good and not evil.
8a3f06598Sdanielk1977 **    May you find forgiveness for yourself and forgive others.
9a3f06598Sdanielk1977 **    May you share freely, never taking more than you give.
10a3f06598Sdanielk1977 **
11a3f06598Sdanielk1977 *************************************************************************
12a3f06598Sdanielk1977 **
13*eb4ac06fSshane ** $Id: sqlite3async.c,v 1.6 2009/04/30 17:45:34 shane Exp $
14a3f06598Sdanielk1977 **
15debcfd2dSdanielk1977 ** This file contains the implementation of an asynchronous IO backend
16debcfd2dSdanielk1977 ** for SQLite.
17a3f06598Sdanielk1977 */
18a3f06598Sdanielk1977 
19a3f06598Sdanielk1977 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20a3f06598Sdanielk1977 
21a3f06598Sdanielk1977 #include "sqlite3async.h"
22*eb4ac06fSshane #include "sqlite3.h"
234598b8e4Sdanielk1977 #include <stdarg.h>
244598b8e4Sdanielk1977 #include <string.h>
254598b8e4Sdanielk1977 #include <assert.h>
26a3f06598Sdanielk1977 
27a3f06598Sdanielk1977 /* Useful macros used in several places */
28a3f06598Sdanielk1977 #define MIN(x,y) ((x)<(y)?(x):(y))
29a3f06598Sdanielk1977 #define MAX(x,y) ((x)>(y)?(x):(y))
30a3f06598Sdanielk1977 
31*eb4ac06fSshane #ifndef SQLITE_AMALGAMATION
32*eb4ac06fSshane /* Macro to mark parameters as unused and silence compiler warnings. */
33*eb4ac06fSshane #define UNUSED_PARAMETER(x) (void)(x)
34*eb4ac06fSshane #endif
35*eb4ac06fSshane 
36a3f06598Sdanielk1977 /* Forward references */
37a3f06598Sdanielk1977 typedef struct AsyncWrite AsyncWrite;
38a3f06598Sdanielk1977 typedef struct AsyncFile AsyncFile;
39a3f06598Sdanielk1977 typedef struct AsyncFileData AsyncFileData;
40a3f06598Sdanielk1977 typedef struct AsyncFileLock AsyncFileLock;
41a3f06598Sdanielk1977 typedef struct AsyncLock AsyncLock;
42a3f06598Sdanielk1977 
43a3f06598Sdanielk1977 /* Enable for debugging */
#ifndef NDEBUG
#include <stdio.h>
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X
/*
** Debugging aid, compiled in only when NDEBUG is not defined.
**
** Format a message printf()-style using sqlite3_vmprintf() and write it
** to stderr. Callers normally go through the ASYNC_TRACE() macro, which
** only invokes this function when sqlite3async_trace is non-zero.
**
** sqlite3_vmprintf() returns NULL if it cannot allocate memory. Passing
** a NULL pointer to a "%s" conversion is undefined behaviour, so in that
** case the message is silently dropped.
*/
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
#else
# define ASYNC_TRACE(X)
#endif
60a3f06598Sdanielk1977 
61a3f06598Sdanielk1977 /*
62a3f06598Sdanielk1977 ** THREAD SAFETY NOTES
63a3f06598Sdanielk1977 **
64a3f06598Sdanielk1977 ** Basic rules:
65a3f06598Sdanielk1977 **
66a3f06598Sdanielk1977 **     * Both read and write access to the global write-op queue must be
67a3f06598Sdanielk1977 **       protected by the async.queueMutex. As are the async.ioError and
68a3f06598Sdanielk1977 **       async.nFile variables.
69a3f06598Sdanielk1977 **
70a3f06598Sdanielk1977 **     * The async.pLock list and all AsyncLock and AsyncFileLock
71a3f06598Sdanielk1977 **       structures must be protected by the async.lockMutex mutex.
72a3f06598Sdanielk1977 **
73a3f06598Sdanielk1977 **     * The file handles from the underlying system are not assumed to
74a3f06598Sdanielk1977 **       be thread safe.
75a3f06598Sdanielk1977 **
76a3f06598Sdanielk1977 **     * See the last two paragraphs under "The Writer Thread" for
77a3f06598Sdanielk1977 **       an assumption to do with file-handle synchronization by the Os.
78a3f06598Sdanielk1977 **
79a3f06598Sdanielk1977 ** Deadlock prevention:
80a3f06598Sdanielk1977 **
**     There are three mutexes used by the system: the "writer" mutex,
**     the "queue" mutex and the "lock" mutex. Rules are:
**
**     * It is illegal to block on the writer mutex when any other mutex
**       is held, and
**
**     * It is illegal to block on the queue mutex when the lock mutex
**       is held.
**
**     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
91a3f06598Sdanielk1977 **
92a3f06598Sdanielk1977 ** File system operations (invoked by SQLite thread):
93a3f06598Sdanielk1977 **
94a3f06598Sdanielk1977 **     xOpen
95a3f06598Sdanielk1977 **     xDelete
96a3f06598Sdanielk1977 **     xFileExists
97a3f06598Sdanielk1977 **
98a3f06598Sdanielk1977 ** File handle operations (invoked by SQLite thread):
99a3f06598Sdanielk1977 **
100a3f06598Sdanielk1977 **         asyncWrite, asyncClose, asyncTruncate, asyncSync
101a3f06598Sdanielk1977 **
102a3f06598Sdanielk1977 **     The operations above add an entry to the global write-op list. They
103a3f06598Sdanielk1977 **     prepare the entry, acquire the async.queueMutex momentarily while
104a3f06598Sdanielk1977 **     list pointers are  manipulated to insert the new entry, then release
105a3f06598Sdanielk1977 **     the mutex and signal the writer thread to wake up in case it happens
106a3f06598Sdanielk1977 **     to be asleep.
107a3f06598Sdanielk1977 **
108a3f06598Sdanielk1977 **
109a3f06598Sdanielk1977 **         asyncRead, asyncFileSize.
110a3f06598Sdanielk1977 **
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
113a3f06598Sdanielk1977 **     write-op queue.   So async.queueMutex is held for the duration
114a3f06598Sdanielk1977 **     of these operations to prevent other threads from changing the
115a3f06598Sdanielk1977 **     queue in mid operation.
116a3f06598Sdanielk1977 **
117a3f06598Sdanielk1977 **
118a3f06598Sdanielk1977 **         asyncLock, asyncUnlock, asyncCheckReservedLock
119a3f06598Sdanielk1977 **
120a3f06598Sdanielk1977 **     These primitives implement in-process locking using a hash table
121a3f06598Sdanielk1977 **     on the file name.  Files are locked correctly for connections coming
122a3f06598Sdanielk1977 **     from the same process.  But other processes cannot see these locks
123a3f06598Sdanielk1977 **     and will therefore not honor them.
124a3f06598Sdanielk1977 **
125a3f06598Sdanielk1977 **
126a3f06598Sdanielk1977 ** The writer thread:
127a3f06598Sdanielk1977 **
**     The async.writerMutex is used to make sure there is only
**     a single writer thread running at a time.
130a3f06598Sdanielk1977 **
131a3f06598Sdanielk1977 **     Inside the writer thread is a loop that works like this:
132a3f06598Sdanielk1977 **
133a3f06598Sdanielk1977 **         WHILE (write-op list is not empty)
134a3f06598Sdanielk1977 **             Do IO operation at head of write-op list
135a3f06598Sdanielk1977 **             Remove entry from head of write-op list
136a3f06598Sdanielk1977 **         END WHILE
137a3f06598Sdanielk1977 **
138a3f06598Sdanielk1977 **     The async.queueMutex is always held during the <write-op list is
139a3f06598Sdanielk1977 **     not empty> test, and when the entry is removed from the head
140a3f06598Sdanielk1977 **     of the write-op list. Sometimes it is held for the interim
141a3f06598Sdanielk1977 **     period (while the IO is performed), and sometimes it is
142a3f06598Sdanielk1977 **     relinquished. It is relinquished if (a) the IO op is an
143a3f06598Sdanielk1977 **     ASYNC_CLOSE or (b) when the file handle was opened, two of
144a3f06598Sdanielk1977 **     the underlying systems handles were opened on the same
145a3f06598Sdanielk1977 **     file-system entry.
146a3f06598Sdanielk1977 **
147a3f06598Sdanielk1977 **     If condition (b) above is true, then one file-handle
148a3f06598Sdanielk1977 **     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
149a3f06598Sdanielk1977 **     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
150a3f06598Sdanielk1977 **     threads to perform write() operations. This means that read
151a3f06598Sdanielk1977 **     operations are not blocked by asynchronous writes (although
152a3f06598Sdanielk1977 **     asynchronous writes may still be blocked by reads).
153a3f06598Sdanielk1977 **
154a3f06598Sdanielk1977 **     This assumes that the OS keeps two handles open on the same file
155a3f06598Sdanielk1977 **     properly in sync. That is, any read operation that starts after a
156a3f06598Sdanielk1977 **     write operation on the same file system entry has completed returns
157a3f06598Sdanielk1977 **     data consistent with the write. We also assume that if one thread
158a3f06598Sdanielk1977 **     reads a file while another is writing it all bytes other than the
159a3f06598Sdanielk1977 **     ones actually being written contain valid data.
160a3f06598Sdanielk1977 **
161a3f06598Sdanielk1977 **     If the above assumptions are not true, set the preprocessor symbol
162a3f06598Sdanielk1977 **     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
163a3f06598Sdanielk1977 */
164a3f06598Sdanielk1977 
165a3f06598Sdanielk1977 
166a3f06598Sdanielk1977 #ifndef NDEBUG
167a3f06598Sdanielk1977 # define TESTONLY( X ) X
168a3f06598Sdanielk1977 #else
169a3f06598Sdanielk1977 # define TESTONLY( X )
170a3f06598Sdanielk1977 #endif
171a3f06598Sdanielk1977 
172a3f06598Sdanielk1977 /*
173debcfd2dSdanielk1977 ** PORTING FUNCTIONS
174debcfd2dSdanielk1977 **
175a3f06598Sdanielk1977 ** There are two definitions of the following functions. One for pthreads
176a3f06598Sdanielk1977 ** compatible systems and one for Win32. These functions isolate the OS
177a3f06598Sdanielk1977 ** specific code required by each platform.
178a3f06598Sdanielk1977 **
179a3f06598Sdanielk1977 ** The system uses three mutexes and a single condition variable. To
180a3f06598Sdanielk1977 ** block on a mutex, async_mutex_enter() is called. The parameter passed
181a3f06598Sdanielk1977 ** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
182a3f06598Sdanielk1977 ** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
183a3f06598Sdanielk1977 ** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
184a3f06598Sdanielk1977 ** called with a parameter identifying the mutex being unlocked. Mutexes
185a3f06598Sdanielk1977 ** are not recursive - it is an error to call async_mutex_enter() to
186a3f06598Sdanielk1977 ** lock a mutex that is already locked, or to call async_mutex_leave()
187a3f06598Sdanielk1977 ** to unlock a mutex that is not currently locked.
188a3f06598Sdanielk1977 **
189a3f06598Sdanielk1977 ** The async_cond_wait() and async_cond_signal() functions are modelled
190a3f06598Sdanielk1977 ** on the pthreads functions with similar names. The first parameter to
191a3f06598Sdanielk1977 ** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
192a3f06598Sdanielk1977 ** is called the mutex identified by the second parameter must be held.
193a3f06598Sdanielk1977 ** The mutex is unlocked, and the calling thread simultaneously begins
194a3f06598Sdanielk1977 ** waiting for the condition variable to be signalled by another thread.
195a3f06598Sdanielk1977 ** After another thread signals the condition variable, the calling
196a3f06598Sdanielk1977 ** thread stops waiting, locks mutex eMutex and returns. The
197a3f06598Sdanielk1977 ** async_cond_signal() function is used to signal the condition variable.
198a3f06598Sdanielk1977 ** It is assumed that the mutex used by the thread calling async_cond_wait()
199a3f06598Sdanielk1977 ** is held by the caller of async_cond_signal() (otherwise there would be
200a3f06598Sdanielk1977 ** a race condition).
201a3f06598Sdanielk1977 **
202a3f06598Sdanielk1977 ** It is guaranteed that no other thread will call async_cond_wait() when
203a3f06598Sdanielk1977 ** there is already a thread waiting on the condition variable.
204a3f06598Sdanielk1977 **
205a3f06598Sdanielk1977 ** The async_sched_yield() function is called to suggest to the operating
206a3f06598Sdanielk1977 ** system that it would be a good time to shift the current thread off the
207a3f06598Sdanielk1977 ** CPU. The system will still work if this function is not implemented
208a3f06598Sdanielk1977 ** (it is not currently implemented for win32), but it might be marginally
209a3f06598Sdanielk1977 ** more efficient if it is.
210a3f06598Sdanielk1977 */
211a3f06598Sdanielk1977 static void async_mutex_enter(int eMutex);
212a3f06598Sdanielk1977 static void async_mutex_leave(int eMutex);
213a3f06598Sdanielk1977 static void async_cond_wait(int eCond, int eMutex);
214a3f06598Sdanielk1977 static void async_cond_signal(int eCond);
215a3f06598Sdanielk1977 static void async_sched_yield(void);
216a3f06598Sdanielk1977 
217a3f06598Sdanielk1977 /*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
221a3f06598Sdanielk1977 **
222a3f06598Sdanielk1977 ** For pthreads builds, both of these functions are no-ops. For win32,
223a3f06598Sdanielk1977 ** they provide an opportunity to initialize and finalize the required
224a3f06598Sdanielk1977 ** mutex and condition variables.
225a3f06598Sdanielk1977 **
226a3f06598Sdanielk1977 ** If async_os_initialize() returns other than zero, then the initialization
227a3f06598Sdanielk1977 ** fails and SQLITE_ERROR is returned to the user.
228a3f06598Sdanielk1977 */
229a3f06598Sdanielk1977 static int async_os_initialize(void);
230a3f06598Sdanielk1977 static void async_os_shutdown(void);
231a3f06598Sdanielk1977 
232a3f06598Sdanielk1977 /* Values for use as the 'eMutex' argument of the above functions. The
233a3f06598Sdanielk1977 ** integer values assigned to these constants are important for assert()
234a3f06598Sdanielk1977 ** statements that verify that mutexes are locked in the correct order.
235a3f06598Sdanielk1977 ** Specifically, it is unsafe to try to lock mutex N while holding a lock
236a3f06598Sdanielk1977 ** on mutex M if (M<=N).
237a3f06598Sdanielk1977 */
238a3f06598Sdanielk1977 #define ASYNC_MUTEX_LOCK    0
239a3f06598Sdanielk1977 #define ASYNC_MUTEX_QUEUE   1
240a3f06598Sdanielk1977 #define ASYNC_MUTEX_WRITER  2
241a3f06598Sdanielk1977 
242a3f06598Sdanielk1977 /* Values for use as the 'eCond' argument of the above functions. */
243a3f06598Sdanielk1977 #define ASYNC_COND_QUEUE    0
244a3f06598Sdanielk1977 
245a3f06598Sdanielk1977 /*************************************************************************
246a3f06598Sdanielk1977 ** Start of OS specific code.
247a3f06598Sdanielk1977 */
248a3f06598Sdanielk1977 #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
249a3f06598Sdanielk1977 
250*eb4ac06fSshane #include <windows.h>
251*eb4ac06fSshane 
252a3f06598Sdanielk1977 /* The following block contains the win32 specific code. */
253a3f06598Sdanielk1977 
254a3f06598Sdanielk1977 #define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])
255a3f06598Sdanielk1977 
256a3f06598Sdanielk1977 static struct AsyncPrimitives {
257a3f06598Sdanielk1977   int isInit;
258a3f06598Sdanielk1977   DWORD aHolder[3];
259a3f06598Sdanielk1977   CRITICAL_SECTION aMutex[3];
260a3f06598Sdanielk1977   HANDLE aCond[1];
261a3f06598Sdanielk1977 } primitives = { 0 };
262a3f06598Sdanielk1977 
263a3f06598Sdanielk1977 static int async_os_initialize(void){
264a3f06598Sdanielk1977   if( !primitives.isInit ){
265a3f06598Sdanielk1977     primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
266a3f06598Sdanielk1977     if( primitives.aCond[0]==NULL ){
267a3f06598Sdanielk1977       return 1;
268a3f06598Sdanielk1977     }
269a3f06598Sdanielk1977     InitializeCriticalSection(&primitives.aMutex[0]);
270a3f06598Sdanielk1977     InitializeCriticalSection(&primitives.aMutex[1]);
271a3f06598Sdanielk1977     InitializeCriticalSection(&primitives.aMutex[2]);
272a3f06598Sdanielk1977     primitives.isInit = 1;
273a3f06598Sdanielk1977   }
274a3f06598Sdanielk1977   return 0;
275a3f06598Sdanielk1977 }
276a3f06598Sdanielk1977 static void async_os_shutdown(void){
277a3f06598Sdanielk1977   if( primitives.isInit ){
278a3f06598Sdanielk1977     DeleteCriticalSection(&primitives.aMutex[0]);
279a3f06598Sdanielk1977     DeleteCriticalSection(&primitives.aMutex[1]);
280a3f06598Sdanielk1977     DeleteCriticalSection(&primitives.aMutex[2]);
281a3f06598Sdanielk1977     CloseHandle(primitives.aCond[0]);
282a3f06598Sdanielk1977     primitives.isInit = 0;
283a3f06598Sdanielk1977   }
284a3f06598Sdanielk1977 }
285a3f06598Sdanielk1977 
286a3f06598Sdanielk1977 /* The following block contains the Win32 specific code. */
287a3f06598Sdanielk1977 static void async_mutex_enter(int eMutex){
288a3f06598Sdanielk1977   assert( eMutex==0 || eMutex==1 || eMutex==2 );
289a3f06598Sdanielk1977   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
290a3f06598Sdanielk1977   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
291a3f06598Sdanielk1977   assert( eMutex!=0 || (!mutex_held(0)) );
292a3f06598Sdanielk1977   EnterCriticalSection(&primitives.aMutex[eMutex]);
293a3f06598Sdanielk1977   TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
294a3f06598Sdanielk1977 }
295a3f06598Sdanielk1977 static void async_mutex_leave(int eMutex){
296a3f06598Sdanielk1977   assert( eMutex==0 || eMutex==1 || eMutex==2 );
297a3f06598Sdanielk1977   assert( mutex_held(eMutex) );
298a3f06598Sdanielk1977   TESTONLY( primitives.aHolder[eMutex] = 0; )
299a3f06598Sdanielk1977   LeaveCriticalSection(&primitives.aMutex[eMutex]);
300a3f06598Sdanielk1977 }
301a3f06598Sdanielk1977 static void async_cond_wait(int eCond, int eMutex){
302a3f06598Sdanielk1977   ResetEvent(primitives.aCond[eCond]);
303a3f06598Sdanielk1977   async_mutex_leave(eMutex);
304a3f06598Sdanielk1977   WaitForSingleObject(primitives.aCond[eCond], INFINITE);
305a3f06598Sdanielk1977   async_mutex_enter(eMutex);
306a3f06598Sdanielk1977 }
307a3f06598Sdanielk1977 static void async_cond_signal(int eCond){
308a3f06598Sdanielk1977   assert( mutex_held(ASYNC_MUTEX_QUEUE) );
309a3f06598Sdanielk1977   SetEvent(primitives.aCond[eCond]);
310a3f06598Sdanielk1977 }
/*
** Win32: hint to the scheduler that this thread can give up the CPU.
*/
static void async_sched_yield(void){
  Sleep(0);
}
314a3f06598Sdanielk1977 #else
315a3f06598Sdanielk1977 
316a3f06598Sdanielk1977 /* The following block contains the pthreads specific code. */
317a3f06598Sdanielk1977 #include <pthread.h>
318a3f06598Sdanielk1977 #include <sched.h>
319a3f06598Sdanielk1977 
320a3f06598Sdanielk1977 #define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
321a3f06598Sdanielk1977 
/*
** pthreads: nothing to set up, since the primitives are statically
** initialized. Always returns 0 (success).
*/
static int async_os_initialize(void){
  return 0;
}
/*
** pthreads: nothing to tear down.
*/
static void async_os_shutdown(void){
  return;
}
324a3f06598Sdanielk1977 
/*
** pthreads synchronization objects shared by the whole module. All of
** them are statically initialized.
*/
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];    /* Indexed by ASYNC_MUTEX_xxx value */
  pthread_cond_t aCond[1];      /* Indexed by ASYNC_COND_xxx value */
  pthread_t aHolder[3];         /* Thread holding each mutex (debug only) */
} primitives = {
  /* aMutex[]  */
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER },
  /* aCond[]   */
  { PTHREAD_COND_INITIALIZER },
  /* aHolder[] */
  { 0, 0, 0 }
};
337a3f06598Sdanielk1977 
338a3f06598Sdanielk1977 static void async_mutex_enter(int eMutex){
339a3f06598Sdanielk1977   assert( eMutex==0 || eMutex==1 || eMutex==2 );
340a3f06598Sdanielk1977   assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
341a3f06598Sdanielk1977   assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
342a3f06598Sdanielk1977   assert( eMutex!=0 || (!mutex_held(0)) );
343a3f06598Sdanielk1977   pthread_mutex_lock(&primitives.aMutex[eMutex]);
344a3f06598Sdanielk1977   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
345a3f06598Sdanielk1977 }
346a3f06598Sdanielk1977 static void async_mutex_leave(int eMutex){
347a3f06598Sdanielk1977   assert( eMutex==0 || eMutex==1 || eMutex==2 );
348a3f06598Sdanielk1977   assert( mutex_held(eMutex) );
349a3f06598Sdanielk1977   TESTONLY( primitives.aHolder[eMutex] = 0; )
350a3f06598Sdanielk1977   pthread_mutex_unlock(&primitives.aMutex[eMutex]);
351a3f06598Sdanielk1977 }
352a3f06598Sdanielk1977 static void async_cond_wait(int eCond, int eMutex){
353a3f06598Sdanielk1977   assert( eMutex==0 || eMutex==1 || eMutex==2 );
354a3f06598Sdanielk1977   assert( mutex_held(eMutex) );
355a3f06598Sdanielk1977   TESTONLY( primitives.aHolder[eMutex] = 0; )
356a3f06598Sdanielk1977   pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
357a3f06598Sdanielk1977   TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
358a3f06598Sdanielk1977 }
359a3f06598Sdanielk1977 static void async_cond_signal(int eCond){
360a3f06598Sdanielk1977   assert( mutex_held(ASYNC_MUTEX_QUEUE) );
361a3f06598Sdanielk1977   pthread_cond_signal(&primitives.aCond[eCond]);
362a3f06598Sdanielk1977 }
/*
** pthreads: hint to the scheduler that this thread can give up the CPU.
** The return value of sched_yield() is ignored.
*/
static void async_sched_yield(void){
  (void)sched_yield();
}
366a3f06598Sdanielk1977 #endif
367a3f06598Sdanielk1977 /*
368a3f06598Sdanielk1977 ** End of OS specific code.
369a3f06598Sdanielk1977 *************************************************************************/
370a3f06598Sdanielk1977 
371a3f06598Sdanielk1977 #define assert_mutex_is_held(X) assert( mutex_held(X) )
372a3f06598Sdanielk1977 
373a3f06598Sdanielk1977 
374a3f06598Sdanielk1977 #ifndef SQLITE_ASYNC_TWO_FILEHANDLES
375a3f06598Sdanielk1977 /* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
376a3f06598Sdanielk1977 #define SQLITE_ASYNC_TWO_FILEHANDLES 1
377a3f06598Sdanielk1977 #endif
378a3f06598Sdanielk1977 
379a3f06598Sdanielk1977 /*
380a3f06598Sdanielk1977 ** State information is held in the static variable "async" defined
381a3f06598Sdanielk1977 ** as the following structure.
382a3f06598Sdanielk1977 **
383a3f06598Sdanielk1977 ** Both async.ioError and async.nFile are protected by async.queueMutex.
384a3f06598Sdanielk1977 */
385a3f06598Sdanielk1977 static struct TestAsyncStaticData {
386a3f06598Sdanielk1977   AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
387a3f06598Sdanielk1977   AsyncWrite *pQueueLast;      /* Last write operation on the list */
388a3f06598Sdanielk1977   AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
389a3f06598Sdanielk1977   volatile int ioDelay;        /* Extra delay between write operations */
390a3f06598Sdanielk1977   volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
3914598b8e4Sdanielk1977   volatile int bLockFiles;     /* Current value of "lockfiles" parameter */
392a3f06598Sdanielk1977   int ioError;                 /* True if an IO error has occurred */
393a3f06598Sdanielk1977   int nFile;                   /* Number of open files (from sqlite pov) */
3944598b8e4Sdanielk1977 } async = { 0,0,0,0,0,1,0,0 };
395a3f06598Sdanielk1977 
396a3f06598Sdanielk1977 /* Possible values of AsyncWrite.op */
397a3f06598Sdanielk1977 #define ASYNC_NOOP          0
398a3f06598Sdanielk1977 #define ASYNC_WRITE         1
399a3f06598Sdanielk1977 #define ASYNC_SYNC          2
400a3f06598Sdanielk1977 #define ASYNC_TRUNCATE      3
401a3f06598Sdanielk1977 #define ASYNC_CLOSE         4
402a3f06598Sdanielk1977 #define ASYNC_DELETE        5
403a3f06598Sdanielk1977 #define ASYNC_OPENEXCLUSIVE 6
404a3f06598Sdanielk1977 #define ASYNC_UNLOCK        7
405a3f06598Sdanielk1977 
406a3f06598Sdanielk1977 /* Names of opcodes.  Used for debugging only.
407a3f06598Sdanielk1977 ** Make sure these stay in sync with the macros above!
408a3f06598Sdanielk1977 */
static const char *azOpcodeName[] = {
  "NOOP",       /* 0: ASYNC_NOOP */
  "WRITE",      /* 1: ASYNC_WRITE */
  "SYNC",       /* 2: ASYNC_SYNC */
  "TRUNCATE",   /* 3: ASYNC_TRUNCATE */
  "CLOSE",      /* 4: ASYNC_CLOSE */
  "DELETE",     /* 5: ASYNC_DELETE */
  "OPENEX",     /* 6: ASYNC_OPENEXCLUSIVE */
  "UNLOCK"      /* 7: ASYNC_UNLOCK */
};
412a3f06598Sdanielk1977 
413a3f06598Sdanielk1977 /*
414a3f06598Sdanielk1977 ** Entries on the write-op queue are instances of the AsyncWrite
415a3f06598Sdanielk1977 ** structure, defined here.
416a3f06598Sdanielk1977 **
417a3f06598Sdanielk1977 ** The interpretation of the iOffset and nByte variables varies depending
418a3f06598Sdanielk1977 ** on the value of AsyncWrite.op:
419a3f06598Sdanielk1977 **
420a3f06598Sdanielk1977 ** ASYNC_NOOP:
421a3f06598Sdanielk1977 **     No values used.
422a3f06598Sdanielk1977 **
423a3f06598Sdanielk1977 ** ASYNC_WRITE:
424a3f06598Sdanielk1977 **     iOffset -> Offset in file to write to.
425a3f06598Sdanielk1977 **     nByte   -> Number of bytes of data to write (pointed to by zBuf).
426a3f06598Sdanielk1977 **
427a3f06598Sdanielk1977 ** ASYNC_SYNC:
428a3f06598Sdanielk1977 **     nByte   -> flags to pass to sqlite3OsSync().
429a3f06598Sdanielk1977 **
430a3f06598Sdanielk1977 ** ASYNC_TRUNCATE:
431a3f06598Sdanielk1977 **     iOffset -> Size to truncate file to.
432a3f06598Sdanielk1977 **     nByte   -> Unused.
433a3f06598Sdanielk1977 **
434a3f06598Sdanielk1977 ** ASYNC_CLOSE:
435a3f06598Sdanielk1977 **     iOffset -> Unused.
436a3f06598Sdanielk1977 **     nByte   -> Unused.
437a3f06598Sdanielk1977 **
438a3f06598Sdanielk1977 ** ASYNC_DELETE:
439a3f06598Sdanielk1977 **     iOffset -> Contains the "syncDir" flag.
440a3f06598Sdanielk1977 **     nByte   -> Number of bytes of zBuf points to (file name).
441a3f06598Sdanielk1977 **
442a3f06598Sdanielk1977 ** ASYNC_OPENEXCLUSIVE:
443a3f06598Sdanielk1977 **     iOffset -> Value of "delflag".
444a3f06598Sdanielk1977 **     nByte   -> Number of bytes of zBuf points to (file name).
445a3f06598Sdanielk1977 **
446a3f06598Sdanielk1977 ** ASYNC_UNLOCK:
447a3f06598Sdanielk1977 **     nByte   -> Argument to sqlite3OsUnlock().
448a3f06598Sdanielk1977 **
449a3f06598Sdanielk1977 **
450a3f06598Sdanielk1977 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
451a3f06598Sdanielk1977 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
452a3f06598Sdanielk1977 ** single blob, so is deleted when sqlite3_free() is called on the parent
453a3f06598Sdanielk1977 ** structure.
454a3f06598Sdanielk1977 */
455a3f06598Sdanielk1977 struct AsyncWrite {
456a3f06598Sdanielk1977   AsyncFileData *pFileData;    /* File to write data to or sync */
457a3f06598Sdanielk1977   int op;                      /* One of ASYNC_xxx etc. */
458a3f06598Sdanielk1977   sqlite_int64 iOffset;        /* See above */
459a3f06598Sdanielk1977   int nByte;          /* See above */
460a3f06598Sdanielk1977   char *zBuf;         /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
461a3f06598Sdanielk1977   AsyncWrite *pNext;  /* Next write operation (to any file) */
462a3f06598Sdanielk1977 };
463a3f06598Sdanielk1977 
464a3f06598Sdanielk1977 /*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and stored in the linked list headed by
** async.pLock. Entries are keyed by the full pathnames of the opened files.
469a3f06598Sdanielk1977 **
470a3f06598Sdanielk1977 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock
471a3f06598Sdanielk1977 ** structures, one for each handle currently open on the file.
472a3f06598Sdanielk1977 **
473a3f06598Sdanielk1977 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
4744598b8e4Sdanielk1977 ** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
4754598b8e4Sdanielk1977 ** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
4764598b8e4Sdanielk1977 ** Otherwise, pFile is a file handle opened on the file in question and
4774598b8e4Sdanielk1977 ** used to obtain the file-system locks required by database connections
4784598b8e4Sdanielk1977 ** within this process.
479a3f06598Sdanielk1977 **
480a3f06598Sdanielk1977 ** See comments above the asyncLock() function for more details on
481a3f06598Sdanielk1977 ** the implementation of database locking used by this backend.
482a3f06598Sdanielk1977 */
483a3f06598Sdanielk1977 struct AsyncLock {
484a3f06598Sdanielk1977   char *zFile;
485a3f06598Sdanielk1977   int nFile;
486a3f06598Sdanielk1977   sqlite3_file *pFile;
487a3f06598Sdanielk1977   int eLock;
488a3f06598Sdanielk1977   AsyncFileLock *pList;
489a3f06598Sdanielk1977   AsyncLock *pNext;           /* Next in linked list headed by async.pLock */
490a3f06598Sdanielk1977 };
491a3f06598Sdanielk1977 
492a3f06598Sdanielk1977 /*
493a3f06598Sdanielk1977 ** An instance of the following structure is allocated along with each
494a3f06598Sdanielk1977 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
495a3f06598Sdanielk1977 ** file was opened with the SQLITE_OPEN_MAIN_DB.
496a3f06598Sdanielk1977 */
497a3f06598Sdanielk1977 struct AsyncFileLock {
498a3f06598Sdanielk1977   int eLock;                /* Internally visible lock state (sqlite pov) */
499a3f06598Sdanielk1977   int eAsyncLock;           /* Lock-state with write-queue unlock */
500a3f06598Sdanielk1977   AsyncFileLock *pNext;
501a3f06598Sdanielk1977 };
502a3f06598Sdanielk1977 
503a3f06598Sdanielk1977 /*
504a3f06598Sdanielk1977 ** The AsyncFile structure is a subclass of sqlite3_file used for
505a3f06598Sdanielk1977 ** asynchronous IO.
506a3f06598Sdanielk1977 **
507a3f06598Sdanielk1977 ** All of the actual data for the structure is stored in the structure
508a3f06598Sdanielk1977 ** pointed to by AsyncFile.pData, which is allocated as part of the
509a3f06598Sdanielk1977 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
510a3f06598Sdanielk1977 ** lifetime of the AsyncFile structure is ended by the caller after OsClose()
511a3f06598Sdanielk1977 ** is called, but the data in AsyncFileData may be required by the
512a3f06598Sdanielk1977 ** writer thread after that point.
513a3f06598Sdanielk1977 */
514a3f06598Sdanielk1977 struct AsyncFile {
515a3f06598Sdanielk1977   sqlite3_io_methods *pMethod;
516a3f06598Sdanielk1977   AsyncFileData *pData;
517a3f06598Sdanielk1977 };
518a3f06598Sdanielk1977 struct AsyncFileData {
519a3f06598Sdanielk1977   char *zName;               /* Underlying OS filename - used for debugging */
520a3f06598Sdanielk1977   int nName;                 /* Number of characters in zName */
521a3f06598Sdanielk1977   sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
522a3f06598Sdanielk1977   sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
523a3f06598Sdanielk1977   AsyncFileLock lock;        /* Lock state for this handle */
524a3f06598Sdanielk1977   AsyncLock *pLock;          /* AsyncLock object for this file system entry */
525a3f06598Sdanielk1977   AsyncWrite closeOp;        /* Preallocated close operation */
526a3f06598Sdanielk1977 };
527a3f06598Sdanielk1977 
528a3f06598Sdanielk1977 /*
529a3f06598Sdanielk1977 ** Add an entry to the end of the global write-op list. pWrite should point
530a3f06598Sdanielk1977 ** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
531a3f06598Sdanielk1977 ** thread will call sqlite3_free() to free the structure after the specified
532a3f06598Sdanielk1977 ** operation has been completed.
533a3f06598Sdanielk1977 **
534a3f06598Sdanielk1977 ** Once an AsyncWrite structure has been added to the list, it becomes the
535a3f06598Sdanielk1977 ** property of the writer thread and must not be read or modified by the
536a3f06598Sdanielk1977 ** caller.
537a3f06598Sdanielk1977 */
538a3f06598Sdanielk1977 static void addAsyncWrite(AsyncWrite *pWrite){
539a3f06598Sdanielk1977   /* We must hold the queue mutex in order to modify the queue pointers */
540a3f06598Sdanielk1977   if( pWrite->op!=ASYNC_UNLOCK ){
541a3f06598Sdanielk1977     async_mutex_enter(ASYNC_MUTEX_QUEUE);
542a3f06598Sdanielk1977   }
543a3f06598Sdanielk1977 
544a3f06598Sdanielk1977   /* Add the record to the end of the write-op queue */
545a3f06598Sdanielk1977   assert( !pWrite->pNext );
546a3f06598Sdanielk1977   if( async.pQueueLast ){
547a3f06598Sdanielk1977     assert( async.pQueueFirst );
548a3f06598Sdanielk1977     async.pQueueLast->pNext = pWrite;
549a3f06598Sdanielk1977   }else{
550a3f06598Sdanielk1977     async.pQueueFirst = pWrite;
551a3f06598Sdanielk1977   }
552a3f06598Sdanielk1977   async.pQueueLast = pWrite;
553a3f06598Sdanielk1977   ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
554a3f06598Sdanielk1977          pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
555a3f06598Sdanielk1977 
556a3f06598Sdanielk1977   if( pWrite->op==ASYNC_CLOSE ){
557a3f06598Sdanielk1977     async.nFile--;
558a3f06598Sdanielk1977   }
559a3f06598Sdanielk1977 
560a3f06598Sdanielk1977   /* The writer thread might have been idle because there was nothing
561a3f06598Sdanielk1977   ** on the write-op queue for it to do.  So wake it up. */
562a3f06598Sdanielk1977   async_cond_signal(ASYNC_COND_QUEUE);
563a3f06598Sdanielk1977 
564a3f06598Sdanielk1977   /* Drop the queue mutex */
565a3f06598Sdanielk1977   if( pWrite->op!=ASYNC_UNLOCK ){
566a3f06598Sdanielk1977     async_mutex_leave(ASYNC_MUTEX_QUEUE);
567a3f06598Sdanielk1977   }
568a3f06598Sdanielk1977 }
569a3f06598Sdanielk1977 
570a3f06598Sdanielk1977 /*
571a3f06598Sdanielk1977 ** Increment async.nFile in a thread-safe manner.
572a3f06598Sdanielk1977 */
573a3f06598Sdanielk1977 static void incrOpenFileCount(void){
574a3f06598Sdanielk1977   /* We must hold the queue mutex in order to modify async.nFile */
575a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_QUEUE);
576a3f06598Sdanielk1977   if( async.nFile==0 ){
577a3f06598Sdanielk1977     async.ioError = SQLITE_OK;
578a3f06598Sdanielk1977   }
579a3f06598Sdanielk1977   async.nFile++;
580a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_QUEUE);
581a3f06598Sdanielk1977 }
582a3f06598Sdanielk1977 
583a3f06598Sdanielk1977 /*
584a3f06598Sdanielk1977 ** This is a utility function to allocate and populate a new AsyncWrite
585a3f06598Sdanielk1977 ** structure and insert it (via addAsyncWrite() ) into the global list.
586a3f06598Sdanielk1977 */
587a3f06598Sdanielk1977 static int addNewAsyncWrite(
588a3f06598Sdanielk1977   AsyncFileData *pFileData,
589a3f06598Sdanielk1977   int op,
590a3f06598Sdanielk1977   sqlite3_int64 iOffset,
591a3f06598Sdanielk1977   int nByte,
592a3f06598Sdanielk1977   const char *zByte
593a3f06598Sdanielk1977 ){
594a3f06598Sdanielk1977   AsyncWrite *p;
595a3f06598Sdanielk1977   if( op!=ASYNC_CLOSE && async.ioError ){
596a3f06598Sdanielk1977     return async.ioError;
597a3f06598Sdanielk1977   }
598a3f06598Sdanielk1977   p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
599a3f06598Sdanielk1977   if( !p ){
600a3f06598Sdanielk1977     /* The upper layer does not expect operations like OsWrite() to
601a3f06598Sdanielk1977     ** return SQLITE_NOMEM. This is partly because under normal conditions
602a3f06598Sdanielk1977     ** SQLite is required to do rollback without calling malloc(). So
603a3f06598Sdanielk1977     ** if malloc() fails here, treat it as an I/O error. The above
604a3f06598Sdanielk1977     ** layer knows how to handle that.
605a3f06598Sdanielk1977     */
606a3f06598Sdanielk1977     return SQLITE_IOERR;
607a3f06598Sdanielk1977   }
608a3f06598Sdanielk1977   p->op = op;
609a3f06598Sdanielk1977   p->iOffset = iOffset;
610a3f06598Sdanielk1977   p->nByte = nByte;
611a3f06598Sdanielk1977   p->pFileData = pFileData;
612a3f06598Sdanielk1977   p->pNext = 0;
613a3f06598Sdanielk1977   if( zByte ){
614a3f06598Sdanielk1977     p->zBuf = (char *)&p[1];
615a3f06598Sdanielk1977     memcpy(p->zBuf, zByte, nByte);
616a3f06598Sdanielk1977   }else{
617a3f06598Sdanielk1977     p->zBuf = 0;
618a3f06598Sdanielk1977   }
619a3f06598Sdanielk1977   addAsyncWrite(p);
620a3f06598Sdanielk1977   return SQLITE_OK;
621a3f06598Sdanielk1977 }
622a3f06598Sdanielk1977 
623a3f06598Sdanielk1977 /*
624a3f06598Sdanielk1977 ** Close the file. This just adds an entry to the write-op list, the file is
625a3f06598Sdanielk1977 ** not actually closed.
626a3f06598Sdanielk1977 */
627a3f06598Sdanielk1977 static int asyncClose(sqlite3_file *pFile){
628a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
629a3f06598Sdanielk1977 
630a3f06598Sdanielk1977   /* Unlock the file, if it is locked */
631a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_LOCK);
632a3f06598Sdanielk1977   p->lock.eLock = 0;
633a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_LOCK);
634a3f06598Sdanielk1977 
635a3f06598Sdanielk1977   addAsyncWrite(&p->closeOp);
636a3f06598Sdanielk1977   return SQLITE_OK;
637a3f06598Sdanielk1977 }
638a3f06598Sdanielk1977 
639a3f06598Sdanielk1977 /*
640a3f06598Sdanielk1977 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
641a3f06598Sdanielk1977 ** writing to the underlying file, this function adds an entry to the end of
642a3f06598Sdanielk1977 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
643a3f06598Sdanielk1977 ** returned.
644a3f06598Sdanielk1977 */
645a3f06598Sdanielk1977 static int asyncWrite(
646a3f06598Sdanielk1977   sqlite3_file *pFile,
647a3f06598Sdanielk1977   const void *pBuf,
648a3f06598Sdanielk1977   int amt,
649a3f06598Sdanielk1977   sqlite3_int64 iOff
650a3f06598Sdanielk1977 ){
651a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
652a3f06598Sdanielk1977   return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
653a3f06598Sdanielk1977 }
654a3f06598Sdanielk1977 
655a3f06598Sdanielk1977 /*
656a3f06598Sdanielk1977 ** Read data from the file. First we read from the filesystem, then adjust
657a3f06598Sdanielk1977 ** the contents of the buffer based on ASYNC_WRITE operations in the
658a3f06598Sdanielk1977 ** write-op queue.
659a3f06598Sdanielk1977 **
660a3f06598Sdanielk1977 ** This method holds the mutex from start to finish.
661a3f06598Sdanielk1977 */
662a3f06598Sdanielk1977 static int asyncRead(
663a3f06598Sdanielk1977   sqlite3_file *pFile,
664a3f06598Sdanielk1977   void *zOut,
665a3f06598Sdanielk1977   int iAmt,
666a3f06598Sdanielk1977   sqlite3_int64 iOffset
667a3f06598Sdanielk1977 ){
668a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
669a3f06598Sdanielk1977   int rc = SQLITE_OK;
670a3f06598Sdanielk1977   sqlite3_int64 filesize;
671a3f06598Sdanielk1977   int nRead;
672a3f06598Sdanielk1977   sqlite3_file *pBase = p->pBaseRead;
673a3f06598Sdanielk1977 
674a3f06598Sdanielk1977   /* Grab the write queue mutex for the duration of the call */
675a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_QUEUE);
676a3f06598Sdanielk1977 
677a3f06598Sdanielk1977   /* If an I/O error has previously occurred in this virtual file
678a3f06598Sdanielk1977   ** system, then all subsequent operations fail.
679a3f06598Sdanielk1977   */
680a3f06598Sdanielk1977   if( async.ioError!=SQLITE_OK ){
681a3f06598Sdanielk1977     rc = async.ioError;
682a3f06598Sdanielk1977     goto asyncread_out;
683a3f06598Sdanielk1977   }
684a3f06598Sdanielk1977 
685a3f06598Sdanielk1977   if( pBase->pMethods ){
686a3f06598Sdanielk1977     rc = pBase->pMethods->xFileSize(pBase, &filesize);
687a3f06598Sdanielk1977     if( rc!=SQLITE_OK ){
688a3f06598Sdanielk1977       goto asyncread_out;
689a3f06598Sdanielk1977     }
690a3628d14Sshane     nRead = (int)MIN(filesize - iOffset, iAmt);
691a3f06598Sdanielk1977     if( nRead>0 ){
692a3f06598Sdanielk1977       rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
693a3f06598Sdanielk1977       ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
694a3f06598Sdanielk1977     }
695a3f06598Sdanielk1977   }
696a3f06598Sdanielk1977 
697a3f06598Sdanielk1977   if( rc==SQLITE_OK ){
698a3f06598Sdanielk1977     AsyncWrite *pWrite;
699a3f06598Sdanielk1977     char *zName = p->zName;
700a3f06598Sdanielk1977 
701a3f06598Sdanielk1977     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
702a3f06598Sdanielk1977       if( pWrite->op==ASYNC_WRITE && (
703a3f06598Sdanielk1977         (pWrite->pFileData==p) ||
704a3f06598Sdanielk1977         (zName && pWrite->pFileData->zName==zName)
705a3f06598Sdanielk1977       )){
706a3628d14Sshane         sqlite3_int64 iBeginOut = (pWrite->iOffset-iOffset);
707a3628d14Sshane         sqlite3_int64 iBeginIn = -iBeginOut;
708a3f06598Sdanielk1977         int nCopy;
709a3f06598Sdanielk1977 
710a3f06598Sdanielk1977         if( iBeginIn<0 ) iBeginIn = 0;
711a3f06598Sdanielk1977         if( iBeginOut<0 ) iBeginOut = 0;
712*eb4ac06fSshane         nCopy = (int)MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
713a3f06598Sdanielk1977 
714a3f06598Sdanielk1977         if( nCopy>0 ){
715a3f06598Sdanielk1977           memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
716a3f06598Sdanielk1977           ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
717a3f06598Sdanielk1977         }
718a3f06598Sdanielk1977       }
719a3f06598Sdanielk1977     }
720a3f06598Sdanielk1977   }
721a3f06598Sdanielk1977 
722a3f06598Sdanielk1977 asyncread_out:
723a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_QUEUE);
724a3f06598Sdanielk1977   return rc;
725a3f06598Sdanielk1977 }
726a3f06598Sdanielk1977 
727a3f06598Sdanielk1977 /*
728a3f06598Sdanielk1977 ** Truncate the file to nByte bytes in length. This just adds an entry to
729a3f06598Sdanielk1977 ** the write-op list, no IO actually takes place.
730a3f06598Sdanielk1977 */
731a3f06598Sdanielk1977 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
732a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
733a3f06598Sdanielk1977   return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
734a3f06598Sdanielk1977 }
735a3f06598Sdanielk1977 
736a3f06598Sdanielk1977 /*
737a3f06598Sdanielk1977 ** Sync the file. This just adds an entry to the write-op list, the
738a3f06598Sdanielk1977 ** sync() is done later by sqlite3_async_flush().
739a3f06598Sdanielk1977 */
740a3f06598Sdanielk1977 static int asyncSync(sqlite3_file *pFile, int flags){
741a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
742a3f06598Sdanielk1977   return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
743a3f06598Sdanielk1977 }
744a3f06598Sdanielk1977 
745a3f06598Sdanielk1977 /*
746a3f06598Sdanielk1977 ** Read the size of the file. First we read the size of the file system
747a3f06598Sdanielk1977 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
748a3f06598Sdanielk1977 ** currently in the write-op list.
749a3f06598Sdanielk1977 **
750a3f06598Sdanielk1977 ** This method holds the mutex from start to finish.
751a3f06598Sdanielk1977 */
752a3f06598Sdanielk1977 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
753a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
754a3f06598Sdanielk1977   int rc = SQLITE_OK;
755a3f06598Sdanielk1977   sqlite3_int64 s = 0;
756a3f06598Sdanielk1977   sqlite3_file *pBase;
757a3f06598Sdanielk1977 
758a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_QUEUE);
759a3f06598Sdanielk1977 
760*eb4ac06fSshane   /* Read the filesystem size from the base file. If pMethods is NULL, this
761a3f06598Sdanielk1977   ** means the file hasn't been opened yet. In this case all relevant data
762a3f06598Sdanielk1977   ** must be in the write-op queue anyway, so we can omit reading from the
763a3f06598Sdanielk1977   ** file-system.
764a3f06598Sdanielk1977   */
765a3f06598Sdanielk1977   pBase = p->pBaseRead;
766a3f06598Sdanielk1977   if( pBase->pMethods ){
767a3f06598Sdanielk1977     rc = pBase->pMethods->xFileSize(pBase, &s);
768a3f06598Sdanielk1977   }
769a3f06598Sdanielk1977 
770a3f06598Sdanielk1977   if( rc==SQLITE_OK ){
771a3f06598Sdanielk1977     AsyncWrite *pWrite;
772a3f06598Sdanielk1977     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
773a3f06598Sdanielk1977       if( pWrite->op==ASYNC_DELETE
774a3f06598Sdanielk1977        && p->zName
775a3f06598Sdanielk1977        && strcmp(p->zName, pWrite->zBuf)==0
776a3f06598Sdanielk1977       ){
777a3f06598Sdanielk1977         s = 0;
778a3f06598Sdanielk1977       }else if( pWrite->pFileData && (
779a3f06598Sdanielk1977           (pWrite->pFileData==p)
780a3f06598Sdanielk1977        || (p->zName && pWrite->pFileData->zName==p->zName)
781a3f06598Sdanielk1977       )){
782a3f06598Sdanielk1977         switch( pWrite->op ){
783a3f06598Sdanielk1977           case ASYNC_WRITE:
784a3f06598Sdanielk1977             s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
785a3f06598Sdanielk1977             break;
786a3f06598Sdanielk1977           case ASYNC_TRUNCATE:
787a3f06598Sdanielk1977             s = MIN(s, pWrite->iOffset);
788a3f06598Sdanielk1977             break;
789a3f06598Sdanielk1977         }
790a3f06598Sdanielk1977       }
791a3f06598Sdanielk1977     }
792a3f06598Sdanielk1977     *piSize = s;
793a3f06598Sdanielk1977   }
794a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_QUEUE);
795a3f06598Sdanielk1977   return rc;
796a3f06598Sdanielk1977 }
797a3f06598Sdanielk1977 
798a3f06598Sdanielk1977 /*
799a3f06598Sdanielk1977 ** Lock or unlock the actual file-system entry.
800a3f06598Sdanielk1977 */
801a3f06598Sdanielk1977 static int getFileLock(AsyncLock *pLock){
802a3f06598Sdanielk1977   int rc = SQLITE_OK;
803a3f06598Sdanielk1977   AsyncFileLock *pIter;
804a3f06598Sdanielk1977   int eRequired = 0;
805a3f06598Sdanielk1977 
806a3f06598Sdanielk1977   if( pLock->pFile ){
807a3f06598Sdanielk1977     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
808a3f06598Sdanielk1977       assert(pIter->eAsyncLock>=pIter->eLock);
809a3f06598Sdanielk1977       if( pIter->eAsyncLock>eRequired ){
810a3f06598Sdanielk1977         eRequired = pIter->eAsyncLock;
811a3f06598Sdanielk1977         assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
812a3f06598Sdanielk1977       }
813a3f06598Sdanielk1977     }
814a3f06598Sdanielk1977 
815a3f06598Sdanielk1977     if( eRequired>pLock->eLock ){
816a3f06598Sdanielk1977       rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
817a3f06598Sdanielk1977       if( rc==SQLITE_OK ){
818a3f06598Sdanielk1977         pLock->eLock = eRequired;
819a3f06598Sdanielk1977       }
820a3f06598Sdanielk1977     }
821a3f06598Sdanielk1977     else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
822a3f06598Sdanielk1977       rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
823a3f06598Sdanielk1977       if( rc==SQLITE_OK ){
824a3f06598Sdanielk1977         pLock->eLock = eRequired;
825a3f06598Sdanielk1977       }
826a3f06598Sdanielk1977     }
827a3f06598Sdanielk1977   }
828a3f06598Sdanielk1977 
829a3f06598Sdanielk1977   return rc;
830a3f06598Sdanielk1977 }
831a3f06598Sdanielk1977 
832a3f06598Sdanielk1977 /*
833a3f06598Sdanielk1977 ** Return the AsyncLock structure from the global async.pLock list
834a3f06598Sdanielk1977 ** associated with the file-system entry identified by path zName
835a3f06598Sdanielk1977 ** (a string of nName bytes). If no such structure exists, return 0.
836a3f06598Sdanielk1977 */
837a3f06598Sdanielk1977 static AsyncLock *findLock(const char *zName, int nName){
838a3f06598Sdanielk1977   AsyncLock *p = async.pLock;
839a3f06598Sdanielk1977   while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
840a3f06598Sdanielk1977     p = p->pNext;
841a3f06598Sdanielk1977   }
842a3f06598Sdanielk1977   return p;
843a3f06598Sdanielk1977 }
844a3f06598Sdanielk1977 
845a3f06598Sdanielk1977 /*
846a3f06598Sdanielk1977 ** The following two methods - asyncLock() and asyncUnlock() - are used
847a3f06598Sdanielk1977 ** to obtain and release locks on database files opened with the
848a3f06598Sdanielk1977 ** asynchronous backend.
849a3f06598Sdanielk1977 */
850a3f06598Sdanielk1977 static int asyncLock(sqlite3_file *pFile, int eLock){
851a3f06598Sdanielk1977   int rc = SQLITE_OK;
852a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
853a3f06598Sdanielk1977 
854a3f06598Sdanielk1977   if( p->zName ){
855a3f06598Sdanielk1977     async_mutex_enter(ASYNC_MUTEX_LOCK);
856a3f06598Sdanielk1977     if( p->lock.eLock<eLock ){
857a3f06598Sdanielk1977       AsyncLock *pLock = p->pLock;
858a3f06598Sdanielk1977       AsyncFileLock *pIter;
859a3f06598Sdanielk1977       assert(pLock && pLock->pList);
860a3f06598Sdanielk1977       for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
861a3f06598Sdanielk1977         if( pIter!=&p->lock && (
862a3f06598Sdanielk1977           (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
863a3f06598Sdanielk1977           (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
864a3f06598Sdanielk1977           (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
865a3f06598Sdanielk1977           (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
866a3f06598Sdanielk1977         )){
867a3f06598Sdanielk1977           rc = SQLITE_BUSY;
868a3f06598Sdanielk1977         }
869a3f06598Sdanielk1977       }
870a3f06598Sdanielk1977       if( rc==SQLITE_OK ){
871a3f06598Sdanielk1977         p->lock.eLock = eLock;
872a3f06598Sdanielk1977         p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
873a3f06598Sdanielk1977       }
874a3f06598Sdanielk1977       assert(p->lock.eAsyncLock>=p->lock.eLock);
875a3f06598Sdanielk1977       if( rc==SQLITE_OK ){
876a3f06598Sdanielk1977         rc = getFileLock(pLock);
877a3f06598Sdanielk1977       }
878a3f06598Sdanielk1977     }
879a3f06598Sdanielk1977     async_mutex_leave(ASYNC_MUTEX_LOCK);
880a3f06598Sdanielk1977   }
881a3f06598Sdanielk1977 
882a3f06598Sdanielk1977   ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
883a3f06598Sdanielk1977   return rc;
884a3f06598Sdanielk1977 }
885a3f06598Sdanielk1977 static int asyncUnlock(sqlite3_file *pFile, int eLock){
886a3f06598Sdanielk1977   int rc = SQLITE_OK;
887a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
888a3f06598Sdanielk1977   if( p->zName ){
889a3f06598Sdanielk1977     AsyncFileLock *pLock = &p->lock;
890a3f06598Sdanielk1977     async_mutex_enter(ASYNC_MUTEX_QUEUE);
891a3f06598Sdanielk1977     async_mutex_enter(ASYNC_MUTEX_LOCK);
892a3f06598Sdanielk1977     pLock->eLock = MIN(pLock->eLock, eLock);
893a3f06598Sdanielk1977     rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
894a3f06598Sdanielk1977     async_mutex_leave(ASYNC_MUTEX_LOCK);
895a3f06598Sdanielk1977     async_mutex_leave(ASYNC_MUTEX_QUEUE);
896a3f06598Sdanielk1977   }
897a3f06598Sdanielk1977   return rc;
898a3f06598Sdanielk1977 }
899a3f06598Sdanielk1977 
900a3f06598Sdanielk1977 /*
901a3f06598Sdanielk1977 ** This function is called when the pager layer first opens a database file
902a3f06598Sdanielk1977 ** and is checking for a hot-journal.
903a3f06598Sdanielk1977 */
904a3f06598Sdanielk1977 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
905a3f06598Sdanielk1977   int ret = 0;
906a3f06598Sdanielk1977   AsyncFileLock *pIter;
907a3f06598Sdanielk1977   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
908a3f06598Sdanielk1977 
909a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_LOCK);
910a3f06598Sdanielk1977   for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
911a3f06598Sdanielk1977     if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
912a3f06598Sdanielk1977       ret = 1;
913a3628d14Sshane       break;
914a3f06598Sdanielk1977     }
915a3f06598Sdanielk1977   }
916a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_LOCK);
917a3f06598Sdanielk1977 
918a3f06598Sdanielk1977   ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
919a3f06598Sdanielk1977   *pResOut = ret;
920a3f06598Sdanielk1977   return SQLITE_OK;
921a3f06598Sdanielk1977 }
922a3f06598Sdanielk1977 
923a3f06598Sdanielk1977 /*
924a3f06598Sdanielk1977 ** sqlite3_file_control() implementation.
925a3f06598Sdanielk1977 */
926a3f06598Sdanielk1977 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
927a3f06598Sdanielk1977   switch( op ){
928a3f06598Sdanielk1977     case SQLITE_FCNTL_LOCKSTATE: {
929a3f06598Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_LOCK);
930a3f06598Sdanielk1977       *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
931a3f06598Sdanielk1977       async_mutex_leave(ASYNC_MUTEX_LOCK);
932a3f06598Sdanielk1977       return SQLITE_OK;
933a3f06598Sdanielk1977     }
934a3f06598Sdanielk1977   }
935a3f06598Sdanielk1977   return SQLITE_ERROR;
936a3f06598Sdanielk1977 }
937a3f06598Sdanielk1977 
938a3f06598Sdanielk1977 /*
939a3f06598Sdanielk1977 ** Return the device characteristics and sector-size of the device. It
940a3628d14Sshane ** is tricky to implement these correctly, as this backend might
941a3f06598Sdanielk1977 ** not have an open file handle at this point.
942a3f06598Sdanielk1977 */
943a3f06598Sdanielk1977 static int asyncSectorSize(sqlite3_file *pFile){
944a3628d14Sshane   UNUSED_PARAMETER(pFile);
945a3f06598Sdanielk1977   return 512;
946a3f06598Sdanielk1977 }
947a3f06598Sdanielk1977 static int asyncDeviceCharacteristics(sqlite3_file *pFile){
948a3628d14Sshane   UNUSED_PARAMETER(pFile);
949a3f06598Sdanielk1977   return 0;
950a3f06598Sdanielk1977 }
951a3f06598Sdanielk1977 
952a3f06598Sdanielk1977 static int unlinkAsyncFile(AsyncFileData *pData){
953a3f06598Sdanielk1977   AsyncFileLock **ppIter;
954a3f06598Sdanielk1977   int rc = SQLITE_OK;
955a3f06598Sdanielk1977 
956a3f06598Sdanielk1977   if( pData->zName ){
957a3f06598Sdanielk1977     AsyncLock *pLock = pData->pLock;
958a3f06598Sdanielk1977     for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
959a3f06598Sdanielk1977       if( (*ppIter)==&pData->lock ){
960a3f06598Sdanielk1977         *ppIter = pData->lock.pNext;
961a3f06598Sdanielk1977         break;
962a3f06598Sdanielk1977       }
963a3f06598Sdanielk1977     }
964a3f06598Sdanielk1977     if( !pLock->pList ){
965a3f06598Sdanielk1977       AsyncLock **pp;
966a3f06598Sdanielk1977       if( pLock->pFile ){
967a3f06598Sdanielk1977         pLock->pFile->pMethods->xClose(pLock->pFile);
968a3f06598Sdanielk1977       }
969a3f06598Sdanielk1977       for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
970a3f06598Sdanielk1977       *pp = pLock->pNext;
971a3f06598Sdanielk1977       sqlite3_free(pLock);
972a3f06598Sdanielk1977     }else{
973a3f06598Sdanielk1977       rc = getFileLock(pLock);
974a3f06598Sdanielk1977     }
975a3f06598Sdanielk1977   }
976a3f06598Sdanielk1977 
977a3f06598Sdanielk1977   return rc;
978a3f06598Sdanielk1977 }
979a3f06598Sdanielk1977 
980a3f06598Sdanielk1977 /*
981a3f06598Sdanielk1977 ** The parameter passed to this function is a copy of a 'flags' parameter
982a3f06598Sdanielk1977 ** passed to this modules xOpen() method. This function returns true
983a3f06598Sdanielk1977 ** if the file should be opened asynchronously, or false if it should
984a3f06598Sdanielk1977 ** be opened immediately.
985a3f06598Sdanielk1977 **
986a3f06598Sdanielk1977 ** If the file is to be opened asynchronously, then asyncOpen() will add
987a3f06598Sdanielk1977 ** an entry to the event queue and the file will not actually be opened
988a3f06598Sdanielk1977 ** until the event is processed. Otherwise, the file is opened directly
989a3f06598Sdanielk1977 ** by the caller.
990a3f06598Sdanielk1977 */
991a3f06598Sdanielk1977 static int doAsynchronousOpen(int flags){
992a3f06598Sdanielk1977   return (flags&SQLITE_OPEN_CREATE) && (
993a3f06598Sdanielk1977       (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
994a3f06598Sdanielk1977       (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
995a3f06598Sdanielk1977       (flags&SQLITE_OPEN_DELETEONCLOSE)
996a3f06598Sdanielk1977   );
997a3f06598Sdanielk1977 }
998a3f06598Sdanielk1977 
999a3f06598Sdanielk1977 /*
1000a3f06598Sdanielk1977 ** Open a file.
1001a3f06598Sdanielk1977 */
1002a3f06598Sdanielk1977 static int asyncOpen(
1003a3f06598Sdanielk1977   sqlite3_vfs *pAsyncVfs,
1004a3f06598Sdanielk1977   const char *zName,
1005a3f06598Sdanielk1977   sqlite3_file *pFile,
1006a3f06598Sdanielk1977   int flags,
1007a3f06598Sdanielk1977   int *pOutFlags
1008a3f06598Sdanielk1977 ){
1009a3f06598Sdanielk1977   static sqlite3_io_methods async_methods = {
1010a3f06598Sdanielk1977     1,                               /* iVersion */
1011a3f06598Sdanielk1977     asyncClose,                      /* xClose */
1012a3f06598Sdanielk1977     asyncRead,                       /* xRead */
1013a3f06598Sdanielk1977     asyncWrite,                      /* xWrite */
1014a3f06598Sdanielk1977     asyncTruncate,                   /* xTruncate */
1015a3f06598Sdanielk1977     asyncSync,                       /* xSync */
1016a3f06598Sdanielk1977     asyncFileSize,                   /* xFileSize */
1017a3f06598Sdanielk1977     asyncLock,                       /* xLock */
1018a3f06598Sdanielk1977     asyncUnlock,                     /* xUnlock */
1019a3f06598Sdanielk1977     asyncCheckReservedLock,          /* xCheckReservedLock */
1020a3f06598Sdanielk1977     asyncFileControl,                /* xFileControl */
1021a3f06598Sdanielk1977     asyncSectorSize,                 /* xSectorSize */
1022a3f06598Sdanielk1977     asyncDeviceCharacteristics       /* xDeviceCharacteristics */
1023a3f06598Sdanielk1977   };
1024a3f06598Sdanielk1977 
1025a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1026a3f06598Sdanielk1977   AsyncFile *p = (AsyncFile *)pFile;
1027a3f06598Sdanielk1977   int nName = 0;
1028a3f06598Sdanielk1977   int rc = SQLITE_OK;
1029a3f06598Sdanielk1977   int nByte;
1030a3f06598Sdanielk1977   AsyncFileData *pData;
1031a3f06598Sdanielk1977   AsyncLock *pLock = 0;
1032a3f06598Sdanielk1977   char *z;
1033a3f06598Sdanielk1977   int isAsyncOpen = doAsynchronousOpen(flags);
1034a3f06598Sdanielk1977 
1035a3f06598Sdanielk1977   /* If zName is NULL, then the upper layer is requesting an anonymous file */
1036a3f06598Sdanielk1977   if( zName ){
1037a3628d14Sshane     nName = (int)strlen(zName)+1;
1038a3f06598Sdanielk1977   }
1039a3f06598Sdanielk1977 
1040a3f06598Sdanielk1977   nByte = (
1041a3f06598Sdanielk1977     sizeof(AsyncFileData) +        /* AsyncFileData structure */
1042a3f06598Sdanielk1977     2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
1043a3f06598Sdanielk1977     nName                          /* AsyncFileData.zName */
1044a3f06598Sdanielk1977   );
1045a3f06598Sdanielk1977   z = sqlite3_malloc(nByte);
1046a3f06598Sdanielk1977   if( !z ){
1047a3f06598Sdanielk1977     return SQLITE_NOMEM;
1048a3f06598Sdanielk1977   }
1049a3f06598Sdanielk1977   memset(z, 0, nByte);
1050a3f06598Sdanielk1977   pData = (AsyncFileData*)z;
1051a3f06598Sdanielk1977   z += sizeof(pData[0]);
1052a3f06598Sdanielk1977   pData->pBaseRead = (sqlite3_file*)z;
1053a3f06598Sdanielk1977   z += pVfs->szOsFile;
1054a3f06598Sdanielk1977   pData->pBaseWrite = (sqlite3_file*)z;
1055a3f06598Sdanielk1977   pData->closeOp.pFileData = pData;
1056a3f06598Sdanielk1977   pData->closeOp.op = ASYNC_CLOSE;
1057a3f06598Sdanielk1977 
1058a3f06598Sdanielk1977   if( zName ){
1059a3f06598Sdanielk1977     z += pVfs->szOsFile;
1060a3f06598Sdanielk1977     pData->zName = z;
1061a3f06598Sdanielk1977     pData->nName = nName;
1062a3f06598Sdanielk1977     memcpy(pData->zName, zName, nName);
1063a3f06598Sdanielk1977   }
1064a3f06598Sdanielk1977 
1065a3f06598Sdanielk1977   if( !isAsyncOpen ){
1066a3f06598Sdanielk1977     int flagsout;
1067a3f06598Sdanielk1977     rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1068a3f06598Sdanielk1977     if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){
1069a3f06598Sdanielk1977       rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1070a3f06598Sdanielk1977     }
1071a3f06598Sdanielk1977     if( pOutFlags ){
1072a3f06598Sdanielk1977       *pOutFlags = flagsout;
1073a3f06598Sdanielk1977     }
1074a3f06598Sdanielk1977   }
1075a3f06598Sdanielk1977 
1076a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_LOCK);
1077a3f06598Sdanielk1977 
1078a3f06598Sdanielk1977   if( zName && rc==SQLITE_OK ){
1079a3f06598Sdanielk1977     pLock = findLock(pData->zName, pData->nName);
1080a3f06598Sdanielk1977     if( !pLock ){
1081a3f06598Sdanielk1977       int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1082a3f06598Sdanielk1977       pLock = (AsyncLock *)sqlite3_malloc(nByte);
1083a3f06598Sdanielk1977       if( pLock ){
1084a3f06598Sdanielk1977         memset(pLock, 0, nByte);
10854598b8e4Sdanielk1977         if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){
1086a3f06598Sdanielk1977           pLock->pFile = (sqlite3_file *)&pLock[1];
1087a3f06598Sdanielk1977           rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1088a3f06598Sdanielk1977           if( rc!=SQLITE_OK ){
1089a3f06598Sdanielk1977             sqlite3_free(pLock);
1090a3f06598Sdanielk1977             pLock = 0;
1091a3f06598Sdanielk1977           }
1092a3f06598Sdanielk1977         }
1093a3f06598Sdanielk1977         if( pLock ){
1094a3f06598Sdanielk1977           pLock->nFile = pData->nName;
1095a3f06598Sdanielk1977           pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1096a3f06598Sdanielk1977           memcpy(pLock->zFile, pData->zName, pLock->nFile);
1097a3f06598Sdanielk1977           pLock->pNext = async.pLock;
1098a3f06598Sdanielk1977           async.pLock = pLock;
1099a3f06598Sdanielk1977         }
1100a3f06598Sdanielk1977       }else{
1101a3f06598Sdanielk1977         rc = SQLITE_NOMEM;
1102a3f06598Sdanielk1977       }
1103a3f06598Sdanielk1977     }
1104a3f06598Sdanielk1977   }
1105a3f06598Sdanielk1977 
1106a3f06598Sdanielk1977   if( rc==SQLITE_OK ){
1107a3f06598Sdanielk1977     p->pMethod = &async_methods;
1108a3f06598Sdanielk1977     p->pData = pData;
1109a3f06598Sdanielk1977 
1110a3f06598Sdanielk1977     /* Link AsyncFileData.lock into the linked list of
1111a3f06598Sdanielk1977     ** AsyncFileLock structures for this file.
1112a3f06598Sdanielk1977     */
1113a3f06598Sdanielk1977     if( zName ){
1114a3f06598Sdanielk1977       pData->lock.pNext = pLock->pList;
1115a3f06598Sdanielk1977       pLock->pList = &pData->lock;
1116a3f06598Sdanielk1977       pData->zName = pLock->zFile;
1117a3f06598Sdanielk1977     }
1118a3f06598Sdanielk1977   }else{
1119a3f06598Sdanielk1977     if( pData->pBaseRead->pMethods ){
1120a3f06598Sdanielk1977       pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1121a3f06598Sdanielk1977     }
1122a3f06598Sdanielk1977     if( pData->pBaseWrite->pMethods ){
1123a3f06598Sdanielk1977       pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1124a3f06598Sdanielk1977     }
1125a3f06598Sdanielk1977     sqlite3_free(pData);
1126a3f06598Sdanielk1977   }
1127a3f06598Sdanielk1977 
1128a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_LOCK);
1129a3f06598Sdanielk1977 
1130a3f06598Sdanielk1977   if( rc==SQLITE_OK ){
1131a3f06598Sdanielk1977     incrOpenFileCount();
1132a3f06598Sdanielk1977     pData->pLock = pLock;
1133a3f06598Sdanielk1977   }
1134a3f06598Sdanielk1977 
1135a3f06598Sdanielk1977   if( rc==SQLITE_OK && isAsyncOpen ){
1136a3f06598Sdanielk1977     rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1137a3f06598Sdanielk1977     if( rc==SQLITE_OK ){
1138a3f06598Sdanielk1977       if( pOutFlags ) *pOutFlags = flags;
1139a3f06598Sdanielk1977     }else{
1140a3f06598Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_LOCK);
1141a3f06598Sdanielk1977       unlinkAsyncFile(pData);
1142a3f06598Sdanielk1977       async_mutex_leave(ASYNC_MUTEX_LOCK);
1143a3f06598Sdanielk1977       sqlite3_free(pData);
1144a3f06598Sdanielk1977     }
1145a3f06598Sdanielk1977   }
1146a3f06598Sdanielk1977   if( rc!=SQLITE_OK ){
1147a3f06598Sdanielk1977     p->pMethod = 0;
1148a3f06598Sdanielk1977   }
1149a3f06598Sdanielk1977   return rc;
1150a3f06598Sdanielk1977 }
1151a3f06598Sdanielk1977 
1152a3f06598Sdanielk1977 /*
1153a3f06598Sdanielk1977 ** Implementation of sqlite3OsDelete. Add an entry to the end of the
1154a3f06598Sdanielk1977 ** write-op queue to perform the delete.
1155a3f06598Sdanielk1977 */
1156a3f06598Sdanielk1977 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
1157a3628d14Sshane   UNUSED_PARAMETER(pAsyncVfs);
1158a3628d14Sshane   return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, (int)strlen(z)+1, z);
1159a3f06598Sdanielk1977 }
1160a3f06598Sdanielk1977 
1161a3f06598Sdanielk1977 /*
1162a3f06598Sdanielk1977 ** Implementation of sqlite3OsAccess. This method holds the mutex from
1163a3f06598Sdanielk1977 ** start to finish.
1164a3f06598Sdanielk1977 */
1165a3f06598Sdanielk1977 static int asyncAccess(
1166a3f06598Sdanielk1977   sqlite3_vfs *pAsyncVfs,
1167a3f06598Sdanielk1977   const char *zName,
1168a3f06598Sdanielk1977   int flags,
1169a3f06598Sdanielk1977   int *pResOut
1170a3f06598Sdanielk1977 ){
1171a3f06598Sdanielk1977   int rc;
1172a3f06598Sdanielk1977   int ret;
1173a3f06598Sdanielk1977   AsyncWrite *p;
1174a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1175a3f06598Sdanielk1977 
1176a3f06598Sdanielk1977   assert(flags==SQLITE_ACCESS_READWRITE
1177a3f06598Sdanielk1977       || flags==SQLITE_ACCESS_READ
1178a3f06598Sdanielk1977       || flags==SQLITE_ACCESS_EXISTS
1179a3f06598Sdanielk1977   );
1180a3f06598Sdanielk1977 
1181a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_QUEUE);
1182a3f06598Sdanielk1977   rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1183a3f06598Sdanielk1977   if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1184a3f06598Sdanielk1977     for(p=async.pQueueFirst; p; p = p->pNext){
1185a3f06598Sdanielk1977       if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1186a3f06598Sdanielk1977         ret = 0;
1187a3f06598Sdanielk1977       }else if( p->op==ASYNC_OPENEXCLUSIVE
1188a3f06598Sdanielk1977              && p->pFileData->zName
1189a3f06598Sdanielk1977              && 0==strcmp(p->pFileData->zName, zName)
1190a3f06598Sdanielk1977       ){
1191a3f06598Sdanielk1977         ret = 1;
1192a3f06598Sdanielk1977       }
1193a3f06598Sdanielk1977     }
1194a3f06598Sdanielk1977   }
1195a3f06598Sdanielk1977   ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1196a3f06598Sdanielk1977     flags==SQLITE_ACCESS_READWRITE?"read-write":
1197a3f06598Sdanielk1977     flags==SQLITE_ACCESS_READ?"read":"exists"
1198a3f06598Sdanielk1977     , zName, ret)
1199a3f06598Sdanielk1977   );
1200a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_QUEUE);
1201a3f06598Sdanielk1977   *pResOut = ret;
1202a3f06598Sdanielk1977   return rc;
1203a3f06598Sdanielk1977 }
1204a3f06598Sdanielk1977 
1205a3f06598Sdanielk1977 /*
1206a3f06598Sdanielk1977 ** Fill in zPathOut with the full path to the file identified by zPath.
1207a3f06598Sdanielk1977 */
1208a3f06598Sdanielk1977 static int asyncFullPathname(
1209a3f06598Sdanielk1977   sqlite3_vfs *pAsyncVfs,
1210a3f06598Sdanielk1977   const char *zPath,
1211a3f06598Sdanielk1977   int nPathOut,
1212a3f06598Sdanielk1977   char *zPathOut
1213a3f06598Sdanielk1977 ){
1214a3f06598Sdanielk1977   int rc;
1215a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1216a3f06598Sdanielk1977   rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1217a3f06598Sdanielk1977 
1218a3f06598Sdanielk1977   /* Because of the way intra-process file locking works, this backend
1219a3f06598Sdanielk1977   ** needs to return a canonical path. The following block assumes the
1220a3f06598Sdanielk1977   ** file-system uses unix style paths.
1221a3f06598Sdanielk1977   */
1222a3f06598Sdanielk1977   if( rc==SQLITE_OK ){
1223a3f06598Sdanielk1977     int i, j;
1224a3f06598Sdanielk1977     int n = nPathOut;
1225a3f06598Sdanielk1977     char *z = zPathOut;
1226a3f06598Sdanielk1977     while( n>1 && z[n-1]=='/' ){ n--; }
1227a3f06598Sdanielk1977     for(i=j=0; i<n; i++){
1228a3f06598Sdanielk1977       if( z[i]=='/' ){
1229a3f06598Sdanielk1977         if( z[i+1]=='/' ) continue;
1230a3f06598Sdanielk1977         if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1231a3f06598Sdanielk1977           i += 1;
1232a3f06598Sdanielk1977           continue;
1233a3f06598Sdanielk1977         }
1234a3f06598Sdanielk1977         if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1235a3f06598Sdanielk1977           while( j>0 && z[j-1]!='/' ){ j--; }
1236a3f06598Sdanielk1977           if( j>0 ){ j--; }
1237a3f06598Sdanielk1977           i += 2;
1238a3f06598Sdanielk1977           continue;
1239a3f06598Sdanielk1977         }
1240a3f06598Sdanielk1977       }
1241a3f06598Sdanielk1977       z[j++] = z[i];
1242a3f06598Sdanielk1977     }
1243a3f06598Sdanielk1977     z[j] = 0;
1244a3f06598Sdanielk1977   }
1245a3f06598Sdanielk1977 
1246a3f06598Sdanielk1977   return rc;
1247a3f06598Sdanielk1977 }
1248a3f06598Sdanielk1977 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1249a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1250a3f06598Sdanielk1977   return pVfs->xDlOpen(pVfs, zPath);
1251a3f06598Sdanielk1977 }
1252a3f06598Sdanielk1977 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1253a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1254a3f06598Sdanielk1977   pVfs->xDlError(pVfs, nByte, zErrMsg);
1255a3f06598Sdanielk1977 }
1256a3f06598Sdanielk1977 static void (*asyncDlSym(
1257a3f06598Sdanielk1977   sqlite3_vfs *pAsyncVfs,
1258a3f06598Sdanielk1977   void *pHandle,
1259a3f06598Sdanielk1977   const char *zSymbol
1260a3f06598Sdanielk1977 ))(void){
1261a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1262a3f06598Sdanielk1977   return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1263a3f06598Sdanielk1977 }
1264a3f06598Sdanielk1977 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1265a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1266a3f06598Sdanielk1977   pVfs->xDlClose(pVfs, pHandle);
1267a3f06598Sdanielk1977 }
1268a3f06598Sdanielk1977 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1269a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1270a3f06598Sdanielk1977   return pVfs->xRandomness(pVfs, nByte, zBufOut);
1271a3f06598Sdanielk1977 }
1272a3f06598Sdanielk1977 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1273a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1274a3f06598Sdanielk1977   return pVfs->xSleep(pVfs, nMicro);
1275a3f06598Sdanielk1977 }
1276a3f06598Sdanielk1977 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1277a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1278a3f06598Sdanielk1977   return pVfs->xCurrentTime(pVfs, pTimeOut);
1279a3f06598Sdanielk1977 }
1280a3f06598Sdanielk1977 
/*
** The sqlite3_vfs object describing the asynchronous backend. The
** mxPathname and pAppData slots start out as 0; sqlite3async_initialize()
** fills them in from the parent VFS once registration has succeeded, and
** sqlite3async_shutdown() resets pAppData to 0.
*/
1281a3f06598Sdanielk1977 static sqlite3_vfs async_vfs = {
1282a3f06598Sdanielk1977   1,                    /* iVersion */
1283a3f06598Sdanielk1977   sizeof(AsyncFile),    /* szOsFile */
1284a3f06598Sdanielk1977   0,                    /* mxPathname (copied from the parent VFS) */
1285a3f06598Sdanielk1977   0,                    /* pNext */
1286a3f06598Sdanielk1977   SQLITEASYNC_VFSNAME,  /* zName */
1287a3f06598Sdanielk1977   0,                    /* pAppData (pointer to the parent VFS) */
1288a3f06598Sdanielk1977   asyncOpen,            /* xOpen */
1289a3f06598Sdanielk1977   asyncDelete,          /* xDelete */
1290a3f06598Sdanielk1977   asyncAccess,          /* xAccess */
1291a3f06598Sdanielk1977   asyncFullPathname,    /* xFullPathname */
1292a3f06598Sdanielk1977   asyncDlOpen,          /* xDlOpen */
1293a3f06598Sdanielk1977   asyncDlError,         /* xDlError */
1294a3f06598Sdanielk1977   asyncDlSym,           /* xDlSym */
1295a3f06598Sdanielk1977   asyncDlClose,         /* xDlClose */
1296a3f06598Sdanielk1977   asyncRandomness,      /* xRandomness */
1297a3f06598Sdanielk1977   asyncSleep,           /* xSleep */
1298a3f06598Sdanielk1977   asyncCurrentTime      /* xCurrentTime */
1299a3f06598Sdanielk1977 };
1300a3f06598Sdanielk1977 
1301a3f06598Sdanielk1977 /*
1302a3f06598Sdanielk1977 ** This procedure runs in a separate thread, reading messages off of the
1303a3f06598Sdanielk1977 ** write queue and processing them one by one.
1304a3f06598Sdanielk1977 **
1305a3f06598Sdanielk1977 ** If async.writerHaltNow is true, then this procedure exits
1306a3f06598Sdanielk1977 ** after processing a single message.
1307a3f06598Sdanielk1977 **
1308a3f06598Sdanielk1977 ** If async.writerHaltWhenIdle is true, then this procedure exits when
1309a3f06598Sdanielk1977 ** the write queue is empty.
1310a3f06598Sdanielk1977 **
1311a3f06598Sdanielk1977 ** If both of the above variables are false, this procedure runs
1312a3f06598Sdanielk1977 ** indefinately, waiting for operations to be added to the write queue
1313a3f06598Sdanielk1977 ** and processing them in the order in which they arrive.
1314a3f06598Sdanielk1977 **
1315a3f06598Sdanielk1977 ** An artifical delay of async.ioDelay milliseconds is inserted before
1316a3f06598Sdanielk1977 ** each write operation in order to simulate the effect of a slow disk.
1317a3f06598Sdanielk1977 **
1318a3f06598Sdanielk1977 ** Only one instance of this procedure may be running at a time.
1319a3f06598Sdanielk1977 */
1320a3f06598Sdanielk1977 static void asyncWriterThread(void){
1321a3f06598Sdanielk1977   sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1322a3f06598Sdanielk1977   AsyncWrite *p = 0;
1323a3f06598Sdanielk1977   int rc = SQLITE_OK;
1324a3f06598Sdanielk1977   int holdingMutex = 0;
1325a3f06598Sdanielk1977 
1326a3f06598Sdanielk1977   async_mutex_enter(ASYNC_MUTEX_WRITER);
1327a3f06598Sdanielk1977 
1328a3f06598Sdanielk1977   while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1329a3f06598Sdanielk1977     int doNotFree = 0;
1330a3f06598Sdanielk1977     sqlite3_file *pBase = 0;
1331a3f06598Sdanielk1977 
1332a3f06598Sdanielk1977     if( !holdingMutex ){
1333a3f06598Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1334a3f06598Sdanielk1977     }
1335a3f06598Sdanielk1977     while( (p = async.pQueueFirst)==0 ){
1336a3f06598Sdanielk1977       if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1337a3f06598Sdanielk1977         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1338a3f06598Sdanielk1977         break;
1339a3f06598Sdanielk1977       }else{
1340a3f06598Sdanielk1977         ASYNC_TRACE(("IDLE\n"));
1341a3f06598Sdanielk1977         async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1342a3f06598Sdanielk1977         ASYNC_TRACE(("WAKEUP\n"));
1343a3f06598Sdanielk1977       }
1344a3f06598Sdanielk1977     }
1345a3f06598Sdanielk1977     if( p==0 ) break;
1346a3f06598Sdanielk1977     holdingMutex = 1;
1347a3f06598Sdanielk1977 
1348a3f06598Sdanielk1977     /* Right now this thread is holding the mutex on the write-op queue.
1349a3f06598Sdanielk1977     ** Variable 'p' points to the first entry in the write-op queue. In
1350a3f06598Sdanielk1977     ** the general case, we hold on to the mutex for the entire body of
1351a3f06598Sdanielk1977     ** the loop.
1352a3f06598Sdanielk1977     **
1353a3f06598Sdanielk1977     ** However in the cases enumerated below, we relinquish the mutex,
1354a3f06598Sdanielk1977     ** perform the IO, and then re-request the mutex before removing 'p' from
1355a3f06598Sdanielk1977     ** the head of the write-op queue. The idea is to increase concurrency with
1356a3f06598Sdanielk1977     ** sqlite threads.
1357a3f06598Sdanielk1977     **
1358a3f06598Sdanielk1977     **     * An ASYNC_CLOSE operation.
1359a3f06598Sdanielk1977     **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1360a3f06598Sdanielk1977     **       the mutex, call the underlying xOpenExclusive() function, then
1361a3f06598Sdanielk1977     **       re-aquire the mutex before seting the AsyncFile.pBaseRead
1362a3f06598Sdanielk1977     **       variable.
1363a3f06598Sdanielk1977     **     * ASYNC_SYNC and ASYNC_WRITE operations, if
1364a3f06598Sdanielk1977     **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1365a3f06598Sdanielk1977     **       file-handles are open for the particular file being "synced".
1366a3f06598Sdanielk1977     */
1367a3f06598Sdanielk1977     if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1368a3f06598Sdanielk1977       p->op = ASYNC_NOOP;
1369a3f06598Sdanielk1977     }
1370a3f06598Sdanielk1977     if( p->pFileData ){
1371a3f06598Sdanielk1977       pBase = p->pFileData->pBaseWrite;
1372a3f06598Sdanielk1977       if(
1373a3f06598Sdanielk1977         p->op==ASYNC_CLOSE ||
1374a3f06598Sdanielk1977         p->op==ASYNC_OPENEXCLUSIVE ||
1375a3f06598Sdanielk1977         (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1376a3f06598Sdanielk1977       ){
1377a3f06598Sdanielk1977         async_mutex_leave(ASYNC_MUTEX_QUEUE);
1378a3f06598Sdanielk1977         holdingMutex = 0;
1379a3f06598Sdanielk1977       }
1380a3f06598Sdanielk1977       if( !pBase->pMethods ){
1381a3f06598Sdanielk1977         pBase = p->pFileData->pBaseRead;
1382a3f06598Sdanielk1977       }
1383a3f06598Sdanielk1977     }
1384a3f06598Sdanielk1977 
1385a3f06598Sdanielk1977     switch( p->op ){
1386a3f06598Sdanielk1977       case ASYNC_NOOP:
1387a3f06598Sdanielk1977         break;
1388a3f06598Sdanielk1977 
1389a3f06598Sdanielk1977       case ASYNC_WRITE:
1390a3f06598Sdanielk1977         assert( pBase );
1391a3f06598Sdanielk1977         ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1392a3f06598Sdanielk1977                 p->pFileData->zName, p->nByte, p->iOffset));
1393a3f06598Sdanielk1977         rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1394a3f06598Sdanielk1977         break;
1395a3f06598Sdanielk1977 
1396a3f06598Sdanielk1977       case ASYNC_SYNC:
1397a3f06598Sdanielk1977         assert( pBase );
1398a3f06598Sdanielk1977         ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1399a3f06598Sdanielk1977         rc = pBase->pMethods->xSync(pBase, p->nByte);
1400a3f06598Sdanielk1977         break;
1401a3f06598Sdanielk1977 
1402a3f06598Sdanielk1977       case ASYNC_TRUNCATE:
1403a3f06598Sdanielk1977         assert( pBase );
1404a3f06598Sdanielk1977         ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1405a3f06598Sdanielk1977                 p->pFileData->zName, p->iOffset));
1406a3f06598Sdanielk1977         rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1407a3f06598Sdanielk1977         break;
1408a3f06598Sdanielk1977 
1409a3f06598Sdanielk1977       case ASYNC_CLOSE: {
1410a3f06598Sdanielk1977         AsyncFileData *pData = p->pFileData;
1411a3f06598Sdanielk1977         ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1412a3f06598Sdanielk1977         if( pData->pBaseWrite->pMethods ){
1413a3f06598Sdanielk1977           pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1414a3f06598Sdanielk1977         }
1415a3f06598Sdanielk1977         if( pData->pBaseRead->pMethods ){
1416a3f06598Sdanielk1977           pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1417a3f06598Sdanielk1977         }
1418a3f06598Sdanielk1977 
1419a3f06598Sdanielk1977         /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1420a3f06598Sdanielk1977         ** structures for this file. Obtain the async.lockMutex mutex
1421a3f06598Sdanielk1977         ** before doing so.
1422a3f06598Sdanielk1977         */
1423a3f06598Sdanielk1977         async_mutex_enter(ASYNC_MUTEX_LOCK);
1424a3f06598Sdanielk1977         rc = unlinkAsyncFile(pData);
1425a3f06598Sdanielk1977         async_mutex_leave(ASYNC_MUTEX_LOCK);
1426a3f06598Sdanielk1977 
1427a3f06598Sdanielk1977         if( !holdingMutex ){
1428a3f06598Sdanielk1977           async_mutex_enter(ASYNC_MUTEX_QUEUE);
1429a3f06598Sdanielk1977           holdingMutex = 1;
1430a3f06598Sdanielk1977         }
1431a3f06598Sdanielk1977         assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1432a3f06598Sdanielk1977         async.pQueueFirst = p->pNext;
1433a3f06598Sdanielk1977         sqlite3_free(pData);
1434a3f06598Sdanielk1977         doNotFree = 1;
1435a3f06598Sdanielk1977         break;
1436a3f06598Sdanielk1977       }
1437a3f06598Sdanielk1977 
1438a3f06598Sdanielk1977       case ASYNC_UNLOCK: {
1439a3f06598Sdanielk1977         AsyncWrite *pIter;
1440a3f06598Sdanielk1977         AsyncFileData *pData = p->pFileData;
1441a3f06598Sdanielk1977         int eLock = p->nByte;
1442a3f06598Sdanielk1977 
1443a3f06598Sdanielk1977         /* When a file is locked by SQLite using the async backend, it is
1444a3f06598Sdanielk1977         ** locked within the 'real' file-system synchronously. When it is
1445a3f06598Sdanielk1977         ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1446a3f06598Sdanielk1977         ** unlock the file asynchronously. The design of the async backend
1447a3f06598Sdanielk1977         ** requires that the 'real' file-system file be locked from the
1448a3f06598Sdanielk1977         ** time that SQLite first locks it (and probably reads from it)
1449a3f06598Sdanielk1977         ** until all asynchronous write events that were scheduled before
1450a3f06598Sdanielk1977         ** SQLite unlocked the file have been processed.
1451a3f06598Sdanielk1977         **
1452a3f06598Sdanielk1977         ** This is more complex if SQLite locks and unlocks the file multiple
1453a3f06598Sdanielk1977         ** times in quick succession. For example, if SQLite does:
1454a3f06598Sdanielk1977         **
1455a3f06598Sdanielk1977         **   lock, write, unlock, lock, write, unlock
1456a3f06598Sdanielk1977         **
1457a3f06598Sdanielk1977         ** Each "lock" operation locks the file immediately. Each "write"
1458a3f06598Sdanielk1977         ** and "unlock" operation adds an event to the event queue. If the
1459a3f06598Sdanielk1977         ** second "lock" operation is performed before the first "unlock"
1460a3f06598Sdanielk1977         ** operation has been processed asynchronously, then the first
1461a3f06598Sdanielk1977         ** "unlock" cannot be safely processed as is, since this would mean
1462a3f06598Sdanielk1977         ** the file was unlocked when the second "write" operation is
1463a3f06598Sdanielk1977         ** processed. To work around this, when processing an ASYNC_UNLOCK
1464a3f06598Sdanielk1977         ** operation, SQLite:
1465a3f06598Sdanielk1977         **
1466a3f06598Sdanielk1977         **   1) Unlocks the file to the minimum of the argument passed to
1467a3f06598Sdanielk1977         **      the xUnlock() call and the current lock from SQLite's point
1468a3f06598Sdanielk1977         **      of view, and
1469a3f06598Sdanielk1977         **
1470a3f06598Sdanielk1977         **   2) Only unlocks the file at all if this event is the last
1471a3f06598Sdanielk1977         **      ASYNC_UNLOCK event on this file in the write-queue.
1472a3f06598Sdanielk1977         */
1473a3f06598Sdanielk1977         assert( holdingMutex==1 );
1474a3f06598Sdanielk1977         assert( async.pQueueFirst==p );
1475a3f06598Sdanielk1977         for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1476a3f06598Sdanielk1977           if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1477a3f06598Sdanielk1977         }
1478a3f06598Sdanielk1977         if( !pIter ){
1479a3f06598Sdanielk1977           async_mutex_enter(ASYNC_MUTEX_LOCK);
1480a3f06598Sdanielk1977           pData->lock.eAsyncLock = MIN(
1481a3f06598Sdanielk1977               pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1482a3f06598Sdanielk1977           );
1483a3f06598Sdanielk1977           assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1484a3f06598Sdanielk1977           rc = getFileLock(pData->pLock);
1485a3f06598Sdanielk1977           async_mutex_leave(ASYNC_MUTEX_LOCK);
1486a3f06598Sdanielk1977         }
1487a3f06598Sdanielk1977         break;
1488a3f06598Sdanielk1977       }
1489a3f06598Sdanielk1977 
1490a3f06598Sdanielk1977       case ASYNC_DELETE:
1491a3f06598Sdanielk1977         ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1492a3f06598Sdanielk1977         rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1493a3f06598Sdanielk1977         break;
1494a3f06598Sdanielk1977 
1495a3f06598Sdanielk1977       case ASYNC_OPENEXCLUSIVE: {
1496a3f06598Sdanielk1977         int flags = (int)p->iOffset;
1497a3f06598Sdanielk1977         AsyncFileData *pData = p->pFileData;
1498a3f06598Sdanielk1977         ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1499a3f06598Sdanielk1977         assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1500a3f06598Sdanielk1977         rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1501a3f06598Sdanielk1977         assert( holdingMutex==0 );
1502a3f06598Sdanielk1977         async_mutex_enter(ASYNC_MUTEX_QUEUE);
1503a3f06598Sdanielk1977         holdingMutex = 1;
1504a3f06598Sdanielk1977         break;
1505a3f06598Sdanielk1977       }
1506a3f06598Sdanielk1977 
1507a3f06598Sdanielk1977       default: assert(!"Illegal value for AsyncWrite.op");
1508a3f06598Sdanielk1977     }
1509a3f06598Sdanielk1977 
1510a3f06598Sdanielk1977     /* If we didn't hang on to the mutex during the IO op, obtain it now
1511a3f06598Sdanielk1977     ** so that the AsyncWrite structure can be safely removed from the
1512a3f06598Sdanielk1977     ** global write-op queue.
1513a3f06598Sdanielk1977     */
1514a3f06598Sdanielk1977     if( !holdingMutex ){
1515a3f06598Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1516a3f06598Sdanielk1977       holdingMutex = 1;
1517a3f06598Sdanielk1977     }
1518a3f06598Sdanielk1977     /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1519a3f06598Sdanielk1977     if( p==async.pQueueLast ){
1520a3f06598Sdanielk1977       async.pQueueLast = 0;
1521a3f06598Sdanielk1977     }
1522a3f06598Sdanielk1977     if( !doNotFree ){
1523a3f06598Sdanielk1977       assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1524a3f06598Sdanielk1977       async.pQueueFirst = p->pNext;
1525a3f06598Sdanielk1977       sqlite3_free(p);
1526a3f06598Sdanielk1977     }
1527a3f06598Sdanielk1977     assert( holdingMutex );
1528a3f06598Sdanielk1977 
1529a3f06598Sdanielk1977     /* An IO error has occurred. We cannot report the error back to the
1530a3f06598Sdanielk1977     ** connection that requested the I/O since the error happened
1531a3f06598Sdanielk1977     ** asynchronously.  The connection has already moved on.  There
1532a3f06598Sdanielk1977     ** really is nobody to report the error to.
1533a3f06598Sdanielk1977     **
1534a3f06598Sdanielk1977     ** The file for which the error occurred may have been a database or
1535a3f06598Sdanielk1977     ** journal file. Regardless, none of the currently queued operations
1536a3f06598Sdanielk1977     ** associated with the same database should now be performed. Nor should
1537a3f06598Sdanielk1977     ** any subsequently requested IO on either a database or journal file
1538a3f06598Sdanielk1977     ** handle for the same database be accepted until the main database
1539a3f06598Sdanielk1977     ** file handle has been closed and reopened.
1540a3f06598Sdanielk1977     **
1541a3f06598Sdanielk1977     ** Furthermore, no further IO should be queued or performed on any file
1542a3f06598Sdanielk1977     ** handle associated with a database that may have been part of a
1543a3f06598Sdanielk1977     ** multi-file transaction that included the database associated with
1544a3f06598Sdanielk1977     ** the IO error (i.e. a database ATTACHed to the same handle at some
1545a3f06598Sdanielk1977     ** point in time).
1546a3f06598Sdanielk1977     */
1547a3f06598Sdanielk1977     if( rc!=SQLITE_OK ){
1548a3f06598Sdanielk1977       async.ioError = rc;
1549a3f06598Sdanielk1977     }
1550a3f06598Sdanielk1977 
1551a3f06598Sdanielk1977     if( async.ioError && !async.pQueueFirst ){
1552a3f06598Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_LOCK);
1553a3f06598Sdanielk1977       if( 0==async.pLock ){
1554a3f06598Sdanielk1977         async.ioError = SQLITE_OK;
1555a3f06598Sdanielk1977       }
1556a3f06598Sdanielk1977       async_mutex_leave(ASYNC_MUTEX_LOCK);
1557a3f06598Sdanielk1977     }
1558a3f06598Sdanielk1977 
1559a3f06598Sdanielk1977     /* Drop the queue mutex before continuing to the next write operation
1560a3f06598Sdanielk1977     ** in order to give other threads a chance to work with the write queue.
1561a3f06598Sdanielk1977     */
1562a3f06598Sdanielk1977     if( !async.pQueueFirst || !async.ioError ){
1563a3f06598Sdanielk1977       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1564a3f06598Sdanielk1977       holdingMutex = 0;
1565a3f06598Sdanielk1977       if( async.ioDelay>0 ){
15666f050aa2Sdanielk1977         pVfs->xSleep(pVfs, async.ioDelay*1000);
1567a3f06598Sdanielk1977       }else{
1568a3f06598Sdanielk1977         async_sched_yield();
1569a3f06598Sdanielk1977       }
1570a3f06598Sdanielk1977     }
1571a3f06598Sdanielk1977   }
1572a3f06598Sdanielk1977 
1573a3f06598Sdanielk1977   async_mutex_leave(ASYNC_MUTEX_WRITER);
1574a3f06598Sdanielk1977   return;
1575a3f06598Sdanielk1977 }
1576a3f06598Sdanielk1977 
1577a3f06598Sdanielk1977 /*
1578a3f06598Sdanielk1977 ** Install the asynchronous VFS.
1579a3f06598Sdanielk1977 */
1580a3f06598Sdanielk1977 int sqlite3async_initialize(const char *zParent, int isDefault){
1581a3f06598Sdanielk1977   int rc = SQLITE_OK;
1582a3f06598Sdanielk1977   if( async_vfs.pAppData==0 ){
1583a3f06598Sdanielk1977     sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1584a3f06598Sdanielk1977     if( !pParent || async_os_initialize() ){
1585a3f06598Sdanielk1977       rc = SQLITE_ERROR;
1586a3f06598Sdanielk1977     }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1587a3f06598Sdanielk1977       async_os_shutdown();
1588a3f06598Sdanielk1977     }else{
1589a3f06598Sdanielk1977       async_vfs.pAppData = (void *)pParent;
1590a3f06598Sdanielk1977       async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1591a3f06598Sdanielk1977     }
1592a3f06598Sdanielk1977   }
1593a3f06598Sdanielk1977   return rc;
1594a3f06598Sdanielk1977 }
1595a3f06598Sdanielk1977 
1596a3f06598Sdanielk1977 /*
1597a3f06598Sdanielk1977 ** Uninstall the asynchronous VFS.
1598a3f06598Sdanielk1977 */
1599a3f06598Sdanielk1977 void sqlite3async_shutdown(void){
1600a3f06598Sdanielk1977   if( async_vfs.pAppData ){
1601a3f06598Sdanielk1977     async_os_shutdown();
1602a3f06598Sdanielk1977     sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1603a3f06598Sdanielk1977     async_vfs.pAppData = 0;
1604a3f06598Sdanielk1977   }
1605a3f06598Sdanielk1977 }
1606a3f06598Sdanielk1977 
/*
** Process events on the write-queue by running the writer loop,
** asyncWriterThread(), on the calling thread. The call returns when
** that loop returns.
*/
void sqlite3async_run(void){
  asyncWriterThread();
}
1613a3f06598Sdanielk1977 
1614a3f06598Sdanielk1977 /*
1615a3f06598Sdanielk1977 ** Control/configure the asynchronous IO system.
1616a3f06598Sdanielk1977 */
1617a3f06598Sdanielk1977 int sqlite3async_control(int op, ...){
1618a3f06598Sdanielk1977   va_list ap;
1619a3f06598Sdanielk1977   va_start(ap, op);
1620a3f06598Sdanielk1977   switch( op ){
1621a3f06598Sdanielk1977     case SQLITEASYNC_HALT: {
1622a3f06598Sdanielk1977       int eWhen = va_arg(ap, int);
1623a3f06598Sdanielk1977       if( eWhen!=SQLITEASYNC_HALT_NEVER
1624a3f06598Sdanielk1977        && eWhen!=SQLITEASYNC_HALT_NOW
1625a3f06598Sdanielk1977        && eWhen!=SQLITEASYNC_HALT_IDLE
1626a3f06598Sdanielk1977       ){
16274598b8e4Sdanielk1977         return SQLITE_MISUSE;
1628a3f06598Sdanielk1977       }
1629a3f06598Sdanielk1977       async.eHalt = eWhen;
1630a3f06598Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_QUEUE);
1631a3f06598Sdanielk1977       async_cond_signal(ASYNC_COND_QUEUE);
1632a3f06598Sdanielk1977       async_mutex_leave(ASYNC_MUTEX_QUEUE);
1633a3f06598Sdanielk1977       break;
1634a3f06598Sdanielk1977     }
1635a3f06598Sdanielk1977 
1636a3f06598Sdanielk1977     case SQLITEASYNC_DELAY: {
1637a3f06598Sdanielk1977       int iDelay = va_arg(ap, int);
16384598b8e4Sdanielk1977       if( iDelay<0 ){
16394598b8e4Sdanielk1977         return SQLITE_MISUSE;
16404598b8e4Sdanielk1977       }
1641a3f06598Sdanielk1977       async.ioDelay = iDelay;
1642a3f06598Sdanielk1977       break;
1643a3f06598Sdanielk1977     }
1644a3f06598Sdanielk1977 
16454598b8e4Sdanielk1977     case SQLITEASYNC_LOCKFILES: {
16464598b8e4Sdanielk1977       int bLock = va_arg(ap, int);
16474598b8e4Sdanielk1977       async_mutex_enter(ASYNC_MUTEX_QUEUE);
16484598b8e4Sdanielk1977       if( async.nFile || async.pQueueFirst ){
16494598b8e4Sdanielk1977         async_mutex_leave(ASYNC_MUTEX_QUEUE);
16504598b8e4Sdanielk1977         return SQLITE_MISUSE;
16514598b8e4Sdanielk1977       }
16524598b8e4Sdanielk1977       async.bLockFiles = bLock;
16534598b8e4Sdanielk1977       async_mutex_leave(ASYNC_MUTEX_QUEUE);
16544598b8e4Sdanielk1977       break;
16554598b8e4Sdanielk1977     }
16564598b8e4Sdanielk1977 
1657a3f06598Sdanielk1977     case SQLITEASYNC_GET_HALT: {
1658a3f06598Sdanielk1977       int *peWhen = va_arg(ap, int *);
1659a3f06598Sdanielk1977       *peWhen = async.eHalt;
1660a3f06598Sdanielk1977       break;
1661a3f06598Sdanielk1977     }
1662a3f06598Sdanielk1977     case SQLITEASYNC_GET_DELAY: {
1663a3f06598Sdanielk1977       int *piDelay = va_arg(ap, int *);
1664a3f06598Sdanielk1977       *piDelay = async.ioDelay;
1665a3f06598Sdanielk1977       break;
1666a3f06598Sdanielk1977     }
16674598b8e4Sdanielk1977     case SQLITEASYNC_GET_LOCKFILES: {
16684598b8e4Sdanielk1977       int *piDelay = va_arg(ap, int *);
16694598b8e4Sdanielk1977       *piDelay = async.bLockFiles;
16704598b8e4Sdanielk1977       break;
16714598b8e4Sdanielk1977     }
1672a3f06598Sdanielk1977 
1673a3f06598Sdanielk1977     default:
1674a3f06598Sdanielk1977       return SQLITE_ERROR;
1675a3f06598Sdanielk1977   }
1676a3f06598Sdanielk1977   return SQLITE_OK;
1677a3f06598Sdanielk1977 }
1678a3f06598Sdanielk1977 
1679a3f06598Sdanielk1977 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1680a3f06598Sdanielk1977 
1681