1 /* 2 ** 2005 December 14 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** $Id: sqlite3async.c,v 1.5 2009/04/29 18:12:00 shane Exp $ 14 ** 15 ** This file contains the implementation of an asynchronous IO backend 16 ** for SQLite. 17 */ 18 19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) 20 21 #include "sqlite3async.h" 22 #include "sqliteInt.h" 23 #include <stdarg.h> 24 #include <string.h> 25 #include <assert.h> 26 27 /* Useful macros used in several places */ 28 #define MIN(x,y) ((x)<(y)?(x):(y)) 29 #define MAX(x,y) ((x)>(y)?(x):(y)) 30 31 /* Forward references */ 32 typedef struct AsyncWrite AsyncWrite; 33 typedef struct AsyncFile AsyncFile; 34 typedef struct AsyncFileData AsyncFileData; 35 typedef struct AsyncFileLock AsyncFileLock; 36 typedef struct AsyncLock AsyncLock; 37 38 /* Enable for debugging */ 39 #ifndef NDEBUG 40 #include <stdio.h> 41 static int sqlite3async_trace = 0; 42 # define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X 43 static void asyncTrace(const char *zFormat, ...){ 44 char *z; 45 va_list ap; 46 va_start(ap, zFormat); 47 z = sqlite3_vmprintf(zFormat, ap); 48 va_end(ap); 49 fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z); 50 sqlite3_free(z); 51 } 52 #else 53 # define ASYNC_TRACE(X) 54 #endif 55 56 /* 57 ** THREAD SAFETY NOTES 58 ** 59 ** Basic rules: 60 ** 61 ** * Both read and write access to the global write-op queue must be 62 ** protected by the async.queueMutex. As are the async.ioError and 63 ** async.nFile variables. 64 ** 65 ** * The async.pLock list and all AsyncLock and AsyncFileLock 66 ** structures must be protected by the async.lockMutex mutex. 
**
**     * The file handles from the underlying system are not assumed to
**       be thread safe.
**
**     * See the last two paragraphs under "The Writer Thread" for
**       an assumption to do with file-handle synchronization by the Os.
**
** Deadlock prevention:
**
**     There are three mutexes used by the system: the "writer" mutex,
**     the "queue" mutex and the "lock" mutex. Rules are:
**
**     * It is illegal to block on the writer mutex when any other mutex
**       is held, and
**
**     * It is illegal to block on the queue mutex when the lock mutex
**       is held.
**
**   i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
**
** File system operations (invoked by SQLite thread):
**
**     xOpen
**     xDelete
**     xFileExists
**
** File handle operations (invoked by SQLite thread):
**
**         asyncWrite, asyncClose, asyncTruncate, asyncSync
**
**     The operations above add an entry to the global write-op list. They
**     prepare the entry, acquire the async.queueMutex momentarily while
**     list pointers are manipulated to insert the new entry, signal the
**     writer thread to wake up in case it happens to be asleep, and then
**     release the mutex.
**
**
**         asyncRead, asyncFileSize.
**
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue. So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
**
**
**         asyncLock, asyncUnlock, asyncCheckReservedLock
**
**     These primitives implement in-process locking using a list of
**     AsyncLock structures (async.pLock) looked up by file name. Files
**     are locked correctly for connections coming from the same process.
**     But other processes cannot see these locks and will therefore not
**     honor them.
**
**
** The writer thread:
**
**     The async.writerMutex is used to make sure that there is only
**     a single writer thread running at a time.
**
**     Inside the writer thread is a loop that works like this:
**
**         WHILE (write-op list is not empty)
**             Do IO operation at head of write-op list
**             Remove entry from head of write-op list
**         END WHILE
**
**     The async.queueMutex is always held during the <write-op list is
**     not empty> test, and when the entry is removed from the head
**     of the write-op list. Sometimes it is held for the interim
**     period (while the IO is performed), and sometimes it is
**     relinquished. It is relinquished if (a) the IO op is an
**     ASYNC_CLOSE or (b) when the file handle was opened, two of
**     the underlying system's handles were opened on the same
**     file-system entry.
**
**     If condition (b) above is true, then one file-handle
**     (AsyncFileData.pBaseRead) is used exclusively by sqlite threads to
**     read the file, the other (AsyncFileData.pBaseWrite) by
**     sqlite3_async_flush() threads to perform write() operations. This
**     means that read operations are not blocked by asynchronous writes
**     (although asynchronous writes may still be blocked by reads).
**
**     This assumes that the OS keeps two handles open on the same file
**     properly in sync. That is, any read operation that starts after a
**     write operation on the same file system entry has completed returns
**     data consistent with the write. We also assume that if one thread
**     reads a file while another is writing it all bytes other than the
**     ones actually being written contain valid data.
**
**     If the above assumptions are not true, set the preprocessor symbol
**     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
*/


/*
** TESTONLY(X) expands to X in builds where NDEBUG is not defined and to
** nothing otherwise. It is used for bookkeeping that only exists to
** support assert() statements.
*/
#ifndef NDEBUG
# define TESTONLY( X ) X
#else
# define TESTONLY( X )
#endif

/*
** PORTING FUNCTIONS
**
** There are two definitions of the following functions. One for pthreads
** compatible systems and one for Win32. These functions isolate the OS
** specific code required by each platform.
**
** The system uses three mutexes and a single condition variable. To
** block on a mutex, async_mutex_enter() is called. The parameter passed
** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
** called with a parameter identifying the mutex being unlocked. Mutexes
** are not recursive - it is an error to call async_mutex_enter() to
** lock a mutex that is already locked, or to call async_mutex_leave()
** to unlock a mutex that is not currently locked.
**
** The async_cond_wait() and async_cond_signal() functions are modelled
** on the pthreads functions with similar names. The first parameter to
** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
** is called the mutex identified by the second parameter must be held.
** The mutex is unlocked, and the calling thread simultaneously begins
** waiting for the condition variable to be signalled by another thread.
** After another thread signals the condition variable, the calling
** thread stops waiting, locks mutex eMutex and returns. The
** async_cond_signal() function is used to signal the condition variable.
** It is assumed that the mutex used by the thread calling async_cond_wait()
** is held by the caller of async_cond_signal() (otherwise there would be
** a race condition).
**
** It is guaranteed that no other thread will call async_cond_wait() when
** there is already a thread waiting on the condition variable.
**
** The async_sched_yield() function is called to suggest to the operating
** system that it would be a good time to shift the current thread off the
** CPU. The system will still work if this function does nothing (the
** win32 version currently just calls Sleep(0)), but it might be marginally
** more efficient if it is implemented properly.
*/
static void async_mutex_enter(int eMutex);
static void async_mutex_leave(int eMutex);
static void async_cond_wait(int eCond, int eMutex);
static void async_cond_signal(int eCond);
static void async_sched_yield(void);

/*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
**
** For pthreads builds, both of these functions are no-ops. For win32,
** they provide an opportunity to initialize and finalize the required
** mutex and condition variables.
**
** If async_os_initialize() returns other than zero, then the initialization
** fails and SQLITE_ERROR is returned to the user.
*/
static int async_os_initialize(void);
static void async_os_shutdown(void);

/* Values for use as the 'eMutex' argument of the above functions. The
** integer values assigned to these constants are important for assert()
** statements that verify that mutexes are locked in the correct order.
** Specifically, it is unsafe to try to lock mutex N while holding a lock
** on mutex M if (M<=N).
*/
#define ASYNC_MUTEX_LOCK    0
#define ASYNC_MUTEX_QUEUE   1
#define ASYNC_MUTEX_WRITER  2

/* Values for use as the 'eCond' argument of the above functions.
*/ 238 #define ASYNC_COND_QUEUE 0 239 240 /************************************************************************* 241 ** Start of OS specific code. 242 */ 243 #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__) 244 245 /* The following block contains the win32 specific code. */ 246 247 #define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X]) 248 249 static struct AsyncPrimitives { 250 int isInit; 251 DWORD aHolder[3]; 252 CRITICAL_SECTION aMutex[3]; 253 HANDLE aCond[1]; 254 } primitives = { 0 }; 255 256 static int async_os_initialize(void){ 257 if( !primitives.isInit ){ 258 primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0); 259 if( primitives.aCond[0]==NULL ){ 260 return 1; 261 } 262 InitializeCriticalSection(&primitives.aMutex[0]); 263 InitializeCriticalSection(&primitives.aMutex[1]); 264 InitializeCriticalSection(&primitives.aMutex[2]); 265 primitives.isInit = 1; 266 } 267 return 0; 268 } 269 static void async_os_shutdown(void){ 270 if( primitives.isInit ){ 271 DeleteCriticalSection(&primitives.aMutex[0]); 272 DeleteCriticalSection(&primitives.aMutex[1]); 273 DeleteCriticalSection(&primitives.aMutex[2]); 274 CloseHandle(primitives.aCond[0]); 275 primitives.isInit = 0; 276 } 277 } 278 279 /* The following block contains the Win32 specific code. 
*/ 280 static void async_mutex_enter(int eMutex){ 281 assert( eMutex==0 || eMutex==1 || eMutex==2 ); 282 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) ); 283 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) ); 284 assert( eMutex!=0 || (!mutex_held(0)) ); 285 EnterCriticalSection(&primitives.aMutex[eMutex]); 286 TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); ) 287 } 288 static void async_mutex_leave(int eMutex){ 289 assert( eMutex==0 || eMutex==1 || eMutex==2 ); 290 assert( mutex_held(eMutex) ); 291 TESTONLY( primitives.aHolder[eMutex] = 0; ) 292 LeaveCriticalSection(&primitives.aMutex[eMutex]); 293 } 294 static void async_cond_wait(int eCond, int eMutex){ 295 ResetEvent(primitives.aCond[eCond]); 296 async_mutex_leave(eMutex); 297 WaitForSingleObject(primitives.aCond[eCond], INFINITE); 298 async_mutex_enter(eMutex); 299 } 300 static void async_cond_signal(int eCond){ 301 assert( mutex_held(ASYNC_MUTEX_QUEUE) ); 302 SetEvent(primitives.aCond[eCond]); 303 } 304 static void async_sched_yield(void){ 305 /* Todo: Find out if win32 offers anything like sched_yield() */ 306 Sleep(0); 307 } 308 #else 309 310 /* The following block contains the pthreads specific code. 
*/ 311 #include <pthread.h> 312 #include <sched.h> 313 314 #define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self()) 315 316 static int async_os_initialize(void) {return 0;} 317 static void async_os_shutdown(void) {} 318 319 static struct AsyncPrimitives { 320 pthread_mutex_t aMutex[3]; 321 pthread_cond_t aCond[1]; 322 pthread_t aHolder[3]; 323 } primitives = { 324 { PTHREAD_MUTEX_INITIALIZER, 325 PTHREAD_MUTEX_INITIALIZER, 326 PTHREAD_MUTEX_INITIALIZER 327 } , { 328 PTHREAD_COND_INITIALIZER 329 } , { 0, 0, 0 } 330 }; 331 332 static void async_mutex_enter(int eMutex){ 333 assert( eMutex==0 || eMutex==1 || eMutex==2 ); 334 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) ); 335 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) ); 336 assert( eMutex!=0 || (!mutex_held(0)) ); 337 pthread_mutex_lock(&primitives.aMutex[eMutex]); 338 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); ) 339 } 340 static void async_mutex_leave(int eMutex){ 341 assert( eMutex==0 || eMutex==1 || eMutex==2 ); 342 assert( mutex_held(eMutex) ); 343 TESTONLY( primitives.aHolder[eMutex] = 0; ) 344 pthread_mutex_unlock(&primitives.aMutex[eMutex]); 345 } 346 static void async_cond_wait(int eCond, int eMutex){ 347 assert( eMutex==0 || eMutex==1 || eMutex==2 ); 348 assert( mutex_held(eMutex) ); 349 TESTONLY( primitives.aHolder[eMutex] = 0; ) 350 pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]); 351 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); ) 352 } 353 static void async_cond_signal(int eCond){ 354 assert( mutex_held(ASYNC_MUTEX_QUEUE) ); 355 pthread_cond_signal(&primitives.aCond[eCond]); 356 } 357 static void async_sched_yield(void){ 358 sched_yield(); 359 } 360 #endif 361 /* 362 ** End of OS specific code. 
*************************************************************************/

/* Assert that the calling thread is recorded as the holder of mutex X. */
#define assert_mutex_is_held(X) assert( mutex_held(X) )


/* Default to using two file handles per file unless the build overrides
** SQLITE_ASYNC_TWO_FILEHANDLES.
*/
#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
#define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif

/*
** State information is held in the static variable "async" defined
** as the following structure.
**
** Both async.ioError and async.nFile are protected by async.queueMutex.
**
** The initializer below sets bLockFiles to 1 and every other field to 0.
*/
static struct TestAsyncStaticData {
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
  volatile int ioDelay;        /* Extra delay between write operations */
  volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
  volatile int bLockFiles;     /* Current value of "lockfiles" parameter */
  int ioError;                 /* SQLITE_OK, or the code of a prior IO error */
  int nFile;                   /* Number of open files (from sqlite pov) */
} async = { 0,0,0,0,0,1,0,0 };

/* Possible values of AsyncWrite.op */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7

/* Names of opcodes.  Used for debugging only.
** Make sure these stay in sync with the macros above!
*/
static const char *azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};

/*
** Entries on the write-op queue are instances of the AsyncWrite
** structure, defined here.
**
** The interpretation of the iOffset and nByte variables varies depending
** on the value of AsyncWrite.op:
**
** ASYNC_NOOP:
**     No values used.
**
** ASYNC_WRITE:
**     iOffset -> Offset in file to write to.
**     nByte   -> Number of bytes of data to write (pointed to by zBuf).
**
** ASYNC_SYNC:
**     nByte   -> flags to pass to sqlite3OsSync().
**
** ASYNC_TRUNCATE:
**     iOffset -> Size to truncate file to.
**     nByte   -> Unused.
**
** ASYNC_CLOSE:
**     iOffset -> Unused.
**     nByte   -> Unused.
**
** ASYNC_DELETE:
**     iOffset -> Contains the "syncDir" flag.
**     nByte   -> Number of bytes zBuf points to (file name).
**
** ASYNC_OPENEXCLUSIVE:
**     iOffset -> Value of "delflag".
**     nByte   -> Number of bytes zBuf points to (file name).
**
** ASYNC_UNLOCK:
**     nByte   -> Argument to sqlite3OsUnlock().
**
**
** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
** single blob, so is deleted when sqlite3_free() is called on the parent
** structure.
*/
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  sqlite_int64 iOffset;        /* See above */
  int nByte;          /* See above */
  char *zBuf;         /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
  AsyncWrite *pNext;  /* Next write operation (to any file) */
};

/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and linked into the list headed by async.pLock.
** Entries are identified by the full pathname of the opened file, stored
** in zFile/nFile (see findLock()).
**
** AsyncLock.pList points to the head of a linked list of AsyncFileLock
** structures, one for each handle currently open on the file.
**
** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
** Otherwise, pFile is a file handle opened on the file in question and
** used to obtain the file-system locks required by database connections
** within this process.
**
** See comments above the asyncLock() function for more details on
** the implementation of database locking used by this backend.
*/
struct AsyncLock {
  char *zFile;                 /* File name this entry is matched on */
  int nFile;                   /* Number of bytes of zFile compared */
  sqlite3_file *pFile;         /* Handle used for file-system locks, or 0 */
  int eLock;                   /* Lock level currently held via pFile */
  AsyncFileLock *pList;        /* List of per-handle lock states */
  AsyncLock *pNext;            /* Next in linked list headed by async.pLock */
};

/*
** An instance of the following structure is allocated along with each
** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
** file was opened with the SQLITE_OPEN_MAIN_DB.
*/
struct AsyncFileLock {
  int eLock;                /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;           /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;     /* Next handle in AsyncLock.pList */
};

/*
** The AsyncFile structure is a subclass of sqlite3_file used for
** asynchronous IO.
**
** All of the actual data for the structure is stored in the structure
** pointed to by AsyncFile.pData, which is allocated as part of the
** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
** lifetime of the AsyncFile structure is ended by the caller after OsClose()
** is called, but the data in AsyncFileData may be required by the
** writer thread after that point.
507 */ 508 struct AsyncFile { 509 sqlite3_io_methods *pMethod; 510 AsyncFileData *pData; 511 }; 512 struct AsyncFileData { 513 char *zName; /* Underlying OS filename - used for debugging */ 514 int nName; /* Number of characters in zName */ 515 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */ 516 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */ 517 AsyncFileLock lock; /* Lock state for this handle */ 518 AsyncLock *pLock; /* AsyncLock object for this file system entry */ 519 AsyncWrite closeOp; /* Preallocated close operation */ 520 }; 521 522 /* 523 ** Add an entry to the end of the global write-op list. pWrite should point 524 ** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer 525 ** thread will call sqlite3_free() to free the structure after the specified 526 ** operation has been completed. 527 ** 528 ** Once an AsyncWrite structure has been added to the list, it becomes the 529 ** property of the writer thread and must not be read or modified by the 530 ** caller. 531 */ 532 static void addAsyncWrite(AsyncWrite *pWrite){ 533 /* We must hold the queue mutex in order to modify the queue pointers */ 534 if( pWrite->op!=ASYNC_UNLOCK ){ 535 async_mutex_enter(ASYNC_MUTEX_QUEUE); 536 } 537 538 /* Add the record to the end of the write-op queue */ 539 assert( !pWrite->pNext ); 540 if( async.pQueueLast ){ 541 assert( async.pQueueFirst ); 542 async.pQueueLast->pNext = pWrite; 543 }else{ 544 async.pQueueFirst = pWrite; 545 } 546 async.pQueueLast = pWrite; 547 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op], 548 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset)); 549 550 if( pWrite->op==ASYNC_CLOSE ){ 551 async.nFile--; 552 } 553 554 /* The writer thread might have been idle because there was nothing 555 ** on the write-op queue for it to do. So wake it up. 
*/ 556 async_cond_signal(ASYNC_COND_QUEUE); 557 558 /* Drop the queue mutex */ 559 if( pWrite->op!=ASYNC_UNLOCK ){ 560 async_mutex_leave(ASYNC_MUTEX_QUEUE); 561 } 562 } 563 564 /* 565 ** Increment async.nFile in a thread-safe manner. 566 */ 567 static void incrOpenFileCount(void){ 568 /* We must hold the queue mutex in order to modify async.nFile */ 569 async_mutex_enter(ASYNC_MUTEX_QUEUE); 570 if( async.nFile==0 ){ 571 async.ioError = SQLITE_OK; 572 } 573 async.nFile++; 574 async_mutex_leave(ASYNC_MUTEX_QUEUE); 575 } 576 577 /* 578 ** This is a utility function to allocate and populate a new AsyncWrite 579 ** structure and insert it (via addAsyncWrite() ) into the global list. 580 */ 581 static int addNewAsyncWrite( 582 AsyncFileData *pFileData, 583 int op, 584 sqlite3_int64 iOffset, 585 int nByte, 586 const char *zByte 587 ){ 588 AsyncWrite *p; 589 if( op!=ASYNC_CLOSE && async.ioError ){ 590 return async.ioError; 591 } 592 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0)); 593 if( !p ){ 594 /* The upper layer does not expect operations like OsWrite() to 595 ** return SQLITE_NOMEM. This is partly because under normal conditions 596 ** SQLite is required to do rollback without calling malloc(). So 597 ** if malloc() fails here, treat it as an I/O error. The above 598 ** layer knows how to handle that. 599 */ 600 return SQLITE_IOERR; 601 } 602 p->op = op; 603 p->iOffset = iOffset; 604 p->nByte = nByte; 605 p->pFileData = pFileData; 606 p->pNext = 0; 607 if( zByte ){ 608 p->zBuf = (char *)&p[1]; 609 memcpy(p->zBuf, zByte, nByte); 610 }else{ 611 p->zBuf = 0; 612 } 613 addAsyncWrite(p); 614 return SQLITE_OK; 615 } 616 617 /* 618 ** Close the file. This just adds an entry to the write-op list, the file is 619 ** not actually closed. 
620 */ 621 static int asyncClose(sqlite3_file *pFile){ 622 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 623 624 /* Unlock the file, if it is locked */ 625 async_mutex_enter(ASYNC_MUTEX_LOCK); 626 p->lock.eLock = 0; 627 async_mutex_leave(ASYNC_MUTEX_LOCK); 628 629 addAsyncWrite(&p->closeOp); 630 return SQLITE_OK; 631 } 632 633 /* 634 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of 635 ** writing to the underlying file, this function adds an entry to the end of 636 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be 637 ** returned. 638 */ 639 static int asyncWrite( 640 sqlite3_file *pFile, 641 const void *pBuf, 642 int amt, 643 sqlite3_int64 iOff 644 ){ 645 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 646 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf); 647 } 648 649 /* 650 ** Read data from the file. First we read from the filesystem, then adjust 651 ** the contents of the buffer based on ASYNC_WRITE operations in the 652 ** write-op queue. 653 ** 654 ** This method holds the mutex from start to finish. 655 */ 656 static int asyncRead( 657 sqlite3_file *pFile, 658 void *zOut, 659 int iAmt, 660 sqlite3_int64 iOffset 661 ){ 662 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 663 int rc = SQLITE_OK; 664 sqlite3_int64 filesize; 665 int nRead; 666 sqlite3_file *pBase = p->pBaseRead; 667 668 /* Grab the write queue mutex for the duration of the call */ 669 async_mutex_enter(ASYNC_MUTEX_QUEUE); 670 671 /* If an I/O error has previously occurred in this virtual file 672 ** system, then all subsequent operations fail. 
673 */ 674 if( async.ioError!=SQLITE_OK ){ 675 rc = async.ioError; 676 goto asyncread_out; 677 } 678 679 if( pBase->pMethods ){ 680 rc = pBase->pMethods->xFileSize(pBase, &filesize); 681 if( rc!=SQLITE_OK ){ 682 goto asyncread_out; 683 } 684 nRead = (int)MIN(filesize - iOffset, iAmt); 685 if( nRead>0 ){ 686 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset); 687 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset)); 688 } 689 } 690 691 if( rc==SQLITE_OK ){ 692 AsyncWrite *pWrite; 693 char *zName = p->zName; 694 695 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 696 if( pWrite->op==ASYNC_WRITE && ( 697 (pWrite->pFileData==p) || 698 (zName && pWrite->pFileData->zName==zName) 699 )){ 700 sqlite3_int64 iBeginOut = (pWrite->iOffset-iOffset); 701 sqlite3_int64 iBeginIn = -iBeginOut; 702 int nCopy; 703 704 if( iBeginIn<0 ) iBeginIn = 0; 705 if( iBeginOut<0 ) iBeginOut = 0; 706 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut); 707 708 if( nCopy>0 ){ 709 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy); 710 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset)); 711 } 712 } 713 } 714 } 715 716 asyncread_out: 717 async_mutex_leave(ASYNC_MUTEX_QUEUE); 718 return rc; 719 } 720 721 /* 722 ** Truncate the file to nByte bytes in length. This just adds an entry to 723 ** the write-op list, no IO actually takes place. 724 */ 725 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){ 726 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 727 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0); 728 } 729 730 /* 731 ** Sync the file. This just adds an entry to the write-op list, the 732 ** sync() is done later by sqlite3_async_flush(). 733 */ 734 static int asyncSync(sqlite3_file *pFile, int flags){ 735 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 736 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0); 737 } 738 739 /* 740 ** Read the size of the file. 
First we read the size of the file system
** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
** currently in the write-op list.
**
** The queue mutex is held from start to finish. *piSize is only written
** when SQLITE_OK is returned.
*/
int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
  AsyncFileData *pData = ((AsyncFile *)pFile)->pData;
  sqlite3_file *pReal;
  sqlite3_int64 nSize = 0;
  AsyncWrite *pOp;
  int rc = SQLITE_OK;

  async_mutex_enter(ASYNC_MUTEX_QUEUE);

  /* Start from the size reported by the base file. If its pMethods
  ** pointer is not set the underlying file has not been opened yet, so
  ** everything relevant is in the write-op queue and the size starts
  ** at zero.
  */
  pReal = pData->pBaseRead;
  if( pReal->pMethods ){
    rc = pReal->pMethods->xFileSize(pReal, &nSize);
  }
  if( rc!=SQLITE_OK ){
    async_mutex_leave(ASYNC_MUTEX_QUEUE);
    return rc;
  }

  /* Replay the effect of each queued operation on the size, in order */
  for(pOp=async.pQueueFirst; pOp; pOp=pOp->pNext){
    int isThisFile;

    /* A pending delete of this file name resets the size to zero */
    if( pOp->op==ASYNC_DELETE
     && pData->zName
     && strcmp(pData->zName, pOp->zBuf)==0
    ){
      nSize = 0;
      continue;
    }

    isThisFile = pOp->pFileData && (
        pOp->pFileData==pData
     || (pData->zName && pOp->pFileData->zName==pData->zName)
    );
    if( !isThisFile ) continue;

    if( pOp->op==ASYNC_WRITE ){
      /* A write may extend the file */
      sqlite3_int64 iEnd = pOp->iOffset + (sqlite3_int64)(pOp->nByte);
      if( iEnd>nSize ) nSize = iEnd;
    }else if( pOp->op==ASYNC_TRUNCATE ){
      /* A truncate may shrink it */
      if( pOp->iOffset<nSize ) nSize = pOp->iOffset;
    }
  }

  *piSize = nSize;
  async_mutex_leave(ASYNC_MUTEX_QUEUE);
  return rc;
}

/*
** Lock or unlock the actual file-system entry.
794 */ 795 static int getFileLock(AsyncLock *pLock){ 796 int rc = SQLITE_OK; 797 AsyncFileLock *pIter; 798 int eRequired = 0; 799 800 if( pLock->pFile ){ 801 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ 802 assert(pIter->eAsyncLock>=pIter->eLock); 803 if( pIter->eAsyncLock>eRequired ){ 804 eRequired = pIter->eAsyncLock; 805 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE); 806 } 807 } 808 809 if( eRequired>pLock->eLock ){ 810 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired); 811 if( rc==SQLITE_OK ){ 812 pLock->eLock = eRequired; 813 } 814 } 815 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){ 816 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired); 817 if( rc==SQLITE_OK ){ 818 pLock->eLock = eRequired; 819 } 820 } 821 } 822 823 return rc; 824 } 825 826 /* 827 ** Return the AsyncLock structure from the global async.pLock list 828 ** associated with the file-system entry identified by path zName 829 ** (a string of nName bytes). If no such structure exists, return 0. 830 */ 831 static AsyncLock *findLock(const char *zName, int nName){ 832 AsyncLock *p = async.pLock; 833 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){ 834 p = p->pNext; 835 } 836 return p; 837 } 838 839 /* 840 ** The following two methods - asyncLock() and asyncUnlock() - are used 841 ** to obtain and release locks on database files opened with the 842 ** asynchronous backend. 
843 */ 844 static int asyncLock(sqlite3_file *pFile, int eLock){ 845 int rc = SQLITE_OK; 846 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 847 848 if( p->zName ){ 849 async_mutex_enter(ASYNC_MUTEX_LOCK); 850 if( p->lock.eLock<eLock ){ 851 AsyncLock *pLock = p->pLock; 852 AsyncFileLock *pIter; 853 assert(pLock && pLock->pList); 854 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ 855 if( pIter!=&p->lock && ( 856 (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) || 857 (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) || 858 (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) || 859 (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING) 860 )){ 861 rc = SQLITE_BUSY; 862 } 863 } 864 if( rc==SQLITE_OK ){ 865 p->lock.eLock = eLock; 866 p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock); 867 } 868 assert(p->lock.eAsyncLock>=p->lock.eLock); 869 if( rc==SQLITE_OK ){ 870 rc = getFileLock(pLock); 871 } 872 } 873 async_mutex_leave(ASYNC_MUTEX_LOCK); 874 } 875 876 ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc)); 877 return rc; 878 } 879 static int asyncUnlock(sqlite3_file *pFile, int eLock){ 880 int rc = SQLITE_OK; 881 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 882 if( p->zName ){ 883 AsyncFileLock *pLock = &p->lock; 884 async_mutex_enter(ASYNC_MUTEX_QUEUE); 885 async_mutex_enter(ASYNC_MUTEX_LOCK); 886 pLock->eLock = MIN(pLock->eLock, eLock); 887 rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0); 888 async_mutex_leave(ASYNC_MUTEX_LOCK); 889 async_mutex_leave(ASYNC_MUTEX_QUEUE); 890 } 891 return rc; 892 } 893 894 /* 895 ** This function is called when the pager layer first opens a database file 896 ** and is checking for a hot-journal. 
897 */ 898 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){ 899 int ret = 0; 900 AsyncFileLock *pIter; 901 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 902 903 async_mutex_enter(ASYNC_MUTEX_LOCK); 904 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){ 905 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){ 906 ret = 1; 907 break; 908 } 909 } 910 async_mutex_leave(ASYNC_MUTEX_LOCK); 911 912 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName)); 913 *pResOut = ret; 914 return SQLITE_OK; 915 } 916 917 /* 918 ** sqlite3_file_control() implementation. 919 */ 920 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){ 921 switch( op ){ 922 case SQLITE_FCNTL_LOCKSTATE: { 923 async_mutex_enter(ASYNC_MUTEX_LOCK); 924 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock; 925 async_mutex_leave(ASYNC_MUTEX_LOCK); 926 return SQLITE_OK; 927 } 928 } 929 return SQLITE_ERROR; 930 } 931 932 /* 933 ** Return the device characteristics and sector-size of the device. It 934 ** is tricky to implement these correctly, as this backend might 935 ** not have an open file handle at this point. 
936 */ 937 static int asyncSectorSize(sqlite3_file *pFile){ 938 UNUSED_PARAMETER(pFile); 939 return 512; 940 } 941 static int asyncDeviceCharacteristics(sqlite3_file *pFile){ 942 UNUSED_PARAMETER(pFile); 943 return 0; 944 } 945 946 static int unlinkAsyncFile(AsyncFileData *pData){ 947 AsyncFileLock **ppIter; 948 int rc = SQLITE_OK; 949 950 if( pData->zName ){ 951 AsyncLock *pLock = pData->pLock; 952 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){ 953 if( (*ppIter)==&pData->lock ){ 954 *ppIter = pData->lock.pNext; 955 break; 956 } 957 } 958 if( !pLock->pList ){ 959 AsyncLock **pp; 960 if( pLock->pFile ){ 961 pLock->pFile->pMethods->xClose(pLock->pFile); 962 } 963 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext)); 964 *pp = pLock->pNext; 965 sqlite3_free(pLock); 966 }else{ 967 rc = getFileLock(pLock); 968 } 969 } 970 971 return rc; 972 } 973 974 /* 975 ** The parameter passed to this function is a copy of a 'flags' parameter 976 ** passed to this modules xOpen() method. This function returns true 977 ** if the file should be opened asynchronously, or false if it should 978 ** be opened immediately. 979 ** 980 ** If the file is to be opened asynchronously, then asyncOpen() will add 981 ** an entry to the event queue and the file will not actually be opened 982 ** until the event is processed. Otherwise, the file is opened directly 983 ** by the caller. 984 */ 985 static int doAsynchronousOpen(int flags){ 986 return (flags&SQLITE_OPEN_CREATE) && ( 987 (flags&SQLITE_OPEN_MAIN_JOURNAL) || 988 (flags&SQLITE_OPEN_TEMP_JOURNAL) || 989 (flags&SQLITE_OPEN_DELETEONCLOSE) 990 ); 991 } 992 993 /* 994 ** Open a file. 
995 */ 996 static int asyncOpen( 997 sqlite3_vfs *pAsyncVfs, 998 const char *zName, 999 sqlite3_file *pFile, 1000 int flags, 1001 int *pOutFlags 1002 ){ 1003 static sqlite3_io_methods async_methods = { 1004 1, /* iVersion */ 1005 asyncClose, /* xClose */ 1006 asyncRead, /* xRead */ 1007 asyncWrite, /* xWrite */ 1008 asyncTruncate, /* xTruncate */ 1009 asyncSync, /* xSync */ 1010 asyncFileSize, /* xFileSize */ 1011 asyncLock, /* xLock */ 1012 asyncUnlock, /* xUnlock */ 1013 asyncCheckReservedLock, /* xCheckReservedLock */ 1014 asyncFileControl, /* xFileControl */ 1015 asyncSectorSize, /* xSectorSize */ 1016 asyncDeviceCharacteristics /* xDeviceCharacteristics */ 1017 }; 1018 1019 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1020 AsyncFile *p = (AsyncFile *)pFile; 1021 int nName = 0; 1022 int rc = SQLITE_OK; 1023 int nByte; 1024 AsyncFileData *pData; 1025 AsyncLock *pLock = 0; 1026 char *z; 1027 int isAsyncOpen = doAsynchronousOpen(flags); 1028 1029 /* If zName is NULL, then the upper layer is requesting an anonymous file */ 1030 if( zName ){ 1031 nName = (int)strlen(zName)+1; 1032 } 1033 1034 nByte = ( 1035 sizeof(AsyncFileData) + /* AsyncFileData structure */ 1036 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */ 1037 nName /* AsyncFileData.zName */ 1038 ); 1039 z = sqlite3_malloc(nByte); 1040 if( !z ){ 1041 return SQLITE_NOMEM; 1042 } 1043 memset(z, 0, nByte); 1044 pData = (AsyncFileData*)z; 1045 z += sizeof(pData[0]); 1046 pData->pBaseRead = (sqlite3_file*)z; 1047 z += pVfs->szOsFile; 1048 pData->pBaseWrite = (sqlite3_file*)z; 1049 pData->closeOp.pFileData = pData; 1050 pData->closeOp.op = ASYNC_CLOSE; 1051 1052 if( zName ){ 1053 z += pVfs->szOsFile; 1054 pData->zName = z; 1055 pData->nName = nName; 1056 memcpy(pData->zName, zName, nName); 1057 } 1058 1059 if( !isAsyncOpen ){ 1060 int flagsout; 1061 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout); 1062 if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){ 
1063 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0); 1064 } 1065 if( pOutFlags ){ 1066 *pOutFlags = flagsout; 1067 } 1068 } 1069 1070 async_mutex_enter(ASYNC_MUTEX_LOCK); 1071 1072 if( zName && rc==SQLITE_OK ){ 1073 pLock = findLock(pData->zName, pData->nName); 1074 if( !pLock ){ 1075 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1; 1076 pLock = (AsyncLock *)sqlite3_malloc(nByte); 1077 if( pLock ){ 1078 memset(pLock, 0, nByte); 1079 if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){ 1080 pLock->pFile = (sqlite3_file *)&pLock[1]; 1081 rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0); 1082 if( rc!=SQLITE_OK ){ 1083 sqlite3_free(pLock); 1084 pLock = 0; 1085 } 1086 } 1087 if( pLock ){ 1088 pLock->nFile = pData->nName; 1089 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile]; 1090 memcpy(pLock->zFile, pData->zName, pLock->nFile); 1091 pLock->pNext = async.pLock; 1092 async.pLock = pLock; 1093 } 1094 }else{ 1095 rc = SQLITE_NOMEM; 1096 } 1097 } 1098 } 1099 1100 if( rc==SQLITE_OK ){ 1101 p->pMethod = &async_methods; 1102 p->pData = pData; 1103 1104 /* Link AsyncFileData.lock into the linked list of 1105 ** AsyncFileLock structures for this file. 
1106 */ 1107 if( zName ){ 1108 pData->lock.pNext = pLock->pList; 1109 pLock->pList = &pData->lock; 1110 pData->zName = pLock->zFile; 1111 } 1112 }else{ 1113 if( pData->pBaseRead->pMethods ){ 1114 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); 1115 } 1116 if( pData->pBaseWrite->pMethods ){ 1117 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); 1118 } 1119 sqlite3_free(pData); 1120 } 1121 1122 async_mutex_leave(ASYNC_MUTEX_LOCK); 1123 1124 if( rc==SQLITE_OK ){ 1125 incrOpenFileCount(); 1126 pData->pLock = pLock; 1127 } 1128 1129 if( rc==SQLITE_OK && isAsyncOpen ){ 1130 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0); 1131 if( rc==SQLITE_OK ){ 1132 if( pOutFlags ) *pOutFlags = flags; 1133 }else{ 1134 async_mutex_enter(ASYNC_MUTEX_LOCK); 1135 unlinkAsyncFile(pData); 1136 async_mutex_leave(ASYNC_MUTEX_LOCK); 1137 sqlite3_free(pData); 1138 } 1139 } 1140 if( rc!=SQLITE_OK ){ 1141 p->pMethod = 0; 1142 } 1143 return rc; 1144 } 1145 1146 /* 1147 ** Implementation of sqlite3OsDelete. Add an entry to the end of the 1148 ** write-op queue to perform the delete. 1149 */ 1150 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){ 1151 UNUSED_PARAMETER(pAsyncVfs); 1152 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, (int)strlen(z)+1, z); 1153 } 1154 1155 /* 1156 ** Implementation of sqlite3OsAccess. This method holds the mutex from 1157 ** start to finish. 
1158 */ 1159 static int asyncAccess( 1160 sqlite3_vfs *pAsyncVfs, 1161 const char *zName, 1162 int flags, 1163 int *pResOut 1164 ){ 1165 int rc; 1166 int ret; 1167 AsyncWrite *p; 1168 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1169 1170 assert(flags==SQLITE_ACCESS_READWRITE 1171 || flags==SQLITE_ACCESS_READ 1172 || flags==SQLITE_ACCESS_EXISTS 1173 ); 1174 1175 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1176 rc = pVfs->xAccess(pVfs, zName, flags, &ret); 1177 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){ 1178 for(p=async.pQueueFirst; p; p = p->pNext){ 1179 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){ 1180 ret = 0; 1181 }else if( p->op==ASYNC_OPENEXCLUSIVE 1182 && p->pFileData->zName 1183 && 0==strcmp(p->pFileData->zName, zName) 1184 ){ 1185 ret = 1; 1186 } 1187 } 1188 } 1189 ASYNC_TRACE(("ACCESS(%s): %s = %d\n", 1190 flags==SQLITE_ACCESS_READWRITE?"read-write": 1191 flags==SQLITE_ACCESS_READ?"read":"exists" 1192 , zName, ret) 1193 ); 1194 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1195 *pResOut = ret; 1196 return rc; 1197 } 1198 1199 /* 1200 ** Fill in zPathOut with the full path to the file identified by zPath. 1201 */ 1202 static int asyncFullPathname( 1203 sqlite3_vfs *pAsyncVfs, 1204 const char *zPath, 1205 int nPathOut, 1206 char *zPathOut 1207 ){ 1208 int rc; 1209 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1210 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); 1211 1212 /* Because of the way intra-process file locking works, this backend 1213 ** needs to return a canonical path. The following block assumes the 1214 ** file-system uses unix style paths. 1215 */ 1216 if( rc==SQLITE_OK ){ 1217 int i, j; 1218 int n = nPathOut; 1219 char *z = zPathOut; 1220 while( n>1 && z[n-1]=='/' ){ n--; } 1221 for(i=j=0; i<n; i++){ 1222 if( z[i]=='/' ){ 1223 if( z[i+1]=='/' ) continue; 1224 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 1225 i += 1; 1226 continue; 1227 } 1228 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' 
&& z[i+3]=='/' ){ 1229 while( j>0 && z[j-1]!='/' ){ j--; } 1230 if( j>0 ){ j--; } 1231 i += 2; 1232 continue; 1233 } 1234 } 1235 z[j++] = z[i]; 1236 } 1237 z[j] = 0; 1238 } 1239 1240 return rc; 1241 } 1242 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){ 1243 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1244 return pVfs->xDlOpen(pVfs, zPath); 1245 } 1246 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){ 1247 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1248 pVfs->xDlError(pVfs, nByte, zErrMsg); 1249 } 1250 static void (*asyncDlSym( 1251 sqlite3_vfs *pAsyncVfs, 1252 void *pHandle, 1253 const char *zSymbol 1254 ))(void){ 1255 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1256 return pVfs->xDlSym(pVfs, pHandle, zSymbol); 1257 } 1258 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){ 1259 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1260 pVfs->xDlClose(pVfs, pHandle); 1261 } 1262 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){ 1263 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1264 return pVfs->xRandomness(pVfs, nByte, zBufOut); 1265 } 1266 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){ 1267 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1268 return pVfs->xSleep(pVfs, nMicro); 1269 } 1270 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){ 1271 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1272 return pVfs->xCurrentTime(pVfs, pTimeOut); 1273 } 1274 1275 static sqlite3_vfs async_vfs = { 1276 1, /* iVersion */ 1277 sizeof(AsyncFile), /* szOsFile */ 1278 0, /* mxPathname */ 1279 0, /* pNext */ 1280 SQLITEASYNC_VFSNAME, /* zName */ 1281 0, /* pAppData */ 1282 asyncOpen, /* xOpen */ 1283 asyncDelete, /* xDelete */ 1284 asyncAccess, /* xAccess */ 1285 asyncFullPathname, /* xFullPathname */ 1286 asyncDlOpen, /* xDlOpen */ 1287 asyncDlError, /* xDlError */ 1288 asyncDlSym, /* xDlSym */ 
1289 asyncDlClose, /* xDlClose */ 1290 asyncRandomness, /* xDlError */ 1291 asyncSleep, /* xDlSym */ 1292 asyncCurrentTime /* xDlClose */ 1293 }; 1294 1295 /* 1296 ** This procedure runs in a separate thread, reading messages off of the 1297 ** write queue and processing them one by one. 1298 ** 1299 ** If async.writerHaltNow is true, then this procedure exits 1300 ** after processing a single message. 1301 ** 1302 ** If async.writerHaltWhenIdle is true, then this procedure exits when 1303 ** the write queue is empty. 1304 ** 1305 ** If both of the above variables are false, this procedure runs 1306 ** indefinately, waiting for operations to be added to the write queue 1307 ** and processing them in the order in which they arrive. 1308 ** 1309 ** An artifical delay of async.ioDelay milliseconds is inserted before 1310 ** each write operation in order to simulate the effect of a slow disk. 1311 ** 1312 ** Only one instance of this procedure may be running at a time. 1313 */ 1314 static void asyncWriterThread(void){ 1315 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData); 1316 AsyncWrite *p = 0; 1317 int rc = SQLITE_OK; 1318 int holdingMutex = 0; 1319 1320 async_mutex_enter(ASYNC_MUTEX_WRITER); 1321 1322 while( async.eHalt!=SQLITEASYNC_HALT_NOW ){ 1323 int doNotFree = 0; 1324 sqlite3_file *pBase = 0; 1325 1326 if( !holdingMutex ){ 1327 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1328 } 1329 while( (p = async.pQueueFirst)==0 ){ 1330 if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){ 1331 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1332 break; 1333 }else{ 1334 ASYNC_TRACE(("IDLE\n")); 1335 async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE); 1336 ASYNC_TRACE(("WAKEUP\n")); 1337 } 1338 } 1339 if( p==0 ) break; 1340 holdingMutex = 1; 1341 1342 /* Right now this thread is holding the mutex on the write-op queue. 1343 ** Variable 'p' points to the first entry in the write-op queue. In 1344 ** the general case, we hold on to the mutex for the entire body of 1345 ** the loop. 
1346 ** 1347 ** However in the cases enumerated below, we relinquish the mutex, 1348 ** perform the IO, and then re-request the mutex before removing 'p' from 1349 ** the head of the write-op queue. The idea is to increase concurrency with 1350 ** sqlite threads. 1351 ** 1352 ** * An ASYNC_CLOSE operation. 1353 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish 1354 ** the mutex, call the underlying xOpenExclusive() function, then 1355 ** re-aquire the mutex before seting the AsyncFile.pBaseRead 1356 ** variable. 1357 ** * ASYNC_SYNC and ASYNC_WRITE operations, if 1358 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two 1359 ** file-handles are open for the particular file being "synced". 1360 */ 1361 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){ 1362 p->op = ASYNC_NOOP; 1363 } 1364 if( p->pFileData ){ 1365 pBase = p->pFileData->pBaseWrite; 1366 if( 1367 p->op==ASYNC_CLOSE || 1368 p->op==ASYNC_OPENEXCLUSIVE || 1369 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) 1370 ){ 1371 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1372 holdingMutex = 0; 1373 } 1374 if( !pBase->pMethods ){ 1375 pBase = p->pFileData->pBaseRead; 1376 } 1377 } 1378 1379 switch( p->op ){ 1380 case ASYNC_NOOP: 1381 break; 1382 1383 case ASYNC_WRITE: 1384 assert( pBase ); 1385 ASYNC_TRACE(("WRITE %s %d bytes at %d\n", 1386 p->pFileData->zName, p->nByte, p->iOffset)); 1387 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset); 1388 break; 1389 1390 case ASYNC_SYNC: 1391 assert( pBase ); 1392 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName)); 1393 rc = pBase->pMethods->xSync(pBase, p->nByte); 1394 break; 1395 1396 case ASYNC_TRUNCATE: 1397 assert( pBase ); 1398 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", 1399 p->pFileData->zName, p->iOffset)); 1400 rc = pBase->pMethods->xTruncate(pBase, p->iOffset); 1401 break; 1402 1403 case ASYNC_CLOSE: { 1404 AsyncFileData *pData = p->pFileData; 1405 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName)); 
1406 if( pData->pBaseWrite->pMethods ){ 1407 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); 1408 } 1409 if( pData->pBaseRead->pMethods ){ 1410 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); 1411 } 1412 1413 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock 1414 ** structures for this file. Obtain the async.lockMutex mutex 1415 ** before doing so. 1416 */ 1417 async_mutex_enter(ASYNC_MUTEX_LOCK); 1418 rc = unlinkAsyncFile(pData); 1419 async_mutex_leave(ASYNC_MUTEX_LOCK); 1420 1421 if( !holdingMutex ){ 1422 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1423 holdingMutex = 1; 1424 } 1425 assert_mutex_is_held(ASYNC_MUTEX_QUEUE); 1426 async.pQueueFirst = p->pNext; 1427 sqlite3_free(pData); 1428 doNotFree = 1; 1429 break; 1430 } 1431 1432 case ASYNC_UNLOCK: { 1433 AsyncWrite *pIter; 1434 AsyncFileData *pData = p->pFileData; 1435 int eLock = p->nByte; 1436 1437 /* When a file is locked by SQLite using the async backend, it is 1438 ** locked within the 'real' file-system synchronously. When it is 1439 ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to 1440 ** unlock the file asynchronously. The design of the async backend 1441 ** requires that the 'real' file-system file be locked from the 1442 ** time that SQLite first locks it (and probably reads from it) 1443 ** until all asynchronous write events that were scheduled before 1444 ** SQLite unlocked the file have been processed. 1445 ** 1446 ** This is more complex if SQLite locks and unlocks the file multiple 1447 ** times in quick succession. For example, if SQLite does: 1448 ** 1449 ** lock, write, unlock, lock, write, unlock 1450 ** 1451 ** Each "lock" operation locks the file immediately. Each "write" 1452 ** and "unlock" operation adds an event to the event queue. 
If the 1453 ** second "lock" operation is performed before the first "unlock" 1454 ** operation has been processed asynchronously, then the first 1455 ** "unlock" cannot be safely processed as is, since this would mean 1456 ** the file was unlocked when the second "write" operation is 1457 ** processed. To work around this, when processing an ASYNC_UNLOCK 1458 ** operation, SQLite: 1459 ** 1460 ** 1) Unlocks the file to the minimum of the argument passed to 1461 ** the xUnlock() call and the current lock from SQLite's point 1462 ** of view, and 1463 ** 1464 ** 2) Only unlocks the file at all if this event is the last 1465 ** ASYNC_UNLOCK event on this file in the write-queue. 1466 */ 1467 assert( holdingMutex==1 ); 1468 assert( async.pQueueFirst==p ); 1469 for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){ 1470 if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break; 1471 } 1472 if( !pIter ){ 1473 async_mutex_enter(ASYNC_MUTEX_LOCK); 1474 pData->lock.eAsyncLock = MIN( 1475 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock) 1476 ); 1477 assert(pData->lock.eAsyncLock>=pData->lock.eLock); 1478 rc = getFileLock(pData->pLock); 1479 async_mutex_leave(ASYNC_MUTEX_LOCK); 1480 } 1481 break; 1482 } 1483 1484 case ASYNC_DELETE: 1485 ASYNC_TRACE(("DELETE %s\n", p->zBuf)); 1486 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset); 1487 break; 1488 1489 case ASYNC_OPENEXCLUSIVE: { 1490 int flags = (int)p->iOffset; 1491 AsyncFileData *pData = p->pFileData; 1492 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset)); 1493 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0); 1494 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0); 1495 assert( holdingMutex==0 ); 1496 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1497 holdingMutex = 1; 1498 break; 1499 } 1500 1501 default: assert(!"Illegal value for AsyncWrite.op"); 1502 } 1503 1504 /* If we didn't hang on to the mutex during the IO op, obtain it now 1505 ** so that the 
AsyncWrite structure can be safely removed from the 1506 ** global write-op queue. 1507 */ 1508 if( !holdingMutex ){ 1509 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1510 holdingMutex = 1; 1511 } 1512 /* ASYNC_TRACE(("UNLINK %p\n", p)); */ 1513 if( p==async.pQueueLast ){ 1514 async.pQueueLast = 0; 1515 } 1516 if( !doNotFree ){ 1517 assert_mutex_is_held(ASYNC_MUTEX_QUEUE); 1518 async.pQueueFirst = p->pNext; 1519 sqlite3_free(p); 1520 } 1521 assert( holdingMutex ); 1522 1523 /* An IO error has occurred. We cannot report the error back to the 1524 ** connection that requested the I/O since the error happened 1525 ** asynchronously. The connection has already moved on. There 1526 ** really is nobody to report the error to. 1527 ** 1528 ** The file for which the error occurred may have been a database or 1529 ** journal file. Regardless, none of the currently queued operations 1530 ** associated with the same database should now be performed. Nor should 1531 ** any subsequently requested IO on either a database or journal file 1532 ** handle for the same database be accepted until the main database 1533 ** file handle has been closed and reopened. 1534 ** 1535 ** Furthermore, no further IO should be queued or performed on any file 1536 ** handle associated with a database that may have been part of a 1537 ** multi-file transaction that included the database associated with 1538 ** the IO error (i.e. a database ATTACHed to the same handle at some 1539 ** point in time). 1540 */ 1541 if( rc!=SQLITE_OK ){ 1542 async.ioError = rc; 1543 } 1544 1545 if( async.ioError && !async.pQueueFirst ){ 1546 async_mutex_enter(ASYNC_MUTEX_LOCK); 1547 if( 0==async.pLock ){ 1548 async.ioError = SQLITE_OK; 1549 } 1550 async_mutex_leave(ASYNC_MUTEX_LOCK); 1551 } 1552 1553 /* Drop the queue mutex before continuing to the next write operation 1554 ** in order to give other threads a chance to work with the write queue. 
1555 */ 1556 if( !async.pQueueFirst || !async.ioError ){ 1557 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1558 holdingMutex = 0; 1559 if( async.ioDelay>0 ){ 1560 pVfs->xSleep(pVfs, async.ioDelay*1000); 1561 }else{ 1562 async_sched_yield(); 1563 } 1564 } 1565 } 1566 1567 async_mutex_leave(ASYNC_MUTEX_WRITER); 1568 return; 1569 } 1570 1571 /* 1572 ** Install the asynchronous VFS. 1573 */ 1574 int sqlite3async_initialize(const char *zParent, int isDefault){ 1575 int rc = SQLITE_OK; 1576 if( async_vfs.pAppData==0 ){ 1577 sqlite3_vfs *pParent = sqlite3_vfs_find(zParent); 1578 if( !pParent || async_os_initialize() ){ 1579 rc = SQLITE_ERROR; 1580 }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){ 1581 async_os_shutdown(); 1582 }else{ 1583 async_vfs.pAppData = (void *)pParent; 1584 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname; 1585 } 1586 } 1587 return rc; 1588 } 1589 1590 /* 1591 ** Uninstall the asynchronous VFS. 1592 */ 1593 void sqlite3async_shutdown(void){ 1594 if( async_vfs.pAppData ){ 1595 async_os_shutdown(); 1596 sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs); 1597 async_vfs.pAppData = 0; 1598 } 1599 } 1600 1601 /* 1602 ** Process events on the write-queue. 1603 */ 1604 void sqlite3async_run(void){ 1605 asyncWriterThread(); 1606 } 1607 1608 /* 1609 ** Control/configure the asynchronous IO system. 
1610 */ 1611 int sqlite3async_control(int op, ...){ 1612 va_list ap; 1613 va_start(ap, op); 1614 switch( op ){ 1615 case SQLITEASYNC_HALT: { 1616 int eWhen = va_arg(ap, int); 1617 if( eWhen!=SQLITEASYNC_HALT_NEVER 1618 && eWhen!=SQLITEASYNC_HALT_NOW 1619 && eWhen!=SQLITEASYNC_HALT_IDLE 1620 ){ 1621 return SQLITE_MISUSE; 1622 } 1623 async.eHalt = eWhen; 1624 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1625 async_cond_signal(ASYNC_COND_QUEUE); 1626 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1627 break; 1628 } 1629 1630 case SQLITEASYNC_DELAY: { 1631 int iDelay = va_arg(ap, int); 1632 if( iDelay<0 ){ 1633 return SQLITE_MISUSE; 1634 } 1635 async.ioDelay = iDelay; 1636 break; 1637 } 1638 1639 case SQLITEASYNC_LOCKFILES: { 1640 int bLock = va_arg(ap, int); 1641 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1642 if( async.nFile || async.pQueueFirst ){ 1643 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1644 return SQLITE_MISUSE; 1645 } 1646 async.bLockFiles = bLock; 1647 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1648 break; 1649 } 1650 1651 case SQLITEASYNC_GET_HALT: { 1652 int *peWhen = va_arg(ap, int *); 1653 *peWhen = async.eHalt; 1654 break; 1655 } 1656 case SQLITEASYNC_GET_DELAY: { 1657 int *piDelay = va_arg(ap, int *); 1658 *piDelay = async.ioDelay; 1659 break; 1660 } 1661 case SQLITEASYNC_GET_LOCKFILES: { 1662 int *piDelay = va_arg(ap, int *); 1663 *piDelay = async.bLockFiles; 1664 break; 1665 } 1666 1667 default: 1668 return SQLITE_ERROR; 1669 } 1670 return SQLITE_OK; 1671 } 1672 1673 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */ 1674 1675