1 /* 2 ** 2005 December 14 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** $Id: sqlite3async.c,v 1.4 2009/04/25 08:39:15 danielk1977 Exp $ 14 ** 15 ** This file contains the implementation of an asynchronous IO backend 16 ** for SQLite. 17 */ 18 19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) 20 21 #include "sqlite3async.h" 22 #include "sqlite3.h" 23 #include <stdarg.h> 24 #include <string.h> 25 #include <assert.h> 26 27 /* Useful macros used in several places */ 28 #define MIN(x,y) ((x)<(y)?(x):(y)) 29 #define MAX(x,y) ((x)>(y)?(x):(y)) 30 31 /* Forward references */ 32 typedef struct AsyncWrite AsyncWrite; 33 typedef struct AsyncFile AsyncFile; 34 typedef struct AsyncFileData AsyncFileData; 35 typedef struct AsyncFileLock AsyncFileLock; 36 typedef struct AsyncLock AsyncLock; 37 38 /* Enable for debugging */ 39 #ifndef NDEBUG 40 #include <stdio.h> 41 static int sqlite3async_trace = 0; 42 # define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X 43 static void asyncTrace(const char *zFormat, ...){ 44 char *z; 45 va_list ap; 46 va_start(ap, zFormat); 47 z = sqlite3_vmprintf(zFormat, ap); 48 va_end(ap); 49 fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z); 50 sqlite3_free(z); 51 } 52 #endif 53 54 /* 55 ** THREAD SAFETY NOTES 56 ** 57 ** Basic rules: 58 ** 59 ** * Both read and write access to the global write-op queue must be 60 ** protected by the async.queueMutex. As are the async.ioError and 61 ** async.nFile variables. 62 ** 63 ** * The async.pLock list and all AsyncLock and AsyncFileLock 64 ** structures must be protected by the async.lockMutex mutex. 
**
**     * The file handles from the underlying system are not assumed to
**       be thread safe.
**
**     * See the last two paragraphs under "The Writer Thread" for
**       an assumption to do with file-handle synchronization by the OS.
**
** Deadlock prevention:
**
**     There are three mutexes used by the system: the "writer" mutex,
**     the "queue" mutex and the "lock" mutex. Rules are:
**
**     * It is illegal to block on the writer mutex when any other mutex
**       is held, and
**
**     * It is illegal to block on the queue mutex when the lock mutex
**       is held.
**
**   i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
**
** File system operations (invoked by SQLite thread):
**
**     xOpen
**     xDelete
**     xFileExists
**
** File handle operations (invoked by SQLite thread):
**
**         asyncWrite, asyncClose, asyncTruncate, asyncSync
**
**     The operations above add an entry to the global write-op list. They
**     prepare the entry, acquire the async.queueMutex momentarily while
**     list pointers are manipulated to insert the new entry, signal the
**     writer thread to wake up in case it happens to be asleep, and then
**     release the mutex.
**
**
**         asyncRead, asyncFileSize.
**
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue. So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
**
**
**         asyncLock, asyncUnlock, asyncCheckReservedLock
**
**     These primitives implement in-process locking using a hash table
**     on the file name. Files are locked correctly for connections coming
**     from the same process. But other processes cannot see these locks
**     and will therefore not honor them.
**
**
** The writer thread:
**
**     The async.writerMutex is used to make sure there is only
**     a single writer thread running at a time.
**
**     Inside the writer thread is a loop that works like this:
**
**         WHILE (write-op list is not empty)
**             Do IO operation at head of write-op list
**             Remove entry from head of write-op list
**         END WHILE
**
**     The async.queueMutex is always held during the <write-op list is
**     not empty> test, and when the entry is removed from the head
**     of the write-op list. Sometimes it is held for the interim
**     period (while the IO is performed), and sometimes it is
**     relinquished. It is relinquished if (a) the IO op is an
**     ASYNC_CLOSE or (b) when the file handle was opened, two of
**     the underlying system's handles were opened on the same
**     file-system entry.
**
**     If condition (b) above is true, then one file-handle
**     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
**     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
**     threads to perform write() operations. This means that read
**     operations are not blocked by asynchronous writes (although
**     asynchronous writes may still be blocked by reads).
**
**     This assumes that the OS keeps two handles open on the same file
**     properly in sync. That is, any read operation that starts after a
**     write operation on the same file system entry has completed returns
**     data consistent with the write. We also assume that if one thread
**     reads a file while another is writing it, all bytes other than the
**     ones actually being written contain valid data.
**
**     If the above assumptions are not true, set the preprocessor symbol
**     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
*/


/* TESTONLY(X) expands to X in builds with assert() enabled and to nothing
** when NDEBUG is defined. It wraps the mutex-holder bookkeeping that only
** the assert() statements consume. */
#ifndef NDEBUG
# define TESTONLY( X ) X
#else
# define TESTONLY( X )
#endif

/*
** PORTING FUNCTIONS
**
** There are two definitions of the following functions. One for pthreads
** compatible systems and one for Win32. These functions isolate the OS
** specific code required by each platform.
**
** The system uses three mutexes and a single condition variable. To
** block on a mutex, async_mutex_enter() is called. The parameter passed
** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
** called with a parameter identifying the mutex being unlocked. Mutexes
** are not recursive - it is an error to call async_mutex_enter() to
** lock a mutex that is already locked, or to call async_mutex_leave()
** to unlock a mutex that is not currently locked.
**
** The async_cond_wait() and async_cond_signal() functions are modelled
** on the pthreads functions with similar names. The first parameter to
** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
** is called the mutex identified by the second parameter must be held.
** The mutex is unlocked, and the calling thread simultaneously begins
** waiting for the condition variable to be signalled by another thread.
** After another thread signals the condition variable, the calling
** thread stops waiting, locks mutex eMutex and returns. The
** async_cond_signal() function is used to signal the condition variable.
** It is assumed that the mutex used by the thread calling async_cond_wait()
** is held by the caller of async_cond_signal() (otherwise there would be
** a race condition).
**
** It is guaranteed that no other thread will call async_cond_wait() when
** there is already a thread waiting on the condition variable.
**
** The async_sched_yield() function is called to suggest to the operating
** system that it would be a good time to shift the current thread off the
** CPU. The system will still work if this function is not implemented
** (it is not currently implemented for win32), but it might be marginally
** more efficient if it is.
*/
static void async_mutex_enter(int eMutex);
static void async_mutex_leave(int eMutex);
static void async_cond_wait(int eCond, int eMutex);
static void async_cond_signal(int eCond);
static void async_sched_yield(void);

/*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
**
** For pthreads builds, both of these functions are no-ops. For win32,
** they provide an opportunity to initialize and finalize the required
** mutex and condition variables.
**
** If async_os_initialize() returns other than zero, then the initialization
** fails and SQLITE_ERROR is returned to the user.
*/
static int async_os_initialize(void);
static void async_os_shutdown(void);

/* Values for use as the 'eMutex' argument of the above functions. The
** integer values assigned to these constants are important for assert()
** statements that verify that mutexes are locked in the correct order.
** Specifically, it is unsafe to try to lock mutex N while holding a lock
** on mutex M if (M<=N).
*/
#define ASYNC_MUTEX_LOCK    0
#define ASYNC_MUTEX_QUEUE   1
#define ASYNC_MUTEX_WRITER  2

/* Values for use as the 'eCond' argument of the above functions.
*/ 236 #define ASYNC_COND_QUEUE 0 237 238 /************************************************************************* 239 ** Start of OS specific code. 240 */ 241 #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__) 242 243 /* The following block contains the win32 specific code. */ 244 245 #define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X]) 246 247 static struct AsyncPrimitives { 248 int isInit; 249 DWORD aHolder[3]; 250 CRITICAL_SECTION aMutex[3]; 251 HANDLE aCond[1]; 252 } primitives = { 0 }; 253 254 static int async_os_initialize(void){ 255 if( !primitives.isInit ){ 256 primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0); 257 if( primitives.aCond[0]==NULL ){ 258 return 1; 259 } 260 InitializeCriticalSection(&primitives.aMutex[0]); 261 InitializeCriticalSection(&primitives.aMutex[1]); 262 InitializeCriticalSection(&primitives.aMutex[2]); 263 primitives.isInit = 1; 264 } 265 return 0; 266 } 267 static void async_os_shutdown(void){ 268 if( primitives.isInit ){ 269 DeleteCriticalSection(&primitives.aMutex[0]); 270 DeleteCriticalSection(&primitives.aMutex[1]); 271 DeleteCriticalSection(&primitives.aMutex[2]); 272 CloseHandle(primitives.aCond[0]); 273 primitives.isInit = 0; 274 } 275 } 276 277 /* The following block contains the Win32 specific code. 
*/

/*
** Block until mutex eMutex (one of ASYNC_MUTEX_xxx) is obtained. The
** assert() statements check the caller is not violating the locking
** order: no mutex with an index less than or equal to eMutex may already
** be held by this thread.
*/
static void async_mutex_enter(int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
  assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
  assert( eMutex!=0 || (!mutex_held(0)) );
  EnterCriticalSection(&primitives.aMutex[eMutex]);
  /* Record the holder so that mutex_held() works inside assert(). */
  TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
}

/*
** Release mutex eMutex, which must be held by the calling thread.
*/
static void async_mutex_leave(int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( mutex_held(eMutex) );
  /* Clear the holder before leaving, while the mutex still protects it. */
  TESTONLY( primitives.aHolder[eMutex] = 0; )
  LeaveCriticalSection(&primitives.aMutex[eMutex]);
}

/*
** Wait for event eCond to be signalled. Mutex eMutex must be held on
** entry; it is released while waiting and re-acquired before returning.
*/
static void async_cond_wait(int eCond, int eMutex){
  /* The event is reset while eMutex is still held. async_cond_signal()
  ** asserts that the queue mutex is held, so (when eMutex is the queue
  ** mutex) a signal cannot slip in between the reset and the release. */
  ResetEvent(primitives.aCond[eCond]);
  async_mutex_leave(eMutex);
  WaitForSingleObject(primitives.aCond[eCond], INFINITE);
  async_mutex_enter(eMutex);
}

/*
** Signal event eCond. The caller must hold the queue mutex.
*/
static void async_cond_signal(int eCond){
  assert( mutex_held(ASYNC_MUTEX_QUEUE) );
  SetEvent(primitives.aCond[eCond]);
}

/*
** Yield the CPU. Currently a no-op on win32.
*/
static void async_sched_yield(void){
  /* Todo: Find out if win32 offers anything like sched_yield() */
  /* NOTE(review): Sleep(0) and SwitchToThread() look like candidates -
  ** confirm against the Win32 documentation before adopting either. */
}
#else

/* The following block contains the pthreads specific code.
*/
#include <pthread.h>
#include <sched.h>

/* True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] array is only maintained in builds with assert() enabled (see
** TESTONLY), so this macro may only be used inside assert(). */
#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())

/* All pthreads primitives are statically initialized below, so there is
** nothing to do at initialization or shutdown time. */
static int  async_os_initialize(void) {return 0;}
static void async_os_shutdown(void) {}

static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];      /* Indexed by ASYNC_MUTEX_xxx */
  pthread_cond_t aCond[1];        /* Indexed by ASYNC_COND_xxx */
  pthread_t aHolder[3];           /* Thread holding each mutex (debug) */
} primitives = {
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER
  } , {
    PTHREAD_COND_INITIALIZER
  } , { 0, 0, 0 }
};

/*
** Block until mutex eMutex (one of ASYNC_MUTEX_xxx) is obtained. The
** assert() statements check the caller is not violating the locking
** order: no mutex with an index less than or equal to eMutex may already
** be held by this thread.
*/
static void async_mutex_enter(int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
  assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
  assert( eMutex!=0 || (!mutex_held(0)) );
  pthread_mutex_lock(&primitives.aMutex[eMutex]);
  /* Record the holder so that mutex_held() works inside assert(). */
  TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
}

/*
** Release mutex eMutex, which must be held by the calling thread.
*/
static void async_mutex_leave(int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( mutex_held(eMutex) );
  /* NOTE(review): assigning 0 assumes pthread_t is an arithmetic or
  ** pointer type; POSIX does not guarantee that - confirm for the
  ** platforms this must build on. */
  TESTONLY( primitives.aHolder[eMutex] = 0; )
  pthread_mutex_unlock(&primitives.aMutex[eMutex]);
}

/*
** Wait on condition variable eCond. Mutex eMutex must be held on entry;
** pthread_cond_wait() releases it while waiting and re-acquires it before
** returning, so the holder bookkeeping is cleared and restored around
** the call.
*/
static void async_cond_wait(int eCond, int eMutex){
  assert( eMutex==0 || eMutex==1 || eMutex==2 );
  assert( mutex_held(eMutex) );
  TESTONLY( primitives.aHolder[eMutex] = 0; )
  pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
  TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
}

/*
** Signal condition variable eCond. The caller must hold the queue mutex.
*/
static void async_cond_signal(int eCond){
  assert( mutex_held(ASYNC_MUTEX_QUEUE) );
  pthread_cond_signal(&primitives.aCond[eCond]);
}

/*
** Suggest to the OS that the calling thread be moved off the CPU.
*/
static void async_sched_yield(void){
  sched_yield();
}
#endif
/*
** End of OS specific code.
*************************************************************************/

/* Assert that the calling thread holds mutex X (an ASYNC_MUTEX_xxx value). */
#define assert_mutex_is_held(X) assert( mutex_held(X) )


/* Defaults to 1 unless the build overrides it. See the "writer thread"
** notes near the top of this file for the assumptions this relies on. */
#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
#define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif

/*
** State information is held in the static variable "async" defined
** as the following structure.
**
** Both async.ioError and async.nFile are protected by async.queueMutex.
**
** The initializer below sets every field to zero except bLockFiles,
** which starts out as 1.
*/
static struct TestAsyncStaticData {
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
  volatile int ioDelay;        /* Extra delay between write operations */
  volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
  volatile int bLockFiles;     /* Current value of "lockfiles" parameter */
  int ioError;                 /* True if an IO error has occurred */
  int nFile;                   /* Number of open files (from sqlite pov) */
} async = { 0,0,0,0,0,1,0,0 };

/* Possible values of AsyncWrite.op */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7

/* Names of opcodes.  Used for debugging only.
** Make sure these stay in sync with the macros above! The array is indexed
** directly by the ASYNC_xxx opcode value.
*/
static const char *azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};

/*
** Entries on the write-op queue are instances of the AsyncWrite
** structure, defined here.
**
** The interpretation of the iOffset and nByte variables varies depending
** on the value of AsyncWrite.op:
**
** ASYNC_NOOP:
**     No values used.
**
** ASYNC_WRITE:
**     iOffset -> Offset in file to write to.
**     nByte   -> Number of bytes of data to write (pointed to by zBuf).
**
** ASYNC_SYNC:
**     nByte   -> flags to pass to sqlite3OsSync().
**
** ASYNC_TRUNCATE:
**     iOffset -> Size to truncate file to.
**     nByte   -> Unused.
**
** ASYNC_CLOSE:
**     iOffset -> Unused.
**     nByte   -> Unused.
**
** ASYNC_DELETE:
**     iOffset -> Contains the "syncDir" flag.
**     nByte   -> Number of bytes of zBuf points to (file name).
**
** ASYNC_OPENEXCLUSIVE:
**     iOffset -> Value of "delflag".
**     nByte   -> Number of bytes of zBuf points to (file name).
**
** ASYNC_UNLOCK:
**     nByte   -> Argument to sqlite3OsUnlock().
**
**
** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
** single blob, so is deleted when sqlite3_free() is called on the parent
** structure.
*/
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  sqlite_int64 iOffset;        /* See above */
  int nByte;          /* See above */
  char *zBuf;         /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
  AsyncWrite *pNext;  /* Next write operation (to any file) */
};

/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and stored in the linked list headed by
** async.pLock. Entries are looked up by the full pathname of the opened
** file (see findLock()).
**
** AsyncLock.pList points to the head of a linked list of AsyncFileLock
** structures, one for each handle currently open on the file.
**
** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
** Otherwise, pFile is a file handle opened on the file in question and
** used to obtain the file-system locks required by database connections
** within this process.
**
** See comments above the asyncLock() function for more details on
** the implementation of database locking used by this backend.
*/
struct AsyncLock {
  char *zFile;            /* Path of the file; compared via memcmp() */
  int nFile;              /* Number of bytes in zFile */
  sqlite3_file *pFile;    /* Handle used for file-system locks, or NULL */
  int eLock;              /* Lock level currently held through pFile */
  AsyncFileLock *pList;   /* List of lock records, one per open handle */
  AsyncLock *pNext;       /* Next in linked list headed by async.pLock */
};

/*
** An instance of the following structure is allocated along with each
** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
** file was opened with the SQLITE_OPEN_MAIN_DB.
*/
struct AsyncFileLock {
  int eLock;                /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;           /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;     /* Next entry in the AsyncLock.pList list */
};

/*
** The AsyncFile structure is a subclass of sqlite3_file used for
** asynchronous IO.
**
** All of the actual data for the structure is stored in the structure
** pointed to by AsyncFile.pData, which is allocated as part of the
** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
** lifetime of the AsyncFile structure is ended by the caller after OsClose()
** is called, but the data in AsyncFileData may be required by the
** writer thread after that point.
504 */ 505 struct AsyncFile { 506 sqlite3_io_methods *pMethod; 507 AsyncFileData *pData; 508 }; 509 struct AsyncFileData { 510 char *zName; /* Underlying OS filename - used for debugging */ 511 int nName; /* Number of characters in zName */ 512 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */ 513 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */ 514 AsyncFileLock lock; /* Lock state for this handle */ 515 AsyncLock *pLock; /* AsyncLock object for this file system entry */ 516 AsyncWrite closeOp; /* Preallocated close operation */ 517 }; 518 519 /* 520 ** Add an entry to the end of the global write-op list. pWrite should point 521 ** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer 522 ** thread will call sqlite3_free() to free the structure after the specified 523 ** operation has been completed. 524 ** 525 ** Once an AsyncWrite structure has been added to the list, it becomes the 526 ** property of the writer thread and must not be read or modified by the 527 ** caller. 528 */ 529 static void addAsyncWrite(AsyncWrite *pWrite){ 530 /* We must hold the queue mutex in order to modify the queue pointers */ 531 if( pWrite->op!=ASYNC_UNLOCK ){ 532 async_mutex_enter(ASYNC_MUTEX_QUEUE); 533 } 534 535 /* Add the record to the end of the write-op queue */ 536 assert( !pWrite->pNext ); 537 if( async.pQueueLast ){ 538 assert( async.pQueueFirst ); 539 async.pQueueLast->pNext = pWrite; 540 }else{ 541 async.pQueueFirst = pWrite; 542 } 543 async.pQueueLast = pWrite; 544 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op], 545 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset)); 546 547 if( pWrite->op==ASYNC_CLOSE ){ 548 async.nFile--; 549 } 550 551 /* The writer thread might have been idle because there was nothing 552 ** on the write-op queue for it to do. So wake it up. 
*/ 553 async_cond_signal(ASYNC_COND_QUEUE); 554 555 /* Drop the queue mutex */ 556 if( pWrite->op!=ASYNC_UNLOCK ){ 557 async_mutex_leave(ASYNC_MUTEX_QUEUE); 558 } 559 } 560 561 /* 562 ** Increment async.nFile in a thread-safe manner. 563 */ 564 static void incrOpenFileCount(void){ 565 /* We must hold the queue mutex in order to modify async.nFile */ 566 async_mutex_enter(ASYNC_MUTEX_QUEUE); 567 if( async.nFile==0 ){ 568 async.ioError = SQLITE_OK; 569 } 570 async.nFile++; 571 async_mutex_leave(ASYNC_MUTEX_QUEUE); 572 } 573 574 /* 575 ** This is a utility function to allocate and populate a new AsyncWrite 576 ** structure and insert it (via addAsyncWrite() ) into the global list. 577 */ 578 static int addNewAsyncWrite( 579 AsyncFileData *pFileData, 580 int op, 581 sqlite3_int64 iOffset, 582 int nByte, 583 const char *zByte 584 ){ 585 AsyncWrite *p; 586 if( op!=ASYNC_CLOSE && async.ioError ){ 587 return async.ioError; 588 } 589 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0)); 590 if( !p ){ 591 /* The upper layer does not expect operations like OsWrite() to 592 ** return SQLITE_NOMEM. This is partly because under normal conditions 593 ** SQLite is required to do rollback without calling malloc(). So 594 ** if malloc() fails here, treat it as an I/O error. The above 595 ** layer knows how to handle that. 596 */ 597 return SQLITE_IOERR; 598 } 599 p->op = op; 600 p->iOffset = iOffset; 601 p->nByte = nByte; 602 p->pFileData = pFileData; 603 p->pNext = 0; 604 if( zByte ){ 605 p->zBuf = (char *)&p[1]; 606 memcpy(p->zBuf, zByte, nByte); 607 }else{ 608 p->zBuf = 0; 609 } 610 addAsyncWrite(p); 611 return SQLITE_OK; 612 } 613 614 /* 615 ** Close the file. This just adds an entry to the write-op list, the file is 616 ** not actually closed. 
617 */ 618 static int asyncClose(sqlite3_file *pFile){ 619 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 620 621 /* Unlock the file, if it is locked */ 622 async_mutex_enter(ASYNC_MUTEX_LOCK); 623 p->lock.eLock = 0; 624 async_mutex_leave(ASYNC_MUTEX_LOCK); 625 626 addAsyncWrite(&p->closeOp); 627 return SQLITE_OK; 628 } 629 630 /* 631 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of 632 ** writing to the underlying file, this function adds an entry to the end of 633 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be 634 ** returned. 635 */ 636 static int asyncWrite( 637 sqlite3_file *pFile, 638 const void *pBuf, 639 int amt, 640 sqlite3_int64 iOff 641 ){ 642 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 643 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf); 644 } 645 646 /* 647 ** Read data from the file. First we read from the filesystem, then adjust 648 ** the contents of the buffer based on ASYNC_WRITE operations in the 649 ** write-op queue. 650 ** 651 ** This method holds the mutex from start to finish. 652 */ 653 static int asyncRead( 654 sqlite3_file *pFile, 655 void *zOut, 656 int iAmt, 657 sqlite3_int64 iOffset 658 ){ 659 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 660 int rc = SQLITE_OK; 661 sqlite3_int64 filesize; 662 int nRead; 663 sqlite3_file *pBase = p->pBaseRead; 664 665 /* Grab the write queue mutex for the duration of the call */ 666 async_mutex_enter(ASYNC_MUTEX_QUEUE); 667 668 /* If an I/O error has previously occurred in this virtual file 669 ** system, then all subsequent operations fail. 
670 */ 671 if( async.ioError!=SQLITE_OK ){ 672 rc = async.ioError; 673 goto asyncread_out; 674 } 675 676 if( pBase->pMethods ){ 677 rc = pBase->pMethods->xFileSize(pBase, &filesize); 678 if( rc!=SQLITE_OK ){ 679 goto asyncread_out; 680 } 681 nRead = MIN(filesize - iOffset, iAmt); 682 if( nRead>0 ){ 683 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset); 684 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset)); 685 } 686 } 687 688 if( rc==SQLITE_OK ){ 689 AsyncWrite *pWrite; 690 char *zName = p->zName; 691 692 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 693 if( pWrite->op==ASYNC_WRITE && ( 694 (pWrite->pFileData==p) || 695 (zName && pWrite->pFileData->zName==zName) 696 )){ 697 int iBeginOut = (pWrite->iOffset-iOffset); 698 int iBeginIn = -iBeginOut; 699 int nCopy; 700 701 if( iBeginIn<0 ) iBeginIn = 0; 702 if( iBeginOut<0 ) iBeginOut = 0; 703 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut); 704 705 if( nCopy>0 ){ 706 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy); 707 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset)); 708 } 709 } 710 } 711 } 712 713 asyncread_out: 714 async_mutex_leave(ASYNC_MUTEX_QUEUE); 715 return rc; 716 } 717 718 /* 719 ** Truncate the file to nByte bytes in length. This just adds an entry to 720 ** the write-op list, no IO actually takes place. 721 */ 722 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){ 723 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 724 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0); 725 } 726 727 /* 728 ** Sync the file. This just adds an entry to the write-op list, the 729 ** sync() is done later by sqlite3_async_flush(). 730 */ 731 static int asyncSync(sqlite3_file *pFile, int flags){ 732 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 733 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0); 734 } 735 736 /* 737 ** Read the size of the file. 
First we read the size of the file system 738 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations 739 ** currently in the write-op list. 740 ** 741 ** This method holds the mutex from start to finish. 742 */ 743 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){ 744 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 745 int rc = SQLITE_OK; 746 sqlite3_int64 s = 0; 747 sqlite3_file *pBase; 748 749 async_mutex_enter(ASYNC_MUTEX_QUEUE); 750 751 /* Read the filesystem size from the base file. If pBaseRead is NULL, this 752 ** means the file hasn't been opened yet. In this case all relevant data 753 ** must be in the write-op queue anyway, so we can omit reading from the 754 ** file-system. 755 */ 756 pBase = p->pBaseRead; 757 if( pBase->pMethods ){ 758 rc = pBase->pMethods->xFileSize(pBase, &s); 759 } 760 761 if( rc==SQLITE_OK ){ 762 AsyncWrite *pWrite; 763 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 764 if( pWrite->op==ASYNC_DELETE 765 && p->zName 766 && strcmp(p->zName, pWrite->zBuf)==0 767 ){ 768 s = 0; 769 }else if( pWrite->pFileData && ( 770 (pWrite->pFileData==p) 771 || (p->zName && pWrite->pFileData->zName==p->zName) 772 )){ 773 switch( pWrite->op ){ 774 case ASYNC_WRITE: 775 s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s); 776 break; 777 case ASYNC_TRUNCATE: 778 s = MIN(s, pWrite->iOffset); 779 break; 780 } 781 } 782 } 783 *piSize = s; 784 } 785 async_mutex_leave(ASYNC_MUTEX_QUEUE); 786 return rc; 787 } 788 789 /* 790 ** Lock or unlock the actual file-system entry. 
791 */ 792 static int getFileLock(AsyncLock *pLock){ 793 int rc = SQLITE_OK; 794 AsyncFileLock *pIter; 795 int eRequired = 0; 796 797 if( pLock->pFile ){ 798 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ 799 assert(pIter->eAsyncLock>=pIter->eLock); 800 if( pIter->eAsyncLock>eRequired ){ 801 eRequired = pIter->eAsyncLock; 802 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE); 803 } 804 } 805 806 if( eRequired>pLock->eLock ){ 807 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired); 808 if( rc==SQLITE_OK ){ 809 pLock->eLock = eRequired; 810 } 811 } 812 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){ 813 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired); 814 if( rc==SQLITE_OK ){ 815 pLock->eLock = eRequired; 816 } 817 } 818 } 819 820 return rc; 821 } 822 823 /* 824 ** Return the AsyncLock structure from the global async.pLock list 825 ** associated with the file-system entry identified by path zName 826 ** (a string of nName bytes). If no such structure exists, return 0. 827 */ 828 static AsyncLock *findLock(const char *zName, int nName){ 829 AsyncLock *p = async.pLock; 830 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){ 831 p = p->pNext; 832 } 833 return p; 834 } 835 836 /* 837 ** The following two methods - asyncLock() and asyncUnlock() - are used 838 ** to obtain and release locks on database files opened with the 839 ** asynchronous backend. 
*/

/*
** Attempt to raise the lock held by handle pFile to level eLock. Files
** without a name are never locked and always report SQLITE_OK.
**
** Returns SQLITE_BUSY if another handle on the same file holds a
** conflicting in-process lock, otherwise the result of getFileLock().
*/
static int asyncLock(sqlite3_file *pFile, int eLock){
  int rc = SQLITE_OK;
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;

  if( p->zName ){
    async_mutex_enter(ASYNC_MUTEX_LOCK);
    /* Nothing to do if the handle already holds eLock or better. */
    if( p->lock.eLock<eLock ){
      AsyncLock *pLock = p->pLock;
      AsyncFileLock *pIter;
      assert(pLock && pLock->pList);
      /* Check every other handle on this file for a conflicting lock. The
      ** scan does not stop at the first conflict; rc simply stays BUSY. */
      for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
        if( pIter!=&p->lock && (
          (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
          (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
          (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
          (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
        )){
          rc = SQLITE_BUSY;
        }
      }
      if( rc==SQLITE_OK ){
        p->lock.eLock = eLock;
        /* eAsyncLock is only ever raised here, never lowered. */
        p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
      }
      assert(p->lock.eAsyncLock>=p->lock.eLock);
      if( rc==SQLITE_OK ){
        /* Bring the file-system lock into line with the new state. */
        rc = getFileLock(pLock);
      }
    }
    async_mutex_leave(ASYNC_MUTEX_LOCK);
  }

  ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
  return rc;
}

/*
** Lower the lock held by handle pFile to level eLock. The lock state
** visible to SQLite (lock.eLock) is lowered immediately, and an
** ASYNC_UNLOCK entry is appended to the write-op queue.
**
** Returns the result of addNewAsyncWrite(), or SQLITE_OK for files
** without a name.
*/
static int asyncUnlock(sqlite3_file *pFile, int eLock){
  int rc = SQLITE_OK;
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
  if( p->zName ){
    AsyncFileLock *pLock = &p->lock;
    /* Mutex order is queue first, then lock. The queue mutex is taken
    ** here because addAsyncWrite() does not take it for ASYNC_UNLOCK
    ** entries. */
    async_mutex_enter(ASYNC_MUTEX_QUEUE);
    async_mutex_enter(ASYNC_MUTEX_LOCK);
    pLock->eLock = MIN(pLock->eLock, eLock);
    rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
    async_mutex_leave(ASYNC_MUTEX_LOCK);
    async_mutex_leave(ASYNC_MUTEX_QUEUE);
  }
  return rc;
}

/*
** This function is called when the pager layer first opens a database file
** and is checking for a hot-journal.
894 */ 895 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){ 896 int ret = 0; 897 AsyncFileLock *pIter; 898 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 899 900 async_mutex_enter(ASYNC_MUTEX_LOCK); 901 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){ 902 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){ 903 ret = 1; 904 } 905 } 906 async_mutex_leave(ASYNC_MUTEX_LOCK); 907 908 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName)); 909 *pResOut = ret; 910 return SQLITE_OK; 911 } 912 913 /* 914 ** sqlite3_file_control() implementation. 915 */ 916 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){ 917 switch( op ){ 918 case SQLITE_FCNTL_LOCKSTATE: { 919 async_mutex_enter(ASYNC_MUTEX_LOCK); 920 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock; 921 async_mutex_leave(ASYNC_MUTEX_LOCK); 922 return SQLITE_OK; 923 } 924 } 925 return SQLITE_ERROR; 926 } 927 928 /* 929 ** Return the device characteristics and sector-size of the device. It 930 ** is not tricky to implement these correctly, as this backend might 931 ** not have an open file handle at this point. 
932 */ 933 static int asyncSectorSize(sqlite3_file *pFile){ 934 return 512; 935 } 936 static int asyncDeviceCharacteristics(sqlite3_file *pFile){ 937 return 0; 938 } 939 940 static int unlinkAsyncFile(AsyncFileData *pData){ 941 AsyncFileLock **ppIter; 942 int rc = SQLITE_OK; 943 944 if( pData->zName ){ 945 AsyncLock *pLock = pData->pLock; 946 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){ 947 if( (*ppIter)==&pData->lock ){ 948 *ppIter = pData->lock.pNext; 949 break; 950 } 951 } 952 if( !pLock->pList ){ 953 AsyncLock **pp; 954 if( pLock->pFile ){ 955 pLock->pFile->pMethods->xClose(pLock->pFile); 956 } 957 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext)); 958 *pp = pLock->pNext; 959 sqlite3_free(pLock); 960 }else{ 961 rc = getFileLock(pLock); 962 } 963 } 964 965 return rc; 966 } 967 968 /* 969 ** The parameter passed to this function is a copy of a 'flags' parameter 970 ** passed to this modules xOpen() method. This function returns true 971 ** if the file should be opened asynchronously, or false if it should 972 ** be opened immediately. 973 ** 974 ** If the file is to be opened asynchronously, then asyncOpen() will add 975 ** an entry to the event queue and the file will not actually be opened 976 ** until the event is processed. Otherwise, the file is opened directly 977 ** by the caller. 978 */ 979 static int doAsynchronousOpen(int flags){ 980 return (flags&SQLITE_OPEN_CREATE) && ( 981 (flags&SQLITE_OPEN_MAIN_JOURNAL) || 982 (flags&SQLITE_OPEN_TEMP_JOURNAL) || 983 (flags&SQLITE_OPEN_DELETEONCLOSE) 984 ); 985 } 986 987 /* 988 ** Open a file. 
989 */ 990 static int asyncOpen( 991 sqlite3_vfs *pAsyncVfs, 992 const char *zName, 993 sqlite3_file *pFile, 994 int flags, 995 int *pOutFlags 996 ){ 997 static sqlite3_io_methods async_methods = { 998 1, /* iVersion */ 999 asyncClose, /* xClose */ 1000 asyncRead, /* xRead */ 1001 asyncWrite, /* xWrite */ 1002 asyncTruncate, /* xTruncate */ 1003 asyncSync, /* xSync */ 1004 asyncFileSize, /* xFileSize */ 1005 asyncLock, /* xLock */ 1006 asyncUnlock, /* xUnlock */ 1007 asyncCheckReservedLock, /* xCheckReservedLock */ 1008 asyncFileControl, /* xFileControl */ 1009 asyncSectorSize, /* xSectorSize */ 1010 asyncDeviceCharacteristics /* xDeviceCharacteristics */ 1011 }; 1012 1013 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1014 AsyncFile *p = (AsyncFile *)pFile; 1015 int nName = 0; 1016 int rc = SQLITE_OK; 1017 int nByte; 1018 AsyncFileData *pData; 1019 AsyncLock *pLock = 0; 1020 char *z; 1021 int isAsyncOpen = doAsynchronousOpen(flags); 1022 1023 /* If zName is NULL, then the upper layer is requesting an anonymous file */ 1024 if( zName ){ 1025 nName = strlen(zName)+1; 1026 } 1027 1028 nByte = ( 1029 sizeof(AsyncFileData) + /* AsyncFileData structure */ 1030 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */ 1031 nName /* AsyncFileData.zName */ 1032 ); 1033 z = sqlite3_malloc(nByte); 1034 if( !z ){ 1035 return SQLITE_NOMEM; 1036 } 1037 memset(z, 0, nByte); 1038 pData = (AsyncFileData*)z; 1039 z += sizeof(pData[0]); 1040 pData->pBaseRead = (sqlite3_file*)z; 1041 z += pVfs->szOsFile; 1042 pData->pBaseWrite = (sqlite3_file*)z; 1043 pData->closeOp.pFileData = pData; 1044 pData->closeOp.op = ASYNC_CLOSE; 1045 1046 if( zName ){ 1047 z += pVfs->szOsFile; 1048 pData->zName = z; 1049 pData->nName = nName; 1050 memcpy(pData->zName, zName, nName); 1051 } 1052 1053 if( !isAsyncOpen ){ 1054 int flagsout; 1055 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout); 1056 if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){ 1057 rc = 
pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0); 1058 } 1059 if( pOutFlags ){ 1060 *pOutFlags = flagsout; 1061 } 1062 } 1063 1064 async_mutex_enter(ASYNC_MUTEX_LOCK); 1065 1066 if( zName && rc==SQLITE_OK ){ 1067 pLock = findLock(pData->zName, pData->nName); 1068 if( !pLock ){ 1069 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1; 1070 pLock = (AsyncLock *)sqlite3_malloc(nByte); 1071 if( pLock ){ 1072 memset(pLock, 0, nByte); 1073 if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){ 1074 pLock->pFile = (sqlite3_file *)&pLock[1]; 1075 rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0); 1076 if( rc!=SQLITE_OK ){ 1077 sqlite3_free(pLock); 1078 pLock = 0; 1079 } 1080 } 1081 if( pLock ){ 1082 pLock->nFile = pData->nName; 1083 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile]; 1084 memcpy(pLock->zFile, pData->zName, pLock->nFile); 1085 pLock->pNext = async.pLock; 1086 async.pLock = pLock; 1087 } 1088 }else{ 1089 rc = SQLITE_NOMEM; 1090 } 1091 } 1092 } 1093 1094 if( rc==SQLITE_OK ){ 1095 p->pMethod = &async_methods; 1096 p->pData = pData; 1097 1098 /* Link AsyncFileData.lock into the linked list of 1099 ** AsyncFileLock structures for this file. 
1100 */ 1101 if( zName ){ 1102 pData->lock.pNext = pLock->pList; 1103 pLock->pList = &pData->lock; 1104 pData->zName = pLock->zFile; 1105 } 1106 }else{ 1107 if( pData->pBaseRead->pMethods ){ 1108 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); 1109 } 1110 if( pData->pBaseWrite->pMethods ){ 1111 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); 1112 } 1113 sqlite3_free(pData); 1114 } 1115 1116 async_mutex_leave(ASYNC_MUTEX_LOCK); 1117 1118 if( rc==SQLITE_OK ){ 1119 incrOpenFileCount(); 1120 pData->pLock = pLock; 1121 } 1122 1123 if( rc==SQLITE_OK && isAsyncOpen ){ 1124 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0); 1125 if( rc==SQLITE_OK ){ 1126 if( pOutFlags ) *pOutFlags = flags; 1127 }else{ 1128 async_mutex_enter(ASYNC_MUTEX_LOCK); 1129 unlinkAsyncFile(pData); 1130 async_mutex_leave(ASYNC_MUTEX_LOCK); 1131 sqlite3_free(pData); 1132 } 1133 } 1134 if( rc!=SQLITE_OK ){ 1135 p->pMethod = 0; 1136 } 1137 return rc; 1138 } 1139 1140 /* 1141 ** Implementation of sqlite3OsDelete. Add an entry to the end of the 1142 ** write-op queue to perform the delete. 1143 */ 1144 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){ 1145 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z); 1146 } 1147 1148 /* 1149 ** Implementation of sqlite3OsAccess. This method holds the mutex from 1150 ** start to finish. 
1151 */ 1152 static int asyncAccess( 1153 sqlite3_vfs *pAsyncVfs, 1154 const char *zName, 1155 int flags, 1156 int *pResOut 1157 ){ 1158 int rc; 1159 int ret; 1160 AsyncWrite *p; 1161 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1162 1163 assert(flags==SQLITE_ACCESS_READWRITE 1164 || flags==SQLITE_ACCESS_READ 1165 || flags==SQLITE_ACCESS_EXISTS 1166 ); 1167 1168 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1169 rc = pVfs->xAccess(pVfs, zName, flags, &ret); 1170 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){ 1171 for(p=async.pQueueFirst; p; p = p->pNext){ 1172 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){ 1173 ret = 0; 1174 }else if( p->op==ASYNC_OPENEXCLUSIVE 1175 && p->pFileData->zName 1176 && 0==strcmp(p->pFileData->zName, zName) 1177 ){ 1178 ret = 1; 1179 } 1180 } 1181 } 1182 ASYNC_TRACE(("ACCESS(%s): %s = %d\n", 1183 flags==SQLITE_ACCESS_READWRITE?"read-write": 1184 flags==SQLITE_ACCESS_READ?"read":"exists" 1185 , zName, ret) 1186 ); 1187 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1188 *pResOut = ret; 1189 return rc; 1190 } 1191 1192 /* 1193 ** Fill in zPathOut with the full path to the file identified by zPath. 1194 */ 1195 static int asyncFullPathname( 1196 sqlite3_vfs *pAsyncVfs, 1197 const char *zPath, 1198 int nPathOut, 1199 char *zPathOut 1200 ){ 1201 int rc; 1202 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1203 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); 1204 1205 /* Because of the way intra-process file locking works, this backend 1206 ** needs to return a canonical path. The following block assumes the 1207 ** file-system uses unix style paths. 1208 */ 1209 if( rc==SQLITE_OK ){ 1210 int i, j; 1211 int n = nPathOut; 1212 char *z = zPathOut; 1213 while( n>1 && z[n-1]=='/' ){ n--; } 1214 for(i=j=0; i<n; i++){ 1215 if( z[i]=='/' ){ 1216 if( z[i+1]=='/' ) continue; 1217 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 1218 i += 1; 1219 continue; 1220 } 1221 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' 
&& z[i+3]=='/' ){ 1222 while( j>0 && z[j-1]!='/' ){ j--; } 1223 if( j>0 ){ j--; } 1224 i += 2; 1225 continue; 1226 } 1227 } 1228 z[j++] = z[i]; 1229 } 1230 z[j] = 0; 1231 } 1232 1233 return rc; 1234 } 1235 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){ 1236 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1237 return pVfs->xDlOpen(pVfs, zPath); 1238 } 1239 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){ 1240 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1241 pVfs->xDlError(pVfs, nByte, zErrMsg); 1242 } 1243 static void (*asyncDlSym( 1244 sqlite3_vfs *pAsyncVfs, 1245 void *pHandle, 1246 const char *zSymbol 1247 ))(void){ 1248 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1249 return pVfs->xDlSym(pVfs, pHandle, zSymbol); 1250 } 1251 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){ 1252 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1253 pVfs->xDlClose(pVfs, pHandle); 1254 } 1255 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){ 1256 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1257 return pVfs->xRandomness(pVfs, nByte, zBufOut); 1258 } 1259 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){ 1260 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1261 return pVfs->xSleep(pVfs, nMicro); 1262 } 1263 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){ 1264 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1265 return pVfs->xCurrentTime(pVfs, pTimeOut); 1266 } 1267 1268 static sqlite3_vfs async_vfs = { 1269 1, /* iVersion */ 1270 sizeof(AsyncFile), /* szOsFile */ 1271 0, /* mxPathname */ 1272 0, /* pNext */ 1273 SQLITEASYNC_VFSNAME, /* zName */ 1274 0, /* pAppData */ 1275 asyncOpen, /* xOpen */ 1276 asyncDelete, /* xDelete */ 1277 asyncAccess, /* xAccess */ 1278 asyncFullPathname, /* xFullPathname */ 1279 asyncDlOpen, /* xDlOpen */ 1280 asyncDlError, /* xDlError */ 1281 asyncDlSym, /* xDlSym */ 
1282 asyncDlClose, /* xDlClose */ 1283 asyncRandomness, /* xDlError */ 1284 asyncSleep, /* xDlSym */ 1285 asyncCurrentTime /* xDlClose */ 1286 }; 1287 1288 /* 1289 ** This procedure runs in a separate thread, reading messages off of the 1290 ** write queue and processing them one by one. 1291 ** 1292 ** If async.writerHaltNow is true, then this procedure exits 1293 ** after processing a single message. 1294 ** 1295 ** If async.writerHaltWhenIdle is true, then this procedure exits when 1296 ** the write queue is empty. 1297 ** 1298 ** If both of the above variables are false, this procedure runs 1299 ** indefinately, waiting for operations to be added to the write queue 1300 ** and processing them in the order in which they arrive. 1301 ** 1302 ** An artifical delay of async.ioDelay milliseconds is inserted before 1303 ** each write operation in order to simulate the effect of a slow disk. 1304 ** 1305 ** Only one instance of this procedure may be running at a time. 1306 */ 1307 static void asyncWriterThread(void){ 1308 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData); 1309 AsyncWrite *p = 0; 1310 int rc = SQLITE_OK; 1311 int holdingMutex = 0; 1312 1313 async_mutex_enter(ASYNC_MUTEX_WRITER); 1314 1315 while( async.eHalt!=SQLITEASYNC_HALT_NOW ){ 1316 int doNotFree = 0; 1317 sqlite3_file *pBase = 0; 1318 1319 if( !holdingMutex ){ 1320 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1321 } 1322 while( (p = async.pQueueFirst)==0 ){ 1323 if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){ 1324 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1325 break; 1326 }else{ 1327 ASYNC_TRACE(("IDLE\n")); 1328 async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE); 1329 ASYNC_TRACE(("WAKEUP\n")); 1330 } 1331 } 1332 if( p==0 ) break; 1333 holdingMutex = 1; 1334 1335 /* Right now this thread is holding the mutex on the write-op queue. 1336 ** Variable 'p' points to the first entry in the write-op queue. In 1337 ** the general case, we hold on to the mutex for the entire body of 1338 ** the loop. 
1339 ** 1340 ** However in the cases enumerated below, we relinquish the mutex, 1341 ** perform the IO, and then re-request the mutex before removing 'p' from 1342 ** the head of the write-op queue. The idea is to increase concurrency with 1343 ** sqlite threads. 1344 ** 1345 ** * An ASYNC_CLOSE operation. 1346 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish 1347 ** the mutex, call the underlying xOpenExclusive() function, then 1348 ** re-aquire the mutex before seting the AsyncFile.pBaseRead 1349 ** variable. 1350 ** * ASYNC_SYNC and ASYNC_WRITE operations, if 1351 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two 1352 ** file-handles are open for the particular file being "synced". 1353 */ 1354 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){ 1355 p->op = ASYNC_NOOP; 1356 } 1357 if( p->pFileData ){ 1358 pBase = p->pFileData->pBaseWrite; 1359 if( 1360 p->op==ASYNC_CLOSE || 1361 p->op==ASYNC_OPENEXCLUSIVE || 1362 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) 1363 ){ 1364 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1365 holdingMutex = 0; 1366 } 1367 if( !pBase->pMethods ){ 1368 pBase = p->pFileData->pBaseRead; 1369 } 1370 } 1371 1372 switch( p->op ){ 1373 case ASYNC_NOOP: 1374 break; 1375 1376 case ASYNC_WRITE: 1377 assert( pBase ); 1378 ASYNC_TRACE(("WRITE %s %d bytes at %d\n", 1379 p->pFileData->zName, p->nByte, p->iOffset)); 1380 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset); 1381 break; 1382 1383 case ASYNC_SYNC: 1384 assert( pBase ); 1385 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName)); 1386 rc = pBase->pMethods->xSync(pBase, p->nByte); 1387 break; 1388 1389 case ASYNC_TRUNCATE: 1390 assert( pBase ); 1391 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", 1392 p->pFileData->zName, p->iOffset)); 1393 rc = pBase->pMethods->xTruncate(pBase, p->iOffset); 1394 break; 1395 1396 case ASYNC_CLOSE: { 1397 AsyncFileData *pData = p->pFileData; 1398 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName)); 
1399 if( pData->pBaseWrite->pMethods ){ 1400 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); 1401 } 1402 if( pData->pBaseRead->pMethods ){ 1403 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); 1404 } 1405 1406 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock 1407 ** structures for this file. Obtain the async.lockMutex mutex 1408 ** before doing so. 1409 */ 1410 async_mutex_enter(ASYNC_MUTEX_LOCK); 1411 rc = unlinkAsyncFile(pData); 1412 async_mutex_leave(ASYNC_MUTEX_LOCK); 1413 1414 if( !holdingMutex ){ 1415 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1416 holdingMutex = 1; 1417 } 1418 assert_mutex_is_held(ASYNC_MUTEX_QUEUE); 1419 async.pQueueFirst = p->pNext; 1420 sqlite3_free(pData); 1421 doNotFree = 1; 1422 break; 1423 } 1424 1425 case ASYNC_UNLOCK: { 1426 AsyncWrite *pIter; 1427 AsyncFileData *pData = p->pFileData; 1428 int eLock = p->nByte; 1429 1430 /* When a file is locked by SQLite using the async backend, it is 1431 ** locked within the 'real' file-system synchronously. When it is 1432 ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to 1433 ** unlock the file asynchronously. The design of the async backend 1434 ** requires that the 'real' file-system file be locked from the 1435 ** time that SQLite first locks it (and probably reads from it) 1436 ** until all asynchronous write events that were scheduled before 1437 ** SQLite unlocked the file have been processed. 1438 ** 1439 ** This is more complex if SQLite locks and unlocks the file multiple 1440 ** times in quick succession. For example, if SQLite does: 1441 ** 1442 ** lock, write, unlock, lock, write, unlock 1443 ** 1444 ** Each "lock" operation locks the file immediately. Each "write" 1445 ** and "unlock" operation adds an event to the event queue. 
If the 1446 ** second "lock" operation is performed before the first "unlock" 1447 ** operation has been processed asynchronously, then the first 1448 ** "unlock" cannot be safely processed as is, since this would mean 1449 ** the file was unlocked when the second "write" operation is 1450 ** processed. To work around this, when processing an ASYNC_UNLOCK 1451 ** operation, SQLite: 1452 ** 1453 ** 1) Unlocks the file to the minimum of the argument passed to 1454 ** the xUnlock() call and the current lock from SQLite's point 1455 ** of view, and 1456 ** 1457 ** 2) Only unlocks the file at all if this event is the last 1458 ** ASYNC_UNLOCK event on this file in the write-queue. 1459 */ 1460 assert( holdingMutex==1 ); 1461 assert( async.pQueueFirst==p ); 1462 for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){ 1463 if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break; 1464 } 1465 if( !pIter ){ 1466 async_mutex_enter(ASYNC_MUTEX_LOCK); 1467 pData->lock.eAsyncLock = MIN( 1468 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock) 1469 ); 1470 assert(pData->lock.eAsyncLock>=pData->lock.eLock); 1471 rc = getFileLock(pData->pLock); 1472 async_mutex_leave(ASYNC_MUTEX_LOCK); 1473 } 1474 break; 1475 } 1476 1477 case ASYNC_DELETE: 1478 ASYNC_TRACE(("DELETE %s\n", p->zBuf)); 1479 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset); 1480 break; 1481 1482 case ASYNC_OPENEXCLUSIVE: { 1483 int flags = (int)p->iOffset; 1484 AsyncFileData *pData = p->pFileData; 1485 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset)); 1486 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0); 1487 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0); 1488 assert( holdingMutex==0 ); 1489 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1490 holdingMutex = 1; 1491 break; 1492 } 1493 1494 default: assert(!"Illegal value for AsyncWrite.op"); 1495 } 1496 1497 /* If we didn't hang on to the mutex during the IO op, obtain it now 1498 ** so that the 
AsyncWrite structure can be safely removed from the 1499 ** global write-op queue. 1500 */ 1501 if( !holdingMutex ){ 1502 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1503 holdingMutex = 1; 1504 } 1505 /* ASYNC_TRACE(("UNLINK %p\n", p)); */ 1506 if( p==async.pQueueLast ){ 1507 async.pQueueLast = 0; 1508 } 1509 if( !doNotFree ){ 1510 assert_mutex_is_held(ASYNC_MUTEX_QUEUE); 1511 async.pQueueFirst = p->pNext; 1512 sqlite3_free(p); 1513 } 1514 assert( holdingMutex ); 1515 1516 /* An IO error has occurred. We cannot report the error back to the 1517 ** connection that requested the I/O since the error happened 1518 ** asynchronously. The connection has already moved on. There 1519 ** really is nobody to report the error to. 1520 ** 1521 ** The file for which the error occurred may have been a database or 1522 ** journal file. Regardless, none of the currently queued operations 1523 ** associated with the same database should now be performed. Nor should 1524 ** any subsequently requested IO on either a database or journal file 1525 ** handle for the same database be accepted until the main database 1526 ** file handle has been closed and reopened. 1527 ** 1528 ** Furthermore, no further IO should be queued or performed on any file 1529 ** handle associated with a database that may have been part of a 1530 ** multi-file transaction that included the database associated with 1531 ** the IO error (i.e. a database ATTACHed to the same handle at some 1532 ** point in time). 1533 */ 1534 if( rc!=SQLITE_OK ){ 1535 async.ioError = rc; 1536 } 1537 1538 if( async.ioError && !async.pQueueFirst ){ 1539 async_mutex_enter(ASYNC_MUTEX_LOCK); 1540 if( 0==async.pLock ){ 1541 async.ioError = SQLITE_OK; 1542 } 1543 async_mutex_leave(ASYNC_MUTEX_LOCK); 1544 } 1545 1546 /* Drop the queue mutex before continuing to the next write operation 1547 ** in order to give other threads a chance to work with the write queue. 
1548 */ 1549 if( !async.pQueueFirst || !async.ioError ){ 1550 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1551 holdingMutex = 0; 1552 if( async.ioDelay>0 ){ 1553 pVfs->xSleep(pVfs, async.ioDelay*1000); 1554 }else{ 1555 async_sched_yield(); 1556 } 1557 } 1558 } 1559 1560 async_mutex_leave(ASYNC_MUTEX_WRITER); 1561 return; 1562 } 1563 1564 /* 1565 ** Install the asynchronous VFS. 1566 */ 1567 int sqlite3async_initialize(const char *zParent, int isDefault){ 1568 int rc = SQLITE_OK; 1569 if( async_vfs.pAppData==0 ){ 1570 sqlite3_vfs *pParent = sqlite3_vfs_find(zParent); 1571 if( !pParent || async_os_initialize() ){ 1572 rc = SQLITE_ERROR; 1573 }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){ 1574 async_os_shutdown(); 1575 }else{ 1576 async_vfs.pAppData = (void *)pParent; 1577 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname; 1578 } 1579 } 1580 return rc; 1581 } 1582 1583 /* 1584 ** Uninstall the asynchronous VFS. 1585 */ 1586 void sqlite3async_shutdown(void){ 1587 if( async_vfs.pAppData ){ 1588 async_os_shutdown(); 1589 sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs); 1590 async_vfs.pAppData = 0; 1591 } 1592 } 1593 1594 /* 1595 ** Process events on the write-queue. 1596 */ 1597 void sqlite3async_run(void){ 1598 asyncWriterThread(); 1599 } 1600 1601 /* 1602 ** Control/configure the asynchronous IO system. 
1603 */ 1604 int sqlite3async_control(int op, ...){ 1605 va_list ap; 1606 va_start(ap, op); 1607 switch( op ){ 1608 case SQLITEASYNC_HALT: { 1609 int eWhen = va_arg(ap, int); 1610 if( eWhen!=SQLITEASYNC_HALT_NEVER 1611 && eWhen!=SQLITEASYNC_HALT_NOW 1612 && eWhen!=SQLITEASYNC_HALT_IDLE 1613 ){ 1614 return SQLITE_MISUSE; 1615 } 1616 async.eHalt = eWhen; 1617 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1618 async_cond_signal(ASYNC_COND_QUEUE); 1619 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1620 break; 1621 } 1622 1623 case SQLITEASYNC_DELAY: { 1624 int iDelay = va_arg(ap, int); 1625 if( iDelay<0 ){ 1626 return SQLITE_MISUSE; 1627 } 1628 async.ioDelay = iDelay; 1629 break; 1630 } 1631 1632 case SQLITEASYNC_LOCKFILES: { 1633 int bLock = va_arg(ap, int); 1634 async_mutex_enter(ASYNC_MUTEX_QUEUE); 1635 if( async.nFile || async.pQueueFirst ){ 1636 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1637 return SQLITE_MISUSE; 1638 } 1639 async.bLockFiles = bLock; 1640 async_mutex_leave(ASYNC_MUTEX_QUEUE); 1641 break; 1642 } 1643 1644 case SQLITEASYNC_GET_HALT: { 1645 int *peWhen = va_arg(ap, int *); 1646 *peWhen = async.eHalt; 1647 break; 1648 } 1649 case SQLITEASYNC_GET_DELAY: { 1650 int *piDelay = va_arg(ap, int *); 1651 *piDelay = async.ioDelay; 1652 break; 1653 } 1654 case SQLITEASYNC_GET_LOCKFILES: { 1655 int *piDelay = va_arg(ap, int *); 1656 *piDelay = async.bLockFiles; 1657 break; 1658 } 1659 1660 default: 1661 return SQLITE_ERROR; 1662 } 1663 return SQLITE_OK; 1664 } 1665 1666 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */ 1667 1668