/*
** 2005 December 14
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** $Id: test_async.c,v 1.46 2008/09/15 14:08:04 danielk1977 Exp $
**
** This file contains an example implementation of an asynchronous IO
** backend for SQLite.
**
** WHAT IS ASYNCHRONOUS I/O?
**
** With asynchronous I/O, write requests are handled by a separate thread
** running in the background.  This means that the thread that initiates
** a database write does not have to wait for (sometimes slow) disk I/O
** to occur.  The write seems to happen very quickly, though in reality
** it is happening at its usual slow pace in the background.
**
** Asynchronous I/O appears to give better responsiveness, but at a price.
** You lose the Durable property.  With the default I/O backend of SQLite,
** once a write completes, you know that the information you wrote is
** safely on disk.  With the asynchronous I/O, this is not the case.  If
** your program crashes or if a power loss occurs after the database
** write but before the asynchronous write thread has completed, then the
** database change might never make it to disk and the next user of the
** database might not see your change.
**
** You lose Durability with asynchronous I/O, but you still retain the
** other parts of ACID:  Atomic,  Consistent, and Isolated.  Many
** applications get along fine without the Durability.
**
** HOW IT WORKS
**
** Asynchronous I/O works by creating a special SQLite "vfs" structure
** and registering it with sqlite3_vfs_register().
** When files opened via
** this vfs are written to (using sqlite3OsWrite()), the data is not
** written directly to disk, but is placed in the "write-queue" to be
** handled by the background thread.
**
** When files opened with the asynchronous vfs are read from
** (using sqlite3OsRead()), the data is read from the file on
** disk and the write-queue, so that from the point of view of
** the vfs reader the OsWrite() appears to have already completed.
**
** The special vfs is registered (and unregistered) by calls to
** function asyncEnable() (see below).
**
** LIMITATIONS
**
** This demonstration code is deliberately kept simple in order to keep
** the main ideas clear and easy to understand.  Real applications that
** want to do asynchronous I/O might want to add additional capabilities.
** For example, in this demonstration if writes are happening at a steady
** stream that exceeds the I/O capability of the background writer thread,
** the queue of pending write operations will grow without bound until we
** run out of memory.  Users of this technique may want to keep track of
** the quantity of pending writes and stop accepting new write requests
** when the buffer gets to be too big.
**
** LOCKING + CONCURRENCY
**
** Multiple connections from within a single process that use this
** implementation of asynchronous IO may access a single database
** file concurrently. From the point of view of the user, if all
** connections are from within a single process, there is no difference
** between the concurrency offered by "normal" SQLite and SQLite
** using the asynchronous backend.
**
** If connections from within multiple processes may access the
** database file, the ENABLE_FILE_LOCKING symbol (see below) must be
** defined. If it is not defined, then no locks are established on
** the database file.
In this case, if multiple processes access 80 ** the database file, corruption will quickly result. 81 ** 82 ** If ENABLE_FILE_LOCKING is defined (the default), then connections 83 ** from within multiple processes may access a single database file 84 ** without risking corruption. However concurrency is reduced as 85 ** follows: 86 ** 87 ** * When a connection using asynchronous IO begins a database 88 ** transaction, the database is locked immediately. However the 89 ** lock is not released until after all relevant operations 90 ** in the write-queue have been flushed to disk. This means 91 ** (for example) that the database may remain locked for some 92 ** time after a "COMMIT" or "ROLLBACK" is issued. 93 ** 94 ** * If an application using asynchronous IO executes transactions 95 ** in quick succession, other database users may be effectively 96 ** locked out of the database. This is because when a BEGIN 97 ** is executed, a database lock is established immediately. But 98 ** when the corresponding COMMIT or ROLLBACK occurs, the lock 99 ** is not released until the relevant part of the write-queue 100 ** has been flushed through. As a result, if a COMMIT is followed 101 ** by a BEGIN before the write-queue is flushed through, the database 102 ** is never unlocked,preventing other processes from accessing 103 ** the database. 104 ** 105 ** Defining ENABLE_FILE_LOCKING when using an NFS or other remote 106 ** file-system may slow things down, as synchronous round-trips to the 107 ** server may be required to establish database file locks. 108 */ 109 #define ENABLE_FILE_LOCKING 110 111 #ifndef SQLITE_AMALGAMATION 112 # include "sqlite3.h" 113 # include <assert.h> 114 # include <string.h> 115 #endif 116 #include <tcl.h> 117 118 /* 119 ** This test uses pthreads and hence only works on unix and with 120 ** a threadsafe build of SQLite. 121 */ 122 #if SQLITE_OS_UNIX && SQLITE_THREADSAFE 123 124 /* 125 ** This demo uses pthreads. 
If you do not have a pthreads implementation 126 ** for your operating system, you will need to recode the threading 127 ** logic. 128 */ 129 #include <pthread.h> 130 #include <sched.h> 131 132 /* Useful macros used in several places */ 133 #define MIN(x,y) ((x)<(y)?(x):(y)) 134 #define MAX(x,y) ((x)>(y)?(x):(y)) 135 136 /* Forward references */ 137 typedef struct AsyncWrite AsyncWrite; 138 typedef struct AsyncFile AsyncFile; 139 typedef struct AsyncFileData AsyncFileData; 140 typedef struct AsyncFileLock AsyncFileLock; 141 typedef struct AsyncLock AsyncLock; 142 143 /* Enable for debugging */ 144 static int sqlite3async_trace = 0; 145 # define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X 146 static void asyncTrace(const char *zFormat, ...){ 147 char *z; 148 va_list ap; 149 va_start(ap, zFormat); 150 z = sqlite3_vmprintf(zFormat, ap); 151 va_end(ap); 152 fprintf(stderr, "[%d] %s", (int)pthread_self(), z); 153 sqlite3_free(z); 154 } 155 156 /* 157 ** THREAD SAFETY NOTES 158 ** 159 ** Basic rules: 160 ** 161 ** * Both read and write access to the global write-op queue must be 162 ** protected by the async.queueMutex. As are the async.ioError and 163 ** async.nFile variables. 164 ** 165 ** * The async.aLock hash-table and all AsyncLock and AsyncFileLock 166 ** structures must be protected by the async.lockMutex mutex. 167 ** 168 ** * The file handles from the underlying system are assumed not to 169 ** be thread safe. 170 ** 171 ** * See the last two paragraphs under "The Writer Thread" for 172 ** an assumption to do with file-handle synchronization by the Os. 173 ** 174 ** Deadlock prevention: 175 ** 176 ** There are three mutex used by the system: the "writer" mutex, 177 ** the "queue" mutex and the "lock" mutex. Rules are: 178 ** 179 ** * It is illegal to block on the writer mutex when any other mutex 180 ** are held, and 181 ** 182 ** * It is illegal to block on the queue mutex when the lock mutex 183 ** is held. 184 ** 185 ** i.e. 
**     mutexes must be grabbed in the order "writer", "queue", "lock".
**
** File system operations (invoked by SQLite thread):
**
**     xOpen
**     xDelete
**     xFileExists
**
** File handle operations (invoked by SQLite thread):
**
**         asyncWrite, asyncClose, asyncTruncate, asyncSync
**
**     The operations above add an entry to the global write-op list. They
**     prepare the entry, acquire the async.queueMutex momentarily while
**     list pointers are manipulated to insert the new entry, then release
**     the mutex and signal the writer thread to wake up in case it happens
**     to be asleep.
**
**
**         asyncRead, asyncFileSize.
**
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue. So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
**
**
**         asyncLock, asyncUnlock, asyncCheckReservedLock
**
**     These primitives implement in-process locking using a hash table
**     on the file name.  Files are locked correctly for connections coming
**     from the same process.  But other processes cannot see these locks
**     and will therefore not honor them.
**
**
** The writer thread:
**
**     The async.writerMutex is used to make sure there is only
**     a single writer thread running at a time.
**
**     Inside the writer thread is a loop that works like this:
**
**         WHILE (write-op list is not empty)
**             Do IO operation at head of write-op list
**             Remove entry from head of write-op list
**         END WHILE
**
**     The async.queueMutex is always held during the <write-op list is
**     not empty> test, and when the entry is removed from the head
**     of the write-op list.
Sometimes it is held for the interim 236 ** period (while the IO is performed), and sometimes it is 237 ** relinquished. It is relinquished if (a) the IO op is an 238 ** ASYNC_CLOSE or (b) when the file handle was opened, two of 239 ** the underlying systems handles were opened on the same 240 ** file-system entry. 241 ** 242 ** If condition (b) above is true, then one file-handle 243 ** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the 244 ** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush() 245 ** threads to perform write() operations. This means that read 246 ** operations are not blocked by asynchronous writes (although 247 ** asynchronous writes may still be blocked by reads). 248 ** 249 ** This assumes that the OS keeps two handles open on the same file 250 ** properly in sync. That is, any read operation that starts after a 251 ** write operation on the same file system entry has completed returns 252 ** data consistent with the write. We also assume that if one thread 253 ** reads a file while another is writing it all bytes other than the 254 ** ones actually being written contain valid data. 255 ** 256 ** If the above assumptions are not true, set the preprocessor symbol 257 ** SQLITE_ASYNC_TWO_FILEHANDLES to 0. 258 */ 259 260 #ifndef SQLITE_ASYNC_TWO_FILEHANDLES 261 /* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */ 262 #define SQLITE_ASYNC_TWO_FILEHANDLES 1 263 #endif 264 265 /* 266 ** State information is held in the static variable "async" defined 267 ** as the following structure. 268 ** 269 ** Both async.ioError and async.nFile are protected by async.queueMutex. 
270 */ 271 static struct TestAsyncStaticData { 272 pthread_mutex_t lockMutex; /* For access to aLock hash table */ 273 pthread_mutex_t queueMutex; /* Mutex for access to write operation queue */ 274 pthread_mutex_t writerMutex; /* Prevents multiple writer threads */ 275 pthread_cond_t queueSignal; /* For waking up sleeping writer thread */ 276 pthread_cond_t emptySignal; /* Notify when the write queue is empty */ 277 AsyncWrite *pQueueFirst; /* Next write operation to be processed */ 278 AsyncWrite *pQueueLast; /* Last write operation on the list */ 279 AsyncLock *pLock; /* Linked list of all AsyncLock structures */ 280 volatile int ioDelay; /* Extra delay between write operations */ 281 volatile int writerHaltWhenIdle; /* Writer thread halts when queue empty */ 282 volatile int writerHaltNow; /* Writer thread halts after next op */ 283 int ioError; /* True if an IO error has occured */ 284 int nFile; /* Number of open files (from sqlite pov) */ 285 } async = { 286 PTHREAD_MUTEX_INITIALIZER, 287 PTHREAD_MUTEX_INITIALIZER, 288 PTHREAD_MUTEX_INITIALIZER, 289 PTHREAD_COND_INITIALIZER, 290 PTHREAD_COND_INITIALIZER, 291 }; 292 293 /* Possible values of AsyncWrite.op */ 294 #define ASYNC_NOOP 0 295 #define ASYNC_WRITE 1 296 #define ASYNC_SYNC 2 297 #define ASYNC_TRUNCATE 3 298 #define ASYNC_CLOSE 4 299 #define ASYNC_DELETE 5 300 #define ASYNC_OPENEXCLUSIVE 6 301 #define ASYNC_UNLOCK 7 302 303 /* Names of opcodes. Used for debugging only. 304 ** Make sure these stay in sync with the macros above! 305 */ 306 static const char *azOpcodeName[] = { 307 "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK" 308 }; 309 310 /* 311 ** Entries on the write-op queue are instances of the AsyncWrite 312 ** structure, defined here. 313 ** 314 ** The interpretation of the iOffset and nByte variables varies depending 315 ** on the value of AsyncWrite.op: 316 ** 317 ** ASYNC_NOOP: 318 ** No values used. 
319 ** 320 ** ASYNC_WRITE: 321 ** iOffset -> Offset in file to write to. 322 ** nByte -> Number of bytes of data to write (pointed to by zBuf). 323 ** 324 ** ASYNC_SYNC: 325 ** nByte -> flags to pass to sqlite3OsSync(). 326 ** 327 ** ASYNC_TRUNCATE: 328 ** iOffset -> Size to truncate file to. 329 ** nByte -> Unused. 330 ** 331 ** ASYNC_CLOSE: 332 ** iOffset -> Unused. 333 ** nByte -> Unused. 334 ** 335 ** ASYNC_DELETE: 336 ** iOffset -> Contains the "syncDir" flag. 337 ** nByte -> Number of bytes of zBuf points to (file name). 338 ** 339 ** ASYNC_OPENEXCLUSIVE: 340 ** iOffset -> Value of "delflag". 341 ** nByte -> Number of bytes of zBuf points to (file name). 342 ** 343 ** ASYNC_UNLOCK: 344 ** nByte -> Argument to sqlite3OsUnlock(). 345 ** 346 ** 347 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file. 348 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a 349 ** single blob, so is deleted when sqlite3_free() is called on the parent 350 ** structure. 351 */ 352 struct AsyncWrite { 353 AsyncFileData *pFileData; /* File to write data to or sync */ 354 int op; /* One of ASYNC_xxx etc. */ 355 sqlite_int64 iOffset; /* See above */ 356 int nByte; /* See above */ 357 char *zBuf; /* Data to write to file (or NULL if op!=ASYNC_WRITE) */ 358 AsyncWrite *pNext; /* Next write operation (to any file) */ 359 }; 360 361 /* 362 ** An instance of this structure is created for each distinct open file 363 ** (i.e. if two handles are opened on the one file, only one of these 364 ** structures is allocated) and stored in the async.aLock hash table. The 365 ** keys for async.aLock are the full pathnames of the opened files. 366 ** 367 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock 368 ** structures, one for each handle currently open on the file. 
369 ** 370 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is 371 ** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is 372 ** not defined at compile time, variables AsyncLock.pFile and 373 ** AsyncLock.eLock are never used. Otherwise, pFile is a file handle 374 ** opened on the file in question and used to obtain the file-system 375 ** locks required by database connections within this process. 376 ** 377 ** See comments above the asyncLock() function for more details on 378 ** the implementation of database locking used by this backend. 379 */ 380 struct AsyncLock { 381 char *zFile; 382 int nFile; 383 sqlite3_file *pFile; 384 int eLock; 385 AsyncFileLock *pList; 386 AsyncLock *pNext; /* Next in linked list headed by async.pLock */ 387 }; 388 389 /* 390 ** An instance of the following structure is allocated along with each 391 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the 392 ** file was opened with the SQLITE_OPEN_MAIN_DB. 393 */ 394 struct AsyncFileLock { 395 int eLock; /* Internally visible lock state (sqlite pov) */ 396 int eAsyncLock; /* Lock-state with write-queue unlock */ 397 AsyncFileLock *pNext; 398 }; 399 400 /* 401 ** The AsyncFile structure is a subclass of sqlite3_file used for 402 ** asynchronous IO. 403 ** 404 ** All of the actual data for the structure is stored in the structure 405 ** pointed to by AsyncFile.pData, which is allocated as part of the 406 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the 407 ** lifetime of the AsyncFile structure is ended by the caller after OsClose() 408 ** is called, but the data in AsyncFileData may be required by the 409 ** writer thread after that point. 
410 */ 411 struct AsyncFile { 412 sqlite3_io_methods *pMethod; 413 AsyncFileData *pData; 414 }; 415 struct AsyncFileData { 416 char *zName; /* Underlying OS filename - used for debugging */ 417 int nName; /* Number of characters in zName */ 418 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */ 419 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */ 420 AsyncFileLock lock; /* Lock state for this handle */ 421 AsyncLock *pLock; /* AsyncLock object for this file system entry */ 422 AsyncWrite close; 423 }; 424 425 /* 426 ** The following async_XXX functions are debugging wrappers around the 427 ** corresponding pthread_XXX functions: 428 ** 429 ** pthread_mutex_lock(); 430 ** pthread_mutex_unlock(); 431 ** pthread_mutex_trylock(); 432 ** pthread_cond_wait(); 433 ** 434 ** It is illegal to pass any mutex other than those stored in the 435 ** following global variables of these functions. 436 ** 437 ** async.queueMutex 438 ** async.writerMutex 439 ** async.lockMutex 440 ** 441 ** If NDEBUG is defined, these wrappers do nothing except call the 442 ** corresponding pthreads function. If NDEBUG is not defined, then the 443 ** following variables are used to store the thread-id (as returned 444 ** by pthread_self()) currently holding the mutex, or 0 otherwise: 445 ** 446 ** asyncdebug.queueMutexHolder 447 ** asyncdebug.writerMutexHolder 448 ** asyncdebug.lockMutexHolder 449 ** 450 ** These variables are used by some assert() statements that verify 451 ** the statements made in the "Deadlock Prevention" notes earlier 452 ** in this file. 453 */ 454 #ifndef NDEBUG 455 456 static struct TestAsyncDebugData { 457 pthread_t lockMutexHolder; 458 pthread_t queueMutexHolder; 459 pthread_t writerMutexHolder; 460 } asyncdebug = {0, 0, 0}; 461 462 /* 463 ** Wrapper around pthread_mutex_lock(). Checks that we have not violated 464 ** the anti-deadlock rules (see "Deadlock prevention" above). 
465 */ 466 static int async_mutex_lock(pthread_mutex_t *pMutex){ 467 int iIdx; 468 int rc; 469 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); 470 pthread_t *aHolder = (pthread_t *)(&asyncdebug); 471 472 /* The code in this 'ifndef NDEBUG' block depends on a certain alignment 473 * of the variables in TestAsyncStaticData and TestAsyncDebugData. The 474 * following assert() statements check that this has not been changed. 475 * 476 * Really, these only need to be run once at startup time. 477 */ 478 assert(&(aMutex[0])==&async.lockMutex); 479 assert(&(aMutex[1])==&async.queueMutex); 480 assert(&(aMutex[2])==&async.writerMutex); 481 assert(&(aHolder[0])==&asyncdebug.lockMutexHolder); 482 assert(&(aHolder[1])==&asyncdebug.queueMutexHolder); 483 assert(&(aHolder[2])==&asyncdebug.writerMutexHolder); 484 485 assert( pthread_self()!=0 ); 486 487 for(iIdx=0; iIdx<3; iIdx++){ 488 if( pMutex==&aMutex[iIdx] ) break; 489 490 /* This is the key assert(). Here we are checking that if the caller 491 * is trying to block on async.writerMutex, neither of the other two 492 * mutex are held. If the caller is trying to block on async.queueMutex, 493 * lockMutex is not held. 494 */ 495 assert(!pthread_equal(aHolder[iIdx], pthread_self())); 496 } 497 assert(iIdx<3); 498 499 rc = pthread_mutex_lock(pMutex); 500 if( rc==0 ){ 501 assert(aHolder[iIdx]==0); 502 aHolder[iIdx] = pthread_self(); 503 } 504 return rc; 505 } 506 507 /* 508 ** Wrapper around pthread_mutex_unlock(). 
509 */ 510 static int async_mutex_unlock(pthread_mutex_t *pMutex){ 511 int iIdx; 512 int rc; 513 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); 514 pthread_t *aHolder = (pthread_t *)(&asyncdebug); 515 516 for(iIdx=0; iIdx<3; iIdx++){ 517 if( pMutex==&aMutex[iIdx] ) break; 518 } 519 assert(iIdx<3); 520 521 assert(pthread_equal(aHolder[iIdx], pthread_self())); 522 aHolder[iIdx] = 0; 523 rc = pthread_mutex_unlock(pMutex); 524 assert(rc==0); 525 526 return 0; 527 } 528 529 /* 530 ** Wrapper around pthread_mutex_trylock(). 531 */ 532 static int async_mutex_trylock(pthread_mutex_t *pMutex){ 533 int iIdx; 534 int rc; 535 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); 536 pthread_t *aHolder = (pthread_t *)(&asyncdebug); 537 538 for(iIdx=0; iIdx<3; iIdx++){ 539 if( pMutex==&aMutex[iIdx] ) break; 540 } 541 assert(iIdx<3); 542 543 rc = pthread_mutex_trylock(pMutex); 544 if( rc==0 ){ 545 assert(aHolder[iIdx]==0); 546 aHolder[iIdx] = pthread_self(); 547 } 548 return rc; 549 } 550 551 /* 552 ** Wrapper around pthread_cond_wait(). 553 */ 554 static int async_cond_wait(pthread_cond_t *pCond, pthread_mutex_t *pMutex){ 555 int iIdx; 556 int rc; 557 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); 558 pthread_t *aHolder = (pthread_t *)(&asyncdebug); 559 560 for(iIdx=0; iIdx<3; iIdx++){ 561 if( pMutex==&aMutex[iIdx] ) break; 562 } 563 assert(iIdx<3); 564 565 assert(pthread_equal(aHolder[iIdx],pthread_self())); 566 aHolder[iIdx] = 0; 567 rc = pthread_cond_wait(pCond, pMutex); 568 if( rc==0 ){ 569 aHolder[iIdx] = pthread_self(); 570 } 571 return rc; 572 } 573 574 /* Call our async_XX wrappers instead of selected pthread_XX functions */ 575 #define pthread_mutex_lock async_mutex_lock 576 #define pthread_mutex_unlock async_mutex_unlock 577 #define pthread_mutex_trylock async_mutex_trylock 578 #define pthread_cond_wait async_cond_wait 579 580 #endif /* !defined(NDEBUG) */ 581 582 /* 583 ** Add an entry to the end of the global write-op list. 
pWrite should point 584 ** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer 585 ** thread will call sqlite3_free() to free the structure after the specified 586 ** operation has been completed. 587 ** 588 ** Once an AsyncWrite structure has been added to the list, it becomes the 589 ** property of the writer thread and must not be read or modified by the 590 ** caller. 591 */ 592 static void addAsyncWrite(AsyncWrite *pWrite){ 593 /* We must hold the queue mutex in order to modify the queue pointers */ 594 pthread_mutex_lock(&async.queueMutex); 595 596 /* Add the record to the end of the write-op queue */ 597 assert( !pWrite->pNext ); 598 if( async.pQueueLast ){ 599 assert( async.pQueueFirst ); 600 async.pQueueLast->pNext = pWrite; 601 }else{ 602 async.pQueueFirst = pWrite; 603 } 604 async.pQueueLast = pWrite; 605 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op], 606 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset)); 607 608 if( pWrite->op==ASYNC_CLOSE ){ 609 async.nFile--; 610 } 611 612 /* Drop the queue mutex */ 613 pthread_mutex_unlock(&async.queueMutex); 614 615 /* The writer thread might have been idle because there was nothing 616 ** on the write-op queue for it to do. So wake it up. */ 617 pthread_cond_signal(&async.queueSignal); 618 } 619 620 /* 621 ** Increment async.nFile in a thread-safe manner. 622 */ 623 static void incrOpenFileCount(){ 624 /* We must hold the queue mutex in order to modify async.nFile */ 625 pthread_mutex_lock(&async.queueMutex); 626 if( async.nFile==0 ){ 627 async.ioError = SQLITE_OK; 628 } 629 async.nFile++; 630 pthread_mutex_unlock(&async.queueMutex); 631 } 632 633 /* 634 ** This is a utility function to allocate and populate a new AsyncWrite 635 ** structure and insert it (via addAsyncWrite() ) into the global list. 
636 */ 637 static int addNewAsyncWrite( 638 AsyncFileData *pFileData, 639 int op, 640 sqlite3_int64 iOffset, 641 int nByte, 642 const char *zByte 643 ){ 644 AsyncWrite *p; 645 if( op!=ASYNC_CLOSE && async.ioError ){ 646 return async.ioError; 647 } 648 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0)); 649 if( !p ){ 650 /* The upper layer does not expect operations like OsWrite() to 651 ** return SQLITE_NOMEM. This is partly because under normal conditions 652 ** SQLite is required to do rollback without calling malloc(). So 653 ** if malloc() fails here, treat it as an I/O error. The above 654 ** layer knows how to handle that. 655 */ 656 return SQLITE_IOERR; 657 } 658 p->op = op; 659 p->iOffset = iOffset; 660 p->nByte = nByte; 661 p->pFileData = pFileData; 662 p->pNext = 0; 663 if( zByte ){ 664 p->zBuf = (char *)&p[1]; 665 memcpy(p->zBuf, zByte, nByte); 666 }else{ 667 p->zBuf = 0; 668 } 669 addAsyncWrite(p); 670 return SQLITE_OK; 671 } 672 673 /* 674 ** Close the file. This just adds an entry to the write-op list, the file is 675 ** not actually closed. 676 */ 677 static int asyncClose(sqlite3_file *pFile){ 678 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 679 680 /* Unlock the file, if it is locked */ 681 pthread_mutex_lock(&async.lockMutex); 682 p->lock.eLock = 0; 683 pthread_mutex_unlock(&async.lockMutex); 684 685 addAsyncWrite(&p->close); 686 return SQLITE_OK; 687 } 688 689 /* 690 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of 691 ** writing to the underlying file, this function adds an entry to the end of 692 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be 693 ** returned. 694 */ 695 static int asyncWrite( 696 sqlite3_file *pFile, 697 const void *pBuf, 698 int amt, 699 sqlite3_int64 iOff 700 ){ 701 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 702 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf); 703 } 704 705 /* 706 ** Read data from the file. 
First we read from the filesystem, then adjust 707 ** the contents of the buffer based on ASYNC_WRITE operations in the 708 ** write-op queue. 709 ** 710 ** This method holds the mutex from start to finish. 711 */ 712 static int asyncRead( 713 sqlite3_file *pFile, 714 void *zOut, 715 int iAmt, 716 sqlite3_int64 iOffset 717 ){ 718 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 719 int rc = SQLITE_OK; 720 sqlite3_int64 filesize; 721 int nRead; 722 sqlite3_file *pBase = p->pBaseRead; 723 724 /* Grab the write queue mutex for the duration of the call */ 725 pthread_mutex_lock(&async.queueMutex); 726 727 /* If an I/O error has previously occurred in this virtual file 728 ** system, then all subsequent operations fail. 729 */ 730 if( async.ioError!=SQLITE_OK ){ 731 rc = async.ioError; 732 goto asyncread_out; 733 } 734 735 if( pBase->pMethods ){ 736 rc = pBase->pMethods->xFileSize(pBase, &filesize); 737 if( rc!=SQLITE_OK ){ 738 goto asyncread_out; 739 } 740 nRead = MIN(filesize - iOffset, iAmt); 741 if( nRead>0 ){ 742 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset); 743 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset)); 744 } 745 } 746 747 if( rc==SQLITE_OK ){ 748 AsyncWrite *pWrite; 749 char *zName = p->zName; 750 751 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 752 if( pWrite->op==ASYNC_WRITE && ( 753 (pWrite->pFileData==p) || 754 (zName && pWrite->pFileData->zName==zName) 755 )){ 756 int iBeginOut = (pWrite->iOffset-iOffset); 757 int iBeginIn = -iBeginOut; 758 int nCopy; 759 760 if( iBeginIn<0 ) iBeginIn = 0; 761 if( iBeginOut<0 ) iBeginOut = 0; 762 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut); 763 764 if( nCopy>0 ){ 765 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy); 766 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset)); 767 } 768 } 769 } 770 } 771 772 asyncread_out: 773 pthread_mutex_unlock(&async.queueMutex); 774 return rc; 775 } 776 777 /* 778 ** Truncate the file to nByte 
bytes in length. This just adds an entry to 779 ** the write-op list, no IO actually takes place. 780 */ 781 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){ 782 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 783 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0); 784 } 785 786 /* 787 ** Sync the file. This just adds an entry to the write-op list, the 788 ** sync() is done later by sqlite3_async_flush(). 789 */ 790 static int asyncSync(sqlite3_file *pFile, int flags){ 791 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 792 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0); 793 } 794 795 /* 796 ** Read the size of the file. First we read the size of the file system 797 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations 798 ** currently in the write-op list. 799 ** 800 ** This method holds the mutex from start to finish. 801 */ 802 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){ 803 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 804 int rc = SQLITE_OK; 805 sqlite3_int64 s = 0; 806 sqlite3_file *pBase; 807 808 pthread_mutex_lock(&async.queueMutex); 809 810 /* Read the filesystem size from the base file. If pBaseRead is NULL, this 811 ** means the file hasn't been opened yet. In this case all relevant data 812 ** must be in the write-op queue anyway, so we can omit reading from the 813 ** file-system. 
814 */ 815 pBase = p->pBaseRead; 816 if( pBase->pMethods ){ 817 rc = pBase->pMethods->xFileSize(pBase, &s); 818 } 819 820 if( rc==SQLITE_OK ){ 821 AsyncWrite *pWrite; 822 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 823 if( pWrite->op==ASYNC_DELETE 824 && p->zName 825 && strcmp(p->zName, pWrite->zBuf)==0 826 ){ 827 s = 0; 828 }else if( pWrite->pFileData && ( 829 (pWrite->pFileData==p) 830 || (p->zName && pWrite->pFileData->zName==p->zName) 831 )){ 832 switch( pWrite->op ){ 833 case ASYNC_WRITE: 834 s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s); 835 break; 836 case ASYNC_TRUNCATE: 837 s = MIN(s, pWrite->iOffset); 838 break; 839 } 840 } 841 } 842 *piSize = s; 843 } 844 pthread_mutex_unlock(&async.queueMutex); 845 return rc; 846 } 847 848 /* 849 ** Lock or unlock the actual file-system entry. 850 */ 851 static int getFileLock(AsyncLock *pLock){ 852 int rc = SQLITE_OK; 853 AsyncFileLock *pIter; 854 int eRequired = 0; 855 856 if( pLock->pFile ){ 857 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ 858 assert(pIter->eAsyncLock>=pIter->eLock); 859 if( pIter->eAsyncLock>eRequired ){ 860 eRequired = pIter->eAsyncLock; 861 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE); 862 } 863 } 864 865 if( eRequired>pLock->eLock ){ 866 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired); 867 if( rc==SQLITE_OK ){ 868 pLock->eLock = eRequired; 869 } 870 } 871 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){ 872 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired); 873 if( rc==SQLITE_OK ){ 874 pLock->eLock = eRequired; 875 } 876 } 877 } 878 879 return rc; 880 } 881 882 /* 883 ** Return the AsyncLock structure from the global async.pLock list 884 ** associated with the file-system entry identified by path zName 885 ** (a string of nName bytes). If no such structure exists, return 0. 
886 */ 887 static AsyncLock *findLock(const char *zName, int nName){ 888 AsyncLock *p = async.pLock; 889 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){ 890 p = p->pNext; 891 } 892 return p; 893 } 894 895 /* 896 ** The following two methods - asyncLock() and asyncUnlock() - are used 897 ** to obtain and release locks on database files opened with the 898 ** asynchronous backend. 899 */ 900 static int asyncLock(sqlite3_file *pFile, int eLock){ 901 int rc = SQLITE_OK; 902 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 903 904 if( p->zName ){ 905 pthread_mutex_lock(&async.lockMutex); 906 if( p->lock.eLock<eLock ){ 907 AsyncLock *pLock = p->pLock; 908 AsyncFileLock *pIter; 909 assert(pLock && pLock->pList); 910 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ 911 if( pIter!=&p->lock && ( 912 (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) || 913 (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) || 914 (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) || 915 (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING) 916 )){ 917 rc = SQLITE_BUSY; 918 } 919 } 920 if( rc==SQLITE_OK ){ 921 p->lock.eLock = eLock; 922 p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock); 923 } 924 assert(p->lock.eAsyncLock>=p->lock.eLock); 925 if( rc==SQLITE_OK ){ 926 rc = getFileLock(pLock); 927 } 928 } 929 pthread_mutex_unlock(&async.lockMutex); 930 } 931 932 ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc)); 933 return rc; 934 } 935 static int asyncUnlock(sqlite3_file *pFile, int eLock){ 936 int rc = SQLITE_OK; 937 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 938 if( p->zName ){ 939 AsyncFileLock *pLock = &p->lock; 940 pthread_mutex_lock(&async.lockMutex); 941 pLock->eLock = MIN(pLock->eLock, eLock); 942 pthread_mutex_unlock(&async.lockMutex); 943 rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0); 944 } 945 return rc; 946 } 947 948 /* 949 ** This function is called when the pager layer first 
opens a database file 950 ** and is checking for a hot-journal. 951 */ 952 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){ 953 int ret = 0; 954 AsyncFileLock *pIter; 955 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 956 957 pthread_mutex_lock(&async.lockMutex); 958 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){ 959 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){ 960 ret = 1; 961 } 962 } 963 pthread_mutex_unlock(&async.lockMutex); 964 965 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName)); 966 *pResOut = ret; 967 return SQLITE_OK; 968 } 969 970 /* 971 ** sqlite3_file_control() implementation. 972 */ 973 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){ 974 switch( op ){ 975 case SQLITE_FCNTL_LOCKSTATE: { 976 pthread_mutex_lock(&async.lockMutex); 977 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock; 978 pthread_mutex_unlock(&async.lockMutex); 979 return SQLITE_OK; 980 } 981 } 982 return SQLITE_ERROR; 983 } 984 985 /* 986 ** Return the device characteristics and sector-size of the device. It 987 ** is not tricky to implement these correctly, as this backend might 988 ** not have an open file handle at this point. 
989 */ 990 static int asyncSectorSize(sqlite3_file *pFile){ 991 return 512; 992 } 993 static int asyncDeviceCharacteristics(sqlite3_file *pFile){ 994 return 0; 995 } 996 997 static int unlinkAsyncFile(AsyncFileData *pData){ 998 AsyncFileLock **ppIter; 999 int rc = SQLITE_OK; 1000 1001 if( pData->zName ){ 1002 AsyncLock *pLock = pData->pLock; 1003 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){ 1004 if( (*ppIter)==&pData->lock ){ 1005 *ppIter = pData->lock.pNext; 1006 break; 1007 } 1008 } 1009 if( !pLock->pList ){ 1010 AsyncLock **pp; 1011 if( pLock->pFile ){ 1012 pLock->pFile->pMethods->xClose(pLock->pFile); 1013 } 1014 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext)); 1015 *pp = pLock->pNext; 1016 sqlite3_free(pLock); 1017 }else{ 1018 rc = getFileLock(pLock); 1019 } 1020 } 1021 1022 return rc; 1023 } 1024 1025 /* 1026 ** Open a file. 1027 */ 1028 static int asyncOpen( 1029 sqlite3_vfs *pAsyncVfs, 1030 const char *zName, 1031 sqlite3_file *pFile, 1032 int flags, 1033 int *pOutFlags 1034 ){ 1035 static sqlite3_io_methods async_methods = { 1036 1, /* iVersion */ 1037 asyncClose, /* xClose */ 1038 asyncRead, /* xRead */ 1039 asyncWrite, /* xWrite */ 1040 asyncTruncate, /* xTruncate */ 1041 asyncSync, /* xSync */ 1042 asyncFileSize, /* xFileSize */ 1043 asyncLock, /* xLock */ 1044 asyncUnlock, /* xUnlock */ 1045 asyncCheckReservedLock, /* xCheckReservedLock */ 1046 asyncFileControl, /* xFileControl */ 1047 asyncSectorSize, /* xSectorSize */ 1048 asyncDeviceCharacteristics /* xDeviceCharacteristics */ 1049 }; 1050 1051 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1052 AsyncFile *p = (AsyncFile *)pFile; 1053 int nName = 0; 1054 int rc = SQLITE_OK; 1055 int nByte; 1056 AsyncFileData *pData; 1057 AsyncLock *pLock = 0; 1058 char *z; 1059 int isExclusive = (flags&SQLITE_OPEN_EXCLUSIVE); 1060 1061 /* If zName is NULL, then the upper layer is requesting an anonymous file */ 1062 if( zName ){ 1063 nName = strlen(zName)+1; 1064 } 1065 1066 nByte = 
( 1067 sizeof(AsyncFileData) + /* AsyncFileData structure */ 1068 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */ 1069 nName /* AsyncFileData.zName */ 1070 ); 1071 z = sqlite3_malloc(nByte); 1072 if( !z ){ 1073 return SQLITE_NOMEM; 1074 } 1075 memset(z, 0, nByte); 1076 pData = (AsyncFileData*)z; 1077 z += sizeof(pData[0]); 1078 pData->pBaseRead = (sqlite3_file*)z; 1079 z += pVfs->szOsFile; 1080 pData->pBaseWrite = (sqlite3_file*)z; 1081 pData->close.pFileData = pData; 1082 pData->close.op = ASYNC_CLOSE; 1083 1084 if( zName ){ 1085 z += pVfs->szOsFile; 1086 pData->zName = z; 1087 pData->nName = nName; 1088 memcpy(pData->zName, zName, nName); 1089 } 1090 1091 if( !isExclusive ){ 1092 rc = pVfs->xOpen(pVfs, zName, pData->pBaseRead, flags, pOutFlags); 1093 if( rc==SQLITE_OK && ((*pOutFlags)&SQLITE_OPEN_READWRITE) ){ 1094 rc = pVfs->xOpen(pVfs, zName, pData->pBaseWrite, flags, 0); 1095 } 1096 } 1097 1098 pthread_mutex_lock(&async.lockMutex); 1099 1100 if( zName && rc==SQLITE_OK ){ 1101 pLock = findLock(pData->zName, pData->nName); 1102 if( !pLock ){ 1103 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1; 1104 pLock = (AsyncLock *)sqlite3_malloc(nByte); 1105 if( pLock ){ 1106 memset(pLock, 0, nByte); 1107 #ifdef ENABLE_FILE_LOCKING 1108 if( flags&SQLITE_OPEN_MAIN_DB ){ 1109 pLock->pFile = (sqlite3_file *)&pLock[1]; 1110 rc = pVfs->xOpen(pVfs, zName, pLock->pFile, flags, 0); 1111 if( rc!=SQLITE_OK ){ 1112 sqlite3_free(pLock); 1113 pLock = 0; 1114 } 1115 } 1116 #endif 1117 if( pLock ){ 1118 pLock->nFile = pData->nName; 1119 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile]; 1120 memcpy(pLock->zFile, pData->zName, pLock->nFile); 1121 pLock->pNext = async.pLock; 1122 async.pLock = pLock; 1123 } 1124 }else{ 1125 rc = SQLITE_NOMEM; 1126 } 1127 } 1128 } 1129 1130 if( rc==SQLITE_OK ){ 1131 p->pMethod = &async_methods; 1132 p->pData = pData; 1133 1134 /* Link AsyncFileData.lock into the linked list of 1135 ** AsyncFileLock structures for this 
file. 1136 */ 1137 if( zName ){ 1138 pData->lock.pNext = pLock->pList; 1139 pLock->pList = &pData->lock; 1140 pData->zName = pLock->zFile; 1141 } 1142 }else{ 1143 if( pData->pBaseRead->pMethods ){ 1144 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); 1145 } 1146 if( pData->pBaseWrite->pMethods ){ 1147 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); 1148 } 1149 sqlite3_free(pData); 1150 } 1151 1152 pthread_mutex_unlock(&async.lockMutex); 1153 1154 if( rc==SQLITE_OK ){ 1155 incrOpenFileCount(); 1156 pData->pLock = pLock; 1157 } 1158 1159 if( rc==SQLITE_OK && isExclusive ){ 1160 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0); 1161 if( rc==SQLITE_OK ){ 1162 if( pOutFlags ) *pOutFlags = flags; 1163 }else{ 1164 pthread_mutex_lock(&async.lockMutex); 1165 unlinkAsyncFile(pData); 1166 pthread_mutex_unlock(&async.lockMutex); 1167 sqlite3_free(pData); 1168 } 1169 } 1170 return rc; 1171 } 1172 1173 /* 1174 ** Implementation of sqlite3OsDelete. Add an entry to the end of the 1175 ** write-op queue to perform the delete. 1176 */ 1177 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){ 1178 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z); 1179 } 1180 1181 /* 1182 ** Implementation of sqlite3OsAccess. This method holds the mutex from 1183 ** start to finish. 
1184 */ 1185 static int asyncAccess( 1186 sqlite3_vfs *pAsyncVfs, 1187 const char *zName, 1188 int flags, 1189 int *pResOut 1190 ){ 1191 int rc; 1192 int ret; 1193 AsyncWrite *p; 1194 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1195 1196 assert(flags==SQLITE_ACCESS_READWRITE 1197 || flags==SQLITE_ACCESS_READ 1198 || flags==SQLITE_ACCESS_EXISTS 1199 ); 1200 1201 pthread_mutex_lock(&async.queueMutex); 1202 rc = pVfs->xAccess(pVfs, zName, flags, &ret); 1203 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){ 1204 for(p=async.pQueueFirst; p; p = p->pNext){ 1205 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){ 1206 ret = 0; 1207 }else if( p->op==ASYNC_OPENEXCLUSIVE 1208 && p->pFileData->zName 1209 && 0==strcmp(p->pFileData->zName, zName) 1210 ){ 1211 ret = 1; 1212 } 1213 } 1214 } 1215 ASYNC_TRACE(("ACCESS(%s): %s = %d\n", 1216 flags==SQLITE_ACCESS_READWRITE?"read-write": 1217 flags==SQLITE_ACCESS_READ?"read":"exists" 1218 , zName, ret) 1219 ); 1220 pthread_mutex_unlock(&async.queueMutex); 1221 *pResOut = ret; 1222 return rc; 1223 } 1224 1225 /* 1226 ** Fill in zPathOut with the full path to the file identified by zPath. 1227 */ 1228 static int asyncFullPathname( 1229 sqlite3_vfs *pAsyncVfs, 1230 const char *zPath, 1231 int nPathOut, 1232 char *zPathOut 1233 ){ 1234 int rc; 1235 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1236 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); 1237 1238 /* Because of the way intra-process file locking works, this backend 1239 ** needs to return a canonical path. The following block assumes the 1240 ** file-system uses unix style paths. 
1241 */ 1242 if( rc==SQLITE_OK ){ 1243 int iIn; 1244 int iOut = 0; 1245 int nPathOut = strlen(zPathOut); 1246 1247 for(iIn=0; iIn<nPathOut; iIn++){ 1248 1249 /* Replace any occurences of "//" with "/" */ 1250 if( iIn<=(nPathOut-2) && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='/' 1251 ){ 1252 continue; 1253 } 1254 1255 /* Replace any occurences of "/./" with "/" */ 1256 if( iIn<=(nPathOut-3) 1257 && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' && zPathOut[iIn+2]=='/' 1258 ){ 1259 iIn++; 1260 continue; 1261 } 1262 1263 /* Replace any occurences of "<path-component>/../" with "" */ 1264 if( iOut>0 && iIn<=(nPathOut-4) 1265 && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' 1266 && zPathOut[iIn+2]=='.' && zPathOut[iIn+3]=='/' 1267 ){ 1268 iIn += 3; 1269 iOut--; 1270 for( ; iOut>0 && zPathOut[iOut-1]!='/'; iOut--); 1271 continue; 1272 } 1273 1274 zPathOut[iOut++] = zPathOut[iIn]; 1275 } 1276 zPathOut[iOut] = '\0'; 1277 } 1278 1279 return rc; 1280 } 1281 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){ 1282 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1283 return pVfs->xDlOpen(pVfs, zPath); 1284 } 1285 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){ 1286 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1287 pVfs->xDlError(pVfs, nByte, zErrMsg); 1288 } 1289 static void *asyncDlSym( 1290 sqlite3_vfs *pAsyncVfs, 1291 void *pHandle, 1292 const char *zSymbol 1293 ){ 1294 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1295 return pVfs->xDlSym(pVfs, pHandle, zSymbol); 1296 } 1297 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){ 1298 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1299 pVfs->xDlClose(pVfs, pHandle); 1300 } 1301 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){ 1302 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1303 return pVfs->xRandomness(pVfs, nByte, zBufOut); 1304 } 1305 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){ 1306 
sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1307 return pVfs->xSleep(pVfs, nMicro); 1308 } 1309 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){ 1310 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1311 return pVfs->xCurrentTime(pVfs, pTimeOut); 1312 } 1313 1314 static sqlite3_vfs async_vfs = { 1315 1, /* iVersion */ 1316 sizeof(AsyncFile), /* szOsFile */ 1317 0, /* mxPathname */ 1318 0, /* pNext */ 1319 "async", /* zName */ 1320 0, /* pAppData */ 1321 asyncOpen, /* xOpen */ 1322 asyncDelete, /* xDelete */ 1323 asyncAccess, /* xAccess */ 1324 asyncFullPathname, /* xFullPathname */ 1325 asyncDlOpen, /* xDlOpen */ 1326 asyncDlError, /* xDlError */ 1327 asyncDlSym, /* xDlSym */ 1328 asyncDlClose, /* xDlClose */ 1329 asyncRandomness, /* xDlError */ 1330 asyncSleep, /* xDlSym */ 1331 asyncCurrentTime /* xDlClose */ 1332 }; 1333 1334 /* 1335 ** Call this routine to enable or disable the 1336 ** asynchronous IO features implemented in this file. 1337 ** 1338 ** This routine is not even remotely threadsafe. Do not call 1339 ** this routine while any SQLite database connections are open. 1340 */ 1341 static void asyncEnable(int enable){ 1342 if( enable ){ 1343 if( !async_vfs.pAppData ){ 1344 async_vfs.pAppData = (void *)sqlite3_vfs_find(0); 1345 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname; 1346 sqlite3_vfs_register(&async_vfs, 1); 1347 } 1348 }else{ 1349 if( async_vfs.pAppData ){ 1350 sqlite3_vfs_unregister(&async_vfs); 1351 async_vfs.pAppData = 0; 1352 } 1353 } 1354 } 1355 1356 /* 1357 ** This procedure runs in a separate thread, reading messages off of the 1358 ** write queue and processing them one by one. 1359 ** 1360 ** If async.writerHaltNow is true, then this procedure exits 1361 ** after processing a single message. 1362 ** 1363 ** If async.writerHaltWhenIdle is true, then this procedure exits when 1364 ** the write queue is empty. 
1365 ** 1366 ** If both of the above variables are false, this procedure runs 1367 ** indefinately, waiting for operations to be added to the write queue 1368 ** and processing them in the order in which they arrive. 1369 ** 1370 ** An artifical delay of async.ioDelay milliseconds is inserted before 1371 ** each write operation in order to simulate the effect of a slow disk. 1372 ** 1373 ** Only one instance of this procedure may be running at a time. 1374 */ 1375 static void *asyncWriterThread(void *pIsStarted){ 1376 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData); 1377 AsyncWrite *p = 0; 1378 int rc = SQLITE_OK; 1379 int holdingMutex = 0; 1380 1381 if( pthread_mutex_trylock(&async.writerMutex) ){ 1382 return 0; 1383 } 1384 (*(int *)pIsStarted) = 1; 1385 while( async.writerHaltNow==0 ){ 1386 int doNotFree = 0; 1387 sqlite3_file *pBase = 0; 1388 1389 if( !holdingMutex ){ 1390 pthread_mutex_lock(&async.queueMutex); 1391 } 1392 while( (p = async.pQueueFirst)==0 ){ 1393 pthread_cond_broadcast(&async.emptySignal); 1394 if( async.writerHaltWhenIdle ){ 1395 pthread_mutex_unlock(&async.queueMutex); 1396 break; 1397 }else{ 1398 ASYNC_TRACE(("IDLE\n")); 1399 pthread_cond_wait(&async.queueSignal, &async.queueMutex); 1400 ASYNC_TRACE(("WAKEUP\n")); 1401 } 1402 } 1403 if( p==0 ) break; 1404 holdingMutex = 1; 1405 1406 /* Right now this thread is holding the mutex on the write-op queue. 1407 ** Variable 'p' points to the first entry in the write-op queue. In 1408 ** the general case, we hold on to the mutex for the entire body of 1409 ** the loop. 1410 ** 1411 ** However in the cases enumerated below, we relinquish the mutex, 1412 ** perform the IO, and then re-request the mutex before removing 'p' from 1413 ** the head of the write-op queue. The idea is to increase concurrency with 1414 ** sqlite threads. 1415 ** 1416 ** * An ASYNC_CLOSE operation. 1417 ** * An ASYNC_OPENEXCLUSIVE operation. 
For this one, we relinquish 1418 ** the mutex, call the underlying xOpenExclusive() function, then 1419 ** re-aquire the mutex before seting the AsyncFile.pBaseRead 1420 ** variable. 1421 ** * ASYNC_SYNC and ASYNC_WRITE operations, if 1422 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two 1423 ** file-handles are open for the particular file being "synced". 1424 */ 1425 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){ 1426 p->op = ASYNC_NOOP; 1427 } 1428 if( p->pFileData ){ 1429 pBase = p->pFileData->pBaseWrite; 1430 if( 1431 p->op==ASYNC_CLOSE || 1432 p->op==ASYNC_OPENEXCLUSIVE || 1433 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) 1434 ){ 1435 pthread_mutex_unlock(&async.queueMutex); 1436 holdingMutex = 0; 1437 } 1438 if( !pBase->pMethods ){ 1439 pBase = p->pFileData->pBaseRead; 1440 } 1441 } 1442 1443 switch( p->op ){ 1444 case ASYNC_NOOP: 1445 break; 1446 1447 case ASYNC_WRITE: 1448 assert( pBase ); 1449 ASYNC_TRACE(("WRITE %s %d bytes at %d\n", 1450 p->pFileData->zName, p->nByte, p->iOffset)); 1451 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset); 1452 break; 1453 1454 case ASYNC_SYNC: 1455 assert( pBase ); 1456 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName)); 1457 rc = pBase->pMethods->xSync(pBase, p->nByte); 1458 break; 1459 1460 case ASYNC_TRUNCATE: 1461 assert( pBase ); 1462 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", 1463 p->pFileData->zName, p->iOffset)); 1464 rc = pBase->pMethods->xTruncate(pBase, p->iOffset); 1465 break; 1466 1467 case ASYNC_CLOSE: { 1468 AsyncFileData *pData = p->pFileData; 1469 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName)); 1470 if( pData->pBaseWrite->pMethods ){ 1471 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); 1472 } 1473 if( pData->pBaseRead->pMethods ){ 1474 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); 1475 } 1476 1477 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock 1478 ** structures for this file. 
Obtain the async.lockMutex mutex 1479 ** before doing so. 1480 */ 1481 pthread_mutex_lock(&async.lockMutex); 1482 rc = unlinkAsyncFile(pData); 1483 pthread_mutex_unlock(&async.lockMutex); 1484 1485 async.pQueueFirst = p->pNext; 1486 sqlite3_free(pData); 1487 doNotFree = 1; 1488 break; 1489 } 1490 1491 case ASYNC_UNLOCK: { 1492 AsyncFileData *pData = p->pFileData; 1493 int eLock = p->nByte; 1494 pthread_mutex_lock(&async.lockMutex); 1495 pData->lock.eAsyncLock = MIN( 1496 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock) 1497 ); 1498 assert(pData->lock.eAsyncLock>=pData->lock.eLock); 1499 rc = getFileLock(pData->pLock); 1500 pthread_mutex_unlock(&async.lockMutex); 1501 break; 1502 } 1503 1504 case ASYNC_DELETE: 1505 ASYNC_TRACE(("DELETE %s\n", p->zBuf)); 1506 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset); 1507 break; 1508 1509 case ASYNC_OPENEXCLUSIVE: { 1510 int flags = (int)p->iOffset; 1511 AsyncFileData *pData = p->pFileData; 1512 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset)); 1513 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0); 1514 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0); 1515 assert( holdingMutex==0 ); 1516 pthread_mutex_lock(&async.queueMutex); 1517 holdingMutex = 1; 1518 break; 1519 } 1520 1521 default: assert(!"Illegal value for AsyncWrite.op"); 1522 } 1523 1524 /* If we didn't hang on to the mutex during the IO op, obtain it now 1525 ** so that the AsyncWrite structure can be safely removed from the 1526 ** global write-op queue. 1527 */ 1528 if( !holdingMutex ){ 1529 pthread_mutex_lock(&async.queueMutex); 1530 holdingMutex = 1; 1531 } 1532 /* ASYNC_TRACE(("UNLINK %p\n", p)); */ 1533 if( p==async.pQueueLast ){ 1534 async.pQueueLast = 0; 1535 } 1536 if( !doNotFree ){ 1537 async.pQueueFirst = p->pNext; 1538 sqlite3_free(p); 1539 } 1540 assert( holdingMutex ); 1541 1542 /* An IO error has occured. 
We cannot report the error back to the 1543 ** connection that requested the I/O since the error happened 1544 ** asynchronously. The connection has already moved on. There 1545 ** really is nobody to report the error to. 1546 ** 1547 ** The file for which the error occured may have been a database or 1548 ** journal file. Regardless, none of the currently queued operations 1549 ** associated with the same database should now be performed. Nor should 1550 ** any subsequently requested IO on either a database or journal file 1551 ** handle for the same database be accepted until the main database 1552 ** file handle has been closed and reopened. 1553 ** 1554 ** Furthermore, no further IO should be queued or performed on any file 1555 ** handle associated with a database that may have been part of a 1556 ** multi-file transaction that included the database associated with 1557 ** the IO error (i.e. a database ATTACHed to the same handle at some 1558 ** point in time). 1559 */ 1560 if( rc!=SQLITE_OK ){ 1561 async.ioError = rc; 1562 } 1563 1564 if( async.ioError && !async.pQueueFirst ){ 1565 pthread_mutex_lock(&async.lockMutex); 1566 if( 0==async.pLock ){ 1567 async.ioError = SQLITE_OK; 1568 } 1569 pthread_mutex_unlock(&async.lockMutex); 1570 } 1571 1572 /* Drop the queue mutex before continuing to the next write operation 1573 ** in order to give other threads a chance to work with the write queue. 1574 */ 1575 if( !async.pQueueFirst || !async.ioError ){ 1576 pthread_mutex_unlock(&async.queueMutex); 1577 holdingMutex = 0; 1578 if( async.ioDelay>0 ){ 1579 pVfs->xSleep(pVfs, async.ioDelay); 1580 }else{ 1581 sched_yield(); 1582 } 1583 } 1584 } 1585 1586 pthread_mutex_unlock(&async.writerMutex); 1587 return 0; 1588 } 1589 1590 /************************************************************************** 1591 ** The remaining code defines a Tcl interface for testing the asynchronous 1592 ** IO implementation in this file. 
1593 ** 1594 ** To adapt the code to a non-TCL environment, delete or comment out 1595 ** the code that follows. 1596 */ 1597 1598 /* 1599 ** sqlite3async_enable ?YES/NO? 1600 ** 1601 ** Enable or disable the asynchronous I/O backend. This command is 1602 ** not thread-safe. Do not call it while any database connections 1603 ** are open. 1604 */ 1605 static int testAsyncEnable( 1606 void * clientData, 1607 Tcl_Interp *interp, 1608 int objc, 1609 Tcl_Obj *CONST objv[] 1610 ){ 1611 if( objc!=1 && objc!=2 ){ 1612 Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?"); 1613 return TCL_ERROR; 1614 } 1615 if( objc==1 ){ 1616 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0)); 1617 }else{ 1618 int en; 1619 if( Tcl_GetBooleanFromObj(interp, objv[1], &en) ) return TCL_ERROR; 1620 asyncEnable(en); 1621 } 1622 return TCL_OK; 1623 } 1624 1625 /* 1626 ** sqlite3async_halt "now"|"idle"|"never" 1627 ** 1628 ** Set the conditions at which the writer thread will halt. 1629 */ 1630 static int testAsyncHalt( 1631 void * clientData, 1632 Tcl_Interp *interp, 1633 int objc, 1634 Tcl_Obj *CONST objv[] 1635 ){ 1636 const char *zCond; 1637 if( objc!=2 ){ 1638 Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\""); 1639 return TCL_ERROR; 1640 } 1641 zCond = Tcl_GetString(objv[1]); 1642 if( strcmp(zCond, "now")==0 ){ 1643 async.writerHaltNow = 1; 1644 pthread_cond_broadcast(&async.queueSignal); 1645 }else if( strcmp(zCond, "idle")==0 ){ 1646 async.writerHaltWhenIdle = 1; 1647 async.writerHaltNow = 0; 1648 pthread_cond_broadcast(&async.queueSignal); 1649 }else if( strcmp(zCond, "never")==0 ){ 1650 async.writerHaltWhenIdle = 0; 1651 async.writerHaltNow = 0; 1652 }else{ 1653 Tcl_AppendResult(interp, 1654 "should be one of: \"now\", \"idle\", or \"never\"", (char*)0); 1655 return TCL_ERROR; 1656 } 1657 return TCL_OK; 1658 } 1659 1660 /* 1661 ** sqlite3async_delay ?MS? 
1662 ** 1663 ** Query or set the number of milliseconds of delay in the writer 1664 ** thread after each write operation. The default is 0. By increasing 1665 ** the memory delay we can simulate the effect of slow disk I/O. 1666 */ 1667 static int testAsyncDelay( 1668 void * clientData, 1669 Tcl_Interp *interp, 1670 int objc, 1671 Tcl_Obj *CONST objv[] 1672 ){ 1673 if( objc!=1 && objc!=2 ){ 1674 Tcl_WrongNumArgs(interp, 1, objv, "?MS?"); 1675 return TCL_ERROR; 1676 } 1677 if( objc==1 ){ 1678 Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay)); 1679 }else{ 1680 int ioDelay; 1681 if( Tcl_GetIntFromObj(interp, objv[1], &ioDelay) ) return TCL_ERROR; 1682 async.ioDelay = ioDelay; 1683 } 1684 return TCL_OK; 1685 } 1686 1687 /* 1688 ** sqlite3async_start 1689 ** 1690 ** Start a new writer thread. 1691 */ 1692 static int testAsyncStart( 1693 void * clientData, 1694 Tcl_Interp *interp, 1695 int objc, 1696 Tcl_Obj *CONST objv[] 1697 ){ 1698 pthread_t x; 1699 int rc; 1700 volatile int isStarted = 0; 1701 rc = pthread_create(&x, 0, asyncWriterThread, (void *)&isStarted); 1702 if( rc ){ 1703 Tcl_AppendResult(interp, "failed to create the thread", 0); 1704 return TCL_ERROR; 1705 } 1706 pthread_detach(x); 1707 while( isStarted==0 ){ 1708 sched_yield(); 1709 } 1710 return TCL_OK; 1711 } 1712 1713 /* 1714 ** sqlite3async_wait 1715 ** 1716 ** Wait for the current writer thread to terminate. 1717 ** 1718 ** If the current writer thread is set to run forever then this 1719 ** command would block forever. To prevent that, an error is returned. 
1720 */ 1721 static int testAsyncWait( 1722 void * clientData, 1723 Tcl_Interp *interp, 1724 int objc, 1725 Tcl_Obj *CONST objv[] 1726 ){ 1727 int cnt = 10; 1728 if( async.writerHaltNow==0 && async.writerHaltWhenIdle==0 ){ 1729 Tcl_AppendResult(interp, "would block forever", (char*)0); 1730 return TCL_ERROR; 1731 } 1732 1733 while( cnt-- && !pthread_mutex_trylock(&async.writerMutex) ){ 1734 pthread_mutex_unlock(&async.writerMutex); 1735 sched_yield(); 1736 } 1737 if( cnt>=0 ){ 1738 ASYNC_TRACE(("WAIT\n")); 1739 pthread_mutex_lock(&async.queueMutex); 1740 pthread_cond_broadcast(&async.queueSignal); 1741 pthread_mutex_unlock(&async.queueMutex); 1742 pthread_mutex_lock(&async.writerMutex); 1743 pthread_mutex_unlock(&async.writerMutex); 1744 }else{ 1745 ASYNC_TRACE(("NO-WAIT\n")); 1746 } 1747 return TCL_OK; 1748 } 1749 1750 1751 #endif /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */ 1752 1753 /* 1754 ** This routine registers the custom TCL commands defined in this 1755 ** module. This should be the only procedure visible from outside 1756 ** of this module. 1757 */ 1758 int Sqlitetestasync_Init(Tcl_Interp *interp){ 1759 #if SQLITE_OS_UNIX && SQLITE_THREADSAFE 1760 Tcl_CreateObjCommand(interp,"sqlite3async_enable",testAsyncEnable,0,0); 1761 Tcl_CreateObjCommand(interp,"sqlite3async_halt",testAsyncHalt,0,0); 1762 Tcl_CreateObjCommand(interp,"sqlite3async_delay",testAsyncDelay,0,0); 1763 Tcl_CreateObjCommand(interp,"sqlite3async_start",testAsyncStart,0,0); 1764 Tcl_CreateObjCommand(interp,"sqlite3async_wait",testAsyncWait,0,0); 1765 Tcl_LinkVar(interp, "sqlite3async_trace", 1766 (char*)&sqlite3async_trace, TCL_LINK_INT); 1767 #endif /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */ 1768 return TCL_OK; 1769 } 1770