/*
** 2005 December 14
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file contains an example implementation of an asynchronous IO
** backend for SQLite.
**
** WHAT IS ASYNCHRONOUS I/O?
**
** With asynchronous I/O, write requests are handled by a separate thread
** running in the background.  This means that the thread that initiates
** a database write does not have to wait for (sometimes slow) disk I/O
** to occur.  The write seems to happen very quickly, though in reality
** it is happening at its usual slow pace in the background.
**
** Asynchronous I/O appears to give better responsiveness, but at a price.
** You lose the Durable property.  With the default I/O backend of SQLite,
** once a write completes, you know that the information you wrote is
** safely on disk.  With the asynchronous I/O, this is not the case.  If
** your program crashes or if a power loss occurs after the database
** write but before the asynchronous write thread has completed, then the
** database change might never make it to disk and the next user of the
** database might not see your change.
**
** You lose Durability with asynchronous I/O, but you still retain the
** other parts of ACID:  Atomic, Consistent, and Isolated.  Many
** applications get along fine without the Durability.
**
** HOW IT WORKS
**
** Asynchronous I/O works by creating a special SQLite "vfs" structure
** and registering it with sqlite3_vfs_register().
** When files opened via
** this vfs are written to (using sqlite3OsWrite()), the data is not
** written directly to disk, but is placed in the "write-queue" to be
** handled by the background thread.
**
** When files opened with the asynchronous vfs are read from
** (using sqlite3OsRead()), the data is read from the file on
** disk and the write-queue, so that from the point of view of
** the vfs reader the OsWrite() appears to have already completed.
**
** The special vfs is registered (and unregistered) by calls to
** function asyncEnable() (see below).
**
** LIMITATIONS
**
** This demonstration code is deliberately kept simple in order to keep
** the main ideas clear and easy to understand.  Real applications that
** want to do asynchronous I/O might want to add additional capabilities.
** For example, in this demonstration if writes are happening at a steady
** stream that exceeds the I/O capability of the background writer thread,
** the queue of pending write operations will grow without bound until we
** run out of memory.  Users of this technique may want to keep track of
** the quantity of pending writes and stop accepting new write requests
** when the buffer gets to be too big.
**
** LOCKING + CONCURRENCY
**
** Multiple connections from within a single process that use this
** implementation of asynchronous IO may access a single database
** file concurrently. From the point of view of the user, if all
** connections are from within a single process, there is no difference
** between the concurrency offered by "normal" SQLite and SQLite
** using the asynchronous backend.
**
** If connections from within multiple processes may access the
** database file, the ENABLE_FILE_LOCKING symbol (see below) must be
** defined. If it is not defined, then no locks are established on
** the database file.
In this case, if multiple processes access 78 ** the database file, corruption will quickly result. 79 ** 80 ** If ENABLE_FILE_LOCKING is defined (the default), then connections 81 ** from within multiple processes may access a single database file 82 ** without risking corruption. However concurrency is reduced as 83 ** follows: 84 ** 85 ** * When a connection using asynchronous IO begins a database 86 ** transaction, the database is locked immediately. However the 87 ** lock is not released until after all relevant operations 88 ** in the write-queue have been flushed to disk. This means 89 ** (for example) that the database may remain locked for some 90 ** time after a "COMMIT" or "ROLLBACK" is issued. 91 ** 92 ** * If an application using asynchronous IO executes transactions 93 ** in quick succession, other database users may be effectively 94 ** locked out of the database. This is because when a BEGIN 95 ** is executed, a database lock is established immediately. But 96 ** when the corresponding COMMIT or ROLLBACK occurs, the lock 97 ** is not released until the relevant part of the write-queue 98 ** has been flushed through. As a result, if a COMMIT is followed 99 ** by a BEGIN before the write-queue is flushed through, the database 100 ** is never unlocked,preventing other processes from accessing 101 ** the database. 102 ** 103 ** Defining ENABLE_FILE_LOCKING when using an NFS or other remote 104 ** file-system may slow things down, as synchronous round-trips to the 105 ** server may be required to establish database file locks. 106 */ 107 #define ENABLE_FILE_LOCKING 108 109 #include "sqliteInt.h" 110 #include <tcl.h> 111 112 /* 113 ** This test uses pthreads and hence only works on unix and with 114 ** a threadsafe build of SQLite. 115 */ 116 #if OS_UNIX && SQLITE_THREADSAFE 117 118 /* 119 ** This demo uses pthreads. If you do not have a pthreads implementation 120 ** for your operating system, you will need to recode the threading 121 ** logic. 
122 */ 123 #include <pthread.h> 124 #include <sched.h> 125 126 /* Useful macros used in several places */ 127 #define MIN(x,y) ((x)<(y)?(x):(y)) 128 #define MAX(x,y) ((x)>(y)?(x):(y)) 129 130 /* Forward references */ 131 typedef struct AsyncWrite AsyncWrite; 132 typedef struct AsyncFile AsyncFile; 133 typedef struct AsyncFileData AsyncFileData; 134 typedef struct AsyncFileLock AsyncFileLock; 135 typedef struct AsyncLock AsyncLock; 136 137 /* Enable for debugging */ 138 static int sqlite3async_trace = 0; 139 # define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X 140 static void asyncTrace(const char *zFormat, ...){ 141 char *z; 142 va_list ap; 143 va_start(ap, zFormat); 144 z = sqlite3_vmprintf(zFormat, ap); 145 va_end(ap); 146 fprintf(stderr, "[%d] %s", (int)pthread_self(), z); 147 sqlite3_free(z); 148 } 149 150 /* 151 ** THREAD SAFETY NOTES 152 ** 153 ** Basic rules: 154 ** 155 ** * Both read and write access to the global write-op queue must be 156 ** protected by the async.queueMutex. As are the async.ioError and 157 ** async.nFile variables. 158 ** 159 ** * The async.aLock hash-table and all AsyncLock and AsyncFileLock 160 ** structures must be protected by teh async.lockMutex mutex. 161 ** 162 ** * The file handles from the underlying system are assumed not to 163 ** be thread safe. 164 ** 165 ** * See the last two paragraphs under "The Writer Thread" for 166 ** an assumption to do with file-handle synchronization by the Os. 167 ** 168 ** Deadlock prevention: 169 ** 170 ** There are three mutex used by the system: the "writer" mutex, 171 ** the "queue" mutex and the "lock" mutex. Rules are: 172 ** 173 ** * It is illegal to block on the writer mutex when any other mutex 174 ** are held, and 175 ** 176 ** * It is illegal to block on the queue mutex when the lock mutex 177 ** is held. 178 ** 179 ** i.e. mutex's must be grabbed in the order "writer", "queue", "lock". 
**
** File system operations (invoked by SQLite thread):
**
**     xOpen
**     xDelete
**     xFileExists
**
** File handle operations (invoked by SQLite thread):
**
**         asyncWrite, asyncClose, asyncTruncate, asyncSync
**
**     The operations above add an entry to the global write-op list. They
**     prepare the entry, acquire the async.queueMutex momentarily while
**     list pointers are manipulated to insert the new entry, then release
**     the mutex and signal the writer thread to wake up in case it happens
**     to be asleep.
**
**
**         asyncRead, asyncFileSize.
**
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue.  So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
**
**
**         asyncLock, asyncUnlock, asyncCheckReservedLock
**
**     These primitives implement in-process locking using a hash table
**     on the file name.  Files are locked correctly for connections coming
**     from the same process.  But other processes cannot see these locks
**     and will therefore not honor them.
**
**
** The writer thread:
**
**     The async.writerMutex is used to make sure there is only
**     a single writer thread running at a time.
**
**     Inside the writer thread is a loop that works like this:
**
**         WHILE (write-op list is not empty)
**             Do IO operation at head of write-op list
**             Remove entry from head of write-op list
**         END WHILE
**
**     The async.queueMutex is always held during the <write-op list is
**     not empty> test, and when the entry is removed from the head
**     of the write-op list. Sometimes it is held for the interim
**     period (while the IO is performed), and sometimes it is
**     relinquished.
It is relinquished if (a) the IO op is an 232 ** ASYNC_CLOSE or (b) when the file handle was opened, two of 233 ** the underlying systems handles were opened on the same 234 ** file-system entry. 235 ** 236 ** If condition (b) above is true, then one file-handle 237 ** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the 238 ** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush() 239 ** threads to perform write() operations. This means that read 240 ** operations are not blocked by asynchronous writes (although 241 ** asynchronous writes may still be blocked by reads). 242 ** 243 ** This assumes that the OS keeps two handles open on the same file 244 ** properly in sync. That is, any read operation that starts after a 245 ** write operation on the same file system entry has completed returns 246 ** data consistent with the write. We also assume that if one thread 247 ** reads a file while another is writing it all bytes other than the 248 ** ones actually being written contain valid data. 249 ** 250 ** If the above assumptions are not true, set the preprocessor symbol 251 ** SQLITE_ASYNC_TWO_FILEHANDLES to 0. 252 */ 253 254 #ifndef SQLITE_ASYNC_TWO_FILEHANDLES 255 /* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */ 256 #define SQLITE_ASYNC_TWO_FILEHANDLES 1 257 #endif 258 259 /* 260 ** State information is held in the static variable "async" defined 261 ** as the following structure. 262 ** 263 ** Both async.ioError and async.nFile are protected by async.queueMutex. 
*/
static struct TestAsyncStaticData {
  pthread_mutex_t queueMutex;  /* Mutex for access to write operation queue */
  pthread_mutex_t writerMutex; /* Prevents multiple writer threads */
  pthread_mutex_t lockMutex;   /* For access to aLock hash table */
  pthread_cond_t queueSignal;  /* For waking up sleeping writer thread */
  pthread_cond_t emptySignal;  /* Notify when the write queue is empty */
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  Hash aLock;                  /* Files locked */
  volatile int ioDelay;             /* Extra delay between write operations */
  volatile int writerHaltWhenIdle;  /* Writer thread halts when queue empty */
  volatile int writerHaltNow;       /* Writer thread halts after next op */
  int ioError;                 /* True if an IO error has occurred */
  int nFile;                   /* Number of open files (from sqlite pov) */
} async = {
  /* Only the three mutexes and two condition variables are initialized
  ** explicitly; the remaining members of this static object start out
  ** zeroed. */
  PTHREAD_MUTEX_INITIALIZER,
  PTHREAD_MUTEX_INITIALIZER,
  PTHREAD_MUTEX_INITIALIZER,
  PTHREAD_COND_INITIALIZER,
  PTHREAD_COND_INITIALIZER,
};

/* Possible values of AsyncWrite.op */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7

/* Names of opcodes.  Used for debugging only.
** Make sure these stay in sync with the macros above!
** The array is indexed directly by the ASYNC_xxx opcode value.
*/
static const char *azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
};

/*
** Entries on the write-op queue are instances of the AsyncWrite
** structure, defined here.
**
** The interpretation of the iOffset and nByte variables varies depending
** on the value of AsyncWrite.op:
**
** ASYNC_NOOP:
**     No values used.
**
** ASYNC_WRITE:
**     iOffset -> Offset in file to write to.
**     nByte   -> Number of bytes of data to write (pointed to by zBuf).
**
** ASYNC_SYNC:
**     nByte   -> flags to pass to sqlite3OsSync().
**
** ASYNC_TRUNCATE:
**     iOffset -> Size to truncate file to.
**     nByte   -> Unused.
**
** ASYNC_CLOSE:
**     iOffset -> Unused.
**     nByte   -> Unused.
**
** ASYNC_DELETE:
**     iOffset -> Contains the "syncDir" flag.
**     nByte   -> Number of bytes zBuf points to (file name, including
**                its nul terminator).
**
** ASYNC_OPENEXCLUSIVE:
**     iOffset -> Value of "delflag".
**     nByte   -> Number of bytes zBuf points to (file name).
**
** ASYNC_UNLOCK:
**     nByte   -> Argument to sqlite3OsUnlock().
**
**
** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
** single blob, so is deleted when sqlite3_free() is called on the parent
** structure.
*/
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  i64 iOffset;                 /* See above */
  int nByte;          /* See above */
  char *zBuf;         /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
  AsyncWrite *pNext;  /* Next write operation (to any file) */
};

/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and stored in the async.aLock hash table. The
** keys for async.aLock are the full pathnames of the opened files.
**
** AsyncLock.pList points to the head of a linked list of AsyncFileLock
** structures, one for each handle currently open on the file.
**
** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is
** not defined at compile time, variables AsyncLock.pFile and
** AsyncLock.eLock are never used. Otherwise, pFile is a file handle
** opened on the file in question and used to obtain the file-system
** locks required by database connections within this process.
**
** See comments above the asyncLock() function for more details on
** the implementation of database locking used by this backend.
*/
struct AsyncLock {
  sqlite3_file *pFile;     /* Handle used for file-system locks (may be NULL) */
  int eLock;               /* Lock level currently held on pFile */
  AsyncFileLock *pList;    /* List of per-handle lock records for this file */
};

/*
** An instance of the following structure is allocated along with each
** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
** file was opened with the SQLITE_OPEN_MAIN_DB.
*/
struct AsyncFileLock {
  int eLock;                /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;           /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;     /* Next record in AsyncLock.pList */
};

/*
** The AsyncFile structure is a subclass of sqlite3_file used for
** asynchronous IO.
**
** All of the actual data for the structure is stored in the structure
** pointed to by AsyncFile.pData, which is allocated as part of the
** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
** lifetime of the AsyncFile structure is ended by the caller after OsClose()
** is called, but the data in AsyncFileData may be required by the
** writer thread after that point.
401 */ 402 struct AsyncFile { 403 sqlite3_io_methods *pMethod; 404 AsyncFileData *pData; 405 }; 406 struct AsyncFileData { 407 char *zName; /* Underlying OS filename - used for debugging */ 408 int nName; /* Number of characters in zName */ 409 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */ 410 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */ 411 AsyncFileLock lock; 412 AsyncWrite close; 413 }; 414 415 /* 416 ** Add an entry to the end of the global write-op list. pWrite should point 417 ** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer 418 ** thread will call sqlite3_free() to free the structure after the specified 419 ** operation has been completed. 420 ** 421 ** Once an AsyncWrite structure has been added to the list, it becomes the 422 ** property of the writer thread and must not be read or modified by the 423 ** caller. 424 */ 425 static void addAsyncWrite(AsyncWrite *pWrite){ 426 /* We must hold the queue mutex in order to modify the queue pointers */ 427 pthread_mutex_lock(&async.queueMutex); 428 429 /* Add the record to the end of the write-op queue */ 430 assert( !pWrite->pNext ); 431 if( async.pQueueLast ){ 432 assert( async.pQueueFirst ); 433 async.pQueueLast->pNext = pWrite; 434 }else{ 435 async.pQueueFirst = pWrite; 436 } 437 async.pQueueLast = pWrite; 438 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op], 439 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset)); 440 441 if( pWrite->op==ASYNC_CLOSE ){ 442 async.nFile--; 443 } 444 445 /* Drop the queue mutex */ 446 pthread_mutex_unlock(&async.queueMutex); 447 448 /* The writer thread might have been idle because there was nothing 449 ** on the write-op queue for it to do. So wake it up. */ 450 pthread_cond_signal(&async.queueSignal); 451 } 452 453 /* 454 ** Increment async.nFile in a thread-safe manner. 
455 */ 456 static void incrOpenFileCount(){ 457 /* We must hold the queue mutex in order to modify async.nFile */ 458 pthread_mutex_lock(&async.queueMutex); 459 if( async.nFile==0 ){ 460 async.ioError = SQLITE_OK; 461 } 462 async.nFile++; 463 pthread_mutex_unlock(&async.queueMutex); 464 } 465 466 /* 467 ** This is a utility function to allocate and populate a new AsyncWrite 468 ** structure and insert it (via addAsyncWrite() ) into the global list. 469 */ 470 static int addNewAsyncWrite( 471 AsyncFileData *pFileData, 472 int op, 473 i64 iOffset, 474 int nByte, 475 const char *zByte 476 ){ 477 AsyncWrite *p; 478 if( op!=ASYNC_CLOSE && async.ioError ){ 479 return async.ioError; 480 } 481 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0)); 482 if( !p ){ 483 /* The upper layer does not expect operations like OsWrite() to 484 ** return SQLITE_NOMEM. This is partly because under normal conditions 485 ** SQLite is required to do rollback without calling malloc(). So 486 ** if malloc() fails here, treat it as an I/O error. The above 487 ** layer knows how to handle that. 488 */ 489 return SQLITE_IOERR; 490 } 491 p->op = op; 492 p->iOffset = iOffset; 493 p->nByte = nByte; 494 p->pFileData = pFileData; 495 p->pNext = 0; 496 if( zByte ){ 497 p->zBuf = (char *)&p[1]; 498 memcpy(p->zBuf, zByte, nByte); 499 }else{ 500 p->zBuf = 0; 501 } 502 addAsyncWrite(p); 503 return SQLITE_OK; 504 } 505 506 /* 507 ** Close the file. This just adds an entry to the write-op list, the file is 508 ** not actually closed. 509 */ 510 static int asyncClose(sqlite3_file *pFile){ 511 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 512 513 /* Unlock the file, if it is locked */ 514 pthread_mutex_lock(&async.lockMutex); 515 p->lock.eLock = 0; 516 pthread_mutex_unlock(&async.lockMutex); 517 518 addAsyncWrite(&p->close); 519 return SQLITE_OK; 520 } 521 522 /* 523 ** Implementation of sqlite3OsWrite() for asynchronous files. 
Instead of 524 ** writing to the underlying file, this function adds an entry to the end of 525 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be 526 ** returned. 527 */ 528 static int asyncWrite(sqlite3_file *pFile, const void *pBuf, int amt, i64 iOff){ 529 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 530 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf); 531 } 532 533 /* 534 ** Read data from the file. First we read from the filesystem, then adjust 535 ** the contents of the buffer based on ASYNC_WRITE operations in the 536 ** write-op queue. 537 ** 538 ** This method holds the mutex from start to finish. 539 */ 540 static int asyncRead(sqlite3_file *pFile, void *zOut, int iAmt, i64 iOffset){ 541 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 542 int rc = SQLITE_OK; 543 i64 filesize; 544 int nRead; 545 sqlite3_file *pBase = p->pBaseRead; 546 547 /* Grab the write queue mutex for the duration of the call */ 548 pthread_mutex_lock(&async.queueMutex); 549 550 /* If an I/O error has previously occurred in this virtual file 551 ** system, then all subsequent operations fail. 
552 */ 553 if( async.ioError!=SQLITE_OK ){ 554 rc = async.ioError; 555 goto asyncread_out; 556 } 557 558 if( pBase->pMethods ){ 559 rc = sqlite3OsFileSize(pBase, &filesize); 560 if( rc!=SQLITE_OK ){ 561 goto asyncread_out; 562 } 563 nRead = MIN(filesize - iOffset, iAmt); 564 if( nRead>0 ){ 565 rc = sqlite3OsRead(pBase, zOut, nRead, iOffset); 566 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset)); 567 } 568 } 569 570 if( rc==SQLITE_OK ){ 571 AsyncWrite *pWrite; 572 char *zName = p->zName; 573 574 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 575 if( pWrite->op==ASYNC_WRITE && pWrite->pFileData->zName==zName ){ 576 int iBeginOut = (pWrite->iOffset-iOffset); 577 int iBeginIn = -iBeginOut; 578 int nCopy; 579 580 if( iBeginIn<0 ) iBeginIn = 0; 581 if( iBeginOut<0 ) iBeginOut = 0; 582 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut); 583 584 if( nCopy>0 ){ 585 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy); 586 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset)); 587 } 588 } 589 } 590 } 591 592 asyncread_out: 593 pthread_mutex_unlock(&async.queueMutex); 594 return rc; 595 } 596 597 /* 598 ** Truncate the file to nByte bytes in length. This just adds an entry to 599 ** the write-op list, no IO actually takes place. 600 */ 601 static int asyncTruncate(sqlite3_file *pFile, i64 nByte){ 602 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 603 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0); 604 } 605 606 /* 607 ** Sync the file. This just adds an entry to the write-op list, the 608 ** sync() is done later by sqlite3_async_flush(). 609 */ 610 static int asyncSync(sqlite3_file *pFile, int flags){ 611 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 612 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0); 613 } 614 615 /* 616 ** Read the size of the file. 
First we read the size of the file system 617 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations 618 ** currently in the write-op list. 619 ** 620 ** This method holds the mutex from start to finish. 621 */ 622 int asyncFileSize(sqlite3_file *pFile, i64 *piSize){ 623 AsyncFileData *p = ((AsyncFile *)pFile)->pData; 624 int rc = SQLITE_OK; 625 i64 s = 0; 626 sqlite3_file *pBase; 627 628 pthread_mutex_lock(&async.queueMutex); 629 630 /* Read the filesystem size from the base file. If pBaseRead is NULL, this 631 ** means the file hasn't been opened yet. In this case all relevant data 632 ** must be in the write-op queue anyway, so we can omit reading from the 633 ** file-system. 634 */ 635 pBase = p->pBaseRead; 636 if( pBase->pMethods ){ 637 rc = sqlite3OsFileSize(pBase, &s); 638 } 639 640 if( rc==SQLITE_OK ){ 641 AsyncWrite *pWrite; 642 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ 643 if( pWrite->op==ASYNC_DELETE && strcmp(p->zName, pWrite->zBuf)==0 ){ 644 s = 0; 645 }else if( pWrite->pFileData && pWrite->pFileData->zName==p->zName){ 646 switch( pWrite->op ){ 647 case ASYNC_WRITE: 648 s = MAX(pWrite->iOffset + (i64)(pWrite->nByte), s); 649 break; 650 case ASYNC_TRUNCATE: 651 s = MIN(s, pWrite->iOffset); 652 break; 653 } 654 } 655 } 656 *piSize = s; 657 } 658 pthread_mutex_unlock(&async.queueMutex); 659 return rc; 660 } 661 662 /* 663 ** Lock or unlock the actual file-system entry. 
**
** The level required on the file is the largest eAsyncLock value of any
** handle in pLock->pList. If that is above the level currently held on
** pLock->pFile the lock is upgraded with sqlite3OsLock(). If it is below
** the current level, and no higher than SQLITE_LOCK_SHARED, the lock is
** downgraded with sqlite3OsUnlock(). pLock->eLock is updated only when the
** underlying call succeeds. Nothing is done if pLock->pFile is NULL.
**
** Returns SQLITE_OK or the error code from the underlying lock call.
*/
static int getFileLock(AsyncLock *pLock){
  int rc = SQLITE_OK;
  AsyncFileLock *pIter;
  int eRequired = 0;

  if( pLock->pFile ){
    /* Find the strongest lock wanted by any handle on this file */
    for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
      assert(pIter->eAsyncLock>=pIter->eLock);
      if( pIter->eAsyncLock>eRequired ){
        eRequired = pIter->eAsyncLock;
        assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
      }
    }

    if( eRequired>pLock->eLock ){
      rc = sqlite3OsLock(pLock->pFile, eRequired);
      if( rc==SQLITE_OK ){
        pLock->eLock = eRequired;
      }
    }
    else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
      rc = sqlite3OsUnlock(pLock->pFile, eRequired);
      if( rc==SQLITE_OK ){
        pLock->eLock = eRequired;
      }
    }
  }

  return rc;
}

/*
** The following two methods - asyncLock() and asyncUnlock() - are used
** to obtain and release locks on database files opened with the
** asynchronous backend.
**
** asyncLock() first checks the requested level against the eLock value of
** every other handle open on the same file and returns SQLITE_BUSY on a
** conflict. Otherwise it records the new level in both eLock and
** eAsyncLock and calls getFileLock() to adjust the file-system lock.
**
** asyncUnlock() lowers eLock immediately and queues an ASYNC_UNLOCK
** operation; eAsyncLock is not changed here.
*/
static int asyncLock(sqlite3_file *pFile, int eLock){
  int rc = SQLITE_OK;
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;

  pthread_mutex_lock(&async.lockMutex);
  if( p->lock.eLock<eLock ){
    AsyncLock *pLock;
    AsyncFileLock *pIter;
    pLock = (AsyncLock *)sqlite3HashFind(&async.aLock, p->zName, p->nName);
    assert(pLock && pLock->pList);
    /* Look for a conflicting lock held by another handle on this file */
    for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
      if( pIter!=&p->lock && (
        (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
        (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
        (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
        (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
      )){
        rc = SQLITE_BUSY;
      }
    }
    if( rc==SQLITE_OK ){
      p->lock.eLock = eLock;
      p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
    }
    assert(p->lock.eAsyncLock>=p->lock.eLock);
    if( rc==SQLITE_OK ){
      rc = getFileLock(pLock);
    }
  }
  pthread_mutex_unlock(&async.lockMutex);

  ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
  return rc;
}
static int asyncUnlock(sqlite3_file *pFile, int eLock){
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
  AsyncFileLock *pLock = &p->lock;
  pthread_mutex_lock(&async.lockMutex);
  pLock->eLock = MIN(pLock->eLock, eLock);
  pthread_mutex_unlock(&async.lockMutex);
  /* The lock mutex is released before the queue mutex is taken inside
  ** addNewAsyncWrite(). */
  return addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
}

/*
** This function is called when the pager layer first opens a database file
** and is checking for a hot-journal.
**
** Returns 1 if any handle open on the same file within this process has
** an eLock of SQLITE_LOCK_RESERVED or greater, otherwise 0.
*/
static int asyncCheckReservedLock(sqlite3_file *pFile){
  int ret = 0;
  AsyncFileLock *pIter;
  AsyncLock *pLock;
  AsyncFileData *p = ((AsyncFile *)pFile)->pData;

  pthread_mutex_lock(&async.lockMutex);
  pLock = (AsyncLock *)sqlite3HashFind(&async.aLock, p->zName, p->nName);
  for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
    if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
      ret = 1;
    }
  }
  pthread_mutex_unlock(&async.lockMutex);

  ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
  return ret;
}

/*
** File-control handler. The only operation implemented is
** SQLITE_FCNTL_LOCKSTATE, which stores this handle's current eLock value
** in the int that pArg points to and returns SQLITE_OK. Every other
** opcode returns SQLITE_ERROR.
*/
static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
  switch( op ){
    case SQLITE_FCNTL_LOCKSTATE: {
      pthread_mutex_lock(&async.lockMutex);
      *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
      pthread_mutex_unlock(&async.lockMutex);
      return SQLITE_OK;
    }
  }
  return SQLITE_ERROR;
}

/*
** Return the device characteristics and sector-size of the device. It
** is tricky to implement these correctly, as this backend might
** not have an open file handle at this point.
786 */ 787 static int asyncSectorSize(sqlite3_file *pFile){ 788 return 512; 789 } 790 static int asyncDeviceCharacteristics(sqlite3_file *pFile){ 791 return 0; 792 } 793 794 static int unlinkAsyncFile(AsyncFileData *pData){ 795 AsyncLock *pLock; 796 AsyncFileLock **ppIter; 797 int rc = SQLITE_OK; 798 799 pLock = sqlite3HashFind(&async.aLock, pData->zName, pData->nName); 800 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){ 801 if( (*ppIter)==&pData->lock ){ 802 *ppIter = pData->lock.pNext; 803 break; 804 } 805 } 806 if( !pLock->pList ){ 807 if( pLock->pFile ){ 808 sqlite3OsClose(pLock->pFile); 809 } 810 sqlite3_free(pLock); 811 sqlite3HashInsert(&async.aLock, pData->zName, pData->nName, 0); 812 if( !sqliteHashFirst(&async.aLock) ){ 813 sqlite3HashClear(&async.aLock); 814 } 815 }else{ 816 rc = getFileLock(pLock); 817 } 818 819 return rc; 820 } 821 822 /* 823 ** Open a file. 824 */ 825 static int asyncOpen( 826 sqlite3_vfs *pAsyncVfs, 827 const char *zName, 828 sqlite3_file *pFile, 829 int flags, 830 int *pOutFlags 831 ){ 832 static sqlite3_io_methods async_methods = { 833 1, /* iVersion */ 834 asyncClose, /* xClose */ 835 asyncRead, /* xRead */ 836 asyncWrite, /* xWrite */ 837 asyncTruncate, /* xTruncate */ 838 asyncSync, /* xSync */ 839 asyncFileSize, /* xFileSize */ 840 asyncLock, /* xLock */ 841 asyncUnlock, /* xUnlock */ 842 asyncCheckReservedLock, /* xCheckReservedLock */ 843 asyncFileControl, /* xFileControl */ 844 asyncSectorSize, /* xSectorSize */ 845 asyncDeviceCharacteristics /* xDeviceCharacteristics */ 846 }; 847 848 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 849 AsyncFile *p = (AsyncFile *)pFile; 850 int nName = strlen(zName)+1; 851 int rc = SQLITE_OK; 852 int nByte; 853 AsyncFileData *pData; 854 AsyncLock *pLock = 0; 855 int isExclusive = (flags&SQLITE_OPEN_EXCLUSIVE); 856 857 nByte = ( 858 sizeof(AsyncFileData) + /* AsyncFileData structure */ 859 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */ 860 nName /* 
AsyncFileData.zName */ 861 ); 862 pData = sqlite3_malloc(nByte); 863 if( !pData ){ 864 return SQLITE_NOMEM; 865 } 866 memset(pData, 0, nByte); 867 pData->zName = (char *)&pData[1]; 868 pData->nName = nName; 869 pData->pBaseRead = (sqlite3_file *)&pData->zName[nName]; 870 pData->pBaseWrite = (sqlite3_file *)&pData->zName[nName+pVfs->szOsFile]; 871 pData->close.pFileData = pData; 872 pData->close.op = ASYNC_CLOSE; 873 memcpy(pData->zName, zName, nName); 874 875 if( !isExclusive ){ 876 rc = sqlite3OsOpen(pVfs, zName, pData->pBaseRead, flags, pOutFlags); 877 if( rc==SQLITE_OK && ((*pOutFlags)&SQLITE_OPEN_READWRITE) ){ 878 rc = sqlite3OsOpen(pVfs, zName, pData->pBaseWrite, flags, 0); 879 } 880 } 881 882 pthread_mutex_lock(&async.lockMutex); 883 884 if( rc==SQLITE_OK ){ 885 pLock = sqlite3HashFind(&async.aLock, pData->zName, pData->nName); 886 if( !pLock ){ 887 pLock = sqlite3MallocZero(pVfs->szOsFile + sizeof(AsyncLock)); 888 if( pLock ){ 889 AsyncLock *pDelete; 890 #ifdef ENABLE_FILE_LOCKING 891 if( flags&SQLITE_OPEN_MAIN_DB ){ 892 pLock->pFile = (sqlite3_file *)&pLock[1]; 893 rc = sqlite3OsOpen(pVfs, zName, pLock->pFile, flags, 0); 894 if( rc!=SQLITE_OK ){ 895 sqlite3_free(pLock); 896 pLock = 0; 897 } 898 } 899 #endif 900 pDelete = sqlite3HashInsert( 901 &async.aLock, pData->zName, pData->nName, (void *)pLock 902 ); 903 if( pDelete ){ 904 rc = SQLITE_NOMEM; 905 sqlite3_free(pLock); 906 } 907 }else{ 908 rc = SQLITE_NOMEM; 909 } 910 } 911 } 912 913 if( rc==SQLITE_OK ){ 914 HashElem *pElem; 915 p->pMethod = &async_methods; 916 p->pData = pData; 917 incrOpenFileCount(); 918 919 /* Link AsyncFileData.lock into the linked list of 920 ** AsyncFileLock structures for this file. 
921 */ 922 pData->lock.pNext = pLock->pList; 923 pLock->pList = &pData->lock; 924 925 pElem = sqlite3HashFindElem(&async.aLock, pData->zName, pData->nName); 926 pData->zName = (char *)sqliteHashKey(pElem); 927 }else{ 928 sqlite3OsClose(pData->pBaseRead); 929 sqlite3OsClose(pData->pBaseWrite); 930 sqlite3_free(pData); 931 } 932 933 pthread_mutex_unlock(&async.lockMutex); 934 935 if( rc==SQLITE_OK && isExclusive ){ 936 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (i64)flags, 0, 0); 937 if( rc==SQLITE_OK ){ 938 if( pOutFlags ) *pOutFlags = flags; 939 }else{ 940 pthread_mutex_lock(&async.lockMutex); 941 unlinkAsyncFile(pData); 942 pthread_mutex_unlock(&async.lockMutex); 943 sqlite3_free(pData); 944 } 945 } 946 return rc; 947 } 948 949 /* 950 ** Implementation of sqlite3OsDelete. Add an entry to the end of the 951 ** write-op queue to perform the delete. 952 */ 953 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){ 954 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z); 955 } 956 957 /* 958 ** Implementation of sqlite3OsAccess. This method holds the mutex from 959 ** start to finish. 
960 */ 961 static int asyncAccess(sqlite3_vfs *pAsyncVfs, const char *zName, int flags){ 962 int ret; 963 AsyncWrite *p; 964 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 965 966 assert(flags==SQLITE_ACCESS_READWRITE 967 || flags==SQLITE_ACCESS_READ 968 || flags==SQLITE_ACCESS_EXISTS 969 ); 970 971 pthread_mutex_lock(&async.queueMutex); 972 ret = sqlite3OsAccess(pVfs, zName, flags); 973 if( flags==SQLITE_ACCESS_EXISTS ){ 974 for(p=async.pQueueFirst; p; p = p->pNext){ 975 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){ 976 ret = 0; 977 }else if( p->op==ASYNC_OPENEXCLUSIVE 978 && 0==strcmp(p->pFileData->zName, zName) 979 ){ 980 ret = 1; 981 } 982 } 983 } 984 ASYNC_TRACE(("ACCESS(%s): %s = %d\n", 985 flags==SQLITE_ACCESS_READWRITE?"read-write": 986 flags==SQLITE_ACCESS_READ?"read":"exists" 987 , zName, ret) 988 ); 989 pthread_mutex_unlock(&async.queueMutex); 990 return ret; 991 } 992 993 static int asyncGetTempName(sqlite3_vfs *pAsyncVfs, char *zBufOut){ 994 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 995 return pVfs->xGetTempName(pVfs, zBufOut); 996 } 997 998 /* 999 ** Fill in zPathOut with the full path to the file identified by zPath. 1000 */ 1001 static int asyncFullPathname( 1002 sqlite3_vfs *pAsyncVfs, 1003 const char *zPath, 1004 char *zPathOut 1005 ){ 1006 int rc; 1007 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1008 rc = sqlite3OsFullPathname(pVfs, zPath, zPathOut); 1009 1010 /* Because of the way intra-process file locking works, this backend 1011 ** needs to return a canonical path. The following block assumes the 1012 ** file-system uses unix style paths. 
1013 */ 1014 if( rc==SQLITE_OK ){ 1015 int iIn; 1016 int iOut = 0; 1017 int nPathOut = strlen(zPathOut); 1018 1019 for(iIn=0; iIn<nPathOut; iIn++){ 1020 1021 /* Replace any occurences of "//" with "/" */ 1022 if( iIn<=(nPathOut-2) && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='/' 1023 ){ 1024 continue; 1025 } 1026 1027 /* Replace any occurences of "/./" with "/" */ 1028 if( iIn<=(nPathOut-3) 1029 && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' && zPathOut[iIn+2]=='/' 1030 ){ 1031 iIn++; 1032 continue; 1033 } 1034 1035 /* Replace any occurences of "<path-component>/../" with "" */ 1036 if( iOut>0 && iIn<=(nPathOut-4) 1037 && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' 1038 && zPathOut[iIn+2]=='.' && zPathOut[iIn+3]=='/' 1039 ){ 1040 iIn += 3; 1041 iOut--; 1042 for( ; iOut>0 && zPathOut[iOut-1]!='/'; iOut--); 1043 continue; 1044 } 1045 1046 zPathOut[iOut++] = zPathOut[iIn]; 1047 } 1048 zPathOut[iOut] = '\0'; 1049 } 1050 1051 return rc; 1052 } 1053 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){ 1054 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1055 return pVfs->xDlOpen(pVfs, zPath); 1056 } 1057 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){ 1058 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1059 pVfs->xDlError(pVfs, nByte, zErrMsg); 1060 } 1061 static void *asyncDlSym( 1062 sqlite3_vfs *pAsyncVfs, 1063 void *pHandle, 1064 const char *zSymbol 1065 ){ 1066 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1067 return pVfs->xDlSym(pVfs, pHandle, zSymbol); 1068 } 1069 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){ 1070 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1071 pVfs->xDlClose(pVfs, pHandle); 1072 } 1073 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){ 1074 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1075 return pVfs->xRandomness(pVfs, nByte, zBufOut); 1076 } 1077 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){ 1078 
sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1079 return pVfs->xSleep(pVfs, nMicro); 1080 } 1081 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){ 1082 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; 1083 return pVfs->xCurrentTime(pVfs, pTimeOut); 1084 } 1085 1086 static sqlite3_vfs async_vfs = { 1087 1, /* iVersion */ 1088 sizeof(AsyncFile), /* szOsFile */ 1089 0, /* mxPathname */ 1090 0, /* pNext */ 1091 "async", /* zName */ 1092 0, /* pAppData */ 1093 asyncOpen, /* xOpen */ 1094 asyncDelete, /* xDelete */ 1095 asyncAccess, /* xAccess */ 1096 asyncGetTempName, /* xGetTempName */ 1097 asyncFullPathname, /* xFullPathname */ 1098 asyncDlOpen, /* xDlOpen */ 1099 asyncDlError, /* xDlError */ 1100 asyncDlSym, /* xDlSym */ 1101 asyncDlClose, /* xDlClose */ 1102 asyncRandomness, /* xDlError */ 1103 asyncSleep, /* xDlSym */ 1104 asyncCurrentTime /* xDlClose */ 1105 }; 1106 1107 /* 1108 ** Call this routine to enable or disable the 1109 ** asynchronous IO features implemented in this file. 1110 ** 1111 ** This routine is not even remotely threadsafe. Do not call 1112 ** this routine while any SQLite database connections are open. 1113 */ 1114 static void asyncEnable(int enable){ 1115 if( enable ){ 1116 if( !async_vfs.pAppData ){ 1117 static int hashTableInit = 0; 1118 async_vfs.pAppData = (void *)sqlite3_vfs_find(0); 1119 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname; 1120 sqlite3_vfs_register(&async_vfs, 1); 1121 if( !hashTableInit ){ 1122 sqlite3HashInit(&async.aLock, SQLITE_HASH_BINARY, 1); 1123 hashTableInit = 1; 1124 } 1125 } 1126 }else{ 1127 if( async_vfs.pAppData ){ 1128 sqlite3_vfs_unregister(&async_vfs); 1129 async_vfs.pAppData = 0; 1130 } 1131 } 1132 } 1133 1134 /* 1135 ** This procedure runs in a separate thread, reading messages off of the 1136 ** write queue and processing them one by one. 
1137 ** 1138 ** If async.writerHaltNow is true, then this procedure exits 1139 ** after processing a single message. 1140 ** 1141 ** If async.writerHaltWhenIdle is true, then this procedure exits when 1142 ** the write queue is empty. 1143 ** 1144 ** If both of the above variables are false, this procedure runs 1145 ** indefinately, waiting for operations to be added to the write queue 1146 ** and processing them in the order in which they arrive. 1147 ** 1148 ** An artifical delay of async.ioDelay milliseconds is inserted before 1149 ** each write operation in order to simulate the effect of a slow disk. 1150 ** 1151 ** Only one instance of this procedure may be running at a time. 1152 */ 1153 static void *asyncWriterThread(void *NotUsed){ 1154 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData); 1155 AsyncWrite *p = 0; 1156 int rc = SQLITE_OK; 1157 int holdingMutex = 0; 1158 1159 if( pthread_mutex_trylock(&async.writerMutex) ){ 1160 return 0; 1161 } 1162 while( async.writerHaltNow==0 ){ 1163 int doNotFree = 0; 1164 sqlite3_file *pBase = 0; 1165 1166 if( !holdingMutex ){ 1167 pthread_mutex_lock(&async.queueMutex); 1168 } 1169 while( (p = async.pQueueFirst)==0 ){ 1170 pthread_cond_broadcast(&async.emptySignal); 1171 if( async.writerHaltWhenIdle ){ 1172 pthread_mutex_unlock(&async.queueMutex); 1173 break; 1174 }else{ 1175 ASYNC_TRACE(("IDLE\n")); 1176 pthread_cond_wait(&async.queueSignal, &async.queueMutex); 1177 ASYNC_TRACE(("WAKEUP\n")); 1178 } 1179 } 1180 if( p==0 ) break; 1181 holdingMutex = 1; 1182 1183 /* Right now this thread is holding the mutex on the write-op queue. 1184 ** Variable 'p' points to the first entry in the write-op queue. In 1185 ** the general case, we hold on to the mutex for the entire body of 1186 ** the loop. 1187 ** 1188 ** However in the cases enumerated below, we relinquish the mutex, 1189 ** perform the IO, and then re-request the mutex before removing 'p' from 1190 ** the head of the write-op queue. 
The idea is to increase concurrency with 1191 ** sqlite threads. 1192 ** 1193 ** * An ASYNC_CLOSE operation. 1194 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish 1195 ** the mutex, call the underlying xOpenExclusive() function, then 1196 ** re-aquire the mutex before seting the AsyncFile.pBaseRead 1197 ** variable. 1198 ** * ASYNC_SYNC and ASYNC_WRITE operations, if 1199 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two 1200 ** file-handles are open for the particular file being "synced". 1201 */ 1202 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){ 1203 p->op = ASYNC_NOOP; 1204 } 1205 if( p->pFileData ){ 1206 pBase = p->pFileData->pBaseWrite; 1207 if( 1208 p->op==ASYNC_CLOSE || 1209 p->op==ASYNC_OPENEXCLUSIVE || 1210 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) 1211 ){ 1212 pthread_mutex_unlock(&async.queueMutex); 1213 holdingMutex = 0; 1214 } 1215 if( !pBase->pMethods ){ 1216 pBase = p->pFileData->pBaseRead; 1217 } 1218 } 1219 1220 switch( p->op ){ 1221 case ASYNC_NOOP: 1222 break; 1223 1224 case ASYNC_WRITE: 1225 assert( pBase ); 1226 ASYNC_TRACE(("WRITE %s %d bytes at %d\n", 1227 p->pFileData->zName, p->nByte, p->iOffset)); 1228 rc = sqlite3OsWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset); 1229 break; 1230 1231 case ASYNC_SYNC: 1232 assert( pBase ); 1233 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName)); 1234 rc = sqlite3OsSync(pBase, p->nByte); 1235 break; 1236 1237 case ASYNC_TRUNCATE: 1238 assert( pBase ); 1239 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", 1240 p->pFileData->zName, p->iOffset)); 1241 rc = sqlite3OsTruncate(pBase, p->iOffset); 1242 break; 1243 1244 case ASYNC_CLOSE: { 1245 AsyncFileData *pData = p->pFileData; 1246 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName)); 1247 sqlite3OsClose(pData->pBaseWrite); 1248 sqlite3OsClose(pData->pBaseRead); 1249 1250 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock 1251 ** structures for this file. 
Obtain the async.lockMutex mutex 1252 ** before doing so. 1253 */ 1254 pthread_mutex_lock(&async.lockMutex); 1255 rc = unlinkAsyncFile(pData); 1256 pthread_mutex_unlock(&async.lockMutex); 1257 1258 async.pQueueFirst = p->pNext; 1259 sqlite3_free(pData); 1260 doNotFree = 1; 1261 break; 1262 } 1263 1264 case ASYNC_UNLOCK: { 1265 AsyncLock *pLock; 1266 AsyncFileData *pData = p->pFileData; 1267 int eLock = p->nByte; 1268 pthread_mutex_lock(&async.lockMutex); 1269 pData->lock.eAsyncLock = MIN( 1270 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock) 1271 ); 1272 assert(pData->lock.eAsyncLock>=pData->lock.eLock); 1273 pLock = sqlite3HashFind(&async.aLock, pData->zName, pData->nName); 1274 rc = getFileLock(pLock); 1275 pthread_mutex_unlock(&async.lockMutex); 1276 break; 1277 } 1278 1279 case ASYNC_DELETE: 1280 ASYNC_TRACE(("DELETE %s\n", p->zBuf)); 1281 rc = sqlite3OsDelete(pVfs, p->zBuf, (int)p->iOffset); 1282 break; 1283 1284 case ASYNC_OPENEXCLUSIVE: { 1285 int flags = (int)p->iOffset; 1286 AsyncFileData *pData = p->pFileData; 1287 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset)); 1288 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0); 1289 rc = sqlite3OsOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0); 1290 assert( holdingMutex==0 ); 1291 pthread_mutex_lock(&async.queueMutex); 1292 holdingMutex = 1; 1293 break; 1294 } 1295 1296 default: assert(!"Illegal value for AsyncWrite.op"); 1297 } 1298 1299 /* If we didn't hang on to the mutex during the IO op, obtain it now 1300 ** so that the AsyncWrite structure can be safely removed from the 1301 ** global write-op queue. 
1302 */ 1303 if( !holdingMutex ){ 1304 pthread_mutex_lock(&async.queueMutex); 1305 holdingMutex = 1; 1306 } 1307 /* ASYNC_TRACE(("UNLINK %p\n", p)); */ 1308 if( p==async.pQueueLast ){ 1309 async.pQueueLast = 0; 1310 } 1311 if( !doNotFree ){ 1312 async.pQueueFirst = p->pNext; 1313 sqlite3_free(p); 1314 } 1315 assert( holdingMutex ); 1316 1317 /* An IO error has occured. We cannot report the error back to the 1318 ** connection that requested the I/O since the error happened 1319 ** asynchronously. The connection has already moved on. There 1320 ** really is nobody to report the error to. 1321 ** 1322 ** The file for which the error occured may have been a database or 1323 ** journal file. Regardless, none of the currently queued operations 1324 ** associated with the same database should now be performed. Nor should 1325 ** any subsequently requested IO on either a database or journal file 1326 ** handle for the same database be accepted until the main database 1327 ** file handle has been closed and reopened. 1328 ** 1329 ** Furthermore, no further IO should be queued or performed on any file 1330 ** handle associated with a database that may have been part of a 1331 ** multi-file transaction that included the database associated with 1332 ** the IO error (i.e. a database ATTACHed to the same handle at some 1333 ** point in time). 1334 */ 1335 if( rc!=SQLITE_OK ){ 1336 async.ioError = rc; 1337 } 1338 1339 if( async.ioError && !async.pQueueFirst ){ 1340 pthread_mutex_lock(&async.lockMutex); 1341 if( 0==sqliteHashFirst(&async.aLock) ){ 1342 async.ioError = SQLITE_OK; 1343 } 1344 pthread_mutex_unlock(&async.lockMutex); 1345 } 1346 1347 /* Drop the queue mutex before continuing to the next write operation 1348 ** in order to give other threads a chance to work with the write queue. 
1349 */ 1350 if( !async.pQueueFirst || !async.ioError ){ 1351 pthread_mutex_unlock(&async.queueMutex); 1352 holdingMutex = 0; 1353 if( async.ioDelay>0 ){ 1354 sqlite3OsSleep(pVfs, async.ioDelay); 1355 }else{ 1356 sched_yield(); 1357 } 1358 } 1359 } 1360 1361 pthread_mutex_unlock(&async.writerMutex); 1362 return 0; 1363 } 1364 1365 /************************************************************************** 1366 ** The remaining code defines a Tcl interface for testing the asynchronous 1367 ** IO implementation in this file. 1368 ** 1369 ** To adapt the code to a non-TCL environment, delete or comment out 1370 ** the code that follows. 1371 */ 1372 1373 /* 1374 ** sqlite3async_enable ?YES/NO? 1375 ** 1376 ** Enable or disable the asynchronous I/O backend. This command is 1377 ** not thread-safe. Do not call it while any database connections 1378 ** are open. 1379 */ 1380 static int testAsyncEnable( 1381 void * clientData, 1382 Tcl_Interp *interp, 1383 int objc, 1384 Tcl_Obj *CONST objv[] 1385 ){ 1386 if( objc!=1 && objc!=2 ){ 1387 Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?"); 1388 return TCL_ERROR; 1389 } 1390 if( objc==1 ){ 1391 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0)); 1392 }else{ 1393 int en; 1394 if( Tcl_GetBooleanFromObj(interp, objv[1], &en) ) return TCL_ERROR; 1395 asyncEnable(en); 1396 } 1397 return TCL_OK; 1398 } 1399 1400 /* 1401 ** sqlite3async_halt "now"|"idle"|"never" 1402 ** 1403 ** Set the conditions at which the writer thread will halt. 
1404 */ 1405 static int testAsyncHalt( 1406 void * clientData, 1407 Tcl_Interp *interp, 1408 int objc, 1409 Tcl_Obj *CONST objv[] 1410 ){ 1411 const char *zCond; 1412 if( objc!=2 ){ 1413 Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\""); 1414 return TCL_ERROR; 1415 } 1416 zCond = Tcl_GetString(objv[1]); 1417 if( strcmp(zCond, "now")==0 ){ 1418 async.writerHaltNow = 1; 1419 pthread_cond_broadcast(&async.queueSignal); 1420 }else if( strcmp(zCond, "idle")==0 ){ 1421 async.writerHaltWhenIdle = 1; 1422 async.writerHaltNow = 0; 1423 pthread_cond_broadcast(&async.queueSignal); 1424 }else if( strcmp(zCond, "never")==0 ){ 1425 async.writerHaltWhenIdle = 0; 1426 async.writerHaltNow = 0; 1427 }else{ 1428 Tcl_AppendResult(interp, 1429 "should be one of: \"now\", \"idle\", or \"never\"", (char*)0); 1430 return TCL_ERROR; 1431 } 1432 return TCL_OK; 1433 } 1434 1435 /* 1436 ** sqlite3async_delay ?MS? 1437 ** 1438 ** Query or set the number of milliseconds of delay in the writer 1439 ** thread after each write operation. The default is 0. By increasing 1440 ** the memory delay we can simulate the effect of slow disk I/O. 1441 */ 1442 static int testAsyncDelay( 1443 void * clientData, 1444 Tcl_Interp *interp, 1445 int objc, 1446 Tcl_Obj *CONST objv[] 1447 ){ 1448 if( objc!=1 && objc!=2 ){ 1449 Tcl_WrongNumArgs(interp, 1, objv, "?MS?"); 1450 return TCL_ERROR; 1451 } 1452 if( objc==1 ){ 1453 Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay)); 1454 }else{ 1455 int ioDelay; 1456 if( Tcl_GetIntFromObj(interp, objv[1], &ioDelay) ) return TCL_ERROR; 1457 async.ioDelay = ioDelay; 1458 } 1459 return TCL_OK; 1460 } 1461 1462 /* 1463 ** sqlite3async_start 1464 ** 1465 ** Start a new writer thread. 
1466 */ 1467 static int testAsyncStart( 1468 void * clientData, 1469 Tcl_Interp *interp, 1470 int objc, 1471 Tcl_Obj *CONST objv[] 1472 ){ 1473 pthread_t x; 1474 int rc; 1475 rc = pthread_create(&x, 0, asyncWriterThread, 0); 1476 if( rc ){ 1477 Tcl_AppendResult(interp, "failed to create the thread", 0); 1478 return TCL_ERROR; 1479 } 1480 pthread_detach(x); 1481 return TCL_OK; 1482 } 1483 1484 /* 1485 ** sqlite3async_wait 1486 ** 1487 ** Wait for the current writer thread to terminate. 1488 ** 1489 ** If the current writer thread is set to run forever then this 1490 ** command would block forever. To prevent that, an error is returned. 1491 */ 1492 static int testAsyncWait( 1493 void * clientData, 1494 Tcl_Interp *interp, 1495 int objc, 1496 Tcl_Obj *CONST objv[] 1497 ){ 1498 int cnt = 10; 1499 if( async.writerHaltNow==0 && async.writerHaltWhenIdle==0 ){ 1500 Tcl_AppendResult(interp, "would block forever", (char*)0); 1501 return TCL_ERROR; 1502 } 1503 1504 while( cnt-- && !pthread_mutex_trylock(&async.writerMutex) ){ 1505 pthread_mutex_unlock(&async.writerMutex); 1506 sched_yield(); 1507 } 1508 if( cnt>=0 ){ 1509 ASYNC_TRACE(("WAIT\n")); 1510 pthread_mutex_lock(&async.queueMutex); 1511 pthread_cond_broadcast(&async.queueSignal); 1512 pthread_mutex_unlock(&async.queueMutex); 1513 pthread_mutex_lock(&async.writerMutex); 1514 pthread_mutex_unlock(&async.writerMutex); 1515 }else{ 1516 ASYNC_TRACE(("NO-WAIT\n")); 1517 } 1518 return TCL_OK; 1519 } 1520 1521 1522 #endif /* OS_UNIX and SQLITE_THREADSAFE */ 1523 1524 /* 1525 ** This routine registers the custom TCL commands defined in this 1526 ** module. This should be the only procedure visible from outside 1527 ** of this module. 
1528 */ 1529 int Sqlitetestasync_Init(Tcl_Interp *interp){ 1530 #if OS_UNIX && SQLITE_THREADSAFE 1531 Tcl_CreateObjCommand(interp,"sqlite3async_enable",testAsyncEnable,0,0); 1532 Tcl_CreateObjCommand(interp,"sqlite3async_halt",testAsyncHalt,0,0); 1533 Tcl_CreateObjCommand(interp,"sqlite3async_delay",testAsyncDelay,0,0); 1534 Tcl_CreateObjCommand(interp,"sqlite3async_start",testAsyncStart,0,0); 1535 Tcl_CreateObjCommand(interp,"sqlite3async_wait",testAsyncWait,0,0); 1536 Tcl_LinkVar(interp, "sqlite3async_trace", 1537 (char*)&sqlite3async_trace, TCL_LINK_INT); 1538 #endif /* OS_UNIX and SQLITE_THREADSAFE */ 1539 return TCL_OK; 1540 } 1541