/*
** 2004 May 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the VFS implementation for unix-like operating systems
** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
**
** There are actually several different VFS implementations in this file.
** The differences are in the way that file locking is done.  The default
** implementation uses Posix Advisory Locks.  Alternative implementations
** use flock(), dot-files, various proprietary locking schemas, or simply
** skip locking altogether.
**
** This source file is organized into divisions where the logic for various
** subfunctions is contained within the appropriate division.  PLEASE
** KEEP THE STRUCTURE OF THIS FILE INTACT.  New code should be placed
** in the correct division and should be clearly labeled.
**
** The layout of divisions is as follows:
**
**   *  General-purpose declarations and utility functions.
**   *  Unique file ID logic used by VxWorks.
**   *  Various locking primitive implementations (all except proxy locking):
**      + for Posix Advisory Locks
**      + for no-op locks
**      + for dot-file locks
**      + for flock() locking
**      + for named semaphore locks (VxWorks only)
**      + for AFP filesystem locks (MacOSX only)
**   *  sqlite3_file methods not associated with locking.
**   *  Definitions of sqlite3_io_methods objects for all locking
**      methods plus "finder" functions for each locking method.
**   *  sqlite3_vfs method implementations.
**   *  Locking primitives for the proxy uber-locking-method.
**      (MacOSX only)
**   *  Definitions of sqlite3_vfs objects for all locking methods
**      plus implementations of sqlite3_os_init() and sqlite3_os_end().
*/
#include "sqliteInt.h"
#if SQLITE_OS_UNIX              /* This file is used on unix only */

/*
** There are various methods for file locking used for concurrency
** control:
**
**   1. POSIX locking (the default),
**   2. No locking,
**   3. Dot-file locking,
**   4. flock() locking,
**   5. AFP locking (OSX only),
**   6. Named POSIX semaphores (VxWorks only),
**   7. proxy locking. (OSX only)
**
** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
** is defined to 1.  SQLITE_ENABLE_LOCKING_STYLE also enables automatic
** selection of the appropriate locking style based on the filesystem
** where the database is located.
**
** If the macro is not supplied on the compiler command line it defaults
** to 1 on Apple platforms and to 0 everywhere else.  After this point the
** macro is therefore always defined; code that wants to know whether the
** feature is enabled must test its value, not merely whether it is defined.
*/
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
# if defined(__APPLE__)
# define SQLITE_ENABLE_LOCKING_STYLE 1
# else
# define SQLITE_ENABLE_LOCKING_STYLE 0
# endif
#endif

/*
** Define the OS_VXWORKS pre-processor macro to 1 if building on
** vxworks, or 0 otherwise.
*/
#ifndef OS_VXWORKS
# if defined(__RTP__) || defined(_WRS_KERNEL)
# define OS_VXWORKS 1
# else
# define OS_VXWORKS 0
# endif
#endif

/*
** These #defines should enable >2GB file support on Posix if the
** underlying operating system supports it.  If the OS lacks
** large file support, these should be no-ops.
**
** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
** on the compiler command line.  This is necessary if you are compiling
** on a recent machine (ex: RedHat 7.2) but you want your code to work
** on an older machine (ex: RedHat 6.0).  If you compile on RedHat 7.2
** without this option, LFS is enabled.  But LFS does not exist in the kernel
** in RedHat 6.0, so the code won't work.  Hence, for maximum binary
** portability you should omit LFS.
**
** The previous paragraph was written in 2005.  (This paragraph is written
** on 2008-11-28.)  These days, all Linux kernels support large files, so
** you should probably leave LFS enabled.  But some embedded platforms might
** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful.
*/
#ifndef SQLITE_DISABLE_LFS
# define _LARGE_FILE 1
# ifndef _FILE_OFFSET_BITS
# define _FILE_OFFSET_BITS 64
# endif
# define _LARGEFILE_SOURCE 1
#endif

/*
** Standard include files.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
#ifndef SQLITE_OMIT_WAL
#include <sys/mman.h>
#endif

/* Extra headers needed only by the optional locking styles. */
#if SQLITE_ENABLE_LOCKING_STYLE
# include <sys/ioctl.h>
# if OS_VXWORKS
# include <semaphore.h>
# include <limits.h>
# else
# include <sys/file.h>
# include <sys/param.h>
# endif
#endif /* SQLITE_ENABLE_LOCKING_STYLE */

#if defined(__APPLE__) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
# include <sys/mount.h>
#endif

/*
** Allowed values of unixFile.fsFlags
*/
#define SQLITE_FSFLAGS_IS_MSDOS 0x1

/*
** If we are to be thread-safe, include the pthreads header and define
** the SQLITE_UNIX_THREADS macro.
*/
#if SQLITE_THREADSAFE
# include <pthread.h>
# define SQLITE_UNIX_THREADS 1
#endif

/*
** Default permissions when creating a new file
*/
#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif

/*
** Default permissions when creating auto proxy dir
*/
#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
#endif

/*
** Maximum supported path-length.
171 */ 172 #define MAX_PATHNAME 512 173 174 /* 175 ** Only set the lastErrno if the error code is a real error and not 176 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK 177 */ 178 #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) 179 180 /* Forward references */ 181 typedef struct unixShm unixShm; /* Connection shared memory */ 182 typedef struct unixShmNode unixShmNode; /* Shared memory instance */ 183 typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ 184 typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ 185 186 /* 187 ** Sometimes, after a file handle is closed by SQLite, the file descriptor 188 ** cannot be closed immediately. In these cases, instances of the following 189 ** structure are used to store the file descriptor while waiting for an 190 ** opportunity to either close or reuse it. 191 */ 192 struct UnixUnusedFd { 193 int fd; /* File descriptor to close */ 194 int flags; /* Flags this file descriptor was opened with */ 195 UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ 196 }; 197 198 /* 199 ** The unixFile structure is subclass of sqlite3_file specific to the unix 200 ** VFS implementations. 201 */ 202 typedef struct unixFile unixFile; 203 struct unixFile { 204 sqlite3_io_methods const *pMethod; /* Always the first entry */ 205 unixInodeInfo *pInode; /* Info about locks on this inode */ 206 int h; /* The file descriptor */ 207 int dirfd; /* File descriptor for the directory */ 208 unsigned char eFileLock; /* The type of lock held on this fd */ 209 unsigned char ctrlFlags; /* Behavioral bits. 
UNIXFILE_* flags */ 210 int lastErrno; /* The unix errno from last I/O error */ 211 void *lockingContext; /* Locking style specific state */ 212 UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ 213 const char *zPath; /* Name of the file */ 214 unixShm *pShm; /* Shared memory segment information */ 215 int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ 216 #if SQLITE_ENABLE_LOCKING_STYLE 217 int openFlags; /* The flags specified at open() */ 218 #endif 219 #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) 220 unsigned fsFlags; /* cached details from statfs() */ 221 #endif 222 #if OS_VXWORKS 223 int isDelete; /* Delete on close if true */ 224 struct vxworksFileId *pId; /* Unique file ID */ 225 #endif 226 #ifndef NDEBUG 227 /* The next group of variables are used to track whether or not the 228 ** transaction counter in bytes 24-27 of database files are updated 229 ** whenever any part of the database changes. An assertion fault will 230 ** occur if a file is updated without also updating the transaction 231 ** counter. This test is made to avoid new problems similar to the 232 ** one described by ticket #3584. 233 */ 234 unsigned char transCntrChng; /* True if the transaction counter changed */ 235 unsigned char dbUpdate; /* True if any part of database file changed */ 236 unsigned char inNormalWrite; /* True if in a normal write operation */ 237 #endif 238 #ifdef SQLITE_TEST 239 /* In test mode, increase the size of this structure a bit so that 240 ** it is larger than the struct CrashFile defined in test6.c. 241 */ 242 char aPadding[32]; 243 #endif 244 }; 245 246 /* 247 ** Allowed values for the unixFile.ctrlFlags bitmask: 248 */ 249 #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ 250 #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ 251 252 /* 253 ** Include code that is common to all os_*.c files 254 */ 255 #include "os_common.h" 256 257 /* 258 ** Define various macros that are missing from some systems. 
*/
#ifndef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#ifdef SQLITE_DISABLE_LFS
# undef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif
#ifndef O_BINARY
# define O_BINARY 0
#endif

/*
** The threadid macro resolves to the thread-id or to 0.  Used for
** testing and debugging only.
*/
#if SQLITE_THREADSAFE
#define threadid pthread_self()
#else
#define threadid 0
#endif

/*
** Many system calls are accessed through pointer-to-functions so that
** they may be overridden at runtime to facilitate fault injection during
** testing and sandboxing.  The following array holds the names and pointers
** to all overrideable system calls.
**
** Each entry is immediately followed by an osXxx macro that casts the
** entry's pCurrent pointer back to the real prototype.  The macros refer
** to entries by numeric index, so the order of the entries must never
** change without updating every index.
**
** pDefault starts out as 0 and is filled in lazily by unixSetSystemCall()
** the first time an entry is overridden.
*/
static struct unix_syscall {
  const char *zName;            /* Name of the system call */
  sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
  sqlite3_syscall_ptr pDefault; /* Default value */
} aSyscall[] = {
  { "open",         (sqlite3_syscall_ptr)open,       0  },
#define osOpen      ((int(*)(const char*,int,int))aSyscall[0].pCurrent)

  { "close",        (sqlite3_syscall_ptr)close,      0  },
#define osClose     ((int(*)(int))aSyscall[1].pCurrent)

  { "access",       (sqlite3_syscall_ptr)access,     0  },
#define osAccess    ((int(*)(const char*,int))aSyscall[2].pCurrent)

  { "getcwd",       (sqlite3_syscall_ptr)getcwd,     0  },
#define osGetcwd    ((char*(*)(char*,size_t))aSyscall[3].pCurrent)

  { "stat",         (sqlite3_syscall_ptr)stat,       0  },
#define osStat      ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)

/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So redefine fcntl() to be something
** that always succeeds.  This means that locking does not occur under
** DJGPP.  But it is DOS - what did you expect?
**
** NOTE(review): the text above talks about fcntl() but the code below
** stubs out fstat().  The stub macro also takes three arguments while the
** regular osFstat takes two (int, struct stat*) - confirm which of these
** is intended before relying on the DJGPP branch.
*/
#ifdef __DJGPP__
  { "fstat",        0,                 0  },
#define osFstat(a,b,c)    0
#else
  { "fstat",        (sqlite3_syscall_ptr)fstat,      0  },
#define osFstat     ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
#endif

  { "ftruncate",    (sqlite3_syscall_ptr)ftruncate,  0  },
#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)

  { "fcntl",        (sqlite3_syscall_ptr)fcntl,      0  },
#define osFcntl     ((int(*)(int,int,...))aSyscall[7].pCurrent)

  { "read",         (sqlite3_syscall_ptr)read,       0  },
#define osRead      ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)

/* NOTE(review): this tests whether SQLITE_ENABLE_LOCKING_STYLE is defined,
** not its value.  If an earlier part of the file always defines it to 0
** or 1, the first branch is always taken - confirm that is intended.
*/
#if defined(USE_PREAD) || defined(SQLITE_ENABLE_LOCKING_STYLE)
  { "pread",        (sqlite3_syscall_ptr)pread,      0  },
#else
  { "pread",        (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPread     ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)

#if defined(USE_PREAD64)
  { "pread64",      (sqlite3_syscall_ptr)pread64,    0  },
#else
  { "pread64",      (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPread64   ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent)

  { "write",        (sqlite3_syscall_ptr)write,      0  },
#define osWrite     ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)

/* NOTE(review): same defined()-versus-value question as for "pread". */
#if defined(USE_PREAD) || defined(SQLITE_ENABLE_LOCKING_STYLE)
  { "pwrite",       (sqlite3_syscall_ptr)pwrite,     0  },
#else
  { "pwrite",       (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPwrite    ((ssize_t(*)(int,const void*,size_t,off_t))\
                    aSyscall[12].pCurrent)

#if defined(USE_PREAD64)
  { "pwrite64",     (sqlite3_syscall_ptr)pwrite64,   0  },
#else
  { "pwrite64",     (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPwrite64  ((ssize_t(*)(int,const void*,size_t,off_t))\
                    aSyscall[13].pCurrent)

  { "fchmod",       (sqlite3_syscall_ptr)fchmod,     0  },
#define osFchmod    ((int(*)(int,mode_t))aSyscall[14].pCurrent)

#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
  { "fallocate",    (sqlite3_syscall_ptr)posix_fallocate,  0 },
#else
  { "fallocate",    (sqlite3_syscall_ptr)0,                0 },
#endif
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)

}; /* End of the overrideable system calls */

/*
** This is the xSetSystemCall() method of sqlite3_vfs for all of the
** "unix" VFSes.  Return SQLITE_OK upon successfully updating the
** system call pointer, or SQLITE_NOTFOUND if there is no configurable
** system call named zName.
**
** If zName is NULL, every entry that has been overridden at least once
** is reset to its default and SQLITE_OK is returned.
** If pNewFunc is NULL, the named entry is reset to its default.
*/
static int unixSetSystemCall(
  sqlite3_vfs *pNotUsed,        /* The VFS pointer.  Not used */
  const char *zName,            /* Name of system call to override */
  sqlite3_syscall_ptr pNewFunc  /* Pointer to new system call value */
){
  unsigned int i;
  int rc = SQLITE_NOTFOUND;

  UNUSED_PARAMETER(pNotUsed);
  if( zName==0 ){
    /* If no zName is given, restore all system calls to their default
    ** settings and return SQLITE_OK.  Entries whose pDefault is still 0
    ** were never overridden, so pCurrent already holds the default.
    */
    rc = SQLITE_OK;
    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
      if( aSyscall[i].pDefault ){
        aSyscall[i].pCurrent = aSyscall[i].pDefault;
      }
    }
  }else{
    /* If zName is specified, operate on only the one system call
    ** specified.
    */
    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
      if( strcmp(zName, aSyscall[i].zName)==0 ){
        /* Remember the built-in value the first time it is replaced */
        if( aSyscall[i].pDefault==0 ){
          aSyscall[i].pDefault = aSyscall[i].pCurrent;
        }
        rc = SQLITE_OK;
        if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
        aSyscall[i].pCurrent = pNewFunc;
        break;
      }
    }
  }
  return rc;
}

/*
** Return the value of a system call.  Return NULL if zName is not a
** recognized system call name.  NULL is also returned if the system call
** is currently undefined.
426 */ 427 static sqlite3_syscall_ptr unixGetSystemCall( 428 sqlite3_vfs *pNotUsed, 429 const char *zName 430 ){ 431 unsigned int i; 432 433 UNUSED_PARAMETER(pNotUsed); 434 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 435 if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; 436 } 437 return 0; 438 } 439 440 /* 441 ** Return the name of the first system call after zName. If zName==NULL 442 ** then return the name of the first system call. Return NULL if zName 443 ** is the last system call or if zName is not the name of a valid 444 ** system call. 445 */ 446 static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ 447 int i = -1; 448 449 UNUSED_PARAMETER(p); 450 if( zName ){ 451 for(i=0; i<ArraySize(aSyscall)-1; i++){ 452 if( strcmp(zName, aSyscall[i].zName)==0 ) break; 453 } 454 } 455 for(i++; i<ArraySize(aSyscall); i++){ 456 if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; 457 } 458 return 0; 459 } 460 461 /* 462 ** Retry open() calls that fail due to EINTR 463 */ 464 static int robust_open(const char *z, int f, int m){ 465 int rc; 466 do{ rc = osOpen(z,f,m); }while( rc<0 && errno==EINTR ); 467 return rc; 468 } 469 470 /* 471 ** Helper functions to obtain and relinquish the global mutex. The 472 ** global mutex is used to protect the unixInodeInfo and 473 ** vxworksFileId objects used by this file, all of which may be 474 ** shared by multiple threads. 475 ** 476 ** Function unixMutexHeld() is used to assert() that the global mutex 477 ** is held when required. This function is only used as part of assert() 478 ** statements. e.g. 
479 ** 480 ** unixEnterMutex() 481 ** assert( unixMutexHeld() ); 482 ** unixEnterLeave() 483 */ 484 static void unixEnterMutex(void){ 485 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); 486 } 487 static void unixLeaveMutex(void){ 488 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); 489 } 490 #ifdef SQLITE_DEBUG 491 static int unixMutexHeld(void) { 492 return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); 493 } 494 #endif 495 496 497 #ifdef SQLITE_DEBUG 498 /* 499 ** Helper function for printing out trace information from debugging 500 ** binaries. This returns the string represetation of the supplied 501 ** integer lock-type. 502 */ 503 static const char *azFileLock(int eFileLock){ 504 switch( eFileLock ){ 505 case NO_LOCK: return "NONE"; 506 case SHARED_LOCK: return "SHARED"; 507 case RESERVED_LOCK: return "RESERVED"; 508 case PENDING_LOCK: return "PENDING"; 509 case EXCLUSIVE_LOCK: return "EXCLUSIVE"; 510 } 511 return "ERROR"; 512 } 513 #endif 514 515 #ifdef SQLITE_LOCK_TRACE 516 /* 517 ** Print out information about all locking operations. 518 ** 519 ** This routine is used for troubleshooting locks on multithreaded 520 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE 521 ** command-line option on the compiler. This code is normally 522 ** turned off. 
523 */ 524 static int lockTrace(int fd, int op, struct flock *p){ 525 char *zOpName, *zType; 526 int s; 527 int savedErrno; 528 if( op==F_GETLK ){ 529 zOpName = "GETLK"; 530 }else if( op==F_SETLK ){ 531 zOpName = "SETLK"; 532 }else{ 533 s = osFcntl(fd, op, p); 534 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); 535 return s; 536 } 537 if( p->l_type==F_RDLCK ){ 538 zType = "RDLCK"; 539 }else if( p->l_type==F_WRLCK ){ 540 zType = "WRLCK"; 541 }else if( p->l_type==F_UNLCK ){ 542 zType = "UNLCK"; 543 }else{ 544 assert( 0 ); 545 } 546 assert( p->l_whence==SEEK_SET ); 547 s = osFcntl(fd, op, p); 548 savedErrno = errno; 549 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", 550 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, 551 (int)p->l_pid, s); 552 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ 553 struct flock l2; 554 l2 = *p; 555 osFcntl(fd, F_GETLK, &l2); 556 if( l2.l_type==F_RDLCK ){ 557 zType = "RDLCK"; 558 }else if( l2.l_type==F_WRLCK ){ 559 zType = "WRLCK"; 560 }else if( l2.l_type==F_UNLCK ){ 561 zType = "UNLCK"; 562 }else{ 563 assert( 0 ); 564 } 565 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", 566 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); 567 } 568 errno = savedErrno; 569 return s; 570 } 571 #undef osFcntl 572 #define osFcntl lockTrace 573 #endif /* SQLITE_LOCK_TRACE */ 574 575 /* 576 ** Retry ftruncate() calls that fail due to EINTR 577 */ 578 static int robust_ftruncate(int h, sqlite3_int64 sz){ 579 int rc; 580 do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); 581 return rc; 582 } 583 584 /* 585 ** This routine translates a standard POSIX errno code into something 586 ** useful to the clients of the sqlite3 functions. 
Specifically, it is 587 ** intended to translate a variety of "try again" errors into SQLITE_BUSY 588 ** and a variety of "please close the file descriptor NOW" errors into 589 ** SQLITE_IOERR 590 ** 591 ** Errors during initialization of locks, or file system support for locks, 592 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. 593 */ 594 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { 595 switch (posixError) { 596 #if 0 597 /* At one point this code was not commented out. In theory, this branch 598 ** should never be hit, as this function should only be called after 599 ** a locking-related function (i.e. fcntl()) has returned non-zero with 600 ** the value of errno as the first argument. Since a system call has failed, 601 ** errno should be non-zero. 602 ** 603 ** Despite this, if errno really is zero, we still don't want to return 604 ** SQLITE_OK. The system call failed, and *some* SQLite error should be 605 ** propagated back to the caller. Commenting this branch out means errno==0 606 ** will be handled by the "default:" case below. 607 */ 608 case 0: 609 return SQLITE_OK; 610 #endif 611 612 case EAGAIN: 613 case ETIMEDOUT: 614 case EBUSY: 615 case EINTR: 616 case ENOLCK: 617 /* random NFS retry error, unless during file system support 618 * introspection, in which it actually means what it says */ 619 return SQLITE_BUSY; 620 621 case EACCES: 622 /* EACCES is like EAGAIN during locking operations, but not any other time*/ 623 if( (sqliteIOErr == SQLITE_IOERR_LOCK) || 624 (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 625 (sqliteIOErr == SQLITE_IOERR_RDLOCK) || 626 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){ 627 return SQLITE_BUSY; 628 } 629 /* else fall through */ 630 case EPERM: 631 return SQLITE_PERM; 632 633 /* EDEADLK is only possible if a call to fcntl(F_SETLKW) is made. And 634 ** this module never makes such a call. And the code in SQLite itself 635 ** asserts that SQLITE_IOERR_BLOCKED is never returned. 
For these reasons 636 ** this case is also commented out. If the system does set errno to EDEADLK, 637 ** the default SQLITE_IOERR_XXX code will be returned. */ 638 #if 0 639 case EDEADLK: 640 return SQLITE_IOERR_BLOCKED; 641 #endif 642 643 #if EOPNOTSUPP!=ENOTSUP 644 case EOPNOTSUPP: 645 /* something went terribly awry, unless during file system support 646 * introspection, in which it actually means what it says */ 647 #endif 648 #ifdef ENOTSUP 649 case ENOTSUP: 650 /* invalid fd, unless during file system support introspection, in which 651 * it actually means what it says */ 652 #endif 653 case EIO: 654 case EBADF: 655 case EINVAL: 656 case ENOTCONN: 657 case ENODEV: 658 case ENXIO: 659 case ENOENT: 660 case ESTALE: 661 case ENOSYS: 662 /* these should force the client to close the file and reconnect */ 663 664 default: 665 return sqliteIOErr; 666 } 667 } 668 669 670 671 /****************************************************************************** 672 ****************** Begin Unique File ID Utility Used By VxWorks *************** 673 ** 674 ** On most versions of unix, we can get a unique ID for a file by concatenating 675 ** the device number and the inode number. But this does not work on VxWorks. 676 ** On VxWorks, a unique file id must be based on the canonical filename. 677 ** 678 ** A pointer to an instance of the following structure can be used as a 679 ** unique file ID in VxWorks. Each instance of this structure contains 680 ** a copy of the canonical filename. There is also a reference count. 681 ** The structure is reclaimed when the number of pointers to it drops to 682 ** zero. 683 ** 684 ** There are never very many files open at one time and lookups are not 685 ** a performance-critical path, so it is sufficient to put these 686 ** structures on a linked list. 
687 */ 688 struct vxworksFileId { 689 struct vxworksFileId *pNext; /* Next in a list of them all */ 690 int nRef; /* Number of references to this one */ 691 int nName; /* Length of the zCanonicalName[] string */ 692 char *zCanonicalName; /* Canonical filename */ 693 }; 694 695 #if OS_VXWORKS 696 /* 697 ** All unique filenames are held on a linked list headed by this 698 ** variable: 699 */ 700 static struct vxworksFileId *vxworksFileList = 0; 701 702 /* 703 ** Simplify a filename into its canonical form 704 ** by making the following changes: 705 ** 706 ** * removing any trailing and duplicate / 707 ** * convert /./ into just / 708 ** * convert /A/../ where A is any simple name into just / 709 ** 710 ** Changes are made in-place. Return the new name length. 711 ** 712 ** The original filename is in z[0..n-1]. Return the number of 713 ** characters in the simplified name. 714 */ 715 static int vxworksSimplifyName(char *z, int n){ 716 int i, j; 717 while( n>1 && z[n-1]=='/' ){ n--; } 718 for(i=j=0; i<n; i++){ 719 if( z[i]=='/' ){ 720 if( z[i+1]=='/' ) continue; 721 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 722 i += 1; 723 continue; 724 } 725 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ 726 while( j>0 && z[j-1]!='/' ){ j--; } 727 if( j>0 ){ j--; } 728 i += 2; 729 continue; 730 } 731 } 732 z[j++] = z[i]; 733 } 734 z[j] = 0; 735 return j; 736 } 737 738 /* 739 ** Find a unique file ID for the given absolute pathname. Return 740 ** a pointer to the vxworksFileId object. This pointer is the unique 741 ** file ID. 742 ** 743 ** The nRef field of the vxworksFileId object is incremented before 744 ** the object is returned. A new vxworksFileId object is created 745 ** and added to the global list if necessary. 746 ** 747 ** If a memory allocation error occurs, return NULL. 
748 */ 749 static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ 750 struct vxworksFileId *pNew; /* search key and new file ID */ 751 struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ 752 int n; /* Length of zAbsoluteName string */ 753 754 assert( zAbsoluteName[0]=='/' ); 755 n = (int)strlen(zAbsoluteName); 756 pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) ); 757 if( pNew==0 ) return 0; 758 pNew->zCanonicalName = (char*)&pNew[1]; 759 memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); 760 n = vxworksSimplifyName(pNew->zCanonicalName, n); 761 762 /* Search for an existing entry that matching the canonical name. 763 ** If found, increment the reference count and return a pointer to 764 ** the existing file ID. 765 */ 766 unixEnterMutex(); 767 for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ 768 if( pCandidate->nName==n 769 && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 770 ){ 771 sqlite3_free(pNew); 772 pCandidate->nRef++; 773 unixLeaveMutex(); 774 return pCandidate; 775 } 776 } 777 778 /* No match was found. We will make a new file ID */ 779 pNew->nRef = 1; 780 pNew->nName = n; 781 pNew->pNext = vxworksFileList; 782 vxworksFileList = pNew; 783 unixLeaveMutex(); 784 return pNew; 785 } 786 787 /* 788 ** Decrement the reference count on a vxworksFileId object. Free 789 ** the object when the reference count reaches zero. 
790 */ 791 static void vxworksReleaseFileId(struct vxworksFileId *pId){ 792 unixEnterMutex(); 793 assert( pId->nRef>0 ); 794 pId->nRef--; 795 if( pId->nRef==0 ){ 796 struct vxworksFileId **pp; 797 for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} 798 assert( *pp==pId ); 799 *pp = pId->pNext; 800 sqlite3_free(pId); 801 } 802 unixLeaveMutex(); 803 } 804 #endif /* OS_VXWORKS */ 805 /*************** End of Unique File ID Utility Used By VxWorks **************** 806 ******************************************************************************/ 807 808 809 /****************************************************************************** 810 *************************** Posix Advisory Locking **************************** 811 ** 812 ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) 813 ** section 6.5.2.2 lines 483 through 490 specify that when a process 814 ** sets or clears a lock, that operation overrides any prior locks set 815 ** by the same process. It does not explicitly say so, but this implies 816 ** that it overrides locks set by the same process using a different 817 ** file descriptor. Consider this test case: 818 ** 819 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); 820 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); 821 ** 822 ** Suppose ./file1 and ./file2 are really the same file (because 823 ** one is a hard or symbolic link to the other) then if you set 824 ** an exclusive lock on fd1, then try to get an exclusive lock 825 ** on fd2, it works. I would have expected the second lock to 826 ** fail since there was already a lock on the file due to fd1. 827 ** But not so. Since both locks came from the same process, the 828 ** second overrides the first, even though they were on different 829 ** file descriptors opened on different file names. 830 ** 831 ** This means that we cannot use POSIX locks to synchronize file access 832 ** among competing threads of the same process. 
POSIX locks will work fine 833 ** to synchronize access for threads in separate processes, but not 834 ** threads within the same process. 835 ** 836 ** To work around the problem, SQLite has to manage file locks internally 837 ** on its own. Whenever a new database is opened, we have to find the 838 ** specific inode of the database file (the inode is determined by the 839 ** st_dev and st_ino fields of the stat structure that fstat() fills in) 840 ** and check for locks already existing on that inode. When locks are 841 ** created or removed, we have to look at our own internal record of the 842 ** locks to see if another thread has previously set a lock on that same 843 ** inode. 844 ** 845 ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. 846 ** For VxWorks, we have to use the alternative unique ID system based on 847 ** canonical filename and implemented in the previous division.) 848 ** 849 ** The sqlite3_file structure for POSIX is no longer just an integer file 850 ** descriptor. It is now a structure that holds the integer file 851 ** descriptor and a pointer to a structure that describes the internal 852 ** locks on the corresponding inode. There is one locking structure 853 ** per inode, so if the same inode is opened twice, both unixFile structures 854 ** point to the same locking structure. The locking structure keeps 855 ** a reference count (so we will know when to delete it) and a "cnt" 856 ** field that tells us its internal lock status. cnt==0 means the 857 ** file is unlocked. cnt==-1 means the file has an exclusive lock. 858 ** cnt>0 means there are cnt shared locks on the file. 859 ** 860 ** Any attempt to lock or unlock a file first checks the locking 861 ** structure. The fcntl() system call is only invoked to set a 862 ** POSIX lock if the internal lock structure transitions between 863 ** a locked and an unlocked state. 864 ** 865 ** But wait: there are yet more problems with POSIX advisory locks. 
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close a unixFile, if there are
** other unixFile open on the same inode that are holding locks, the call
** to close() the file descriptor is deferred until all of the locks clear.
** The unixInodeInfo structure keeps a list of file descriptors that need to
** be closed and that list is walked (and cleared) when the last lock
** clears.
**
** Yet another problem:  LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B.  But there is no way to know at compile-time which
** threading library is being used.  So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
897 */ 898 899 /* 900 ** An instance of the following structure serves as the key used 901 ** to locate a particular unixInodeInfo object. 902 */ 903 struct unixFileId { 904 dev_t dev; /* Device number */ 905 #if OS_VXWORKS 906 struct vxworksFileId *pId; /* Unique file ID for vxworks. */ 907 #else 908 ino_t ino; /* Inode number */ 909 #endif 910 }; 911 912 /* 913 ** An instance of the following structure is allocated for each open 914 ** inode. Or, on LinuxThreads, there is one of these structures for 915 ** each inode opened by each thread. 916 ** 917 ** A single inode can have multiple file descriptors, so each unixFile 918 ** structure contains a pointer to an instance of this object and this 919 ** object keeps a count of the number of unixFile pointing to it. 920 */ 921 struct unixInodeInfo { 922 struct unixFileId fileId; /* The lookup key */ 923 int nShared; /* Number of SHARED locks held */ 924 unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ 925 unsigned char bProcessLock; /* An exclusive process lock is held */ 926 int nRef; /* Number of pointers to this structure */ 927 unixShmNode *pShmNode; /* Shared memory associated with this inode */ 928 int nLock; /* Number of outstanding file locks */ 929 UnixUnusedFd *pUnused; /* Unused file descriptors to close */ 930 unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ 931 unixInodeInfo *pPrev; /* .... doubly linked */ 932 #if defined(SQLITE_ENABLE_LOCKING_STYLE) 933 unsigned long long sharedByte; /* for AFP simulated shared lock */ 934 #endif 935 #if OS_VXWORKS 936 sem_t *pSem; /* Named POSIX semaphore */ 937 char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ 938 #endif 939 }; 940 941 /* 942 ** A lists of all unixInodeInfo objects. 943 */ 944 static unixInodeInfo *inodeList = 0; 945 946 /* 947 ** 948 ** This function - unixLogError_x(), is only ever called via the macro 949 ** unixLogError(). 
950 ** 951 ** It is invoked after an error occurs in an OS function and errno has been 952 ** set. It logs a message using sqlite3_log() containing the current value of 953 ** errno and, if possible, the human-readable equivalent from strerror() or 954 ** strerror_r(). 955 ** 956 ** The first argument passed to the macro should be the error code that 957 ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). 958 ** The two subsequent arguments should be the name of the OS function that 959 ** failed (e.g. "unlink", "open") and the the associated file-system path, 960 ** if any. 961 */ 962 #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) 963 static int unixLogErrorAtLine( 964 int errcode, /* SQLite error code */ 965 const char *zFunc, /* Name of OS function that failed */ 966 const char *zPath, /* File path associated with error */ 967 int iLine /* Source line number where error occurred */ 968 ){ 969 char *zErr; /* Message from strerror() or equivalent */ 970 int iErrno = errno; /* Saved syscall error number */ 971 972 /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use 973 ** the strerror() function to obtain the human-readable error message 974 ** equivalent to errno. Otherwise, use strerror_r(). 975 */ 976 #if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) 977 char aErr[80]; 978 memset(aErr, 0, sizeof(aErr)); 979 zErr = aErr; 980 981 /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, 982 ** assume that the system provides the the GNU version of strerror_r() that 983 ** returns a pointer to a buffer containing the error message. That pointer 984 ** may point to aErr[], or it may point to some static storage somewhere. 985 ** Otherwise, assume that the system provides the POSIX version of 986 ** strerror_r(), which always writes an error message into aErr[]. 
987 ** 988 ** If the code incorrectly assumes that it is the POSIX version that is 989 ** available, the error message will often be an empty string. Not a 990 ** huge problem. Incorrectly concluding that the GNU version is available 991 ** could lead to a segfault though. 992 */ 993 #if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) 994 zErr = 995 # endif 996 strerror_r(iErrno, aErr, sizeof(aErr)-1); 997 998 #elif SQLITE_THREADSAFE 999 /* This is a threadsafe build, but strerror_r() is not available. */ 1000 zErr = ""; 1001 #else 1002 /* Non-threadsafe build, use strerror(). */ 1003 zErr = strerror(iErrno); 1004 #endif 1005 1006 assert( errcode!=SQLITE_OK ); 1007 if( zPath==0 ) zPath = ""; 1008 sqlite3_log(errcode, 1009 "os_unix.c:%d: (%d) %s(%s) - %s", 1010 iLine, iErrno, zFunc, zPath, zErr 1011 ); 1012 1013 return errcode; 1014 } 1015 1016 /* 1017 ** Close a file descriptor. 1018 ** 1019 ** We assume that close() almost always works, since it is only in a 1020 ** very sick application or on a very sick platform that it might fail. 1021 ** If it does fail, simply leak the file descriptor, but do log the 1022 ** error. 1023 ** 1024 ** Note that it is not safe to retry close() after EINTR since the 1025 ** file descriptor might have already been reused by another thread. 1026 ** So we don't even try to recover from an EINTR. Just log the error 1027 ** and move on. 1028 */ 1029 static void robust_close(unixFile *pFile, int h, int lineno){ 1030 if( osClose(h) ){ 1031 unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", 1032 pFile ? pFile->zPath : 0, lineno); 1033 } 1034 } 1035 1036 /* 1037 ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. 
1038 */ 1039 static void closePendingFds(unixFile *pFile){ 1040 unixInodeInfo *pInode = pFile->pInode; 1041 UnixUnusedFd *p; 1042 UnixUnusedFd *pNext; 1043 for(p=pInode->pUnused; p; p=pNext){ 1044 pNext = p->pNext; 1045 robust_close(pFile, p->fd, __LINE__); 1046 sqlite3_free(p); 1047 } 1048 pInode->pUnused = 0; 1049 } 1050 1051 /* 1052 ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). 1053 ** 1054 ** The mutex entered using the unixEnterMutex() function must be held 1055 ** when this function is called. 1056 */ 1057 static void releaseInodeInfo(unixFile *pFile){ 1058 unixInodeInfo *pInode = pFile->pInode; 1059 assert( unixMutexHeld() ); 1060 if( ALWAYS(pInode) ){ 1061 pInode->nRef--; 1062 if( pInode->nRef==0 ){ 1063 assert( pInode->pShmNode==0 ); 1064 closePendingFds(pFile); 1065 if( pInode->pPrev ){ 1066 assert( pInode->pPrev->pNext==pInode ); 1067 pInode->pPrev->pNext = pInode->pNext; 1068 }else{ 1069 assert( inodeList==pInode ); 1070 inodeList = pInode->pNext; 1071 } 1072 if( pInode->pNext ){ 1073 assert( pInode->pNext->pPrev==pInode ); 1074 pInode->pNext->pPrev = pInode->pPrev; 1075 } 1076 sqlite3_free(pInode); 1077 } 1078 } 1079 } 1080 1081 /* 1082 ** Given a file descriptor, locate the unixInodeInfo object that 1083 ** describes that file descriptor. Create a new one if necessary. The 1084 ** return value might be uninitialized if an error occurs. 1085 ** 1086 ** The mutex entered using the unixEnterMutex() function must be held 1087 ** when this function is called. 1088 ** 1089 ** Return an appropriate error code. 
1090 */ 1091 static int findInodeInfo( 1092 unixFile *pFile, /* Unix file with file desc used in the key */ 1093 unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ 1094 ){ 1095 int rc; /* System call return code */ 1096 int fd; /* The file descriptor for pFile */ 1097 struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ 1098 struct stat statbuf; /* Low-level file information */ 1099 unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ 1100 1101 assert( unixMutexHeld() ); 1102 1103 /* Get low-level information about the file that we can used to 1104 ** create a unique name for the file. 1105 */ 1106 fd = pFile->h; 1107 rc = osFstat(fd, &statbuf); 1108 if( rc!=0 ){ 1109 pFile->lastErrno = errno; 1110 #ifdef EOVERFLOW 1111 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; 1112 #endif 1113 return SQLITE_IOERR; 1114 } 1115 1116 #ifdef __APPLE__ 1117 /* On OS X on an msdos filesystem, the inode number is reported 1118 ** incorrectly for zero-size files. See ticket #3260. To work 1119 ** around this problem (we consider it a bug in OS X, not SQLite) 1120 ** we always increase the file size to 1 by writing a single byte 1121 ** prior to accessing the inode number. The one byte written is 1122 ** an ASCII 'S' character which also happens to be the first byte 1123 ** in the header of every SQLite database. In this way, if there 1124 ** is a race condition such that another thread has already populated 1125 ** the first page of the database, no damage is done. 
1126 */ 1127 if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ 1128 do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); 1129 if( rc!=1 ){ 1130 pFile->lastErrno = errno; 1131 return SQLITE_IOERR; 1132 } 1133 rc = osFstat(fd, &statbuf); 1134 if( rc!=0 ){ 1135 pFile->lastErrno = errno; 1136 return SQLITE_IOERR; 1137 } 1138 } 1139 #endif 1140 1141 memset(&fileId, 0, sizeof(fileId)); 1142 fileId.dev = statbuf.st_dev; 1143 #if OS_VXWORKS 1144 fileId.pId = pFile->pId; 1145 #else 1146 fileId.ino = statbuf.st_ino; 1147 #endif 1148 pInode = inodeList; 1149 while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ 1150 pInode = pInode->pNext; 1151 } 1152 if( pInode==0 ){ 1153 pInode = sqlite3_malloc( sizeof(*pInode) ); 1154 if( pInode==0 ){ 1155 return SQLITE_NOMEM; 1156 } 1157 memset(pInode, 0, sizeof(*pInode)); 1158 memcpy(&pInode->fileId, &fileId, sizeof(fileId)); 1159 pInode->nRef = 1; 1160 pInode->pNext = inodeList; 1161 pInode->pPrev = 0; 1162 if( inodeList ) inodeList->pPrev = pInode; 1163 inodeList = pInode; 1164 }else{ 1165 pInode->nRef++; 1166 } 1167 *ppInode = pInode; 1168 return SQLITE_OK; 1169 } 1170 1171 1172 /* 1173 ** This routine checks if there is a RESERVED lock held on the specified 1174 ** file by this or any other process. If such a lock is held, set *pResOut 1175 ** to a non-zero value otherwise *pResOut is set to zero. The return value 1176 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 
1177 */ 1178 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ 1179 int rc = SQLITE_OK; 1180 int reserved = 0; 1181 unixFile *pFile = (unixFile*)id; 1182 1183 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 1184 1185 assert( pFile ); 1186 unixEnterMutex(); /* Because pFile->pInode is shared across threads */ 1187 1188 /* Check if a thread in this process holds such a lock */ 1189 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 1190 reserved = 1; 1191 } 1192 1193 /* Otherwise see if some other process holds it. 1194 */ 1195 #ifndef __DJGPP__ 1196 if( !reserved && !pFile->pInode->bProcessLock ){ 1197 struct flock lock; 1198 lock.l_whence = SEEK_SET; 1199 lock.l_start = RESERVED_BYTE; 1200 lock.l_len = 1; 1201 lock.l_type = F_WRLCK; 1202 if( osFcntl(pFile->h, F_GETLK, &lock) ){ 1203 rc = SQLITE_IOERR_CHECKRESERVEDLOCK; 1204 pFile->lastErrno = errno; 1205 } else if( lock.l_type!=F_UNLCK ){ 1206 reserved = 1; 1207 } 1208 } 1209 #endif 1210 1211 unixLeaveMutex(); 1212 OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); 1213 1214 *pResOut = reserved; 1215 return rc; 1216 } 1217 1218 /* 1219 ** Attempt to set a system-lock on the file pFile. The lock is 1220 ** described by pLock. 1221 ** 1222 ** If the pFile was opened read/write from unix-excl, then the only lock 1223 ** ever obtained is an exclusive lock, and it is obtained exactly once 1224 ** the first time any lock is attempted. All subsequent system locking 1225 ** operations become no-ops. Locking operations still happen internally, 1226 ** in order to coordinate access between separate database connections 1227 ** within this process, but all of that is handled in memory and the 1228 ** operating system does not participate. 1229 ** 1230 ** This function is a pass-through to fcntl(F_SETLK) if pFile is using 1231 ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" 1232 ** and is read-only. 
1233 ** 1234 ** Zero is returned if the call completes successfully, or -1 if a call 1235 ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). 1236 */ 1237 static int unixFileLock(unixFile *pFile, struct flock *pLock){ 1238 int rc; 1239 unixInodeInfo *pInode = pFile->pInode; 1240 assert( unixMutexHeld() ); 1241 assert( pInode!=0 ); 1242 if( ((pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock) 1243 && ((pFile->ctrlFlags & UNIXFILE_RDONLY)==0) 1244 ){ 1245 if( pInode->bProcessLock==0 ){ 1246 struct flock lock; 1247 assert( pInode->nLock==0 ); 1248 lock.l_whence = SEEK_SET; 1249 lock.l_start = SHARED_FIRST; 1250 lock.l_len = SHARED_SIZE; 1251 lock.l_type = F_WRLCK; 1252 rc = osFcntl(pFile->h, F_SETLK, &lock); 1253 if( rc<0 ) return rc; 1254 pInode->bProcessLock = 1; 1255 pInode->nLock++; 1256 }else{ 1257 rc = 0; 1258 } 1259 }else{ 1260 rc = osFcntl(pFile->h, F_SETLK, pLock); 1261 } 1262 return rc; 1263 } 1264 1265 /* 1266 ** Lock the file with the lock specified by parameter eFileLock - one 1267 ** of the following: 1268 ** 1269 ** (1) SHARED_LOCK 1270 ** (2) RESERVED_LOCK 1271 ** (3) PENDING_LOCK 1272 ** (4) EXCLUSIVE_LOCK 1273 ** 1274 ** Sometimes when requesting one lock state, additional lock states 1275 ** are inserted in between. The locking might fail on one of the later 1276 ** transitions leaving the lock state different from what it started but 1277 ** still short of its goal. The following chart shows the allowed 1278 ** transitions and the inserted intermediate states: 1279 ** 1280 ** UNLOCKED -> SHARED 1281 ** SHARED -> RESERVED 1282 ** SHARED -> (PENDING) -> EXCLUSIVE 1283 ** RESERVED -> (PENDING) -> EXCLUSIVE 1284 ** PENDING -> EXCLUSIVE 1285 ** 1286 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 1287 ** routine to lower a locking level. 
1288 */ 1289 static int unixLock(sqlite3_file *id, int eFileLock){ 1290 /* The following describes the implementation of the various locks and 1291 ** lock transitions in terms of the POSIX advisory shared and exclusive 1292 ** lock primitives (called read-locks and write-locks below, to avoid 1293 ** confusion with SQLite lock names). The algorithms are complicated 1294 ** slightly in order to be compatible with windows systems simultaneously 1295 ** accessing the same database file, in case that is ever required. 1296 ** 1297 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved 1298 ** byte', each single bytes at well known offsets, and the 'shared byte 1299 ** range', a range of 510 bytes at a well known offset. 1300 ** 1301 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending 1302 ** byte'. If this is successful, a random byte from the 'shared byte 1303 ** range' is read-locked and the lock on the 'pending byte' released. 1304 ** 1305 ** A process may only obtain a RESERVED lock after it has a SHARED lock. 1306 ** A RESERVED lock is implemented by grabbing a write-lock on the 1307 ** 'reserved byte'. 1308 ** 1309 ** A process may only obtain a PENDING lock after it has obtained a 1310 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock 1311 ** on the 'pending byte'. This ensures that no new SHARED locks can be 1312 ** obtained, but existing SHARED locks are allowed to persist. A process 1313 ** does not have to obtain a RESERVED lock on the way to a PENDING lock. 1314 ** This property is used by the algorithm for rolling back a journal file 1315 ** after a crash. 1316 ** 1317 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is 1318 ** implemented by obtaining a write-lock on the entire 'shared byte 1319 ** range'. Since all other locks require a read-lock on one of the bytes 1320 ** within this range, this ensures that no other locks are held on the 1321 ** database. 
1322 ** 1323 ** The reason a single byte cannot be used instead of the 'shared byte 1324 ** range' is that some versions of windows do not support read-locks. By 1325 ** locking a random byte from a range, concurrent SHARED locks may exist 1326 ** even if the locking primitive used is always a write-lock. 1327 */ 1328 int rc = SQLITE_OK; 1329 unixFile *pFile = (unixFile*)id; 1330 unixInodeInfo *pInode = pFile->pInode; 1331 struct flock lock; 1332 int tErrno = 0; 1333 1334 assert( pFile ); 1335 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, 1336 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 1337 azFileLock(pInode->eFileLock), pInode->nShared , getpid())); 1338 1339 /* If there is already a lock of this type or more restrictive on the 1340 ** unixFile, do nothing. Don't use the end_lock: exit path, as 1341 ** unixEnterMutex() hasn't been called yet. 1342 */ 1343 if( pFile->eFileLock>=eFileLock ){ 1344 OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h, 1345 azFileLock(eFileLock))); 1346 return SQLITE_OK; 1347 } 1348 1349 /* Make sure the locking sequence is correct. 1350 ** (1) We never move from unlocked to anything higher than shared lock. 1351 ** (2) SQLite never explicitly requests a pendig lock. 1352 ** (3) A shared lock is always held when a reserve lock is requested. 1353 */ 1354 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 1355 assert( eFileLock!=PENDING_LOCK ); 1356 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 1357 1358 /* This mutex is needed because pFile->pInode is shared across threads 1359 */ 1360 unixEnterMutex(); 1361 pInode = pFile->pInode; 1362 1363 /* If some thread using this PID has a lock via a different unixFile* 1364 ** handle that precludes the requested lock, return BUSY. 
1365 */ 1366 if( (pFile->eFileLock!=pInode->eFileLock && 1367 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 1368 ){ 1369 rc = SQLITE_BUSY; 1370 goto end_lock; 1371 } 1372 1373 /* If a SHARED lock is requested, and some thread using this PID already 1374 ** has a SHARED or RESERVED lock, then increment reference counts and 1375 ** return SQLITE_OK. 1376 */ 1377 if( eFileLock==SHARED_LOCK && 1378 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 1379 assert( eFileLock==SHARED_LOCK ); 1380 assert( pFile->eFileLock==0 ); 1381 assert( pInode->nShared>0 ); 1382 pFile->eFileLock = SHARED_LOCK; 1383 pInode->nShared++; 1384 pInode->nLock++; 1385 goto end_lock; 1386 } 1387 1388 1389 /* A PENDING lock is needed before acquiring a SHARED lock and before 1390 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 1391 ** be released. 1392 */ 1393 lock.l_len = 1L; 1394 lock.l_whence = SEEK_SET; 1395 if( eFileLock==SHARED_LOCK 1396 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 1397 ){ 1398 lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK); 1399 lock.l_start = PENDING_BYTE; 1400 if( unixFileLock(pFile, &lock) ){ 1401 tErrno = errno; 1402 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1403 if( rc!=SQLITE_BUSY ){ 1404 pFile->lastErrno = tErrno; 1405 } 1406 goto end_lock; 1407 } 1408 } 1409 1410 1411 /* If control gets to this point, then actually go ahead and make 1412 ** operating system calls for the specified lock. 
1413 */ 1414 if( eFileLock==SHARED_LOCK ){ 1415 assert( pInode->nShared==0 ); 1416 assert( pInode->eFileLock==0 ); 1417 assert( rc==SQLITE_OK ); 1418 1419 /* Now get the read-lock */ 1420 lock.l_start = SHARED_FIRST; 1421 lock.l_len = SHARED_SIZE; 1422 if( unixFileLock(pFile, &lock) ){ 1423 tErrno = errno; 1424 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1425 } 1426 1427 /* Drop the temporary PENDING lock */ 1428 lock.l_start = PENDING_BYTE; 1429 lock.l_len = 1L; 1430 lock.l_type = F_UNLCK; 1431 if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){ 1432 /* This could happen with a network mount */ 1433 tErrno = errno; 1434 rc = SQLITE_IOERR_UNLOCK; 1435 } 1436 1437 if( rc ){ 1438 if( rc!=SQLITE_BUSY ){ 1439 pFile->lastErrno = tErrno; 1440 } 1441 goto end_lock; 1442 }else{ 1443 pFile->eFileLock = SHARED_LOCK; 1444 pInode->nLock++; 1445 pInode->nShared = 1; 1446 } 1447 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 1448 /* We are trying for an exclusive lock but another thread in this 1449 ** same process is still holding a shared lock. */ 1450 rc = SQLITE_BUSY; 1451 }else{ 1452 /* The request was for a RESERVED or EXCLUSIVE lock. It is 1453 ** assumed that there is a SHARED or greater lock on the file 1454 ** already. 1455 */ 1456 assert( 0!=pFile->eFileLock ); 1457 lock.l_type = F_WRLCK; 1458 1459 assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK ); 1460 if( eFileLock==RESERVED_LOCK ){ 1461 lock.l_start = RESERVED_BYTE; 1462 lock.l_len = 1L; 1463 }else{ 1464 lock.l_start = SHARED_FIRST; 1465 lock.l_len = SHARED_SIZE; 1466 } 1467 1468 if( unixFileLock(pFile, &lock) ){ 1469 tErrno = errno; 1470 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1471 if( rc!=SQLITE_BUSY ){ 1472 pFile->lastErrno = tErrno; 1473 } 1474 } 1475 } 1476 1477 1478 #ifndef NDEBUG 1479 /* Set up the transaction-counter change checking flags when 1480 ** transitioning from a SHARED to a RESERVED lock. 
The change 1481 ** from SHARED to RESERVED marks the beginning of a normal 1482 ** write operation (not a hot journal rollback). 1483 */ 1484 if( rc==SQLITE_OK 1485 && pFile->eFileLock<=SHARED_LOCK 1486 && eFileLock==RESERVED_LOCK 1487 ){ 1488 pFile->transCntrChng = 0; 1489 pFile->dbUpdate = 0; 1490 pFile->inNormalWrite = 1; 1491 } 1492 #endif 1493 1494 1495 if( rc==SQLITE_OK ){ 1496 pFile->eFileLock = eFileLock; 1497 pInode->eFileLock = eFileLock; 1498 }else if( eFileLock==EXCLUSIVE_LOCK ){ 1499 pFile->eFileLock = PENDING_LOCK; 1500 pInode->eFileLock = PENDING_LOCK; 1501 } 1502 1503 end_lock: 1504 unixLeaveMutex(); 1505 OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), 1506 rc==SQLITE_OK ? "ok" : "failed")); 1507 return rc; 1508 } 1509 1510 /* 1511 ** Add the file descriptor used by file handle pFile to the corresponding 1512 ** pUnused list. 1513 */ 1514 static void setPendingFd(unixFile *pFile){ 1515 unixInodeInfo *pInode = pFile->pInode; 1516 UnixUnusedFd *p = pFile->pUnused; 1517 p->pNext = pInode->pUnused; 1518 pInode->pUnused = p; 1519 pFile->h = -1; 1520 pFile->pUnused = 0; 1521 } 1522 1523 /* 1524 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 1525 ** must be either NO_LOCK or SHARED_LOCK. 1526 ** 1527 ** If the locking level of the file descriptor is already at or below 1528 ** the requested locking level, this routine is a no-op. 1529 ** 1530 ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED 1531 ** the byte range is divided into 2 parts and the first part is unlocked then 1532 ** set to a read lock, then the other part is simply unlocked. This works 1533 ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to 1534 ** remove the write lock on a region when a read lock is set. 
*/
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int rc = SQLITE_OK;
  int h;                  /* Only written here and by SimulateIOError() */

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
      pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
      getpid()));

  /* Nothing to do if this handle is already at or below the requested
  ** level.  The mutex has not been entered yet, so return directly.
  */
  assert( eFileLock<=SHARED_LOCK );
  if( pFile->eFileLock<=eFileLock ){
    return SQLITE_OK;
  }
  unixEnterMutex();
  h = pFile->h;
  pInode = pFile->pInode;
  assert( pInode->nShared!=0 );

  /* Step 1: if this handle holds more than SHARED, give up the RESERVED
  ** and PENDING bytes (and, for a downgrade to SHARED, turn the write-lock
  ** on the shared range back into a read-lock).
  */
  if( pFile->eFileLock>SHARED_LOCK ){
    assert( pInode->eFileLock==pFile->eFileLock );
    SimulateIOErrorBenign(1);
    SimulateIOError( h=(-1) )
    SimulateIOErrorBenign(0);

#ifndef NDEBUG
    /* When reducing a lock such that other processes can start
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    */
#if 0
    assert( pFile->inNormalWrite==0
         || pFile->dbUpdate==0
         || pFile->transCntrChng==1 );
#endif
    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a
    ** write lock until the rest is covered by a read lock:
    **  1:   [WWWWW]
    **  2:   [....W]
    **  3:   [RRRRW]
    **  4:   [RRRR.]
    */
    if( eFileLock==SHARED_LOCK ){

#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
      /* The NFS work-around is only compiled on Apple builds with locking
      ** styles enabled; elsewhere the flag must be zero. */
      (void)handleNFSUnlock;
      assert( handleNFSUnlock==0 );
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
      if( handleNFSUnlock ){
        int tErrno;               /* Error code from system call errors */
        off_t divSize = SHARED_SIZE - 1;

        /* Unlock all but the last byte of the shared range */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          if( IS_LOCK_ERROR(rc) ){
            pFile->lastErrno = tErrno;
          }
          goto end_unlock;
        }
        /* Read-lock that same first part */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
          if( IS_LOCK_ERROR(rc) ){
            pFile->lastErrno = tErrno;
          }
          goto end_unlock;
        }
        /* Finally unlock the remaining tail of the shared range */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST+divSize;
        lock.l_len = SHARED_SIZE-divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          if( IS_LOCK_ERROR(rc) ){
            pFile->lastErrno = tErrno;
          }
          goto end_unlock;
        }
      }else
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
      {
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = SHARED_SIZE;
        if( unixFileLock(pFile, &lock) ){
          /* In theory, the call to unixFileLock() cannot fail because another
          ** process is holding an incompatible lock. If it does, this
          ** indicates that the other process is not following the locking
          ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
          ** SQLITE_BUSY would confuse the upper layer (in practice it causes
          ** an assert to fail). */
          rc = SQLITE_IOERR_RDLOCK;
          pFile->lastErrno = errno;
          goto end_unlock;
        }
      }
    }
    /* Release the PENDING and RESERVED bytes with a single 2-byte unlock;
    ** the assert confirms that the two bytes are adjacent. */
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = PENDING_BYTE;
    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
    if( unixFileLock(pFile, &lock)==0 ){
      pInode->eFileLock = SHARED_LOCK;
    }else{
      rc = SQLITE_IOERR_UNLOCK;
      pFile->lastErrno = errno;
      goto end_unlock;
    }
  }

  /* Step 2: a full unlock also gives up this handle's SHARED lock */
  if( eFileLock==NO_LOCK ){
    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pInode->nShared--;
    if( pInode->nShared==0 ){
      /* l_start==0 and l_len==0 are passed for this final unlock */
      lock.l_type = F_UNLCK;
      lock.l_whence = SEEK_SET;
      lock.l_start = lock.l_len = 0L;
      SimulateIOErrorBenign(1);
      SimulateIOError( h=(-1) )
      SimulateIOErrorBenign(0);
      if( unixFileLock(pFile, &lock)==0 ){
        pInode->eFileLock = NO_LOCK;
      }else{
        /* The error is reported, but both the inode and the handle are
        ** still marked as unlocked. */
        rc = SQLITE_IOERR_UNLOCK;
        pFile->lastErrno = errno;
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }

    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pInode->nLock--;
    assert( pInode->nLock>=0 );
    if( pInode->nLock==0 ){
      closePendingFds(pFile);
    }
  }

end_unlock:
  unixLeaveMutex();
  /* The handle's own lock level is only updated on success */
  if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
  return rc;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
1713 */ 1714 static int unixUnlock(sqlite3_file *id, int eFileLock){ 1715 return posixUnlock(id, eFileLock, 0); 1716 } 1717 1718 /* 1719 ** This function performs the parts of the "close file" operation 1720 ** common to all locking schemes. It closes the directory and file 1721 ** handles, if they are valid, and sets all fields of the unixFile 1722 ** structure to 0. 1723 ** 1724 ** It is *not* necessary to hold the mutex when this routine is called, 1725 ** even on VxWorks. A mutex will be acquired on VxWorks by the 1726 ** vxworksReleaseFileId() routine. 1727 */ 1728 static int closeUnixFile(sqlite3_file *id){ 1729 unixFile *pFile = (unixFile*)id; 1730 if( pFile->dirfd>=0 ){ 1731 robust_close(pFile, pFile->dirfd, __LINE__); 1732 pFile->dirfd=-1; 1733 } 1734 if( pFile->h>=0 ){ 1735 robust_close(pFile, pFile->h, __LINE__); 1736 pFile->h = -1; 1737 } 1738 #if OS_VXWORKS 1739 if( pFile->pId ){ 1740 if( pFile->isDelete ){ 1741 unlink(pFile->pId->zCanonicalName); 1742 } 1743 vxworksReleaseFileId(pFile->pId); 1744 pFile->pId = 0; 1745 } 1746 #endif 1747 OSTRACE(("CLOSE %-3d\n", pFile->h)); 1748 OpenCounter(-1); 1749 sqlite3_free(pFile->pUnused); 1750 memset(pFile, 0, sizeof(unixFile)); 1751 return SQLITE_OK; 1752 } 1753 1754 /* 1755 ** Close a file. 1756 */ 1757 static int unixClose(sqlite3_file *id){ 1758 int rc = SQLITE_OK; 1759 unixFile *pFile = (unixFile *)id; 1760 unixUnlock(id, NO_LOCK); 1761 unixEnterMutex(); 1762 1763 /* unixFile.pInode is always valid here. Otherwise, a different close 1764 ** routine (e.g. nolockClose()) would be called instead. 1765 */ 1766 assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); 1767 if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){ 1768 /* If there are outstanding locks, do not actually close the file just 1769 ** yet because that would clear those locks. Instead, add the file 1770 ** descriptor to pInode->pUnused list. It will be automatically closed 1771 ** when the last lock is cleared. 
1772 */ 1773 setPendingFd(pFile); 1774 } 1775 releaseInodeInfo(pFile); 1776 rc = closeUnixFile(id); 1777 unixLeaveMutex(); 1778 return rc; 1779 } 1780 1781 /************** End of the posix advisory lock implementation ***************** 1782 ******************************************************************************/ 1783 1784 /****************************************************************************** 1785 ****************************** No-op Locking ********************************** 1786 ** 1787 ** Of the various locking implementations available, this is by far the 1788 ** simplest: locking is ignored. No attempt is made to lock the database 1789 ** file for reading or writing. 1790 ** 1791 ** This locking mode is appropriate for use on read-only databases 1792 ** (ex: databases that are burned into CD-ROM, for example.) It can 1793 ** also be used if the application employs some external mechanism to 1794 ** prevent simultaneous access of the same database by two or more 1795 ** database connections. But there is a serious risk of database 1796 ** corruption if this locking mode is used in situations where multiple 1797 ** database connections are accessing the same database file at the same 1798 ** time and one or more of those connections are writing. 1799 */ 1800 1801 static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ 1802 UNUSED_PARAMETER(NotUsed); 1803 *pResOut = 0; 1804 return SQLITE_OK; 1805 } 1806 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ 1807 UNUSED_PARAMETER2(NotUsed, NotUsed2); 1808 return SQLITE_OK; 1809 } 1810 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ 1811 UNUSED_PARAMETER2(NotUsed, NotUsed2); 1812 return SQLITE_OK; 1813 } 1814 1815 /* 1816 ** Close the file. 
1817 */ 1818 static int nolockClose(sqlite3_file *id) { 1819 return closeUnixFile(id); 1820 } 1821 1822 /******************* End of the no-op lock implementation ********************* 1823 ******************************************************************************/ 1824 1825 /****************************************************************************** 1826 ************************* Begin dot-file Locking ****************************** 1827 ** 1828 ** The dotfile locking implementation uses the existance of separate lock 1829 ** files in order to control access to the database. This works on just 1830 ** about every filesystem imaginable. But there are serious downsides: 1831 ** 1832 ** (1) There is zero concurrency. A single reader blocks all other 1833 ** connections from reading or writing the database. 1834 ** 1835 ** (2) An application crash or power loss can leave stale lock files 1836 ** sitting around that need to be cleared manually. 1837 ** 1838 ** Nevertheless, a dotlock is an appropriate locking mode for use if no 1839 ** other locking strategy is available. 1840 ** 1841 ** Dotfile locking works by creating a file in the same directory as the 1842 ** database and with the same name but with a ".lock" extension added. 1843 ** The existance of a lock file implies an EXCLUSIVE lock. All other lock 1844 ** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. 1845 */ 1846 1847 /* 1848 ** The file suffix added to the data base filename in order to create the 1849 ** lock file. 1850 */ 1851 #define DOTLOCK_SUFFIX ".lock" 1852 1853 /* 1854 ** This routine checks if there is a RESERVED lock held on the specified 1855 ** file by this or any other process. If such a lock is held, set *pResOut 1856 ** to a non-zero value otherwise *pResOut is set to zero. The return value 1857 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 1858 ** 1859 ** In dotfile locking, either a lock exists or it does not. 
So in this 1860 ** variation of CheckReservedLock(), *pResOut is set to true if any lock 1861 ** is held on the file and false if the file is unlocked. 1862 */ 1863 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 1864 int rc = SQLITE_OK; 1865 int reserved = 0; 1866 unixFile *pFile = (unixFile*)id; 1867 1868 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 1869 1870 assert( pFile ); 1871 1872 /* Check if a thread in this process holds such a lock */ 1873 if( pFile->eFileLock>SHARED_LOCK ){ 1874 /* Either this connection or some other connection in the same process 1875 ** holds a lock on the file. No need to check further. */ 1876 reserved = 1; 1877 }else{ 1878 /* The lock is held if and only if the lockfile exists */ 1879 const char *zLockFile = (const char*)pFile->lockingContext; 1880 reserved = osAccess(zLockFile, 0)==0; 1881 } 1882 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); 1883 *pResOut = reserved; 1884 return rc; 1885 } 1886 1887 /* 1888 ** Lock the file with the lock specified by parameter eFileLock - one 1889 ** of the following: 1890 ** 1891 ** (1) SHARED_LOCK 1892 ** (2) RESERVED_LOCK 1893 ** (3) PENDING_LOCK 1894 ** (4) EXCLUSIVE_LOCK 1895 ** 1896 ** Sometimes when requesting one lock state, additional lock states 1897 ** are inserted in between. The locking might fail on one of the later 1898 ** transitions leaving the lock state different from what it started but 1899 ** still short of its goal. The following chart shows the allowed 1900 ** transitions and the inserted intermediate states: 1901 ** 1902 ** UNLOCKED -> SHARED 1903 ** SHARED -> RESERVED 1904 ** SHARED -> (PENDING) -> EXCLUSIVE 1905 ** RESERVED -> (PENDING) -> EXCLUSIVE 1906 ** PENDING -> EXCLUSIVE 1907 ** 1908 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 1909 ** routine to lower a locking level. 1910 ** 1911 ** With dotfile locking, we really only support state (4): EXCLUSIVE. 
1912 ** But we track the other locking levels internally. 1913 */ 1914 static int dotlockLock(sqlite3_file *id, int eFileLock) { 1915 unixFile *pFile = (unixFile*)id; 1916 int fd; 1917 char *zLockFile = (char *)pFile->lockingContext; 1918 int rc = SQLITE_OK; 1919 1920 1921 /* If we have any lock, then the lock file already exists. All we have 1922 ** to do is adjust our internal record of the lock level. 1923 */ 1924 if( pFile->eFileLock > NO_LOCK ){ 1925 pFile->eFileLock = eFileLock; 1926 #if !OS_VXWORKS 1927 /* Always update the timestamp on the old file */ 1928 utimes(zLockFile, NULL); 1929 #endif 1930 return SQLITE_OK; 1931 } 1932 1933 /* grab an exclusive lock */ 1934 fd = robust_open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600); 1935 if( fd<0 ){ 1936 /* failed to open/create the file, someone else may have stolen the lock */ 1937 int tErrno = errno; 1938 if( EEXIST == tErrno ){ 1939 rc = SQLITE_BUSY; 1940 } else { 1941 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1942 if( IS_LOCK_ERROR(rc) ){ 1943 pFile->lastErrno = tErrno; 1944 } 1945 } 1946 return rc; 1947 } 1948 robust_close(pFile, fd, __LINE__); 1949 1950 /* got it, set the type and return ok */ 1951 pFile->eFileLock = eFileLock; 1952 return rc; 1953 } 1954 1955 /* 1956 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 1957 ** must be either NO_LOCK or SHARED_LOCK. 1958 ** 1959 ** If the locking level of the file descriptor is already at or below 1960 ** the requested locking level, this routine is a no-op. 1961 ** 1962 ** When the locking level reaches NO_LOCK, delete the lock file. 
1963 */ 1964 static int dotlockUnlock(sqlite3_file *id, int eFileLock) { 1965 unixFile *pFile = (unixFile*)id; 1966 char *zLockFile = (char *)pFile->lockingContext; 1967 1968 assert( pFile ); 1969 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, 1970 pFile->eFileLock, getpid())); 1971 assert( eFileLock<=SHARED_LOCK ); 1972 1973 /* no-op if possible */ 1974 if( pFile->eFileLock==eFileLock ){ 1975 return SQLITE_OK; 1976 } 1977 1978 /* To downgrade to shared, simply update our internal notion of the 1979 ** lock state. No need to mess with the file on disk. 1980 */ 1981 if( eFileLock==SHARED_LOCK ){ 1982 pFile->eFileLock = SHARED_LOCK; 1983 return SQLITE_OK; 1984 } 1985 1986 /* To fully unlock the database, delete the lock file */ 1987 assert( eFileLock==NO_LOCK ); 1988 if( unlink(zLockFile) ){ 1989 int rc = 0; 1990 int tErrno = errno; 1991 if( ENOENT != tErrno ){ 1992 rc = SQLITE_IOERR_UNLOCK; 1993 } 1994 if( IS_LOCK_ERROR(rc) ){ 1995 pFile->lastErrno = tErrno; 1996 } 1997 return rc; 1998 } 1999 pFile->eFileLock = NO_LOCK; 2000 return SQLITE_OK; 2001 } 2002 2003 /* 2004 ** Close a file. Make sure the lock has been released before closing. 2005 */ 2006 static int dotlockClose(sqlite3_file *id) { 2007 int rc; 2008 if( id ){ 2009 unixFile *pFile = (unixFile*)id; 2010 dotlockUnlock(id, NO_LOCK); 2011 sqlite3_free(pFile->lockingContext); 2012 } 2013 rc = closeUnixFile(id); 2014 return rc; 2015 } 2016 /****************** End of the dot-file lock implementation ******************* 2017 ******************************************************************************/ 2018 2019 /****************************************************************************** 2020 ************************** Begin flock Locking ******************************** 2021 ** 2022 ** Use the flock() system call to do file locking. 
2023 ** 2024 ** flock() locking is like dot-file locking in that the various 2025 ** fine-grain locking levels supported by SQLite are collapsed into 2026 ** a single exclusive lock. In other words, SHARED, RESERVED, and 2027 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite 2028 ** still works when you do this, but concurrency is reduced since 2029 ** only a single process can be reading the database at a time. 2030 ** 2031 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if 2032 ** compiling for VXWORKS. 2033 */ 2034 #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS 2035 2036 /* 2037 ** Retry flock() calls that fail with EINTR 2038 */ 2039 #ifdef EINTR 2040 static int robust_flock(int fd, int op){ 2041 int rc; 2042 do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); 2043 return rc; 2044 } 2045 #else 2046 # define robust_flock(a,b) flock(a,b) 2047 #endif 2048 2049 2050 /* 2051 ** This routine checks if there is a RESERVED lock held on the specified 2052 ** file by this or any other process. If such a lock is held, set *pResOut 2053 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2054 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2055 */ 2056 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 2057 int rc = SQLITE_OK; 2058 int reserved = 0; 2059 unixFile *pFile = (unixFile*)id; 2060 2061 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2062 2063 assert( pFile ); 2064 2065 /* Check if a thread in this process holds such a lock */ 2066 if( pFile->eFileLock>SHARED_LOCK ){ 2067 reserved = 1; 2068 } 2069 2070 /* Otherwise see if some other process holds it. 
*/ 2071 if( !reserved ){ 2072 /* attempt to get the lock */ 2073 int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); 2074 if( !lrc ){ 2075 /* got the lock, unlock it */ 2076 lrc = robust_flock(pFile->h, LOCK_UN); 2077 if ( lrc ) { 2078 int tErrno = errno; 2079 /* unlock failed with an error */ 2080 lrc = SQLITE_IOERR_UNLOCK; 2081 if( IS_LOCK_ERROR(lrc) ){ 2082 pFile->lastErrno = tErrno; 2083 rc = lrc; 2084 } 2085 } 2086 } else { 2087 int tErrno = errno; 2088 reserved = 1; 2089 /* someone else might have it reserved */ 2090 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2091 if( IS_LOCK_ERROR(lrc) ){ 2092 pFile->lastErrno = tErrno; 2093 rc = lrc; 2094 } 2095 } 2096 } 2097 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); 2098 2099 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2100 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ 2101 rc = SQLITE_OK; 2102 reserved=1; 2103 } 2104 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2105 *pResOut = reserved; 2106 return rc; 2107 } 2108 2109 /* 2110 ** Lock the file with the lock specified by parameter eFileLock - one 2111 ** of the following: 2112 ** 2113 ** (1) SHARED_LOCK 2114 ** (2) RESERVED_LOCK 2115 ** (3) PENDING_LOCK 2116 ** (4) EXCLUSIVE_LOCK 2117 ** 2118 ** Sometimes when requesting one lock state, additional lock states 2119 ** are inserted in between. The locking might fail on one of the later 2120 ** transitions leaving the lock state different from what it started but 2121 ** still short of its goal. The following chart shows the allowed 2122 ** transitions and the inserted intermediate states: 2123 ** 2124 ** UNLOCKED -> SHARED 2125 ** SHARED -> RESERVED 2126 ** SHARED -> (PENDING) -> EXCLUSIVE 2127 ** RESERVED -> (PENDING) -> EXCLUSIVE 2128 ** PENDING -> EXCLUSIVE 2129 ** 2130 ** flock() only really support EXCLUSIVE locks. 
We track intermediate 2131 ** lock states in the sqlite3_file structure, but all locks SHARED or 2132 ** above are really EXCLUSIVE locks and exclude all other processes from 2133 ** access the file. 2134 ** 2135 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2136 ** routine to lower a locking level. 2137 */ 2138 static int flockLock(sqlite3_file *id, int eFileLock) { 2139 int rc = SQLITE_OK; 2140 unixFile *pFile = (unixFile*)id; 2141 2142 assert( pFile ); 2143 2144 /* if we already have a lock, it is exclusive. 2145 ** Just adjust level and punt on outta here. */ 2146 if (pFile->eFileLock > NO_LOCK) { 2147 pFile->eFileLock = eFileLock; 2148 return SQLITE_OK; 2149 } 2150 2151 /* grab an exclusive lock */ 2152 2153 if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { 2154 int tErrno = errno; 2155 /* didn't get, must be busy */ 2156 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2157 if( IS_LOCK_ERROR(rc) ){ 2158 pFile->lastErrno = tErrno; 2159 } 2160 } else { 2161 /* got it, set the type and return ok */ 2162 pFile->eFileLock = eFileLock; 2163 } 2164 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 2165 rc==SQLITE_OK ? "ok" : "failed")); 2166 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2167 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ 2168 rc = SQLITE_BUSY; 2169 } 2170 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2171 return rc; 2172 } 2173 2174 2175 /* 2176 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2177 ** must be either NO_LOCK or SHARED_LOCK. 2178 ** 2179 ** If the locking level of the file descriptor is already at or below 2180 ** the requested locking level, this routine is a no-op. 
2181 */ 2182 static int flockUnlock(sqlite3_file *id, int eFileLock) { 2183 unixFile *pFile = (unixFile*)id; 2184 2185 assert( pFile ); 2186 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, 2187 pFile->eFileLock, getpid())); 2188 assert( eFileLock<=SHARED_LOCK ); 2189 2190 /* no-op if possible */ 2191 if( pFile->eFileLock==eFileLock ){ 2192 return SQLITE_OK; 2193 } 2194 2195 /* shared can just be set because we always have an exclusive */ 2196 if (eFileLock==SHARED_LOCK) { 2197 pFile->eFileLock = eFileLock; 2198 return SQLITE_OK; 2199 } 2200 2201 /* no, really, unlock. */ 2202 if( robust_flock(pFile->h, LOCK_UN) ){ 2203 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2204 return SQLITE_OK; 2205 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2206 return SQLITE_IOERR_UNLOCK; 2207 }else{ 2208 pFile->eFileLock = NO_LOCK; 2209 return SQLITE_OK; 2210 } 2211 } 2212 2213 /* 2214 ** Close a file. 2215 */ 2216 static int flockClose(sqlite3_file *id) { 2217 if( id ){ 2218 flockUnlock(id, NO_LOCK); 2219 } 2220 return closeUnixFile(id); 2221 } 2222 2223 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ 2224 2225 /******************* End of the flock lock implementation ********************* 2226 ******************************************************************************/ 2227 2228 /****************************************************************************** 2229 ************************ Begin Named Semaphore Locking ************************ 2230 ** 2231 ** Named semaphore locking is only supported on VxWorks. 2232 ** 2233 ** Semaphore locking is like dot-lock and flock in that it really only 2234 ** supports EXCLUSIVE locking. Only a single process can read or write 2235 ** the database file at a time. This reduces potential concurrency, but 2236 ** makes the lock implementation much easier. 2237 */ 2238 #if OS_VXWORKS 2239 2240 /* 2241 ** This routine checks if there is a RESERVED lock held on the specified 2242 ** file by this or any other process. 
If such a lock is held, set *pResOut 2243 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2244 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2245 */ 2246 static int semCheckReservedLock(sqlite3_file *id, int *pResOut) { 2247 int rc = SQLITE_OK; 2248 int reserved = 0; 2249 unixFile *pFile = (unixFile*)id; 2250 2251 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2252 2253 assert( pFile ); 2254 2255 /* Check if a thread in this process holds such a lock */ 2256 if( pFile->eFileLock>SHARED_LOCK ){ 2257 reserved = 1; 2258 } 2259 2260 /* Otherwise see if some other process holds it. */ 2261 if( !reserved ){ 2262 sem_t *pSem = pFile->pInode->pSem; 2263 struct stat statBuf; 2264 2265 if( sem_trywait(pSem)==-1 ){ 2266 int tErrno = errno; 2267 if( EAGAIN != tErrno ){ 2268 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); 2269 pFile->lastErrno = tErrno; 2270 } else { 2271 /* someone else has the lock when we are in NO_LOCK */ 2272 reserved = (pFile->eFileLock < SHARED_LOCK); 2273 } 2274 }else{ 2275 /* we could have it if we want it */ 2276 sem_post(pSem); 2277 } 2278 } 2279 OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); 2280 2281 *pResOut = reserved; 2282 return rc; 2283 } 2284 2285 /* 2286 ** Lock the file with the lock specified by parameter eFileLock - one 2287 ** of the following: 2288 ** 2289 ** (1) SHARED_LOCK 2290 ** (2) RESERVED_LOCK 2291 ** (3) PENDING_LOCK 2292 ** (4) EXCLUSIVE_LOCK 2293 ** 2294 ** Sometimes when requesting one lock state, additional lock states 2295 ** are inserted in between. The locking might fail on one of the later 2296 ** transitions leaving the lock state different from what it started but 2297 ** still short of its goal. 
The following chart shows the allowed 2298 ** transitions and the inserted intermediate states: 2299 ** 2300 ** UNLOCKED -> SHARED 2301 ** SHARED -> RESERVED 2302 ** SHARED -> (PENDING) -> EXCLUSIVE 2303 ** RESERVED -> (PENDING) -> EXCLUSIVE 2304 ** PENDING -> EXCLUSIVE 2305 ** 2306 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate 2307 ** lock states in the sqlite3_file structure, but all locks SHARED or 2308 ** above are really EXCLUSIVE locks and exclude all other processes from 2309 ** access the file. 2310 ** 2311 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2312 ** routine to lower a locking level. 2313 */ 2314 static int semLock(sqlite3_file *id, int eFileLock) { 2315 unixFile *pFile = (unixFile*)id; 2316 int fd; 2317 sem_t *pSem = pFile->pInode->pSem; 2318 int rc = SQLITE_OK; 2319 2320 /* if we already have a lock, it is exclusive. 2321 ** Just adjust level and punt on outta here. */ 2322 if (pFile->eFileLock > NO_LOCK) { 2323 pFile->eFileLock = eFileLock; 2324 rc = SQLITE_OK; 2325 goto sem_end_lock; 2326 } 2327 2328 /* lock semaphore now but bail out when already locked. */ 2329 if( sem_trywait(pSem)==-1 ){ 2330 rc = SQLITE_BUSY; 2331 goto sem_end_lock; 2332 } 2333 2334 /* got it, set the type and return ok */ 2335 pFile->eFileLock = eFileLock; 2336 2337 sem_end_lock: 2338 return rc; 2339 } 2340 2341 /* 2342 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2343 ** must be either NO_LOCK or SHARED_LOCK. 2344 ** 2345 ** If the locking level of the file descriptor is already at or below 2346 ** the requested locking level, this routine is a no-op. 
2347 */ 2348 static int semUnlock(sqlite3_file *id, int eFileLock) { 2349 unixFile *pFile = (unixFile*)id; 2350 sem_t *pSem = pFile->pInode->pSem; 2351 2352 assert( pFile ); 2353 assert( pSem ); 2354 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, 2355 pFile->eFileLock, getpid())); 2356 assert( eFileLock<=SHARED_LOCK ); 2357 2358 /* no-op if possible */ 2359 if( pFile->eFileLock==eFileLock ){ 2360 return SQLITE_OK; 2361 } 2362 2363 /* shared can just be set because we always have an exclusive */ 2364 if (eFileLock==SHARED_LOCK) { 2365 pFile->eFileLock = eFileLock; 2366 return SQLITE_OK; 2367 } 2368 2369 /* no, really unlock. */ 2370 if ( sem_post(pSem)==-1 ) { 2371 int rc, tErrno = errno; 2372 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 2373 if( IS_LOCK_ERROR(rc) ){ 2374 pFile->lastErrno = tErrno; 2375 } 2376 return rc; 2377 } 2378 pFile->eFileLock = NO_LOCK; 2379 return SQLITE_OK; 2380 } 2381 2382 /* 2383 ** Close a file. 2384 */ 2385 static int semClose(sqlite3_file *id) { 2386 if( id ){ 2387 unixFile *pFile = (unixFile*)id; 2388 semUnlock(id, NO_LOCK); 2389 assert( pFile ); 2390 unixEnterMutex(); 2391 releaseInodeInfo(pFile); 2392 unixLeaveMutex(); 2393 closeUnixFile(id); 2394 } 2395 return SQLITE_OK; 2396 } 2397 2398 #endif /* OS_VXWORKS */ 2399 /* 2400 ** Named semaphore locking is only available on VxWorks. 2401 ** 2402 *************** End of the named semaphore lock implementation **************** 2403 ******************************************************************************/ 2404 2405 2406 /****************************************************************************** 2407 *************************** Begin AFP Locking ********************************* 2408 ** 2409 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found 2410 ** on Apple Macintosh computers - both OS9 and OSX. 2411 ** 2412 ** Third-party implementations of AFP are available. But this code here 2413 ** only works on OSX. 
2414 */ 2415 2416 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2417 /* 2418 ** The afpLockingContext structure contains all afp lock specific state 2419 */ 2420 typedef struct afpLockingContext afpLockingContext; 2421 struct afpLockingContext { 2422 int reserved; 2423 const char *dbPath; /* Name of the open file */ 2424 }; 2425 2426 struct ByteRangeLockPB2 2427 { 2428 unsigned long long offset; /* offset to first byte to lock */ 2429 unsigned long long length; /* nbr of bytes to lock */ 2430 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 2431 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 2432 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 2433 int fd; /* file desc to assoc this lock with */ 2434 }; 2435 2436 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 2437 2438 /* 2439 ** This is a utility for setting or clearing a bit-range lock on an 2440 ** AFP filesystem. 2441 ** 2442 ** Return SQLITE_OK on success, SQLITE_BUSY on failure. 2443 */ 2444 static int afpSetLock( 2445 const char *path, /* Name of the file to be locked or unlocked */ 2446 unixFile *pFile, /* Open file descriptor on path */ 2447 unsigned long long offset, /* First byte to be locked */ 2448 unsigned long long length, /* Number of bytes to lock */ 2449 int setLockFlag /* True to set lock. False to clear lock */ 2450 ){ 2451 struct ByteRangeLockPB2 pb; 2452 int err; 2453 2454 pb.unLockFlag = setLockFlag ? 
0 : 1; 2455 pb.startEndFlag = 0; 2456 pb.offset = offset; 2457 pb.length = length; 2458 pb.fd = pFile->h; 2459 2460 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 2461 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), 2462 offset, length)); 2463 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 2464 if ( err==-1 ) { 2465 int rc; 2466 int tErrno = errno; 2467 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", 2468 path, tErrno, strerror(tErrno))); 2469 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS 2470 rc = SQLITE_BUSY; 2471 #else 2472 rc = sqliteErrorFromPosixError(tErrno, 2473 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); 2474 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ 2475 if( IS_LOCK_ERROR(rc) ){ 2476 pFile->lastErrno = tErrno; 2477 } 2478 return rc; 2479 } else { 2480 return SQLITE_OK; 2481 } 2482 } 2483 2484 /* 2485 ** This routine checks if there is a RESERVED lock held on the specified 2486 ** file by this or any other process. If such a lock is held, set *pResOut 2487 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2488 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2489 */ 2490 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 2491 int rc = SQLITE_OK; 2492 int reserved = 0; 2493 unixFile *pFile = (unixFile*)id; 2494 2495 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2496 2497 assert( pFile ); 2498 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2499 if( context->reserved ){ 2500 *pResOut = 1; 2501 return SQLITE_OK; 2502 } 2503 unixEnterMutex(); /* Because pFile->pInode is shared across threads */ 2504 2505 /* Check if a thread in this process holds such a lock */ 2506 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 2507 reserved = 1; 2508 } 2509 2510 /* Otherwise see if some other process holds it. 
2511 */ 2512 if( !reserved ){ 2513 /* lock the RESERVED byte */ 2514 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2515 if( SQLITE_OK==lrc ){ 2516 /* if we succeeded in taking the reserved lock, unlock it to restore 2517 ** the original state */ 2518 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2519 } else { 2520 /* if we failed to get the lock then someone else must have it */ 2521 reserved = 1; 2522 } 2523 if( IS_LOCK_ERROR(lrc) ){ 2524 rc=lrc; 2525 } 2526 } 2527 2528 unixLeaveMutex(); 2529 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); 2530 2531 *pResOut = reserved; 2532 return rc; 2533 } 2534 2535 /* 2536 ** Lock the file with the lock specified by parameter eFileLock - one 2537 ** of the following: 2538 ** 2539 ** (1) SHARED_LOCK 2540 ** (2) RESERVED_LOCK 2541 ** (3) PENDING_LOCK 2542 ** (4) EXCLUSIVE_LOCK 2543 ** 2544 ** Sometimes when requesting one lock state, additional lock states 2545 ** are inserted in between. The locking might fail on one of the later 2546 ** transitions leaving the lock state different from what it started but 2547 ** still short of its goal. The following chart shows the allowed 2548 ** transitions and the inserted intermediate states: 2549 ** 2550 ** UNLOCKED -> SHARED 2551 ** SHARED -> RESERVED 2552 ** SHARED -> (PENDING) -> EXCLUSIVE 2553 ** RESERVED -> (PENDING) -> EXCLUSIVE 2554 ** PENDING -> EXCLUSIVE 2555 ** 2556 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2557 ** routine to lower a locking level. 
2558 */ 2559 static int afpLock(sqlite3_file *id, int eFileLock){ 2560 int rc = SQLITE_OK; 2561 unixFile *pFile = (unixFile*)id; 2562 unixInodeInfo *pInode = pFile->pInode; 2563 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2564 2565 assert( pFile ); 2566 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, 2567 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 2568 azFileLock(pInode->eFileLock), pInode->nShared , getpid())); 2569 2570 /* If there is already a lock of this type or more restrictive on the 2571 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 2572 ** unixEnterMutex() hasn't been called yet. 2573 */ 2574 if( pFile->eFileLock>=eFileLock ){ 2575 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, 2576 azFileLock(eFileLock))); 2577 return SQLITE_OK; 2578 } 2579 2580 /* Make sure the locking sequence is correct 2581 ** (1) We never move from unlocked to anything higher than shared lock. 2582 ** (2) SQLite never explicitly requests a pendig lock. 2583 ** (3) A shared lock is always held when a reserve lock is requested. 2584 */ 2585 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 2586 assert( eFileLock!=PENDING_LOCK ); 2587 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 2588 2589 /* This mutex is needed because pFile->pInode is shared across threads 2590 */ 2591 unixEnterMutex(); 2592 pInode = pFile->pInode; 2593 2594 /* If some thread using this PID has a lock via a different unixFile* 2595 ** handle that precludes the requested lock, return BUSY. 2596 */ 2597 if( (pFile->eFileLock!=pInode->eFileLock && 2598 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 2599 ){ 2600 rc = SQLITE_BUSY; 2601 goto afp_end_lock; 2602 } 2603 2604 /* If a SHARED lock is requested, and some thread using this PID already 2605 ** has a SHARED or RESERVED lock, then increment reference counts and 2606 ** return SQLITE_OK. 
2607 */ 2608 if( eFileLock==SHARED_LOCK && 2609 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 2610 assert( eFileLock==SHARED_LOCK ); 2611 assert( pFile->eFileLock==0 ); 2612 assert( pInode->nShared>0 ); 2613 pFile->eFileLock = SHARED_LOCK; 2614 pInode->nShared++; 2615 pInode->nLock++; 2616 goto afp_end_lock; 2617 } 2618 2619 /* A PENDING lock is needed before acquiring a SHARED lock and before 2620 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 2621 ** be released. 2622 */ 2623 if( eFileLock==SHARED_LOCK 2624 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 2625 ){ 2626 int failed; 2627 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); 2628 if (failed) { 2629 rc = failed; 2630 goto afp_end_lock; 2631 } 2632 } 2633 2634 /* If control gets to this point, then actually go ahead and make 2635 ** operating system calls for the specified lock. 2636 */ 2637 if( eFileLock==SHARED_LOCK ){ 2638 int lrc1, lrc2, lrc1Errno; 2639 long lk, mask; 2640 2641 assert( pInode->nShared==0 ); 2642 assert( pInode->eFileLock==0 ); 2643 2644 mask = (sizeof(long)==8) ? 
LARGEST_INT64 : 0x7fffffff; 2645 /* Now get the read-lock SHARED_LOCK */ 2646 /* note that the quality of the randomness doesn't matter that much */ 2647 lk = random(); 2648 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); 2649 lrc1 = afpSetLock(context->dbPath, pFile, 2650 SHARED_FIRST+pInode->sharedByte, 1, 1); 2651 if( IS_LOCK_ERROR(lrc1) ){ 2652 lrc1Errno = pFile->lastErrno; 2653 } 2654 /* Drop the temporary PENDING lock */ 2655 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 2656 2657 if( IS_LOCK_ERROR(lrc1) ) { 2658 pFile->lastErrno = lrc1Errno; 2659 rc = lrc1; 2660 goto afp_end_lock; 2661 } else if( IS_LOCK_ERROR(lrc2) ){ 2662 rc = lrc2; 2663 goto afp_end_lock; 2664 } else if( lrc1 != SQLITE_OK ) { 2665 rc = lrc1; 2666 } else { 2667 pFile->eFileLock = SHARED_LOCK; 2668 pInode->nLock++; 2669 pInode->nShared = 1; 2670 } 2671 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 2672 /* We are trying for an exclusive lock but another thread in this 2673 ** same process is still holding a shared lock. */ 2674 rc = SQLITE_BUSY; 2675 }else{ 2676 /* The request was for a RESERVED or EXCLUSIVE lock. It is 2677 ** assumed that there is a SHARED or greater lock on the file 2678 ** already. 2679 */ 2680 int failed = 0; 2681 assert( 0!=pFile->eFileLock ); 2682 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { 2683 /* Acquire a RESERVED lock */ 2684 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2685 if( !failed ){ 2686 context->reserved = 1; 2687 } 2688 } 2689 if (!failed && eFileLock == EXCLUSIVE_LOCK) { 2690 /* Acquire an EXCLUSIVE lock */ 2691 2692 /* Remove the shared lock before trying the range. 
we'll need to 2693 ** reestablish the shared lock if we can't get the afpUnlock 2694 */ 2695 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + 2696 pInode->sharedByte, 1, 0)) ){ 2697 int failed2 = SQLITE_OK; 2698 /* now attemmpt to get the exclusive lock range */ 2699 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 2700 SHARED_SIZE, 1); 2701 if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 2702 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ 2703 /* Can't reestablish the shared lock. Sqlite can't deal, this is 2704 ** a critical I/O error 2705 */ 2706 rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : 2707 SQLITE_IOERR_LOCK; 2708 goto afp_end_lock; 2709 } 2710 }else{ 2711 rc = failed; 2712 } 2713 } 2714 if( failed ){ 2715 rc = failed; 2716 } 2717 } 2718 2719 if( rc==SQLITE_OK ){ 2720 pFile->eFileLock = eFileLock; 2721 pInode->eFileLock = eFileLock; 2722 }else if( eFileLock==EXCLUSIVE_LOCK ){ 2723 pFile->eFileLock = PENDING_LOCK; 2724 pInode->eFileLock = PENDING_LOCK; 2725 } 2726 2727 afp_end_lock: 2728 unixLeaveMutex(); 2729 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 2730 rc==SQLITE_OK ? "ok" : "failed")); 2731 return rc; 2732 } 2733 2734 /* 2735 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2736 ** must be either NO_LOCK or SHARED_LOCK. 2737 ** 2738 ** If the locking level of the file descriptor is already at or below 2739 ** the requested locking level, this routine is a no-op. 
2740 */ 2741 static int afpUnlock(sqlite3_file *id, int eFileLock) { 2742 int rc = SQLITE_OK; 2743 unixFile *pFile = (unixFile*)id; 2744 unixInodeInfo *pInode; 2745 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2746 int skipShared = 0; 2747 #ifdef SQLITE_TEST 2748 int h = pFile->h; 2749 #endif 2750 2751 assert( pFile ); 2752 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, 2753 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 2754 getpid())); 2755 2756 assert( eFileLock<=SHARED_LOCK ); 2757 if( pFile->eFileLock<=eFileLock ){ 2758 return SQLITE_OK; 2759 } 2760 unixEnterMutex(); 2761 pInode = pFile->pInode; 2762 assert( pInode->nShared!=0 ); 2763 if( pFile->eFileLock>SHARED_LOCK ){ 2764 assert( pInode->eFileLock==pFile->eFileLock ); 2765 SimulateIOErrorBenign(1); 2766 SimulateIOError( h=(-1) ) 2767 SimulateIOErrorBenign(0); 2768 2769 #ifndef NDEBUG 2770 /* When reducing a lock such that other processes can start 2771 ** reading the database file again, make sure that the 2772 ** transaction counter was updated if any part of the database 2773 ** file changed. If the transaction counter is not updated, 2774 ** other connections to the same file might not realize that 2775 ** the file has changed and hence might not know to flush their 2776 ** cache. The use of a stale cache can lead to database corruption. 
2777 */ 2778 assert( pFile->inNormalWrite==0 2779 || pFile->dbUpdate==0 2780 || pFile->transCntrChng==1 ); 2781 pFile->inNormalWrite = 0; 2782 #endif 2783 2784 if( pFile->eFileLock==EXCLUSIVE_LOCK ){ 2785 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); 2786 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ 2787 /* only re-establish the shared lock if necessary */ 2788 int sharedLockByte = SHARED_FIRST+pInode->sharedByte; 2789 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); 2790 } else { 2791 skipShared = 1; 2792 } 2793 } 2794 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ 2795 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 2796 } 2797 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ 2798 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2799 if( !rc ){ 2800 context->reserved = 0; 2801 } 2802 } 2803 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ 2804 pInode->eFileLock = SHARED_LOCK; 2805 } 2806 } 2807 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ 2808 2809 /* Decrement the shared lock counter. Release the lock using an 2810 ** OS call only when all threads in this same process have released 2811 ** the lock. 
2812 */ 2813 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; 2814 pInode->nShared--; 2815 if( pInode->nShared==0 ){ 2816 SimulateIOErrorBenign(1); 2817 SimulateIOError( h=(-1) ) 2818 SimulateIOErrorBenign(0); 2819 if( !skipShared ){ 2820 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); 2821 } 2822 if( !rc ){ 2823 pInode->eFileLock = NO_LOCK; 2824 pFile->eFileLock = NO_LOCK; 2825 } 2826 } 2827 if( rc==SQLITE_OK ){ 2828 pInode->nLock--; 2829 assert( pInode->nLock>=0 ); 2830 if( pInode->nLock==0 ){ 2831 closePendingFds(pFile); 2832 } 2833 } 2834 } 2835 2836 unixLeaveMutex(); 2837 if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; 2838 return rc; 2839 } 2840 2841 /* 2842 ** Close a file & cleanup AFP specific locking context 2843 */ 2844 static int afpClose(sqlite3_file *id) { 2845 int rc = SQLITE_OK; 2846 if( id ){ 2847 unixFile *pFile = (unixFile*)id; 2848 afpUnlock(id, NO_LOCK); 2849 unixEnterMutex(); 2850 if( pFile->pInode && pFile->pInode->nLock ){ 2851 /* If there are outstanding locks, do not actually close the file just 2852 ** yet because that would clear those locks. Instead, add the file 2853 ** descriptor to pInode->aPending. It will be automatically closed when 2854 ** the last lock is cleared. 2855 */ 2856 setPendingFd(pFile); 2857 } 2858 releaseInodeInfo(pFile); 2859 sqlite3_free(pFile->lockingContext); 2860 rc = closeUnixFile(id); 2861 unixLeaveMutex(); 2862 } 2863 return rc; 2864 } 2865 2866 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 2867 /* 2868 ** The code above is the AFP lock implementation. The code is specific 2869 ** to MacOSX and does not work on other unix platforms. No alternative 2870 ** is available. If you don't compile for a mac, then the "unix-afp" 2871 ** VFS is not available. 
2872 ** 2873 ********************* End of the AFP lock implementation ********************** 2874 ******************************************************************************/ 2875 2876 /****************************************************************************** 2877 *************************** Begin NFS Locking ********************************/ 2878 2879 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2880 /* 2881 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2882 ** must be either NO_LOCK or SHARED_LOCK. 2883 ** 2884 ** If the locking level of the file descriptor is already at or below 2885 ** the requested locking level, this routine is a no-op. 2886 */ 2887 static int nfsUnlock(sqlite3_file *id, int eFileLock){ 2888 return posixUnlock(id, eFileLock, 1); 2889 } 2890 2891 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 2892 /* 2893 ** The code above is the NFS lock implementation. The code is specific 2894 ** to MacOSX and does not work on other unix platforms. No alternative 2895 ** is available. 2896 ** 2897 ********************* End of the NFS lock implementation ********************** 2898 ******************************************************************************/ 2899 2900 /****************************************************************************** 2901 **************** Non-locking sqlite3_file methods ***************************** 2902 ** 2903 ** The next division contains implementations for all methods of the 2904 ** sqlite3_file object other than the locking methods. The locking 2905 ** methods were defined in divisions above (one locking method per 2906 ** division). Those methods that are common to all locking modes 2907 ** are gather together into this division. 2908 */ 2909 2910 /* 2911 ** Seek to the offset passed as the second argument, then read cnt 2912 ** bytes into pBuf. Return the number of bytes actually read. 
2913 ** 2914 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 2915 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 2916 ** one system to another. Since SQLite does not define USE_PREAD 2917 ** any any form by default, we will not attempt to define _XOPEN_SOURCE. 2918 ** See tickets #2741 and #2681. 2919 ** 2920 ** To avoid stomping the errno value on a failed read the lastErrno value 2921 ** is set before returning. 2922 */ 2923 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 2924 int got; 2925 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 2926 i64 newOffset; 2927 #endif 2928 TIMER_START; 2929 #if defined(USE_PREAD) 2930 do{ got = osPread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); 2931 SimulateIOError( got = -1 ); 2932 #elif defined(USE_PREAD64) 2933 do{ got = osPread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR); 2934 SimulateIOError( got = -1 ); 2935 #else 2936 newOffset = lseek(id->h, offset, SEEK_SET); 2937 SimulateIOError( newOffset-- ); 2938 if( newOffset!=offset ){ 2939 if( newOffset == -1 ){ 2940 ((unixFile*)id)->lastErrno = errno; 2941 }else{ 2942 ((unixFile*)id)->lastErrno = 0; 2943 } 2944 return -1; 2945 } 2946 do{ got = osRead(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); 2947 #endif 2948 TIMER_END; 2949 if( got<0 ){ 2950 ((unixFile*)id)->lastErrno = errno; 2951 } 2952 OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); 2953 return got; 2954 } 2955 2956 /* 2957 ** Read data from a file into a buffer. Return SQLITE_OK if all 2958 ** bytes were read successfully and SQLITE_IOERR if anything goes 2959 ** wrong. 
2960 */ 2961 static int unixRead( 2962 sqlite3_file *id, 2963 void *pBuf, 2964 int amt, 2965 sqlite3_int64 offset 2966 ){ 2967 unixFile *pFile = (unixFile *)id; 2968 int got; 2969 assert( id ); 2970 2971 /* If this is a database file (not a journal, master-journal or temp 2972 ** file), the bytes in the locking range should never be read or written. */ 2973 #if 0 2974 assert( pFile->pUnused==0 2975 || offset>=PENDING_BYTE+512 2976 || offset+amt<=PENDING_BYTE 2977 ); 2978 #endif 2979 2980 got = seekAndRead(pFile, offset, pBuf, amt); 2981 if( got==amt ){ 2982 return SQLITE_OK; 2983 }else if( got<0 ){ 2984 /* lastErrno set by seekAndRead */ 2985 return SQLITE_IOERR_READ; 2986 }else{ 2987 pFile->lastErrno = 0; /* not a system error */ 2988 /* Unread parts of the buffer must be zero-filled */ 2989 memset(&((char*)pBuf)[got], 0, amt-got); 2990 return SQLITE_IOERR_SHORT_READ; 2991 } 2992 } 2993 2994 /* 2995 ** Seek to the offset in id->offset then read cnt bytes into pBuf. 2996 ** Return the number of bytes actually read. Update the offset. 2997 ** 2998 ** To avoid stomping the errno value on a failed write the lastErrno value 2999 ** is set before returning. 
3000 */ 3001 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 3002 int got; 3003 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 3004 i64 newOffset; 3005 #endif 3006 TIMER_START; 3007 #if defined(USE_PREAD) 3008 do{ got = osPwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); 3009 #elif defined(USE_PREAD64) 3010 do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR); 3011 #else 3012 newOffset = lseek(id->h, offset, SEEK_SET); 3013 SimulateIOError( newOffset-- ); 3014 if( newOffset!=offset ){ 3015 if( newOffset == -1 ){ 3016 ((unixFile*)id)->lastErrno = errno; 3017 }else{ 3018 ((unixFile*)id)->lastErrno = 0; 3019 } 3020 return -1; 3021 } 3022 do{ got = osWrite(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); 3023 #endif 3024 TIMER_END; 3025 if( got<0 ){ 3026 ((unixFile*)id)->lastErrno = errno; 3027 } 3028 3029 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); 3030 return got; 3031 } 3032 3033 3034 /* 3035 ** Write data from a buffer into a file. Return SQLITE_OK on success 3036 ** or some other error code on failure. 3037 */ 3038 static int unixWrite( 3039 sqlite3_file *id, 3040 const void *pBuf, 3041 int amt, 3042 sqlite3_int64 offset 3043 ){ 3044 unixFile *pFile = (unixFile*)id; 3045 int wrote = 0; 3046 assert( id ); 3047 assert( amt>0 ); 3048 3049 /* If this is a database file (not a journal, master-journal or temp 3050 ** file), the bytes in the locking range should never be read or written. */ 3051 #if 0 3052 assert( pFile->pUnused==0 3053 || offset>=PENDING_BYTE+512 3054 || offset+amt<=PENDING_BYTE 3055 ); 3056 #endif 3057 3058 #ifndef NDEBUG 3059 /* If we are doing a normal write to a database file (as opposed to 3060 ** doing a hot-journal rollback or a write to some file other than a 3061 ** normal database file) then record the fact that the database 3062 ** has changed. If the transaction counter is modified, record that 3063 ** fact too. 
3064 */ 3065 if( pFile->inNormalWrite ){ 3066 pFile->dbUpdate = 1; /* The database has been modified */ 3067 if( offset<=24 && offset+amt>=27 ){ 3068 int rc; 3069 char oldCntr[4]; 3070 SimulateIOErrorBenign(1); 3071 rc = seekAndRead(pFile, 24, oldCntr, 4); 3072 SimulateIOErrorBenign(0); 3073 if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ 3074 pFile->transCntrChng = 1; /* The transaction counter has changed */ 3075 } 3076 } 3077 } 3078 #endif 3079 3080 while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){ 3081 amt -= wrote; 3082 offset += wrote; 3083 pBuf = &((char*)pBuf)[wrote]; 3084 } 3085 SimulateIOError(( wrote=(-1), amt=1 )); 3086 SimulateDiskfullError(( wrote=0, amt=1 )); 3087 3088 if( amt>0 ){ 3089 if( wrote<0 ){ 3090 /* lastErrno set by seekAndWrite */ 3091 return SQLITE_IOERR_WRITE; 3092 }else{ 3093 pFile->lastErrno = 0; /* not a system error */ 3094 return SQLITE_FULL; 3095 } 3096 } 3097 3098 return SQLITE_OK; 3099 } 3100 3101 #ifdef SQLITE_TEST 3102 /* 3103 ** Count the number of fullsyncs and normal syncs. This is used to test 3104 ** that syncs and fullsyncs are occurring at the right times. 3105 */ 3106 int sqlite3_sync_count = 0; 3107 int sqlite3_fullsync_count = 0; 3108 #endif 3109 3110 /* 3111 ** We do not trust systems to provide a working fdatasync(). Some do. 3112 ** Others do no. To be safe, we will stick with the (slower) fsync(). 3113 ** If you know that your system does support fdatasync() correctly, 3114 ** then simply compile with -Dfdatasync=fdatasync 3115 */ 3116 #if !defined(fdatasync) && !defined(__linux__) 3117 # define fdatasync fsync 3118 #endif 3119 3120 /* 3121 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 3122 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 3123 ** only available on Mac OS X. But that could change. 
3124 */ 3125 #ifdef F_FULLFSYNC 3126 # define HAVE_FULLFSYNC 1 3127 #else 3128 # define HAVE_FULLFSYNC 0 3129 #endif 3130 3131 3132 /* 3133 ** The fsync() system call does not work as advertised on many 3134 ** unix systems. The following procedure is an attempt to make 3135 ** it work better. 3136 ** 3137 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 3138 ** for testing when we want to run through the test suite quickly. 3139 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 3140 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 3141 ** or power failure will likely corrupt the database file. 3142 ** 3143 ** SQLite sets the dataOnly flag if the size of the file is unchanged. 3144 ** The idea behind dataOnly is that it should only write the file content 3145 ** to disk, not the inode. We only set dataOnly if the file size is 3146 ** unchanged since the file size is part of the inode. However, 3147 ** Ted Ts'o tells us that fdatasync() will also write the inode if the 3148 ** file size has changed. The only real difference between fdatasync() 3149 ** and fsync(), Ted tells us, is that fdatasync() will not flush the 3150 ** inode if the mtime or owner or other inode attributes have changed. 3151 ** We only care about the file size, not the other file attributes, so 3152 ** as far as SQLite is concerned, an fdatasync() is always adequate. 3153 ** So, we always use fdatasync() if it is available, regardless of 3154 ** the value of the dataOnly flag. 3155 */ 3156 static int full_fsync(int fd, int fullSync, int dataOnly){ 3157 int rc; 3158 3159 /* The following "ifdef/elif/else/" block has the same structure as 3160 ** the one below. It is replicated here solely to avoid cluttering 3161 ** up the real code with the UNUSED_PARAMETER() macros. 
3162 */ 3163 #ifdef SQLITE_NO_SYNC 3164 UNUSED_PARAMETER(fd); 3165 UNUSED_PARAMETER(fullSync); 3166 UNUSED_PARAMETER(dataOnly); 3167 #elif HAVE_FULLFSYNC 3168 UNUSED_PARAMETER(dataOnly); 3169 #else 3170 UNUSED_PARAMETER(fullSync); 3171 UNUSED_PARAMETER(dataOnly); 3172 #endif 3173 3174 /* Record the number of times that we do a normal fsync() and 3175 ** FULLSYNC. This is used during testing to verify that this procedure 3176 ** gets called with the correct arguments. 3177 */ 3178 #ifdef SQLITE_TEST 3179 if( fullSync ) sqlite3_fullsync_count++; 3180 sqlite3_sync_count++; 3181 #endif 3182 3183 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 3184 ** no-op 3185 */ 3186 #ifdef SQLITE_NO_SYNC 3187 rc = SQLITE_OK; 3188 #elif HAVE_FULLFSYNC 3189 if( fullSync ){ 3190 rc = osFcntl(fd, F_FULLFSYNC, 0); 3191 }else{ 3192 rc = 1; 3193 } 3194 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 3195 ** It shouldn't be possible for fullfsync to fail on the local 3196 ** file system (on OSX), so failure indicates that FULLFSYNC 3197 ** isn't supported for this file system. So, attempt an fsync 3198 ** and (for now) ignore the overhead of a superfluous fcntl call. 3199 ** It'd be better to detect fullfsync support once and avoid 3200 ** the fcntl call every time sync is called. 3201 */ 3202 if( rc ) rc = fsync(fd); 3203 3204 #elif defined(__APPLE__) 3205 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly 3206 ** so currently we default to the macro that redefines fdatasync to fsync 3207 */ 3208 rc = fsync(fd); 3209 #else 3210 rc = fdatasync(fd); 3211 #if OS_VXWORKS 3212 if( rc==-1 && errno==ENOTSUP ){ 3213 rc = fsync(fd); 3214 } 3215 #endif /* OS_VXWORKS */ 3216 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ 3217 3218 if( OS_VXWORKS && rc!= -1 ){ 3219 rc = 0; 3220 } 3221 return rc; 3222 } 3223 3224 /* 3225 ** Make sure all writes to a particular file are committed to disk. 
3226 ** 3227 ** If dataOnly==0 then both the file itself and its metadata (file 3228 ** size, access time, etc) are synced. If dataOnly!=0 then only the 3229 ** file data is synced. 3230 ** 3231 ** Under Unix, also make sure that the directory entry for the file 3232 ** has been created by fsync-ing the directory that contains the file. 3233 ** If we do not do this and we encounter a power failure, the directory 3234 ** entry for the journal might not exist after we reboot. The next 3235 ** SQLite to access the file will not know that the journal exists (because 3236 ** the directory entry for the journal was never created) and the transaction 3237 ** will not roll back - possibly leading to database corruption. 3238 */ 3239 static int unixSync(sqlite3_file *id, int flags){ 3240 int rc; 3241 unixFile *pFile = (unixFile*)id; 3242 3243 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 3244 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 3245 3246 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 3247 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 3248 || (flags&0x0F)==SQLITE_SYNC_FULL 3249 ); 3250 3251 /* Unix cannot, but some systems may return SQLITE_FULL from here. This 3252 ** line is to test that doing so does not cause any problems. 3253 */ 3254 SimulateDiskfullError( return SQLITE_FULL ); 3255 3256 assert( pFile ); 3257 OSTRACE(("SYNC %-3d\n", pFile->h)); 3258 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 3259 SimulateIOError( rc=1 ); 3260 if( rc ){ 3261 pFile->lastErrno = errno; 3262 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); 3263 } 3264 if( pFile->dirfd>=0 ){ 3265 OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd, 3266 HAVE_FULLFSYNC, isFullsync)); 3267 #ifndef SQLITE_DISABLE_DIRSYNC 3268 /* The directory sync is only attempted if full_fsync is 3269 ** turned off or unavailable. If a full_fsync occurred above, 3270 ** then the directory sync is superfluous. 
3271 */ 3272 if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){ 3273 /* 3274 ** We have received multiple reports of fsync() returning 3275 ** errors when applied to directories on certain file systems. 3276 ** A failed directory sync is not a big deal. So it seems 3277 ** better to ignore the error. Ticket #1657 3278 */ 3279 /* pFile->lastErrno = errno; */ 3280 /* return SQLITE_IOERR; */ 3281 } 3282 #endif 3283 /* Only need to sync once, so close the directory when we are done */ 3284 robust_close(pFile, pFile->dirfd, __LINE__); 3285 pFile->dirfd = -1; 3286 } 3287 return rc; 3288 } 3289 3290 /* 3291 ** Truncate an open file to a specified size 3292 */ 3293 static int unixTruncate(sqlite3_file *id, i64 nByte){ 3294 unixFile *pFile = (unixFile *)id; 3295 int rc; 3296 assert( pFile ); 3297 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 3298 3299 /* If the user has configured a chunk-size for this file, truncate the 3300 ** file so that it consists of an integer number of chunks (i.e. the 3301 ** actual file size after the operation may be larger than the requested 3302 ** size). 3303 */ 3304 if( pFile->szChunk ){ 3305 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; 3306 } 3307 3308 rc = robust_ftruncate(pFile->h, (off_t)nByte); 3309 if( rc ){ 3310 pFile->lastErrno = errno; 3311 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3312 }else{ 3313 #ifndef NDEBUG 3314 /* If we are doing a normal write to a database file (as opposed to 3315 ** doing a hot-journal rollback or a write to some file other than a 3316 ** normal database file) and we truncate the file to zero length, 3317 ** that effectively updates the change counter. This might happen 3318 ** when restoring a database using the backup API from a zero-length 3319 ** source. 
3320 */ 3321 if( pFile->inNormalWrite && nByte==0 ){ 3322 pFile->transCntrChng = 1; 3323 } 3324 #endif 3325 3326 return SQLITE_OK; 3327 } 3328 } 3329 3330 /* 3331 ** Determine the current size of a file in bytes 3332 */ 3333 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 3334 int rc; 3335 struct stat buf; 3336 assert( id ); 3337 rc = osFstat(((unixFile*)id)->h, &buf); 3338 SimulateIOError( rc=1 ); 3339 if( rc!=0 ){ 3340 ((unixFile*)id)->lastErrno = errno; 3341 return SQLITE_IOERR_FSTAT; 3342 } 3343 *pSize = buf.st_size; 3344 3345 /* When opening a zero-size database, the findInodeInfo() procedure 3346 ** writes a single byte into that file in order to work around a bug 3347 ** in the OS-X msdos filesystem. In order to avoid problems with upper 3348 ** layers, we need to report this file size as zero even though it is 3349 ** really 1. Ticket #3260. 3350 */ 3351 if( *pSize==1 ) *pSize = 0; 3352 3353 3354 return SQLITE_OK; 3355 } 3356 3357 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3358 /* 3359 ** Handler for proxy-locking file-control verbs. Defined below in the 3360 ** proxying locking division. 3361 */ 3362 static int proxyFileControl(sqlite3_file*,int,void*); 3363 #endif 3364 3365 /* 3366 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 3367 ** file-control operation. 3368 ** 3369 ** If the user has configured a chunk-size for this file, it could be 3370 ** that the file needs to be extended at this point. Otherwise, the 3371 ** SQLITE_FCNTL_SIZE_HINT operation is a no-op for Unix. 
3372 */ 3373 static int fcntlSizeHint(unixFile *pFile, i64 nByte){ 3374 if( pFile->szChunk ){ 3375 i64 nSize; /* Required file size */ 3376 struct stat buf; /* Used to hold return values of fstat() */ 3377 3378 if( osFstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT; 3379 3380 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; 3381 if( nSize>(i64)buf.st_size ){ 3382 3383 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 3384 /* The code below is handling the return value of osFallocate() 3385 ** correctly. posix_fallocate() is defined to "returns zero on success, 3386 ** or an error number on failure". See the manpage for details. */ 3387 int err; 3388 do{ 3389 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); 3390 }while( err==EINTR ); 3391 if( err ) return SQLITE_IOERR_WRITE; 3392 #else 3393 /* If the OS does not have posix_fallocate(), fake it. First use 3394 ** ftruncate() to set the file size, then write a single byte to 3395 ** the last byte in each block within the extended region. This 3396 ** is the same technique used by glibc to implement posix_fallocate() 3397 ** on systems that do not have a real fallocate() system call. 3398 */ 3399 int nBlk = buf.st_blksize; /* File-system block size */ 3400 i64 iWrite; /* Next offset to write to */ 3401 int nWrite; /* Return value from seekAndWrite() */ 3402 3403 if( robust_ftruncate(pFile->h, nSize) ){ 3404 pFile->lastErrno = errno; 3405 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3406 } 3407 iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1; 3408 do { 3409 nWrite = seekAndWrite(pFile, iWrite, "", 1); 3410 iWrite += nBlk; 3411 } while( nWrite==1 && iWrite<nSize ); 3412 if( nWrite!=1 ) return SQLITE_IOERR_WRITE; 3413 #endif 3414 } 3415 } 3416 3417 return SQLITE_OK; 3418 } 3419 3420 /* 3421 ** Information and control of an open file handle. 
3422 */ 3423 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 3424 switch( op ){ 3425 case SQLITE_FCNTL_LOCKSTATE: { 3426 *(int*)pArg = ((unixFile*)id)->eFileLock; 3427 return SQLITE_OK; 3428 } 3429 case SQLITE_LAST_ERRNO: { 3430 *(int*)pArg = ((unixFile*)id)->lastErrno; 3431 return SQLITE_OK; 3432 } 3433 case SQLITE_FCNTL_CHUNK_SIZE: { 3434 ((unixFile*)id)->szChunk = *(int *)pArg; 3435 return SQLITE_OK; 3436 } 3437 case SQLITE_FCNTL_SIZE_HINT: { 3438 return fcntlSizeHint((unixFile *)id, *(i64 *)pArg); 3439 } 3440 #ifndef NDEBUG 3441 /* The pager calls this method to signal that it has done 3442 ** a rollback and that the database is therefore unchanged and 3443 ** it hence it is OK for the transaction change counter to be 3444 ** unchanged. 3445 */ 3446 case SQLITE_FCNTL_DB_UNCHANGED: { 3447 ((unixFile*)id)->dbUpdate = 0; 3448 return SQLITE_OK; 3449 } 3450 #endif 3451 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3452 case SQLITE_SET_LOCKPROXYFILE: 3453 case SQLITE_GET_LOCKPROXYFILE: { 3454 return proxyFileControl(id,op,pArg); 3455 } 3456 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 3457 case SQLITE_FCNTL_SYNC_OMITTED: { 3458 return SQLITE_OK; /* A no-op */ 3459 } 3460 } 3461 return SQLITE_NOTFOUND; 3462 } 3463 3464 /* 3465 ** Return the sector size in bytes of the underlying block device for 3466 ** the specified file. This is almost always 512 bytes, but may be 3467 ** larger for some devices. 3468 ** 3469 ** SQLite code assumes this function cannot fail. It also assumes that 3470 ** if two files are created in the same file-system directory (i.e. 3471 ** a database and its journal file) that the sector size will be the 3472 ** same for both. 3473 */ 3474 static int unixSectorSize(sqlite3_file *NotUsed){ 3475 UNUSED_PARAMETER(NotUsed); 3476 return SQLITE_DEFAULT_SECTOR_SIZE; 3477 } 3478 3479 /* 3480 ** Return the device characteristics for the file. This is always 0 for unix. 
3481 */ 3482 static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ 3483 UNUSED_PARAMETER(NotUsed); 3484 return 0; 3485 } 3486 3487 #ifndef SQLITE_OMIT_WAL 3488 3489 3490 /* 3491 ** Object used to represent an shared memory buffer. 3492 ** 3493 ** When multiple threads all reference the same wal-index, each thread 3494 ** has its own unixShm object, but they all point to a single instance 3495 ** of this unixShmNode object. In other words, each wal-index is opened 3496 ** only once per process. 3497 ** 3498 ** Each unixShmNode object is connected to a single unixInodeInfo object. 3499 ** We could coalesce this object into unixInodeInfo, but that would mean 3500 ** every open file that does not use shared memory (in other words, most 3501 ** open files) would have to carry around this extra information. So 3502 ** the unixInodeInfo object contains a pointer to this unixShmNode object 3503 ** and the unixShmNode object is created only when needed. 3504 ** 3505 ** unixMutexHeld() must be true when creating or destroying 3506 ** this object or while reading or writing the following fields: 3507 ** 3508 ** nRef 3509 ** 3510 ** The following fields are read-only after the object is created: 3511 ** 3512 ** fid 3513 ** zFilename 3514 ** 3515 ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and 3516 ** unixMutexHeld() is true when reading or writing any other field 3517 ** in this structure. 
3518 */ 3519 struct unixShmNode { 3520 unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ 3521 sqlite3_mutex *mutex; /* Mutex to access this object */ 3522 char *zFilename; /* Name of the mmapped file */ 3523 int h; /* Open file descriptor */ 3524 int szRegion; /* Size of shared-memory regions */ 3525 int nRegion; /* Size of array apRegion */ 3526 char **apRegion; /* Array of mapped shared-memory regions */ 3527 int nRef; /* Number of unixShm objects pointing to this */ 3528 unixShm *pFirst; /* All unixShm objects pointing to this */ 3529 #ifdef SQLITE_DEBUG 3530 u8 exclMask; /* Mask of exclusive locks held */ 3531 u8 sharedMask; /* Mask of shared locks held */ 3532 u8 nextShmId; /* Next available unixShm.id value */ 3533 #endif 3534 }; 3535 3536 /* 3537 ** Structure used internally by this VFS to record the state of an 3538 ** open shared memory connection. 3539 ** 3540 ** The following fields are initialized when this object is created and 3541 ** are read-only thereafter: 3542 ** 3543 ** unixShm.pFile 3544 ** unixShm.id 3545 ** 3546 ** All other fields are read/write. The unixShm.pFile->mutex must be held 3547 ** while accessing any read/write fields. 3548 */ 3549 struct unixShm { 3550 unixShmNode *pShmNode; /* The underlying unixShmNode object */ 3551 unixShm *pNext; /* Next unixShm with the same unixShmNode */ 3552 u8 hasMutex; /* True if holding the unixShmNode mutex */ 3553 u16 sharedMask; /* Mask of shared locks held */ 3554 u16 exclMask; /* Mask of exclusive locks held */ 3555 #ifdef SQLITE_DEBUG 3556 u8 id; /* Id of this connection within its unixShmNode */ 3557 #endif 3558 }; 3559 3560 /* 3561 ** Constants used for locking 3562 */ 3563 #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ 3564 #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ 3565 3566 /* 3567 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 
3568 ** 3569 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 3570 ** otherwise. 3571 */ 3572 static int unixShmSystemLock( 3573 unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */ 3574 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 3575 int ofst, /* First byte of the locking range */ 3576 int n /* Number of bytes to lock */ 3577 ){ 3578 struct flock f; /* The posix advisory locking structure */ 3579 int rc = SQLITE_OK; /* Result code form fcntl() */ 3580 3581 /* Access to the unixShmNode object is serialized by the caller */ 3582 assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); 3583 3584 /* Shared locks never span more than one byte */ 3585 assert( n==1 || lockType!=F_RDLCK ); 3586 3587 /* Locks are within range */ 3588 assert( n>=1 && n<SQLITE_SHM_NLOCK ); 3589 3590 if( pShmNode->h>=0 ){ 3591 /* Initialize the locking parameters */ 3592 memset(&f, 0, sizeof(f)); 3593 f.l_type = lockType; 3594 f.l_whence = SEEK_SET; 3595 f.l_start = ofst; 3596 f.l_len = n; 3597 3598 rc = osFcntl(pShmNode->h, F_SETLK, &f); 3599 rc = (rc!=(-1)) ? 
SQLITE_OK : SQLITE_BUSY; 3600 } 3601 3602 /* Update the global lock state and do debug tracing */ 3603 #ifdef SQLITE_DEBUG 3604 { u16 mask; 3605 OSTRACE(("SHM-LOCK ")); 3606 mask = (1<<(ofst+n)) - (1<<ofst); 3607 if( rc==SQLITE_OK ){ 3608 if( lockType==F_UNLCK ){ 3609 OSTRACE(("unlock %d ok", ofst)); 3610 pShmNode->exclMask &= ~mask; 3611 pShmNode->sharedMask &= ~mask; 3612 }else if( lockType==F_RDLCK ){ 3613 OSTRACE(("read-lock %d ok", ofst)); 3614 pShmNode->exclMask &= ~mask; 3615 pShmNode->sharedMask |= mask; 3616 }else{ 3617 assert( lockType==F_WRLCK ); 3618 OSTRACE(("write-lock %d ok", ofst)); 3619 pShmNode->exclMask |= mask; 3620 pShmNode->sharedMask &= ~mask; 3621 } 3622 }else{ 3623 if( lockType==F_UNLCK ){ 3624 OSTRACE(("unlock %d failed", ofst)); 3625 }else if( lockType==F_RDLCK ){ 3626 OSTRACE(("read-lock failed")); 3627 }else{ 3628 assert( lockType==F_WRLCK ); 3629 OSTRACE(("write-lock %d failed", ofst)); 3630 } 3631 } 3632 OSTRACE((" - afterwards %03x,%03x\n", 3633 pShmNode->sharedMask, pShmNode->exclMask)); 3634 } 3635 #endif 3636 3637 return rc; 3638 } 3639 3640 3641 /* 3642 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. 3643 ** 3644 ** This is not a VFS shared-memory method; it is a utility function called 3645 ** by VFS shared-memory methods. 3646 */ 3647 static void unixShmPurge(unixFile *pFd){ 3648 unixShmNode *p = pFd->pInode->pShmNode; 3649 assert( unixMutexHeld() ); 3650 if( p && p->nRef==0 ){ 3651 int i; 3652 assert( p->pInode==pFd->pInode ); 3653 if( p->mutex ) sqlite3_mutex_free(p->mutex); 3654 for(i=0; i<p->nRegion; i++){ 3655 if( p->h>=0 ){ 3656 munmap(p->apRegion[i], p->szRegion); 3657 }else{ 3658 sqlite3_free(p->apRegion[i]); 3659 } 3660 } 3661 sqlite3_free(p->apRegion); 3662 if( p->h>=0 ){ 3663 robust_close(pFd, p->h, __LINE__); 3664 p->h = -1; 3665 } 3666 p->pInode->pShmNode = 0; 3667 sqlite3_free(p); 3668 } 3669 } 3670 3671 /* 3672 ** Open a shared-memory area associated with open database file pDbFd. 
** This particular implementation uses mmapped files.
**
** The file used to implement shared-memory is in the same directory
** as the open database file and has the same name as the open database
** file with the "-shm" suffix added.  For example, if the database file
** is "/home/user1/config.db" then the file that is created and mmapped
** for shared memory will be called "/home/user1/config.db-shm".
**
** Another approach is to use files in /dev/shm or /dev/tmp or
** some other tmpfs mount.  But if a file in a different directory
** from the database file is used, then differing access permissions
** or a chroot() might cause two different processes on the same
** database to end up using different files for shared memory -
** meaning that their memory would not really be shared - resulting
** in database corruption.  Nevertheless, this tmpfs file usage
** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"
** or the equivalent.  The use of the SQLITE_SHM_DIRECTORY compile-time
** option results in an incompatible build of SQLite; if builds of SQLite
** with differing SQLITE_SHM_DIRECTORY settings attempt to use the
** same database file at the same time, database corruption will likely
** result.  The SQLITE_SHM_DIRECTORY compile-time option is considered
** "unsupported" and may go away in a future SQLite release.
**
** When opening a new shared-memory file, if no other instances of that
** file are currently open, in this process or in other processes, then
** the file must be truncated to zero length or have its header cleared.
**
** If the original database file (pDbFd) is using the "unix-excl" VFS
** that means that an exclusive lock is held on the database file and
** that no other processes are able to read or write the database.
In 3703 ** that case, we do not really need shared memory. No shared memory 3704 ** file is created. The shared memory will be simulated with heap memory. 3705 */ 3706 static int unixOpenSharedMemory(unixFile *pDbFd){ 3707 struct unixShm *p = 0; /* The connection to be opened */ 3708 struct unixShmNode *pShmNode; /* The underlying mmapped file */ 3709 int rc; /* Result code */ 3710 unixInodeInfo *pInode; /* The inode of fd */ 3711 char *zShmFilename; /* Name of the file used for SHM */ 3712 int nShmFilename; /* Size of the SHM filename in bytes */ 3713 3714 /* Allocate space for the new unixShm object. */ 3715 p = sqlite3_malloc( sizeof(*p) ); 3716 if( p==0 ) return SQLITE_NOMEM; 3717 memset(p, 0, sizeof(*p)); 3718 assert( pDbFd->pShm==0 ); 3719 3720 /* Check to see if a unixShmNode object already exists. Reuse an existing 3721 ** one if present. Create a new one if necessary. 3722 */ 3723 unixEnterMutex(); 3724 pInode = pDbFd->pInode; 3725 pShmNode = pInode->pShmNode; 3726 if( pShmNode==0 ){ 3727 struct stat sStat; /* fstat() info for database file */ 3728 3729 /* Call fstat() to figure out the permissions on the database file. If 3730 ** a new *-shm file is created, an attempt will be made to create it 3731 ** with the same permissions. The actual permissions the file is created 3732 ** with are subject to the current umask setting. 
3733 */ 3734 if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){ 3735 rc = SQLITE_IOERR_FSTAT; 3736 goto shm_open_err; 3737 } 3738 3739 #ifdef SQLITE_SHM_DIRECTORY 3740 nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30; 3741 #else 3742 nShmFilename = 5 + (int)strlen(pDbFd->zPath); 3743 #endif 3744 pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename ); 3745 if( pShmNode==0 ){ 3746 rc = SQLITE_NOMEM; 3747 goto shm_open_err; 3748 } 3749 memset(pShmNode, 0, sizeof(*pShmNode)); 3750 zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1]; 3751 #ifdef SQLITE_SHM_DIRECTORY 3752 sqlite3_snprintf(nShmFilename, zShmFilename, 3753 SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", 3754 (u32)sStat.st_ino, (u32)sStat.st_dev); 3755 #else 3756 sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", pDbFd->zPath); 3757 #endif 3758 pShmNode->h = -1; 3759 pDbFd->pInode->pShmNode = pShmNode; 3760 pShmNode->pInode = pDbFd->pInode; 3761 pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 3762 if( pShmNode->mutex==0 ){ 3763 rc = SQLITE_NOMEM; 3764 goto shm_open_err; 3765 } 3766 3767 if( pInode->bProcessLock==0 ){ 3768 pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT, 3769 (sStat.st_mode & 0777)); 3770 if( pShmNode->h<0 ){ 3771 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); 3772 goto shm_open_err; 3773 } 3774 3775 /* Check to see if another process is holding the dead-man switch. 3776 ** If not, truncate the file to zero length. 
3777 */ 3778 rc = SQLITE_OK; 3779 if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ 3780 if( robust_ftruncate(pShmNode->h, 0) ){ 3781 rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); 3782 } 3783 } 3784 if( rc==SQLITE_OK ){ 3785 rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); 3786 } 3787 if( rc ) goto shm_open_err; 3788 } 3789 } 3790 3791 /* Make the new connection a child of the unixShmNode */ 3792 p->pShmNode = pShmNode; 3793 #ifdef SQLITE_DEBUG 3794 p->id = pShmNode->nextShmId++; 3795 #endif 3796 pShmNode->nRef++; 3797 pDbFd->pShm = p; 3798 unixLeaveMutex(); 3799 3800 /* The reference count on pShmNode has already been incremented under 3801 ** the cover of the unixEnterMutex() mutex and the pointer from the 3802 ** new (struct unixShm) object to the pShmNode has been set. All that is 3803 ** left to do is to link the new object into the linked list starting 3804 ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex 3805 ** mutex. 3806 */ 3807 sqlite3_mutex_enter(pShmNode->mutex); 3808 p->pNext = pShmNode->pFirst; 3809 pShmNode->pFirst = p; 3810 sqlite3_mutex_leave(pShmNode->mutex); 3811 return SQLITE_OK; 3812 3813 /* Jump here on any error */ 3814 shm_open_err: 3815 unixShmPurge(pDbFd); /* This call frees pShmNode if required */ 3816 sqlite3_free(p); 3817 unixLeaveMutex(); 3818 return rc; 3819 } 3820 3821 /* 3822 ** This function is called to obtain a pointer to region iRegion of the 3823 ** shared-memory associated with the database file fd. Shared-memory regions 3824 ** are numbered starting from zero. Each shared-memory region is szRegion 3825 ** bytes in size. 3826 ** 3827 ** If an error occurs, an error code is returned and *pp is set to NULL. 3828 ** 3829 ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory 3830 ** region has not been allocated (by any client, including one running in a 3831 ** separate process), then *pp is set to NULL and SQLITE_OK returned. 
If 3832 ** bExtend is non-zero and the requested shared-memory region has not yet 3833 ** been allocated, it is allocated by this function. 3834 ** 3835 ** If the shared-memory region has already been allocated or is allocated by 3836 ** this call as described above, then it is mapped into this processes 3837 ** address space (if it is not already), *pp is set to point to the mapped 3838 ** memory and SQLITE_OK returned. 3839 */ 3840 static int unixShmMap( 3841 sqlite3_file *fd, /* Handle open on database file */ 3842 int iRegion, /* Region to retrieve */ 3843 int szRegion, /* Size of regions */ 3844 int bExtend, /* True to extend file if necessary */ 3845 void volatile **pp /* OUT: Mapped memory */ 3846 ){ 3847 unixFile *pDbFd = (unixFile*)fd; 3848 unixShm *p; 3849 unixShmNode *pShmNode; 3850 int rc = SQLITE_OK; 3851 3852 /* If the shared-memory file has not yet been opened, open it now. */ 3853 if( pDbFd->pShm==0 ){ 3854 rc = unixOpenSharedMemory(pDbFd); 3855 if( rc!=SQLITE_OK ) return rc; 3856 } 3857 3858 p = pDbFd->pShm; 3859 pShmNode = p->pShmNode; 3860 sqlite3_mutex_enter(pShmNode->mutex); 3861 assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); 3862 assert( pShmNode->pInode==pDbFd->pInode ); 3863 assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); 3864 assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); 3865 3866 if( pShmNode->nRegion<=iRegion ){ 3867 char **apNew; /* New apRegion[] array */ 3868 int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ 3869 struct stat sStat; /* Used by fstat() */ 3870 3871 pShmNode->szRegion = szRegion; 3872 3873 if( pShmNode->h>=0 ){ 3874 /* The requested region is not mapped into this processes address space. 3875 ** Check to see if it has been allocated (i.e. if the wal-index file is 3876 ** large enough to contain the requested region). 
3877 */ 3878 if( osFstat(pShmNode->h, &sStat) ){ 3879 rc = SQLITE_IOERR_SHMSIZE; 3880 goto shmpage_out; 3881 } 3882 3883 if( sStat.st_size<nByte ){ 3884 /* The requested memory region does not exist. If bExtend is set to 3885 ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. 3886 ** 3887 ** Alternatively, if bExtend is true, use ftruncate() to allocate 3888 ** the requested memory region. 3889 */ 3890 if( !bExtend ) goto shmpage_out; 3891 if( robust_ftruncate(pShmNode->h, nByte) ){ 3892 rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate", 3893 pShmNode->zFilename); 3894 goto shmpage_out; 3895 } 3896 } 3897 } 3898 3899 /* Map the requested memory region into this processes address space. */ 3900 apNew = (char **)sqlite3_realloc( 3901 pShmNode->apRegion, (iRegion+1)*sizeof(char *) 3902 ); 3903 if( !apNew ){ 3904 rc = SQLITE_IOERR_NOMEM; 3905 goto shmpage_out; 3906 } 3907 pShmNode->apRegion = apNew; 3908 while(pShmNode->nRegion<=iRegion){ 3909 void *pMem; 3910 if( pShmNode->h>=0 ){ 3911 pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, 3912 MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion 3913 ); 3914 if( pMem==MAP_FAILED ){ 3915 rc = SQLITE_IOERR; 3916 goto shmpage_out; 3917 } 3918 }else{ 3919 pMem = sqlite3_malloc(szRegion); 3920 if( pMem==0 ){ 3921 rc = SQLITE_NOMEM; 3922 goto shmpage_out; 3923 } 3924 memset(pMem, 0, szRegion); 3925 } 3926 pShmNode->apRegion[pShmNode->nRegion] = pMem; 3927 pShmNode->nRegion++; 3928 } 3929 } 3930 3931 shmpage_out: 3932 if( pShmNode->nRegion>iRegion ){ 3933 *pp = pShmNode->apRegion[iRegion]; 3934 }else{ 3935 *pp = 0; 3936 } 3937 sqlite3_mutex_leave(pShmNode->mutex); 3938 return rc; 3939 } 3940 3941 /* 3942 ** Change the lock state for a shared-memory segment. 3943 ** 3944 ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little 3945 ** different here than in posix. In xShmLock(), one can go from unlocked 3946 ** to shared and back or from unlocked to exclusive and back. 
But one may 3947 ** not go from shared to exclusive or from exclusive to shared. 3948 */ 3949 static int unixShmLock( 3950 sqlite3_file *fd, /* Database file holding the shared memory */ 3951 int ofst, /* First lock to acquire or release */ 3952 int n, /* Number of locks to acquire or release */ 3953 int flags /* What to do with the lock */ 3954 ){ 3955 unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ 3956 unixShm *p = pDbFd->pShm; /* The shared memory being locked */ 3957 unixShm *pX; /* For looping over all siblings */ 3958 unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ 3959 int rc = SQLITE_OK; /* Result code */ 3960 u16 mask; /* Mask of locks to take or release */ 3961 3962 assert( pShmNode==pDbFd->pInode->pShmNode ); 3963 assert( pShmNode->pInode==pDbFd->pInode ); 3964 assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); 3965 assert( n>=1 ); 3966 assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) 3967 || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) 3968 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) 3969 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); 3970 assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); 3971 assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); 3972 assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); 3973 3974 mask = (1<<(ofst+n)) - (1<<ofst); 3975 assert( n>1 || mask==(1<<ofst) ); 3976 sqlite3_mutex_enter(pShmNode->mutex); 3977 if( flags & SQLITE_SHM_UNLOCK ){ 3978 u16 allMask = 0; /* Mask of locks held by siblings */ 3979 3980 /* See if any siblings hold this same lock */ 3981 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 3982 if( pX==p ) continue; 3983 assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); 3984 allMask |= pX->sharedMask; 3985 } 3986 3987 /* Unlock the system-level locks */ 3988 if( (mask & allMask)==0 ){ 3989 rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n); 3990 }else{ 3991 rc = SQLITE_OK; 3992 } 3993 3994 /* Undo the local locks */ 
3995 if( rc==SQLITE_OK ){ 3996 p->exclMask &= ~mask; 3997 p->sharedMask &= ~mask; 3998 } 3999 }else if( flags & SQLITE_SHM_SHARED ){ 4000 u16 allShared = 0; /* Union of locks held by connections other than "p" */ 4001 4002 /* Find out which shared locks are already held by sibling connections. 4003 ** If any sibling already holds an exclusive lock, go ahead and return 4004 ** SQLITE_BUSY. 4005 */ 4006 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4007 if( (pX->exclMask & mask)!=0 ){ 4008 rc = SQLITE_BUSY; 4009 break; 4010 } 4011 allShared |= pX->sharedMask; 4012 } 4013 4014 /* Get shared locks at the system level, if necessary */ 4015 if( rc==SQLITE_OK ){ 4016 if( (allShared & mask)==0 ){ 4017 rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4018 }else{ 4019 rc = SQLITE_OK; 4020 } 4021 } 4022 4023 /* Get the local shared locks */ 4024 if( rc==SQLITE_OK ){ 4025 p->sharedMask |= mask; 4026 } 4027 }else{ 4028 /* Make sure no sibling connections hold locks that will block this 4029 ** lock. If any do, return SQLITE_BUSY right away. 4030 */ 4031 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4032 if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ 4033 rc = SQLITE_BUSY; 4034 break; 4035 } 4036 } 4037 4038 /* Get the exclusive locks at the system level. Then if successful 4039 ** also mark the local connection as being locked. 4040 */ 4041 if( rc==SQLITE_OK ){ 4042 rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n); 4043 if( rc==SQLITE_OK ){ 4044 assert( (p->sharedMask & mask)==0 ); 4045 p->exclMask |= mask; 4046 } 4047 } 4048 } 4049 sqlite3_mutex_leave(pShmNode->mutex); 4050 OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", 4051 p->id, getpid(), p->sharedMask, p->exclMask)); 4052 return rc; 4053 } 4054 4055 /* 4056 ** Implement a memory barrier or memory fence on shared memory. 4057 ** 4058 ** All loads and stores begun before the barrier must complete before 4059 ** any load or store begun after the barrier. 
4060 */ 4061 static void unixShmBarrier( 4062 sqlite3_file *fd /* Database file holding the shared memory */ 4063 ){ 4064 UNUSED_PARAMETER(fd); 4065 unixEnterMutex(); 4066 unixLeaveMutex(); 4067 } 4068 4069 /* 4070 ** Close a connection to shared-memory. Delete the underlying 4071 ** storage if deleteFlag is true. 4072 ** 4073 ** If there is no shared memory associated with the connection then this 4074 ** routine is a harmless no-op. 4075 */ 4076 static int unixShmUnmap( 4077 sqlite3_file *fd, /* The underlying database file */ 4078 int deleteFlag /* Delete shared-memory if true */ 4079 ){ 4080 unixShm *p; /* The connection to be closed */ 4081 unixShmNode *pShmNode; /* The underlying shared-memory file */ 4082 unixShm **pp; /* For looping over sibling connections */ 4083 unixFile *pDbFd; /* The underlying database file */ 4084 4085 pDbFd = (unixFile*)fd; 4086 p = pDbFd->pShm; 4087 if( p==0 ) return SQLITE_OK; 4088 pShmNode = p->pShmNode; 4089 4090 assert( pShmNode==pDbFd->pInode->pShmNode ); 4091 assert( pShmNode->pInode==pDbFd->pInode ); 4092 4093 /* Remove connection p from the set of connections associated 4094 ** with pShmNode */ 4095 sqlite3_mutex_enter(pShmNode->mutex); 4096 for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} 4097 *pp = p->pNext; 4098 4099 /* Free the connection p */ 4100 sqlite3_free(p); 4101 pDbFd->pShm = 0; 4102 sqlite3_mutex_leave(pShmNode->mutex); 4103 4104 /* If pShmNode->nRef has reached 0, then close the underlying 4105 ** shared-memory file, too */ 4106 unixEnterMutex(); 4107 assert( pShmNode->nRef>0 ); 4108 pShmNode->nRef--; 4109 if( pShmNode->nRef==0 ){ 4110 if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename); 4111 unixShmPurge(pDbFd); 4112 } 4113 unixLeaveMutex(); 4114 4115 return SQLITE_OK; 4116 } 4117 4118 4119 #else 4120 # define unixShmMap 0 4121 # define unixShmLock 0 4122 # define unixShmBarrier 0 4123 # define unixShmUnmap 0 4124 #endif /* #ifndef SQLITE_OMIT_WAL */ 4125 4126 /* 4127 ** Here ends the 
implementation of all sqlite3_file methods.
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/

/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies.  It also contains definitions
** of "finder" functions.  A finder-function is used to locate the appropriate
** sqlite3_io_methods object for a particular database file.  The pAppData
** field of each sqlite3_vfs object is initialized to point to the correct
** finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object.  The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from being cast into a function pointer.
**
**
** Each instance of the IOMETHODS macro generates two objects:
**
**   * A constant sqlite3_io_methods object called METHOD that has locking
**     methods CLOSE, LOCK, UNLOCK, CKLOCK.
**
**   * An I/O method finder function called FINDER that returns a pointer
**     to the METHOD object in the previous bullet.
*/
/* IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK) expands to
** three definitions:
**
**   (1) a static const sqlite3_io_methods object named METHOD whose iVersion
**       is VERSION and whose xClose, xLock, xUnlock and xCheckReservedLock
**       slots are CLOSE, LOCK, UNLOCK and CKLOCK; every other slot uses the
**       shared unixXxx implementation,
**
**   (2) a function FINDER##Impl() that ignores its arguments and returns
**       the address of METHOD, and
**
**   (3) a constant function pointer named FINDER that points at
**       FINDER##Impl().
*/
#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK)      \
static const sqlite3_io_methods METHOD = {                                   \
   VERSION,                    /* iVersion */                                \
   CLOSE,                      /* xClose */                                  \
   unixRead,                   /* xRead */                                   \
   unixWrite,                  /* xWrite */                                  \
   unixTruncate,               /* xTruncate */                               \
   unixSync,                   /* xSync */                                   \
   unixFileSize,               /* xFileSize */                               \
   LOCK,                       /* xLock */                                   \
   UNLOCK,                     /* xUnlock */                                 \
   CKLOCK,                     /* xCheckReservedLock */                      \
   unixFileControl,            /* xFileControl */                            \
   unixSectorSize,             /* xSectorSize */                             \
   unixDeviceCharacteristics,  /* xDeviceCharacteristics */                  \
   unixShmMap,                 /* xShmMap */                                 \
   unixShmLock,                /* xShmLock */                                \
   unixShmBarrier,             /* xShmBarrier */                             \
   unixShmUnmap                /* xShmUnmap */                               \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;

/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies.  Functions that return pointers to these methods
** are also created.
*/
/* POSIX advisory locks.  This is the only instance built with iVersion 2. */
IOMETHODS(
  posixIoFinder,            /* Finder function name */
  posixIoMethods,           /* sqlite3_io_methods object name */
  2,                        /* iVersion 2: shared memory is enabled */
  unixClose,                /* xClose method */
  unixLock,                 /* xLock method */
  unixUnlock,               /* xUnlock method */
  unixCheckReservedLock     /* xCheckReservedLock method */
)
/* No-op locking. */
IOMETHODS(
  nolockIoFinder,           /* Finder function name */
  nolockIoMethods,          /* sqlite3_io_methods object name */
  1,                        /* iVersion 1: shared memory is disabled */
  nolockClose,              /* xClose method */
  nolockLock,               /* xLock method */
  nolockUnlock,             /* xUnlock method */
  nolockCheckReservedLock   /* xCheckReservedLock method */
)
/* Dot-file locking. */
IOMETHODS(
  dotlockIoFinder,          /* Finder function name */
  dotlockIoMethods,         /* sqlite3_io_methods object name */
  1,                        /* iVersion 1: shared memory is disabled */
  dotlockClose,             /* xClose method */
  dotlockLock,              /* xLock method */
  dotlockUnlock,            /* xUnlock method */
  dotlockCheckReservedLock  /* xCheckReservedLock method */
)

/* flock() locking: only with SQLITE_ENABLE_LOCKING_STYLE, never on VxWorks. */
#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
IOMETHODS(
  flockIoFinder,            /* Finder function name */
  flockIoMethods,           /* sqlite3_io_methods object name */
  1,                        /* iVersion 1: shared memory is disabled */
  flockClose,               /* xClose method */
  flockLock,                /* xLock method */
  flockUnlock,              /* xUnlock method */
  flockCheckReservedLock    /* xCheckReservedLock method */
)
#endif

/* Named-semaphore locking: VxWorks only. */
#if OS_VXWORKS
IOMETHODS(
  semIoFinder,              /* Finder function name */
  semIoMethods,             /* sqlite3_io_methods object name */
  1,                        /* iVersion 1: shared memory is disabled */
  semClose,                 /* xClose method */
  semLock,                  /* xLock method */
  semUnlock,                /* xUnlock method */
  semCheckReservedLock      /* xCheckReservedLock method */
)
#endif

/* AFP locking: Apple builds with SQLITE_ENABLE_LOCKING_STYLE only. */
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
  afpIoFinder,              /* Finder function name */
  afpIoMethods,             /* sqlite3_io_methods object name */
  1,                        /* iVersion 1: shared memory is disabled */
  afpClose,                 /* xClose method */
  afpLock,                  /* xLock method */
  afpUnlock,                /* xUnlock method */
  afpCheckReservedLock      /* xCheckReservedLock method */
)
#endif

/*
** The proxy locking method is a "super-method" in the sense that it
** opens secondary file descriptors for the conch and lock files and
** it uses proxy, dot-file, AFP, and flock() locking methods on those
** secondary files.  For this reason, the division that implements
** proxy locking is located much further down in the file.  But we need
** to go ahead and define the sqlite3_io_methods and finder function
** for proxy locking here.  So we forward declare the I/O methods.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
static int proxyClose(sqlite3_file*);
static int proxyLock(sqlite3_file*, int);
static int proxyUnlock(sqlite3_file*, int);
static int proxyCheckReservedLock(sqlite3_file*, int*);
IOMETHODS(
  proxyIoFinder,            /* Finder function name */
  proxyIoMethods,           /* sqlite3_io_methods object name */
  1,                        /* iVersion 1: shared memory is disabled */
  proxyClose,               /* xClose method */
  proxyLock,                /* xLock method */
  proxyUnlock,              /* xUnlock method */
  proxyCheckReservedLock    /* xCheckReservedLock method */
)
#endif

/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set.
** This instance is the same as posixIoMethods except for iVersion and the
** xUnlock slot, which uses nfsUnlock. */
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
  nfsIoFinder,               /* Finder function name */
  nfsIoMethods,              /* sqlite3_io_methods object name */
  1,                         /* iVersion 1: shared memory is disabled */
  unixClose,                 /* xClose method */
  unixLock,                  /* xLock method */
  nfsUnlock,                 /* xUnlock method */
  unixCheckReservedLock      /* xCheckReservedLock method */
)
#endif

#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** This "finder" function attempts to determine the best locking
strategy 4303 ** for the database file "filePath". It then returns the sqlite3_io_methods 4304 ** object that implements that strategy. 4305 ** 4306 ** This is for MacOSX only. 4307 */ 4308 static const sqlite3_io_methods *autolockIoFinderImpl( 4309 const char *filePath, /* name of the database file */ 4310 unixFile *pNew /* open file object for the database file */ 4311 ){ 4312 static const struct Mapping { 4313 const char *zFilesystem; /* Filesystem type name */ 4314 const sqlite3_io_methods *pMethods; /* Appropriate locking method */ 4315 } aMap[] = { 4316 { "hfs", &posixIoMethods }, 4317 { "ufs", &posixIoMethods }, 4318 { "afpfs", &afpIoMethods }, 4319 { "smbfs", &afpIoMethods }, 4320 { "webdav", &nolockIoMethods }, 4321 { 0, 0 } 4322 }; 4323 int i; 4324 struct statfs fsInfo; 4325 struct flock lockInfo; 4326 4327 if( !filePath ){ 4328 /* If filePath==NULL that means we are dealing with a transient file 4329 ** that does not need to be locked. */ 4330 return &nolockIoMethods; 4331 } 4332 if( statfs(filePath, &fsInfo) != -1 ){ 4333 if( fsInfo.f_flags & MNT_RDONLY ){ 4334 return &nolockIoMethods; 4335 } 4336 for(i=0; aMap[i].zFilesystem; i++){ 4337 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 4338 return aMap[i].pMethods; 4339 } 4340 } 4341 } 4342 4343 /* Default case. Handles, amongst others, "nfs". 4344 ** Test byte-range lock using fcntl(). If the call succeeds, 4345 ** assume that the file-system supports POSIX style locks. 
4346 */ 4347 lockInfo.l_len = 1; 4348 lockInfo.l_start = 0; 4349 lockInfo.l_whence = SEEK_SET; 4350 lockInfo.l_type = F_RDLCK; 4351 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 4352 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ 4353 return &nfsIoMethods; 4354 } else { 4355 return &posixIoMethods; 4356 } 4357 }else{ 4358 return &dotlockIoMethods; 4359 } 4360 } 4361 static const sqlite3_io_methods 4362 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 4363 4364 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 4365 4366 #if OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE 4367 /* 4368 ** This "finder" function attempts to determine the best locking strategy 4369 ** for the database file "filePath". It then returns the sqlite3_io_methods 4370 ** object that implements that strategy. 4371 ** 4372 ** This is for VXWorks only. 4373 */ 4374 static const sqlite3_io_methods *autolockIoFinderImpl( 4375 const char *filePath, /* name of the database file */ 4376 unixFile *pNew /* the open file object */ 4377 ){ 4378 struct flock lockInfo; 4379 4380 if( !filePath ){ 4381 /* If filePath==NULL that means we are dealing with a transient file 4382 ** that does not need to be locked. */ 4383 return &nolockIoMethods; 4384 } 4385 4386 /* Test if fcntl() is supported and use POSIX style locks. 4387 ** Otherwise fall back to the named semaphore method. 
4388 */ 4389 lockInfo.l_len = 1; 4390 lockInfo.l_start = 0; 4391 lockInfo.l_whence = SEEK_SET; 4392 lockInfo.l_type = F_RDLCK; 4393 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 4394 return &posixIoMethods; 4395 }else{ 4396 return &semIoMethods; 4397 } 4398 } 4399 static const sqlite3_io_methods 4400 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 4401 4402 #endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */ 4403 4404 /* 4405 ** An abstract type for a pointer to a IO method finder function: 4406 */ 4407 typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); 4408 4409 4410 /**************************************************************************** 4411 **************************** sqlite3_vfs methods **************************** 4412 ** 4413 ** This division contains the implementation of methods on the 4414 ** sqlite3_vfs object. 4415 */ 4416 4417 /* 4418 ** Initialize the contents of the unixFile structure pointed to by pId. 4419 */ 4420 static int fillInUnixFile( 4421 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 4422 int h, /* Open file descriptor of file being opened */ 4423 int dirfd, /* Directory file descriptor */ 4424 sqlite3_file *pId, /* Write to the unixFile structure here */ 4425 const char *zFilename, /* Name of the file being opened */ 4426 int noLock, /* Omit locking if true */ 4427 int isDelete, /* Delete on close if true */ 4428 int isReadOnly /* True if the file is opened read-only */ 4429 ){ 4430 const sqlite3_io_methods *pLockingStyle; 4431 unixFile *pNew = (unixFile *)pId; 4432 int rc = SQLITE_OK; 4433 4434 assert( pNew->pInode==NULL ); 4435 4436 /* Parameter isDelete is only used on vxworks. Express this explicitly 4437 ** here to prevent compiler warnings about unused parameters. 4438 */ 4439 UNUSED_PARAMETER(isDelete); 4440 4441 /* Usually the path zFilename should not be a relative pathname. 
The 4442 ** exception is when opening the proxy "conch" file in builds that 4443 ** include the special Apple locking styles. 4444 */ 4445 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 4446 assert( zFilename==0 || zFilename[0]=='/' 4447 || pVfs->pAppData==(void*)&autolockIoFinder ); 4448 #else 4449 assert( zFilename==0 || zFilename[0]=='/' ); 4450 #endif 4451 4452 OSTRACE(("OPEN %-3d %s\n", h, zFilename)); 4453 pNew->h = h; 4454 pNew->dirfd = dirfd; 4455 pNew->zPath = zFilename; 4456 if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ 4457 pNew->ctrlFlags = UNIXFILE_EXCL; 4458 }else{ 4459 pNew->ctrlFlags = 0; 4460 } 4461 if( isReadOnly ){ 4462 pNew->ctrlFlags |= UNIXFILE_RDONLY; 4463 } 4464 4465 #if OS_VXWORKS 4466 pNew->pId = vxworksFindFileId(zFilename); 4467 if( pNew->pId==0 ){ 4468 noLock = 1; 4469 rc = SQLITE_NOMEM; 4470 } 4471 #endif 4472 4473 if( noLock ){ 4474 pLockingStyle = &nolockIoMethods; 4475 }else{ 4476 pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); 4477 #if SQLITE_ENABLE_LOCKING_STYLE 4478 /* Cache zFilename in the locking context (AFP and dotlock override) for 4479 ** proxyLock activation is possible (remote proxy is based on db name) 4480 ** zFilename remains valid until file is closed, to support */ 4481 pNew->lockingContext = (void*)zFilename; 4482 #endif 4483 } 4484 4485 if( pLockingStyle == &posixIoMethods 4486 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 4487 || pLockingStyle == &nfsIoMethods 4488 #endif 4489 ){ 4490 unixEnterMutex(); 4491 rc = findInodeInfo(pNew, &pNew->pInode); 4492 if( rc!=SQLITE_OK ){ 4493 /* If an error occured in findInodeInfo(), close the file descriptor 4494 ** immediately, before releasing the mutex. findInodeInfo() may fail 4495 ** in two scenarios: 4496 ** 4497 ** (a) A call to fstat() failed. 4498 ** (b) A malloc failed. 4499 ** 4500 ** Scenario (b) may only occur if the process is holding no other 4501 ** file descriptors open on the same file. 
If there were other file 4502 ** descriptors on this file, then no malloc would be required by 4503 ** findInodeInfo(). If this is the case, it is quite safe to close 4504 ** handle h - as it is guaranteed that no posix locks will be released 4505 ** by doing so. 4506 ** 4507 ** If scenario (a) caused the error then things are not so safe. The 4508 ** implicit assumption here is that if fstat() fails, things are in 4509 ** such bad shape that dropping a lock or two doesn't matter much. 4510 */ 4511 robust_close(pNew, h, __LINE__); 4512 h = -1; 4513 } 4514 unixLeaveMutex(); 4515 } 4516 4517 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 4518 else if( pLockingStyle == &afpIoMethods ){ 4519 /* AFP locking uses the file path so it needs to be included in 4520 ** the afpLockingContext. 4521 */ 4522 afpLockingContext *pCtx; 4523 pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) ); 4524 if( pCtx==0 ){ 4525 rc = SQLITE_NOMEM; 4526 }else{ 4527 /* NB: zFilename exists and remains valid until the file is closed 4528 ** according to requirement F11141. So we do not need to make a 4529 ** copy of the filename. 
*/ 4530 pCtx->dbPath = zFilename; 4531 pCtx->reserved = 0; 4532 srandomdev(); 4533 unixEnterMutex(); 4534 rc = findInodeInfo(pNew, &pNew->pInode); 4535 if( rc!=SQLITE_OK ){ 4536 sqlite3_free(pNew->lockingContext); 4537 robust_close(pNew, h, __LINE__); 4538 h = -1; 4539 } 4540 unixLeaveMutex(); 4541 } 4542 } 4543 #endif 4544 4545 else if( pLockingStyle == &dotlockIoMethods ){ 4546 /* Dotfile locking uses the file path so it needs to be included in 4547 ** the dotlockLockingContext 4548 */ 4549 char *zLockFile; 4550 int nFilename; 4551 nFilename = (int)strlen(zFilename) + 6; 4552 zLockFile = (char *)sqlite3_malloc(nFilename); 4553 if( zLockFile==0 ){ 4554 rc = SQLITE_NOMEM; 4555 }else{ 4556 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); 4557 } 4558 pNew->lockingContext = zLockFile; 4559 } 4560 4561 #if OS_VXWORKS 4562 else if( pLockingStyle == &semIoMethods ){ 4563 /* Named semaphore locking uses the file path so it needs to be 4564 ** included in the semLockingContext 4565 */ 4566 unixEnterMutex(); 4567 rc = findInodeInfo(pNew, &pNew->pInode); 4568 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ 4569 char *zSemName = pNew->pInode->aSemName; 4570 int n; 4571 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", 4572 pNew->pId->zCanonicalName); 4573 for( n=1; zSemName[n]; n++ ) 4574 if( zSemName[n]=='/' ) zSemName[n] = '_'; 4575 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); 4576 if( pNew->pInode->pSem == SEM_FAILED ){ 4577 rc = SQLITE_NOMEM; 4578 pNew->pInode->aSemName[0] = '\0'; 4579 } 4580 } 4581 unixLeaveMutex(); 4582 } 4583 #endif 4584 4585 pNew->lastErrno = 0; 4586 #if OS_VXWORKS 4587 if( rc!=SQLITE_OK ){ 4588 if( h>=0 ) robust_close(pNew, h, __LINE__); 4589 h = -1; 4590 unlink(zFilename); 4591 isDelete = 0; 4592 } 4593 pNew->isDelete = isDelete; 4594 #endif 4595 if( rc!=SQLITE_OK ){ 4596 if( dirfd>=0 ) robust_close(pNew, dirfd, __LINE__); 4597 if( h>=0 ) robust_close(pNew, h, __LINE__); 4598 }else{ 4599 pNew->pMethod = 
pLockingStyle; 4600 OpenCounter(+1); 4601 } 4602 return rc; 4603 } 4604 4605 /* 4606 ** Open a file descriptor to the directory containing file zFilename. 4607 ** If successful, *pFd is set to the opened file descriptor and 4608 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 4609 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 4610 ** value. 4611 ** 4612 ** If SQLITE_OK is returned, the caller is responsible for closing 4613 ** the file descriptor *pFd using close(). 4614 */ 4615 static int openDirectory(const char *zFilename, int *pFd){ 4616 int ii; 4617 int fd = -1; 4618 char zDirname[MAX_PATHNAME+1]; 4619 4620 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 4621 for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); 4622 if( ii>0 ){ 4623 zDirname[ii] = '\0'; 4624 fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); 4625 if( fd>=0 ){ 4626 #ifdef FD_CLOEXEC 4627 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 4628 #endif 4629 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); 4630 } 4631 } 4632 *pFd = fd; 4633 return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); 4634 } 4635 4636 /* 4637 ** Return the name of a directory in which to put temporary files. 4638 ** If no suitable temporary file directory can be found, return NULL. 
4639 */ 4640 static const char *unixTempFileDir(void){ 4641 static const char *azDirs[] = { 4642 0, 4643 0, 4644 "/var/tmp", 4645 "/usr/tmp", 4646 "/tmp", 4647 0 /* List terminator */ 4648 }; 4649 unsigned int i; 4650 struct stat buf; 4651 const char *zDir = 0; 4652 4653 azDirs[0] = sqlite3_temp_directory; 4654 if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); 4655 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){ 4656 if( zDir==0 ) continue; 4657 if( osStat(zDir, &buf) ) continue; 4658 if( !S_ISDIR(buf.st_mode) ) continue; 4659 if( osAccess(zDir, 07) ) continue; 4660 break; 4661 } 4662 return zDir; 4663 } 4664 4665 /* 4666 ** Create a temporary file name in zBuf. zBuf must be allocated 4667 ** by the calling process and must be big enough to hold at least 4668 ** pVfs->mxPathname bytes. 4669 */ 4670 static int unixGetTempname(int nBuf, char *zBuf){ 4671 static const unsigned char zChars[] = 4672 "abcdefghijklmnopqrstuvwxyz" 4673 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 4674 "0123456789"; 4675 unsigned int i, j; 4676 const char *zDir; 4677 4678 /* It's odd to simulate an io-error here, but really this is just 4679 ** using the io-error infrastructure to test that SQLite handles this 4680 ** function failing. 4681 */ 4682 SimulateIOError( return SQLITE_IOERR ); 4683 4684 zDir = unixTempFileDir(); 4685 if( zDir==0 ) zDir = "."; 4686 4687 /* Check that the output buffer is large enough for the temporary file 4688 ** name. If it is not, return SQLITE_ERROR. 
4689 */ 4690 if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= (size_t)nBuf ){ 4691 return SQLITE_ERROR; 4692 } 4693 4694 do{ 4695 sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); 4696 j = (int)strlen(zBuf); 4697 sqlite3_randomness(15, &zBuf[j]); 4698 for(i=0; i<15; i++, j++){ 4699 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; 4700 } 4701 zBuf[j] = 0; 4702 }while( osAccess(zBuf,0)==0 ); 4703 return SQLITE_OK; 4704 } 4705 4706 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 4707 /* 4708 ** Routine to transform a unixFile into a proxy-locking unixFile. 4709 ** Implementation in the proxy-lock division, but used by unixOpen() 4710 ** if SQLITE_PREFER_PROXY_LOCKING is defined. 4711 */ 4712 static int proxyTransformUnixFile(unixFile*, const char*); 4713 #endif 4714 4715 /* 4716 ** Search for an unused file descriptor that was opened on the database 4717 ** file (not a journal or master-journal file) identified by pathname 4718 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second 4719 ** argument to this function. 4720 ** 4721 ** Such a file descriptor may exist if a database connection was closed 4722 ** but the associated file descriptor could not be closed because some 4723 ** other file descriptor open on the same file is holding a file-lock. 4724 ** Refer to comments in the unixClose() function and the lengthy comment 4725 ** describing "Posix Advisory Locking" at the start of this file for 4726 ** further details. Also, ticket #4018. 4727 ** 4728 ** If a suitable file descriptor is found, then it is returned. If no 4729 ** such file descriptor is located, -1 is returned. 4730 */ 4731 static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ 4732 UnixUnusedFd *pUnused = 0; 4733 4734 /* Do not search for an unused file descriptor on vxworks. 
Not because 4735 ** vxworks would not benefit from the change (it might, we're not sure), 4736 ** but because no way to test it is currently available. It is better 4737 ** not to risk breaking vxworks support for the sake of such an obscure 4738 ** feature. */ 4739 #if !OS_VXWORKS 4740 struct stat sStat; /* Results of stat() call */ 4741 4742 /* A stat() call may fail for various reasons. If this happens, it is 4743 ** almost certain that an open() call on the same path will also fail. 4744 ** For this reason, if an error occurs in the stat() call here, it is 4745 ** ignored and -1 is returned. The caller will try to open a new file 4746 ** descriptor on the same path, fail, and return an error to SQLite. 4747 ** 4748 ** Even if a subsequent open() call does succeed, the consequences of 4749 ** not searching for a resusable file descriptor are not dire. */ 4750 if( 0==stat(zPath, &sStat) ){ 4751 unixInodeInfo *pInode; 4752 4753 unixEnterMutex(); 4754 pInode = inodeList; 4755 while( pInode && (pInode->fileId.dev!=sStat.st_dev 4756 || pInode->fileId.ino!=sStat.st_ino) ){ 4757 pInode = pInode->pNext; 4758 } 4759 if( pInode ){ 4760 UnixUnusedFd **pp; 4761 for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); 4762 pUnused = *pp; 4763 if( pUnused ){ 4764 *pp = pUnused->pNext; 4765 } 4766 } 4767 unixLeaveMutex(); 4768 } 4769 #endif /* if !OS_VXWORKS */ 4770 return pUnused; 4771 } 4772 4773 /* 4774 ** This function is called by unixOpen() to determine the unix permissions 4775 ** to create new files with. If no error occurs, then SQLITE_OK is returned 4776 ** and a value suitable for passing as the third argument to open(2) is 4777 ** written to *pMode. If an IO error occurs, an SQLite error code is 4778 ** returned and the value of *pMode is not modified. 4779 ** 4780 ** If the file being opened is a temporary file, it is always created with 4781 ** the octal permissions 0600 (read/writable by owner only). 
If the file 4782 ** is a database or master journal file, it is created with the permissions 4783 ** mask SQLITE_DEFAULT_FILE_PERMISSIONS. 4784 ** 4785 ** Finally, if the file being opened is a WAL or regular journal file, then 4786 ** this function queries the file-system for the permissions on the 4787 ** corresponding database file and sets *pMode to this value. Whenever 4788 ** possible, WAL and journal files are created using the same permissions 4789 ** as the associated database file. 4790 */ 4791 static int findCreateFileMode( 4792 const char *zPath, /* Path of file (possibly) being created */ 4793 int flags, /* Flags passed as 4th argument to xOpen() */ 4794 mode_t *pMode /* OUT: Permissions to open file with */ 4795 ){ 4796 int rc = SQLITE_OK; /* Return Code */ 4797 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 4798 char zDb[MAX_PATHNAME+1]; /* Database file path */ 4799 int nDb; /* Number of valid bytes in zDb */ 4800 struct stat sStat; /* Output of stat() on database file */ 4801 4802 /* zPath is a path to a WAL or journal file. The following block derives 4803 ** the path to the associated database file from zPath. This block handles 4804 ** the following naming conventions: 4805 ** 4806 ** "<path to db>-journal" 4807 ** "<path to db>-wal" 4808 ** "<path to db>-journal-NNNN" 4809 ** "<path to db>-wal-NNNN" 4810 ** 4811 ** where NNNN is a 4 digit decimal number. The NNNN naming schemes are 4812 ** used by the test_multiplex.c module. 4813 */ 4814 nDb = sqlite3Strlen30(zPath) - 1; 4815 while( nDb>0 && zPath[nDb]!='l' ) nDb--; 4816 nDb -= ((flags & SQLITE_OPEN_WAL) ? 
3 : 7); 4817 memcpy(zDb, zPath, nDb); 4818 zDb[nDb] = '\0'; 4819 4820 if( 0==stat(zDb, &sStat) ){ 4821 *pMode = sStat.st_mode & 0777; 4822 }else{ 4823 rc = SQLITE_IOERR_FSTAT; 4824 } 4825 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ 4826 *pMode = 0600; 4827 }else{ 4828 *pMode = SQLITE_DEFAULT_FILE_PERMISSIONS; 4829 } 4830 return rc; 4831 } 4832 4833 /* 4834 ** Open the file zPath. 4835 ** 4836 ** Previously, the SQLite OS layer used three functions in place of this 4837 ** one: 4838 ** 4839 ** sqlite3OsOpenReadWrite(); 4840 ** sqlite3OsOpenReadOnly(); 4841 ** sqlite3OsOpenExclusive(); 4842 ** 4843 ** These calls correspond to the following combinations of flags: 4844 ** 4845 ** ReadWrite() -> (READWRITE | CREATE) 4846 ** ReadOnly() -> (READONLY) 4847 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 4848 ** 4849 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 4850 ** true, the file was configured to be automatically deleted when the 4851 ** file handle closed. To achieve the same effect using this new 4852 ** interface, add the DELETEONCLOSE flag to those specified above for 4853 ** OpenExclusive(). 
4854 */ 4855 static int unixOpen( 4856 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ 4857 const char *zPath, /* Pathname of file to be opened */ 4858 sqlite3_file *pFile, /* The file descriptor to be filled in */ 4859 int flags, /* Input flags to control the opening */ 4860 int *pOutFlags /* Output flags returned to SQLite core */ 4861 ){ 4862 unixFile *p = (unixFile *)pFile; 4863 int fd = -1; /* File descriptor returned by open() */ 4864 int dirfd = -1; /* Directory file descriptor */ 4865 int openFlags = 0; /* Flags to pass to open() */ 4866 int eType = flags&0xFFFFFF00; /* Type of file to open */ 4867 int noLock; /* True to omit locking primitives */ 4868 int rc = SQLITE_OK; /* Function Return Code */ 4869 4870 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 4871 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 4872 int isCreate = (flags & SQLITE_OPEN_CREATE); 4873 int isReadonly = (flags & SQLITE_OPEN_READONLY); 4874 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 4875 #if SQLITE_ENABLE_LOCKING_STYLE 4876 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); 4877 #endif 4878 4879 /* If creating a master or main-file journal, this function will open 4880 ** a file-descriptor on the directory too. The first time unixSync() 4881 ** is called the directory file descriptor will be fsync()ed and close()d. 4882 */ 4883 int isOpenDirectory = (isCreate && ( 4884 eType==SQLITE_OPEN_MASTER_JOURNAL 4885 || eType==SQLITE_OPEN_MAIN_JOURNAL 4886 || eType==SQLITE_OPEN_WAL 4887 )); 4888 4889 /* If argument zPath is a NULL pointer, this function is required to open 4890 ** a temporary file. Use this buffer to store the file name in. 
4891 */ 4892 char zTmpname[MAX_PATHNAME+1]; 4893 const char *zName = zPath; 4894 4895 /* Check the following statements are true: 4896 ** 4897 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 4898 ** (b) if CREATE is set, then READWRITE must also be set, and 4899 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 4900 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 4901 */ 4902 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 4903 assert(isCreate==0 || isReadWrite); 4904 assert(isExclusive==0 || isCreate); 4905 assert(isDelete==0 || isCreate); 4906 4907 /* The main DB, main journal, WAL file and master journal are never 4908 ** automatically deleted. Nor are they ever temporary files. */ 4909 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); 4910 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); 4911 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); 4912 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); 4913 4914 /* Assert that the upper layer has set one of the "file-type" flags. */ 4915 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 4916 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 4917 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL 4918 || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL 4919 ); 4920 4921 memset(p, 0, sizeof(unixFile)); 4922 4923 if( eType==SQLITE_OPEN_MAIN_DB ){ 4924 UnixUnusedFd *pUnused; 4925 pUnused = findReusableFd(zName, flags); 4926 if( pUnused ){ 4927 fd = pUnused->fd; 4928 }else{ 4929 pUnused = sqlite3_malloc(sizeof(*pUnused)); 4930 if( !pUnused ){ 4931 return SQLITE_NOMEM; 4932 } 4933 } 4934 p->pUnused = pUnused; 4935 }else if( !zName ){ 4936 /* If zName is NULL, the upper layer is requesting a temp file. 
*/ 4937 assert(isDelete && !isOpenDirectory); 4938 rc = unixGetTempname(MAX_PATHNAME+1, zTmpname); 4939 if( rc!=SQLITE_OK ){ 4940 return rc; 4941 } 4942 zName = zTmpname; 4943 } 4944 4945 /* Determine the value of the flags parameter passed to POSIX function 4946 ** open(). These must be calculated even if open() is not called, as 4947 ** they may be stored as part of the file handle and used by the 4948 ** 'conch file' locking functions later on. */ 4949 if( isReadonly ) openFlags |= O_RDONLY; 4950 if( isReadWrite ) openFlags |= O_RDWR; 4951 if( isCreate ) openFlags |= O_CREAT; 4952 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); 4953 openFlags |= (O_LARGEFILE|O_BINARY); 4954 4955 if( fd<0 ){ 4956 mode_t openMode; /* Permissions to create file with */ 4957 rc = findCreateFileMode(zName, flags, &openMode); 4958 if( rc!=SQLITE_OK ){ 4959 assert( !p->pUnused ); 4960 assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); 4961 return rc; 4962 } 4963 fd = robust_open(zName, openFlags, openMode); 4964 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); 4965 if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){ 4966 /* Failed to open the file for read/write access. Try read-only. 
*/ 4967 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 4968 openFlags &= ~(O_RDWR|O_CREAT); 4969 flags |= SQLITE_OPEN_READONLY; 4970 openFlags |= O_RDONLY; 4971 isReadonly = 1; 4972 fd = robust_open(zName, openFlags, openMode); 4973 } 4974 if( fd<0 ){ 4975 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); 4976 goto open_finished; 4977 } 4978 } 4979 assert( fd>=0 ); 4980 if( pOutFlags ){ 4981 *pOutFlags = flags; 4982 } 4983 4984 if( p->pUnused ){ 4985 p->pUnused->fd = fd; 4986 p->pUnused->flags = flags; 4987 } 4988 4989 if( isDelete ){ 4990 #if OS_VXWORKS 4991 zPath = zName; 4992 #else 4993 unlink(zName); 4994 #endif 4995 } 4996 #if SQLITE_ENABLE_LOCKING_STYLE 4997 else{ 4998 p->openFlags = openFlags; 4999 } 5000 #endif 5001 5002 if( isOpenDirectory ){ 5003 rc = openDirectory(zPath, &dirfd); 5004 if( rc!=SQLITE_OK ){ 5005 /* It is safe to close fd at this point, because it is guaranteed not 5006 ** to be open on a database file. If it were open on a database file, 5007 ** it would not be safe to close as this would release any locks held 5008 ** on the file by this process. 
*/ 5009 assert( eType!=SQLITE_OPEN_MAIN_DB ); 5010 robust_close(p, fd, __LINE__); 5011 goto open_finished; 5012 } 5013 } 5014 5015 #ifdef FD_CLOEXEC 5016 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 5017 #endif 5018 5019 noLock = eType!=SQLITE_OPEN_MAIN_DB; 5020 5021 5022 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 5023 struct statfs fsInfo; 5024 if( fstatfs(fd, &fsInfo) == -1 ){ 5025 ((unixFile*)pFile)->lastErrno = errno; 5026 if( dirfd>=0 ) robust_close(p, dirfd, __LINE__); 5027 robust_close(p, fd, __LINE__); 5028 return SQLITE_IOERR_ACCESS; 5029 } 5030 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { 5031 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 5032 } 5033 #endif 5034 5035 #if SQLITE_ENABLE_LOCKING_STYLE 5036 #if SQLITE_PREFER_PROXY_LOCKING 5037 isAutoProxy = 1; 5038 #endif 5039 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ 5040 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); 5041 int useProxy = 0; 5042 5043 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 5044 ** never use proxy, NULL means use proxy for non-local files only. */ 5045 if( envforce!=NULL ){ 5046 useProxy = atoi(envforce)>0; 5047 }else{ 5048 struct statfs fsInfo; 5049 if( statfs(zPath, &fsInfo) == -1 ){ 5050 /* In theory, the close(fd) call is sub-optimal. If the file opened 5051 ** with fd is a database file, and there are other connections open 5052 ** on that file that are currently holding advisory locks on it, 5053 ** then the call to close() will cancel those locks. In practice, 5054 ** we're assuming that statfs() doesn't fail very often. At least 5055 ** not while other file descriptors opened by the same process on 5056 ** the same file are working. 
*/ 5057 p->lastErrno = errno; 5058 if( dirfd>=0 ){ 5059 robust_close(p, dirfd, __LINE__); 5060 } 5061 robust_close(p, fd, __LINE__); 5062 rc = SQLITE_IOERR_ACCESS; 5063 goto open_finished; 5064 } 5065 useProxy = !(fsInfo.f_flags&MNT_LOCAL); 5066 } 5067 if( useProxy ){ 5068 rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, 5069 isDelete, isReadonly); 5070 if( rc==SQLITE_OK ){ 5071 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); 5072 if( rc!=SQLITE_OK ){ 5073 /* Use unixClose to clean up the resources added in fillInUnixFile 5074 ** and clear all the structure's references. Specifically, 5075 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 5076 */ 5077 unixClose(pFile); 5078 return rc; 5079 } 5080 } 5081 goto open_finished; 5082 } 5083 } 5084 #endif 5085 5086 rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, 5087 isDelete, isReadonly); 5088 open_finished: 5089 if( rc!=SQLITE_OK ){ 5090 sqlite3_free(p->pUnused); 5091 } 5092 return rc; 5093 } 5094 5095 5096 /* 5097 ** Delete the file at zPath. If the dirSync argument is true, fsync() 5098 ** the directory after deleting the file. 
5099 */ 5100 static int unixDelete( 5101 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ 5102 const char *zPath, /* Name of file to be deleted */ 5103 int dirSync /* If true, fsync() directory after deleting file */ 5104 ){ 5105 int rc = SQLITE_OK; 5106 UNUSED_PARAMETER(NotUsed); 5107 SimulateIOError(return SQLITE_IOERR_DELETE); 5108 if( unlink(zPath)==(-1) && errno!=ENOENT ){ 5109 return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); 5110 } 5111 #ifndef SQLITE_DISABLE_DIRSYNC 5112 if( dirSync ){ 5113 int fd; 5114 rc = openDirectory(zPath, &fd); 5115 if( rc==SQLITE_OK ){ 5116 #if OS_VXWORKS 5117 if( fsync(fd)==-1 ) 5118 #else 5119 if( fsync(fd) ) 5120 #endif 5121 { 5122 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); 5123 } 5124 robust_close(0, fd, __LINE__); 5125 } 5126 } 5127 #endif 5128 return rc; 5129 } 5130 5131 /* 5132 ** Test the existance of or access permissions of file zPath. The 5133 ** test performed depends on the value of flags: 5134 ** 5135 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 5136 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 5137 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 5138 ** 5139 ** Otherwise return 0. 5140 */ 5141 static int unixAccess( 5142 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ 5143 const char *zPath, /* Path of the file to examine */ 5144 int flags, /* What do we want to learn about the zPath file? 
*/ 5145 int *pResOut /* Write result boolean here */ 5146 ){ 5147 int amode = 0; 5148 UNUSED_PARAMETER(NotUsed); 5149 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 5150 switch( flags ){ 5151 case SQLITE_ACCESS_EXISTS: 5152 amode = F_OK; 5153 break; 5154 case SQLITE_ACCESS_READWRITE: 5155 amode = W_OK|R_OK; 5156 break; 5157 case SQLITE_ACCESS_READ: 5158 amode = R_OK; 5159 break; 5160 5161 default: 5162 assert(!"Invalid flags argument"); 5163 } 5164 *pResOut = (osAccess(zPath, amode)==0); 5165 if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){ 5166 struct stat buf; 5167 if( 0==stat(zPath, &buf) && buf.st_size==0 ){ 5168 *pResOut = 0; 5169 } 5170 } 5171 return SQLITE_OK; 5172 } 5173 5174 5175 /* 5176 ** Turn a relative pathname into a full pathname. The relative path 5177 ** is stored as a nul-terminated string in the buffer pointed to by 5178 ** zPath. 5179 ** 5180 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 5181 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 5182 ** this buffer before returning. 5183 */ 5184 static int unixFullPathname( 5185 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 5186 const char *zPath, /* Possibly relative input path */ 5187 int nOut, /* Size of output buffer in bytes */ 5188 char *zOut /* Output buffer */ 5189 ){ 5190 5191 /* It's odd to simulate an io-error here, but really this is just 5192 ** using the io-error infrastructure to test that SQLite handles this 5193 ** function failing. This function could fail if, for example, the 5194 ** current working directory has been unlinked. 
5195 */ 5196 SimulateIOError( return SQLITE_ERROR ); 5197 5198 assert( pVfs->mxPathname==MAX_PATHNAME ); 5199 UNUSED_PARAMETER(pVfs); 5200 5201 zOut[nOut-1] = '\0'; 5202 if( zPath[0]=='/' ){ 5203 sqlite3_snprintf(nOut, zOut, "%s", zPath); 5204 }else{ 5205 int nCwd; 5206 if( osGetcwd(zOut, nOut-1)==0 ){ 5207 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); 5208 } 5209 nCwd = (int)strlen(zOut); 5210 sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath); 5211 } 5212 return SQLITE_OK; 5213 } 5214 5215 5216 #ifndef SQLITE_OMIT_LOAD_EXTENSION 5217 /* 5218 ** Interfaces for opening a shared library, finding entry points 5219 ** within the shared library, and closing the shared library. 5220 */ 5221 #include <dlfcn.h> 5222 static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ 5223 UNUSED_PARAMETER(NotUsed); 5224 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 5225 } 5226 5227 /* 5228 ** SQLite calls this function immediately after a call to unixDlSym() or 5229 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 5230 ** message is available, it is written to zBufOut. If no error message 5231 ** is available, zBufOut is left unmodified and SQLite uses a default 5232 ** error message. 5233 */ 5234 static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ 5235 const char *zErr; 5236 UNUSED_PARAMETER(NotUsed); 5237 unixEnterMutex(); 5238 zErr = dlerror(); 5239 if( zErr ){ 5240 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 5241 } 5242 unixLeaveMutex(); 5243 } 5244 static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ 5245 /* 5246 ** GCC with -pedantic-errors says that C90 does not allow a void* to be 5247 ** cast into a pointer to a function. And yet the library dlsym() routine 5248 ** returns a void* which is really a pointer to a function. So how do we 5249 ** use dlsym() with -pedantic-errors? 
5250 ** 5251 ** Variable x below is defined to be a pointer to a function taking 5252 ** parameters void* and const char* and returning a pointer to a function. 5253 ** We initialize x by assigning it a pointer to the dlsym() function. 5254 ** (That assignment requires a cast.) Then we call the function that 5255 ** x points to. 5256 ** 5257 ** This work-around is unlikely to work correctly on any system where 5258 ** you really cannot cast a function pointer into void*. But then, on the 5259 ** other hand, dlsym() will not work on such a system either, so we have 5260 ** not really lost anything. 5261 */ 5262 void (*(*x)(void*,const char*))(void); 5263 UNUSED_PARAMETER(NotUsed); 5264 x = (void(*(*)(void*,const char*))(void))dlsym; 5265 return (*x)(p, zSym); 5266 } 5267 static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ 5268 UNUSED_PARAMETER(NotUsed); 5269 dlclose(pHandle); 5270 } 5271 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 5272 #define unixDlOpen 0 5273 #define unixDlError 0 5274 #define unixDlSym 0 5275 #define unixDlClose 0 5276 #endif 5277 5278 /* 5279 ** Write nBuf bytes of random data to the supplied buffer zBuf. 5280 */ 5281 static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ 5282 UNUSED_PARAMETER(NotUsed); 5283 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); 5284 5285 /* We have to initialize zBuf to prevent valgrind from reporting 5286 ** errors. The reports issued by valgrind are incorrect - we would 5287 ** prefer that the randomness be increased by making use of the 5288 ** uninitialized space in zBuf - but valgrind errors tend to worry 5289 ** some users. Rather than argue, it seems easier just to initialize 5290 ** the whole array and silence valgrind, even if that means less randomness 5291 ** in the random seed. 5292 ** 5293 ** When testing, initializing zBuf[] to zero is all we do. That means 5294 ** that we always use the same random number sequence. This makes the 5295 ** tests repeatable. 
5296 */ 5297 memset(zBuf, 0, nBuf); 5298 #if !defined(SQLITE_TEST) 5299 { 5300 int pid, fd; 5301 fd = robust_open("/dev/urandom", O_RDONLY, 0); 5302 if( fd<0 ){ 5303 time_t t; 5304 time(&t); 5305 memcpy(zBuf, &t, sizeof(t)); 5306 pid = getpid(); 5307 memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid)); 5308 assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf ); 5309 nBuf = sizeof(t) + sizeof(pid); 5310 }else{ 5311 do{ nBuf = osRead(fd, zBuf, nBuf); }while( nBuf<0 && errno==EINTR ); 5312 robust_close(0, fd, __LINE__); 5313 } 5314 } 5315 #endif 5316 return nBuf; 5317 } 5318 5319 5320 /* 5321 ** Sleep for a little while. Return the amount of time slept. 5322 ** The argument is the number of microseconds we want to sleep. 5323 ** The return value is the number of microseconds of sleep actually 5324 ** requested from the underlying operating system, a number which 5325 ** might be greater than or equal to the argument, but not less 5326 ** than the argument. 5327 */ 5328 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ 5329 #if OS_VXWORKS 5330 struct timespec sp; 5331 5332 sp.tv_sec = microseconds / 1000000; 5333 sp.tv_nsec = (microseconds % 1000000) * 1000; 5334 nanosleep(&sp, NULL); 5335 UNUSED_PARAMETER(NotUsed); 5336 return microseconds; 5337 #elif defined(HAVE_USLEEP) && HAVE_USLEEP 5338 usleep(microseconds); 5339 UNUSED_PARAMETER(NotUsed); 5340 return microseconds; 5341 #else 5342 int seconds = (microseconds+999999)/1000000; 5343 sleep(seconds); 5344 UNUSED_PARAMETER(NotUsed); 5345 return seconds*1000000; 5346 #endif 5347 } 5348 5349 /* 5350 ** The following variable, if set to a non-zero value, is interpreted as 5351 ** the number of seconds since 1970 and is used to set the result of 5352 ** sqlite3OsCurrentTime() during testing. 5353 */ 5354 #ifdef SQLITE_TEST 5355 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ 5356 #endif 5357 5358 /* 5359 ** Find the current time (in Universal Coordinated Time). 
Write into *piNow 5360 ** the current time and date as a Julian Day number times 86_400_000. In 5361 ** other words, write into *piNow the number of milliseconds since the Julian 5362 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the 5363 ** proleptic Gregorian calendar. 5364 ** 5365 ** On success, return 0. Return 1 if the time and date cannot be found. 5366 */ 5367 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ 5368 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; 5369 #if defined(NO_GETTOD) 5370 time_t t; 5371 time(&t); 5372 *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; 5373 #elif OS_VXWORKS 5374 struct timespec sNow; 5375 clock_gettime(CLOCK_REALTIME, &sNow); 5376 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; 5377 #else 5378 struct timeval sNow; 5379 gettimeofday(&sNow, 0); 5380 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; 5381 #endif 5382 5383 #ifdef SQLITE_TEST 5384 if( sqlite3_current_time ){ 5385 *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; 5386 } 5387 #endif 5388 UNUSED_PARAMETER(NotUsed); 5389 return 0; 5390 } 5391 5392 /* 5393 ** Find the current time (in Universal Coordinated Time). Write the 5394 ** current time and date as a Julian Day number into *prNow and 5395 ** return 0. Return 1 if the time and date cannot be found. 5396 */ 5397 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ 5398 sqlite3_int64 i; 5399 UNUSED_PARAMETER(NotUsed); 5400 unixCurrentTimeInt64(0, &i); 5401 *prNow = i/86400000.0; 5402 return 0; 5403 } 5404 5405 /* 5406 ** We added the xGetLastError() method with the intention of providing 5407 ** better low-level error messages when operating-system problems come up 5408 ** during SQLite operation. But so far, none of that has been implemented 5409 ** in the core. So this routine is never called. For now, it is merely 5410 ** a place-holder. 
5411 */ 5412 static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ 5413 UNUSED_PARAMETER(NotUsed); 5414 UNUSED_PARAMETER(NotUsed2); 5415 UNUSED_PARAMETER(NotUsed3); 5416 return 0; 5417 } 5418 5419 5420 /* 5421 ************************ End of sqlite3_vfs methods *************************** 5422 ******************************************************************************/ 5423 5424 /****************************************************************************** 5425 ************************** Begin Proxy Locking ******************************** 5426 ** 5427 ** Proxy locking is a "uber-locking-method" in this sense: It uses the 5428 ** other locking methods on secondary lock files. Proxy locking is a 5429 ** meta-layer over top of the primitive locking implemented above. For 5430 ** this reason, the division that implements of proxy locking is deferred 5431 ** until late in the file (here) after all of the other I/O methods have 5432 ** been defined - so that the primitive locking methods are available 5433 ** as services to help with the implementation of proxy locking. 5434 ** 5435 **** 5436 ** 5437 ** The default locking schemes in SQLite use byte-range locks on the 5438 ** database file to coordinate safe, concurrent access by multiple readers 5439 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking 5440 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented 5441 ** as POSIX read & write locks over fixed set of locations (via fsctl), 5442 ** on AFP and SMB only exclusive byte-range locks are available via fsctl 5443 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. 
5444 ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected 5445 ** address in the shared range is taken for a SHARED lock, the entire 5446 ** shared range is taken for an EXCLUSIVE lock): 5447 ** 5448 ** PENDING_BYTE 0x40000000 5449 ** RESERVED_BYTE 0x40000001 5450 ** SHARED_RANGE 0x40000002 -> 0x40000200 5451 ** 5452 ** This works well on the local file system, but shows a nearly 100x 5453 ** slowdown in read performance on AFP because the AFP client disables 5454 ** the read cache when byte-range locks are present. Enabling the read 5455 ** cache exposes a cache coherency problem that is present on all OS X 5456 ** supported network file systems. NFS and AFP both observe the 5457 ** close-to-open semantics for ensuring cache coherency 5458 ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively 5459 ** address the requirements for concurrent database access by multiple 5460 ** readers and writers 5461 ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. 5462 ** 5463 ** To address the performance and cache coherency issues, proxy file locking 5464 ** changes the way database access is controlled by limiting access to a 5465 ** single host at a time and moving file locks off of the database file 5466 ** and onto a proxy file on the local file system. 
5467 ** 5468 ** 5469 ** Using proxy locks 5470 ** ----------------- 5471 ** 5472 ** C APIs 5473 ** 5474 ** sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE, 5475 ** <proxy_path> | ":auto:"); 5476 ** sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>); 5477 ** 5478 ** 5479 ** SQL pragmas 5480 ** 5481 ** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto: 5482 ** PRAGMA [database.]lock_proxy_file 5483 ** 5484 ** Specifying ":auto:" means that if there is a conch file with a matching 5485 ** host ID in it, the proxy path in the conch file will be used, otherwise 5486 ** a proxy path based on the user's temp dir 5487 ** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the 5488 ** actual proxy file name is generated from the name and path of the 5489 ** database file. For example: 5490 ** 5491 ** For database path "/Users/me/foo.db" 5492 ** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:") 5493 ** 5494 ** Once a lock proxy is configured for a database connection, it can not 5495 ** be removed, however it may be switched to a different proxy path via 5496 ** the above APIs (assuming the conch file is not being held by another 5497 ** connection or process). 5498 ** 5499 ** 5500 ** How proxy locking works 5501 ** ----------------------- 5502 ** 5503 ** Proxy file locking relies primarily on two new supporting files: 5504 ** 5505 ** * conch file to limit access to the database file to a single host 5506 ** at a time 5507 ** 5508 ** * proxy file to act as a proxy for the advisory locks normally 5509 ** taken on the database 5510 ** 5511 ** The conch file - to use a proxy file, sqlite must first "hold the conch" 5512 ** by taking an sqlite-style shared lock on the conch file, reading the 5513 ** contents and comparing the host's unique host ID (see below) and lock 5514 ** proxy path against the values stored in the conch. 
**  The conch file is stored in the same directory as the database file
**  and the file name is patterned after the database file name as
**  ".<databasename>-conch".
**  If the conch file does not exist, or its contents do not match the
**  host ID and/or proxy path, then the lock is escalated to an exclusive
**  lock and the conch file contents are updated with the host ID and proxy
**  path and the lock is downgraded to a shared lock again.  If the conch
**  is held by another process (with a shared lock), the exclusive lock
**  will fail and SQLITE_BUSY is returned.
**
**  The proxy file - a single-byte file used for all advisory file locks
**  normally taken on the database file.   This allows for safe sharing
**  of the database file for multiple readers and writers on the same
**  host (the conch ensures that they all use the same local lock file).
**
**  Requesting the lock proxy does not immediately take the conch; it is
**  only taken when the first request to lock the database file is made.
**  This matches the semantics of the traditional locking behavior, where
**  opening a connection to a database file does not take a lock on it.
**  The shared lock and an open file descriptor are maintained until
**  the connection to the database is closed.
**
**  The proxy file and the lock file are never deleted so they only need
**  to be created the first time they are used.
**
**  Configuration options
**  ---------------------
**
**  SQLITE_PREFER_PROXY_LOCKING
**
**       Database files accessed on non-local file systems are
**       automatically configured for proxy locking, lock files are
**       named automatically using the same logic as
**       PRAGMA lock_proxy_file=":auto:"
**
**  SQLITE_PROXY_DEBUG
**
**       Enables the logging of error messages during host id file
**       retrieval and creation
**
**  LOCKPROXYDIR
**
**       Overrides the default directory used for lock proxy files that
**       are named automatically via the ":auto:" setting
**
**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
**
**       Permissions to use when creating a directory for storing the
**       lock proxy files, only used when LOCKPROXYDIR is not set.
**
**
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files (explicitly calling the SQLITE_SET_LOCKPROXYFILE pragma or
** sqlite3_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
5571 */ 5572 5573 /* 5574 ** Proxy locking is only available on MacOSX 5575 */ 5576 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5577 5578 /* 5579 ** The proxyLockingContext has the path and file structures for the remote 5580 ** and local proxy files in it 5581 */ 5582 typedef struct proxyLockingContext proxyLockingContext; 5583 struct proxyLockingContext { 5584 unixFile *conchFile; /* Open conch file */ 5585 char *conchFilePath; /* Name of the conch file */ 5586 unixFile *lockProxy; /* Open proxy lock file */ 5587 char *lockProxyPath; /* Name of the proxy lock file */ 5588 char *dbPath; /* Name of the open file */ 5589 int conchHeld; /* 1 if the conch is held, -1 if lockless */ 5590 void *oldLockingContext; /* Original lockingcontext to restore on close */ 5591 sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ 5592 }; 5593 5594 /* 5595 ** The proxy lock file path for the database at dbPath is written into lPath, 5596 ** which must point to valid, writable memory large enough for a maxLen length 5597 ** file path. 
5598 */ 5599 static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ 5600 int len; 5601 int dbLen; 5602 int i; 5603 5604 #ifdef LOCKPROXYDIR 5605 len = strlcpy(lPath, LOCKPROXYDIR, maxLen); 5606 #else 5607 # ifdef _CS_DARWIN_USER_TEMP_DIR 5608 { 5609 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ 5610 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", 5611 lPath, errno, getpid())); 5612 return SQLITE_IOERR_LOCK; 5613 } 5614 len = strlcat(lPath, "sqliteplocks", maxLen); 5615 } 5616 # else 5617 len = strlcpy(lPath, "/tmp/", maxLen); 5618 # endif 5619 #endif 5620 5621 if( lPath[len-1]!='/' ){ 5622 len = strlcat(lPath, "/", maxLen); 5623 } 5624 5625 /* transform the db path to a unique cache name */ 5626 dbLen = (int)strlen(dbPath); 5627 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ 5628 char c = dbPath[i]; 5629 lPath[i+len] = (c=='/')?'_':c; 5630 } 5631 lPath[i+len]='\0'; 5632 strlcat(lPath, ":auto:", maxLen); 5633 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, getpid())); 5634 return SQLITE_OK; 5635 } 5636 5637 /* 5638 ** Creates the lock file and any missing directories in lockPath 5639 */ 5640 static int proxyCreateLockPath(const char *lockPath){ 5641 int i, len; 5642 char buf[MAXPATHLEN]; 5643 int start = 0; 5644 5645 assert(lockPath!=NULL); 5646 /* try to create all the intermediate directories */ 5647 len = (int)strlen(lockPath); 5648 buf[0] = lockPath[0]; 5649 for( i=1; i<len; i++ ){ 5650 if( lockPath[i] == '/' && (i - start > 0) ){ 5651 /* only mkdir if leaf dir != "." or "/" or ".." */ 5652 if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 5653 || (i-start==2 && buf[start] != '.' 
&& buf[start+1] != '.') ){ 5654 buf[i]='\0'; 5655 if( mkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ 5656 int err=errno; 5657 if( err!=EEXIST ) { 5658 OSTRACE(("CREATELOCKPATH FAILED creating %s, " 5659 "'%s' proxy lock path=%s pid=%d\n", 5660 buf, strerror(err), lockPath, getpid())); 5661 return err; 5662 } 5663 } 5664 } 5665 start=i+1; 5666 } 5667 buf[i] = lockPath[i]; 5668 } 5669 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n", lockPath, getpid())); 5670 return 0; 5671 } 5672 5673 /* 5674 ** Create a new VFS file descriptor (stored in memory obtained from 5675 ** sqlite3_malloc) and open the file named "path" in the file descriptor. 5676 ** 5677 ** The caller is responsible not only for closing the file descriptor 5678 ** but also for freeing the memory associated with the file descriptor. 5679 */ 5680 static int proxyCreateUnixFile( 5681 const char *path, /* path for the new unixFile */ 5682 unixFile **ppFile, /* unixFile created and returned by ref */ 5683 int islockfile /* if non zero missing dirs will be created */ 5684 ) { 5685 int fd = -1; 5686 int dirfd = -1; 5687 unixFile *pNew; 5688 int rc = SQLITE_OK; 5689 int openFlags = O_RDWR | O_CREAT; 5690 sqlite3_vfs dummyVfs; 5691 int terrno = 0; 5692 UnixUnusedFd *pUnused = NULL; 5693 5694 /* 1. first try to open/create the file 5695 ** 2. if that fails, and this is a lock file (not-conch), try creating 5696 ** the parent directories and then try again. 5697 ** 3. 
if that fails, try to open the file read-only 5698 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file 5699 */ 5700 pUnused = findReusableFd(path, openFlags); 5701 if( pUnused ){ 5702 fd = pUnused->fd; 5703 }else{ 5704 pUnused = sqlite3_malloc(sizeof(*pUnused)); 5705 if( !pUnused ){ 5706 return SQLITE_NOMEM; 5707 } 5708 } 5709 if( fd<0 ){ 5710 fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); 5711 terrno = errno; 5712 if( fd<0 && errno==ENOENT && islockfile ){ 5713 if( proxyCreateLockPath(path) == SQLITE_OK ){ 5714 fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); 5715 } 5716 } 5717 } 5718 if( fd<0 ){ 5719 openFlags = O_RDONLY; 5720 fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); 5721 terrno = errno; 5722 } 5723 if( fd<0 ){ 5724 if( islockfile ){ 5725 return SQLITE_BUSY; 5726 } 5727 switch (terrno) { 5728 case EACCES: 5729 return SQLITE_PERM; 5730 case EIO: 5731 return SQLITE_IOERR_LOCK; /* even though it is the conch */ 5732 default: 5733 return SQLITE_CANTOPEN_BKPT; 5734 } 5735 } 5736 5737 pNew = (unixFile *)sqlite3_malloc(sizeof(*pNew)); 5738 if( pNew==NULL ){ 5739 rc = SQLITE_NOMEM; 5740 goto end_create_proxy; 5741 } 5742 memset(pNew, 0, sizeof(unixFile)); 5743 pNew->openFlags = openFlags; 5744 memset(&dummyVfs, 0, sizeof(dummyVfs)); 5745 dummyVfs.pAppData = (void*)&autolockIoFinder; 5746 dummyVfs.zName = "dummy"; 5747 pUnused->fd = fd; 5748 pUnused->flags = openFlags; 5749 pNew->pUnused = pUnused; 5750 5751 rc = fillInUnixFile(&dummyVfs, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0, 0); 5752 if( rc==SQLITE_OK ){ 5753 *ppFile = pNew; 5754 return SQLITE_OK; 5755 } 5756 end_create_proxy: 5757 robust_close(pNew, fd, __LINE__); 5758 sqlite3_free(pNew); 5759 sqlite3_free(pUnused); 5760 return rc; 5761 } 5762 5763 #ifdef SQLITE_TEST 5764 /* simulate multiple hosts by creating unique hostid file paths */ 5765 int sqlite3_hostid_num = 0; 5766 #endif 5767 5768 #define PROXY_HOSTIDLEN 16 /* conch 
file host id length */ 5769 5770 /* Not always defined in the headers as it ought to be */ 5771 extern int gethostuuid(uuid_t id, const struct timespec *wait); 5772 5773 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 5774 ** bytes of writable memory. 5775 */ 5776 static int proxyGetHostID(unsigned char *pHostID, int *pError){ 5777 assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); 5778 memset(pHostID, 0, PROXY_HOSTIDLEN); 5779 #if defined(__MAX_OS_X_VERSION_MIN_REQUIRED)\ 5780 && __MAC_OS_X_VERSION_MIN_REQUIRED<1050 5781 { 5782 static const struct timespec timeout = {1, 0}; /* 1 sec timeout */ 5783 if( gethostuuid(pHostID, &timeout) ){ 5784 int err = errno; 5785 if( pError ){ 5786 *pError = err; 5787 } 5788 return SQLITE_IOERR; 5789 } 5790 } 5791 #endif 5792 #ifdef SQLITE_TEST 5793 /* simulate multiple hosts by creating unique hostid file paths */ 5794 if( sqlite3_hostid_num != 0){ 5795 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); 5796 } 5797 #endif 5798 5799 return SQLITE_OK; 5800 } 5801 5802 /* The conch file contains the header, host id and lock file path 5803 */ 5804 #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ 5805 #define PROXY_HEADERLEN 1 /* conch file header length */ 5806 #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) 5807 #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) 5808 5809 /* 5810 ** Takes an open conch file, copies the contents to a new path and then moves 5811 ** it back. The newly created file's file descriptor is assigned to the 5812 ** conch file structure and finally the original conch file descriptor is 5813 ** closed. Returns zero if successful. 
5814 */ 5815 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ 5816 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 5817 unixFile *conchFile = pCtx->conchFile; 5818 char tPath[MAXPATHLEN]; 5819 char buf[PROXY_MAXCONCHLEN]; 5820 char *cPath = pCtx->conchFilePath; 5821 size_t readLen = 0; 5822 size_t pathLen = 0; 5823 char errmsg[64] = ""; 5824 int fd = -1; 5825 int rc = -1; 5826 UNUSED_PARAMETER(myHostID); 5827 5828 /* create a new path by replace the trailing '-conch' with '-break' */ 5829 pathLen = strlcpy(tPath, cPath, MAXPATHLEN); 5830 if( pathLen>MAXPATHLEN || pathLen<6 || 5831 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ 5832 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); 5833 goto end_breaklock; 5834 } 5835 /* read the conch content */ 5836 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); 5837 if( readLen<PROXY_PATHINDEX ){ 5838 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); 5839 goto end_breaklock; 5840 } 5841 /* write it out to the temporary break file */ 5842 fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 5843 SQLITE_DEFAULT_FILE_PERMISSIONS); 5844 if( fd<0 ){ 5845 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); 5846 goto end_breaklock; 5847 } 5848 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ 5849 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); 5850 goto end_breaklock; 5851 } 5852 if( rename(tPath, cPath) ){ 5853 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); 5854 goto end_breaklock; 5855 } 5856 rc = 0; 5857 fprintf(stderr, "broke stale lock on %s\n", cPath); 5858 robust_close(pFile, conchFile->h, __LINE__); 5859 conchFile->h = fd; 5860 conchFile->openFlags = O_RDWR | O_CREAT; 5861 5862 end_breaklock: 5863 if( rc ){ 5864 if( fd>=0 ){ 5865 unlink(tPath); 5866 robust_close(pFile, fd, __LINE__); 5867 } 5868 fprintf(stderr, "failed to break stale lock on %s, %s\n", 
cPath, errmsg); 5869 } 5870 return rc; 5871 } 5872 5873 /* Take the requested lock on the conch file and break a stale lock if the 5874 ** host id matches. 5875 */ 5876 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ 5877 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 5878 unixFile *conchFile = pCtx->conchFile; 5879 int rc = SQLITE_OK; 5880 int nTries = 0; 5881 struct timespec conchModTime; 5882 5883 do { 5884 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 5885 nTries ++; 5886 if( rc==SQLITE_BUSY ){ 5887 /* If the lock failed (busy): 5888 * 1st try: get the mod time of the conch, wait 0.5s and try again. 5889 * 2nd try: fail if the mod time changed or host id is different, wait 5890 * 10 sec and try again 5891 * 3rd try: break the lock unless the mod time has changed. 5892 */ 5893 struct stat buf; 5894 if( osFstat(conchFile->h, &buf) ){ 5895 pFile->lastErrno = errno; 5896 return SQLITE_IOERR_LOCK; 5897 } 5898 5899 if( nTries==1 ){ 5900 conchModTime = buf.st_mtimespec; 5901 usleep(500000); /* wait 0.5 sec and try the lock again*/ 5902 continue; 5903 } 5904 5905 assert( nTries>1 ); 5906 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 5907 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ 5908 return SQLITE_BUSY; 5909 } 5910 5911 if( nTries==2 ){ 5912 char tBuf[PROXY_MAXCONCHLEN]; 5913 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); 5914 if( len<0 ){ 5915 pFile->lastErrno = errno; 5916 return SQLITE_IOERR_LOCK; 5917 } 5918 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ 5919 /* don't break the lock if the host id doesn't match */ 5920 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ 5921 return SQLITE_BUSY; 5922 } 5923 }else{ 5924 /* don't break the lock on short read or a version mismatch */ 5925 return SQLITE_BUSY; 5926 } 5927 usleep(10000000); /* wait 10 sec and try the lock again */ 5928 continue; 5929 } 5930 5931 assert( nTries==3 ); 
5932 if( 0==proxyBreakConchLock(pFile, myHostID) ){ 5933 rc = SQLITE_OK; 5934 if( lockType==EXCLUSIVE_LOCK ){ 5935 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); 5936 } 5937 if( !rc ){ 5938 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 5939 } 5940 } 5941 } 5942 } while( rc==SQLITE_BUSY && nTries<3 ); 5943 5944 return rc; 5945 } 5946 5947 /* Takes the conch by taking a shared lock and read the contents conch, if 5948 ** lockPath is non-NULL, the host ID and lock file path must match. A NULL 5949 ** lockPath means that the lockPath in the conch file will be used if the 5950 ** host IDs match, or a new lock path will be generated automatically 5951 ** and written to the conch file. 5952 */ 5953 static int proxyTakeConch(unixFile *pFile){ 5954 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 5955 5956 if( pCtx->conchHeld!=0 ){ 5957 return SQLITE_OK; 5958 }else{ 5959 unixFile *conchFile = pCtx->conchFile; 5960 uuid_t myHostID; 5961 int pError = 0; 5962 char readBuf[PROXY_MAXCONCHLEN]; 5963 char lockPath[MAXPATHLEN]; 5964 char *tempLockPath = NULL; 5965 int rc = SQLITE_OK; 5966 int createConch = 0; 5967 int hostIdMatch = 0; 5968 int readLen = 0; 5969 int tryOldLockPath = 0; 5970 int forceNewLockPath = 0; 5971 5972 OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, 5973 (pCtx->lockProxyPath ? 
pCtx->lockProxyPath : ":auto:"), getpid())); 5974 5975 rc = proxyGetHostID(myHostID, &pError); 5976 if( (rc&0xff)==SQLITE_IOERR ){ 5977 pFile->lastErrno = pError; 5978 goto end_takeconch; 5979 } 5980 rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); 5981 if( rc!=SQLITE_OK ){ 5982 goto end_takeconch; 5983 } 5984 /* read the existing conch file */ 5985 readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); 5986 if( readLen<0 ){ 5987 /* I/O error: lastErrno set by seekAndRead */ 5988 pFile->lastErrno = conchFile->lastErrno; 5989 rc = SQLITE_IOERR_READ; 5990 goto end_takeconch; 5991 }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || 5992 readBuf[0]!=(char)PROXY_CONCHVERSION ){ 5993 /* a short read or version format mismatch means we need to create a new 5994 ** conch file. 5995 */ 5996 createConch = 1; 5997 } 5998 /* if the host id matches and the lock path already exists in the conch 5999 ** we'll try to use the path there, if we can't open that path, we'll 6000 ** retry with a new auto-generated path 6001 */ 6002 do { /* in case we need to try again for an :auto: named lock file */ 6003 6004 if( !createConch && !forceNewLockPath ){ 6005 hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, 6006 PROXY_HOSTIDLEN); 6007 /* if the conch has data compare the contents */ 6008 if( !pCtx->lockProxyPath ){ 6009 /* for auto-named local lock file, just check the host ID and we'll 6010 ** use the local lock file path that's already in there 6011 */ 6012 if( hostIdMatch ){ 6013 size_t pathLen = (readLen - PROXY_PATHINDEX); 6014 6015 if( pathLen>=MAXPATHLEN ){ 6016 pathLen=MAXPATHLEN-1; 6017 } 6018 memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); 6019 lockPath[pathLen] = 0; 6020 tempLockPath = lockPath; 6021 tryOldLockPath = 1; 6022 /* create a copy of the lock path if the conch is taken */ 6023 goto end_takeconch; 6024 } 6025 }else if( hostIdMatch 6026 && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], 6027 readLen-PROXY_PATHINDEX) 
6028 ){ 6029 /* conch host and lock path match */ 6030 goto end_takeconch; 6031 } 6032 } 6033 6034 /* if the conch isn't writable and doesn't match, we can't take it */ 6035 if( (conchFile->openFlags&O_RDWR) == 0 ){ 6036 rc = SQLITE_BUSY; 6037 goto end_takeconch; 6038 } 6039 6040 /* either the conch didn't match or we need to create a new one */ 6041 if( !pCtx->lockProxyPath ){ 6042 proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); 6043 tempLockPath = lockPath; 6044 /* create a copy of the lock path _only_ if the conch is taken */ 6045 } 6046 6047 /* update conch with host and path (this will fail if other process 6048 ** has a shared lock already), if the host id matches, use the big 6049 ** stick. 6050 */ 6051 futimes(conchFile->h, NULL); 6052 if( hostIdMatch && !createConch ){ 6053 if( conchFile->pInode && conchFile->pInode->nShared>1 ){ 6054 /* We are trying for an exclusive lock but another thread in this 6055 ** same process is still holding a shared lock. */ 6056 rc = SQLITE_BUSY; 6057 } else { 6058 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 6059 } 6060 }else{ 6061 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, EXCLUSIVE_LOCK); 6062 } 6063 if( rc==SQLITE_OK ){ 6064 char writeBuffer[PROXY_MAXCONCHLEN]; 6065 int writeSize = 0; 6066 6067 writeBuffer[0] = (char)PROXY_CONCHVERSION; 6068 memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); 6069 if( pCtx->lockProxyPath!=NULL ){ 6070 strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, MAXPATHLEN); 6071 }else{ 6072 strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); 6073 } 6074 writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); 6075 robust_ftruncate(conchFile->h, writeSize); 6076 rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); 6077 fsync(conchFile->h); 6078 /* If we created a new conch file (not just updated the contents of a 6079 ** valid conch file), try to match the permissions of the database 6080 */ 6081 if( 
rc==SQLITE_OK && createConch ){ 6082 struct stat buf; 6083 int err = osFstat(pFile->h, &buf); 6084 if( err==0 ){ 6085 mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | 6086 S_IROTH|S_IWOTH); 6087 /* try to match the database file R/W permissions, ignore failure */ 6088 #ifndef SQLITE_PROXY_DEBUG 6089 osFchmod(conchFile->h, cmode); 6090 #else 6091 do{ 6092 rc = osFchmod(conchFile->h, cmode); 6093 }while( rc==(-1) && errno==EINTR ); 6094 if( rc!=0 ){ 6095 int code = errno; 6096 fprintf(stderr, "fchmod %o FAILED with %d %s\n", 6097 cmode, code, strerror(code)); 6098 } else { 6099 fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); 6100 } 6101 }else{ 6102 int code = errno; 6103 fprintf(stderr, "STAT FAILED[%d] with %d %s\n", 6104 err, code, strerror(code)); 6105 #endif 6106 } 6107 } 6108 } 6109 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); 6110 6111 end_takeconch: 6112 OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); 6113 if( rc==SQLITE_OK && pFile->openFlags ){ 6114 if( pFile->h>=0 ){ 6115 robust_close(pFile, pFile->h, __LINE__); 6116 } 6117 pFile->h = -1; 6118 int fd = robust_open(pCtx->dbPath, pFile->openFlags, 6119 SQLITE_DEFAULT_FILE_PERMISSIONS); 6120 OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); 6121 if( fd>=0 ){ 6122 pFile->h = fd; 6123 }else{ 6124 rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called 6125 during locking */ 6126 } 6127 } 6128 if( rc==SQLITE_OK && !pCtx->lockProxy ){ 6129 char *path = tempLockPath ? 
tempLockPath : pCtx->lockProxyPath; 6130 rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); 6131 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ 6132 /* we couldn't create the proxy lock file with the old lock file path 6133 ** so try again via auto-naming 6134 */ 6135 forceNewLockPath = 1; 6136 tryOldLockPath = 0; 6137 continue; /* go back to the do {} while start point, try again */ 6138 } 6139 } 6140 if( rc==SQLITE_OK ){ 6141 /* Need to make a copy of path if we extracted the value 6142 ** from the conch file or the path was allocated on the stack 6143 */ 6144 if( tempLockPath ){ 6145 pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); 6146 if( !pCtx->lockProxyPath ){ 6147 rc = SQLITE_NOMEM; 6148 } 6149 } 6150 } 6151 if( rc==SQLITE_OK ){ 6152 pCtx->conchHeld = 1; 6153 6154 if( pCtx->lockProxy->pMethod == &afpIoMethods ){ 6155 afpLockingContext *afpCtx; 6156 afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; 6157 afpCtx->dbPath = pCtx->lockProxyPath; 6158 } 6159 } else { 6160 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 6161 } 6162 OSTRACE(("TAKECONCH %d %s\n", conchFile->h, 6163 rc==SQLITE_OK?"ok":"failed")); 6164 return rc; 6165 } while (1); /* in case we need to retry the :auto: lock file - 6166 ** we should never get here except via the 'continue' call. */ 6167 } 6168 } 6169 6170 /* 6171 ** If pFile holds a lock on a conch file, then release that lock. 6172 */ 6173 static int proxyReleaseConch(unixFile *pFile){ 6174 int rc = SQLITE_OK; /* Subroutine return code */ 6175 proxyLockingContext *pCtx; /* The locking context for the proxy lock */ 6176 unixFile *conchFile; /* Name of the conch file */ 6177 6178 pCtx = (proxyLockingContext *)pFile->lockingContext; 6179 conchFile = pCtx->conchFile; 6180 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, 6181 (pCtx->lockProxyPath ? 
pCtx->lockProxyPath : ":auto:"), 6182 getpid())); 6183 if( pCtx->conchHeld>0 ){ 6184 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 6185 } 6186 pCtx->conchHeld = 0; 6187 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, 6188 (rc==SQLITE_OK ? "ok" : "failed"))); 6189 return rc; 6190 } 6191 6192 /* 6193 ** Given the name of a database file, compute the name of its conch file. 6194 ** Store the conch filename in memory obtained from sqlite3_malloc(). 6195 ** Make *pConchPath point to the new name. Return SQLITE_OK on success 6196 ** or SQLITE_NOMEM if unable to obtain memory. 6197 ** 6198 ** The caller is responsible for ensuring that the allocated memory 6199 ** space is eventually freed. 6200 ** 6201 ** *pConchPath is set to NULL if a memory allocation error occurs. 6202 */ 6203 static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ 6204 int i; /* Loop counter */ 6205 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ 6206 char *conchPath; /* buffer in which to construct conch name */ 6207 6208 /* Allocate space for the conch filename and initialize the name to 6209 ** the name of the original database file. */ 6210 *pConchPath = conchPath = (char *)sqlite3_malloc(len + 8); 6211 if( conchPath==0 ){ 6212 return SQLITE_NOMEM; 6213 } 6214 memcpy(conchPath, dbPath, len+1); 6215 6216 /* now insert a "." 
before the last / character */ 6217 for( i=(len-1); i>=0; i-- ){ 6218 if( conchPath[i]=='/' ){ 6219 i++; 6220 break; 6221 } 6222 } 6223 conchPath[i]='.'; 6224 while ( i<len ){ 6225 conchPath[i+1]=dbPath[i]; 6226 i++; 6227 } 6228 6229 /* append the "-conch" suffix to the file */ 6230 memcpy(&conchPath[i+1], "-conch", 7); 6231 assert( (int)strlen(conchPath) == len+7 ); 6232 6233 return SQLITE_OK; 6234 } 6235 6236 6237 /* Takes a fully configured proxy locking-style unix file and switches 6238 ** the local lock file path 6239 */ 6240 static int switchLockProxyPath(unixFile *pFile, const char *path) { 6241 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 6242 char *oldPath = pCtx->lockProxyPath; 6243 int rc = SQLITE_OK; 6244 6245 if( pFile->eFileLock!=NO_LOCK ){ 6246 return SQLITE_BUSY; 6247 } 6248 6249 /* nothing to do if the path is NULL, :auto: or matches the existing path */ 6250 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || 6251 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ 6252 return SQLITE_OK; 6253 }else{ 6254 unixFile *lockProxy = pCtx->lockProxy; 6255 pCtx->lockProxy=NULL; 6256 pCtx->conchHeld = 0; 6257 if( lockProxy!=NULL ){ 6258 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); 6259 if( rc ) return rc; 6260 sqlite3_free(lockProxy); 6261 } 6262 sqlite3_free(oldPath); 6263 pCtx->lockProxyPath = sqlite3DbStrDup(0, path); 6264 } 6265 6266 return rc; 6267 } 6268 6269 /* 6270 ** pFile is a file that has been opened by a prior xOpen call. dbPath 6271 ** is a string buffer at least MAXPATHLEN+1 characters in size. 6272 ** 6273 ** This routine find the filename associated with pFile and writes it 6274 ** int dbPath. 
6275 */ 6276 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ 6277 #if defined(__APPLE__) 6278 if( pFile->pMethod == &afpIoMethods ){ 6279 /* afp style keeps a reference to the db path in the filePath field 6280 ** of the struct */ 6281 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 6282 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN); 6283 } else 6284 #endif 6285 if( pFile->pMethod == &dotlockIoMethods ){ 6286 /* dot lock style uses the locking context to store the dot lock 6287 ** file path */ 6288 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); 6289 memcpy(dbPath, (char *)pFile->lockingContext, len + 1); 6290 }else{ 6291 /* all other styles use the locking context to store the db file path */ 6292 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 6293 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); 6294 } 6295 return SQLITE_OK; 6296 } 6297 6298 /* 6299 ** Takes an already filled in unix file and alters it so all file locking 6300 ** will be performed on the local proxy lock file. The following fields 6301 ** are preserved in the locking context so that they can be restored and 6302 ** the unix structure properly cleaned up at close time: 6303 ** ->lockingContext 6304 ** ->pMethod 6305 */ 6306 static int proxyTransformUnixFile(unixFile *pFile, const char *path) { 6307 proxyLockingContext *pCtx; 6308 char dbPath[MAXPATHLEN+1]; /* Name of the database file */ 6309 char *lockPath=NULL; 6310 int rc = SQLITE_OK; 6311 6312 if( pFile->eFileLock!=NO_LOCK ){ 6313 return SQLITE_BUSY; 6314 } 6315 proxyGetDbPathForUnixFile(pFile, dbPath); 6316 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ 6317 lockPath=NULL; 6318 }else{ 6319 lockPath=(char *)path; 6320 } 6321 6322 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, 6323 (lockPath ? 
lockPath : ":auto:"), getpid())); 6324 6325 pCtx = sqlite3_malloc( sizeof(*pCtx) ); 6326 if( pCtx==0 ){ 6327 return SQLITE_NOMEM; 6328 } 6329 memset(pCtx, 0, sizeof(*pCtx)); 6330 6331 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); 6332 if( rc==SQLITE_OK ){ 6333 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); 6334 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ 6335 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and 6336 ** (c) the file system is read-only, then enable no-locking access. 6337 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts 6338 ** that openFlags will have only one of O_RDONLY or O_RDWR. 6339 */ 6340 struct statfs fsInfo; 6341 struct stat conchInfo; 6342 int goLockless = 0; 6343 6344 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { 6345 int err = errno; 6346 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ 6347 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; 6348 } 6349 } 6350 if( goLockless ){ 6351 pCtx->conchHeld = -1; /* read only FS/ lockless */ 6352 rc = SQLITE_OK; 6353 } 6354 } 6355 } 6356 if( rc==SQLITE_OK && lockPath ){ 6357 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); 6358 } 6359 6360 if( rc==SQLITE_OK ){ 6361 pCtx->dbPath = sqlite3DbStrDup(0, dbPath); 6362 if( pCtx->dbPath==NULL ){ 6363 rc = SQLITE_NOMEM; 6364 } 6365 } 6366 if( rc==SQLITE_OK ){ 6367 /* all memory is allocated, proxys are created and assigned, 6368 ** switch the locking context and pMethod then return. 
6369 */ 6370 pCtx->oldLockingContext = pFile->lockingContext; 6371 pFile->lockingContext = pCtx; 6372 pCtx->pOldMethod = pFile->pMethod; 6373 pFile->pMethod = &proxyIoMethods; 6374 }else{ 6375 if( pCtx->conchFile ){ 6376 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); 6377 sqlite3_free(pCtx->conchFile); 6378 } 6379 sqlite3DbFree(0, pCtx->lockProxyPath); 6380 sqlite3_free(pCtx->conchFilePath); 6381 sqlite3_free(pCtx); 6382 } 6383 OSTRACE(("TRANSPROXY %d %s\n", pFile->h, 6384 (rc==SQLITE_OK ? "ok" : "failed"))); 6385 return rc; 6386 } 6387 6388 6389 /* 6390 ** This routine handles sqlite3_file_control() calls that are specific 6391 ** to proxy locking. 6392 */ 6393 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 6394 switch( op ){ 6395 case SQLITE_GET_LOCKPROXYFILE: { 6396 unixFile *pFile = (unixFile*)id; 6397 if( pFile->pMethod == &proxyIoMethods ){ 6398 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 6399 proxyTakeConch(pFile); 6400 if( pCtx->lockProxyPath ){ 6401 *(const char **)pArg = pCtx->lockProxyPath; 6402 }else{ 6403 *(const char **)pArg = ":auto: (not held)"; 6404 } 6405 } else { 6406 *(const char **)pArg = NULL; 6407 } 6408 return SQLITE_OK; 6409 } 6410 case SQLITE_SET_LOCKPROXYFILE: { 6411 unixFile *pFile = (unixFile*)id; 6412 int rc = SQLITE_OK; 6413 int isProxyStyle = (pFile->pMethod == &proxyIoMethods); 6414 if( pArg==NULL || (const char *)pArg==0 ){ 6415 if( isProxyStyle ){ 6416 /* turn off proxy locking - not supported */ 6417 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? 
SQLITE_MISUSE?*/; 6418 }else{ 6419 /* turn off proxy locking - already off - NOOP */ 6420 rc = SQLITE_OK; 6421 } 6422 }else{ 6423 const char *proxyPath = (const char *)pArg; 6424 if( isProxyStyle ){ 6425 proxyLockingContext *pCtx = 6426 (proxyLockingContext*)pFile->lockingContext; 6427 if( !strcmp(pArg, ":auto:") 6428 || (pCtx->lockProxyPath && 6429 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) 6430 ){ 6431 rc = SQLITE_OK; 6432 }else{ 6433 rc = switchLockProxyPath(pFile, proxyPath); 6434 } 6435 }else{ 6436 /* turn on proxy file locking */ 6437 rc = proxyTransformUnixFile(pFile, proxyPath); 6438 } 6439 } 6440 return rc; 6441 } 6442 default: { 6443 assert( 0 ); /* The call assures that only valid opcodes are sent */ 6444 } 6445 } 6446 /*NOTREACHED*/ 6447 return SQLITE_ERROR; 6448 } 6449 6450 /* 6451 ** Within this division (the proxying locking implementation) the procedures 6452 ** above this point are all utilities. The lock-related methods of the 6453 ** proxy-locking sqlite3_io_method object follow. 6454 */ 6455 6456 6457 /* 6458 ** This routine checks if there is a RESERVED lock held on the specified 6459 ** file by this or any other process. If such a lock is held, set *pResOut 6460 ** to a non-zero value otherwise *pResOut is set to zero. The return value 6461 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 
6462 */ 6463 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { 6464 unixFile *pFile = (unixFile*)id; 6465 int rc = proxyTakeConch(pFile); 6466 if( rc==SQLITE_OK ){ 6467 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6468 if( pCtx->conchHeld>0 ){ 6469 unixFile *proxy = pCtx->lockProxy; 6470 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); 6471 }else{ /* conchHeld < 0 is lockless */ 6472 pResOut=0; 6473 } 6474 } 6475 return rc; 6476 } 6477 6478 /* 6479 ** Lock the file with the lock specified by parameter eFileLock - one 6480 ** of the following: 6481 ** 6482 ** (1) SHARED_LOCK 6483 ** (2) RESERVED_LOCK 6484 ** (3) PENDING_LOCK 6485 ** (4) EXCLUSIVE_LOCK 6486 ** 6487 ** Sometimes when requesting one lock state, additional lock states 6488 ** are inserted in between. The locking might fail on one of the later 6489 ** transitions leaving the lock state different from what it started but 6490 ** still short of its goal. The following chart shows the allowed 6491 ** transitions and the inserted intermediate states: 6492 ** 6493 ** UNLOCKED -> SHARED 6494 ** SHARED -> RESERVED 6495 ** SHARED -> (PENDING) -> EXCLUSIVE 6496 ** RESERVED -> (PENDING) -> EXCLUSIVE 6497 ** PENDING -> EXCLUSIVE 6498 ** 6499 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 6500 ** routine to lower a locking level. 6501 */ 6502 static int proxyLock(sqlite3_file *id, int eFileLock) { 6503 unixFile *pFile = (unixFile*)id; 6504 int rc = proxyTakeConch(pFile); 6505 if( rc==SQLITE_OK ){ 6506 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6507 if( pCtx->conchHeld>0 ){ 6508 unixFile *proxy = pCtx->lockProxy; 6509 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); 6510 pFile->eFileLock = proxy->eFileLock; 6511 }else{ 6512 /* conchHeld < 0 is lockless */ 6513 } 6514 } 6515 return rc; 6516 } 6517 6518 6519 /* 6520 ** Lower the locking level on file descriptor pFile to eFileLock. 
eFileLock 6521 ** must be either NO_LOCK or SHARED_LOCK. 6522 ** 6523 ** If the locking level of the file descriptor is already at or below 6524 ** the requested locking level, this routine is a no-op. 6525 */ 6526 static int proxyUnlock(sqlite3_file *id, int eFileLock) { 6527 unixFile *pFile = (unixFile*)id; 6528 int rc = proxyTakeConch(pFile); 6529 if( rc==SQLITE_OK ){ 6530 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6531 if( pCtx->conchHeld>0 ){ 6532 unixFile *proxy = pCtx->lockProxy; 6533 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); 6534 pFile->eFileLock = proxy->eFileLock; 6535 }else{ 6536 /* conchHeld < 0 is lockless */ 6537 } 6538 } 6539 return rc; 6540 } 6541 6542 /* 6543 ** Close a file that uses proxy locks. 6544 */ 6545 static int proxyClose(sqlite3_file *id) { 6546 if( id ){ 6547 unixFile *pFile = (unixFile*)id; 6548 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6549 unixFile *lockProxy = pCtx->lockProxy; 6550 unixFile *conchFile = pCtx->conchFile; 6551 int rc = SQLITE_OK; 6552 6553 if( lockProxy ){ 6554 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); 6555 if( rc ) return rc; 6556 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); 6557 if( rc ) return rc; 6558 sqlite3_free(lockProxy); 6559 pCtx->lockProxy = 0; 6560 } 6561 if( conchFile ){ 6562 if( pCtx->conchHeld ){ 6563 rc = proxyReleaseConch(pFile); 6564 if( rc ) return rc; 6565 } 6566 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); 6567 if( rc ) return rc; 6568 sqlite3_free(conchFile); 6569 } 6570 sqlite3DbFree(0, pCtx->lockProxyPath); 6571 sqlite3_free(pCtx->conchFilePath); 6572 sqlite3DbFree(0, pCtx->dbPath); 6573 /* restore the original locking context and pMethod then close it */ 6574 pFile->lockingContext = pCtx->oldLockingContext; 6575 pFile->pMethod = pCtx->pOldMethod; 6576 sqlite3_free(pCtx); 6577 return pFile->pMethod->xClose(id); 6578 } 6579 return SQLITE_OK; 6580 } 6581 6582 
6583 6584 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 6585 /* 6586 ** The proxy locking style is intended for use with AFP filesystems. 6587 ** And since AFP is only supported on MacOSX, the proxy locking is also 6588 ** restricted to MacOSX. 6589 ** 6590 ** 6591 ******************* End of the proxy lock implementation ********************** 6592 ******************************************************************************/ 6593 6594 /* 6595 ** Initialize the operating system interface. 6596 ** 6597 ** This routine registers all VFS implementations for unix-like operating 6598 ** systems. This routine, and the sqlite3_os_end() routine that follows, 6599 ** should be the only routines in this file that are visible from other 6600 ** files. 6601 ** 6602 ** This routine is called once during SQLite initialization and by a 6603 ** single thread. The memory allocation and mutex subsystems have not 6604 ** necessarily been initialized when this routine is called, and so they 6605 ** should not be used. 6606 */ 6607 int sqlite3_os_init(void){ 6608 /* 6609 ** The following macro defines an initializer for an sqlite3_vfs object. 6610 ** The name of the VFS is NAME. The pAppData is a pointer to a pointer 6611 ** to the "finder" function. (pAppData is a pointer to a pointer because 6612 ** silly C90 rules prohibit a void* from being cast to a function pointer 6613 ** and so we have to go through the intermediate pointer to avoid problems 6614 ** when compiling with -pedantic-errors on GCC.) 6615 ** 6616 ** The FINDER parameter to this macro is the name of the pointer to the 6617 ** finder-function. The finder-function returns a pointer to the 6618 ** sqlite_io_methods object that implements the desired locking 6619 ** behaviors. See the division above that contains the IOMETHODS 6620 ** macro for addition information on finder-functions. 6621 ** 6622 ** Most finders simply return a pointer to a fixed sqlite3_io_methods 6623 ** object. 
But the "autolockIoFinder" available on MacOSX does a little 6624 ** more than that; it looks at the filesystem type that hosts the 6625 ** database file and tries to choose an locking method appropriate for 6626 ** that filesystem time. 6627 */ 6628 #define UNIXVFS(VFSNAME, FINDER) { \ 6629 3, /* iVersion */ \ 6630 sizeof(unixFile), /* szOsFile */ \ 6631 MAX_PATHNAME, /* mxPathname */ \ 6632 0, /* pNext */ \ 6633 VFSNAME, /* zName */ \ 6634 (void*)&FINDER, /* pAppData */ \ 6635 unixOpen, /* xOpen */ \ 6636 unixDelete, /* xDelete */ \ 6637 unixAccess, /* xAccess */ \ 6638 unixFullPathname, /* xFullPathname */ \ 6639 unixDlOpen, /* xDlOpen */ \ 6640 unixDlError, /* xDlError */ \ 6641 unixDlSym, /* xDlSym */ \ 6642 unixDlClose, /* xDlClose */ \ 6643 unixRandomness, /* xRandomness */ \ 6644 unixSleep, /* xSleep */ \ 6645 unixCurrentTime, /* xCurrentTime */ \ 6646 unixGetLastError, /* xGetLastError */ \ 6647 unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ 6648 unixSetSystemCall, /* xSetSystemCall */ \ 6649 unixGetSystemCall, /* xGetSystemCall */ \ 6650 unixNextSystemCall, /* xNextSystemCall */ \ 6651 } 6652 6653 /* 6654 ** All default VFSes for unix are contained in the following array. 6655 ** 6656 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified 6657 ** by the SQLite core when the VFS is registered. So the following 6658 ** array cannot be const. 
6659 */ 6660 static sqlite3_vfs aVfs[] = { 6661 #if SQLITE_ENABLE_LOCKING_STYLE && (OS_VXWORKS || defined(__APPLE__)) 6662 UNIXVFS("unix", autolockIoFinder ), 6663 #else 6664 UNIXVFS("unix", posixIoFinder ), 6665 #endif 6666 UNIXVFS("unix-none", nolockIoFinder ), 6667 UNIXVFS("unix-dotfile", dotlockIoFinder ), 6668 UNIXVFS("unix-excl", posixIoFinder ), 6669 #if OS_VXWORKS 6670 UNIXVFS("unix-namedsem", semIoFinder ), 6671 #endif 6672 #if SQLITE_ENABLE_LOCKING_STYLE 6673 UNIXVFS("unix-posix", posixIoFinder ), 6674 #if !OS_VXWORKS 6675 UNIXVFS("unix-flock", flockIoFinder ), 6676 #endif 6677 #endif 6678 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 6679 UNIXVFS("unix-afp", afpIoFinder ), 6680 UNIXVFS("unix-nfs", nfsIoFinder ), 6681 UNIXVFS("unix-proxy", proxyIoFinder ), 6682 #endif 6683 }; 6684 unsigned int i; /* Loop counter */ 6685 6686 /* Register all VFSes defined in the aVfs[] array */ 6687 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ 6688 sqlite3_vfs_register(&aVfs[i], i==0); 6689 } 6690 return SQLITE_OK; 6691 } 6692 6693 /* 6694 ** Shutdown the operating system interface. 6695 ** 6696 ** Some operating systems might need to do some cleanup in this routine, 6697 ** to release dynamically allocated objects. But not on unix. 6698 ** This routine is a no-op for unix. 6699 */ 6700 int sqlite3_os_end(void){ 6701 return SQLITE_OK; 6702 } 6703 6704 #endif /* SQLITE_OS_UNIX */ 6705