/*
** 2004 May 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the VFS implementation for unix-like operating systems
** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
**
** There are actually several different VFS implementations in this file.
** The differences are in the way that file locking is done.  The default
** implementation uses Posix Advisory Locks.  Alternative implementations
** use flock(), dot-files, various proprietary locking schemas, or simply
** skip locking altogether.
**
** This source file is organized into divisions where the logic for various
** subfunctions is contained within the appropriate division.  PLEASE
** KEEP THE STRUCTURE OF THIS FILE INTACT.  New code should be placed
** in the correct division and should be clearly labeled.
**
** The layout of divisions is as follows:
**
**   *  General-purpose declarations and utility functions.
**   *  Unique file ID logic used by VxWorks.
**   *  Various locking primitive implementations (all except proxy locking):
**      + for Posix Advisory Locks
**      + for no-op locks
**      + for dot-file locks
**      + for flock() locking
**      + for named semaphore locks (VxWorks only)
**      + for AFP filesystem locks (MacOSX only)
**   *  sqlite3_file methods not associated with locking.
**   *  Definitions of sqlite3_io_methods objects for all locking
**      methods plus "finder" functions for each locking method.
**   *  sqlite3_vfs method implementations.
**   *  Locking primitives for the proxy uber-locking-method.
(MacOSX only)
**   *  Definitions of sqlite3_vfs objects for all locking methods
**      plus implementations of sqlite3_os_init() and sqlite3_os_end().
*/
#include "sqliteInt.h"
#if SQLITE_OS_UNIX              /* This file is used on unix only */

/*
** There are various methods for file locking used for concurrency
** control:
**
**   1. POSIX locking (the default),
**   2. No locking,
**   3. Dot-file locking,
**   4. flock() locking,
**   5. AFP locking (OSX only),
**   6. Named POSIX semaphores (VxWorks only),
**   7. proxy locking. (OSX only)
**
** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
** is defined to 1.  The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
** selection of the appropriate locking style based on the filesystem
** where the database is located.
*/
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
#  if defined(__APPLE__)
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif

/* Use pread() and pwrite() if they are available */
#if defined(__APPLE__)
# define HAVE_PREAD 1
# define HAVE_PWRITE 1
#endif
#if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64)
# undef USE_PREAD
# define USE_PREAD64 1
#elif defined(HAVE_PREAD) && defined(HAVE_PWRITE)
# undef USE_PREAD64
# define USE_PREAD 1
#endif

/*
** standard include files.
89 */ 90 #include <sys/types.h> 91 #include <sys/stat.h> 92 #include <fcntl.h> 93 #include <unistd.h> 94 #include <time.h> 95 #include <sys/time.h> 96 #include <errno.h> 97 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 98 # include <sys/mman.h> 99 #endif 100 101 #if SQLITE_ENABLE_LOCKING_STYLE 102 # include <sys/ioctl.h> 103 # include <sys/file.h> 104 # include <sys/param.h> 105 #endif /* SQLITE_ENABLE_LOCKING_STYLE */ 106 107 #if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ 108 (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) 109 # if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ 110 && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0)) 111 # define HAVE_GETHOSTUUID 1 112 # else 113 # warning "gethostuuid() is disabled." 114 # endif 115 #endif 116 117 118 #if OS_VXWORKS 119 # include <sys/ioctl.h> 120 # include <semaphore.h> 121 # include <limits.h> 122 #endif /* OS_VXWORKS */ 123 124 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 125 # include <sys/mount.h> 126 #endif 127 128 #ifdef HAVE_UTIME 129 # include <utime.h> 130 #endif 131 132 /* 133 ** Allowed values of unixFile.fsFlags 134 */ 135 #define SQLITE_FSFLAGS_IS_MSDOS 0x1 136 137 /* 138 ** If we are to be thread-safe, include the pthreads header and define 139 ** the SQLITE_UNIX_THREADS macro. 140 */ 141 #if SQLITE_THREADSAFE 142 # include <pthread.h> 143 # define SQLITE_UNIX_THREADS 1 144 #endif 145 146 /* 147 ** Default permissions when creating a new file 148 */ 149 #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS 150 # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 151 #endif 152 153 /* 154 ** Default permissions when creating auto proxy dir 155 */ 156 #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 157 # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 158 #endif 159 160 /* 161 ** Maximum supported path-length. 
162 */ 163 #define MAX_PATHNAME 512 164 165 /* 166 ** Maximum supported symbolic links 167 */ 168 #define SQLITE_MAX_SYMLINKS 100 169 170 /* Always cast the getpid() return type for compatibility with 171 ** kernel modules in VxWorks. */ 172 #define osGetpid(X) (pid_t)getpid() 173 174 /* 175 ** Only set the lastErrno if the error code is a real error and not 176 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK 177 */ 178 #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) 179 180 /* Forward references */ 181 typedef struct unixShm unixShm; /* Connection shared memory */ 182 typedef struct unixShmNode unixShmNode; /* Shared memory instance */ 183 typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ 184 typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ 185 186 /* 187 ** Sometimes, after a file handle is closed by SQLite, the file descriptor 188 ** cannot be closed immediately. In these cases, instances of the following 189 ** structure are used to store the file descriptor while waiting for an 190 ** opportunity to either close or reuse it. 191 */ 192 struct UnixUnusedFd { 193 int fd; /* File descriptor to close */ 194 int flags; /* Flags this file descriptor was opened with */ 195 UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ 196 }; 197 198 /* 199 ** The unixFile structure is subclass of sqlite3_file specific to the unix 200 ** VFS implementations. 201 */ 202 typedef struct unixFile unixFile; 203 struct unixFile { 204 sqlite3_io_methods const *pMethod; /* Always the first entry */ 205 sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ 206 unixInodeInfo *pInode; /* Info about locks on this inode */ 207 int h; /* The file descriptor */ 208 unsigned char eFileLock; /* The type of lock held on this fd */ 209 unsigned short int ctrlFlags; /* Behavioral bits. 
UNIXFILE_* flags */ 210 int lastErrno; /* The unix errno from last I/O error */ 211 void *lockingContext; /* Locking style specific state */ 212 UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ 213 const char *zPath; /* Name of the file */ 214 unixShm *pShm; /* Shared memory segment information */ 215 int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ 216 #if SQLITE_MAX_MMAP_SIZE>0 217 int nFetchOut; /* Number of outstanding xFetch refs */ 218 sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ 219 sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ 220 sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ 221 void *pMapRegion; /* Memory mapped region */ 222 #endif 223 #ifdef __QNXNTO__ 224 int sectorSize; /* Device sector size */ 225 int deviceCharacteristics; /* Precomputed device characteristics */ 226 #endif 227 #if SQLITE_ENABLE_LOCKING_STYLE 228 int openFlags; /* The flags specified at open() */ 229 #endif 230 #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) 231 unsigned fsFlags; /* cached details from statfs() */ 232 #endif 233 #if OS_VXWORKS 234 struct vxworksFileId *pId; /* Unique file ID */ 235 #endif 236 #ifdef SQLITE_DEBUG 237 /* The next group of variables are used to track whether or not the 238 ** transaction counter in bytes 24-27 of database files are updated 239 ** whenever any part of the database changes. An assertion fault will 240 ** occur if a file is updated without also updating the transaction 241 ** counter. This test is made to avoid new problems similar to the 242 ** one described by ticket #3584. 
243 */ 244 unsigned char transCntrChng; /* True if the transaction counter changed */ 245 unsigned char dbUpdate; /* True if any part of database file changed */ 246 unsigned char inNormalWrite; /* True if in a normal write operation */ 247 248 #endif 249 250 #ifdef SQLITE_TEST 251 /* In test mode, increase the size of this structure a bit so that 252 ** it is larger than the struct CrashFile defined in test6.c. 253 */ 254 char aPadding[32]; 255 #endif 256 }; 257 258 /* This variable holds the process id (pid) from when the xRandomness() 259 ** method was called. If xOpen() is called from a different process id, 260 ** indicating that a fork() has occurred, the PRNG will be reset. 261 */ 262 static pid_t randomnessPid = 0; 263 264 /* 265 ** Allowed values for the unixFile.ctrlFlags bitmask: 266 */ 267 #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ 268 #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ 269 #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ 270 #ifndef SQLITE_DISABLE_DIRSYNC 271 # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ 272 #else 273 # define UNIXFILE_DIRSYNC 0x00 274 #endif 275 #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ 276 #define UNIXFILE_DELETE 0x20 /* Delete on close */ 277 #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ 278 #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ 279 280 /* 281 ** Include code that is common to all os_*.c files 282 */ 283 #include "os_common.h" 284 285 /* 286 ** Define various macros that are missing from some systems. 287 */ 288 #ifndef O_LARGEFILE 289 # define O_LARGEFILE 0 290 #endif 291 #ifdef SQLITE_DISABLE_LFS 292 # undef O_LARGEFILE 293 # define O_LARGEFILE 0 294 #endif 295 #ifndef O_NOFOLLOW 296 # define O_NOFOLLOW 0 297 #endif 298 #ifndef O_BINARY 299 # define O_BINARY 0 300 #endif 301 302 /* 303 ** The threadid macro resolves to the thread-id or to 0. Used for 304 ** testing and debugging only. 
305 */ 306 #if SQLITE_THREADSAFE 307 #define threadid pthread_self() 308 #else 309 #define threadid 0 310 #endif 311 312 /* 313 ** HAVE_MREMAP defaults to true on Linux and false everywhere else. 314 */ 315 #if !defined(HAVE_MREMAP) 316 # if defined(__linux__) && defined(_GNU_SOURCE) 317 # define HAVE_MREMAP 1 318 # else 319 # define HAVE_MREMAP 0 320 # endif 321 #endif 322 323 /* 324 ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() 325 ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. 326 */ 327 #ifdef __ANDROID__ 328 # define lseek lseek64 329 #endif 330 331 /* 332 ** Different Unix systems declare open() in different ways. Same use 333 ** open(const char*,int,mode_t). Others use open(const char*,int,...). 334 ** The difference is important when using a pointer to the function. 335 ** 336 ** The safest way to deal with the problem is to always use this wrapper 337 ** which always has the same well-defined interface. 338 */ 339 static int posixOpen(const char *zFile, int flags, int mode){ 340 return open(zFile, flags, mode); 341 } 342 343 /* Forward reference */ 344 static int openDirectory(const char*, int*); 345 static int unixGetpagesize(void); 346 347 /* 348 ** Many system calls are accessed through pointer-to-functions so that 349 ** they may be overridden at runtime to facilitate fault injection during 350 ** testing and sandboxing. The following array holds the names and pointers 351 ** to all overrideable system calls. 
352 */ 353 static struct unix_syscall { 354 const char *zName; /* Name of the system call */ 355 sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ 356 sqlite3_syscall_ptr pDefault; /* Default value */ 357 } aSyscall[] = { 358 { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, 359 #define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) 360 361 { "close", (sqlite3_syscall_ptr)close, 0 }, 362 #define osClose ((int(*)(int))aSyscall[1].pCurrent) 363 364 { "access", (sqlite3_syscall_ptr)access, 0 }, 365 #define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) 366 367 { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, 368 #define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) 369 370 { "stat", (sqlite3_syscall_ptr)stat, 0 }, 371 #define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) 372 373 /* 374 ** The DJGPP compiler environment looks mostly like Unix, but it 375 ** lacks the fcntl() system call. So redefine fcntl() to be something 376 ** that always succeeds. This means that locking does not occur under 377 ** DJGPP. But it is DOS - what did you expect? 
378 */ 379 #ifdef __DJGPP__ 380 { "fstat", 0, 0 }, 381 #define osFstat(a,b,c) 0 382 #else 383 { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, 384 #define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) 385 #endif 386 387 { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, 388 #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) 389 390 { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, 391 #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) 392 393 { "read", (sqlite3_syscall_ptr)read, 0 }, 394 #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) 395 396 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 397 { "pread", (sqlite3_syscall_ptr)pread, 0 }, 398 #else 399 { "pread", (sqlite3_syscall_ptr)0, 0 }, 400 #endif 401 #define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) 402 403 #if defined(USE_PREAD64) 404 { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, 405 #else 406 { "pread64", (sqlite3_syscall_ptr)0, 0 }, 407 #endif 408 #define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent) 409 410 { "write", (sqlite3_syscall_ptr)write, 0 }, 411 #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) 412 413 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 414 { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, 415 #else 416 { "pwrite", (sqlite3_syscall_ptr)0, 0 }, 417 #endif 418 #define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ 419 aSyscall[12].pCurrent) 420 421 #if defined(USE_PREAD64) 422 { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, 423 #else 424 { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, 425 #endif 426 #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ 427 aSyscall[13].pCurrent) 428 429 { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, 430 #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) 431 432 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 433 { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, 434 #else 435 { 
"fallocate", (sqlite3_syscall_ptr)0, 0 }, 436 #endif 437 #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) 438 439 { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, 440 #define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) 441 442 { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, 443 #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) 444 445 { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, 446 #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) 447 448 { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, 449 #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) 450 451 #if defined(HAVE_FCHOWN) 452 { "fchown", (sqlite3_syscall_ptr)fchown, 0 }, 453 #else 454 { "fchown", (sqlite3_syscall_ptr)0, 0 }, 455 #endif 456 #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) 457 458 { "geteuid", (sqlite3_syscall_ptr)geteuid, 0 }, 459 #define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) 460 461 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 462 { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, 463 #else 464 { "mmap", (sqlite3_syscall_ptr)0, 0 }, 465 #endif 466 #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) 467 468 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 469 { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, 470 #else 471 { "munmap", (sqlite3_syscall_ptr)0, 0 }, 472 #endif 473 #define osMunmap ((void*(*)(void*,size_t))aSyscall[23].pCurrent) 474 475 #if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) 476 { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, 477 #else 478 { "mremap", (sqlite3_syscall_ptr)0, 0 }, 479 #endif 480 #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent) 481 482 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 483 { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, 484 #else 485 { "getpagesize", (sqlite3_syscall_ptr)0, 0 }, 486 #endif 487 #define osGetpagesize 
((int(*)(void))aSyscall[25].pCurrent) 488 489 #if defined(HAVE_READLINK) 490 { "readlink", (sqlite3_syscall_ptr)readlink, 0 }, 491 #else 492 { "readlink", (sqlite3_syscall_ptr)0, 0 }, 493 #endif 494 #define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent) 495 496 #if defined(HAVE_LSTAT) 497 { "lstat", (sqlite3_syscall_ptr)lstat, 0 }, 498 #else 499 { "lstat", (sqlite3_syscall_ptr)0, 0 }, 500 #endif 501 #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) 502 503 }; /* End of the overrideable system calls */ 504 505 506 /* 507 ** On some systems, calls to fchown() will trigger a message in a security 508 ** log if they come from non-root processes. So avoid calling fchown() if 509 ** we are not running as root. 510 */ 511 static int robustFchown(int fd, uid_t uid, gid_t gid){ 512 #if defined(HAVE_FCHOWN) 513 return osGeteuid() ? 0 : osFchown(fd,uid,gid); 514 #else 515 return 0; 516 #endif 517 } 518 519 /* 520 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the 521 ** "unix" VFSes. Return SQLITE_OK opon successfully updating the 522 ** system call pointer, or SQLITE_NOTFOUND if there is no configurable 523 ** system call named zName. 524 */ 525 static int unixSetSystemCall( 526 sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ 527 const char *zName, /* Name of system call to override */ 528 sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ 529 ){ 530 unsigned int i; 531 int rc = SQLITE_NOTFOUND; 532 533 UNUSED_PARAMETER(pNotUsed); 534 if( zName==0 ){ 535 /* If no zName is given, restore all system calls to their default 536 ** settings and return NULL 537 */ 538 rc = SQLITE_OK; 539 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 540 if( aSyscall[i].pDefault ){ 541 aSyscall[i].pCurrent = aSyscall[i].pDefault; 542 } 543 } 544 }else{ 545 /* If zName is specified, operate on only the one system call 546 ** specified. 
547 */ 548 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 549 if( strcmp(zName, aSyscall[i].zName)==0 ){ 550 if( aSyscall[i].pDefault==0 ){ 551 aSyscall[i].pDefault = aSyscall[i].pCurrent; 552 } 553 rc = SQLITE_OK; 554 if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; 555 aSyscall[i].pCurrent = pNewFunc; 556 break; 557 } 558 } 559 } 560 return rc; 561 } 562 563 /* 564 ** Return the value of a system call. Return NULL if zName is not a 565 ** recognized system call name. NULL is also returned if the system call 566 ** is currently undefined. 567 */ 568 static sqlite3_syscall_ptr unixGetSystemCall( 569 sqlite3_vfs *pNotUsed, 570 const char *zName 571 ){ 572 unsigned int i; 573 574 UNUSED_PARAMETER(pNotUsed); 575 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 576 if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; 577 } 578 return 0; 579 } 580 581 /* 582 ** Return the name of the first system call after zName. If zName==NULL 583 ** then return the name of the first system call. Return NULL if zName 584 ** is the last system call or if zName is not the name of a valid 585 ** system call. 586 */ 587 static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ 588 int i = -1; 589 590 UNUSED_PARAMETER(p); 591 if( zName ){ 592 for(i=0; i<ArraySize(aSyscall)-1; i++){ 593 if( strcmp(zName, aSyscall[i].zName)==0 ) break; 594 } 595 } 596 for(i++; i<ArraySize(aSyscall); i++){ 597 if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; 598 } 599 return 0; 600 } 601 602 /* 603 ** Do not accept any file descriptor less than this value, in order to avoid 604 ** opening database file using file descriptors that are commonly used for 605 ** standard input, output, and error. 606 */ 607 #ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR 608 # define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 609 #endif 610 611 /* 612 ** Invoke open(). Do so multiple times, until it either succeeds or 613 ** fails for some reason other than EINTR. 
614 ** 615 ** If the file creation mode "m" is 0 then set it to the default for 616 ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally 617 ** 0644) as modified by the system umask. If m is not 0, then 618 ** make the file creation mode be exactly m ignoring the umask. 619 ** 620 ** The m parameter will be non-zero only when creating -wal, -journal, 621 ** and -shm files. We want those files to have *exactly* the same 622 ** permissions as their original database, unadulterated by the umask. 623 ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a 624 ** transaction crashes and leaves behind hot journals, then any 625 ** process that is able to write to the database will also be able to 626 ** recover the hot journals. 627 */ 628 static int robust_open(const char *z, int f, mode_t m){ 629 int fd; 630 mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; 631 while(1){ 632 #if defined(O_CLOEXEC) 633 fd = osOpen(z,f|O_CLOEXEC,m2); 634 #else 635 fd = osOpen(z,f,m2); 636 #endif 637 if( fd<0 ){ 638 if( errno==EINTR ) continue; 639 break; 640 } 641 if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; 642 osClose(fd); 643 sqlite3_log(SQLITE_WARNING, 644 "attempt to open \"%s\" as file descriptor %d", z, fd); 645 fd = -1; 646 if( osOpen("/dev/null", f, m)<0 ) break; 647 } 648 if( fd>=0 ){ 649 if( m!=0 ){ 650 struct stat statbuf; 651 if( osFstat(fd, &statbuf)==0 652 && statbuf.st_size==0 653 && (statbuf.st_mode&0777)!=m 654 ){ 655 osFchmod(fd, m); 656 } 657 } 658 #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) 659 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 660 #endif 661 } 662 return fd; 663 } 664 665 /* 666 ** Helper functions to obtain and relinquish the global mutex. The 667 ** global mutex is used to protect the unixInodeInfo and 668 ** vxworksFileId objects used by this file, all of which may be 669 ** shared by multiple threads. 
670 ** 671 ** Function unixMutexHeld() is used to assert() that the global mutex 672 ** is held when required. This function is only used as part of assert() 673 ** statements. e.g. 674 ** 675 ** unixEnterMutex() 676 ** assert( unixMutexHeld() ); 677 ** unixEnterLeave() 678 */ 679 static void unixEnterMutex(void){ 680 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); 681 } 682 static void unixLeaveMutex(void){ 683 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); 684 } 685 #ifdef SQLITE_DEBUG 686 static int unixMutexHeld(void) { 687 return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); 688 } 689 #endif 690 691 692 #ifdef SQLITE_HAVE_OS_TRACE 693 /* 694 ** Helper function for printing out trace information from debugging 695 ** binaries. This returns the string representation of the supplied 696 ** integer lock-type. 697 */ 698 static const char *azFileLock(int eFileLock){ 699 switch( eFileLock ){ 700 case NO_LOCK: return "NONE"; 701 case SHARED_LOCK: return "SHARED"; 702 case RESERVED_LOCK: return "RESERVED"; 703 case PENDING_LOCK: return "PENDING"; 704 case EXCLUSIVE_LOCK: return "EXCLUSIVE"; 705 } 706 return "ERROR"; 707 } 708 #endif 709 710 #ifdef SQLITE_LOCK_TRACE 711 /* 712 ** Print out information about all locking operations. 713 ** 714 ** This routine is used for troubleshooting locks on multithreaded 715 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE 716 ** command-line option on the compiler. This code is normally 717 ** turned off. 
718 */ 719 static int lockTrace(int fd, int op, struct flock *p){ 720 char *zOpName, *zType; 721 int s; 722 int savedErrno; 723 if( op==F_GETLK ){ 724 zOpName = "GETLK"; 725 }else if( op==F_SETLK ){ 726 zOpName = "SETLK"; 727 }else{ 728 s = osFcntl(fd, op, p); 729 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); 730 return s; 731 } 732 if( p->l_type==F_RDLCK ){ 733 zType = "RDLCK"; 734 }else if( p->l_type==F_WRLCK ){ 735 zType = "WRLCK"; 736 }else if( p->l_type==F_UNLCK ){ 737 zType = "UNLCK"; 738 }else{ 739 assert( 0 ); 740 } 741 assert( p->l_whence==SEEK_SET ); 742 s = osFcntl(fd, op, p); 743 savedErrno = errno; 744 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", 745 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, 746 (int)p->l_pid, s); 747 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ 748 struct flock l2; 749 l2 = *p; 750 osFcntl(fd, F_GETLK, &l2); 751 if( l2.l_type==F_RDLCK ){ 752 zType = "RDLCK"; 753 }else if( l2.l_type==F_WRLCK ){ 754 zType = "WRLCK"; 755 }else if( l2.l_type==F_UNLCK ){ 756 zType = "UNLCK"; 757 }else{ 758 assert( 0 ); 759 } 760 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", 761 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); 762 } 763 errno = savedErrno; 764 return s; 765 } 766 #undef osFcntl 767 #define osFcntl lockTrace 768 #endif /* SQLITE_LOCK_TRACE */ 769 770 /* 771 ** Retry ftruncate() calls that fail due to EINTR 772 ** 773 ** All calls to ftruncate() within this file should be made through 774 ** this wrapper. On the Android platform, bypassing the logic below 775 ** could lead to a corrupt database. 776 */ 777 static int robust_ftruncate(int h, sqlite3_int64 sz){ 778 int rc; 779 #ifdef __ANDROID__ 780 /* On Android, ftruncate() always uses 32-bit offsets, even if 781 ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to 782 ** truncate a file to any size larger than 2GiB. Silently ignore any 783 ** such attempts. 
*/ 784 if( sz>(sqlite3_int64)0x7FFFFFFF ){ 785 rc = SQLITE_OK; 786 }else 787 #endif 788 do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); 789 return rc; 790 } 791 792 /* 793 ** This routine translates a standard POSIX errno code into something 794 ** useful to the clients of the sqlite3 functions. Specifically, it is 795 ** intended to translate a variety of "try again" errors into SQLITE_BUSY 796 ** and a variety of "please close the file descriptor NOW" errors into 797 ** SQLITE_IOERR 798 ** 799 ** Errors during initialization of locks, or file system support for locks, 800 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. 801 */ 802 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { 803 assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || 804 (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 805 (sqliteIOErr == SQLITE_IOERR_RDLOCK) || 806 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ); 807 switch (posixError) { 808 case EACCES: 809 case EAGAIN: 810 case ETIMEDOUT: 811 case EBUSY: 812 case EINTR: 813 case ENOLCK: 814 /* random NFS retry error, unless during file system support 815 * introspection, in which it actually means what it says */ 816 return SQLITE_BUSY; 817 818 case EPERM: 819 return SQLITE_PERM; 820 821 default: 822 return sqliteIOErr; 823 } 824 } 825 826 827 /****************************************************************************** 828 ****************** Begin Unique File ID Utility Used By VxWorks *************** 829 ** 830 ** On most versions of unix, we can get a unique ID for a file by concatenating 831 ** the device number and the inode number. But this does not work on VxWorks. 832 ** On VxWorks, a unique file id must be based on the canonical filename. 833 ** 834 ** A pointer to an instance of the following structure can be used as a 835 ** unique file ID in VxWorks. Each instance of this structure contains 836 ** a copy of the canonical filename. There is also a reference count. 
837 ** The structure is reclaimed when the number of pointers to it drops to 838 ** zero. 839 ** 840 ** There are never very many files open at one time and lookups are not 841 ** a performance-critical path, so it is sufficient to put these 842 ** structures on a linked list. 843 */ 844 struct vxworksFileId { 845 struct vxworksFileId *pNext; /* Next in a list of them all */ 846 int nRef; /* Number of references to this one */ 847 int nName; /* Length of the zCanonicalName[] string */ 848 char *zCanonicalName; /* Canonical filename */ 849 }; 850 851 #if OS_VXWORKS 852 /* 853 ** All unique filenames are held on a linked list headed by this 854 ** variable: 855 */ 856 static struct vxworksFileId *vxworksFileList = 0; 857 858 /* 859 ** Simplify a filename into its canonical form 860 ** by making the following changes: 861 ** 862 ** * removing any trailing and duplicate / 863 ** * convert /./ into just / 864 ** * convert /A/../ where A is any simple name into just / 865 ** 866 ** Changes are made in-place. Return the new name length. 867 ** 868 ** The original filename is in z[0..n-1]. Return the number of 869 ** characters in the simplified name. 870 */ 871 static int vxworksSimplifyName(char *z, int n){ 872 int i, j; 873 while( n>1 && z[n-1]=='/' ){ n--; } 874 for(i=j=0; i<n; i++){ 875 if( z[i]=='/' ){ 876 if( z[i+1]=='/' ) continue; 877 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 878 i += 1; 879 continue; 880 } 881 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ 882 while( j>0 && z[j-1]!='/' ){ j--; } 883 if( j>0 ){ j--; } 884 i += 2; 885 continue; 886 } 887 } 888 z[j++] = z[i]; 889 } 890 z[j] = 0; 891 return j; 892 } 893 894 /* 895 ** Find a unique file ID for the given absolute pathname. Return 896 ** a pointer to the vxworksFileId object. This pointer is the unique 897 ** file ID. 898 ** 899 ** The nRef field of the vxworksFileId object is incremented before 900 ** the object is returned. 
A new vxworksFileId object is created
** and added to the global list if necessary.
**
** If a memory allocation error occurs, return NULL.
*/
static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
  struct vxworksFileId *pKey;     /* Search key; becomes the new ID if unmatched */
  struct vxworksFileId *pIter;    /* Cursor over the existing file IDs */
  struct vxworksFileId *pResult;  /* The file ID handed back to the caller */
  int nCanon;                     /* Length of the name, canonical once simplified */

  assert( zAbsoluteName[0]=='/' );
  nCanon = (int)strlen(zAbsoluteName);
  /* The name is stored in the same allocation, right after the header */
  pKey = sqlite3_malloc64( sizeof(*pKey) + (nCanon+1) );
  if( pKey==0 ) return 0;
  pKey->zCanonicalName = (char*)&pKey[1];
  memcpy(pKey->zCanonicalName, zAbsoluteName, nCanon+1);
  nCanon = vxworksSimplifyName(pKey->zCanonicalName, nCanon);

  /* Walk the global list, under the global mutex, looking for an entry
  ** whose canonical name has the same length and the same bytes.
  */
  unixEnterMutex();
  pIter = vxworksFileList;
  while( pIter!=0
      && (pIter->nName!=nCanon
          || memcmp(pIter->zCanonicalName, pKey->zCanonicalName, nCanon)!=0)
  ){
    pIter = pIter->pNext;
  }

  if( pIter!=0 ){
    /* Already known: discard the search key and share the existing ID */
    sqlite3_free(pKey);
    pIter->nRef++;
    pResult = pIter;
  }else{
    /* Not found: the search key becomes a new entry at the list head */
    pKey->nRef = 1;
    pKey->nName = nCanon;
    pKey->pNext = vxworksFileList;
    vxworksFileList = pKey;
    pResult = pKey;
  }
  unixLeaveMutex();
  return pResult;
}

/*
** Decrement the reference count on a vxworksFileId object.  Free
** the object when the reference count reaches zero.
946 */ 947 static void vxworksReleaseFileId(struct vxworksFileId *pId){ 948 unixEnterMutex(); 949 assert( pId->nRef>0 ); 950 pId->nRef--; 951 if( pId->nRef==0 ){ 952 struct vxworksFileId **pp; 953 for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} 954 assert( *pp==pId ); 955 *pp = pId->pNext; 956 sqlite3_free(pId); 957 } 958 unixLeaveMutex(); 959 } 960 #endif /* OS_VXWORKS */ 961 /*************** End of Unique File ID Utility Used By VxWorks **************** 962 ******************************************************************************/ 963 964 965 /****************************************************************************** 966 *************************** Posix Advisory Locking **************************** 967 ** 968 ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) 969 ** section 6.5.2.2 lines 483 through 490 specify that when a process 970 ** sets or clears a lock, that operation overrides any prior locks set 971 ** by the same process. It does not explicitly say so, but this implies 972 ** that it overrides locks set by the same process using a different 973 ** file descriptor. Consider this test case: 974 ** 975 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); 976 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); 977 ** 978 ** Suppose ./file1 and ./file2 are really the same file (because 979 ** one is a hard or symbolic link to the other) then if you set 980 ** an exclusive lock on fd1, then try to get an exclusive lock 981 ** on fd2, it works. I would have expected the second lock to 982 ** fail since there was already a lock on the file due to fd1. 983 ** But not so. Since both locks came from the same process, the 984 ** second overrides the first, even though they were on different 985 ** file descriptors opened on different file names. 986 ** 987 ** This means that we cannot use POSIX locks to synchronize file access 988 ** among competing threads of the same process. 
POSIX locks will work fine 989 ** to synchronize access for threads in separate processes, but not 990 ** threads within the same process. 991 ** 992 ** To work around the problem, SQLite has to manage file locks internally 993 ** on its own. Whenever a new database is opened, we have to find the 994 ** specific inode of the database file (the inode is determined by the 995 ** st_dev and st_ino fields of the stat structure that fstat() fills in) 996 ** and check for locks already existing on that inode. When locks are 997 ** created or removed, we have to look at our own internal record of the 998 ** locks to see if another thread has previously set a lock on that same 999 ** inode. 1000 ** 1001 ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. 1002 ** For VxWorks, we have to use the alternative unique ID system based on 1003 ** canonical filename and implemented in the previous division.) 1004 ** 1005 ** The sqlite3_file structure for POSIX is no longer just an integer file 1006 ** descriptor. It is now a structure that holds the integer file 1007 ** descriptor and a pointer to a structure that describes the internal 1008 ** locks on the corresponding inode. There is one locking structure 1009 ** per inode, so if the same inode is opened twice, both unixFile structures 1010 ** point to the same locking structure. The locking structure keeps 1011 ** a reference count (so we will know when to delete it) and a "cnt" 1012 ** field that tells us its internal lock status. cnt==0 means the 1013 ** file is unlocked. cnt==-1 means the file has an exclusive lock. 1014 ** cnt>0 means there are cnt shared locks on the file. 1015 ** 1016 ** Any attempt to lock or unlock a file first checks the locking 1017 ** structure. The fcntl() system call is only invoked to set a 1018 ** POSIX lock if the internal lock structure transitions between 1019 ** a locked and an unlocked state. 
**
** But wait:  there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close a unixFile, if there are
** other unixFile objects open on the same inode that are holding locks,
** the call to close() the file descriptor is deferred until all of the
** locks clear.  The unixInodeInfo structure keeps a list of file
** descriptors that need to be closed and that list is walked (and
** cleared) when the last lock clears.
**
** Yet another problem:  LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B.  But there is no way to know at compile-time which
** threading library is being used.  So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
*/

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
**
** findInodeInfo() clears a key with memset() before filling it in and
** compares keys with memcmp() over the whole structure, so every byte
** of the structure takes part in the comparison.
*/
struct unixFileId {
  dev_t dev;                  /* Device number */
#if OS_VXWORKS
  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
#else
  ino_t ino;                  /* Inode number */
#endif
};

/*
** An instance of the following structure is allocated for each open
** inode.  Or, on LinuxThreads, there is one of these structures for
** each inode opened by each thread.
**
** A single inode can have multiple file descriptors, so each unixFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of unixFile pointing to it.
**
** Objects are created by findInodeInfo() and released by
** releaseInodeInfo(); both require the unixEnterMutex() mutex to be held.
*/
struct unixInodeInfo {
  struct unixFileId fileId;       /* The lookup key */
  int nShared;                    /* Number of SHARED locks held */
  unsigned char eFileLock;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  unsigned char bProcessLock;     /* An exclusive process lock is held */
  int nRef;                       /* Number of pointers to this structure */
  unixShmNode *pShmNode;          /* Shared memory associated with this inode */
  int nLock;                      /* Number of outstanding file locks */
  UnixUnusedFd *pUnused;          /* Unused file descriptors to close */
  unixInodeInfo *pNext;           /* List of all unixInodeInfo objects */
  unixInodeInfo *pPrev;           /*    .... doubly linked */
#if SQLITE_ENABLE_LOCKING_STYLE
  unsigned long long sharedByte;  /* for AFP simulated shared lock */
#endif
#if OS_VXWORKS
  sem_t *pSem;                    /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+2];  /* Name of that semaphore */
#endif
};

/*
** Head of the doubly-linked list of all unixInodeInfo objects.
*/
static unixInodeInfo *inodeList = 0;

/*
**
** This function - unixLogErrorAtLine(), is only ever called via the macro
** unixLogError().
1106 ** 1107 ** It is invoked after an error occurs in an OS function and errno has been 1108 ** set. It logs a message using sqlite3_log() containing the current value of 1109 ** errno and, if possible, the human-readable equivalent from strerror() or 1110 ** strerror_r(). 1111 ** 1112 ** The first argument passed to the macro should be the error code that 1113 ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). 1114 ** The two subsequent arguments should be the name of the OS function that 1115 ** failed (e.g. "unlink", "open") and the associated file-system path, 1116 ** if any. 1117 */ 1118 #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) 1119 static int unixLogErrorAtLine( 1120 int errcode, /* SQLite error code */ 1121 const char *zFunc, /* Name of OS function that failed */ 1122 const char *zPath, /* File path associated with error */ 1123 int iLine /* Source line number where error occurred */ 1124 ){ 1125 char *zErr; /* Message from strerror() or equivalent */ 1126 int iErrno = errno; /* Saved syscall error number */ 1127 1128 /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use 1129 ** the strerror() function to obtain the human-readable error message 1130 ** equivalent to errno. Otherwise, use strerror_r(). 1131 */ 1132 #if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) 1133 char aErr[80]; 1134 memset(aErr, 0, sizeof(aErr)); 1135 zErr = aErr; 1136 1137 /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, 1138 ** assume that the system provides the GNU version of strerror_r() that 1139 ** returns a pointer to a buffer containing the error message. That pointer 1140 ** may point to aErr[], or it may point to some static storage somewhere. 1141 ** Otherwise, assume that the system provides the POSIX version of 1142 ** strerror_r(), which always writes an error message into aErr[]. 
1143 ** 1144 ** If the code incorrectly assumes that it is the POSIX version that is 1145 ** available, the error message will often be an empty string. Not a 1146 ** huge problem. Incorrectly concluding that the GNU version is available 1147 ** could lead to a segfault though. 1148 */ 1149 #if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) 1150 zErr = 1151 # endif 1152 strerror_r(iErrno, aErr, sizeof(aErr)-1); 1153 1154 #elif SQLITE_THREADSAFE 1155 /* This is a threadsafe build, but strerror_r() is not available. */ 1156 zErr = ""; 1157 #else 1158 /* Non-threadsafe build, use strerror(). */ 1159 zErr = strerror(iErrno); 1160 #endif 1161 1162 if( zPath==0 ) zPath = ""; 1163 sqlite3_log(errcode, 1164 "os_unix.c:%d: (%d) %s(%s) - %s", 1165 iLine, iErrno, zFunc, zPath, zErr 1166 ); 1167 1168 return errcode; 1169 } 1170 1171 /* 1172 ** Close a file descriptor. 1173 ** 1174 ** We assume that close() almost always works, since it is only in a 1175 ** very sick application or on a very sick platform that it might fail. 1176 ** If it does fail, simply leak the file descriptor, but do log the 1177 ** error. 1178 ** 1179 ** Note that it is not safe to retry close() after EINTR since the 1180 ** file descriptor might have already been reused by another thread. 1181 ** So we don't even try to recover from an EINTR. Just log the error 1182 ** and move on. 1183 */ 1184 static void robust_close(unixFile *pFile, int h, int lineno){ 1185 if( osClose(h) ){ 1186 unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", 1187 pFile ? pFile->zPath : 0, lineno); 1188 } 1189 } 1190 1191 /* 1192 ** Set the pFile->lastErrno. Do this in a subroutine as that provides 1193 ** a convenient place to set a breakpoint. 1194 */ 1195 static void storeLastErrno(unixFile *pFile, int error){ 1196 pFile->lastErrno = error; 1197 } 1198 1199 /* 1200 ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. 
1201 */ 1202 static void closePendingFds(unixFile *pFile){ 1203 unixInodeInfo *pInode = pFile->pInode; 1204 UnixUnusedFd *p; 1205 UnixUnusedFd *pNext; 1206 for(p=pInode->pUnused; p; p=pNext){ 1207 pNext = p->pNext; 1208 robust_close(pFile, p->fd, __LINE__); 1209 sqlite3_free(p); 1210 } 1211 pInode->pUnused = 0; 1212 } 1213 1214 /* 1215 ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). 1216 ** 1217 ** The mutex entered using the unixEnterMutex() function must be held 1218 ** when this function is called. 1219 */ 1220 static void releaseInodeInfo(unixFile *pFile){ 1221 unixInodeInfo *pInode = pFile->pInode; 1222 assert( unixMutexHeld() ); 1223 if( ALWAYS(pInode) ){ 1224 pInode->nRef--; 1225 if( pInode->nRef==0 ){ 1226 assert( pInode->pShmNode==0 ); 1227 closePendingFds(pFile); 1228 if( pInode->pPrev ){ 1229 assert( pInode->pPrev->pNext==pInode ); 1230 pInode->pPrev->pNext = pInode->pNext; 1231 }else{ 1232 assert( inodeList==pInode ); 1233 inodeList = pInode->pNext; 1234 } 1235 if( pInode->pNext ){ 1236 assert( pInode->pNext->pPrev==pInode ); 1237 pInode->pNext->pPrev = pInode->pPrev; 1238 } 1239 sqlite3_free(pInode); 1240 } 1241 } 1242 } 1243 1244 /* 1245 ** Given a file descriptor, locate the unixInodeInfo object that 1246 ** describes that file descriptor. Create a new one if necessary. The 1247 ** return value might be uninitialized if an error occurs. 1248 ** 1249 ** The mutex entered using the unixEnterMutex() function must be held 1250 ** when this function is called. 1251 ** 1252 ** Return an appropriate error code. 
1253 */ 1254 static int findInodeInfo( 1255 unixFile *pFile, /* Unix file with file desc used in the key */ 1256 unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ 1257 ){ 1258 int rc; /* System call return code */ 1259 int fd; /* The file descriptor for pFile */ 1260 struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ 1261 struct stat statbuf; /* Low-level file information */ 1262 unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ 1263 1264 assert( unixMutexHeld() ); 1265 1266 /* Get low-level information about the file that we can used to 1267 ** create a unique name for the file. 1268 */ 1269 fd = pFile->h; 1270 rc = osFstat(fd, &statbuf); 1271 if( rc!=0 ){ 1272 storeLastErrno(pFile, errno); 1273 #if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS) 1274 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; 1275 #endif 1276 return SQLITE_IOERR; 1277 } 1278 1279 #ifdef __APPLE__ 1280 /* On OS X on an msdos filesystem, the inode number is reported 1281 ** incorrectly for zero-size files. See ticket #3260. To work 1282 ** around this problem (we consider it a bug in OS X, not SQLite) 1283 ** we always increase the file size to 1 by writing a single byte 1284 ** prior to accessing the inode number. The one byte written is 1285 ** an ASCII 'S' character which also happens to be the first byte 1286 ** in the header of every SQLite database. In this way, if there 1287 ** is a race condition such that another thread has already populated 1288 ** the first page of the database, no damage is done. 
1289 */ 1290 if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ 1291 do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); 1292 if( rc!=1 ){ 1293 storeLastErrno(pFile, errno); 1294 return SQLITE_IOERR; 1295 } 1296 rc = osFstat(fd, &statbuf); 1297 if( rc!=0 ){ 1298 storeLastErrno(pFile, errno); 1299 return SQLITE_IOERR; 1300 } 1301 } 1302 #endif 1303 1304 memset(&fileId, 0, sizeof(fileId)); 1305 fileId.dev = statbuf.st_dev; 1306 #if OS_VXWORKS 1307 fileId.pId = pFile->pId; 1308 #else 1309 fileId.ino = statbuf.st_ino; 1310 #endif 1311 pInode = inodeList; 1312 while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ 1313 pInode = pInode->pNext; 1314 } 1315 if( pInode==0 ){ 1316 pInode = sqlite3_malloc64( sizeof(*pInode) ); 1317 if( pInode==0 ){ 1318 return SQLITE_NOMEM_BKPT; 1319 } 1320 memset(pInode, 0, sizeof(*pInode)); 1321 memcpy(&pInode->fileId, &fileId, sizeof(fileId)); 1322 pInode->nRef = 1; 1323 pInode->pNext = inodeList; 1324 pInode->pPrev = 0; 1325 if( inodeList ) inodeList->pPrev = pInode; 1326 inodeList = pInode; 1327 }else{ 1328 pInode->nRef++; 1329 } 1330 *ppInode = pInode; 1331 return SQLITE_OK; 1332 } 1333 1334 /* 1335 ** Return TRUE if pFile has been renamed or unlinked since it was first opened. 1336 */ 1337 static int fileHasMoved(unixFile *pFile){ 1338 #if OS_VXWORKS 1339 return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; 1340 #else 1341 struct stat buf; 1342 return pFile->pInode!=0 && 1343 (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino); 1344 #endif 1345 } 1346 1347 1348 /* 1349 ** Check a unixFile that is a database. Verify the following: 1350 ** 1351 ** (1) There is exactly one hard link on the file 1352 ** (2) The file is not a symbolic link 1353 ** (3) The file has not been renamed or unlinked 1354 ** 1355 ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. 
1356 */ 1357 static void verifyDbFile(unixFile *pFile){ 1358 struct stat buf; 1359 int rc; 1360 1361 /* These verifications occurs for the main database only */ 1362 if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return; 1363 1364 rc = osFstat(pFile->h, &buf); 1365 if( rc!=0 ){ 1366 sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); 1367 return; 1368 } 1369 if( buf.st_nlink==0 ){ 1370 sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); 1371 return; 1372 } 1373 if( buf.st_nlink>1 ){ 1374 sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); 1375 return; 1376 } 1377 if( fileHasMoved(pFile) ){ 1378 sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); 1379 return; 1380 } 1381 } 1382 1383 1384 /* 1385 ** This routine checks if there is a RESERVED lock held on the specified 1386 ** file by this or any other process. If such a lock is held, set *pResOut 1387 ** to a non-zero value otherwise *pResOut is set to zero. The return value 1388 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 1389 */ 1390 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ 1391 int rc = SQLITE_OK; 1392 int reserved = 0; 1393 unixFile *pFile = (unixFile*)id; 1394 1395 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 1396 1397 assert( pFile ); 1398 assert( pFile->eFileLock<=SHARED_LOCK ); 1399 unixEnterMutex(); /* Because pFile->pInode is shared across threads */ 1400 1401 /* Check if a thread in this process holds such a lock */ 1402 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 1403 reserved = 1; 1404 } 1405 1406 /* Otherwise see if some other process holds it. 
1407 */ 1408 #ifndef __DJGPP__ 1409 if( !reserved && !pFile->pInode->bProcessLock ){ 1410 struct flock lock; 1411 lock.l_whence = SEEK_SET; 1412 lock.l_start = RESERVED_BYTE; 1413 lock.l_len = 1; 1414 lock.l_type = F_WRLCK; 1415 if( osFcntl(pFile->h, F_GETLK, &lock) ){ 1416 rc = SQLITE_IOERR_CHECKRESERVEDLOCK; 1417 storeLastErrno(pFile, errno); 1418 } else if( lock.l_type!=F_UNLCK ){ 1419 reserved = 1; 1420 } 1421 } 1422 #endif 1423 1424 unixLeaveMutex(); 1425 OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); 1426 1427 *pResOut = reserved; 1428 return rc; 1429 } 1430 1431 /* 1432 ** Attempt to set a system-lock on the file pFile. The lock is 1433 ** described by pLock. 1434 ** 1435 ** If the pFile was opened read/write from unix-excl, then the only lock 1436 ** ever obtained is an exclusive lock, and it is obtained exactly once 1437 ** the first time any lock is attempted. All subsequent system locking 1438 ** operations become no-ops. Locking operations still happen internally, 1439 ** in order to coordinate access between separate database connections 1440 ** within this process, but all of that is handled in memory and the 1441 ** operating system does not participate. 1442 ** 1443 ** This function is a pass-through to fcntl(F_SETLK) if pFile is using 1444 ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" 1445 ** and is read-only. 1446 ** 1447 ** Zero is returned if the call completes successfully, or -1 if a call 1448 ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). 
*/
static int unixFileLock(unixFile *pFile, struct flock *pLock){
  int rc;
  unixInodeInfo *pInode = pFile->pInode;
  assert( unixMutexHeld() );
  assert( pInode!=0 );
  /* UNIXFILE_EXCL set and UNIXFILE_RDONLY clear: pLock is ignored.  A
  ** single write-lock over the whole shared byte range is taken the
  ** first time through and recorded in bProcessLock; every later call
  ** succeeds without touching the OS.
  */
  if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){
    if( pInode->bProcessLock==0 ){
      struct flock lock;
      assert( pInode->nLock==0 );
      lock.l_whence = SEEK_SET;
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
      lock.l_type = F_WRLCK;
      rc = osFcntl(pFile->h, F_SETLK, &lock);
      if( rc<0 ) return rc;
      pInode->bProcessLock = 1;
      pInode->nLock++;
    }else{
      rc = 0;
    }
  }else{
    /* Ordinary case: hand the caller's lock description to fcntl(). */
    rc = osFcntl(pFile->h, F_SETLK, pLock);
  }
  return rc;
}

/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
**     (1) SHARED_LOCK
**     (2) RESERVED_LOCK
**     (3) PENDING_LOCK
**     (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between.  The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal.  The following chart shows the allowed
** transitions and the inserted intermediate states:
**
**    UNLOCKED -> SHARED
**    SHARED -> RESERVED
**    SHARED -> (PENDING) -> EXCLUSIVE
**    RESERVED -> (PENDING) -> EXCLUSIVE
**    PENDING -> EXCLUSIVE
**
** This routine will only increase a lock.  Use the sqlite3OsUnlock()
** routine to lower a locking level.
**
** Returns SQLITE_OK on success, SQLITE_BUSY if the lock cannot be
** obtained, or another error code derived from the failing system call.
*/
static int unixLock(sqlite3_file *id, int eFileLock){
  /* The following describes the implementation of the various locks and
  ** lock transitions in terms of the POSIX advisory shared and exclusive
  ** lock primitives (called read-locks and write-locks below, to avoid
  ** confusion with SQLite lock names). The algorithms are complicated
  ** slightly in order to be compatible with Windows95 systems simultaneously
  ** accessing the same database file, in case that is ever required.
  **
  ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
  ** byte', each a single byte at a well known offset, and the 'shared byte
  ** range', a range of 510 bytes at a well known offset.
  **
  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  ** byte'.  If this is successful, 'shared byte range' is read-locked
  ** and the lock on the 'pending byte' released.  (Legacy note:  When
  ** SQLite was first developed, Windows95 systems were still very common,
  ** and Windows95 lacks a shared-lock capability.  So on Windows95, a
  ** single randomly selected byte from the 'shared byte range' is locked.
  ** Windows95 is now pretty much extinct, but this work-around for the
  ** lack of shared-locks on Windows95 lives on, for backwards
  ** compatibility.)
  **
  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  ** A RESERVED lock is implemented by grabbing a write-lock on the
  ** 'reserved byte'.
  **
  ** A process may only obtain a PENDING lock after it has obtained a
  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  ** on the 'pending byte'. This ensures that no new SHARED locks can be
  ** obtained, but existing SHARED locks are allowed to persist. A process
  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  ** This property is used by the algorithm for rolling back a journal file
  ** after a crash.
  **
  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  ** implemented by obtaining a write-lock on the entire 'shared byte
  ** range'. Since all other locks require a read-lock on one of the bytes
  ** within this range, this ensures that no other locks are held on the
  ** database.
  */
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int tErrno = 0;

  assert( pFile );
  OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
      azFileLock(eFileLock), azFileLock(pFile->eFileLock),
      azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared,
      osGetpid(0)));

  /* If there is already a lock of this type or more restrictive on the
  ** unixFile, do nothing. Don't use the end_lock: exit path, as
  ** unixEnterMutex() hasn't been called yet.
  */
  if( pFile->eFileLock>=eFileLock ){
    OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,
            azFileLock(eFileLock)));
    return SQLITE_OK;
  }

  /* Make sure the locking sequence is correct.
  **  (1) We never move from unlocked to anything higher than shared lock.
  **  (2) SQLite never explicitly requests a pending lock.
  **  (3) A shared lock is always held when a reserve lock is requested.
  */
  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  assert( eFileLock!=PENDING_LOCK );
  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );

  /* This mutex is needed because pFile->pInode is shared across threads
  */
  unixEnterMutex();
  pInode = pFile->pInode;

  /* If some thread using this PID has a lock via a different unixFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (pFile->eFileLock!=pInode->eFileLock &&
          (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  ){
    rc = SQLITE_BUSY;
    goto end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.  No system call is made on this path.
  */
  if( eFileLock==SHARED_LOCK &&
      (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto end_lock;
  }


  /* A PENDING lock is needed before acquiring a SHARED lock and before
  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  ** be released.
  */
  lock.l_len = 1L;
  lock.l_whence = SEEK_SET;
  if( eFileLock==SHARED_LOCK
      || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  ){
    lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
    lock.l_start = PENDING_BYTE;
    if( unixFileLock(pFile, &lock) ){
      /* Save errno at once; it is only recorded on the file when the
      ** failure is something other than SQLITE_BUSY. */
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }
  }


  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( eFileLock==SHARED_LOCK ){
    assert( pInode->nShared==0 );
    assert( pInode->eFileLock==0 );
    assert( rc==SQLITE_OK );

    /* Now get the read-lock */
    lock.l_start = SHARED_FIRST;
    lock.l_len = SHARED_SIZE;
    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
    }

    /* Drop the temporary PENDING lock.  This is attempted whether or not
    ** the read-lock above succeeded; an unlock failure is only reported
    ** when no earlier error has been recorded in rc. */
    lock.l_start = PENDING_BYTE;
    lock.l_len = 1L;
    lock.l_type = F_UNLCK;
    if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
      /* This could happen with a network mount */
      tErrno = errno;
      rc = SQLITE_IOERR_UNLOCK;
    }

    if( rc ){
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }else{
      pFile->eFileLock = SHARED_LOCK;
      pInode->nLock++;
      pInode->nShared = 1;
    }
  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
    /* We are trying for an exclusive lock but another thread in this
    ** same process is still holding a shared lock. */
    rc = SQLITE_BUSY;
  }else{
    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    assert( 0!=pFile->eFileLock );
    lock.l_type = F_WRLCK;

    assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
    if( eFileLock==RESERVED_LOCK ){
      lock.l_start = RESERVED_BYTE;
      lock.l_len = 1L;
    }else{
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
    }

    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
    }
  }


#ifdef SQLITE_DEBUG
  /* Set up the transaction-counter change checking flags when
  ** transitioning from a SHARED to a RESERVED lock.  The change
  ** from SHARED to RESERVED marks the beginning of a normal
  ** write operation (not a hot journal rollback).
  */
  if( rc==SQLITE_OK
   && pFile->eFileLock<=SHARED_LOCK
   && eFileLock==RESERVED_LOCK
  ){
    pFile->transCntrChng = 0;
    pFile->dbUpdate = 0;
    pFile->inNormalWrite = 1;
  }
#endif


  /* Record the outcome.  On success both the handle and the inode move
  ** to the requested level.  A failed EXCLUSIVE request leaves both at
  ** PENDING.
  */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
    pInode->eFileLock = eFileLock;
  }else if( eFileLock==EXCLUSIVE_LOCK ){
    pFile->eFileLock = PENDING_LOCK;
    pInode->eFileLock = PENDING_LOCK;
  }

end_lock:
  unixLeaveMutex();
  OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),
      rc==SQLITE_OK ? "ok" : "failed"));
  return rc;
}

/*
** Add the file descriptor used by file handle pFile to the corresponding
** pUnused list.
**
** The pFile->pUnused entry is pushed onto the head of the inode's
** pUnused list; pFile then gives up both the entry and its descriptor
** (pFile->h is set to -1).
*/
static void setPendingFd(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  UnixUnusedFd *p = pFile->pUnused;
  p->pNext = pInode->pUnused;
  pInode->pUnused = p;
  pFile->h = -1;
  pFile->pUnused = 0;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
** the byte range is divided into 2 parts and the first part is unlocked then
** set to a read lock, then the other part is simply unlocked.  This works
** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
** remove the write lock on a region when a read lock is set.
*/
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int rc = SQLITE_OK;

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
      pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
      osGetpid(0)));

  assert( eFileLock<=SHARED_LOCK );
  /* Already at or below the requested level: nothing to do, and the
  ** mutex has not been taken yet so return directly. */
  if( pFile->eFileLock<=eFileLock ){
    return SQLITE_OK;
  }
  unixEnterMutex();
  pInode = pFile->pInode;
  assert( pInode->nShared!=0 );
  if( pFile->eFileLock>SHARED_LOCK ){
    assert( pInode->eFileLock==pFile->eFileLock );

#ifdef SQLITE_DEBUG
    /* When reducing a lock such that other processes can start
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    */
    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a
    ** write lock until the rest is covered by a read lock:
    **  1:   [WWWWW]
    **  2:   [....W]
    **  3:   [RRRRW]
    **  4:   [RRRR.]
    */
    if( eFileLock==SHARED_LOCK ){
#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
      (void)handleNFSUnlock;
      assert( handleNFSUnlock==0 );
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
      if( handleNFSUnlock ){
        int tErrno;               /* Error code from system call errors */
        off_t divSize = SHARED_SIZE - 1;

        /* Step 2: unlock all but the last byte of the shared range */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
        /* Step 3: read-lock that same part of the range */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
          if( IS_LOCK_ERROR(rc) ){
            storeLastErrno(pFile, tErrno);
          }
          goto end_unlock;
        }
        /* Step 4: unlock the remaining last byte */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST+divSize;
        lock.l_len = SHARED_SIZE-divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
      }else
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
      {
        /* Normal case: replace the write-lock on the shared range with
        ** a read-lock in a single call. */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = SHARED_SIZE;
        if( unixFileLock(pFile, &lock) ){
          /* In theory, the call to unixFileLock() cannot fail because another
          ** process is holding an incompatible lock. If it does, this
          ** indicates that the other process is not following the locking
          ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
          ** SQLITE_BUSY would confuse the upper layer (in practice it causes
          ** an assert to fail). */
          rc = SQLITE_IOERR_RDLOCK;
          storeLastErrno(pFile, errno);
          goto end_unlock;
        }
      }
    }
    /* Release the PENDING and RESERVED bytes with one two-byte unlock;
    ** the assert checks that the two bytes really are adjacent. */
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = PENDING_BYTE;
    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
    if( unixFileLock(pFile, &lock)==0 ){
      pInode->eFileLock = SHARED_LOCK;
    }else{
      rc = SQLITE_IOERR_UNLOCK;
      storeLastErrno(pFile, errno);
      goto end_unlock;
    }
  }
  if( eFileLock==NO_LOCK ){
    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pInode->nShared--;
    if( pInode->nShared==0 ){
      /* l_start==0 with l_len==0 describes the whole file */
      lock.l_type = F_UNLCK;
      lock.l_whence = SEEK_SET;
      lock.l_start = lock.l_len = 0L;
      if( unixFileLock(pFile, &lock)==0 ){
        pInode->eFileLock = NO_LOCK;
      }else{
        /* Even when the unlock call fails, both the inode and this
        ** handle are recorded as holding no lock. */
        rc = SQLITE_IOERR_UNLOCK;
        storeLastErrno(pFile, errno);
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }

    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pInode->nLock--;
    assert( pInode->nLock>=0 );
    if( pInode->nLock==0 ){
      closePendingFds(pFile);
    }
  }

end_unlock:
  unixLeaveMutex();
  /* The handle's own lock level is only lowered when everything worked
  ** (apart from the forced NO_LOCK in the failed whole-file unlock). */
  if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
  return rc;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
1908 */ 1909 static int unixUnlock(sqlite3_file *id, int eFileLock){ 1910 #if SQLITE_MAX_MMAP_SIZE>0 1911 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); 1912 #endif 1913 return posixUnlock(id, eFileLock, 0); 1914 } 1915 1916 #if SQLITE_MAX_MMAP_SIZE>0 1917 static int unixMapfile(unixFile *pFd, i64 nByte); 1918 static void unixUnmapfile(unixFile *pFd); 1919 #endif 1920 1921 /* 1922 ** This function performs the parts of the "close file" operation 1923 ** common to all locking schemes. It closes the directory and file 1924 ** handles, if they are valid, and sets all fields of the unixFile 1925 ** structure to 0. 1926 ** 1927 ** It is *not* necessary to hold the mutex when this routine is called, 1928 ** even on VxWorks. A mutex will be acquired on VxWorks by the 1929 ** vxworksReleaseFileId() routine. 1930 */ 1931 static int closeUnixFile(sqlite3_file *id){ 1932 unixFile *pFile = (unixFile*)id; 1933 #if SQLITE_MAX_MMAP_SIZE>0 1934 unixUnmapfile(pFile); 1935 #endif 1936 if( pFile->h>=0 ){ 1937 robust_close(pFile, pFile->h, __LINE__); 1938 pFile->h = -1; 1939 } 1940 #if OS_VXWORKS 1941 if( pFile->pId ){ 1942 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 1943 osUnlink(pFile->pId->zCanonicalName); 1944 } 1945 vxworksReleaseFileId(pFile->pId); 1946 pFile->pId = 0; 1947 } 1948 #endif 1949 #ifdef SQLITE_UNLINK_AFTER_CLOSE 1950 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 1951 osUnlink(pFile->zPath); 1952 sqlite3_free(*(char**)&pFile->zPath); 1953 pFile->zPath = 0; 1954 } 1955 #endif 1956 OSTRACE(("CLOSE %-3d\n", pFile->h)); 1957 OpenCounter(-1); 1958 sqlite3_free(pFile->pUnused); 1959 memset(pFile, 0, sizeof(unixFile)); 1960 return SQLITE_OK; 1961 } 1962 1963 /* 1964 ** Close a file. 1965 */ 1966 static int unixClose(sqlite3_file *id){ 1967 int rc = SQLITE_OK; 1968 unixFile *pFile = (unixFile *)id; 1969 verifyDbFile(pFile); 1970 unixUnlock(id, NO_LOCK); 1971 unixEnterMutex(); 1972 1973 /* unixFile.pInode is always valid here. 
Otherwise, a different close 1974 ** routine (e.g. nolockClose()) would be called instead. 1975 */ 1976 assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); 1977 if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){ 1978 /* If there are outstanding locks, do not actually close the file just 1979 ** yet because that would clear those locks. Instead, add the file 1980 ** descriptor to pInode->pUnused list. It will be automatically closed 1981 ** when the last lock is cleared. 1982 */ 1983 setPendingFd(pFile); 1984 } 1985 releaseInodeInfo(pFile); 1986 rc = closeUnixFile(id); 1987 unixLeaveMutex(); 1988 return rc; 1989 } 1990 1991 /************** End of the posix advisory lock implementation ***************** 1992 ******************************************************************************/ 1993 1994 /****************************************************************************** 1995 ****************************** No-op Locking ********************************** 1996 ** 1997 ** Of the various locking implementations available, this is by far the 1998 ** simplest: locking is ignored. No attempt is made to lock the database 1999 ** file for reading or writing. 2000 ** 2001 ** This locking mode is appropriate for use on read-only databases 2002 ** (ex: databases that are burned into CD-ROM, for example.) It can 2003 ** also be used if the application employs some external mechanism to 2004 ** prevent simultaneous access of the same database by two or more 2005 ** database connections. But there is a serious risk of database 2006 ** corruption if this locking mode is used in situations where multiple 2007 ** database connections are accessing the same database file at the same 2008 ** time and one or more of those connections are writing. 
2009 */ 2010 2011 static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ 2012 UNUSED_PARAMETER(NotUsed); 2013 *pResOut = 0; 2014 return SQLITE_OK; 2015 } 2016 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ 2017 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2018 return SQLITE_OK; 2019 } 2020 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ 2021 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2022 return SQLITE_OK; 2023 } 2024 2025 /* 2026 ** Close the file. 2027 */ 2028 static int nolockClose(sqlite3_file *id) { 2029 return closeUnixFile(id); 2030 } 2031 2032 /******************* End of the no-op lock implementation ********************* 2033 ******************************************************************************/ 2034 2035 /****************************************************************************** 2036 ************************* Begin dot-file Locking ****************************** 2037 ** 2038 ** The dotfile locking implementation uses the existence of separate lock 2039 ** files (really a directory) to control access to the database. This works 2040 ** on just about every filesystem imaginable. But there are serious downsides: 2041 ** 2042 ** (1) There is zero concurrency. A single reader blocks all other 2043 ** connections from reading or writing the database. 2044 ** 2045 ** (2) An application crash or power loss can leave stale lock files 2046 ** sitting around that need to be cleared manually. 2047 ** 2048 ** Nevertheless, a dotlock is an appropriate locking mode for use if no 2049 ** other locking strategy is available. 2050 ** 2051 ** Dotfile locking works by creating a subdirectory in the same directory as 2052 ** the database and with the same name but with a ".lock" extension added. 2053 ** The existence of a lock directory implies an EXCLUSIVE lock. All other 2054 ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. 
2055 */ 2056 2057 /* 2058 ** The file suffix added to the data base filename in order to create the 2059 ** lock directory. 2060 */ 2061 #define DOTLOCK_SUFFIX ".lock" 2062 2063 /* 2064 ** This routine checks if there is a RESERVED lock held on the specified 2065 ** file by this or any other process. If such a lock is held, set *pResOut 2066 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2067 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2068 ** 2069 ** In dotfile locking, either a lock exists or it does not. So in this 2070 ** variation of CheckReservedLock(), *pResOut is set to true if any lock 2071 ** is held on the file and false if the file is unlocked. 2072 */ 2073 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 2074 int rc = SQLITE_OK; 2075 int reserved = 0; 2076 unixFile *pFile = (unixFile*)id; 2077 2078 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2079 2080 assert( pFile ); 2081 reserved = osAccess((const char*)pFile->lockingContext, 0)==0; 2082 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); 2083 *pResOut = reserved; 2084 return rc; 2085 } 2086 2087 /* 2088 ** Lock the file with the lock specified by parameter eFileLock - one 2089 ** of the following: 2090 ** 2091 ** (1) SHARED_LOCK 2092 ** (2) RESERVED_LOCK 2093 ** (3) PENDING_LOCK 2094 ** (4) EXCLUSIVE_LOCK 2095 ** 2096 ** Sometimes when requesting one lock state, additional lock states 2097 ** are inserted in between. The locking might fail on one of the later 2098 ** transitions leaving the lock state different from what it started but 2099 ** still short of its goal. The following chart shows the allowed 2100 ** transitions and the inserted intermediate states: 2101 ** 2102 ** UNLOCKED -> SHARED 2103 ** SHARED -> RESERVED 2104 ** SHARED -> (PENDING) -> EXCLUSIVE 2105 ** RESERVED -> (PENDING) -> EXCLUSIVE 2106 ** PENDING -> EXCLUSIVE 2107 ** 2108 ** This routine will only increase a lock. 
Use the sqlite3OsUnlock() 2109 ** routine to lower a locking level. 2110 ** 2111 ** With dotfile locking, we really only support state (4): EXCLUSIVE. 2112 ** But we track the other locking levels internally. 2113 */ 2114 static int dotlockLock(sqlite3_file *id, int eFileLock) { 2115 unixFile *pFile = (unixFile*)id; 2116 char *zLockFile = (char *)pFile->lockingContext; 2117 int rc = SQLITE_OK; 2118 2119 2120 /* If we have any lock, then the lock file already exists. All we have 2121 ** to do is adjust our internal record of the lock level. 2122 */ 2123 if( pFile->eFileLock > NO_LOCK ){ 2124 pFile->eFileLock = eFileLock; 2125 /* Always update the timestamp on the old file */ 2126 #ifdef HAVE_UTIME 2127 utime(zLockFile, NULL); 2128 #else 2129 utimes(zLockFile, NULL); 2130 #endif 2131 return SQLITE_OK; 2132 } 2133 2134 /* grab an exclusive lock */ 2135 rc = osMkdir(zLockFile, 0777); 2136 if( rc<0 ){ 2137 /* failed to open/create the lock directory */ 2138 int tErrno = errno; 2139 if( EEXIST == tErrno ){ 2140 rc = SQLITE_BUSY; 2141 } else { 2142 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2143 if( rc!=SQLITE_BUSY ){ 2144 storeLastErrno(pFile, tErrno); 2145 } 2146 } 2147 return rc; 2148 } 2149 2150 /* got it, set the type and return ok */ 2151 pFile->eFileLock = eFileLock; 2152 return rc; 2153 } 2154 2155 /* 2156 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2157 ** must be either NO_LOCK or SHARED_LOCK. 2158 ** 2159 ** If the locking level of the file descriptor is already at or below 2160 ** the requested locking level, this routine is a no-op. 2161 ** 2162 ** When the locking level reaches NO_LOCK, delete the lock file. 
2163 */ 2164 static int dotlockUnlock(sqlite3_file *id, int eFileLock) { 2165 unixFile *pFile = (unixFile*)id; 2166 char *zLockFile = (char *)pFile->lockingContext; 2167 int rc; 2168 2169 assert( pFile ); 2170 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, 2171 pFile->eFileLock, osGetpid(0))); 2172 assert( eFileLock<=SHARED_LOCK ); 2173 2174 /* no-op if possible */ 2175 if( pFile->eFileLock==eFileLock ){ 2176 return SQLITE_OK; 2177 } 2178 2179 /* To downgrade to shared, simply update our internal notion of the 2180 ** lock state. No need to mess with the file on disk. 2181 */ 2182 if( eFileLock==SHARED_LOCK ){ 2183 pFile->eFileLock = SHARED_LOCK; 2184 return SQLITE_OK; 2185 } 2186 2187 /* To fully unlock the database, delete the lock file */ 2188 assert( eFileLock==NO_LOCK ); 2189 rc = osRmdir(zLockFile); 2190 if( rc<0 ){ 2191 int tErrno = errno; 2192 if( tErrno==ENOENT ){ 2193 rc = SQLITE_OK; 2194 }else{ 2195 rc = SQLITE_IOERR_UNLOCK; 2196 storeLastErrno(pFile, tErrno); 2197 } 2198 return rc; 2199 } 2200 pFile->eFileLock = NO_LOCK; 2201 return SQLITE_OK; 2202 } 2203 2204 /* 2205 ** Close a file. Make sure the lock has been released before closing. 2206 */ 2207 static int dotlockClose(sqlite3_file *id) { 2208 unixFile *pFile = (unixFile*)id; 2209 assert( id!=0 ); 2210 dotlockUnlock(id, NO_LOCK); 2211 sqlite3_free(pFile->lockingContext); 2212 return closeUnixFile(id); 2213 } 2214 /****************** End of the dot-file lock implementation ******************* 2215 ******************************************************************************/ 2216 2217 /****************************************************************************** 2218 ************************** Begin flock Locking ******************************** 2219 ** 2220 ** Use the flock() system call to do file locking. 
2221 ** 2222 ** flock() locking is like dot-file locking in that the various 2223 ** fine-grain locking levels supported by SQLite are collapsed into 2224 ** a single exclusive lock. In other words, SHARED, RESERVED, and 2225 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite 2226 ** still works when you do this, but concurrency is reduced since 2227 ** only a single process can be reading the database at a time. 2228 ** 2229 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off 2230 */ 2231 #if SQLITE_ENABLE_LOCKING_STYLE 2232 2233 /* 2234 ** Retry flock() calls that fail with EINTR 2235 */ 2236 #ifdef EINTR 2237 static int robust_flock(int fd, int op){ 2238 int rc; 2239 do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); 2240 return rc; 2241 } 2242 #else 2243 # define robust_flock(a,b) flock(a,b) 2244 #endif 2245 2246 2247 /* 2248 ** This routine checks if there is a RESERVED lock held on the specified 2249 ** file by this or any other process. If such a lock is held, set *pResOut 2250 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2251 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2252 */ 2253 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 2254 int rc = SQLITE_OK; 2255 int reserved = 0; 2256 unixFile *pFile = (unixFile*)id; 2257 2258 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2259 2260 assert( pFile ); 2261 2262 /* Check if a thread in this process holds such a lock */ 2263 if( pFile->eFileLock>SHARED_LOCK ){ 2264 reserved = 1; 2265 } 2266 2267 /* Otherwise see if some other process holds it. 
*/ 2268 if( !reserved ){ 2269 /* attempt to get the lock */ 2270 int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); 2271 if( !lrc ){ 2272 /* got the lock, unlock it */ 2273 lrc = robust_flock(pFile->h, LOCK_UN); 2274 if ( lrc ) { 2275 int tErrno = errno; 2276 /* unlock failed with an error */ 2277 lrc = SQLITE_IOERR_UNLOCK; 2278 storeLastErrno(pFile, tErrno); 2279 rc = lrc; 2280 } 2281 } else { 2282 int tErrno = errno; 2283 reserved = 1; 2284 /* someone else might have it reserved */ 2285 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2286 if( IS_LOCK_ERROR(lrc) ){ 2287 storeLastErrno(pFile, tErrno); 2288 rc = lrc; 2289 } 2290 } 2291 } 2292 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); 2293 2294 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2295 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ 2296 rc = SQLITE_OK; 2297 reserved=1; 2298 } 2299 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2300 *pResOut = reserved; 2301 return rc; 2302 } 2303 2304 /* 2305 ** Lock the file with the lock specified by parameter eFileLock - one 2306 ** of the following: 2307 ** 2308 ** (1) SHARED_LOCK 2309 ** (2) RESERVED_LOCK 2310 ** (3) PENDING_LOCK 2311 ** (4) EXCLUSIVE_LOCK 2312 ** 2313 ** Sometimes when requesting one lock state, additional lock states 2314 ** are inserted in between. The locking might fail on one of the later 2315 ** transitions leaving the lock state different from what it started but 2316 ** still short of its goal. The following chart shows the allowed 2317 ** transitions and the inserted intermediate states: 2318 ** 2319 ** UNLOCKED -> SHARED 2320 ** SHARED -> RESERVED 2321 ** SHARED -> (PENDING) -> EXCLUSIVE 2322 ** RESERVED -> (PENDING) -> EXCLUSIVE 2323 ** PENDING -> EXCLUSIVE 2324 ** 2325 ** flock() only really support EXCLUSIVE locks. We track intermediate 2326 ** lock states in the sqlite3_file structure, but all locks SHARED or 2327 ** above are really EXCLUSIVE locks and exclude all other processes from 2328 ** access the file. 
2329 ** 2330 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2331 ** routine to lower a locking level. 2332 */ 2333 static int flockLock(sqlite3_file *id, int eFileLock) { 2334 int rc = SQLITE_OK; 2335 unixFile *pFile = (unixFile*)id; 2336 2337 assert( pFile ); 2338 2339 /* if we already have a lock, it is exclusive. 2340 ** Just adjust level and punt on outta here. */ 2341 if (pFile->eFileLock > NO_LOCK) { 2342 pFile->eFileLock = eFileLock; 2343 return SQLITE_OK; 2344 } 2345 2346 /* grab an exclusive lock */ 2347 2348 if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { 2349 int tErrno = errno; 2350 /* didn't get, must be busy */ 2351 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2352 if( IS_LOCK_ERROR(rc) ){ 2353 storeLastErrno(pFile, tErrno); 2354 } 2355 } else { 2356 /* got it, set the type and return ok */ 2357 pFile->eFileLock = eFileLock; 2358 } 2359 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 2360 rc==SQLITE_OK ? "ok" : "failed")); 2361 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2362 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ 2363 rc = SQLITE_BUSY; 2364 } 2365 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2366 return rc; 2367 } 2368 2369 2370 /* 2371 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2372 ** must be either NO_LOCK or SHARED_LOCK. 2373 ** 2374 ** If the locking level of the file descriptor is already at or below 2375 ** the requested locking level, this routine is a no-op. 
2376 */ 2377 static int flockUnlock(sqlite3_file *id, int eFileLock) { 2378 unixFile *pFile = (unixFile*)id; 2379 2380 assert( pFile ); 2381 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, 2382 pFile->eFileLock, osGetpid(0))); 2383 assert( eFileLock<=SHARED_LOCK ); 2384 2385 /* no-op if possible */ 2386 if( pFile->eFileLock==eFileLock ){ 2387 return SQLITE_OK; 2388 } 2389 2390 /* shared can just be set because we always have an exclusive */ 2391 if (eFileLock==SHARED_LOCK) { 2392 pFile->eFileLock = eFileLock; 2393 return SQLITE_OK; 2394 } 2395 2396 /* no, really, unlock. */ 2397 if( robust_flock(pFile->h, LOCK_UN) ){ 2398 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2399 return SQLITE_OK; 2400 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2401 return SQLITE_IOERR_UNLOCK; 2402 }else{ 2403 pFile->eFileLock = NO_LOCK; 2404 return SQLITE_OK; 2405 } 2406 } 2407 2408 /* 2409 ** Close a file. 2410 */ 2411 static int flockClose(sqlite3_file *id) { 2412 assert( id!=0 ); 2413 flockUnlock(id, NO_LOCK); 2414 return closeUnixFile(id); 2415 } 2416 2417 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ 2418 2419 /******************* End of the flock lock implementation ********************* 2420 ******************************************************************************/ 2421 2422 /****************************************************************************** 2423 ************************ Begin Named Semaphore Locking ************************ 2424 ** 2425 ** Named semaphore locking is only supported on VxWorks. 2426 ** 2427 ** Semaphore locking is like dot-lock and flock in that it really only 2428 ** supports EXCLUSIVE locking. Only a single process can read or write 2429 ** the database file at a time. This reduces potential concurrency, but 2430 ** makes the lock implementation much easier. 2431 */ 2432 #if OS_VXWORKS 2433 2434 /* 2435 ** This routine checks if there is a RESERVED lock held on the specified 2436 ** file by this or any other process. 
If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero.  The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) {
  unixFile *pFile = (unixFile*)id;
  int rc = SQLITE_OK;
  int reserved;

  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

  assert( pFile );

  /* Does this handle itself hold such a lock? */
  reserved = (pFile->eFileLock>SHARED_LOCK);

  /* Otherwise probe the semaphore to see if someone else holds it. */
  if( !reserved ){
    sem_t *pSem = pFile->pInode->pSem;

    if( sem_trywait(pSem)!=-1 ){
      /* The semaphore was available; give it straight back */
      sem_post(pSem);
    }else{
      int tErrno = errno;
      if( tErrno==EAGAIN ){
        /* someone else has the lock when we are in NO_LOCK */
        reserved = (pFile->eFileLock < SHARED_LOCK);
      }else{
        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
        storeLastErrno(pFile, tErrno);
      }
    }
  }
  OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved));

  *pResOut = reserved;
  return rc;
}

/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
**     (1) SHARED_LOCK
**     (2) RESERVED_LOCK
**     (3) PENDING_LOCK
**     (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between.  The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal.
The following chart shows the allowed 2491 ** transitions and the inserted intermediate states: 2492 ** 2493 ** UNLOCKED -> SHARED 2494 ** SHARED -> RESERVED 2495 ** SHARED -> (PENDING) -> EXCLUSIVE 2496 ** RESERVED -> (PENDING) -> EXCLUSIVE 2497 ** PENDING -> EXCLUSIVE 2498 ** 2499 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate 2500 ** lock states in the sqlite3_file structure, but all locks SHARED or 2501 ** above are really EXCLUSIVE locks and exclude all other processes from 2502 ** access the file. 2503 ** 2504 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2505 ** routine to lower a locking level. 2506 */ 2507 static int semXLock(sqlite3_file *id, int eFileLock) { 2508 unixFile *pFile = (unixFile*)id; 2509 sem_t *pSem = pFile->pInode->pSem; 2510 int rc = SQLITE_OK; 2511 2512 /* if we already have a lock, it is exclusive. 2513 ** Just adjust level and punt on outta here. */ 2514 if (pFile->eFileLock > NO_LOCK) { 2515 pFile->eFileLock = eFileLock; 2516 rc = SQLITE_OK; 2517 goto sem_end_lock; 2518 } 2519 2520 /* lock semaphore now but bail out when already locked. */ 2521 if( sem_trywait(pSem)==-1 ){ 2522 rc = SQLITE_BUSY; 2523 goto sem_end_lock; 2524 } 2525 2526 /* got it, set the type and return ok */ 2527 pFile->eFileLock = eFileLock; 2528 2529 sem_end_lock: 2530 return rc; 2531 } 2532 2533 /* 2534 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2535 ** must be either NO_LOCK or SHARED_LOCK. 2536 ** 2537 ** If the locking level of the file descriptor is already at or below 2538 ** the requested locking level, this routine is a no-op. 
2539 */ 2540 static int semXUnlock(sqlite3_file *id, int eFileLock) { 2541 unixFile *pFile = (unixFile*)id; 2542 sem_t *pSem = pFile->pInode->pSem; 2543 2544 assert( pFile ); 2545 assert( pSem ); 2546 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, 2547 pFile->eFileLock, osGetpid(0))); 2548 assert( eFileLock<=SHARED_LOCK ); 2549 2550 /* no-op if possible */ 2551 if( pFile->eFileLock==eFileLock ){ 2552 return SQLITE_OK; 2553 } 2554 2555 /* shared can just be set because we always have an exclusive */ 2556 if (eFileLock==SHARED_LOCK) { 2557 pFile->eFileLock = eFileLock; 2558 return SQLITE_OK; 2559 } 2560 2561 /* no, really unlock. */ 2562 if ( sem_post(pSem)==-1 ) { 2563 int rc, tErrno = errno; 2564 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 2565 if( IS_LOCK_ERROR(rc) ){ 2566 storeLastErrno(pFile, tErrno); 2567 } 2568 return rc; 2569 } 2570 pFile->eFileLock = NO_LOCK; 2571 return SQLITE_OK; 2572 } 2573 2574 /* 2575 ** Close a file. 2576 */ 2577 static int semXClose(sqlite3_file *id) { 2578 if( id ){ 2579 unixFile *pFile = (unixFile*)id; 2580 semXUnlock(id, NO_LOCK); 2581 assert( pFile ); 2582 unixEnterMutex(); 2583 releaseInodeInfo(pFile); 2584 unixLeaveMutex(); 2585 closeUnixFile(id); 2586 } 2587 return SQLITE_OK; 2588 } 2589 2590 #endif /* OS_VXWORKS */ 2591 /* 2592 ** Named semaphore locking is only available on VxWorks. 2593 ** 2594 *************** End of the named semaphore lock implementation **************** 2595 ******************************************************************************/ 2596 2597 2598 /****************************************************************************** 2599 *************************** Begin AFP Locking ********************************* 2600 ** 2601 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found 2602 ** on Apple Macintosh computers - both OS9 and OSX. 2603 ** 2604 ** Third-party implementations of AFP are available. But this code here 2605 ** only works on OSX. 
2606 */ 2607 2608 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2609 /* 2610 ** The afpLockingContext structure contains all afp lock specific state 2611 */ 2612 typedef struct afpLockingContext afpLockingContext; 2613 struct afpLockingContext { 2614 int reserved; 2615 const char *dbPath; /* Name of the open file */ 2616 }; 2617 2618 struct ByteRangeLockPB2 2619 { 2620 unsigned long long offset; /* offset to first byte to lock */ 2621 unsigned long long length; /* nbr of bytes to lock */ 2622 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 2623 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 2624 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 2625 int fd; /* file desc to assoc this lock with */ 2626 }; 2627 2628 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 2629 2630 /* 2631 ** This is a utility for setting or clearing a bit-range lock on an 2632 ** AFP filesystem. 2633 ** 2634 ** Return SQLITE_OK on success, SQLITE_BUSY on failure. 2635 */ 2636 static int afpSetLock( 2637 const char *path, /* Name of the file to be locked or unlocked */ 2638 unixFile *pFile, /* Open file descriptor on path */ 2639 unsigned long long offset, /* First byte to be locked */ 2640 unsigned long long length, /* Number of bytes to lock */ 2641 int setLockFlag /* True to set lock. False to clear lock */ 2642 ){ 2643 struct ByteRangeLockPB2 pb; 2644 int err; 2645 2646 pb.unLockFlag = setLockFlag ? 
0 : 1; 2647 pb.startEndFlag = 0; 2648 pb.offset = offset; 2649 pb.length = length; 2650 pb.fd = pFile->h; 2651 2652 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 2653 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), 2654 offset, length)); 2655 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 2656 if ( err==-1 ) { 2657 int rc; 2658 int tErrno = errno; 2659 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", 2660 path, tErrno, strerror(tErrno))); 2661 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS 2662 rc = SQLITE_BUSY; 2663 #else 2664 rc = sqliteErrorFromPosixError(tErrno, 2665 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); 2666 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ 2667 if( IS_LOCK_ERROR(rc) ){ 2668 storeLastErrno(pFile, tErrno); 2669 } 2670 return rc; 2671 } else { 2672 return SQLITE_OK; 2673 } 2674 } 2675 2676 /* 2677 ** This routine checks if there is a RESERVED lock held on the specified 2678 ** file by this or any other process. If such a lock is held, set *pResOut 2679 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2680 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2681 */ 2682 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 2683 int rc = SQLITE_OK; 2684 int reserved = 0; 2685 unixFile *pFile = (unixFile*)id; 2686 afpLockingContext *context; 2687 2688 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2689 2690 assert( pFile ); 2691 context = (afpLockingContext *) pFile->lockingContext; 2692 if( context->reserved ){ 2693 *pResOut = 1; 2694 return SQLITE_OK; 2695 } 2696 unixEnterMutex(); /* Because pFile->pInode is shared across threads */ 2697 2698 /* Check if a thread in this process holds such a lock */ 2699 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 2700 reserved = 1; 2701 } 2702 2703 /* Otherwise see if some other process holds it. 
2704 */ 2705 if( !reserved ){ 2706 /* lock the RESERVED byte */ 2707 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2708 if( SQLITE_OK==lrc ){ 2709 /* if we succeeded in taking the reserved lock, unlock it to restore 2710 ** the original state */ 2711 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2712 } else { 2713 /* if we failed to get the lock then someone else must have it */ 2714 reserved = 1; 2715 } 2716 if( IS_LOCK_ERROR(lrc) ){ 2717 rc=lrc; 2718 } 2719 } 2720 2721 unixLeaveMutex(); 2722 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); 2723 2724 *pResOut = reserved; 2725 return rc; 2726 } 2727 2728 /* 2729 ** Lock the file with the lock specified by parameter eFileLock - one 2730 ** of the following: 2731 ** 2732 ** (1) SHARED_LOCK 2733 ** (2) RESERVED_LOCK 2734 ** (3) PENDING_LOCK 2735 ** (4) EXCLUSIVE_LOCK 2736 ** 2737 ** Sometimes when requesting one lock state, additional lock states 2738 ** are inserted in between. The locking might fail on one of the later 2739 ** transitions leaving the lock state different from what it started but 2740 ** still short of its goal. The following chart shows the allowed 2741 ** transitions and the inserted intermediate states: 2742 ** 2743 ** UNLOCKED -> SHARED 2744 ** SHARED -> RESERVED 2745 ** SHARED -> (PENDING) -> EXCLUSIVE 2746 ** RESERVED -> (PENDING) -> EXCLUSIVE 2747 ** PENDING -> EXCLUSIVE 2748 ** 2749 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2750 ** routine to lower a locking level. 
2751 */ 2752 static int afpLock(sqlite3_file *id, int eFileLock){ 2753 int rc = SQLITE_OK; 2754 unixFile *pFile = (unixFile*)id; 2755 unixInodeInfo *pInode = pFile->pInode; 2756 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2757 2758 assert( pFile ); 2759 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, 2760 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 2761 azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); 2762 2763 /* If there is already a lock of this type or more restrictive on the 2764 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 2765 ** unixEnterMutex() hasn't been called yet. 2766 */ 2767 if( pFile->eFileLock>=eFileLock ){ 2768 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, 2769 azFileLock(eFileLock))); 2770 return SQLITE_OK; 2771 } 2772 2773 /* Make sure the locking sequence is correct 2774 ** (1) We never move from unlocked to anything higher than shared lock. 2775 ** (2) SQLite never explicitly requests a pendig lock. 2776 ** (3) A shared lock is always held when a reserve lock is requested. 2777 */ 2778 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 2779 assert( eFileLock!=PENDING_LOCK ); 2780 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 2781 2782 /* This mutex is needed because pFile->pInode is shared across threads 2783 */ 2784 unixEnterMutex(); 2785 pInode = pFile->pInode; 2786 2787 /* If some thread using this PID has a lock via a different unixFile* 2788 ** handle that precludes the requested lock, return BUSY. 2789 */ 2790 if( (pFile->eFileLock!=pInode->eFileLock && 2791 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 2792 ){ 2793 rc = SQLITE_BUSY; 2794 goto afp_end_lock; 2795 } 2796 2797 /* If a SHARED lock is requested, and some thread using this PID already 2798 ** has a SHARED or RESERVED lock, then increment reference counts and 2799 ** return SQLITE_OK. 
2800 */ 2801 if( eFileLock==SHARED_LOCK && 2802 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 2803 assert( eFileLock==SHARED_LOCK ); 2804 assert( pFile->eFileLock==0 ); 2805 assert( pInode->nShared>0 ); 2806 pFile->eFileLock = SHARED_LOCK; 2807 pInode->nShared++; 2808 pInode->nLock++; 2809 goto afp_end_lock; 2810 } 2811 2812 /* A PENDING lock is needed before acquiring a SHARED lock and before 2813 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 2814 ** be released. 2815 */ 2816 if( eFileLock==SHARED_LOCK 2817 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 2818 ){ 2819 int failed; 2820 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); 2821 if (failed) { 2822 rc = failed; 2823 goto afp_end_lock; 2824 } 2825 } 2826 2827 /* If control gets to this point, then actually go ahead and make 2828 ** operating system calls for the specified lock. 2829 */ 2830 if( eFileLock==SHARED_LOCK ){ 2831 int lrc1, lrc2, lrc1Errno = 0; 2832 long lk, mask; 2833 2834 assert( pInode->nShared==0 ); 2835 assert( pInode->eFileLock==0 ); 2836 2837 mask = (sizeof(long)==8) ? 
LARGEST_INT64 : 0x7fffffff; 2838 /* Now get the read-lock SHARED_LOCK */ 2839 /* note that the quality of the randomness doesn't matter that much */ 2840 lk = random(); 2841 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); 2842 lrc1 = afpSetLock(context->dbPath, pFile, 2843 SHARED_FIRST+pInode->sharedByte, 1, 1); 2844 if( IS_LOCK_ERROR(lrc1) ){ 2845 lrc1Errno = pFile->lastErrno; 2846 } 2847 /* Drop the temporary PENDING lock */ 2848 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 2849 2850 if( IS_LOCK_ERROR(lrc1) ) { 2851 storeLastErrno(pFile, lrc1Errno); 2852 rc = lrc1; 2853 goto afp_end_lock; 2854 } else if( IS_LOCK_ERROR(lrc2) ){ 2855 rc = lrc2; 2856 goto afp_end_lock; 2857 } else if( lrc1 != SQLITE_OK ) { 2858 rc = lrc1; 2859 } else { 2860 pFile->eFileLock = SHARED_LOCK; 2861 pInode->nLock++; 2862 pInode->nShared = 1; 2863 } 2864 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 2865 /* We are trying for an exclusive lock but another thread in this 2866 ** same process is still holding a shared lock. */ 2867 rc = SQLITE_BUSY; 2868 }else{ 2869 /* The request was for a RESERVED or EXCLUSIVE lock. It is 2870 ** assumed that there is a SHARED or greater lock on the file 2871 ** already. 2872 */ 2873 int failed = 0; 2874 assert( 0!=pFile->eFileLock ); 2875 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { 2876 /* Acquire a RESERVED lock */ 2877 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2878 if( !failed ){ 2879 context->reserved = 1; 2880 } 2881 } 2882 if (!failed && eFileLock == EXCLUSIVE_LOCK) { 2883 /* Acquire an EXCLUSIVE lock */ 2884 2885 /* Remove the shared lock before trying the range. 
we'll need to 2886 ** reestablish the shared lock if we can't get the afpUnlock 2887 */ 2888 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + 2889 pInode->sharedByte, 1, 0)) ){ 2890 int failed2 = SQLITE_OK; 2891 /* now attemmpt to get the exclusive lock range */ 2892 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 2893 SHARED_SIZE, 1); 2894 if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 2895 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ 2896 /* Can't reestablish the shared lock. Sqlite can't deal, this is 2897 ** a critical I/O error 2898 */ 2899 rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : 2900 SQLITE_IOERR_LOCK; 2901 goto afp_end_lock; 2902 } 2903 }else{ 2904 rc = failed; 2905 } 2906 } 2907 if( failed ){ 2908 rc = failed; 2909 } 2910 } 2911 2912 if( rc==SQLITE_OK ){ 2913 pFile->eFileLock = eFileLock; 2914 pInode->eFileLock = eFileLock; 2915 }else if( eFileLock==EXCLUSIVE_LOCK ){ 2916 pFile->eFileLock = PENDING_LOCK; 2917 pInode->eFileLock = PENDING_LOCK; 2918 } 2919 2920 afp_end_lock: 2921 unixLeaveMutex(); 2922 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 2923 rc==SQLITE_OK ? "ok" : "failed")); 2924 return rc; 2925 } 2926 2927 /* 2928 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2929 ** must be either NO_LOCK or SHARED_LOCK. 2930 ** 2931 ** If the locking level of the file descriptor is already at or below 2932 ** the requested locking level, this routine is a no-op. 
2933 */ 2934 static int afpUnlock(sqlite3_file *id, int eFileLock) { 2935 int rc = SQLITE_OK; 2936 unixFile *pFile = (unixFile*)id; 2937 unixInodeInfo *pInode; 2938 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2939 int skipShared = 0; 2940 #ifdef SQLITE_TEST 2941 int h = pFile->h; 2942 #endif 2943 2944 assert( pFile ); 2945 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, 2946 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 2947 osGetpid(0))); 2948 2949 assert( eFileLock<=SHARED_LOCK ); 2950 if( pFile->eFileLock<=eFileLock ){ 2951 return SQLITE_OK; 2952 } 2953 unixEnterMutex(); 2954 pInode = pFile->pInode; 2955 assert( pInode->nShared!=0 ); 2956 if( pFile->eFileLock>SHARED_LOCK ){ 2957 assert( pInode->eFileLock==pFile->eFileLock ); 2958 SimulateIOErrorBenign(1); 2959 SimulateIOError( h=(-1) ) 2960 SimulateIOErrorBenign(0); 2961 2962 #ifdef SQLITE_DEBUG 2963 /* When reducing a lock such that other processes can start 2964 ** reading the database file again, make sure that the 2965 ** transaction counter was updated if any part of the database 2966 ** file changed. If the transaction counter is not updated, 2967 ** other connections to the same file might not realize that 2968 ** the file has changed and hence might not know to flush their 2969 ** cache. The use of a stale cache can lead to database corruption. 
2970 */ 2971 assert( pFile->inNormalWrite==0 2972 || pFile->dbUpdate==0 2973 || pFile->transCntrChng==1 ); 2974 pFile->inNormalWrite = 0; 2975 #endif 2976 2977 if( pFile->eFileLock==EXCLUSIVE_LOCK ){ 2978 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); 2979 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ 2980 /* only re-establish the shared lock if necessary */ 2981 int sharedLockByte = SHARED_FIRST+pInode->sharedByte; 2982 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); 2983 } else { 2984 skipShared = 1; 2985 } 2986 } 2987 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ 2988 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 2989 } 2990 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ 2991 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2992 if( !rc ){ 2993 context->reserved = 0; 2994 } 2995 } 2996 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ 2997 pInode->eFileLock = SHARED_LOCK; 2998 } 2999 } 3000 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ 3001 3002 /* Decrement the shared lock counter. Release the lock using an 3003 ** OS call only when all threads in this same process have released 3004 ** the lock. 
3005 */ 3006 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3007 pInode->nShared--; 3008 if( pInode->nShared==0 ){ 3009 SimulateIOErrorBenign(1); 3010 SimulateIOError( h=(-1) ) 3011 SimulateIOErrorBenign(0); 3012 if( !skipShared ){ 3013 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); 3014 } 3015 if( !rc ){ 3016 pInode->eFileLock = NO_LOCK; 3017 pFile->eFileLock = NO_LOCK; 3018 } 3019 } 3020 if( rc==SQLITE_OK ){ 3021 pInode->nLock--; 3022 assert( pInode->nLock>=0 ); 3023 if( pInode->nLock==0 ){ 3024 closePendingFds(pFile); 3025 } 3026 } 3027 } 3028 3029 unixLeaveMutex(); 3030 if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; 3031 return rc; 3032 } 3033 3034 /* 3035 ** Close a file & cleanup AFP specific locking context 3036 */ 3037 static int afpClose(sqlite3_file *id) { 3038 int rc = SQLITE_OK; 3039 unixFile *pFile = (unixFile*)id; 3040 assert( id!=0 ); 3041 afpUnlock(id, NO_LOCK); 3042 unixEnterMutex(); 3043 if( pFile->pInode && pFile->pInode->nLock ){ 3044 /* If there are outstanding locks, do not actually close the file just 3045 ** yet because that would clear those locks. Instead, add the file 3046 ** descriptor to pInode->aPending. It will be automatically closed when 3047 ** the last lock is cleared. 3048 */ 3049 setPendingFd(pFile); 3050 } 3051 releaseInodeInfo(pFile); 3052 sqlite3_free(pFile->lockingContext); 3053 rc = closeUnixFile(id); 3054 unixLeaveMutex(); 3055 return rc; 3056 } 3057 3058 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3059 /* 3060 ** The code above is the AFP lock implementation. The code is specific 3061 ** to MacOSX and does not work on other unix platforms. No alternative 3062 ** is available. If you don't compile for a mac, then the "unix-afp" 3063 ** VFS is not available. 
3064 ** 3065 ********************* End of the AFP lock implementation ********************** 3066 ******************************************************************************/ 3067 3068 /****************************************************************************** 3069 *************************** Begin NFS Locking ********************************/ 3070 3071 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 3072 /* 3073 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3074 ** must be either NO_LOCK or SHARED_LOCK. 3075 ** 3076 ** If the locking level of the file descriptor is already at or below 3077 ** the requested locking level, this routine is a no-op. 3078 */ 3079 static int nfsUnlock(sqlite3_file *id, int eFileLock){ 3080 return posixUnlock(id, eFileLock, 1); 3081 } 3082 3083 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3084 /* 3085 ** The code above is the NFS lock implementation. The code is specific 3086 ** to MacOSX and does not work on other unix platforms. No alternative 3087 ** is available. 3088 ** 3089 ********************* End of the NFS lock implementation ********************** 3090 ******************************************************************************/ 3091 3092 /****************************************************************************** 3093 **************** Non-locking sqlite3_file methods ***************************** 3094 ** 3095 ** The next division contains implementations for all methods of the 3096 ** sqlite3_file object other than the locking methods. The locking 3097 ** methods were defined in divisions above (one locking method per 3098 ** division). Those methods that are common to all locking modes 3099 ** are gather together into this division. 3100 */ 3101 3102 /* 3103 ** Seek to the offset passed as the second argument, then read cnt 3104 ** bytes into pBuf. Return the number of bytes actually read. 
3105 ** 3106 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 3107 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 3108 ** one system to another. Since SQLite does not define USE_PREAD 3109 ** in any form by default, we will not attempt to define _XOPEN_SOURCE. 3110 ** See tickets #2741 and #2681. 3111 ** 3112 ** To avoid stomping the errno value on a failed read the lastErrno value 3113 ** is set before returning. 3114 */ 3115 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 3116 int got; 3117 int prior = 0; 3118 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 3119 i64 newOffset; 3120 #endif 3121 TIMER_START; 3122 assert( cnt==(cnt&0x1ffff) ); 3123 assert( id->h>2 ); 3124 do{ 3125 #if defined(USE_PREAD) 3126 got = osPread(id->h, pBuf, cnt, offset); 3127 SimulateIOError( got = -1 ); 3128 #elif defined(USE_PREAD64) 3129 got = osPread64(id->h, pBuf, cnt, offset); 3130 SimulateIOError( got = -1 ); 3131 #else 3132 newOffset = lseek(id->h, offset, SEEK_SET); 3133 SimulateIOError( newOffset = -1 ); 3134 if( newOffset<0 ){ 3135 storeLastErrno((unixFile*)id, errno); 3136 return -1; 3137 } 3138 got = osRead(id->h, pBuf, cnt); 3139 #endif 3140 if( got==cnt ) break; 3141 if( got<0 ){ 3142 if( errno==EINTR ){ got = 1; continue; } 3143 prior = 0; 3144 storeLastErrno((unixFile*)id, errno); 3145 break; 3146 }else if( got>0 ){ 3147 cnt -= got; 3148 offset += got; 3149 prior += got; 3150 pBuf = (void*)(got + (char*)pBuf); 3151 } 3152 }while( got>0 ); 3153 TIMER_END; 3154 OSTRACE(("READ %-3d %5d %7lld %llu\n", 3155 id->h, got+prior, offset-prior, TIMER_ELAPSED)); 3156 return got+prior; 3157 } 3158 3159 /* 3160 ** Read data from a file into a buffer. Return SQLITE_OK if all 3161 ** bytes were read successfully and SQLITE_IOERR if anything goes 3162 ** wrong. 
3163 */ 3164 static int unixRead( 3165 sqlite3_file *id, 3166 void *pBuf, 3167 int amt, 3168 sqlite3_int64 offset 3169 ){ 3170 unixFile *pFile = (unixFile *)id; 3171 int got; 3172 assert( id ); 3173 assert( offset>=0 ); 3174 assert( amt>0 ); 3175 3176 /* If this is a database file (not a journal, master-journal or temp 3177 ** file), the bytes in the locking range should never be read or written. */ 3178 #if 0 3179 assert( pFile->pUnused==0 3180 || offset>=PENDING_BYTE+512 3181 || offset+amt<=PENDING_BYTE 3182 ); 3183 #endif 3184 3185 #if SQLITE_MAX_MMAP_SIZE>0 3186 /* Deal with as much of this read request as possible by transfering 3187 ** data from the memory mapping using memcpy(). */ 3188 if( offset<pFile->mmapSize ){ 3189 if( offset+amt <= pFile->mmapSize ){ 3190 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); 3191 return SQLITE_OK; 3192 }else{ 3193 int nCopy = pFile->mmapSize - offset; 3194 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); 3195 pBuf = &((u8 *)pBuf)[nCopy]; 3196 amt -= nCopy; 3197 offset += nCopy; 3198 } 3199 } 3200 #endif 3201 3202 got = seekAndRead(pFile, offset, pBuf, amt); 3203 if( got==amt ){ 3204 return SQLITE_OK; 3205 }else if( got<0 ){ 3206 /* lastErrno set by seekAndRead */ 3207 return SQLITE_IOERR_READ; 3208 }else{ 3209 storeLastErrno(pFile, 0); /* not a system error */ 3210 /* Unread parts of the buffer must be zero-filled */ 3211 memset(&((char*)pBuf)[got], 0, amt-got); 3212 return SQLITE_IOERR_SHORT_READ; 3213 } 3214 } 3215 3216 /* 3217 ** Attempt to seek the file-descriptor passed as the first argument to 3218 ** absolute offset iOff, then attempt to write nBuf bytes of data from 3219 ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 3220 ** return the actual number of bytes written (which may be less than 3221 ** nBuf). 
3222 */ 3223 static int seekAndWriteFd( 3224 int fd, /* File descriptor to write to */ 3225 i64 iOff, /* File offset to begin writing at */ 3226 const void *pBuf, /* Copy data from this buffer to the file */ 3227 int nBuf, /* Size of buffer pBuf in bytes */ 3228 int *piErrno /* OUT: Error number if error occurs */ 3229 ){ 3230 int rc = 0; /* Value returned by system call */ 3231 3232 assert( nBuf==(nBuf&0x1ffff) ); 3233 assert( fd>2 ); 3234 assert( piErrno!=0 ); 3235 nBuf &= 0x1ffff; 3236 TIMER_START; 3237 3238 #if defined(USE_PREAD) 3239 do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); 3240 #elif defined(USE_PREAD64) 3241 do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); 3242 #else 3243 do{ 3244 i64 iSeek = lseek(fd, iOff, SEEK_SET); 3245 SimulateIOError( iSeek = -1 ); 3246 if( iSeek<0 ){ 3247 rc = -1; 3248 break; 3249 } 3250 rc = osWrite(fd, pBuf, nBuf); 3251 }while( rc<0 && errno==EINTR ); 3252 #endif 3253 3254 TIMER_END; 3255 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); 3256 3257 if( rc<0 ) *piErrno = errno; 3258 return rc; 3259 } 3260 3261 3262 /* 3263 ** Seek to the offset in id->offset then read cnt bytes into pBuf. 3264 ** Return the number of bytes actually read. Update the offset. 3265 ** 3266 ** To avoid stomping the errno value on a failed write the lastErrno value 3267 ** is set before returning. 3268 */ 3269 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 3270 return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); 3271 } 3272 3273 3274 /* 3275 ** Write data from a buffer into a file. Return SQLITE_OK on success 3276 ** or some other error code on failure. 
3277 */ 3278 static int unixWrite( 3279 sqlite3_file *id, 3280 const void *pBuf, 3281 int amt, 3282 sqlite3_int64 offset 3283 ){ 3284 unixFile *pFile = (unixFile*)id; 3285 int wrote = 0; 3286 assert( id ); 3287 assert( amt>0 ); 3288 3289 /* If this is a database file (not a journal, master-journal or temp 3290 ** file), the bytes in the locking range should never be read or written. */ 3291 #if 0 3292 assert( pFile->pUnused==0 3293 || offset>=PENDING_BYTE+512 3294 || offset+amt<=PENDING_BYTE 3295 ); 3296 #endif 3297 3298 #ifdef SQLITE_DEBUG 3299 /* If we are doing a normal write to a database file (as opposed to 3300 ** doing a hot-journal rollback or a write to some file other than a 3301 ** normal database file) then record the fact that the database 3302 ** has changed. If the transaction counter is modified, record that 3303 ** fact too. 3304 */ 3305 if( pFile->inNormalWrite ){ 3306 pFile->dbUpdate = 1; /* The database has been modified */ 3307 if( offset<=24 && offset+amt>=27 ){ 3308 int rc; 3309 char oldCntr[4]; 3310 SimulateIOErrorBenign(1); 3311 rc = seekAndRead(pFile, 24, oldCntr, 4); 3312 SimulateIOErrorBenign(0); 3313 if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ 3314 pFile->transCntrChng = 1; /* The transaction counter has changed */ 3315 } 3316 } 3317 } 3318 #endif 3319 3320 #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 3321 /* Deal with as much of this write request as possible by transfering 3322 ** data from the memory mapping using memcpy(). 
*/ 3323 if( offset<pFile->mmapSize ){ 3324 if( offset+amt <= pFile->mmapSize ){ 3325 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); 3326 return SQLITE_OK; 3327 }else{ 3328 int nCopy = pFile->mmapSize - offset; 3329 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); 3330 pBuf = &((u8 *)pBuf)[nCopy]; 3331 amt -= nCopy; 3332 offset += nCopy; 3333 } 3334 } 3335 #endif 3336 3337 while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ 3338 amt -= wrote; 3339 offset += wrote; 3340 pBuf = &((char*)pBuf)[wrote]; 3341 } 3342 SimulateIOError(( wrote=(-1), amt=1 )); 3343 SimulateDiskfullError(( wrote=0, amt=1 )); 3344 3345 if( amt>wrote ){ 3346 if( wrote<0 && pFile->lastErrno!=ENOSPC ){ 3347 /* lastErrno set by seekAndWrite */ 3348 return SQLITE_IOERR_WRITE; 3349 }else{ 3350 storeLastErrno(pFile, 0); /* not a system error */ 3351 return SQLITE_FULL; 3352 } 3353 } 3354 3355 return SQLITE_OK; 3356 } 3357 3358 #ifdef SQLITE_TEST 3359 /* 3360 ** Count the number of fullsyncs and normal syncs. This is used to test 3361 ** that syncs and fullsyncs are occurring at the right times. 3362 */ 3363 int sqlite3_sync_count = 0; 3364 int sqlite3_fullsync_count = 0; 3365 #endif 3366 3367 /* 3368 ** We do not trust systems to provide a working fdatasync(). Some do. 3369 ** Others do no. To be safe, we will stick with the (slightly slower) 3370 ** fsync(). If you know that your system does support fdatasync() correctly, 3371 ** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC 3372 */ 3373 #if !defined(fdatasync) && !HAVE_FDATASYNC 3374 # define fdatasync fsync 3375 #endif 3376 3377 /* 3378 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 3379 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 3380 ** only available on Mac OS X. But that could change. 
3381 */ 3382 #ifdef F_FULLFSYNC 3383 # define HAVE_FULLFSYNC 1 3384 #else 3385 # define HAVE_FULLFSYNC 0 3386 #endif 3387 3388 3389 /* 3390 ** The fsync() system call does not work as advertised on many 3391 ** unix systems. The following procedure is an attempt to make 3392 ** it work better. 3393 ** 3394 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 3395 ** for testing when we want to run through the test suite quickly. 3396 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 3397 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 3398 ** or power failure will likely corrupt the database file. 3399 ** 3400 ** SQLite sets the dataOnly flag if the size of the file is unchanged. 3401 ** The idea behind dataOnly is that it should only write the file content 3402 ** to disk, not the inode. We only set dataOnly if the file size is 3403 ** unchanged since the file size is part of the inode. However, 3404 ** Ted Ts'o tells us that fdatasync() will also write the inode if the 3405 ** file size has changed. The only real difference between fdatasync() 3406 ** and fsync(), Ted tells us, is that fdatasync() will not flush the 3407 ** inode if the mtime or owner or other inode attributes have changed. 3408 ** We only care about the file size, not the other file attributes, so 3409 ** as far as SQLite is concerned, an fdatasync() is always adequate. 3410 ** So, we always use fdatasync() if it is available, regardless of 3411 ** the value of the dataOnly flag. 3412 */ 3413 static int full_fsync(int fd, int fullSync, int dataOnly){ 3414 int rc; 3415 3416 /* The following "ifdef/elif/else/" block has the same structure as 3417 ** the one below. It is replicated here solely to avoid cluttering 3418 ** up the real code with the UNUSED_PARAMETER() macros. 
3419 */ 3420 #ifdef SQLITE_NO_SYNC 3421 UNUSED_PARAMETER(fd); 3422 UNUSED_PARAMETER(fullSync); 3423 UNUSED_PARAMETER(dataOnly); 3424 #elif HAVE_FULLFSYNC 3425 UNUSED_PARAMETER(dataOnly); 3426 #else 3427 UNUSED_PARAMETER(fullSync); 3428 UNUSED_PARAMETER(dataOnly); 3429 #endif 3430 3431 /* Record the number of times that we do a normal fsync() and 3432 ** FULLSYNC. This is used during testing to verify that this procedure 3433 ** gets called with the correct arguments. 3434 */ 3435 #ifdef SQLITE_TEST 3436 if( fullSync ) sqlite3_fullsync_count++; 3437 sqlite3_sync_count++; 3438 #endif 3439 3440 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 3441 ** no-op. But go ahead and call fstat() to validate the file 3442 ** descriptor as we need a method to provoke a failure during 3443 ** coverate testing. 3444 */ 3445 #ifdef SQLITE_NO_SYNC 3446 { 3447 struct stat buf; 3448 rc = osFstat(fd, &buf); 3449 } 3450 #elif HAVE_FULLFSYNC 3451 if( fullSync ){ 3452 rc = osFcntl(fd, F_FULLFSYNC, 0); 3453 }else{ 3454 rc = 1; 3455 } 3456 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 3457 ** It shouldn't be possible for fullfsync to fail on the local 3458 ** file system (on OSX), so failure indicates that FULLFSYNC 3459 ** isn't supported for this file system. So, attempt an fsync 3460 ** and (for now) ignore the overhead of a superfluous fcntl call. 3461 ** It'd be better to detect fullfsync support once and avoid 3462 ** the fcntl call every time sync is called. 
3463 */ 3464 if( rc ) rc = fsync(fd); 3465 3466 #elif defined(__APPLE__) 3467 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly 3468 ** so currently we default to the macro that redefines fdatasync to fsync 3469 */ 3470 rc = fsync(fd); 3471 #else 3472 rc = fdatasync(fd); 3473 #if OS_VXWORKS 3474 if( rc==-1 && errno==ENOTSUP ){ 3475 rc = fsync(fd); 3476 } 3477 #endif /* OS_VXWORKS */ 3478 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ 3479 3480 if( OS_VXWORKS && rc!= -1 ){ 3481 rc = 0; 3482 } 3483 return rc; 3484 } 3485 3486 /* 3487 ** Open a file descriptor to the directory containing file zFilename. 3488 ** If successful, *pFd is set to the opened file descriptor and 3489 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 3490 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 3491 ** value. 3492 ** 3493 ** The directory file descriptor is used for only one thing - to 3494 ** fsync() a directory to make sure file creation and deletion events 3495 ** are flushed to disk. Such fsyncs are not needed on newer 3496 ** journaling filesystems, but are required on older filesystems. 3497 ** 3498 ** This routine can be overridden using the xSetSysCall interface. 3499 ** The ability to override this routine was added in support of the 3500 ** chromium sandbox. Opening a directory is a security risk (we are 3501 ** told) so making it overrideable allows the chromium sandbox to 3502 ** replace this routine with a harmless no-op. To make this routine 3503 ** a no-op, replace it with a stub that returns SQLITE_OK but leaves 3504 ** *pFd set to a negative number. 3505 ** 3506 ** If SQLITE_OK is returned, the caller is responsible for closing 3507 ** the file descriptor *pFd using close(). 
3508 */ 3509 static int openDirectory(const char *zFilename, int *pFd){ 3510 int ii; 3511 int fd = -1; 3512 char zDirname[MAX_PATHNAME+1]; 3513 3514 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 3515 for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); 3516 if( ii>0 ){ 3517 zDirname[ii] = '\0'; 3518 }else{ 3519 if( zDirname[0]!='/' ) zDirname[0] = '.'; 3520 zDirname[1] = 0; 3521 } 3522 fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); 3523 if( fd>=0 ){ 3524 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); 3525 } 3526 *pFd = fd; 3527 if( fd>=0 ) return SQLITE_OK; 3528 return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); 3529 } 3530 3531 /* 3532 ** Make sure all writes to a particular file are committed to disk. 3533 ** 3534 ** If dataOnly==0 then both the file itself and its metadata (file 3535 ** size, access time, etc) are synced. If dataOnly!=0 then only the 3536 ** file data is synced. 3537 ** 3538 ** Under Unix, also make sure that the directory entry for the file 3539 ** has been created by fsync-ing the directory that contains the file. 3540 ** If we do not do this and we encounter a power failure, the directory 3541 ** entry for the journal might not exist after we reboot. The next 3542 ** SQLite to access the file will not know that the journal exists (because 3543 ** the directory entry for the journal was never created) and the transaction 3544 ** will not roll back - possibly leading to database corruption. 3545 */ 3546 static int unixSync(sqlite3_file *id, int flags){ 3547 int rc; 3548 unixFile *pFile = (unixFile*)id; 3549 3550 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 3551 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 3552 3553 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 3554 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 3555 || (flags&0x0F)==SQLITE_SYNC_FULL 3556 ); 3557 3558 /* Unix cannot, but some systems may return SQLITE_FULL from here. 
This 3559 ** line is to test that doing so does not cause any problems. 3560 */ 3561 SimulateDiskfullError( return SQLITE_FULL ); 3562 3563 assert( pFile ); 3564 OSTRACE(("SYNC %-3d\n", pFile->h)); 3565 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 3566 SimulateIOError( rc=1 ); 3567 if( rc ){ 3568 storeLastErrno(pFile, errno); 3569 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); 3570 } 3571 3572 /* Also fsync the directory containing the file if the DIRSYNC flag 3573 ** is set. This is a one-time occurrence. Many systems (examples: AIX) 3574 ** are unable to fsync a directory, so ignore errors on the fsync. 3575 */ 3576 if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ 3577 int dirfd; 3578 OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, 3579 HAVE_FULLFSYNC, isFullsync)); 3580 rc = osOpenDirectory(pFile->zPath, &dirfd); 3581 if( rc==SQLITE_OK ){ 3582 full_fsync(dirfd, 0, 0); 3583 robust_close(pFile, dirfd, __LINE__); 3584 }else{ 3585 assert( rc==SQLITE_CANTOPEN ); 3586 rc = SQLITE_OK; 3587 } 3588 pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; 3589 } 3590 return rc; 3591 } 3592 3593 /* 3594 ** Truncate an open file to a specified size 3595 */ 3596 static int unixTruncate(sqlite3_file *id, i64 nByte){ 3597 unixFile *pFile = (unixFile *)id; 3598 int rc; 3599 assert( pFile ); 3600 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 3601 3602 /* If the user has configured a chunk-size for this file, truncate the 3603 ** file so that it consists of an integer number of chunks (i.e. the 3604 ** actual file size after the operation may be larger than the requested 3605 ** size). 
3606 */ 3607 if( pFile->szChunk>0 ){ 3608 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; 3609 } 3610 3611 rc = robust_ftruncate(pFile->h, nByte); 3612 if( rc ){ 3613 storeLastErrno(pFile, errno); 3614 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3615 }else{ 3616 #ifdef SQLITE_DEBUG 3617 /* If we are doing a normal write to a database file (as opposed to 3618 ** doing a hot-journal rollback or a write to some file other than a 3619 ** normal database file) and we truncate the file to zero length, 3620 ** that effectively updates the change counter. This might happen 3621 ** when restoring a database using the backup API from a zero-length 3622 ** source. 3623 */ 3624 if( pFile->inNormalWrite && nByte==0 ){ 3625 pFile->transCntrChng = 1; 3626 } 3627 #endif 3628 3629 #if SQLITE_MAX_MMAP_SIZE>0 3630 /* If the file was just truncated to a size smaller than the currently 3631 ** mapped region, reduce the effective mapping size as well. SQLite will 3632 ** use read() and write() to access data beyond this point from now on. 3633 */ 3634 if( nByte<pFile->mmapSize ){ 3635 pFile->mmapSize = nByte; 3636 } 3637 #endif 3638 3639 return SQLITE_OK; 3640 } 3641 } 3642 3643 /* 3644 ** Determine the current size of a file in bytes 3645 */ 3646 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 3647 int rc; 3648 struct stat buf; 3649 assert( id ); 3650 rc = osFstat(((unixFile*)id)->h, &buf); 3651 SimulateIOError( rc=1 ); 3652 if( rc!=0 ){ 3653 storeLastErrno((unixFile*)id, errno); 3654 return SQLITE_IOERR_FSTAT; 3655 } 3656 *pSize = buf.st_size; 3657 3658 /* When opening a zero-size database, the findInodeInfo() procedure 3659 ** writes a single byte into that file in order to work around a bug 3660 ** in the OS-X msdos filesystem. In order to avoid problems with upper 3661 ** layers, we need to report this file size as zero even though it is 3662 ** really 1. Ticket #3260. 
3663 */ 3664 if( *pSize==1 ) *pSize = 0; 3665 3666 3667 return SQLITE_OK; 3668 } 3669 3670 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3671 /* 3672 ** Handler for proxy-locking file-control verbs. Defined below in the 3673 ** proxying locking division. 3674 */ 3675 static int proxyFileControl(sqlite3_file*,int,void*); 3676 #endif 3677 3678 /* 3679 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 3680 ** file-control operation. Enlarge the database to nBytes in size 3681 ** (rounded up to the next chunk-size). If the database is already 3682 ** nBytes or larger, this routine is a no-op. 3683 */ 3684 static int fcntlSizeHint(unixFile *pFile, i64 nByte){ 3685 if( pFile->szChunk>0 ){ 3686 i64 nSize; /* Required file size */ 3687 struct stat buf; /* Used to hold return values of fstat() */ 3688 3689 if( osFstat(pFile->h, &buf) ){ 3690 return SQLITE_IOERR_FSTAT; 3691 } 3692 3693 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; 3694 if( nSize>(i64)buf.st_size ){ 3695 3696 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 3697 /* The code below is handling the return value of osFallocate() 3698 ** correctly. posix_fallocate() is defined to "returns zero on success, 3699 ** or an error number on failure". See the manpage for details. */ 3700 int err; 3701 do{ 3702 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); 3703 }while( err==EINTR ); 3704 if( err ) return SQLITE_IOERR_WRITE; 3705 #else 3706 /* If the OS does not have posix_fallocate(), fake it. Write a 3707 ** single byte to the last byte in each block that falls entirely 3708 ** within the extended region. Then, if required, a single byte 3709 ** at offset (nSize-1), to set the size of the file correctly. 3710 ** This is a similar technique to that used by glibc on systems 3711 ** that do not have a real fallocate() call. 
3712 */ 3713 int nBlk = buf.st_blksize; /* File-system block size */ 3714 int nWrite = 0; /* Number of bytes written by seekAndWrite */ 3715 i64 iWrite; /* Next offset to write to */ 3716 3717 iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; 3718 assert( iWrite>=buf.st_size ); 3719 assert( ((iWrite+1)%nBlk)==0 ); 3720 for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){ 3721 if( iWrite>=nSize ) iWrite = nSize - 1; 3722 nWrite = seekAndWrite(pFile, iWrite, "", 1); 3723 if( nWrite!=1 ) return SQLITE_IOERR_WRITE; 3724 } 3725 #endif 3726 } 3727 } 3728 3729 #if SQLITE_MAX_MMAP_SIZE>0 3730 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ 3731 int rc; 3732 if( pFile->szChunk<=0 ){ 3733 if( robust_ftruncate(pFile->h, nByte) ){ 3734 storeLastErrno(pFile, errno); 3735 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3736 } 3737 } 3738 3739 rc = unixMapfile(pFile, nByte); 3740 return rc; 3741 } 3742 #endif 3743 3744 return SQLITE_OK; 3745 } 3746 3747 /* 3748 ** If *pArg is initially negative then this is a query. Set *pArg to 3749 ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. 3750 ** 3751 ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. 3752 */ 3753 static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ 3754 if( *pArg<0 ){ 3755 *pArg = (pFile->ctrlFlags & mask)!=0; 3756 }else if( (*pArg)==0 ){ 3757 pFile->ctrlFlags &= ~mask; 3758 }else{ 3759 pFile->ctrlFlags |= mask; 3760 } 3761 } 3762 3763 /* Forward declaration */ 3764 static int unixGetTempname(int nBuf, char *zBuf); 3765 3766 /* 3767 ** Information and control of an open file handle. 
3768 */ 3769 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 3770 unixFile *pFile = (unixFile*)id; 3771 switch( op ){ 3772 case SQLITE_FCNTL_LOCKSTATE: { 3773 *(int*)pArg = pFile->eFileLock; 3774 return SQLITE_OK; 3775 } 3776 case SQLITE_FCNTL_LAST_ERRNO: { 3777 *(int*)pArg = pFile->lastErrno; 3778 return SQLITE_OK; 3779 } 3780 case SQLITE_FCNTL_CHUNK_SIZE: { 3781 pFile->szChunk = *(int *)pArg; 3782 return SQLITE_OK; 3783 } 3784 case SQLITE_FCNTL_SIZE_HINT: { 3785 int rc; 3786 SimulateIOErrorBenign(1); 3787 rc = fcntlSizeHint(pFile, *(i64 *)pArg); 3788 SimulateIOErrorBenign(0); 3789 return rc; 3790 } 3791 case SQLITE_FCNTL_PERSIST_WAL: { 3792 unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); 3793 return SQLITE_OK; 3794 } 3795 case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { 3796 unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); 3797 return SQLITE_OK; 3798 } 3799 case SQLITE_FCNTL_VFSNAME: { 3800 *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); 3801 return SQLITE_OK; 3802 } 3803 case SQLITE_FCNTL_TEMPFILENAME: { 3804 char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); 3805 if( zTFile ){ 3806 unixGetTempname(pFile->pVfs->mxPathname, zTFile); 3807 *(char**)pArg = zTFile; 3808 } 3809 return SQLITE_OK; 3810 } 3811 case SQLITE_FCNTL_HAS_MOVED: { 3812 *(int*)pArg = fileHasMoved(pFile); 3813 return SQLITE_OK; 3814 } 3815 #if SQLITE_MAX_MMAP_SIZE>0 3816 case SQLITE_FCNTL_MMAP_SIZE: { 3817 i64 newLimit = *(i64*)pArg; 3818 int rc = SQLITE_OK; 3819 if( newLimit>sqlite3GlobalConfig.mxMmap ){ 3820 newLimit = sqlite3GlobalConfig.mxMmap; 3821 } 3822 *(i64*)pArg = pFile->mmapSizeMax; 3823 if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ 3824 pFile->mmapSizeMax = newLimit; 3825 if( pFile->mmapSize>0 ){ 3826 unixUnmapfile(pFile); 3827 rc = unixMapfile(pFile, -1); 3828 } 3829 } 3830 return rc; 3831 } 3832 #endif 3833 #ifdef SQLITE_DEBUG 3834 /* The pager calls this method to signal that it has done 3835 ** a rollback and that the 
database is therefore unchanged and 3836 ** it hence it is OK for the transaction change counter to be 3837 ** unchanged. 3838 */ 3839 case SQLITE_FCNTL_DB_UNCHANGED: { 3840 ((unixFile*)id)->dbUpdate = 0; 3841 return SQLITE_OK; 3842 } 3843 #endif 3844 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3845 case SQLITE_FCNTL_SET_LOCKPROXYFILE: 3846 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 3847 return proxyFileControl(id,op,pArg); 3848 } 3849 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 3850 } 3851 return SQLITE_NOTFOUND; 3852 } 3853 3854 /* 3855 ** Return the sector size in bytes of the underlying block device for 3856 ** the specified file. This is almost always 512 bytes, but may be 3857 ** larger for some devices. 3858 ** 3859 ** SQLite code assumes this function cannot fail. It also assumes that 3860 ** if two files are created in the same file-system directory (i.e. 3861 ** a database and its journal file) that the sector size will be the 3862 ** same for both. 3863 */ 3864 #ifndef __QNXNTO__ 3865 static int unixSectorSize(sqlite3_file *NotUsed){ 3866 UNUSED_PARAMETER(NotUsed); 3867 return SQLITE_DEFAULT_SECTOR_SIZE; 3868 } 3869 #endif 3870 3871 /* 3872 ** The following version of unixSectorSize() is optimized for QNX. 
3873 */ 3874 #ifdef __QNXNTO__ 3875 #include <sys/dcmd_blk.h> 3876 #include <sys/statvfs.h> 3877 static int unixSectorSize(sqlite3_file *id){ 3878 unixFile *pFile = (unixFile*)id; 3879 if( pFile->sectorSize == 0 ){ 3880 struct statvfs fsInfo; 3881 3882 /* Set defaults for non-supported filesystems */ 3883 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 3884 pFile->deviceCharacteristics = 0; 3885 if( fstatvfs(pFile->h, &fsInfo) == -1 ) { 3886 return pFile->sectorSize; 3887 } 3888 3889 if( !strcmp(fsInfo.f_basetype, "tmp") ) { 3890 pFile->sectorSize = fsInfo.f_bsize; 3891 pFile->deviceCharacteristics = 3892 SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ 3893 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 3894 ** the write succeeds */ 3895 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 3896 ** so it is ordered */ 3897 0; 3898 }else if( strstr(fsInfo.f_basetype, "etfs") ){ 3899 pFile->sectorSize = fsInfo.f_bsize; 3900 pFile->deviceCharacteristics = 3901 /* etfs cluster size writes are atomic */ 3902 (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | 3903 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 3904 ** the write succeeds */ 3905 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 3906 ** so it is ordered */ 3907 0; 3908 }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ 3909 pFile->sectorSize = fsInfo.f_bsize; 3910 pFile->deviceCharacteristics = 3911 SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ 3912 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 3913 ** the write succeeds */ 3914 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 3915 ** so it is ordered */ 3916 0; 3917 }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ 3918 pFile->sectorSize = fsInfo.f_bsize; 3919 pFile->deviceCharacteristics = 3920 /* full bitset of atomics from max sector size and smaller */ 3921 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) 
<< 1) - 2 | 3922 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 3923 ** so it is ordered */ 3924 0; 3925 }else if( strstr(fsInfo.f_basetype, "dos") ){ 3926 pFile->sectorSize = fsInfo.f_bsize; 3927 pFile->deviceCharacteristics = 3928 /* full bitset of atomics from max sector size and smaller */ 3929 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 3930 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 3931 ** so it is ordered */ 3932 0; 3933 }else{ 3934 pFile->deviceCharacteristics = 3935 SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ 3936 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 3937 ** the write succeeds */ 3938 0; 3939 } 3940 } 3941 /* Last chance verification. If the sector size isn't a multiple of 512 3942 ** then it isn't valid.*/ 3943 if( pFile->sectorSize % 512 != 0 ){ 3944 pFile->deviceCharacteristics = 0; 3945 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 3946 } 3947 return pFile->sectorSize; 3948 } 3949 #endif /* __QNXNTO__ */ 3950 3951 /* 3952 ** Return the device characteristics for the file. 3953 ** 3954 ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. 3955 ** However, that choice is controversial since technically the underlying 3956 ** file system does not always provide powersafe overwrites. (In other 3957 ** words, after a power-loss event, parts of the file that were never 3958 ** written might end up being altered.) However, non-PSOW behavior is very, 3959 ** very rare. And asserting PSOW makes a large reduction in the amount 3960 ** of required I/O for journaling, since a lot of padding is eliminated. 3961 ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control 3962 ** available to turn it off and URI query parameter available to turn it off. 
3963 */ 3964 static int unixDeviceCharacteristics(sqlite3_file *id){ 3965 unixFile *p = (unixFile*)id; 3966 int rc = 0; 3967 #ifdef __QNXNTO__ 3968 if( p->sectorSize==0 ) unixSectorSize(id); 3969 rc = p->deviceCharacteristics; 3970 #endif 3971 if( p->ctrlFlags & UNIXFILE_PSOW ){ 3972 rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; 3973 } 3974 return rc; 3975 } 3976 3977 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 3978 3979 /* 3980 ** Return the system page size. 3981 ** 3982 ** This function should not be called directly by other code in this file. 3983 ** Instead, it should be called via macro osGetpagesize(). 3984 */ 3985 static int unixGetpagesize(void){ 3986 #if OS_VXWORKS 3987 return 1024; 3988 #elif defined(_BSD_SOURCE) 3989 return getpagesize(); 3990 #else 3991 return (int)sysconf(_SC_PAGESIZE); 3992 #endif 3993 } 3994 3995 #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ 3996 3997 #ifndef SQLITE_OMIT_WAL 3998 3999 /* 4000 ** Object used to represent an shared memory buffer. 4001 ** 4002 ** When multiple threads all reference the same wal-index, each thread 4003 ** has its own unixShm object, but they all point to a single instance 4004 ** of this unixShmNode object. In other words, each wal-index is opened 4005 ** only once per process. 4006 ** 4007 ** Each unixShmNode object is connected to a single unixInodeInfo object. 4008 ** We could coalesce this object into unixInodeInfo, but that would mean 4009 ** every open file that does not use shared memory (in other words, most 4010 ** open files) would have to carry around this extra information. So 4011 ** the unixInodeInfo object contains a pointer to this unixShmNode object 4012 ** and the unixShmNode object is created only when needed. 
4013 ** 4014 ** unixMutexHeld() must be true when creating or destroying 4015 ** this object or while reading or writing the following fields: 4016 ** 4017 ** nRef 4018 ** 4019 ** The following fields are read-only after the object is created: 4020 ** 4021 ** fid 4022 ** zFilename 4023 ** 4024 ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and 4025 ** unixMutexHeld() is true when reading or writing any other field 4026 ** in this structure. 4027 */ 4028 struct unixShmNode { 4029 unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ 4030 sqlite3_mutex *mutex; /* Mutex to access this object */ 4031 char *zFilename; /* Name of the mmapped file */ 4032 int h; /* Open file descriptor */ 4033 int szRegion; /* Size of shared-memory regions */ 4034 u16 nRegion; /* Size of array apRegion */ 4035 u8 isReadonly; /* True if read-only */ 4036 char **apRegion; /* Array of mapped shared-memory regions */ 4037 int nRef; /* Number of unixShm objects pointing to this */ 4038 unixShm *pFirst; /* All unixShm objects pointing to this */ 4039 #ifdef SQLITE_DEBUG 4040 u8 exclMask; /* Mask of exclusive locks held */ 4041 u8 sharedMask; /* Mask of shared locks held */ 4042 u8 nextShmId; /* Next available unixShm.id value */ 4043 #endif 4044 }; 4045 4046 /* 4047 ** Structure used internally by this VFS to record the state of an 4048 ** open shared memory connection. 4049 ** 4050 ** The following fields are initialized when this object is created and 4051 ** are read-only thereafter: 4052 ** 4053 ** unixShm.pFile 4054 ** unixShm.id 4055 ** 4056 ** All other fields are read/write. The unixShm.pFile->mutex must be held 4057 ** while accessing any read/write fields. 
4058 */ 4059 struct unixShm { 4060 unixShmNode *pShmNode; /* The underlying unixShmNode object */ 4061 unixShm *pNext; /* Next unixShm with the same unixShmNode */ 4062 u8 hasMutex; /* True if holding the unixShmNode mutex */ 4063 u8 id; /* Id of this connection within its unixShmNode */ 4064 u16 sharedMask; /* Mask of shared locks held */ 4065 u16 exclMask; /* Mask of exclusive locks held */ 4066 }; 4067 4068 /* 4069 ** Constants used for locking 4070 */ 4071 #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ 4072 #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ 4073 4074 /* 4075 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 4076 ** 4077 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 4078 ** otherwise. 4079 */ 4080 static int unixShmSystemLock( 4081 unixFile *pFile, /* Open connection to the WAL file */ 4082 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 4083 int ofst, /* First byte of the locking range */ 4084 int n /* Number of bytes to lock */ 4085 ){ 4086 unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ 4087 struct flock f; /* The posix advisory locking structure */ 4088 int rc = SQLITE_OK; /* Result code form fcntl() */ 4089 4090 /* Access to the unixShmNode object is serialized by the caller */ 4091 pShmNode = pFile->pInode->pShmNode; 4092 assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); 4093 4094 /* Shared locks never span more than one byte */ 4095 assert( n==1 || lockType!=F_RDLCK ); 4096 4097 /* Locks are within range */ 4098 assert( n>=1 && n<=SQLITE_SHM_NLOCK ); 4099 4100 if( pShmNode->h>=0 ){ 4101 /* Initialize the locking parameters */ 4102 memset(&f, 0, sizeof(f)); 4103 f.l_type = lockType; 4104 f.l_whence = SEEK_SET; 4105 f.l_start = ofst; 4106 f.l_len = n; 4107 4108 rc = osFcntl(pShmNode->h, F_SETLK, &f); 4109 rc = (rc!=(-1)) ? 
SQLITE_OK : SQLITE_BUSY; 4110 } 4111 4112 /* Update the global lock state and do debug tracing */ 4113 #ifdef SQLITE_DEBUG 4114 { u16 mask; 4115 OSTRACE(("SHM-LOCK ")); 4116 mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); 4117 if( rc==SQLITE_OK ){ 4118 if( lockType==F_UNLCK ){ 4119 OSTRACE(("unlock %d ok", ofst)); 4120 pShmNode->exclMask &= ~mask; 4121 pShmNode->sharedMask &= ~mask; 4122 }else if( lockType==F_RDLCK ){ 4123 OSTRACE(("read-lock %d ok", ofst)); 4124 pShmNode->exclMask &= ~mask; 4125 pShmNode->sharedMask |= mask; 4126 }else{ 4127 assert( lockType==F_WRLCK ); 4128 OSTRACE(("write-lock %d ok", ofst)); 4129 pShmNode->exclMask |= mask; 4130 pShmNode->sharedMask &= ~mask; 4131 } 4132 }else{ 4133 if( lockType==F_UNLCK ){ 4134 OSTRACE(("unlock %d failed", ofst)); 4135 }else if( lockType==F_RDLCK ){ 4136 OSTRACE(("read-lock failed")); 4137 }else{ 4138 assert( lockType==F_WRLCK ); 4139 OSTRACE(("write-lock %d failed", ofst)); 4140 } 4141 } 4142 OSTRACE((" - afterwards %03x,%03x\n", 4143 pShmNode->sharedMask, pShmNode->exclMask)); 4144 } 4145 #endif 4146 4147 return rc; 4148 } 4149 4150 /* 4151 ** Return the minimum number of 32KB shm regions that should be mapped at 4152 ** a time, assuming that each mapping must be an integer multiple of the 4153 ** current system page-size. 4154 ** 4155 ** Usually, this is 1. The exception seems to be systems that are configured 4156 ** to use 64KB pages - in this case each mapping must cover at least two 4157 ** shm regions. 4158 */ 4159 static int unixShmRegionPerMap(void){ 4160 int shmsz = 32*1024; /* SHM region size */ 4161 int pgsz = osGetpagesize(); /* System page size */ 4162 assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ 4163 if( pgsz<shmsz ) return 1; 4164 return pgsz/shmsz; 4165 } 4166 4167 /* 4168 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. 
4169 ** 4170 ** This is not a VFS shared-memory method; it is a utility function called 4171 ** by VFS shared-memory methods. 4172 */ 4173 static void unixShmPurge(unixFile *pFd){ 4174 unixShmNode *p = pFd->pInode->pShmNode; 4175 assert( unixMutexHeld() ); 4176 if( p && ALWAYS(p->nRef==0) ){ 4177 int nShmPerMap = unixShmRegionPerMap(); 4178 int i; 4179 assert( p->pInode==pFd->pInode ); 4180 sqlite3_mutex_free(p->mutex); 4181 for(i=0; i<p->nRegion; i+=nShmPerMap){ 4182 if( p->h>=0 ){ 4183 osMunmap(p->apRegion[i], p->szRegion); 4184 }else{ 4185 sqlite3_free(p->apRegion[i]); 4186 } 4187 } 4188 sqlite3_free(p->apRegion); 4189 if( p->h>=0 ){ 4190 robust_close(pFd, p->h, __LINE__); 4191 p->h = -1; 4192 } 4193 p->pInode->pShmNode = 0; 4194 sqlite3_free(p); 4195 } 4196 } 4197 4198 /* 4199 ** Open a shared-memory area associated with open database file pDbFd. 4200 ** This particular implementation uses mmapped files. 4201 ** 4202 ** The file used to implement shared-memory is in the same directory 4203 ** as the open database file and has the same name as the open database 4204 ** file with the "-shm" suffix added. For example, if the database file 4205 ** is "/home/user1/config.db" then the file that is created and mmapped 4206 ** for shared memory will be called "/home/user1/config.db-shm". 4207 ** 4208 ** Another approach to is to use files in /dev/shm or /dev/tmp or an 4209 ** some other tmpfs mount. But if a file in a different directory 4210 ** from the database file is used, then differing access permissions 4211 ** or a chroot() might cause two different processes on the same 4212 ** database to end up using different files for shared memory - 4213 ** meaning that their memory would not really be shared - resulting 4214 ** in database corruption. Nevertheless, this tmpfs file usage 4215 ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" 4216 ** or the equivalent. 
The use of the SQLITE_SHM_DIRECTORY compile-time 4217 ** option results in an incompatible build of SQLite; builds of SQLite 4218 ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the 4219 ** same database file at the same time, database corruption will likely 4220 ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered 4221 ** "unsupported" and may go away in a future SQLite release. 4222 ** 4223 ** When opening a new shared-memory file, if no other instances of that 4224 ** file are currently open, in this process or in other processes, then 4225 ** the file must be truncated to zero length or have its header cleared. 4226 ** 4227 ** If the original database file (pDbFd) is using the "unix-excl" VFS 4228 ** that means that an exclusive lock is held on the database file and 4229 ** that no other processes are able to read or write the database. In 4230 ** that case, we do not really need shared memory. No shared memory 4231 ** file is created. The shared memory will be simulated with heap memory. 4232 */ 4233 static int unixOpenSharedMemory(unixFile *pDbFd){ 4234 struct unixShm *p = 0; /* The connection to be opened */ 4235 struct unixShmNode *pShmNode; /* The underlying mmapped file */ 4236 int rc; /* Result code */ 4237 unixInodeInfo *pInode; /* The inode of fd */ 4238 char *zShmFilename; /* Name of the file used for SHM */ 4239 int nShmFilename; /* Size of the SHM filename in bytes */ 4240 4241 /* Allocate space for the new unixShm object. */ 4242 p = sqlite3_malloc64( sizeof(*p) ); 4243 if( p==0 ) return SQLITE_NOMEM_BKPT; 4244 memset(p, 0, sizeof(*p)); 4245 assert( pDbFd->pShm==0 ); 4246 4247 /* Check to see if a unixShmNode object already exists. Reuse an existing 4248 ** one if present. Create a new one if necessary. 
4249 */ 4250 unixEnterMutex(); 4251 pInode = pDbFd->pInode; 4252 pShmNode = pInode->pShmNode; 4253 if( pShmNode==0 ){ 4254 struct stat sStat; /* fstat() info for database file */ 4255 #ifndef SQLITE_SHM_DIRECTORY 4256 const char *zBasePath = pDbFd->zPath; 4257 #endif 4258 4259 /* Call fstat() to figure out the permissions on the database file. If 4260 ** a new *-shm file is created, an attempt will be made to create it 4261 ** with the same permissions. 4262 */ 4263 if( osFstat(pDbFd->h, &sStat) ){ 4264 rc = SQLITE_IOERR_FSTAT; 4265 goto shm_open_err; 4266 } 4267 4268 #ifdef SQLITE_SHM_DIRECTORY 4269 nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; 4270 #else 4271 nShmFilename = 6 + (int)strlen(zBasePath); 4272 #endif 4273 pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); 4274 if( pShmNode==0 ){ 4275 rc = SQLITE_NOMEM_BKPT; 4276 goto shm_open_err; 4277 } 4278 memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); 4279 zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1]; 4280 #ifdef SQLITE_SHM_DIRECTORY 4281 sqlite3_snprintf(nShmFilename, zShmFilename, 4282 SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", 4283 (u32)sStat.st_ino, (u32)sStat.st_dev); 4284 #else 4285 sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", zBasePath); 4286 sqlite3FileSuffix3(pDbFd->zPath, zShmFilename); 4287 #endif 4288 pShmNode->h = -1; 4289 pDbFd->pInode->pShmNode = pShmNode; 4290 pShmNode->pInode = pDbFd->pInode; 4291 if( sqlite3GlobalConfig.bCoreMutex ){ 4292 pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 4293 if( pShmNode->mutex==0 ){ 4294 rc = SQLITE_NOMEM_BKPT; 4295 goto shm_open_err; 4296 } 4297 } 4298 4299 if( pInode->bProcessLock==0 ){ 4300 int openFlags = O_RDWR | O_CREAT; 4301 if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ 4302 openFlags = O_RDONLY; 4303 pShmNode->isReadonly = 1; 4304 } 4305 pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777)); 4306 if( pShmNode->h<0 ){ 4307 rc = unixLogError(SQLITE_CANTOPEN_BKPT, 
"open", zShmFilename); 4308 goto shm_open_err; 4309 } 4310 4311 /* If this process is running as root, make sure that the SHM file 4312 ** is owned by the same user that owns the original database. Otherwise, 4313 ** the original owner will not be able to connect. 4314 */ 4315 robustFchown(pShmNode->h, sStat.st_uid, sStat.st_gid); 4316 4317 /* Check to see if another process is holding the dead-man switch. 4318 ** If not, truncate the file to zero length. 4319 */ 4320 rc = SQLITE_OK; 4321 if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ 4322 if( robust_ftruncate(pShmNode->h, 0) ){ 4323 rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); 4324 } 4325 } 4326 if( rc==SQLITE_OK ){ 4327 rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); 4328 } 4329 if( rc ) goto shm_open_err; 4330 } 4331 } 4332 4333 /* Make the new connection a child of the unixShmNode */ 4334 p->pShmNode = pShmNode; 4335 #ifdef SQLITE_DEBUG 4336 p->id = pShmNode->nextShmId++; 4337 #endif 4338 pShmNode->nRef++; 4339 pDbFd->pShm = p; 4340 unixLeaveMutex(); 4341 4342 /* The reference count on pShmNode has already been incremented under 4343 ** the cover of the unixEnterMutex() mutex and the pointer from the 4344 ** new (struct unixShm) object to the pShmNode has been set. All that is 4345 ** left to do is to link the new object into the linked list starting 4346 ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex 4347 ** mutex. 4348 */ 4349 sqlite3_mutex_enter(pShmNode->mutex); 4350 p->pNext = pShmNode->pFirst; 4351 pShmNode->pFirst = p; 4352 sqlite3_mutex_leave(pShmNode->mutex); 4353 return SQLITE_OK; 4354 4355 /* Jump here on any error */ 4356 shm_open_err: 4357 unixShmPurge(pDbFd); /* This call frees pShmNode if required */ 4358 sqlite3_free(p); 4359 unixLeaveMutex(); 4360 return rc; 4361 } 4362 4363 /* 4364 ** This function is called to obtain a pointer to region iRegion of the 4365 ** shared-memory associated with the database file fd. 
Shared-memory regions
** are numbered starting from zero.  Each shared-memory region is szRegion
** bytes in size.
**
** If an error occurs, an error code is returned and *pp is set to NULL.
**
** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
** region has not been allocated (by any client, including one running in a
** separate process), then *pp is set to NULL and SQLITE_OK returned.  If
** bExtend is non-zero and the requested shared-memory region has not yet
** been allocated, it is allocated by this function.
**
** If the shared-memory region has already been allocated or is allocated by
** this call as described above, then it is mapped into this processes
** address space (if it is not already), *pp is set to point to the mapped
** memory and SQLITE_OK returned.  If the shared memory was opened
** read-only, SQLITE_READONLY is returned in place of SQLITE_OK.
**
** Regions are created in groups of unixShmRegionPerMap() regions.  Each
** group is a single mmap() of the shared-memory file or, when there is no
** such file (pShmNode->h<0), a single zeroed heap allocation.  In both
** cases the group covers szRegion*nShmPerMap bytes.
*/
static int unixShmMap(
  sqlite3_file *fd,               /* Handle open on database file */
  int iRegion,                    /* Region to retrieve */
  int szRegion,                   /* Size of regions */
  int bExtend,                    /* True to extend file if necessary */
  void volatile **pp              /* OUT: Mapped memory */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p;
  unixShmNode *pShmNode;
  int rc = SQLITE_OK;
  int nShmPerMap = unixShmRegionPerMap();
  int nReqRegion;

  /* If the shared-memory file has not yet been opened, open it now. */
  if( pDbFd->pShm==0 ){
    rc = unixOpenSharedMemory(pDbFd);
    if( rc!=SQLITE_OK ) return rc;
  }

  p = pDbFd->pShm;
  pShmNode = p->pShmNode;
  sqlite3_mutex_enter(pShmNode->mutex);
  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );

  /* Minimum number of regions required to be mapped. */
  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;

  if( pShmNode->nRegion<nReqRegion ){
    char **apNew;                      /* New apRegion[] array */
    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
    struct stat sStat;                 /* Used by fstat() */

    pShmNode->szRegion = szRegion;

    if( pShmNode->h>=0 ){
      /* The requested region is not mapped into this processes address space.
      ** Check to see if it has been allocated (i.e. if the wal-index file is
      ** large enough to contain the requested region).
      */
      if( osFstat(pShmNode->h, &sStat) ){
        rc = SQLITE_IOERR_SHMSIZE;
        goto shmpage_out;
      }

      if( sStat.st_size<nByte ){
        /* The requested memory region does not exist.  If bExtend is set to
        ** false, exit early.  *pp will be set to NULL and SQLITE_OK returned.
        */
        if( !bExtend ){
          goto shmpage_out;
        }

        /* Alternatively, if bExtend is true, extend the file.  Do this by
        ** writing a single byte to the end of each (OS) page being
        ** allocated or extended.  Technically, we need only write to the
        ** last page in order to extend the file.  But writing to all new
        ** pages forces the OS to allocate them immediately, which reduces
        ** the chances of SIGBUS while accessing the mapped region later on.
        */
        else{
          static const int pgsz = 4096;
          int iPg;

          /* Write to the last byte of each newly allocated or extended page */
          assert( (nByte % pgsz)==0 );
          for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
            int x = 0;
            if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, &x)!=1 ){
              const char *zFile = pShmNode->zFilename;
              rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
              goto shmpage_out;
            }
          }
        }
      }
    }

    /* Map the requested memory region into this processes address space. */
    apNew = (char **)sqlite3_realloc(
        pShmNode->apRegion, nReqRegion*sizeof(char *)
    );
    if( !apNew ){
      rc = SQLITE_IOERR_NOMEM_BKPT;
      goto shmpage_out;
    }
    pShmNode->apRegion = apNew;
    while( pShmNode->nRegion<nReqRegion ){
      int nMap = szRegion*nShmPerMap;  /* Bytes covered by one group */
      int i;
      void *pMem;
      if( pShmNode->h>=0 ){
        pMem = osMmap(0, nMap,
            pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
            MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
        );
        if( pMem==MAP_FAILED ){
          rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
          goto shmpage_out;
        }
      }else{
        /* The loop below carves nShmPerMap regions out of pMem, so the
        ** heap buffer must span the whole group (nMap bytes), exactly as
        ** the mmap() above does.  Allocating only szRegion bytes would
        ** leave every region after the first pointing past the end of
        ** the allocation whenever nShmPerMap>1.
        */
        pMem = sqlite3_malloc64(nMap);
        if( pMem==0 ){
          rc = SQLITE_NOMEM_BKPT;
          goto shmpage_out;
        }
        memset(pMem, 0, nMap);
      }

      for(i=0; i<nShmPerMap; i++){
        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
      }
      pShmNode->nRegion += nShmPerMap;
    }
  }

shmpage_out:
  if( pShmNode->nRegion>iRegion ){
    *pp = pShmNode->apRegion[iRegion];
  }else{
    *pp = 0;
  }
  if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
  sqlite3_mutex_leave(pShmNode->mutex);
  return rc;
}

/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
** different here than in posix.  In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back.  But one may
** not go from shared to exclusive or from exclusive to shared.
4519 */ 4520 static int unixShmLock( 4521 sqlite3_file *fd, /* Database file holding the shared memory */ 4522 int ofst, /* First lock to acquire or release */ 4523 int n, /* Number of locks to acquire or release */ 4524 int flags /* What to do with the lock */ 4525 ){ 4526 unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ 4527 unixShm *p = pDbFd->pShm; /* The shared memory being locked */ 4528 unixShm *pX; /* For looping over all siblings */ 4529 unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ 4530 int rc = SQLITE_OK; /* Result code */ 4531 u16 mask; /* Mask of locks to take or release */ 4532 4533 assert( pShmNode==pDbFd->pInode->pShmNode ); 4534 assert( pShmNode->pInode==pDbFd->pInode ); 4535 assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); 4536 assert( n>=1 ); 4537 assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) 4538 || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) 4539 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) 4540 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); 4541 assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); 4542 assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); 4543 assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); 4544 4545 mask = (1<<(ofst+n)) - (1<<ofst); 4546 assert( n>1 || mask==(1<<ofst) ); 4547 sqlite3_mutex_enter(pShmNode->mutex); 4548 if( flags & SQLITE_SHM_UNLOCK ){ 4549 u16 allMask = 0; /* Mask of locks held by siblings */ 4550 4551 /* See if any siblings hold this same lock */ 4552 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4553 if( pX==p ) continue; 4554 assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); 4555 allMask |= pX->sharedMask; 4556 } 4557 4558 /* Unlock the system-level locks */ 4559 if( (mask & allMask)==0 ){ 4560 rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); 4561 }else{ 4562 rc = SQLITE_OK; 4563 } 4564 4565 /* Undo the local locks */ 4566 if( rc==SQLITE_OK ){ 4567 p->exclMask &= ~mask; 4568 p->sharedMask &= ~mask; 
4569 } 4570 }else if( flags & SQLITE_SHM_SHARED ){ 4571 u16 allShared = 0; /* Union of locks held by connections other than "p" */ 4572 4573 /* Find out which shared locks are already held by sibling connections. 4574 ** If any sibling already holds an exclusive lock, go ahead and return 4575 ** SQLITE_BUSY. 4576 */ 4577 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4578 if( (pX->exclMask & mask)!=0 ){ 4579 rc = SQLITE_BUSY; 4580 break; 4581 } 4582 allShared |= pX->sharedMask; 4583 } 4584 4585 /* Get shared locks at the system level, if necessary */ 4586 if( rc==SQLITE_OK ){ 4587 if( (allShared & mask)==0 ){ 4588 rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4589 }else{ 4590 rc = SQLITE_OK; 4591 } 4592 } 4593 4594 /* Get the local shared locks */ 4595 if( rc==SQLITE_OK ){ 4596 p->sharedMask |= mask; 4597 } 4598 }else{ 4599 /* Make sure no sibling connections hold locks that will block this 4600 ** lock. If any do, return SQLITE_BUSY right away. 4601 */ 4602 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4603 if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ 4604 rc = SQLITE_BUSY; 4605 break; 4606 } 4607 } 4608 4609 /* Get the exclusive locks at the system level. Then if successful 4610 ** also mark the local connection as being locked. 4611 */ 4612 if( rc==SQLITE_OK ){ 4613 rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); 4614 if( rc==SQLITE_OK ){ 4615 assert( (p->sharedMask & mask)==0 ); 4616 p->exclMask |= mask; 4617 } 4618 } 4619 } 4620 sqlite3_mutex_leave(pShmNode->mutex); 4621 OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", 4622 p->id, osGetpid(0), p->sharedMask, p->exclMask)); 4623 return rc; 4624 } 4625 4626 /* 4627 ** Implement a memory barrier or memory fence on shared memory. 4628 ** 4629 ** All loads and stores begun before the barrier must complete before 4630 ** any load or store begun after the barrier. 
4631 */ 4632 static void unixShmBarrier( 4633 sqlite3_file *fd /* Database file holding the shared memory */ 4634 ){ 4635 UNUSED_PARAMETER(fd); 4636 sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ 4637 unixEnterMutex(); /* Also mutex, for redundancy */ 4638 unixLeaveMutex(); 4639 } 4640 4641 /* 4642 ** Close a connection to shared-memory. Delete the underlying 4643 ** storage if deleteFlag is true. 4644 ** 4645 ** If there is no shared memory associated with the connection then this 4646 ** routine is a harmless no-op. 4647 */ 4648 static int unixShmUnmap( 4649 sqlite3_file *fd, /* The underlying database file */ 4650 int deleteFlag /* Delete shared-memory if true */ 4651 ){ 4652 unixShm *p; /* The connection to be closed */ 4653 unixShmNode *pShmNode; /* The underlying shared-memory file */ 4654 unixShm **pp; /* For looping over sibling connections */ 4655 unixFile *pDbFd; /* The underlying database file */ 4656 4657 pDbFd = (unixFile*)fd; 4658 p = pDbFd->pShm; 4659 if( p==0 ) return SQLITE_OK; 4660 pShmNode = p->pShmNode; 4661 4662 assert( pShmNode==pDbFd->pInode->pShmNode ); 4663 assert( pShmNode->pInode==pDbFd->pInode ); 4664 4665 /* Remove connection p from the set of connections associated 4666 ** with pShmNode */ 4667 sqlite3_mutex_enter(pShmNode->mutex); 4668 for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} 4669 *pp = p->pNext; 4670 4671 /* Free the connection p */ 4672 sqlite3_free(p); 4673 pDbFd->pShm = 0; 4674 sqlite3_mutex_leave(pShmNode->mutex); 4675 4676 /* If pShmNode->nRef has reached 0, then close the underlying 4677 ** shared-memory file, too */ 4678 unixEnterMutex(); 4679 assert( pShmNode->nRef>0 ); 4680 pShmNode->nRef--; 4681 if( pShmNode->nRef==0 ){ 4682 if( deleteFlag && pShmNode->h>=0 ){ 4683 osUnlink(pShmNode->zFilename); 4684 } 4685 unixShmPurge(pDbFd); 4686 } 4687 unixLeaveMutex(); 4688 4689 return SQLITE_OK; 4690 } 4691 4692 4693 #else 4694 # define unixShmMap 0 4695 # define unixShmLock 0 4696 # define 
unixShmBarrier 0 4697 # define unixShmUnmap 0 4698 #endif /* #ifndef SQLITE_OMIT_WAL */ 4699 4700 #if SQLITE_MAX_MMAP_SIZE>0 4701 /* 4702 ** If it is currently memory mapped, unmap file pFd. 4703 */ 4704 static void unixUnmapfile(unixFile *pFd){ 4705 assert( pFd->nFetchOut==0 ); 4706 if( pFd->pMapRegion ){ 4707 osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); 4708 pFd->pMapRegion = 0; 4709 pFd->mmapSize = 0; 4710 pFd->mmapSizeActual = 0; 4711 } 4712 } 4713 4714 /* 4715 ** Attempt to set the size of the memory mapping maintained by file 4716 ** descriptor pFd to nNew bytes. Any existing mapping is discarded. 4717 ** 4718 ** If successful, this function sets the following variables: 4719 ** 4720 ** unixFile.pMapRegion 4721 ** unixFile.mmapSize 4722 ** unixFile.mmapSizeActual 4723 ** 4724 ** If unsuccessful, an error message is logged via sqlite3_log() and 4725 ** the three variables above are zeroed. In this case SQLite should 4726 ** continue accessing the database using the xRead() and xWrite() 4727 ** methods. 
4728 */ 4729 static void unixRemapfile( 4730 unixFile *pFd, /* File descriptor object */ 4731 i64 nNew /* Required mapping size */ 4732 ){ 4733 const char *zErr = "mmap"; 4734 int h = pFd->h; /* File descriptor open on db file */ 4735 u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ 4736 i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ 4737 u8 *pNew = 0; /* Location of new mapping */ 4738 int flags = PROT_READ; /* Flags to pass to mmap() */ 4739 4740 assert( pFd->nFetchOut==0 ); 4741 assert( nNew>pFd->mmapSize ); 4742 assert( nNew<=pFd->mmapSizeMax ); 4743 assert( nNew>0 ); 4744 assert( pFd->mmapSizeActual>=pFd->mmapSize ); 4745 assert( MAP_FAILED!=0 ); 4746 4747 #ifdef SQLITE_MMAP_READWRITE 4748 if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; 4749 #endif 4750 4751 if( pOrig ){ 4752 #if HAVE_MREMAP 4753 i64 nReuse = pFd->mmapSize; 4754 #else 4755 const int szSyspage = osGetpagesize(); 4756 i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); 4757 #endif 4758 u8 *pReq = &pOrig[nReuse]; 4759 4760 /* Unmap any pages of the existing mapping that cannot be reused. */ 4761 if( nReuse!=nOrig ){ 4762 osMunmap(pReq, nOrig-nReuse); 4763 } 4764 4765 #if HAVE_MREMAP 4766 pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); 4767 zErr = "mremap"; 4768 #else 4769 pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); 4770 if( pNew!=MAP_FAILED ){ 4771 if( pNew!=pReq ){ 4772 osMunmap(pNew, nNew - nReuse); 4773 pNew = 0; 4774 }else{ 4775 pNew = pOrig; 4776 } 4777 } 4778 #endif 4779 4780 /* The attempt to extend the existing mapping failed. Free it. */ 4781 if( pNew==MAP_FAILED || pNew==0 ){ 4782 osMunmap(pOrig, nReuse); 4783 } 4784 } 4785 4786 /* If pNew is still NULL, try to create an entirely new mapping. 
*/ 4787 if( pNew==0 ){ 4788 pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); 4789 } 4790 4791 if( pNew==MAP_FAILED ){ 4792 pNew = 0; 4793 nNew = 0; 4794 unixLogError(SQLITE_OK, zErr, pFd->zPath); 4795 4796 /* If the mmap() above failed, assume that all subsequent mmap() calls 4797 ** will probably fail too. Fall back to using xRead/xWrite exclusively 4798 ** in this case. */ 4799 pFd->mmapSizeMax = 0; 4800 } 4801 pFd->pMapRegion = (void *)pNew; 4802 pFd->mmapSize = pFd->mmapSizeActual = nNew; 4803 } 4804 4805 /* 4806 ** Memory map or remap the file opened by file-descriptor pFd (if the file 4807 ** is already mapped, the existing mapping is replaced by the new). Or, if 4808 ** there already exists a mapping for this file, and there are still 4809 ** outstanding xFetch() references to it, this function is a no-op. 4810 ** 4811 ** If parameter nByte is non-negative, then it is the requested size of 4812 ** the mapping to create. Otherwise, if nByte is less than zero, then the 4813 ** requested size is the size of the file on disk. The actual size of the 4814 ** created mapping is either the requested size or the value configured 4815 ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. 4816 ** 4817 ** SQLITE_OK is returned if no error occurs (even if the mapping is not 4818 ** recreated as a result of outstanding references) or an SQLite error 4819 ** code otherwise. 
4820 */ 4821 static int unixMapfile(unixFile *pFd, i64 nMap){ 4822 assert( nMap>=0 || pFd->nFetchOut==0 ); 4823 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 4824 if( pFd->nFetchOut>0 ) return SQLITE_OK; 4825 4826 if( nMap<0 ){ 4827 struct stat statbuf; /* Low-level file information */ 4828 if( osFstat(pFd->h, &statbuf) ){ 4829 return SQLITE_IOERR_FSTAT; 4830 } 4831 nMap = statbuf.st_size; 4832 } 4833 if( nMap>pFd->mmapSizeMax ){ 4834 nMap = pFd->mmapSizeMax; 4835 } 4836 4837 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 4838 if( nMap!=pFd->mmapSize ){ 4839 unixRemapfile(pFd, nMap); 4840 } 4841 4842 return SQLITE_OK; 4843 } 4844 #endif /* SQLITE_MAX_MMAP_SIZE>0 */ 4845 4846 /* 4847 ** If possible, return a pointer to a mapping of file fd starting at offset 4848 ** iOff. The mapping must be valid for at least nAmt bytes. 4849 ** 4850 ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. 4851 ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. 4852 ** Finally, if an error does occur, return an SQLite error code. The final 4853 ** value of *pp is undefined in this case. 4854 ** 4855 ** If this function does return a pointer, the caller must eventually 4856 ** release the reference by calling unixUnfetch(). 
4857 */ 4858 static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ 4859 #if SQLITE_MAX_MMAP_SIZE>0 4860 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 4861 #endif 4862 *pp = 0; 4863 4864 #if SQLITE_MAX_MMAP_SIZE>0 4865 if( pFd->mmapSizeMax>0 ){ 4866 if( pFd->pMapRegion==0 ){ 4867 int rc = unixMapfile(pFd, -1); 4868 if( rc!=SQLITE_OK ) return rc; 4869 } 4870 if( pFd->mmapSize >= iOff+nAmt ){ 4871 *pp = &((u8 *)pFd->pMapRegion)[iOff]; 4872 pFd->nFetchOut++; 4873 } 4874 } 4875 #endif 4876 return SQLITE_OK; 4877 } 4878 4879 /* 4880 ** If the third argument is non-NULL, then this function releases a 4881 ** reference obtained by an earlier call to unixFetch(). The second 4882 ** argument passed to this function must be the same as the corresponding 4883 ** argument that was passed to the unixFetch() invocation. 4884 ** 4885 ** Or, if the third argument is NULL, then this function is being called 4886 ** to inform the VFS layer that, according to POSIX, any existing mapping 4887 ** may now be invalid and should be unmapped. 4888 */ 4889 static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ 4890 #if SQLITE_MAX_MMAP_SIZE>0 4891 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 4892 UNUSED_PARAMETER(iOff); 4893 4894 /* If p==0 (unmap the entire file) then there must be no outstanding 4895 ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), 4896 ** then there must be at least one outstanding. */ 4897 assert( (p==0)==(pFd->nFetchOut==0) ); 4898 4899 /* If p!=0, it must match the iOff value. */ 4900 assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); 4901 4902 if( p ){ 4903 pFd->nFetchOut--; 4904 }else{ 4905 unixUnmapfile(pFd); 4906 } 4907 4908 assert( pFd->nFetchOut>=0 ); 4909 #else 4910 UNUSED_PARAMETER(fd); 4911 UNUSED_PARAMETER(p); 4912 UNUSED_PARAMETER(iOff); 4913 #endif 4914 return SQLITE_OK; 4915 } 4916 4917 /* 4918 ** Here ends the implementation of all sqlite3_file methods. 
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/

/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies.  It also contains definitions
** of "finder" functions.  A finder-function is used to locate the appropriate
** sqlite3_io_methods object for a particular database file.  The pAppData
** fields of the sqlite3_vfs VFS objects are initialized to be pointers to
** the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object.  The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from being cast into a function pointer.
**
**
** Each instance of this macro generates two objects:
**
**   *  A constant sqlite3_io_methods object called METHOD that has locking
**      methods CLOSE, LOCK, UNLOCK, CKRESLOCK.
**
**   *  An I/O method finder function called FINDER that returns a pointer
**      to the METHOD object in the previous bullet.
4956 */ 4957 #define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ 4958 static const sqlite3_io_methods METHOD = { \ 4959 VERSION, /* iVersion */ \ 4960 CLOSE, /* xClose */ \ 4961 unixRead, /* xRead */ \ 4962 unixWrite, /* xWrite */ \ 4963 unixTruncate, /* xTruncate */ \ 4964 unixSync, /* xSync */ \ 4965 unixFileSize, /* xFileSize */ \ 4966 LOCK, /* xLock */ \ 4967 UNLOCK, /* xUnlock */ \ 4968 CKLOCK, /* xCheckReservedLock */ \ 4969 unixFileControl, /* xFileControl */ \ 4970 unixSectorSize, /* xSectorSize */ \ 4971 unixDeviceCharacteristics, /* xDeviceCapabilities */ \ 4972 SHMMAP, /* xShmMap */ \ 4973 unixShmLock, /* xShmLock */ \ 4974 unixShmBarrier, /* xShmBarrier */ \ 4975 unixShmUnmap, /* xShmUnmap */ \ 4976 unixFetch, /* xFetch */ \ 4977 unixUnfetch, /* xUnfetch */ \ 4978 }; \ 4979 static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ 4980 UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ 4981 return &METHOD; \ 4982 } \ 4983 static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ 4984 = FINDER##Impl; 4985 4986 /* 4987 ** Here are all of the sqlite3_io_methods objects for each of the 4988 ** locking strategies. Functions that return pointers to these methods 4989 ** are also created. 
4990 */ 4991 IOMETHODS( 4992 posixIoFinder, /* Finder function name */ 4993 posixIoMethods, /* sqlite3_io_methods object name */ 4994 3, /* shared memory and mmap are enabled */ 4995 unixClose, /* xClose method */ 4996 unixLock, /* xLock method */ 4997 unixUnlock, /* xUnlock method */ 4998 unixCheckReservedLock, /* xCheckReservedLock method */ 4999 unixShmMap /* xShmMap method */ 5000 ) 5001 IOMETHODS( 5002 nolockIoFinder, /* Finder function name */ 5003 nolockIoMethods, /* sqlite3_io_methods object name */ 5004 3, /* shared memory is disabled */ 5005 nolockClose, /* xClose method */ 5006 nolockLock, /* xLock method */ 5007 nolockUnlock, /* xUnlock method */ 5008 nolockCheckReservedLock, /* xCheckReservedLock method */ 5009 0 /* xShmMap method */ 5010 ) 5011 IOMETHODS( 5012 dotlockIoFinder, /* Finder function name */ 5013 dotlockIoMethods, /* sqlite3_io_methods object name */ 5014 1, /* shared memory is disabled */ 5015 dotlockClose, /* xClose method */ 5016 dotlockLock, /* xLock method */ 5017 dotlockUnlock, /* xUnlock method */ 5018 dotlockCheckReservedLock, /* xCheckReservedLock method */ 5019 0 /* xShmMap method */ 5020 ) 5021 5022 #if SQLITE_ENABLE_LOCKING_STYLE 5023 IOMETHODS( 5024 flockIoFinder, /* Finder function name */ 5025 flockIoMethods, /* sqlite3_io_methods object name */ 5026 1, /* shared memory is disabled */ 5027 flockClose, /* xClose method */ 5028 flockLock, /* xLock method */ 5029 flockUnlock, /* xUnlock method */ 5030 flockCheckReservedLock, /* xCheckReservedLock method */ 5031 0 /* xShmMap method */ 5032 ) 5033 #endif 5034 5035 #if OS_VXWORKS 5036 IOMETHODS( 5037 semIoFinder, /* Finder function name */ 5038 semIoMethods, /* sqlite3_io_methods object name */ 5039 1, /* shared memory is disabled */ 5040 semXClose, /* xClose method */ 5041 semXLock, /* xLock method */ 5042 semXUnlock, /* xUnlock method */ 5043 semXCheckReservedLock, /* xCheckReservedLock method */ 5044 0 /* xShmMap method */ 5045 ) 5046 #endif 5047 5048 #if defined(__APPLE__) && 
SQLITE_ENABLE_LOCKING_STYLE 5049 IOMETHODS( 5050 afpIoFinder, /* Finder function name */ 5051 afpIoMethods, /* sqlite3_io_methods object name */ 5052 1, /* shared memory is disabled */ 5053 afpClose, /* xClose method */ 5054 afpLock, /* xLock method */ 5055 afpUnlock, /* xUnlock method */ 5056 afpCheckReservedLock, /* xCheckReservedLock method */ 5057 0 /* xShmMap method */ 5058 ) 5059 #endif 5060 5061 /* 5062 ** The proxy locking method is a "super-method" in the sense that it 5063 ** opens secondary file descriptors for the conch and lock files and 5064 ** it uses proxy, dot-file, AFP, and flock() locking methods on those 5065 ** secondary files. For this reason, the division that implements 5066 ** proxy locking is located much further down in the file. But we need 5067 ** to go ahead and define the sqlite3_io_methods and finder function 5068 ** for proxy locking here. So we forward declare the I/O methods. 5069 */ 5070 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5071 static int proxyClose(sqlite3_file*); 5072 static int proxyLock(sqlite3_file*, int); 5073 static int proxyUnlock(sqlite3_file*, int); 5074 static int proxyCheckReservedLock(sqlite3_file*, int*); 5075 IOMETHODS( 5076 proxyIoFinder, /* Finder function name */ 5077 proxyIoMethods, /* sqlite3_io_methods object name */ 5078 1, /* shared memory is disabled */ 5079 proxyClose, /* xClose method */ 5080 proxyLock, /* xLock method */ 5081 proxyUnlock, /* xUnlock method */ 5082 proxyCheckReservedLock, /* xCheckReservedLock method */ 5083 0 /* xShmMap method */ 5084 ) 5085 #endif 5086 5087 /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ 5088 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5089 IOMETHODS( 5090 nfsIoFinder, /* Finder function name */ 5091 nfsIoMethods, /* sqlite3_io_methods object name */ 5092 1, /* shared memory is disabled */ 5093 unixClose, /* xClose method */ 5094 unixLock, /* xLock method */ 5095 nfsUnlock, /* xUnlock method */ 5096 
unixCheckReservedLock, /* xCheckReservedLock method */ 5097 0 /* xShmMap method */ 5098 ) 5099 #endif 5100 5101 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5102 /* 5103 ** This "finder" function attempts to determine the best locking strategy 5104 ** for the database file "filePath". It then returns the sqlite3_io_methods 5105 ** object that implements that strategy. 5106 ** 5107 ** This is for MacOSX only. 5108 */ 5109 static const sqlite3_io_methods *autolockIoFinderImpl( 5110 const char *filePath, /* name of the database file */ 5111 unixFile *pNew /* open file object for the database file */ 5112 ){ 5113 static const struct Mapping { 5114 const char *zFilesystem; /* Filesystem type name */ 5115 const sqlite3_io_methods *pMethods; /* Appropriate locking method */ 5116 } aMap[] = { 5117 { "hfs", &posixIoMethods }, 5118 { "ufs", &posixIoMethods }, 5119 { "afpfs", &afpIoMethods }, 5120 { "smbfs", &afpIoMethods }, 5121 { "webdav", &nolockIoMethods }, 5122 { 0, 0 } 5123 }; 5124 int i; 5125 struct statfs fsInfo; 5126 struct flock lockInfo; 5127 5128 if( !filePath ){ 5129 /* If filePath==NULL that means we are dealing with a transient file 5130 ** that does not need to be locked. */ 5131 return &nolockIoMethods; 5132 } 5133 if( statfs(filePath, &fsInfo) != -1 ){ 5134 if( fsInfo.f_flags & MNT_RDONLY ){ 5135 return &nolockIoMethods; 5136 } 5137 for(i=0; aMap[i].zFilesystem; i++){ 5138 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 5139 return aMap[i].pMethods; 5140 } 5141 } 5142 } 5143 5144 /* Default case. Handles, amongst others, "nfs". 5145 ** Test byte-range lock using fcntl(). If the call succeeds, 5146 ** assume that the file-system supports POSIX style locks. 
5147 */ 5148 lockInfo.l_len = 1; 5149 lockInfo.l_start = 0; 5150 lockInfo.l_whence = SEEK_SET; 5151 lockInfo.l_type = F_RDLCK; 5152 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5153 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ 5154 return &nfsIoMethods; 5155 } else { 5156 return &posixIoMethods; 5157 } 5158 }else{ 5159 return &dotlockIoMethods; 5160 } 5161 } 5162 static const sqlite3_io_methods 5163 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 5164 5165 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 5166 5167 #if OS_VXWORKS 5168 /* 5169 ** This "finder" function for VxWorks checks to see if posix advisory 5170 ** locking works. If it does, then that is what is used. If it does not 5171 ** work, then fallback to named semaphore locking. 5172 */ 5173 static const sqlite3_io_methods *vxworksIoFinderImpl( 5174 const char *filePath, /* name of the database file */ 5175 unixFile *pNew /* the open file object */ 5176 ){ 5177 struct flock lockInfo; 5178 5179 if( !filePath ){ 5180 /* If filePath==NULL that means we are dealing with a transient file 5181 ** that does not need to be locked. */ 5182 return &nolockIoMethods; 5183 } 5184 5185 /* Test if fcntl() is supported and use POSIX style locks. 5186 ** Otherwise fall back to the named semaphore method. 
5187 */ 5188 lockInfo.l_len = 1; 5189 lockInfo.l_start = 0; 5190 lockInfo.l_whence = SEEK_SET; 5191 lockInfo.l_type = F_RDLCK; 5192 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5193 return &posixIoMethods; 5194 }else{ 5195 return &semIoMethods; 5196 } 5197 } 5198 static const sqlite3_io_methods 5199 *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; 5200 5201 #endif /* OS_VXWORKS */ 5202 5203 /* 5204 ** An abstract type for a pointer to an IO method finder function: 5205 */ 5206 typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); 5207 5208 5209 /**************************************************************************** 5210 **************************** sqlite3_vfs methods **************************** 5211 ** 5212 ** This division contains the implementation of methods on the 5213 ** sqlite3_vfs object. 5214 */ 5215 5216 /* 5217 ** Initialize the contents of the unixFile structure pointed to by pId. 5218 */ 5219 static int fillInUnixFile( 5220 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 5221 int h, /* Open file descriptor of file being opened */ 5222 sqlite3_file *pId, /* Write to the unixFile structure here */ 5223 const char *zFilename, /* Name of the file being opened */ 5224 int ctrlFlags /* Zero or more UNIXFILE_* values */ 5225 ){ 5226 const sqlite3_io_methods *pLockingStyle; 5227 unixFile *pNew = (unixFile *)pId; 5228 int rc = SQLITE_OK; 5229 5230 assert( pNew->pInode==NULL ); 5231 5232 /* Usually the path zFilename should not be a relative pathname. The 5233 ** exception is when opening the proxy "conch" file in builds that 5234 ** include the special Apple locking styles. 
5235 */ 5236 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5237 assert( zFilename==0 || zFilename[0]=='/' 5238 || pVfs->pAppData==(void*)&autolockIoFinder ); 5239 #else 5240 assert( zFilename==0 || zFilename[0]=='/' ); 5241 #endif 5242 5243 /* No locking occurs in temporary files */ 5244 assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); 5245 5246 OSTRACE(("OPEN %-3d %s\n", h, zFilename)); 5247 pNew->h = h; 5248 pNew->pVfs = pVfs; 5249 pNew->zPath = zFilename; 5250 pNew->ctrlFlags = (u8)ctrlFlags; 5251 #if SQLITE_MAX_MMAP_SIZE>0 5252 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; 5253 #endif 5254 if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), 5255 "psow", SQLITE_POWERSAFE_OVERWRITE) ){ 5256 pNew->ctrlFlags |= UNIXFILE_PSOW; 5257 } 5258 if( strcmp(pVfs->zName,"unix-excl")==0 ){ 5259 pNew->ctrlFlags |= UNIXFILE_EXCL; 5260 } 5261 5262 #if OS_VXWORKS 5263 pNew->pId = vxworksFindFileId(zFilename); 5264 if( pNew->pId==0 ){ 5265 ctrlFlags |= UNIXFILE_NOLOCK; 5266 rc = SQLITE_NOMEM_BKPT; 5267 } 5268 #endif 5269 5270 if( ctrlFlags & UNIXFILE_NOLOCK ){ 5271 pLockingStyle = &nolockIoMethods; 5272 }else{ 5273 pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); 5274 #if SQLITE_ENABLE_LOCKING_STYLE 5275 /* Cache zFilename in the locking context (AFP and dotlock override) for 5276 ** proxyLock activation is possible (remote proxy is based on db name) 5277 ** zFilename remains valid until file is closed, to support */ 5278 pNew->lockingContext = (void*)zFilename; 5279 #endif 5280 } 5281 5282 if( pLockingStyle == &posixIoMethods 5283 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5284 || pLockingStyle == &nfsIoMethods 5285 #endif 5286 ){ 5287 unixEnterMutex(); 5288 rc = findInodeInfo(pNew, &pNew->pInode); 5289 if( rc!=SQLITE_OK ){ 5290 /* If an error occurred in findInodeInfo(), close the file descriptor 5291 ** immediately, before releasing the mutex. 
findInodeInfo() may fail 5292 ** in two scenarios: 5293 ** 5294 ** (a) A call to fstat() failed. 5295 ** (b) A malloc failed. 5296 ** 5297 ** Scenario (b) may only occur if the process is holding no other 5298 ** file descriptors open on the same file. If there were other file 5299 ** descriptors on this file, then no malloc would be required by 5300 ** findInodeInfo(). If this is the case, it is quite safe to close 5301 ** handle h - as it is guaranteed that no posix locks will be released 5302 ** by doing so. 5303 ** 5304 ** If scenario (a) caused the error then things are not so safe. The 5305 ** implicit assumption here is that if fstat() fails, things are in 5306 ** such bad shape that dropping a lock or two doesn't matter much. 5307 */ 5308 robust_close(pNew, h, __LINE__); 5309 h = -1; 5310 } 5311 unixLeaveMutex(); 5312 } 5313 5314 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5315 else if( pLockingStyle == &afpIoMethods ){ 5316 /* AFP locking uses the file path so it needs to be included in 5317 ** the afpLockingContext. 5318 */ 5319 afpLockingContext *pCtx; 5320 pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 5321 if( pCtx==0 ){ 5322 rc = SQLITE_NOMEM_BKPT; 5323 }else{ 5324 /* NB: zFilename exists and remains valid until the file is closed 5325 ** according to requirement F11141. So we do not need to make a 5326 ** copy of the filename. 
*/ 5327 pCtx->dbPath = zFilename; 5328 pCtx->reserved = 0; 5329 srandomdev(); 5330 unixEnterMutex(); 5331 rc = findInodeInfo(pNew, &pNew->pInode); 5332 if( rc!=SQLITE_OK ){ 5333 sqlite3_free(pNew->lockingContext); 5334 robust_close(pNew, h, __LINE__); 5335 h = -1; 5336 } 5337 unixLeaveMutex(); 5338 } 5339 } 5340 #endif 5341 5342 else if( pLockingStyle == &dotlockIoMethods ){ 5343 /* Dotfile locking uses the file path so it needs to be included in 5344 ** the dotlockLockingContext 5345 */ 5346 char *zLockFile; 5347 int nFilename; 5348 assert( zFilename!=0 ); 5349 nFilename = (int)strlen(zFilename) + 6; 5350 zLockFile = (char *)sqlite3_malloc64(nFilename); 5351 if( zLockFile==0 ){ 5352 rc = SQLITE_NOMEM_BKPT; 5353 }else{ 5354 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); 5355 } 5356 pNew->lockingContext = zLockFile; 5357 } 5358 5359 #if OS_VXWORKS 5360 else if( pLockingStyle == &semIoMethods ){ 5361 /* Named semaphore locking uses the file path so it needs to be 5362 ** included in the semLockingContext 5363 */ 5364 unixEnterMutex(); 5365 rc = findInodeInfo(pNew, &pNew->pInode); 5366 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ 5367 char *zSemName = pNew->pInode->aSemName; 5368 int n; 5369 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", 5370 pNew->pId->zCanonicalName); 5371 for( n=1; zSemName[n]; n++ ) 5372 if( zSemName[n]=='/' ) zSemName[n] = '_'; 5373 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); 5374 if( pNew->pInode->pSem == SEM_FAILED ){ 5375 rc = SQLITE_NOMEM_BKPT; 5376 pNew->pInode->aSemName[0] = '\0'; 5377 } 5378 } 5379 unixLeaveMutex(); 5380 } 5381 #endif 5382 5383 storeLastErrno(pNew, 0); 5384 #if OS_VXWORKS 5385 if( rc!=SQLITE_OK ){ 5386 if( h>=0 ) robust_close(pNew, h, __LINE__); 5387 h = -1; 5388 osUnlink(zFilename); 5389 pNew->ctrlFlags |= UNIXFILE_DELETE; 5390 } 5391 #endif 5392 if( rc!=SQLITE_OK ){ 5393 if( h>=0 ) robust_close(pNew, h, __LINE__); 5394 }else{ 5395 pNew->pMethod = pLockingStyle; 5396 
OpenCounter(+1); 5397 verifyDbFile(pNew); 5398 } 5399 return rc; 5400 } 5401 5402 /* 5403 ** Return the name of a directory in which to put temporary files. 5404 ** If no suitable temporary file directory can be found, return NULL. 5405 */ 5406 static const char *unixTempFileDir(void){ 5407 static const char *azDirs[] = { 5408 0, 5409 0, 5410 "/var/tmp", 5411 "/usr/tmp", 5412 "/tmp", 5413 "." 5414 }; 5415 unsigned int i = 0; 5416 struct stat buf; 5417 const char *zDir = sqlite3_temp_directory; 5418 5419 if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR"); 5420 if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); 5421 while(1){ 5422 if( zDir!=0 5423 && osStat(zDir, &buf)==0 5424 && S_ISDIR(buf.st_mode) 5425 && osAccess(zDir, 03)==0 5426 ){ 5427 return zDir; 5428 } 5429 if( i>=sizeof(azDirs)/sizeof(azDirs[0]) ) break; 5430 zDir = azDirs[i++]; 5431 } 5432 return 0; 5433 } 5434 5435 /* 5436 ** Create a temporary file name in zBuf. zBuf must be allocated 5437 ** by the calling process and must be big enough to hold at least 5438 ** pVfs->mxPathname bytes. 5439 */ 5440 static int unixGetTempname(int nBuf, char *zBuf){ 5441 const char *zDir; 5442 int iLimit = 0; 5443 5444 /* It's odd to simulate an io-error here, but really this is just 5445 ** using the io-error infrastructure to test that SQLite handles this 5446 ** function failing. 5447 */ 5448 zBuf[0] = 0; 5449 SimulateIOError( return SQLITE_IOERR ); 5450 5451 zDir = unixTempFileDir(); 5452 if( zDir==0 ) return SQLITE_IOERR_GETTEMPPATH; 5453 do{ 5454 u64 r; 5455 sqlite3_randomness(sizeof(r), &r); 5456 assert( nBuf>2 ); 5457 zBuf[nBuf-2] = 0; 5458 sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c", 5459 zDir, r, 0); 5460 if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR; 5461 }while( osAccess(zBuf,0)==0 ); 5462 return SQLITE_OK; 5463 } 5464 5465 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5466 /* 5467 ** Routine to transform a unixFile into a proxy-locking unixFile. 
5468 ** Implementation in the proxy-lock division, but used by unixOpen() 5469 ** if SQLITE_PREFER_PROXY_LOCKING is defined. 5470 */ 5471 static int proxyTransformUnixFile(unixFile*, const char*); 5472 #endif 5473 5474 /* 5475 ** Search for an unused file descriptor that was opened on the database 5476 ** file (not a journal or master-journal file) identified by pathname 5477 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second 5478 ** argument to this function. 5479 ** 5480 ** Such a file descriptor may exist if a database connection was closed 5481 ** but the associated file descriptor could not be closed because some 5482 ** other file descriptor open on the same file is holding a file-lock. 5483 ** Refer to comments in the unixClose() function and the lengthy comment 5484 ** describing "Posix Advisory Locking" at the start of this file for 5485 ** further details. Also, ticket #4018. 5486 ** 5487 ** If a suitable file descriptor is found, then it is returned. If no 5488 ** such file descriptor is located, -1 is returned. 5489 */ 5490 static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ 5491 UnixUnusedFd *pUnused = 0; 5492 5493 /* Do not search for an unused file descriptor on vxworks. Not because 5494 ** vxworks would not benefit from the change (it might, we're not sure), 5495 ** but because no way to test it is currently available. It is better 5496 ** not to risk breaking vxworks support for the sake of such an obscure 5497 ** feature. */ 5498 #if !OS_VXWORKS 5499 struct stat sStat; /* Results of stat() call */ 5500 5501 /* A stat() call may fail for various reasons. If this happens, it is 5502 ** almost certain that an open() call on the same path will also fail. 5503 ** For this reason, if an error occurs in the stat() call here, it is 5504 ** ignored and -1 is returned. The caller will try to open a new file 5505 ** descriptor on the same path, fail, and return an error to SQLite. 
5506 ** 5507 ** Even if a subsequent open() call does succeed, the consequences of 5508 ** not searching for a reusable file descriptor are not dire. */ 5509 if( 0==osStat(zPath, &sStat) ){ 5510 unixInodeInfo *pInode; 5511 5512 unixEnterMutex(); 5513 pInode = inodeList; 5514 while( pInode && (pInode->fileId.dev!=sStat.st_dev 5515 || pInode->fileId.ino!=sStat.st_ino) ){ 5516 pInode = pInode->pNext; 5517 } 5518 if( pInode ){ 5519 UnixUnusedFd **pp; 5520 for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); 5521 pUnused = *pp; 5522 if( pUnused ){ 5523 *pp = pUnused->pNext; 5524 } 5525 } 5526 unixLeaveMutex(); 5527 } 5528 #endif /* if !OS_VXWORKS */ 5529 return pUnused; 5530 } 5531 5532 /* 5533 ** Find the mode, uid and gid of file zFile. 5534 */ 5535 static int getFileMode( 5536 const char *zFile, /* File name */ 5537 mode_t *pMode, /* OUT: Permissions of zFile */ 5538 uid_t *pUid, /* OUT: uid of zFile. */ 5539 gid_t *pGid /* OUT: gid of zFile. */ 5540 ){ 5541 struct stat sStat; /* Output of stat() on database file */ 5542 int rc = SQLITE_OK; 5543 if( 0==osStat(zFile, &sStat) ){ 5544 *pMode = sStat.st_mode & 0777; 5545 *pUid = sStat.st_uid; 5546 *pGid = sStat.st_gid; 5547 }else{ 5548 rc = SQLITE_IOERR_FSTAT; 5549 } 5550 return rc; 5551 } 5552 5553 /* 5554 ** This function is called by unixOpen() to determine the unix permissions 5555 ** to create new files with. If no error occurs, then SQLITE_OK is returned 5556 ** and a value suitable for passing as the third argument to open(2) is 5557 ** written to *pMode. If an IO error occurs, an SQLite error code is 5558 ** returned and the value of *pMode is not modified. 5559 ** 5560 ** In most cases, this routine sets *pMode to 0, which will become 5561 ** an indication to robust_open() to create the file using 5562 ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. 
5563 ** But if the file being opened is a WAL or regular journal file, then 5564 ** this function queries the file-system for the permissions on the 5565 ** corresponding database file and sets *pMode to this value. Whenever 5566 ** possible, WAL and journal files are created using the same permissions 5567 ** as the associated database file. 5568 ** 5569 ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the 5570 ** original filename is unavailable. But 8_3_NAMES is only used for 5571 ** FAT filesystems and permissions do not matter there, so just use 5572 ** the default permissions. 5573 */ 5574 static int findCreateFileMode( 5575 const char *zPath, /* Path of file (possibly) being created */ 5576 int flags, /* Flags passed as 4th argument to xOpen() */ 5577 mode_t *pMode, /* OUT: Permissions to open file with */ 5578 uid_t *pUid, /* OUT: uid to set on the file */ 5579 gid_t *pGid /* OUT: gid to set on the file */ 5580 ){ 5581 int rc = SQLITE_OK; /* Return Code */ 5582 *pMode = 0; 5583 *pUid = 0; 5584 *pGid = 0; 5585 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 5586 char zDb[MAX_PATHNAME+1]; /* Database file path */ 5587 int nDb; /* Number of valid bytes in zDb */ 5588 5589 /* zPath is a path to a WAL or journal file. The following block derives 5590 ** the path to the associated database file from zPath. This block handles 5591 ** the following naming conventions: 5592 ** 5593 ** "<path to db>-journal" 5594 ** "<path to db>-wal" 5595 ** "<path to db>-journalNN" 5596 ** "<path to db>-walNN" 5597 ** 5598 ** where NN is a decimal number. The NN naming schemes are 5599 ** used by the test_multiplex.c module. 5600 */ 5601 nDb = sqlite3Strlen30(zPath) - 1; 5602 while( zPath[nDb]!='-' ){ 5603 #ifndef SQLITE_ENABLE_8_3_NAMES 5604 /* In the normal case (8+3 filenames disabled) the journal filename 5605 ** is guaranteed to contain a '-' character. 
*/ 5606 assert( nDb>0 ); 5607 assert( sqlite3Isalnum(zPath[nDb]) ); 5608 #else 5609 /* If 8+3 names are possible, then the journal file might not contain 5610 ** a '-' character. So check for that case and return early. */ 5611 if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK; 5612 #endif 5613 nDb--; 5614 } 5615 memcpy(zDb, zPath, nDb); 5616 zDb[nDb] = '\0'; 5617 5618 rc = getFileMode(zDb, pMode, pUid, pGid); 5619 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ 5620 *pMode = 0600; 5621 }else if( flags & SQLITE_OPEN_URI ){ 5622 /* If this is a main database file and the file was opened using a URI 5623 ** filename, check for the "modeof" parameter. If present, interpret 5624 ** its value as a filename and try to copy the mode, uid and gid from 5625 ** that file. */ 5626 const char *z = sqlite3_uri_parameter(zPath, "modeof"); 5627 if( z ){ 5628 rc = getFileMode(z, pMode, pUid, pGid); 5629 } 5630 } 5631 return rc; 5632 } 5633 5634 /* 5635 ** Open the file zPath. 5636 ** 5637 ** Previously, the SQLite OS layer used three functions in place of this 5638 ** one: 5639 ** 5640 ** sqlite3OsOpenReadWrite(); 5641 ** sqlite3OsOpenReadOnly(); 5642 ** sqlite3OsOpenExclusive(); 5643 ** 5644 ** These calls correspond to the following combinations of flags: 5645 ** 5646 ** ReadWrite() -> (READWRITE | CREATE) 5647 ** ReadOnly() -> (READONLY) 5648 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 5649 ** 5650 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 5651 ** true, the file was configured to be automatically deleted when the 5652 ** file handle closed. To achieve the same effect using this new 5653 ** interface, add the DELETEONCLOSE flag to those specified above for 5654 ** OpenExclusive(). 
5655 */ 5656 static int unixOpen( 5657 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ 5658 const char *zPath, /* Pathname of file to be opened */ 5659 sqlite3_file *pFile, /* The file descriptor to be filled in */ 5660 int flags, /* Input flags to control the opening */ 5661 int *pOutFlags /* Output flags returned to SQLite core */ 5662 ){ 5663 unixFile *p = (unixFile *)pFile; 5664 int fd = -1; /* File descriptor returned by open() */ 5665 int openFlags = 0; /* Flags to pass to open() */ 5666 int eType = flags&0xFFFFFF00; /* Type of file to open */ 5667 int noLock; /* True to omit locking primitives */ 5668 int rc = SQLITE_OK; /* Function Return Code */ 5669 int ctrlFlags = 0; /* UNIXFILE_* flags */ 5670 5671 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 5672 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 5673 int isCreate = (flags & SQLITE_OPEN_CREATE); 5674 int isReadonly = (flags & SQLITE_OPEN_READONLY); 5675 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 5676 #if SQLITE_ENABLE_LOCKING_STYLE 5677 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); 5678 #endif 5679 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 5680 struct statfs fsInfo; 5681 #endif 5682 5683 /* If creating a master or main-file journal, this function will open 5684 ** a file-descriptor on the directory too. The first time unixSync() 5685 ** is called the directory file descriptor will be fsync()ed and close()d. 5686 */ 5687 int syncDir = (isCreate && ( 5688 eType==SQLITE_OPEN_MASTER_JOURNAL 5689 || eType==SQLITE_OPEN_MAIN_JOURNAL 5690 || eType==SQLITE_OPEN_WAL 5691 )); 5692 5693 /* If argument zPath is a NULL pointer, this function is required to open 5694 ** a temporary file. Use this buffer to store the file name in. 
5695 */ 5696 char zTmpname[MAX_PATHNAME+2]; 5697 const char *zName = zPath; 5698 5699 /* Check the following statements are true: 5700 ** 5701 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 5702 ** (b) if CREATE is set, then READWRITE must also be set, and 5703 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 5704 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 5705 */ 5706 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 5707 assert(isCreate==0 || isReadWrite); 5708 assert(isExclusive==0 || isCreate); 5709 assert(isDelete==0 || isCreate); 5710 5711 /* The main DB, main journal, WAL file and master journal are never 5712 ** automatically deleted. Nor are they ever temporary files. */ 5713 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); 5714 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); 5715 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); 5716 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); 5717 5718 /* Assert that the upper layer has set one of the "file-type" flags. */ 5719 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 5720 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 5721 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL 5722 || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL 5723 ); 5724 5725 /* Detect a pid change and reset the PRNG. There is a race condition 5726 ** here such that two or more threads all trying to open databases at 5727 ** the same instant might all reset the PRNG. But multiple resets 5728 ** are harmless. 
5729 */ 5730 if( randomnessPid!=osGetpid(0) ){ 5731 randomnessPid = osGetpid(0); 5732 sqlite3_randomness(0,0); 5733 } 5734 5735 memset(p, 0, sizeof(unixFile)); 5736 5737 if( eType==SQLITE_OPEN_MAIN_DB ){ 5738 UnixUnusedFd *pUnused; 5739 pUnused = findReusableFd(zName, flags); 5740 if( pUnused ){ 5741 fd = pUnused->fd; 5742 }else{ 5743 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 5744 if( !pUnused ){ 5745 return SQLITE_NOMEM_BKPT; 5746 } 5747 } 5748 p->pUnused = pUnused; 5749 5750 /* Database filenames are double-zero terminated if they are not 5751 ** URIs with parameters. Hence, they can always be passed into 5752 ** sqlite3_uri_parameter(). */ 5753 assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); 5754 5755 }else if( !zName ){ 5756 /* If zName is NULL, the upper layer is requesting a temp file. */ 5757 assert(isDelete && !syncDir); 5758 rc = unixGetTempname(pVfs->mxPathname, zTmpname); 5759 if( rc!=SQLITE_OK ){ 5760 return rc; 5761 } 5762 zName = zTmpname; 5763 5764 /* Generated temporary filenames are always double-zero terminated 5765 ** for use by sqlite3_uri_parameter(). */ 5766 assert( zName[strlen(zName)+1]==0 ); 5767 } 5768 5769 /* Determine the value of the flags parameter passed to POSIX function 5770 ** open(). These must be calculated even if open() is not called, as 5771 ** they may be stored as part of the file handle and used by the 5772 ** 'conch file' locking functions later on. 
*/ 5773 if( isReadonly ) openFlags |= O_RDONLY; 5774 if( isReadWrite ) openFlags |= O_RDWR; 5775 if( isCreate ) openFlags |= O_CREAT; 5776 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); 5777 openFlags |= (O_LARGEFILE|O_BINARY); 5778 5779 if( fd<0 ){ 5780 mode_t openMode; /* Permissions to create file with */ 5781 uid_t uid; /* Userid for the file */ 5782 gid_t gid; /* Groupid for the file */ 5783 rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); 5784 if( rc!=SQLITE_OK ){ 5785 assert( !p->pUnused ); 5786 assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); 5787 return rc; 5788 } 5789 fd = robust_open(zName, openFlags, openMode); 5790 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); 5791 assert( !isExclusive || (openFlags & O_CREAT)!=0 ); 5792 if( fd<0 && errno!=EISDIR && isReadWrite ){ 5793 /* Failed to open the file for read/write access. Try read-only. */ 5794 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 5795 openFlags &= ~(O_RDWR|O_CREAT); 5796 flags |= SQLITE_OPEN_READONLY; 5797 openFlags |= O_RDONLY; 5798 isReadonly = 1; 5799 fd = robust_open(zName, openFlags, openMode); 5800 } 5801 if( fd<0 ){ 5802 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); 5803 goto open_finished; 5804 } 5805 5806 /* If this process is running as root and if creating a new rollback 5807 ** journal or WAL file, set the ownership of the journal or WAL to be 5808 ** the same as the original database. 
5809 */ 5810 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 5811 robustFchown(fd, uid, gid); 5812 } 5813 } 5814 assert( fd>=0 ); 5815 if( pOutFlags ){ 5816 *pOutFlags = flags; 5817 } 5818 5819 if( p->pUnused ){ 5820 p->pUnused->fd = fd; 5821 p->pUnused->flags = flags; 5822 } 5823 5824 if( isDelete ){ 5825 #if OS_VXWORKS 5826 zPath = zName; 5827 #elif defined(SQLITE_UNLINK_AFTER_CLOSE) 5828 zPath = sqlite3_mprintf("%s", zName); 5829 if( zPath==0 ){ 5830 robust_close(p, fd, __LINE__); 5831 return SQLITE_NOMEM_BKPT; 5832 } 5833 #else 5834 osUnlink(zName); 5835 #endif 5836 } 5837 #if SQLITE_ENABLE_LOCKING_STYLE 5838 else{ 5839 p->openFlags = openFlags; 5840 } 5841 #endif 5842 5843 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 5844 if( fstatfs(fd, &fsInfo) == -1 ){ 5845 storeLastErrno(p, errno); 5846 robust_close(p, fd, __LINE__); 5847 return SQLITE_IOERR_ACCESS; 5848 } 5849 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { 5850 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 5851 } 5852 if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { 5853 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 5854 } 5855 #endif 5856 5857 /* Set up appropriate ctrlFlags */ 5858 if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; 5859 if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; 5860 noLock = eType!=SQLITE_OPEN_MAIN_DB; 5861 if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; 5862 if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC; 5863 if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; 5864 5865 #if SQLITE_ENABLE_LOCKING_STYLE 5866 #if SQLITE_PREFER_PROXY_LOCKING 5867 isAutoProxy = 1; 5868 #endif 5869 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ 5870 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); 5871 int useProxy = 0; 5872 5873 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 5874 ** never use proxy, NULL means use proxy for non-local files only. 
*/ 5875 if( envforce!=NULL ){ 5876 useProxy = atoi(envforce)>0; 5877 }else{ 5878 useProxy = !(fsInfo.f_flags&MNT_LOCAL); 5879 } 5880 if( useProxy ){ 5881 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 5882 if( rc==SQLITE_OK ){ 5883 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); 5884 if( rc!=SQLITE_OK ){ 5885 /* Use unixClose to clean up the resources added in fillInUnixFile 5886 ** and clear all the structure's references. Specifically, 5887 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 5888 */ 5889 unixClose(pFile); 5890 return rc; 5891 } 5892 } 5893 goto open_finished; 5894 } 5895 } 5896 #endif 5897 5898 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 5899 5900 open_finished: 5901 if( rc!=SQLITE_OK ){ 5902 sqlite3_free(p->pUnused); 5903 } 5904 return rc; 5905 } 5906 5907 5908 /* 5909 ** Delete the file at zPath. If the dirSync argument is true, fsync() 5910 ** the directory after deleting the file. 5911 */ 5912 static int unixDelete( 5913 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ 5914 const char *zPath, /* Name of file to be deleted */ 5915 int dirSync /* If true, fsync() directory after deleting file */ 5916 ){ 5917 int rc = SQLITE_OK; 5918 UNUSED_PARAMETER(NotUsed); 5919 SimulateIOError(return SQLITE_IOERR_DELETE); 5920 if( osUnlink(zPath)==(-1) ){ 5921 if( errno==ENOENT 5922 #if OS_VXWORKS 5923 || osAccess(zPath,0)!=0 5924 #endif 5925 ){ 5926 rc = SQLITE_IOERR_DELETE_NOENT; 5927 }else{ 5928 rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); 5929 } 5930 return rc; 5931 } 5932 #ifndef SQLITE_DISABLE_DIRSYNC 5933 if( (dirSync & 1)!=0 ){ 5934 int fd; 5935 rc = osOpenDirectory(zPath, &fd); 5936 if( rc==SQLITE_OK ){ 5937 if( full_fsync(fd,0,0) ){ 5938 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); 5939 } 5940 robust_close(0, fd, __LINE__); 5941 }else{ 5942 assert( rc==SQLITE_CANTOPEN ); 5943 rc = SQLITE_OK; 5944 } 5945 } 5946 #endif 5947 return rc; 5948 } 5949 5950 /* 5951 
** Test the existence of or access permissions of file zPath. The 5952 ** test performed depends on the value of flags: 5953 ** 5954 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 5955 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 5956 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 5957 ** 5958 ** Otherwise return 0. 5959 */ 5960 static int unixAccess( 5961 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ 5962 const char *zPath, /* Path of the file to examine */ 5963 int flags, /* What do we want to learn about the zPath file? */ 5964 int *pResOut /* Write result boolean here */ 5965 ){ 5966 UNUSED_PARAMETER(NotUsed); 5967 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 5968 assert( pResOut!=0 ); 5969 5970 /* The spec says there are three possible values for flags. But only 5971 ** two of them are actually used */ 5972 assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); 5973 5974 if( flags==SQLITE_ACCESS_EXISTS ){ 5975 struct stat buf; 5976 *pResOut = (0==osStat(zPath, &buf) && buf.st_size>0); 5977 }else{ 5978 *pResOut = osAccess(zPath, W_OK|R_OK)==0; 5979 } 5980 return SQLITE_OK; 5981 } 5982 5983 /* 5984 ** 5985 */ 5986 static int mkFullPathname( 5987 const char *zPath, /* Input path */ 5988 char *zOut, /* Output buffer */ 5989 int nOut /* Allocated size of buffer zOut */ 5990 ){ 5991 int nPath = sqlite3Strlen30(zPath); 5992 int iOff = 0; 5993 if( zPath[0]!='/' ){ 5994 if( osGetcwd(zOut, nOut-2)==0 ){ 5995 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); 5996 } 5997 iOff = sqlite3Strlen30(zOut); 5998 zOut[iOff++] = '/'; 5999 } 6000 if( (iOff+nPath+1)>nOut ){ 6001 /* SQLite assumes that xFullPathname() nul-terminates the output buffer 6002 ** even if it returns an error. 
*/ 6003 zOut[iOff] = '\0'; 6004 return SQLITE_CANTOPEN_BKPT; 6005 } 6006 sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath); 6007 return SQLITE_OK; 6008 } 6009 6010 /* 6011 ** Turn a relative pathname into a full pathname. The relative path 6012 ** is stored as a nul-terminated string in the buffer pointed to by 6013 ** zPath. 6014 ** 6015 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 6016 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 6017 ** this buffer before returning. 6018 */ 6019 static int unixFullPathname( 6020 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 6021 const char *zPath, /* Possibly relative input path */ 6022 int nOut, /* Size of output buffer in bytes */ 6023 char *zOut /* Output buffer */ 6024 ){ 6025 #if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT) 6026 return mkFullPathname(zPath, zOut, nOut); 6027 #else 6028 int rc = SQLITE_OK; 6029 int nByte; 6030 int nLink = 1; /* Number of symbolic links followed so far */ 6031 const char *zIn = zPath; /* Input path for each iteration of loop */ 6032 char *zDel = 0; 6033 6034 assert( pVfs->mxPathname==MAX_PATHNAME ); 6035 UNUSED_PARAMETER(pVfs); 6036 6037 /* It's odd to simulate an io-error here, but really this is just 6038 ** using the io-error infrastructure to test that SQLite handles this 6039 ** function failing. This function could fail if, for example, the 6040 ** current working directory has been unlinked. 6041 */ 6042 SimulateIOError( return SQLITE_ERROR ); 6043 6044 do { 6045 6046 /* Call stat() on path zIn. Set bLink to true if the path is a symbolic 6047 ** link, or false otherwise. 
*/ 6048 int bLink = 0; 6049 struct stat buf; 6050 if( osLstat(zIn, &buf)!=0 ){ 6051 if( errno!=ENOENT ){ 6052 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn); 6053 } 6054 }else{ 6055 bLink = S_ISLNK(buf.st_mode); 6056 } 6057 6058 if( bLink ){ 6059 if( zDel==0 ){ 6060 zDel = sqlite3_malloc(nOut); 6061 if( zDel==0 ) rc = SQLITE_NOMEM_BKPT; 6062 }else if( ++nLink>SQLITE_MAX_SYMLINKS ){ 6063 rc = SQLITE_CANTOPEN_BKPT; 6064 } 6065 6066 if( rc==SQLITE_OK ){ 6067 nByte = osReadlink(zIn, zDel, nOut-1); 6068 if( nByte<0 ){ 6069 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn); 6070 }else{ 6071 if( zDel[0]!='/' ){ 6072 int n; 6073 for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--); 6074 if( nByte+n+1>nOut ){ 6075 rc = SQLITE_CANTOPEN_BKPT; 6076 }else{ 6077 memmove(&zDel[n], zDel, nByte+1); 6078 memcpy(zDel, zIn, n); 6079 nByte += n; 6080 } 6081 } 6082 zDel[nByte] = '\0'; 6083 } 6084 } 6085 6086 zIn = zDel; 6087 } 6088 6089 assert( rc!=SQLITE_OK || zIn!=zOut || zIn[0]=='/' ); 6090 if( rc==SQLITE_OK && zIn!=zOut ){ 6091 rc = mkFullPathname(zIn, zOut, nOut); 6092 } 6093 if( bLink==0 ) break; 6094 zIn = zOut; 6095 }while( rc==SQLITE_OK ); 6096 6097 sqlite3_free(zDel); 6098 return rc; 6099 #endif /* HAVE_READLINK && HAVE_LSTAT */ 6100 } 6101 6102 6103 #ifndef SQLITE_OMIT_LOAD_EXTENSION 6104 /* 6105 ** Interfaces for opening a shared library, finding entry points 6106 ** within the shared library, and closing the shared library. 6107 */ 6108 #include <dlfcn.h> 6109 static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ 6110 UNUSED_PARAMETER(NotUsed); 6111 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 6112 } 6113 6114 /* 6115 ** SQLite calls this function immediately after a call to unixDlSym() or 6116 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 6117 ** message is available, it is written to zBufOut. 
If no error message 6118 ** is available, zBufOut is left unmodified and SQLite uses a default 6119 ** error message. 6120 */ 6121 static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ 6122 const char *zErr; 6123 UNUSED_PARAMETER(NotUsed); 6124 unixEnterMutex(); 6125 zErr = dlerror(); 6126 if( zErr ){ 6127 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 6128 } 6129 unixLeaveMutex(); 6130 } 6131 static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ 6132 /* 6133 ** GCC with -pedantic-errors says that C90 does not allow a void* to be 6134 ** cast into a pointer to a function. And yet the library dlsym() routine 6135 ** returns a void* which is really a pointer to a function. So how do we 6136 ** use dlsym() with -pedantic-errors? 6137 ** 6138 ** Variable x below is defined to be a pointer to a function taking 6139 ** parameters void* and const char* and returning a pointer to a function. 6140 ** We initialize x by assigning it a pointer to the dlsym() function. 6141 ** (That assignment requires a cast.) Then we call the function that 6142 ** x points to. 6143 ** 6144 ** This work-around is unlikely to work correctly on any system where 6145 ** you really cannot cast a function pointer into void*. But then, on the 6146 ** other hand, dlsym() will not work on such a system either, so we have 6147 ** not really lost anything. 6148 */ 6149 void (*(*x)(void*,const char*))(void); 6150 UNUSED_PARAMETER(NotUsed); 6151 x = (void(*(*)(void*,const char*))(void))dlsym; 6152 return (*x)(p, zSym); 6153 } 6154 static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ 6155 UNUSED_PARAMETER(NotUsed); 6156 dlclose(pHandle); 6157 } 6158 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 6159 #define unixDlOpen 0 6160 #define unixDlError 0 6161 #define unixDlSym 0 6162 #define unixDlClose 0 6163 #endif 6164 6165 /* 6166 ** Write nBuf bytes of random data to the supplied buffer zBuf. 
6167 */ 6168 static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ 6169 UNUSED_PARAMETER(NotUsed); 6170 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); 6171 6172 /* We have to initialize zBuf to prevent valgrind from reporting 6173 ** errors. The reports issued by valgrind are incorrect - we would 6174 ** prefer that the randomness be increased by making use of the 6175 ** uninitialized space in zBuf - but valgrind errors tend to worry 6176 ** some users. Rather than argue, it seems easier just to initialize 6177 ** the whole array and silence valgrind, even if that means less randomness 6178 ** in the random seed. 6179 ** 6180 ** When testing, initializing zBuf[] to zero is all we do. That means 6181 ** that we always use the same random number sequence. This makes the 6182 ** tests repeatable. 6183 */ 6184 memset(zBuf, 0, nBuf); 6185 randomnessPid = osGetpid(0); 6186 #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) 6187 { 6188 int fd, got; 6189 fd = robust_open("/dev/urandom", O_RDONLY, 0); 6190 if( fd<0 ){ 6191 time_t t; 6192 time(&t); 6193 memcpy(zBuf, &t, sizeof(t)); 6194 memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); 6195 assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); 6196 nBuf = sizeof(t) + sizeof(randomnessPid); 6197 }else{ 6198 do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); 6199 robust_close(0, fd, __LINE__); 6200 } 6201 } 6202 #endif 6203 return nBuf; 6204 } 6205 6206 6207 /* 6208 ** Sleep for a little while. Return the amount of time slept. 6209 ** The argument is the number of microseconds we want to sleep. 6210 ** The return value is the number of microseconds of sleep actually 6211 ** requested from the underlying operating system, a number which 6212 ** might be greater than or equal to the argument, but not less 6213 ** than the argument. 
6214 */ 6215 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ 6216 #if OS_VXWORKS 6217 struct timespec sp; 6218 6219 sp.tv_sec = microseconds / 1000000; 6220 sp.tv_nsec = (microseconds % 1000000) * 1000; 6221 nanosleep(&sp, NULL); 6222 UNUSED_PARAMETER(NotUsed); 6223 return microseconds; 6224 #elif defined(HAVE_USLEEP) && HAVE_USLEEP 6225 usleep(microseconds); 6226 UNUSED_PARAMETER(NotUsed); 6227 return microseconds; 6228 #else 6229 int seconds = (microseconds+999999)/1000000; 6230 sleep(seconds); 6231 UNUSED_PARAMETER(NotUsed); 6232 return seconds*1000000; 6233 #endif 6234 } 6235 6236 /* 6237 ** The following variable, if set to a non-zero value, is interpreted as 6238 ** the number of seconds since 1970 and is used to set the result of 6239 ** sqlite3OsCurrentTime() during testing. 6240 */ 6241 #ifdef SQLITE_TEST 6242 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ 6243 #endif 6244 6245 /* 6246 ** Find the current time (in Universal Coordinated Time). Write into *piNow 6247 ** the current time and date as a Julian Day number times 86_400_000. In 6248 ** other words, write into *piNow the number of milliseconds since the Julian 6249 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the 6250 ** proleptic Gregorian calendar. 6251 ** 6252 ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date 6253 ** cannot be found. 
6254 */ 6255 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ 6256 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; 6257 int rc = SQLITE_OK; 6258 #if defined(NO_GETTOD) 6259 time_t t; 6260 time(&t); 6261 *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; 6262 #elif OS_VXWORKS 6263 struct timespec sNow; 6264 clock_gettime(CLOCK_REALTIME, &sNow); 6265 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; 6266 #else 6267 struct timeval sNow; 6268 (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ 6269 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; 6270 #endif 6271 6272 #ifdef SQLITE_TEST 6273 if( sqlite3_current_time ){ 6274 *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; 6275 } 6276 #endif 6277 UNUSED_PARAMETER(NotUsed); 6278 return rc; 6279 } 6280 6281 #ifndef SQLITE_OMIT_DEPRECATED 6282 /* 6283 ** Find the current time (in Universal Coordinated Time). Write the 6284 ** current time and date as a Julian Day number into *prNow and 6285 ** return 0. Return 1 if the time and date cannot be found. 6286 */ 6287 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ 6288 sqlite3_int64 i = 0; 6289 int rc; 6290 UNUSED_PARAMETER(NotUsed); 6291 rc = unixCurrentTimeInt64(0, &i); 6292 *prNow = i/86400000.0; 6293 return rc; 6294 } 6295 #else 6296 # define unixCurrentTime 0 6297 #endif 6298 6299 /* 6300 ** The xGetLastError() method is designed to return a better 6301 ** low-level error message when operating-system problems come up 6302 ** during SQLite operation. Only the integer return code is currently 6303 ** used. 
6304 */ 6305 static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ 6306 UNUSED_PARAMETER(NotUsed); 6307 UNUSED_PARAMETER(NotUsed2); 6308 UNUSED_PARAMETER(NotUsed3); 6309 return errno; 6310 } 6311 6312 6313 /* 6314 ************************ End of sqlite3_vfs methods *************************** 6315 ******************************************************************************/ 6316 6317 /****************************************************************************** 6318 ************************** Begin Proxy Locking ******************************** 6319 ** 6320 ** Proxy locking is a "uber-locking-method" in this sense: It uses the 6321 ** other locking methods on secondary lock files. Proxy locking is a 6322 ** meta-layer over top of the primitive locking implemented above. For 6323 ** this reason, the division that implements of proxy locking is deferred 6324 ** until late in the file (here) after all of the other I/O methods have 6325 ** been defined - so that the primitive locking methods are available 6326 ** as services to help with the implementation of proxy locking. 6327 ** 6328 **** 6329 ** 6330 ** The default locking schemes in SQLite use byte-range locks on the 6331 ** database file to coordinate safe, concurrent access by multiple readers 6332 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking 6333 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented 6334 ** as POSIX read & write locks over fixed set of locations (via fsctl), 6335 ** on AFP and SMB only exclusive byte-range locks are available via fsctl 6336 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. 
6337 ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected 6338 ** address in the shared range is taken for a SHARED lock, the entire 6339 ** shared range is taken for an EXCLUSIVE lock): 6340 ** 6341 ** PENDING_BYTE 0x40000000 6342 ** RESERVED_BYTE 0x40000001 6343 ** SHARED_RANGE 0x40000002 -> 0x40000200 6344 ** 6345 ** This works well on the local file system, but shows a nearly 100x 6346 ** slowdown in read performance on AFP because the AFP client disables 6347 ** the read cache when byte-range locks are present. Enabling the read 6348 ** cache exposes a cache coherency problem that is present on all OS X 6349 ** supported network file systems. NFS and AFP both observe the 6350 ** close-to-open semantics for ensuring cache coherency 6351 ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively 6352 ** address the requirements for concurrent database access by multiple 6353 ** readers and writers 6354 ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. 6355 ** 6356 ** To address the performance and cache coherency issues, proxy file locking 6357 ** changes the way database access is controlled by limiting access to a 6358 ** single host at a time and moving file locks off of the database file 6359 ** and onto a proxy file on the local file system. 
**
**
** Using proxy locks
** -----------------
**
** C APIs
**
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE,
**                       <proxy_path> | ":auto:");
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE,
**                       &<proxy_path>);
**
**
** SQL pragmas
**
**  PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
**  PRAGMA [database.]lock_proxy_file
**
** Specifying ":auto:" means that if there is a conch file with a matching
** host ID in it, the proxy path in the conch file will be used, otherwise
** a proxy path based on the user's temp dir
** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
** actual proxy file name is generated from the name and path of the
** database file.  For example:
**
**       For database path "/Users/me/foo.db"
**       The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
**
** Once a lock proxy is configured for a database connection, it cannot
** be removed; however, it may be switched to a different proxy path via
** the above APIs (assuming the conch file is not being held by another
** connection or process).
**
**
** How proxy locking works
** -----------------------
**
** Proxy file locking relies primarily on two new supporting files:
**
**   *  conch file to limit access to the database file to a single host
**      at a time
**
**   *  proxy file to act as a proxy for the advisory locks normally
**      taken on the database
**
** The conch file - to use a proxy file, sqlite must first "hold the conch"
** by taking an sqlite-style shared lock on the conch file, reading the
** contents and comparing the host's unique host ID (see below) and lock
** proxy path against the values stored in the conch.
The conch file is
** stored in the same directory as the database file and the file name
** is patterned after the database file name as ".<databasename>-conch".
** If the conch file does not exist, or its contents do not match the
** host ID and/or proxy path, then the lock is escalated to an exclusive
** lock and the conch file contents are updated with the host ID and proxy
** path and the lock is downgraded to a shared lock again. If the conch
** is held by another process (with a shared lock), the exclusive lock
** will fail and SQLITE_BUSY is returned.
**
** The proxy file - a single-byte file used for all advisory file locks
** normally taken on the database file. This allows for safe sharing
** of the database file for multiple readers and writers on the same
** host (the conch ensures that they all use the same local lock file).
**
** Requesting the lock proxy does not immediately take the conch; it is
** only taken when the first request to lock the database file is made.
** This matches the semantics of the traditional locking behavior, where
** opening a connection to a database file does not take a lock on it.
** The shared lock and an open file descriptor are maintained until
** the connection to the database is closed.
**
** The conch file and the proxy file are never deleted so they only need
** to be created the first time they are used.
**
** Configuration options
** ---------------------
**
**  SQLITE_PREFER_PROXY_LOCKING
**
**       Database files accessed on non-local file systems are
**       automatically configured for proxy locking, lock files are
**       named automatically using the same logic as
**       PRAGMA lock_proxy_file=":auto:"
**
**  SQLITE_PROXY_DEBUG
**
**       Enables the logging of error messages during host id file
**       retrieval and creation
**
**  LOCKPROXYDIR
**
**       Overrides the default directory used for lock proxy files that
**       are named automatically via the ":auto:" setting
**
**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
**
**       Permissions to use when creating a directory for storing the
**       lock proxy files, only used when LOCKPROXYDIR is not set.
**
**
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files (explicitly requesting a proxy with the lock_proxy_file pragma or
** with the SQLITE_FCNTL_SET_LOCKPROXYFILE opcode of the
** sqlite3_file_control() API is not affected by SQLITE_FORCE_PROXY_LOCKING).
6465 */ 6466 6467 /* 6468 ** Proxy locking is only available on MacOSX 6469 */ 6470 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 6471 6472 /* 6473 ** The proxyLockingContext has the path and file structures for the remote 6474 ** and local proxy files in it 6475 */ 6476 typedef struct proxyLockingContext proxyLockingContext; 6477 struct proxyLockingContext { 6478 unixFile *conchFile; /* Open conch file */ 6479 char *conchFilePath; /* Name of the conch file */ 6480 unixFile *lockProxy; /* Open proxy lock file */ 6481 char *lockProxyPath; /* Name of the proxy lock file */ 6482 char *dbPath; /* Name of the open file */ 6483 int conchHeld; /* 1 if the conch is held, -1 if lockless */ 6484 int nFails; /* Number of conch taking failures */ 6485 void *oldLockingContext; /* Original lockingcontext to restore on close */ 6486 sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ 6487 }; 6488 6489 /* 6490 ** The proxy lock file path for the database at dbPath is written into lPath, 6491 ** which must point to valid, writable memory large enough for a maxLen length 6492 ** file path. 
6493 */ 6494 static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ 6495 int len; 6496 int dbLen; 6497 int i; 6498 6499 #ifdef LOCKPROXYDIR 6500 len = strlcpy(lPath, LOCKPROXYDIR, maxLen); 6501 #else 6502 # ifdef _CS_DARWIN_USER_TEMP_DIR 6503 { 6504 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ 6505 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", 6506 lPath, errno, osGetpid(0))); 6507 return SQLITE_IOERR_LOCK; 6508 } 6509 len = strlcat(lPath, "sqliteplocks", maxLen); 6510 } 6511 # else 6512 len = strlcpy(lPath, "/tmp/", maxLen); 6513 # endif 6514 #endif 6515 6516 if( lPath[len-1]!='/' ){ 6517 len = strlcat(lPath, "/", maxLen); 6518 } 6519 6520 /* transform the db path to a unique cache name */ 6521 dbLen = (int)strlen(dbPath); 6522 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ 6523 char c = dbPath[i]; 6524 lPath[i+len] = (c=='/')?'_':c; 6525 } 6526 lPath[i+len]='\0'; 6527 strlcat(lPath, ":auto:", maxLen); 6528 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0))); 6529 return SQLITE_OK; 6530 } 6531 6532 /* 6533 ** Creates the lock file and any missing directories in lockPath 6534 */ 6535 static int proxyCreateLockPath(const char *lockPath){ 6536 int i, len; 6537 char buf[MAXPATHLEN]; 6538 int start = 0; 6539 6540 assert(lockPath!=NULL); 6541 /* try to create all the intermediate directories */ 6542 len = (int)strlen(lockPath); 6543 buf[0] = lockPath[0]; 6544 for( i=1; i<len; i++ ){ 6545 if( lockPath[i] == '/' && (i - start > 0) ){ 6546 /* only mkdir if leaf dir != "." or "/" or ".." */ 6547 if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 6548 || (i-start==2 && buf[start] != '.' 
&& buf[start+1] != '.') ){ 6549 buf[i]='\0'; 6550 if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ 6551 int err=errno; 6552 if( err!=EEXIST ) { 6553 OSTRACE(("CREATELOCKPATH FAILED creating %s, " 6554 "'%s' proxy lock path=%s pid=%d\n", 6555 buf, strerror(err), lockPath, osGetpid(0))); 6556 return err; 6557 } 6558 } 6559 } 6560 start=i+1; 6561 } 6562 buf[i] = lockPath[i]; 6563 } 6564 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0))); 6565 return 0; 6566 } 6567 6568 /* 6569 ** Create a new VFS file descriptor (stored in memory obtained from 6570 ** sqlite3_malloc) and open the file named "path" in the file descriptor. 6571 ** 6572 ** The caller is responsible not only for closing the file descriptor 6573 ** but also for freeing the memory associated with the file descriptor. 6574 */ 6575 static int proxyCreateUnixFile( 6576 const char *path, /* path for the new unixFile */ 6577 unixFile **ppFile, /* unixFile created and returned by ref */ 6578 int islockfile /* if non zero missing dirs will be created */ 6579 ) { 6580 int fd = -1; 6581 unixFile *pNew; 6582 int rc = SQLITE_OK; 6583 int openFlags = O_RDWR | O_CREAT; 6584 sqlite3_vfs dummyVfs; 6585 int terrno = 0; 6586 UnixUnusedFd *pUnused = NULL; 6587 6588 /* 1. first try to open/create the file 6589 ** 2. if that fails, and this is a lock file (not-conch), try creating 6590 ** the parent directories and then try again. 6591 ** 3. 
if that fails, try to open the file read-only 6592 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file 6593 */ 6594 pUnused = findReusableFd(path, openFlags); 6595 if( pUnused ){ 6596 fd = pUnused->fd; 6597 }else{ 6598 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 6599 if( !pUnused ){ 6600 return SQLITE_NOMEM_BKPT; 6601 } 6602 } 6603 if( fd<0 ){ 6604 fd = robust_open(path, openFlags, 0); 6605 terrno = errno; 6606 if( fd<0 && errno==ENOENT && islockfile ){ 6607 if( proxyCreateLockPath(path) == SQLITE_OK ){ 6608 fd = robust_open(path, openFlags, 0); 6609 } 6610 } 6611 } 6612 if( fd<0 ){ 6613 openFlags = O_RDONLY; 6614 fd = robust_open(path, openFlags, 0); 6615 terrno = errno; 6616 } 6617 if( fd<0 ){ 6618 if( islockfile ){ 6619 return SQLITE_BUSY; 6620 } 6621 switch (terrno) { 6622 case EACCES: 6623 return SQLITE_PERM; 6624 case EIO: 6625 return SQLITE_IOERR_LOCK; /* even though it is the conch */ 6626 default: 6627 return SQLITE_CANTOPEN_BKPT; 6628 } 6629 } 6630 6631 pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); 6632 if( pNew==NULL ){ 6633 rc = SQLITE_NOMEM_BKPT; 6634 goto end_create_proxy; 6635 } 6636 memset(pNew, 0, sizeof(unixFile)); 6637 pNew->openFlags = openFlags; 6638 memset(&dummyVfs, 0, sizeof(dummyVfs)); 6639 dummyVfs.pAppData = (void*)&autolockIoFinder; 6640 dummyVfs.zName = "dummy"; 6641 pUnused->fd = fd; 6642 pUnused->flags = openFlags; 6643 pNew->pUnused = pUnused; 6644 6645 rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); 6646 if( rc==SQLITE_OK ){ 6647 *ppFile = pNew; 6648 return SQLITE_OK; 6649 } 6650 end_create_proxy: 6651 robust_close(pNew, fd, __LINE__); 6652 sqlite3_free(pNew); 6653 sqlite3_free(pUnused); 6654 return rc; 6655 } 6656 6657 #ifdef SQLITE_TEST 6658 /* simulate multiple hosts by creating unique hostid file paths */ 6659 int sqlite3_hostid_num = 0; 6660 #endif 6661 6662 #define PROXY_HOSTIDLEN 16 /* conch file host id length */ 6663 6664 #ifdef HAVE_GETHOSTUUID 6665 /* Not always defined in the 
headers as it ought to be */ 6666 extern int gethostuuid(uuid_t id, const struct timespec *wait); 6667 #endif 6668 6669 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 6670 ** bytes of writable memory. 6671 */ 6672 static int proxyGetHostID(unsigned char *pHostID, int *pError){ 6673 assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); 6674 memset(pHostID, 0, PROXY_HOSTIDLEN); 6675 #ifdef HAVE_GETHOSTUUID 6676 { 6677 struct timespec timeout = {1, 0}; /* 1 sec timeout */ 6678 if( gethostuuid(pHostID, &timeout) ){ 6679 int err = errno; 6680 if( pError ){ 6681 *pError = err; 6682 } 6683 return SQLITE_IOERR; 6684 } 6685 } 6686 #else 6687 UNUSED_PARAMETER(pError); 6688 #endif 6689 #ifdef SQLITE_TEST 6690 /* simulate multiple hosts by creating unique hostid file paths */ 6691 if( sqlite3_hostid_num != 0){ 6692 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); 6693 } 6694 #endif 6695 6696 return SQLITE_OK; 6697 } 6698 6699 /* The conch file contains the header, host id and lock file path 6700 */ 6701 #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ 6702 #define PROXY_HEADERLEN 1 /* conch file header length */ 6703 #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) 6704 #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) 6705 6706 /* 6707 ** Takes an open conch file, copies the contents to a new path and then moves 6708 ** it back. The newly created file's file descriptor is assigned to the 6709 ** conch file structure and finally the original conch file descriptor is 6710 ** closed. Returns zero if successful. 
6711 */ 6712 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ 6713 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6714 unixFile *conchFile = pCtx->conchFile; 6715 char tPath[MAXPATHLEN]; 6716 char buf[PROXY_MAXCONCHLEN]; 6717 char *cPath = pCtx->conchFilePath; 6718 size_t readLen = 0; 6719 size_t pathLen = 0; 6720 char errmsg[64] = ""; 6721 int fd = -1; 6722 int rc = -1; 6723 UNUSED_PARAMETER(myHostID); 6724 6725 /* create a new path by replace the trailing '-conch' with '-break' */ 6726 pathLen = strlcpy(tPath, cPath, MAXPATHLEN); 6727 if( pathLen>MAXPATHLEN || pathLen<6 || 6728 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ 6729 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); 6730 goto end_breaklock; 6731 } 6732 /* read the conch content */ 6733 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); 6734 if( readLen<PROXY_PATHINDEX ){ 6735 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); 6736 goto end_breaklock; 6737 } 6738 /* write it out to the temporary break file */ 6739 fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0); 6740 if( fd<0 ){ 6741 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); 6742 goto end_breaklock; 6743 } 6744 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ 6745 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); 6746 goto end_breaklock; 6747 } 6748 if( rename(tPath, cPath) ){ 6749 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); 6750 goto end_breaklock; 6751 } 6752 rc = 0; 6753 fprintf(stderr, "broke stale lock on %s\n", cPath); 6754 robust_close(pFile, conchFile->h, __LINE__); 6755 conchFile->h = fd; 6756 conchFile->openFlags = O_RDWR | O_CREAT; 6757 6758 end_breaklock: 6759 if( rc ){ 6760 if( fd>=0 ){ 6761 osUnlink(tPath); 6762 robust_close(pFile, fd, __LINE__); 6763 } 6764 fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); 6765 } 6766 
return rc; 6767 } 6768 6769 /* Take the requested lock on the conch file and break a stale lock if the 6770 ** host id matches. 6771 */ 6772 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ 6773 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6774 unixFile *conchFile = pCtx->conchFile; 6775 int rc = SQLITE_OK; 6776 int nTries = 0; 6777 struct timespec conchModTime; 6778 6779 memset(&conchModTime, 0, sizeof(conchModTime)); 6780 do { 6781 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 6782 nTries ++; 6783 if( rc==SQLITE_BUSY ){ 6784 /* If the lock failed (busy): 6785 * 1st try: get the mod time of the conch, wait 0.5s and try again. 6786 * 2nd try: fail if the mod time changed or host id is different, wait 6787 * 10 sec and try again 6788 * 3rd try: break the lock unless the mod time has changed. 6789 */ 6790 struct stat buf; 6791 if( osFstat(conchFile->h, &buf) ){ 6792 storeLastErrno(pFile, errno); 6793 return SQLITE_IOERR_LOCK; 6794 } 6795 6796 if( nTries==1 ){ 6797 conchModTime = buf.st_mtimespec; 6798 usleep(500000); /* wait 0.5 sec and try the lock again*/ 6799 continue; 6800 } 6801 6802 assert( nTries>1 ); 6803 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 6804 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ 6805 return SQLITE_BUSY; 6806 } 6807 6808 if( nTries==2 ){ 6809 char tBuf[PROXY_MAXCONCHLEN]; 6810 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); 6811 if( len<0 ){ 6812 storeLastErrno(pFile, errno); 6813 return SQLITE_IOERR_LOCK; 6814 } 6815 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ 6816 /* don't break the lock if the host id doesn't match */ 6817 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ 6818 return SQLITE_BUSY; 6819 } 6820 }else{ 6821 /* don't break the lock on short read or a version mismatch */ 6822 return SQLITE_BUSY; 6823 } 6824 usleep(10000000); /* wait 10 sec and try the lock again */ 6825 continue; 6826 } 
6827 6828 assert( nTries==3 ); 6829 if( 0==proxyBreakConchLock(pFile, myHostID) ){ 6830 rc = SQLITE_OK; 6831 if( lockType==EXCLUSIVE_LOCK ){ 6832 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); 6833 } 6834 if( !rc ){ 6835 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 6836 } 6837 } 6838 } 6839 } while( rc==SQLITE_BUSY && nTries<3 ); 6840 6841 return rc; 6842 } 6843 6844 /* Takes the conch by taking a shared lock and read the contents conch, if 6845 ** lockPath is non-NULL, the host ID and lock file path must match. A NULL 6846 ** lockPath means that the lockPath in the conch file will be used if the 6847 ** host IDs match, or a new lock path will be generated automatically 6848 ** and written to the conch file. 6849 */ 6850 static int proxyTakeConch(unixFile *pFile){ 6851 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6852 6853 if( pCtx->conchHeld!=0 ){ 6854 return SQLITE_OK; 6855 }else{ 6856 unixFile *conchFile = pCtx->conchFile; 6857 uuid_t myHostID; 6858 int pError = 0; 6859 char readBuf[PROXY_MAXCONCHLEN]; 6860 char lockPath[MAXPATHLEN]; 6861 char *tempLockPath = NULL; 6862 int rc = SQLITE_OK; 6863 int createConch = 0; 6864 int hostIdMatch = 0; 6865 int readLen = 0; 6866 int tryOldLockPath = 0; 6867 int forceNewLockPath = 0; 6868 6869 OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, 6870 (pCtx->lockProxyPath ? 
pCtx->lockProxyPath : ":auto:"), 6871 osGetpid(0))); 6872 6873 rc = proxyGetHostID(myHostID, &pError); 6874 if( (rc&0xff)==SQLITE_IOERR ){ 6875 storeLastErrno(pFile, pError); 6876 goto end_takeconch; 6877 } 6878 rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); 6879 if( rc!=SQLITE_OK ){ 6880 goto end_takeconch; 6881 } 6882 /* read the existing conch file */ 6883 readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); 6884 if( readLen<0 ){ 6885 /* I/O error: lastErrno set by seekAndRead */ 6886 storeLastErrno(pFile, conchFile->lastErrno); 6887 rc = SQLITE_IOERR_READ; 6888 goto end_takeconch; 6889 }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || 6890 readBuf[0]!=(char)PROXY_CONCHVERSION ){ 6891 /* a short read or version format mismatch means we need to create a new 6892 ** conch file. 6893 */ 6894 createConch = 1; 6895 } 6896 /* if the host id matches and the lock path already exists in the conch 6897 ** we'll try to use the path there, if we can't open that path, we'll 6898 ** retry with a new auto-generated path 6899 */ 6900 do { /* in case we need to try again for an :auto: named lock file */ 6901 6902 if( !createConch && !forceNewLockPath ){ 6903 hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, 6904 PROXY_HOSTIDLEN); 6905 /* if the conch has data compare the contents */ 6906 if( !pCtx->lockProxyPath ){ 6907 /* for auto-named local lock file, just check the host ID and we'll 6908 ** use the local lock file path that's already in there 6909 */ 6910 if( hostIdMatch ){ 6911 size_t pathLen = (readLen - PROXY_PATHINDEX); 6912 6913 if( pathLen>=MAXPATHLEN ){ 6914 pathLen=MAXPATHLEN-1; 6915 } 6916 memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); 6917 lockPath[pathLen] = 0; 6918 tempLockPath = lockPath; 6919 tryOldLockPath = 1; 6920 /* create a copy of the lock path if the conch is taken */ 6921 goto end_takeconch; 6922 } 6923 }else if( hostIdMatch 6924 && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], 6925 
readLen-PROXY_PATHINDEX) 6926 ){ 6927 /* conch host and lock path match */ 6928 goto end_takeconch; 6929 } 6930 } 6931 6932 /* if the conch isn't writable and doesn't match, we can't take it */ 6933 if( (conchFile->openFlags&O_RDWR) == 0 ){ 6934 rc = SQLITE_BUSY; 6935 goto end_takeconch; 6936 } 6937 6938 /* either the conch didn't match or we need to create a new one */ 6939 if( !pCtx->lockProxyPath ){ 6940 proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); 6941 tempLockPath = lockPath; 6942 /* create a copy of the lock path _only_ if the conch is taken */ 6943 } 6944 6945 /* update conch with host and path (this will fail if other process 6946 ** has a shared lock already), if the host id matches, use the big 6947 ** stick. 6948 */ 6949 futimes(conchFile->h, NULL); 6950 if( hostIdMatch && !createConch ){ 6951 if( conchFile->pInode && conchFile->pInode->nShared>1 ){ 6952 /* We are trying for an exclusive lock but another thread in this 6953 ** same process is still holding a shared lock. 
*/ 6954 rc = SQLITE_BUSY; 6955 } else { 6956 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 6957 } 6958 }else{ 6959 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 6960 } 6961 if( rc==SQLITE_OK ){ 6962 char writeBuffer[PROXY_MAXCONCHLEN]; 6963 int writeSize = 0; 6964 6965 writeBuffer[0] = (char)PROXY_CONCHVERSION; 6966 memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); 6967 if( pCtx->lockProxyPath!=NULL ){ 6968 strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, 6969 MAXPATHLEN); 6970 }else{ 6971 strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); 6972 } 6973 writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); 6974 robust_ftruncate(conchFile->h, writeSize); 6975 rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); 6976 full_fsync(conchFile->h,0,0); 6977 /* If we created a new conch file (not just updated the contents of a 6978 ** valid conch file), try to match the permissions of the database 6979 */ 6980 if( rc==SQLITE_OK && createConch ){ 6981 struct stat buf; 6982 int err = osFstat(pFile->h, &buf); 6983 if( err==0 ){ 6984 mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | 6985 S_IROTH|S_IWOTH); 6986 /* try to match the database file R/W permissions, ignore failure */ 6987 #ifndef SQLITE_PROXY_DEBUG 6988 osFchmod(conchFile->h, cmode); 6989 #else 6990 do{ 6991 rc = osFchmod(conchFile->h, cmode); 6992 }while( rc==(-1) && errno==EINTR ); 6993 if( rc!=0 ){ 6994 int code = errno; 6995 fprintf(stderr, "fchmod %o FAILED with %d %s\n", 6996 cmode, code, strerror(code)); 6997 } else { 6998 fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); 6999 } 7000 }else{ 7001 int code = errno; 7002 fprintf(stderr, "STAT FAILED[%d] with %d %s\n", 7003 err, code, strerror(code)); 7004 #endif 7005 } 7006 } 7007 } 7008 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); 7009 7010 end_takeconch: 7011 OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); 7012 if( rc==SQLITE_OK && 
pFile->openFlags ){ 7013 int fd; 7014 if( pFile->h>=0 ){ 7015 robust_close(pFile, pFile->h, __LINE__); 7016 } 7017 pFile->h = -1; 7018 fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); 7019 OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); 7020 if( fd>=0 ){ 7021 pFile->h = fd; 7022 }else{ 7023 rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called 7024 during locking */ 7025 } 7026 } 7027 if( rc==SQLITE_OK && !pCtx->lockProxy ){ 7028 char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath; 7029 rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); 7030 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ 7031 /* we couldn't create the proxy lock file with the old lock file path 7032 ** so try again via auto-naming 7033 */ 7034 forceNewLockPath = 1; 7035 tryOldLockPath = 0; 7036 continue; /* go back to the do {} while start point, try again */ 7037 } 7038 } 7039 if( rc==SQLITE_OK ){ 7040 /* Need to make a copy of path if we extracted the value 7041 ** from the conch file or the path was allocated on the stack 7042 */ 7043 if( tempLockPath ){ 7044 pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); 7045 if( !pCtx->lockProxyPath ){ 7046 rc = SQLITE_NOMEM_BKPT; 7047 } 7048 } 7049 } 7050 if( rc==SQLITE_OK ){ 7051 pCtx->conchHeld = 1; 7052 7053 if( pCtx->lockProxy->pMethod == &afpIoMethods ){ 7054 afpLockingContext *afpCtx; 7055 afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; 7056 afpCtx->dbPath = pCtx->lockProxyPath; 7057 } 7058 } else { 7059 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7060 } 7061 OSTRACE(("TAKECONCH %d %s\n", conchFile->h, 7062 rc==SQLITE_OK?"ok":"failed")); 7063 return rc; 7064 } while (1); /* in case we need to retry the :auto: lock file - 7065 ** we should never get here except via the 'continue' call. */ 7066 } 7067 } 7068 7069 /* 7070 ** If pFile holds a lock on a conch file, then release that lock. 
7071 */ 7072 static int proxyReleaseConch(unixFile *pFile){ 7073 int rc = SQLITE_OK; /* Subroutine return code */ 7074 proxyLockingContext *pCtx; /* The locking context for the proxy lock */ 7075 unixFile *conchFile; /* Name of the conch file */ 7076 7077 pCtx = (proxyLockingContext *)pFile->lockingContext; 7078 conchFile = pCtx->conchFile; 7079 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, 7080 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 7081 osGetpid(0))); 7082 if( pCtx->conchHeld>0 ){ 7083 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7084 } 7085 pCtx->conchHeld = 0; 7086 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, 7087 (rc==SQLITE_OK ? "ok" : "failed"))); 7088 return rc; 7089 } 7090 7091 /* 7092 ** Given the name of a database file, compute the name of its conch file. 7093 ** Store the conch filename in memory obtained from sqlite3_malloc64(). 7094 ** Make *pConchPath point to the new name. Return SQLITE_OK on success 7095 ** or SQLITE_NOMEM if unable to obtain memory. 7096 ** 7097 ** The caller is responsible for ensuring that the allocated memory 7098 ** space is eventually freed. 7099 ** 7100 ** *pConchPath is set to NULL if a memory allocation error occurs. 7101 */ 7102 static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ 7103 int i; /* Loop counter */ 7104 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ 7105 char *conchPath; /* buffer in which to construct conch name */ 7106 7107 /* Allocate space for the conch filename and initialize the name to 7108 ** the name of the original database file. */ 7109 *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); 7110 if( conchPath==0 ){ 7111 return SQLITE_NOMEM_BKPT; 7112 } 7113 memcpy(conchPath, dbPath, len+1); 7114 7115 /* now insert a "." 
before the last / character */ 7116 for( i=(len-1); i>=0; i-- ){ 7117 if( conchPath[i]=='/' ){ 7118 i++; 7119 break; 7120 } 7121 } 7122 conchPath[i]='.'; 7123 while ( i<len ){ 7124 conchPath[i+1]=dbPath[i]; 7125 i++; 7126 } 7127 7128 /* append the "-conch" suffix to the file */ 7129 memcpy(&conchPath[i+1], "-conch", 7); 7130 assert( (int)strlen(conchPath) == len+7 ); 7131 7132 return SQLITE_OK; 7133 } 7134 7135 7136 /* Takes a fully configured proxy locking-style unix file and switches 7137 ** the local lock file path 7138 */ 7139 static int switchLockProxyPath(unixFile *pFile, const char *path) { 7140 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7141 char *oldPath = pCtx->lockProxyPath; 7142 int rc = SQLITE_OK; 7143 7144 if( pFile->eFileLock!=NO_LOCK ){ 7145 return SQLITE_BUSY; 7146 } 7147 7148 /* nothing to do if the path is NULL, :auto: or matches the existing path */ 7149 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || 7150 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ 7151 return SQLITE_OK; 7152 }else{ 7153 unixFile *lockProxy = pCtx->lockProxy; 7154 pCtx->lockProxy=NULL; 7155 pCtx->conchHeld = 0; 7156 if( lockProxy!=NULL ){ 7157 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); 7158 if( rc ) return rc; 7159 sqlite3_free(lockProxy); 7160 } 7161 sqlite3_free(oldPath); 7162 pCtx->lockProxyPath = sqlite3DbStrDup(0, path); 7163 } 7164 7165 return rc; 7166 } 7167 7168 /* 7169 ** pFile is a file that has been opened by a prior xOpen call. dbPath 7170 ** is a string buffer at least MAXPATHLEN+1 characters in size. 7171 ** 7172 ** This routine find the filename associated with pFile and writes it 7173 ** int dbPath. 
7174 */ 7175 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ 7176 #if defined(__APPLE__) 7177 if( pFile->pMethod == &afpIoMethods ){ 7178 /* afp style keeps a reference to the db path in the filePath field 7179 ** of the struct */ 7180 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7181 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, 7182 MAXPATHLEN); 7183 } else 7184 #endif 7185 if( pFile->pMethod == &dotlockIoMethods ){ 7186 /* dot lock style uses the locking context to store the dot lock 7187 ** file path */ 7188 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); 7189 memcpy(dbPath, (char *)pFile->lockingContext, len + 1); 7190 }else{ 7191 /* all other styles use the locking context to store the db file path */ 7192 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7193 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); 7194 } 7195 return SQLITE_OK; 7196 } 7197 7198 /* 7199 ** Takes an already filled in unix file and alters it so all file locking 7200 ** will be performed on the local proxy lock file. The following fields 7201 ** are preserved in the locking context so that they can be restored and 7202 ** the unix structure properly cleaned up at close time: 7203 ** ->lockingContext 7204 ** ->pMethod 7205 */ 7206 static int proxyTransformUnixFile(unixFile *pFile, const char *path) { 7207 proxyLockingContext *pCtx; 7208 char dbPath[MAXPATHLEN+1]; /* Name of the database file */ 7209 char *lockPath=NULL; 7210 int rc = SQLITE_OK; 7211 7212 if( pFile->eFileLock!=NO_LOCK ){ 7213 return SQLITE_BUSY; 7214 } 7215 proxyGetDbPathForUnixFile(pFile, dbPath); 7216 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ 7217 lockPath=NULL; 7218 }else{ 7219 lockPath=(char *)path; 7220 } 7221 7222 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, 7223 (lockPath ? 
lockPath : ":auto:"), osGetpid(0))); 7224 7225 pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 7226 if( pCtx==0 ){ 7227 return SQLITE_NOMEM_BKPT; 7228 } 7229 memset(pCtx, 0, sizeof(*pCtx)); 7230 7231 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); 7232 if( rc==SQLITE_OK ){ 7233 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); 7234 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ 7235 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and 7236 ** (c) the file system is read-only, then enable no-locking access. 7237 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts 7238 ** that openFlags will have only one of O_RDONLY or O_RDWR. 7239 */ 7240 struct statfs fsInfo; 7241 struct stat conchInfo; 7242 int goLockless = 0; 7243 7244 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { 7245 int err = errno; 7246 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ 7247 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; 7248 } 7249 } 7250 if( goLockless ){ 7251 pCtx->conchHeld = -1; /* read only FS/ lockless */ 7252 rc = SQLITE_OK; 7253 } 7254 } 7255 } 7256 if( rc==SQLITE_OK && lockPath ){ 7257 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); 7258 } 7259 7260 if( rc==SQLITE_OK ){ 7261 pCtx->dbPath = sqlite3DbStrDup(0, dbPath); 7262 if( pCtx->dbPath==NULL ){ 7263 rc = SQLITE_NOMEM_BKPT; 7264 } 7265 } 7266 if( rc==SQLITE_OK ){ 7267 /* all memory is allocated, proxys are created and assigned, 7268 ** switch the locking context and pMethod then return. 
7269 */ 7270 pCtx->oldLockingContext = pFile->lockingContext; 7271 pFile->lockingContext = pCtx; 7272 pCtx->pOldMethod = pFile->pMethod; 7273 pFile->pMethod = &proxyIoMethods; 7274 }else{ 7275 if( pCtx->conchFile ){ 7276 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); 7277 sqlite3_free(pCtx->conchFile); 7278 } 7279 sqlite3DbFree(0, pCtx->lockProxyPath); 7280 sqlite3_free(pCtx->conchFilePath); 7281 sqlite3_free(pCtx); 7282 } 7283 OSTRACE(("TRANSPROXY %d %s\n", pFile->h, 7284 (rc==SQLITE_OK ? "ok" : "failed"))); 7285 return rc; 7286 } 7287 7288 7289 /* 7290 ** This routine handles sqlite3_file_control() calls that are specific 7291 ** to proxy locking. 7292 */ 7293 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 7294 switch( op ){ 7295 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 7296 unixFile *pFile = (unixFile*)id; 7297 if( pFile->pMethod == &proxyIoMethods ){ 7298 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7299 proxyTakeConch(pFile); 7300 if( pCtx->lockProxyPath ){ 7301 *(const char **)pArg = pCtx->lockProxyPath; 7302 }else{ 7303 *(const char **)pArg = ":auto: (not held)"; 7304 } 7305 } else { 7306 *(const char **)pArg = NULL; 7307 } 7308 return SQLITE_OK; 7309 } 7310 case SQLITE_FCNTL_SET_LOCKPROXYFILE: { 7311 unixFile *pFile = (unixFile*)id; 7312 int rc = SQLITE_OK; 7313 int isProxyStyle = (pFile->pMethod == &proxyIoMethods); 7314 if( pArg==NULL || (const char *)pArg==0 ){ 7315 if( isProxyStyle ){ 7316 /* turn off proxy locking - not supported. If support is added for 7317 ** switching proxy locking mode off then it will need to fail if 7318 ** the journal mode is WAL mode. 7319 */ 7320 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? 
SQLITE_MISUSE?*/; 7321 }else{ 7322 /* turn off proxy locking - already off - NOOP */ 7323 rc = SQLITE_OK; 7324 } 7325 }else{ 7326 const char *proxyPath = (const char *)pArg; 7327 if( isProxyStyle ){ 7328 proxyLockingContext *pCtx = 7329 (proxyLockingContext*)pFile->lockingContext; 7330 if( !strcmp(pArg, ":auto:") 7331 || (pCtx->lockProxyPath && 7332 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) 7333 ){ 7334 rc = SQLITE_OK; 7335 }else{ 7336 rc = switchLockProxyPath(pFile, proxyPath); 7337 } 7338 }else{ 7339 /* turn on proxy file locking */ 7340 rc = proxyTransformUnixFile(pFile, proxyPath); 7341 } 7342 } 7343 return rc; 7344 } 7345 default: { 7346 assert( 0 ); /* The call assures that only valid opcodes are sent */ 7347 } 7348 } 7349 /*NOTREACHED*/ 7350 return SQLITE_ERROR; 7351 } 7352 7353 /* 7354 ** Within this division (the proxying locking implementation) the procedures 7355 ** above this point are all utilities. The lock-related methods of the 7356 ** proxy-locking sqlite3_io_method object follow. 7357 */ 7358 7359 7360 /* 7361 ** This routine checks if there is a RESERVED lock held on the specified 7362 ** file by this or any other process. If such a lock is held, set *pResOut 7363 ** to a non-zero value otherwise *pResOut is set to zero. The return value 7364 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 
7365 */ 7366 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { 7367 unixFile *pFile = (unixFile*)id; 7368 int rc = proxyTakeConch(pFile); 7369 if( rc==SQLITE_OK ){ 7370 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7371 if( pCtx->conchHeld>0 ){ 7372 unixFile *proxy = pCtx->lockProxy; 7373 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); 7374 }else{ /* conchHeld < 0 is lockless */ 7375 pResOut=0; 7376 } 7377 } 7378 return rc; 7379 } 7380 7381 /* 7382 ** Lock the file with the lock specified by parameter eFileLock - one 7383 ** of the following: 7384 ** 7385 ** (1) SHARED_LOCK 7386 ** (2) RESERVED_LOCK 7387 ** (3) PENDING_LOCK 7388 ** (4) EXCLUSIVE_LOCK 7389 ** 7390 ** Sometimes when requesting one lock state, additional lock states 7391 ** are inserted in between. The locking might fail on one of the later 7392 ** transitions leaving the lock state different from what it started but 7393 ** still short of its goal. The following chart shows the allowed 7394 ** transitions and the inserted intermediate states: 7395 ** 7396 ** UNLOCKED -> SHARED 7397 ** SHARED -> RESERVED 7398 ** SHARED -> (PENDING) -> EXCLUSIVE 7399 ** RESERVED -> (PENDING) -> EXCLUSIVE 7400 ** PENDING -> EXCLUSIVE 7401 ** 7402 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 7403 ** routine to lower a locking level. 7404 */ 7405 static int proxyLock(sqlite3_file *id, int eFileLock) { 7406 unixFile *pFile = (unixFile*)id; 7407 int rc = proxyTakeConch(pFile); 7408 if( rc==SQLITE_OK ){ 7409 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7410 if( pCtx->conchHeld>0 ){ 7411 unixFile *proxy = pCtx->lockProxy; 7412 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); 7413 pFile->eFileLock = proxy->eFileLock; 7414 }else{ 7415 /* conchHeld < 0 is lockless */ 7416 } 7417 } 7418 return rc; 7419 } 7420 7421 7422 /* 7423 ** Lower the locking level on file descriptor pFile to eFileLock. 
eFileLock 7424 ** must be either NO_LOCK or SHARED_LOCK. 7425 ** 7426 ** If the locking level of the file descriptor is already at or below 7427 ** the requested locking level, this routine is a no-op. 7428 */ 7429 static int proxyUnlock(sqlite3_file *id, int eFileLock) { 7430 unixFile *pFile = (unixFile*)id; 7431 int rc = proxyTakeConch(pFile); 7432 if( rc==SQLITE_OK ){ 7433 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7434 if( pCtx->conchHeld>0 ){ 7435 unixFile *proxy = pCtx->lockProxy; 7436 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); 7437 pFile->eFileLock = proxy->eFileLock; 7438 }else{ 7439 /* conchHeld < 0 is lockless */ 7440 } 7441 } 7442 return rc; 7443 } 7444 7445 /* 7446 ** Close a file that uses proxy locks. 7447 */ 7448 static int proxyClose(sqlite3_file *id) { 7449 if( ALWAYS(id) ){ 7450 unixFile *pFile = (unixFile*)id; 7451 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7452 unixFile *lockProxy = pCtx->lockProxy; 7453 unixFile *conchFile = pCtx->conchFile; 7454 int rc = SQLITE_OK; 7455 7456 if( lockProxy ){ 7457 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); 7458 if( rc ) return rc; 7459 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); 7460 if( rc ) return rc; 7461 sqlite3_free(lockProxy); 7462 pCtx->lockProxy = 0; 7463 } 7464 if( conchFile ){ 7465 if( pCtx->conchHeld ){ 7466 rc = proxyReleaseConch(pFile); 7467 if( rc ) return rc; 7468 } 7469 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); 7470 if( rc ) return rc; 7471 sqlite3_free(conchFile); 7472 } 7473 sqlite3DbFree(0, pCtx->lockProxyPath); 7474 sqlite3_free(pCtx->conchFilePath); 7475 sqlite3DbFree(0, pCtx->dbPath); 7476 /* restore the original locking context and pMethod then close it */ 7477 pFile->lockingContext = pCtx->oldLockingContext; 7478 pFile->pMethod = pCtx->pOldMethod; 7479 sqlite3_free(pCtx); 7480 return pFile->pMethod->xClose(id); 7481 } 7482 return SQLITE_OK; 7483 } 
7484 7485 7486 7487 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 7488 /* 7489 ** The proxy locking style is intended for use with AFP filesystems. 7490 ** And since AFP is only supported on MacOSX, the proxy locking is also 7491 ** restricted to MacOSX. 7492 ** 7493 ** 7494 ******************* End of the proxy lock implementation ********************** 7495 ******************************************************************************/ 7496 7497 /* 7498 ** Initialize the operating system interface. 7499 ** 7500 ** This routine registers all VFS implementations for unix-like operating 7501 ** systems. This routine, and the sqlite3_os_end() routine that follows, 7502 ** should be the only routines in this file that are visible from other 7503 ** files. 7504 ** 7505 ** This routine is called once during SQLite initialization and by a 7506 ** single thread. The memory allocation and mutex subsystems have not 7507 ** necessarily been initialized when this routine is called, and so they 7508 ** should not be used. 7509 */ 7510 int sqlite3_os_init(void){ 7511 /* 7512 ** The following macro defines an initializer for an sqlite3_vfs object. 7513 ** The name of the VFS is NAME. The pAppData is a pointer to a pointer 7514 ** to the "finder" function. (pAppData is a pointer to a pointer because 7515 ** silly C90 rules prohibit a void* from being cast to a function pointer 7516 ** and so we have to go through the intermediate pointer to avoid problems 7517 ** when compiling with -pedantic-errors on GCC.) 7518 ** 7519 ** The FINDER parameter to this macro is the name of the pointer to the 7520 ** finder-function. The finder-function returns a pointer to the 7521 ** sqlite_io_methods object that implements the desired locking 7522 ** behaviors. See the division above that contains the IOMETHODS 7523 ** macro for addition information on finder-functions. 7524 ** 7525 ** Most finders simply return a pointer to a fixed sqlite3_io_methods 7526 ** object. 
But the "autolockIoFinder" available on MacOSX does a little 7527 ** more than that; it looks at the filesystem type that hosts the 7528 ** database file and tries to choose an locking method appropriate for 7529 ** that filesystem time. 7530 */ 7531 #define UNIXVFS(VFSNAME, FINDER) { \ 7532 3, /* iVersion */ \ 7533 sizeof(unixFile), /* szOsFile */ \ 7534 MAX_PATHNAME, /* mxPathname */ \ 7535 0, /* pNext */ \ 7536 VFSNAME, /* zName */ \ 7537 (void*)&FINDER, /* pAppData */ \ 7538 unixOpen, /* xOpen */ \ 7539 unixDelete, /* xDelete */ \ 7540 unixAccess, /* xAccess */ \ 7541 unixFullPathname, /* xFullPathname */ \ 7542 unixDlOpen, /* xDlOpen */ \ 7543 unixDlError, /* xDlError */ \ 7544 unixDlSym, /* xDlSym */ \ 7545 unixDlClose, /* xDlClose */ \ 7546 unixRandomness, /* xRandomness */ \ 7547 unixSleep, /* xSleep */ \ 7548 unixCurrentTime, /* xCurrentTime */ \ 7549 unixGetLastError, /* xGetLastError */ \ 7550 unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ 7551 unixSetSystemCall, /* xSetSystemCall */ \ 7552 unixGetSystemCall, /* xGetSystemCall */ \ 7553 unixNextSystemCall, /* xNextSystemCall */ \ 7554 } 7555 7556 /* 7557 ** All default VFSes for unix are contained in the following array. 7558 ** 7559 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified 7560 ** by the SQLite core when the VFS is registered. So the following 7561 ** array cannot be const. 
7562 */ 7563 static sqlite3_vfs aVfs[] = { 7564 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 7565 UNIXVFS("unix", autolockIoFinder ), 7566 #elif OS_VXWORKS 7567 UNIXVFS("unix", vxworksIoFinder ), 7568 #else 7569 UNIXVFS("unix", posixIoFinder ), 7570 #endif 7571 UNIXVFS("unix-none", nolockIoFinder ), 7572 UNIXVFS("unix-dotfile", dotlockIoFinder ), 7573 UNIXVFS("unix-excl", posixIoFinder ), 7574 #if OS_VXWORKS 7575 UNIXVFS("unix-namedsem", semIoFinder ), 7576 #endif 7577 #if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS 7578 UNIXVFS("unix-posix", posixIoFinder ), 7579 #endif 7580 #if SQLITE_ENABLE_LOCKING_STYLE 7581 UNIXVFS("unix-flock", flockIoFinder ), 7582 #endif 7583 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 7584 UNIXVFS("unix-afp", afpIoFinder ), 7585 UNIXVFS("unix-nfs", nfsIoFinder ), 7586 UNIXVFS("unix-proxy", proxyIoFinder ), 7587 #endif 7588 }; 7589 unsigned int i; /* Loop counter */ 7590 7591 /* Double-check that the aSyscall[] array has been constructed 7592 ** correctly. See ticket [bb3a86e890c8e96ab] */ 7593 assert( ArraySize(aSyscall)==28 ); 7594 7595 /* Register all VFSes defined in the aVfs[] array */ 7596 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ 7597 sqlite3_vfs_register(&aVfs[i], i==0); 7598 } 7599 return SQLITE_OK; 7600 } 7601 7602 /* 7603 ** Shutdown the operating system interface. 7604 ** 7605 ** Some operating systems might need to do some cleanup in this routine, 7606 ** to release dynamically allocated objects. But not on unix. 7607 ** This routine is a no-op for unix. 7608 */ 7609 int sqlite3_os_end(void){ 7610 return SQLITE_OK; 7611 } 7612 7613 #endif /* SQLITE_OS_UNIX */ 7614