1 /* 2 ** 2004 May 22 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the VFS implementation for unix-like operating systems 14 ** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. 15 ** 16 ** There are actually several different VFS implementations in this file. 17 ** The differences are in the way that file locking is done. The default 18 ** implementation uses Posix Advisory Locks. Alternative implementations 19 ** use flock(), dot-files, various proprietary locking schemas, or simply 20 ** skip locking all together. 21 ** 22 ** This source file is organized into divisions where the logic for various 23 ** subfunctions is contained within the appropriate division. PLEASE 24 ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed 25 ** in the correct division and should be clearly labeled. 26 ** 27 ** The layout of divisions is as follows: 28 ** 29 ** * General-purpose declarations and utility functions. 30 ** * Unique file ID logic used by VxWorks. 31 ** * Various locking primitive implementations (all except proxy locking): 32 ** + for Posix Advisory Locks 33 ** + for no-op locks 34 ** + for dot-file locks 35 ** + for flock() locking 36 ** + for named semaphore locks (VxWorks only) 37 ** + for AFP filesystem locks (MacOSX only) 38 ** * sqlite3_file methods not associated with locking. 39 ** * Definitions of sqlite3_io_methods objects for all locking 40 ** methods plus "finder" functions for each locking method. 41 ** * sqlite3_vfs method implementations. 42 ** * Locking primitives for the proxy uber-locking-method. (MacOSX only) 43 ** * Definitions of sqlite3_vfs objects for all locking methods 44 ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). 45 */ 46 #include "sqliteInt.h" 47 #if SQLITE_OS_UNIX /* This file is used on unix only */ 48 49 /* 50 ** There are various methods for file locking used for concurrency 51 ** control: 52 ** 53 ** 1. POSIX locking (the default), 54 ** 2. No locking, 55 ** 3. Dot-file locking, 56 ** 4. flock() locking, 57 ** 5. AFP locking (OSX only), 58 ** 6. Named POSIX semaphores (VXWorks only), 59 ** 7. proxy locking. (OSX only) 60 ** 61 ** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE 62 ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic 63 ** selection of the appropriate locking style based on the filesystem 64 ** where the database is located. 65 */ 66 #if !defined(SQLITE_ENABLE_LOCKING_STYLE) 67 # if defined(__APPLE__) 68 # define SQLITE_ENABLE_LOCKING_STYLE 1 69 # else 70 # define SQLITE_ENABLE_LOCKING_STYLE 0 71 # endif 72 #endif 73 74 /* Use pread() and pwrite() if they are available */ 75 #if defined(__APPLE__) 76 # define HAVE_PREAD 1 77 # define HAVE_PWRITE 1 78 #endif 79 #if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64) 80 # undef USE_PREAD 81 # define USE_PREAD64 1 82 #elif defined(HAVE_PREAD) && defined(HAVE_PWRITE) 83 # undef USE_PREAD64 84 # define USE_PREAD 1 85 #endif 86 87 /* 88 ** standard include files. 89 */ 90 #include <sys/types.h> 91 #include <sys/stat.h> 92 #include <fcntl.h> 93 #include <sys/ioctl.h> 94 #include <unistd.h> 95 #include <time.h> 96 #include <sys/time.h> 97 #include <errno.h> 98 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 99 # include <sys/mman.h> 100 #endif 101 102 #if SQLITE_ENABLE_LOCKING_STYLE 103 # include <sys/ioctl.h> 104 # include <sys/file.h> 105 # include <sys/param.h> 106 #endif /* SQLITE_ENABLE_LOCKING_STYLE */ 107 108 /* 109 ** Try to determine if gethostuuid() is available based on standard 110 ** macros. This might sometimes compute the wrong value for some 111 ** obscure platforms. For those cases, simply compile with one of 112 ** the following: 113 ** 114 ** -DHAVE_GETHOSTUUID=0 115 ** -DHAVE_GETHOSTUUID=1 116 ** 117 ** None if this matters except when building on Apple products with 118 ** -DSQLITE_ENABLE_LOCKING_STYLE. 119 */ 120 #ifndef HAVE_GETHOSTUUID 121 # define HAVE_GETHOSTUUID 0 122 # if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ 123 (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) 124 # if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ 125 && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))\ 126 && (!defined(TARGET_OS_MACCATALYST) || (TARGET_OS_MACCATALYST==0)) 127 # undef HAVE_GETHOSTUUID 128 # define HAVE_GETHOSTUUID 1 129 # else 130 # warning "gethostuuid() is disabled." 131 # endif 132 # endif 133 #endif 134 135 136 #if OS_VXWORKS 137 # include <sys/ioctl.h> 138 # include <semaphore.h> 139 # include <limits.h> 140 #endif /* OS_VXWORKS */ 141 142 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 143 # include <sys/mount.h> 144 #endif 145 146 #ifdef HAVE_UTIME 147 # include <utime.h> 148 #endif 149 150 /* 151 ** Allowed values of unixFile.fsFlags 152 */ 153 #define SQLITE_FSFLAGS_IS_MSDOS 0x1 154 155 /* 156 ** If we are to be thread-safe, include the pthreads header. 157 */ 158 #if SQLITE_THREADSAFE 159 # include <pthread.h> 160 #endif 161 162 /* 163 ** Default permissions when creating a new file 164 */ 165 #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS 166 # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 167 #endif 168 169 /* 170 ** Default permissions when creating auto proxy dir 171 */ 172 #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 173 # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 174 #endif 175 176 /* 177 ** Maximum supported path-length. 178 */ 179 #define MAX_PATHNAME 512 180 181 /* 182 ** Maximum supported symbolic links 183 */ 184 #define SQLITE_MAX_SYMLINKS 100 185 186 /* Always cast the getpid() return type for compatibility with 187 ** kernel modules in VxWorks. */ 188 #define osGetpid(X) (pid_t)getpid() 189 190 /* 191 ** Only set the lastErrno if the error code is a real error and not 192 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK 193 */ 194 #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) 195 196 /* Forward references */ 197 typedef struct unixShm unixShm; /* Connection shared memory */ 198 typedef struct unixShmNode unixShmNode; /* Shared memory instance */ 199 typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ 200 typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ 201 202 /* 203 ** Sometimes, after a file handle is closed by SQLite, the file descriptor 204 ** cannot be closed immediately. In these cases, instances of the following 205 ** structure are used to store the file descriptor while waiting for an 206 ** opportunity to either close or reuse it. 207 */ 208 struct UnixUnusedFd { 209 int fd; /* File descriptor to close */ 210 int flags; /* Flags this file descriptor was opened with */ 211 UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ 212 }; 213 214 /* 215 ** The unixFile structure is subclass of sqlite3_file specific to the unix 216 ** VFS implementations. 217 */ 218 typedef struct unixFile unixFile; 219 struct unixFile { 220 sqlite3_io_methods const *pMethod; /* Always the first entry */ 221 sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ 222 unixInodeInfo *pInode; /* Info about locks on this inode */ 223 int h; /* The file descriptor */ 224 unsigned char eFileLock; /* The type of lock held on this fd */ 225 unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ 226 int lastErrno; /* The unix errno from last I/O error */ 227 void *lockingContext; /* Locking style specific state */ 228 UnixUnusedFd *pPreallocatedUnused; /* Pre-allocated UnixUnusedFd */ 229 const char *zPath; /* Name of the file */ 230 unixShm *pShm; /* Shared memory segment information */ 231 int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ 232 #if SQLITE_MAX_MMAP_SIZE>0 233 int nFetchOut; /* Number of outstanding xFetch refs */ 234 sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ 235 sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ 236 sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ 237 void *pMapRegion; /* Memory mapped region */ 238 #endif 239 int sectorSize; /* Device sector size */ 240 int deviceCharacteristics; /* Precomputed device characteristics */ 241 #if SQLITE_ENABLE_LOCKING_STYLE 242 int openFlags; /* The flags specified at open() */ 243 #endif 244 #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) 245 unsigned fsFlags; /* cached details from statfs() */ 246 #endif 247 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 248 unsigned iBusyTimeout; /* Wait this many millisec on locks */ 249 #endif 250 #if OS_VXWORKS 251 struct vxworksFileId *pId; /* Unique file ID */ 252 #endif 253 #ifdef SQLITE_DEBUG 254 /* The next group of variables are used to track whether or not the 255 ** transaction counter in bytes 24-27 of database files are updated 256 ** whenever any part of the database changes. An assertion fault will 257 ** occur if a file is updated without also updating the transaction 258 ** counter. This test is made to avoid new problems similar to the 259 ** one described by ticket #3584. 260 */ 261 unsigned char transCntrChng; /* True if the transaction counter changed */ 262 unsigned char dbUpdate; /* True if any part of database file changed */ 263 unsigned char inNormalWrite; /* True if in a normal write operation */ 264 265 #endif 266 267 #ifdef SQLITE_TEST 268 /* In test mode, increase the size of this structure a bit so that 269 ** it is larger than the struct CrashFile defined in test6.c. 270 */ 271 char aPadding[32]; 272 #endif 273 }; 274 275 /* This variable holds the process id (pid) from when the xRandomness() 276 ** method was called. If xOpen() is called from a different process id, 277 ** indicating that a fork() has occurred, the PRNG will be reset. 278 */ 279 static pid_t randomnessPid = 0; 280 281 /* 282 ** Allowed values for the unixFile.ctrlFlags bitmask: 283 */ 284 #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ 285 #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ 286 #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ 287 #ifndef SQLITE_DISABLE_DIRSYNC 288 # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ 289 #else 290 # define UNIXFILE_DIRSYNC 0x00 291 #endif 292 #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ 293 #define UNIXFILE_DELETE 0x20 /* Delete on close */ 294 #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ 295 #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ 296 297 /* 298 ** Include code that is common to all os_*.c files 299 */ 300 #include "os_common.h" 301 302 /* 303 ** Define various macros that are missing from some systems. 304 */ 305 #ifndef O_LARGEFILE 306 # define O_LARGEFILE 0 307 #endif 308 #ifdef SQLITE_DISABLE_LFS 309 # undef O_LARGEFILE 310 # define O_LARGEFILE 0 311 #endif 312 #ifndef O_NOFOLLOW 313 # define O_NOFOLLOW 0 314 #endif 315 #ifndef O_BINARY 316 # define O_BINARY 0 317 #endif 318 319 /* 320 ** The threadid macro resolves to the thread-id or to 0. Used for 321 ** testing and debugging only. 322 */ 323 #if SQLITE_THREADSAFE 324 #define threadid pthread_self() 325 #else 326 #define threadid 0 327 #endif 328 329 /* 330 ** HAVE_MREMAP defaults to true on Linux and false everywhere else. 331 */ 332 #if !defined(HAVE_MREMAP) 333 # if defined(__linux__) && defined(_GNU_SOURCE) 334 # define HAVE_MREMAP 1 335 # else 336 # define HAVE_MREMAP 0 337 # endif 338 #endif 339 340 /* 341 ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() 342 ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. 343 */ 344 #ifdef __ANDROID__ 345 # define lseek lseek64 346 #endif 347 348 #ifdef __linux__ 349 /* 350 ** Linux-specific IOCTL magic numbers used for controlling F2FS 351 */ 352 #define F2FS_IOCTL_MAGIC 0xf5 353 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) 354 #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) 355 #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 356 #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 357 #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) 358 #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 359 #endif /* __linux__ */ 360 361 362 /* 363 ** Different Unix systems declare open() in different ways. Same use 364 ** open(const char*,int,mode_t). Others use open(const char*,int,...). 365 ** The difference is important when using a pointer to the function. 366 ** 367 ** The safest way to deal with the problem is to always use this wrapper 368 ** which always has the same well-defined interface. 369 */ 370 static int posixOpen(const char *zFile, int flags, int mode){ 371 return open(zFile, flags, mode); 372 } 373 374 /* Forward reference */ 375 static int openDirectory(const char*, int*); 376 static int unixGetpagesize(void); 377 378 /* 379 ** Many system calls are accessed through pointer-to-functions so that 380 ** they may be overridden at runtime to facilitate fault injection during 381 ** testing and sandboxing. The following array holds the names and pointers 382 ** to all overrideable system calls. 383 */ 384 static struct unix_syscall { 385 const char *zName; /* Name of the system call */ 386 sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ 387 sqlite3_syscall_ptr pDefault; /* Default value */ 388 } aSyscall[] = { 389 { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, 390 #define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) 391 392 { "close", (sqlite3_syscall_ptr)close, 0 }, 393 #define osClose ((int(*)(int))aSyscall[1].pCurrent) 394 395 { "access", (sqlite3_syscall_ptr)access, 0 }, 396 #define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) 397 398 { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, 399 #define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) 400 401 { "stat", (sqlite3_syscall_ptr)stat, 0 }, 402 #define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) 403 404 /* 405 ** The DJGPP compiler environment looks mostly like Unix, but it 406 ** lacks the fcntl() system call. So redefine fcntl() to be something 407 ** that always succeeds. This means that locking does not occur under 408 ** DJGPP. But it is DOS - what did you expect? 409 */ 410 #ifdef __DJGPP__ 411 { "fstat", 0, 0 }, 412 #define osFstat(a,b,c) 0 413 #else 414 { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, 415 #define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) 416 #endif 417 418 { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, 419 #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) 420 421 { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, 422 #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) 423 424 { "read", (sqlite3_syscall_ptr)read, 0 }, 425 #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) 426 427 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 428 { "pread", (sqlite3_syscall_ptr)pread, 0 }, 429 #else 430 { "pread", (sqlite3_syscall_ptr)0, 0 }, 431 #endif 432 #define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) 433 434 #if defined(USE_PREAD64) 435 { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, 436 #else 437 { "pread64", (sqlite3_syscall_ptr)0, 0 }, 438 #endif 439 #define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent) 440 441 { "write", (sqlite3_syscall_ptr)write, 0 }, 442 #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) 443 444 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 445 { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, 446 #else 447 { "pwrite", (sqlite3_syscall_ptr)0, 0 }, 448 #endif 449 #define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ 450 aSyscall[12].pCurrent) 451 452 #if defined(USE_PREAD64) 453 { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, 454 #else 455 { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, 456 #endif 457 #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ 458 aSyscall[13].pCurrent) 459 460 { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, 461 #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) 462 463 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 464 { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, 465 #else 466 { "fallocate", (sqlite3_syscall_ptr)0, 0 }, 467 #endif 468 #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) 469 470 { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, 471 #define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) 472 473 { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, 474 #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) 475 476 { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, 477 #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) 478 479 { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, 480 #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) 481 482 #if defined(HAVE_FCHOWN) 483 { "fchown", (sqlite3_syscall_ptr)fchown, 0 }, 484 #else 485 { "fchown", (sqlite3_syscall_ptr)0, 0 }, 486 #endif 487 #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) 488 489 #if defined(HAVE_FCHOWN) 490 { "geteuid", (sqlite3_syscall_ptr)geteuid, 0 }, 491 #else 492 { "geteuid", (sqlite3_syscall_ptr)0, 0 }, 493 #endif 494 #define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) 495 496 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 497 { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, 498 #else 499 { "mmap", (sqlite3_syscall_ptr)0, 0 }, 500 #endif 501 #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) 502 503 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 504 { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, 505 #else 506 { "munmap", (sqlite3_syscall_ptr)0, 0 }, 507 #endif 508 #define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent) 509 510 #if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) 511 { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, 512 #else 513 { "mremap", (sqlite3_syscall_ptr)0, 0 }, 514 #endif 515 #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent) 516 517 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 518 { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, 519 #else 520 { "getpagesize", (sqlite3_syscall_ptr)0, 0 }, 521 #endif 522 #define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent) 523 524 #if defined(HAVE_READLINK) 525 { "readlink", (sqlite3_syscall_ptr)readlink, 0 }, 526 #else 527 { "readlink", (sqlite3_syscall_ptr)0, 0 }, 528 #endif 529 #define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent) 530 531 #if defined(HAVE_LSTAT) 532 { "lstat", (sqlite3_syscall_ptr)lstat, 0 }, 533 #else 534 { "lstat", (sqlite3_syscall_ptr)0, 0 }, 535 #endif 536 #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) 537 538 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 539 # ifdef __ANDROID__ 540 { "ioctl", (sqlite3_syscall_ptr)(int(*)(int, int, ...))ioctl, 0 }, 541 #define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) 542 # else 543 { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, 544 #define osIoctl ((int(*)(int,unsigned long,...))aSyscall[28].pCurrent) 545 # endif 546 #else 547 { "ioctl", (sqlite3_syscall_ptr)0, 0 }, 548 #endif 549 550 }; /* End of the overrideable system calls */ 551 552 553 /* 554 ** On some systems, calls to fchown() will trigger a message in a security 555 ** log if they come from non-root processes. So avoid calling fchown() if 556 ** we are not running as root. 557 */ 558 static int robustFchown(int fd, uid_t uid, gid_t gid){ 559 #if defined(HAVE_FCHOWN) 560 return osGeteuid() ? 0 : osFchown(fd,uid,gid); 561 #else 562 return 0; 563 #endif 564 } 565 566 /* 567 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the 568 ** "unix" VFSes. Return SQLITE_OK opon successfully updating the 569 ** system call pointer, or SQLITE_NOTFOUND if there is no configurable 570 ** system call named zName. 571 */ 572 static int unixSetSystemCall( 573 sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ 574 const char *zName, /* Name of system call to override */ 575 sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ 576 ){ 577 unsigned int i; 578 int rc = SQLITE_NOTFOUND; 579 580 UNUSED_PARAMETER(pNotUsed); 581 if( zName==0 ){ 582 /* If no zName is given, restore all system calls to their default 583 ** settings and return NULL 584 */ 585 rc = SQLITE_OK; 586 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 587 if( aSyscall[i].pDefault ){ 588 aSyscall[i].pCurrent = aSyscall[i].pDefault; 589 } 590 } 591 }else{ 592 /* If zName is specified, operate on only the one system call 593 ** specified. 594 */ 595 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 596 if( strcmp(zName, aSyscall[i].zName)==0 ){ 597 if( aSyscall[i].pDefault==0 ){ 598 aSyscall[i].pDefault = aSyscall[i].pCurrent; 599 } 600 rc = SQLITE_OK; 601 if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; 602 aSyscall[i].pCurrent = pNewFunc; 603 break; 604 } 605 } 606 } 607 return rc; 608 } 609 610 /* 611 ** Return the value of a system call. Return NULL if zName is not a 612 ** recognized system call name. NULL is also returned if the system call 613 ** is currently undefined. 614 */ 615 static sqlite3_syscall_ptr unixGetSystemCall( 616 sqlite3_vfs *pNotUsed, 617 const char *zName 618 ){ 619 unsigned int i; 620 621 UNUSED_PARAMETER(pNotUsed); 622 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 623 if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; 624 } 625 return 0; 626 } 627 628 /* 629 ** Return the name of the first system call after zName. If zName==NULL 630 ** then return the name of the first system call. Return NULL if zName 631 ** is the last system call or if zName is not the name of a valid 632 ** system call. 633 */ 634 static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ 635 int i = -1; 636 637 UNUSED_PARAMETER(p); 638 if( zName ){ 639 for(i=0; i<ArraySize(aSyscall)-1; i++){ 640 if( strcmp(zName, aSyscall[i].zName)==0 ) break; 641 } 642 } 643 for(i++; i<ArraySize(aSyscall); i++){ 644 if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; 645 } 646 return 0; 647 } 648 649 /* 650 ** Do not accept any file descriptor less than this value, in order to avoid 651 ** opening database file using file descriptors that are commonly used for 652 ** standard input, output, and error. 653 */ 654 #ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR 655 # define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 656 #endif 657 658 /* 659 ** Invoke open(). Do so multiple times, until it either succeeds or 660 ** fails for some reason other than EINTR. 661 ** 662 ** If the file creation mode "m" is 0 then set it to the default for 663 ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally 664 ** 0644) as modified by the system umask. If m is not 0, then 665 ** make the file creation mode be exactly m ignoring the umask. 666 ** 667 ** The m parameter will be non-zero only when creating -wal, -journal, 668 ** and -shm files. We want those files to have *exactly* the same 669 ** permissions as their original database, unadulterated by the umask. 670 ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a 671 ** transaction crashes and leaves behind hot journals, then any 672 ** process that is able to write to the database will also be able to 673 ** recover the hot journals. 674 */ 675 static int robust_open(const char *z, int f, mode_t m){ 676 int fd; 677 mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; 678 while(1){ 679 #if defined(O_CLOEXEC) 680 fd = osOpen(z,f|O_CLOEXEC,m2); 681 #else 682 fd = osOpen(z,f,m2); 683 #endif 684 if( fd<0 ){ 685 if( errno==EINTR ) continue; 686 break; 687 } 688 if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; 689 osClose(fd); 690 sqlite3_log(SQLITE_WARNING, 691 "attempt to open \"%s\" as file descriptor %d", z, fd); 692 fd = -1; 693 if( osOpen("/dev/null", O_RDONLY, m)<0 ) break; 694 } 695 if( fd>=0 ){ 696 if( m!=0 ){ 697 struct stat statbuf; 698 if( osFstat(fd, &statbuf)==0 699 && statbuf.st_size==0 700 && (statbuf.st_mode&0777)!=m 701 ){ 702 osFchmod(fd, m); 703 } 704 } 705 #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) 706 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 707 #endif 708 } 709 return fd; 710 } 711 712 /* 713 ** Helper functions to obtain and relinquish the global mutex. The 714 ** global mutex is used to protect the unixInodeInfo and 715 ** vxworksFileId objects used by this file, all of which may be 716 ** shared by multiple threads. 717 ** 718 ** Function unixMutexHeld() is used to assert() that the global mutex 719 ** is held when required. This function is only used as part of assert() 720 ** statements. e.g. 721 ** 722 ** unixEnterMutex() 723 ** assert( unixMutexHeld() ); 724 ** unixEnterLeave() 725 ** 726 ** To prevent deadlock, the global unixBigLock must must be acquired 727 ** before the unixInodeInfo.pLockMutex mutex, if both are held. It is 728 ** OK to get the pLockMutex without holding unixBigLock first, but if 729 ** that happens, the unixBigLock mutex must not be acquired until after 730 ** pLockMutex is released. 731 ** 732 ** OK: enter(unixBigLock), enter(pLockInfo) 733 ** OK: enter(unixBigLock) 734 ** OK: enter(pLockInfo) 735 ** ERROR: enter(pLockInfo), enter(unixBigLock) 736 */ 737 static sqlite3_mutex *unixBigLock = 0; 738 static void unixEnterMutex(void){ 739 assert( sqlite3_mutex_notheld(unixBigLock) ); /* Not a recursive mutex */ 740 sqlite3_mutex_enter(unixBigLock); 741 } 742 static void unixLeaveMutex(void){ 743 assert( sqlite3_mutex_held(unixBigLock) ); 744 sqlite3_mutex_leave(unixBigLock); 745 } 746 #ifdef SQLITE_DEBUG 747 static int unixMutexHeld(void) { 748 return sqlite3_mutex_held(unixBigLock); 749 } 750 #endif 751 752 753 #ifdef SQLITE_HAVE_OS_TRACE 754 /* 755 ** Helper function for printing out trace information from debugging 756 ** binaries. This returns the string representation of the supplied 757 ** integer lock-type. 758 */ 759 static const char *azFileLock(int eFileLock){ 760 switch( eFileLock ){ 761 case NO_LOCK: return "NONE"; 762 case SHARED_LOCK: return "SHARED"; 763 case RESERVED_LOCK: return "RESERVED"; 764 case PENDING_LOCK: return "PENDING"; 765 case EXCLUSIVE_LOCK: return "EXCLUSIVE"; 766 } 767 return "ERROR"; 768 } 769 #endif 770 771 #ifdef SQLITE_LOCK_TRACE 772 /* 773 ** Print out information about all locking operations. 774 ** 775 ** This routine is used for troubleshooting locks on multithreaded 776 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE 777 ** command-line option on the compiler. This code is normally 778 ** turned off. 779 */ 780 static int lockTrace(int fd, int op, struct flock *p){ 781 char *zOpName, *zType; 782 int s; 783 int savedErrno; 784 if( op==F_GETLK ){ 785 zOpName = "GETLK"; 786 }else if( op==F_SETLK ){ 787 zOpName = "SETLK"; 788 }else{ 789 s = osFcntl(fd, op, p); 790 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); 791 return s; 792 } 793 if( p->l_type==F_RDLCK ){ 794 zType = "RDLCK"; 795 }else if( p->l_type==F_WRLCK ){ 796 zType = "WRLCK"; 797 }else if( p->l_type==F_UNLCK ){ 798 zType = "UNLCK"; 799 }else{ 800 assert( 0 ); 801 } 802 assert( p->l_whence==SEEK_SET ); 803 s = osFcntl(fd, op, p); 804 savedErrno = errno; 805 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", 806 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, 807 (int)p->l_pid, s); 808 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ 809 struct flock l2; 810 l2 = *p; 811 osFcntl(fd, F_GETLK, &l2); 812 if( l2.l_type==F_RDLCK ){ 813 zType = "RDLCK"; 814 }else if( l2.l_type==F_WRLCK ){ 815 zType = "WRLCK"; 816 }else if( l2.l_type==F_UNLCK ){ 817 zType = "UNLCK"; 818 }else{ 819 assert( 0 ); 820 } 821 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", 822 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); 823 } 824 errno = savedErrno; 825 return s; 826 } 827 #undef osFcntl 828 #define osFcntl lockTrace 829 #endif /* SQLITE_LOCK_TRACE */ 830 831 /* 832 ** Retry ftruncate() calls that fail due to EINTR 833 ** 834 ** All calls to ftruncate() within this file should be made through 835 ** this wrapper. On the Android platform, bypassing the logic below 836 ** could lead to a corrupt database. 837 */ 838 static int robust_ftruncate(int h, sqlite3_int64 sz){ 839 int rc; 840 #ifdef __ANDROID__ 841 /* On Android, ftruncate() always uses 32-bit offsets, even if 842 ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to 843 ** truncate a file to any size larger than 2GiB. Silently ignore any 844 ** such attempts. */ 845 if( sz>(sqlite3_int64)0x7FFFFFFF ){ 846 rc = SQLITE_OK; 847 }else 848 #endif 849 do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); 850 return rc; 851 } 852 853 /* 854 ** This routine translates a standard POSIX errno code into something 855 ** useful to the clients of the sqlite3 functions. Specifically, it is 856 ** intended to translate a variety of "try again" errors into SQLITE_BUSY 857 ** and a variety of "please close the file descriptor NOW" errors into 858 ** SQLITE_IOERR 859 ** 860 ** Errors during initialization of locks, or file system support for locks, 861 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. 862 */ 863 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { 864 assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || 865 (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 866 (sqliteIOErr == SQLITE_IOERR_RDLOCK) || 867 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ); 868 switch (posixError) { 869 case EACCES: 870 case EAGAIN: 871 case ETIMEDOUT: 872 case EBUSY: 873 case EINTR: 874 case ENOLCK: 875 /* random NFS retry error, unless during file system support 876 * introspection, in which it actually means what it says */ 877 return SQLITE_BUSY; 878 879 case EPERM: 880 return SQLITE_PERM; 881 882 default: 883 return sqliteIOErr; 884 } 885 } 886 887 888 /****************************************************************************** 889 ****************** Begin Unique File ID Utility Used By VxWorks *************** 890 ** 891 ** On most versions of unix, we can get a unique ID for a file by concatenating 892 ** the device number and the inode number. But this does not work on VxWorks. 893 ** On VxWorks, a unique file id must be based on the canonical filename. 894 ** 895 ** A pointer to an instance of the following structure can be used as a 896 ** unique file ID in VxWorks. Each instance of this structure contains 897 ** a copy of the canonical filename. There is also a reference count. 898 ** The structure is reclaimed when the number of pointers to it drops to 899 ** zero. 900 ** 901 ** There are never very many files open at one time and lookups are not 902 ** a performance-critical path, so it is sufficient to put these 903 ** structures on a linked list. 904 */ 905 struct vxworksFileId { 906 struct vxworksFileId *pNext; /* Next in a list of them all */ 907 int nRef; /* Number of references to this one */ 908 int nName; /* Length of the zCanonicalName[] string */ 909 char *zCanonicalName; /* Canonical filename */ 910 }; 911 912 #if OS_VXWORKS 913 /* 914 ** All unique filenames are held on a linked list headed by this 915 ** variable: 916 */ 917 static struct vxworksFileId *vxworksFileList = 0; 918 919 /* 920 ** Simplify a filename into its canonical form 921 ** by making the following changes: 922 ** 923 ** * removing any trailing and duplicate / 924 ** * convert /./ into just / 925 ** * convert /A/../ where A is any simple name into just / 926 ** 927 ** Changes are made in-place. Return the new name length. 928 ** 929 ** The original filename is in z[0..n-1]. Return the number of 930 ** characters in the simplified name. 931 */ 932 static int vxworksSimplifyName(char *z, int n){ 933 int i, j; 934 while( n>1 && z[n-1]=='/' ){ n--; } 935 for(i=j=0; i<n; i++){ 936 if( z[i]=='/' ){ 937 if( z[i+1]=='/' ) continue; 938 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 939 i += 1; 940 continue; 941 } 942 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ 943 while( j>0 && z[j-1]!='/' ){ j--; } 944 if( j>0 ){ j--; } 945 i += 2; 946 continue; 947 } 948 } 949 z[j++] = z[i]; 950 } 951 z[j] = 0; 952 return j; 953 } 954 955 /* 956 ** Find a unique file ID for the given absolute pathname. Return 957 ** a pointer to the vxworksFileId object. This pointer is the unique 958 ** file ID. 959 ** 960 ** The nRef field of the vxworksFileId object is incremented before 961 ** the object is returned. A new vxworksFileId object is created 962 ** and added to the global list if necessary. 963 ** 964 ** If a memory allocation error occurs, return NULL. 965 */ 966 static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ 967 struct vxworksFileId *pNew; /* search key and new file ID */ 968 struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ 969 int n; /* Length of zAbsoluteName string */ 970 971 assert( zAbsoluteName[0]=='/' ); 972 n = (int)strlen(zAbsoluteName); 973 pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); 974 if( pNew==0 ) return 0; 975 pNew->zCanonicalName = (char*)&pNew[1]; 976 memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); 977 n = vxworksSimplifyName(pNew->zCanonicalName, n); 978 979 /* Search for an existing entry that matching the canonical name. 980 ** If found, increment the reference count and return a pointer to 981 ** the existing file ID. 982 */ 983 unixEnterMutex(); 984 for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ 985 if( pCandidate->nName==n 986 && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 987 ){ 988 sqlite3_free(pNew); 989 pCandidate->nRef++; 990 unixLeaveMutex(); 991 return pCandidate; 992 } 993 } 994 995 /* No match was found. We will make a new file ID */ 996 pNew->nRef = 1; 997 pNew->nName = n; 998 pNew->pNext = vxworksFileList; 999 vxworksFileList = pNew; 1000 unixLeaveMutex(); 1001 return pNew; 1002 } 1003 1004 /* 1005 ** Decrement the reference count on a vxworksFileId object. Free 1006 ** the object when the reference count reaches zero. 1007 */ 1008 static void vxworksReleaseFileId(struct vxworksFileId *pId){ 1009 unixEnterMutex(); 1010 assert( pId->nRef>0 ); 1011 pId->nRef--; 1012 if( pId->nRef==0 ){ 1013 struct vxworksFileId **pp; 1014 for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} 1015 assert( *pp==pId ); 1016 *pp = pId->pNext; 1017 sqlite3_free(pId); 1018 } 1019 unixLeaveMutex(); 1020 } 1021 #endif /* OS_VXWORKS */ 1022 /*************** End of Unique File ID Utility Used By VxWorks **************** 1023 ******************************************************************************/ 1024 1025 1026 /****************************************************************************** 1027 *************************** Posix Advisory Locking **************************** 1028 ** 1029 ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) 1030 ** section 6.5.2.2 lines 483 through 490 specify that when a process 1031 ** sets or clears a lock, that operation overrides any prior locks set 1032 ** by the same process. It does not explicitly say so, but this implies 1033 ** that it overrides locks set by the same process using a different 1034 ** file descriptor. Consider this test case: 1035 ** 1036 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); 1037 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); 1038 ** 1039 ** Suppose ./file1 and ./file2 are really the same file (because 1040 ** one is a hard or symbolic link to the other) then if you set 1041 ** an exclusive lock on fd1, then try to get an exclusive lock 1042 ** on fd2, it works. I would have expected the second lock to 1043 ** fail since there was already a lock on the file due to fd1. 1044 ** But not so. Since both locks came from the same process, the 1045 ** second overrides the first, even though they were on different 1046 ** file descriptors opened on different file names. 1047 ** 1048 ** This means that we cannot use POSIX locks to synchronize file access 1049 ** among competing threads of the same process. POSIX locks will work fine 1050 ** to synchronize access for threads in separate processes, but not 1051 ** threads within the same process. 1052 ** 1053 ** To work around the problem, SQLite has to manage file locks internally 1054 ** on its own. Whenever a new database is opened, we have to find the 1055 ** specific inode of the database file (the inode is determined by the 1056 ** st_dev and st_ino fields of the stat structure that fstat() fills in) 1057 ** and check for locks already existing on that inode. When locks are 1058 ** created or removed, we have to look at our own internal record of the 1059 ** locks to see if another thread has previously set a lock on that same 1060 ** inode. 1061 ** 1062 ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. 1063 ** For VxWorks, we have to use the alternative unique ID system based on 1064 ** canonical filename and implemented in the previous division.) 1065 ** 1066 ** The sqlite3_file structure for POSIX is no longer just an integer file 1067 ** descriptor. It is now a structure that holds the integer file 1068 ** descriptor and a pointer to a structure that describes the internal 1069 ** locks on the corresponding inode. There is one locking structure 1070 ** per inode, so if the same inode is opened twice, both unixFile structures 1071 ** point to the same locking structure. The locking structure keeps 1072 ** a reference count (so we will know when to delete it) and a "cnt" 1073 ** field that tells us its internal lock status. cnt==0 means the 1074 ** file is unlocked. cnt==-1 means the file has an exclusive lock. 1075 ** cnt>0 means there are cnt shared locks on the file. 1076 ** 1077 ** Any attempt to lock or unlock a file first checks the locking 1078 ** structure. The fcntl() system call is only invoked to set a 1079 ** POSIX lock if the internal lock structure transitions between 1080 ** a locked and an unlocked state. 1081 ** 1082 ** But wait: there are yet more problems with POSIX advisory locks. 1083 ** 1084 ** If you close a file descriptor that points to a file that has locks, 1085 ** all locks on that file that are owned by the current process are 1086 ** released. To work around this problem, each unixInodeInfo object 1087 ** maintains a count of the number of pending locks on tha inode. 1088 ** When an attempt is made to close an unixFile, if there are 1089 ** other unixFile open on the same inode that are holding locks, the call 1090 ** to close() the file descriptor is deferred until all of the locks clear. 1091 ** The unixInodeInfo structure keeps a list of file descriptors that need to 1092 ** be closed and that list is walked (and cleared) when the last lock 1093 ** clears. 1094 ** 1095 ** Yet another problem: LinuxThreads do not play well with posix locks. 1096 ** 1097 ** Many older versions of linux use the LinuxThreads library which is 1098 ** not posix compliant. Under LinuxThreads, a lock created by thread 1099 ** A cannot be modified or overridden by a different thread B. 1100 ** Only thread A can modify the lock. Locking behavior is correct 1101 ** if the appliation uses the newer Native Posix Thread Library (NPTL) 1102 ** on linux - with NPTL a lock created by thread A can override locks 1103 ** in thread B. But there is no way to know at compile-time which 1104 ** threading library is being used. So there is no way to know at 1105 ** compile-time whether or not thread A can override locks on thread B. 1106 ** One has to do a run-time check to discover the behavior of the 1107 ** current process. 1108 ** 1109 ** SQLite used to support LinuxThreads. But support for LinuxThreads 1110 ** was dropped beginning with version 3.7.0. SQLite will still work with 1111 ** LinuxThreads provided that (1) there is no more than one connection 1112 ** per database file in the same process and (2) database connections 1113 ** do not move across threads. 1114 */ 1115 1116 /* 1117 ** An instance of the following structure serves as the key used 1118 ** to locate a particular unixInodeInfo object. 1119 */ 1120 struct unixFileId { 1121 dev_t dev; /* Device number */ 1122 #if OS_VXWORKS 1123 struct vxworksFileId *pId; /* Unique file ID for vxworks. */ 1124 #else 1125 /* We are told that some versions of Android contain a bug that 1126 ** sizes ino_t at only 32-bits instead of 64-bits. (See 1127 ** https://android-review.googlesource.com/#/c/115351/3/dist/sqlite3.c) 1128 ** To work around this, always allocate 64-bits for the inode number. 1129 ** On small machines that only have 32-bit inodes, this wastes 4 bytes, 1130 ** but that should not be a big deal. */ 1131 /* WAS: ino_t ino; */ 1132 u64 ino; /* Inode number */ 1133 #endif 1134 }; 1135 1136 /* 1137 ** An instance of the following structure is allocated for each open 1138 ** inode. 1139 ** 1140 ** A single inode can have multiple file descriptors, so each unixFile 1141 ** structure contains a pointer to an instance of this object and this 1142 ** object keeps a count of the number of unixFile pointing to it. 1143 ** 1144 ** Mutex rules: 1145 ** 1146 ** (1) Only the pLockMutex mutex must be held in order to read or write 1147 ** any of the locking fields: 1148 ** nShared, nLock, eFileLock, bProcessLock, pUnused 1149 ** 1150 ** (2) When nRef>0, then the following fields are unchanging and can 1151 ** be read (but not written) without holding any mutex: 1152 ** fileId, pLockMutex 1153 ** 1154 ** (3) With the exceptions above, all the fields may only be read 1155 ** or written while holding the global unixBigLock mutex. 1156 ** 1157 ** Deadlock prevention: The global unixBigLock mutex may not 1158 ** be acquired while holding the pLockMutex mutex. If both unixBigLock 1159 ** and pLockMutex are needed, then unixBigLock must be acquired first. 1160 */ 1161 struct unixInodeInfo { 1162 struct unixFileId fileId; /* The lookup key */ 1163 sqlite3_mutex *pLockMutex; /* Hold this mutex for... */ 1164 int nShared; /* Number of SHARED locks held */ 1165 int nLock; /* Number of outstanding file locks */ 1166 unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ 1167 unsigned char bProcessLock; /* An exclusive process lock is held */ 1168 UnixUnusedFd *pUnused; /* Unused file descriptors to close */ 1169 int nRef; /* Number of pointers to this structure */ 1170 unixShmNode *pShmNode; /* Shared memory associated with this inode */ 1171 unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ 1172 unixInodeInfo *pPrev; /* .... doubly linked */ 1173 #if SQLITE_ENABLE_LOCKING_STYLE 1174 unsigned long long sharedByte; /* for AFP simulated shared lock */ 1175 #endif 1176 #if OS_VXWORKS 1177 sem_t *pSem; /* Named POSIX semaphore */ 1178 char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ 1179 #endif 1180 }; 1181 1182 /* 1183 ** A lists of all unixInodeInfo objects. 1184 ** 1185 ** Must hold unixBigLock in order to read or write this variable. 1186 */ 1187 static unixInodeInfo *inodeList = 0; /* All unixInodeInfo objects */ 1188 1189 #ifdef SQLITE_DEBUG 1190 /* 1191 ** True if the inode mutex (on the unixFile.pFileMutex field) is held, or not. 1192 ** This routine is used only within assert() to help verify correct mutex 1193 ** usage. 1194 */ 1195 int unixFileMutexHeld(unixFile *pFile){ 1196 assert( pFile->pInode ); 1197 return sqlite3_mutex_held(pFile->pInode->pLockMutex); 1198 } 1199 int unixFileMutexNotheld(unixFile *pFile){ 1200 assert( pFile->pInode ); 1201 return sqlite3_mutex_notheld(pFile->pInode->pLockMutex); 1202 } 1203 #endif 1204 1205 /* 1206 ** 1207 ** This function - unixLogErrorAtLine(), is only ever called via the macro 1208 ** unixLogError(). 1209 ** 1210 ** It is invoked after an error occurs in an OS function and errno has been 1211 ** set. It logs a message using sqlite3_log() containing the current value of 1212 ** errno and, if possible, the human-readable equivalent from strerror() or 1213 ** strerror_r(). 1214 ** 1215 ** The first argument passed to the macro should be the error code that 1216 ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). 1217 ** The two subsequent arguments should be the name of the OS function that 1218 ** failed (e.g. "unlink", "open") and the associated file-system path, 1219 ** if any. 1220 */ 1221 #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) 1222 static int unixLogErrorAtLine( 1223 int errcode, /* SQLite error code */ 1224 const char *zFunc, /* Name of OS function that failed */ 1225 const char *zPath, /* File path associated with error */ 1226 int iLine /* Source line number where error occurred */ 1227 ){ 1228 char *zErr; /* Message from strerror() or equivalent */ 1229 int iErrno = errno; /* Saved syscall error number */ 1230 1231 /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use 1232 ** the strerror() function to obtain the human-readable error message 1233 ** equivalent to errno. Otherwise, use strerror_r(). 1234 */ 1235 #if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) 1236 char aErr[80]; 1237 memset(aErr, 0, sizeof(aErr)); 1238 zErr = aErr; 1239 1240 /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, 1241 ** assume that the system provides the GNU version of strerror_r() that 1242 ** returns a pointer to a buffer containing the error message. That pointer 1243 ** may point to aErr[], or it may point to some static storage somewhere. 1244 ** Otherwise, assume that the system provides the POSIX version of 1245 ** strerror_r(), which always writes an error message into aErr[]. 1246 ** 1247 ** If the code incorrectly assumes that it is the POSIX version that is 1248 ** available, the error message will often be an empty string. Not a 1249 ** huge problem. Incorrectly concluding that the GNU version is available 1250 ** could lead to a segfault though. 1251 */ 1252 #if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) 1253 zErr = 1254 # endif 1255 strerror_r(iErrno, aErr, sizeof(aErr)-1); 1256 1257 #elif SQLITE_THREADSAFE 1258 /* This is a threadsafe build, but strerror_r() is not available. */ 1259 zErr = ""; 1260 #else 1261 /* Non-threadsafe build, use strerror(). */ 1262 zErr = strerror(iErrno); 1263 #endif 1264 1265 if( zPath==0 ) zPath = ""; 1266 sqlite3_log(errcode, 1267 "os_unix.c:%d: (%d) %s(%s) - %s", 1268 iLine, iErrno, zFunc, zPath, zErr 1269 ); 1270 1271 return errcode; 1272 } 1273 1274 /* 1275 ** Close a file descriptor. 1276 ** 1277 ** We assume that close() almost always works, since it is only in a 1278 ** very sick application or on a very sick platform that it might fail. 1279 ** If it does fail, simply leak the file descriptor, but do log the 1280 ** error. 1281 ** 1282 ** Note that it is not safe to retry close() after EINTR since the 1283 ** file descriptor might have already been reused by another thread. 1284 ** So we don't even try to recover from an EINTR. Just log the error 1285 ** and move on. 1286 */ 1287 static void robust_close(unixFile *pFile, int h, int lineno){ 1288 if( osClose(h) ){ 1289 unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", 1290 pFile ? pFile->zPath : 0, lineno); 1291 } 1292 } 1293 1294 /* 1295 ** Set the pFile->lastErrno. Do this in a subroutine as that provides 1296 ** a convenient place to set a breakpoint. 1297 */ 1298 static void storeLastErrno(unixFile *pFile, int error){ 1299 pFile->lastErrno = error; 1300 } 1301 1302 /* 1303 ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. 1304 */ 1305 static void closePendingFds(unixFile *pFile){ 1306 unixInodeInfo *pInode = pFile->pInode; 1307 UnixUnusedFd *p; 1308 UnixUnusedFd *pNext; 1309 assert( unixFileMutexHeld(pFile) ); 1310 for(p=pInode->pUnused; p; p=pNext){ 1311 pNext = p->pNext; 1312 robust_close(pFile, p->fd, __LINE__); 1313 sqlite3_free(p); 1314 } 1315 pInode->pUnused = 0; 1316 } 1317 1318 /* 1319 ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). 1320 ** 1321 ** The global mutex must be held when this routine is called, but the mutex 1322 ** on the inode being deleted must NOT be held. 1323 */ 1324 static void releaseInodeInfo(unixFile *pFile){ 1325 unixInodeInfo *pInode = pFile->pInode; 1326 assert( unixMutexHeld() ); 1327 assert( unixFileMutexNotheld(pFile) ); 1328 if( ALWAYS(pInode) ){ 1329 pInode->nRef--; 1330 if( pInode->nRef==0 ){ 1331 assert( pInode->pShmNode==0 ); 1332 sqlite3_mutex_enter(pInode->pLockMutex); 1333 closePendingFds(pFile); 1334 sqlite3_mutex_leave(pInode->pLockMutex); 1335 if( pInode->pPrev ){ 1336 assert( pInode->pPrev->pNext==pInode ); 1337 pInode->pPrev->pNext = pInode->pNext; 1338 }else{ 1339 assert( inodeList==pInode ); 1340 inodeList = pInode->pNext; 1341 } 1342 if( pInode->pNext ){ 1343 assert( pInode->pNext->pPrev==pInode ); 1344 pInode->pNext->pPrev = pInode->pPrev; 1345 } 1346 sqlite3_mutex_free(pInode->pLockMutex); 1347 sqlite3_free(pInode); 1348 } 1349 } 1350 } 1351 1352 /* 1353 ** Given a file descriptor, locate the unixInodeInfo object that 1354 ** describes that file descriptor. Create a new one if necessary. The 1355 ** return value might be uninitialized if an error occurs. 1356 ** 1357 ** The global mutex must held when calling this routine. 1358 ** 1359 ** Return an appropriate error code. 1360 */ 1361 static int findInodeInfo( 1362 unixFile *pFile, /* Unix file with file desc used in the key */ 1363 unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ 1364 ){ 1365 int rc; /* System call return code */ 1366 int fd; /* The file descriptor for pFile */ 1367 struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ 1368 struct stat statbuf; /* Low-level file information */ 1369 unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ 1370 1371 assert( unixMutexHeld() ); 1372 1373 /* Get low-level information about the file that we can used to 1374 ** create a unique name for the file. 1375 */ 1376 fd = pFile->h; 1377 rc = osFstat(fd, &statbuf); 1378 if( rc!=0 ){ 1379 storeLastErrno(pFile, errno); 1380 #if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS) 1381 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; 1382 #endif 1383 return SQLITE_IOERR; 1384 } 1385 1386 #ifdef __APPLE__ 1387 /* On OS X on an msdos filesystem, the inode number is reported 1388 ** incorrectly for zero-size files. See ticket #3260. To work 1389 ** around this problem (we consider it a bug in OS X, not SQLite) 1390 ** we always increase the file size to 1 by writing a single byte 1391 ** prior to accessing the inode number. The one byte written is 1392 ** an ASCII 'S' character which also happens to be the first byte 1393 ** in the header of every SQLite database. In this way, if there 1394 ** is a race condition such that another thread has already populated 1395 ** the first page of the database, no damage is done. 1396 */ 1397 if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ 1398 do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); 1399 if( rc!=1 ){ 1400 storeLastErrno(pFile, errno); 1401 return SQLITE_IOERR; 1402 } 1403 rc = osFstat(fd, &statbuf); 1404 if( rc!=0 ){ 1405 storeLastErrno(pFile, errno); 1406 return SQLITE_IOERR; 1407 } 1408 } 1409 #endif 1410 1411 memset(&fileId, 0, sizeof(fileId)); 1412 fileId.dev = statbuf.st_dev; 1413 #if OS_VXWORKS 1414 fileId.pId = pFile->pId; 1415 #else 1416 fileId.ino = (u64)statbuf.st_ino; 1417 #endif 1418 assert( unixMutexHeld() ); 1419 pInode = inodeList; 1420 while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ 1421 pInode = pInode->pNext; 1422 } 1423 if( pInode==0 ){ 1424 pInode = sqlite3_malloc64( sizeof(*pInode) ); 1425 if( pInode==0 ){ 1426 return SQLITE_NOMEM_BKPT; 1427 } 1428 memset(pInode, 0, sizeof(*pInode)); 1429 memcpy(&pInode->fileId, &fileId, sizeof(fileId)); 1430 if( sqlite3GlobalConfig.bCoreMutex ){ 1431 pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 1432 if( pInode->pLockMutex==0 ){ 1433 sqlite3_free(pInode); 1434 return SQLITE_NOMEM_BKPT; 1435 } 1436 } 1437 pInode->nRef = 1; 1438 assert( unixMutexHeld() ); 1439 pInode->pNext = inodeList; 1440 pInode->pPrev = 0; 1441 if( inodeList ) inodeList->pPrev = pInode; 1442 inodeList = pInode; 1443 }else{ 1444 pInode->nRef++; 1445 } 1446 *ppInode = pInode; 1447 return SQLITE_OK; 1448 } 1449 1450 /* 1451 ** Return TRUE if pFile has been renamed or unlinked since it was first opened. 1452 */ 1453 static int fileHasMoved(unixFile *pFile){ 1454 #if OS_VXWORKS 1455 return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; 1456 #else 1457 struct stat buf; 1458 return pFile->pInode!=0 && 1459 (osStat(pFile->zPath, &buf)!=0 1460 || (u64)buf.st_ino!=pFile->pInode->fileId.ino); 1461 #endif 1462 } 1463 1464 1465 /* 1466 ** Check a unixFile that is a database. Verify the following: 1467 ** 1468 ** (1) There is exactly one hard link on the file 1469 ** (2) The file is not a symbolic link 1470 ** (3) The file has not been renamed or unlinked 1471 ** 1472 ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. 1473 */ 1474 static void verifyDbFile(unixFile *pFile){ 1475 struct stat buf; 1476 int rc; 1477 1478 /* These verifications occurs for the main database only */ 1479 if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return; 1480 1481 rc = osFstat(pFile->h, &buf); 1482 if( rc!=0 ){ 1483 sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); 1484 return; 1485 } 1486 if( buf.st_nlink==0 ){ 1487 sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); 1488 return; 1489 } 1490 if( buf.st_nlink>1 ){ 1491 sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); 1492 return; 1493 } 1494 if( fileHasMoved(pFile) ){ 1495 sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); 1496 return; 1497 } 1498 } 1499 1500 1501 /* 1502 ** This routine checks if there is a RESERVED lock held on the specified 1503 ** file by this or any other process. If such a lock is held, set *pResOut 1504 ** to a non-zero value otherwise *pResOut is set to zero. The return value 1505 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 1506 */ 1507 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ 1508 int rc = SQLITE_OK; 1509 int reserved = 0; 1510 unixFile *pFile = (unixFile*)id; 1511 1512 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 1513 1514 assert( pFile ); 1515 assert( pFile->eFileLock<=SHARED_LOCK ); 1516 sqlite3_mutex_enter(pFile->pInode->pLockMutex); 1517 1518 /* Check if a thread in this process holds such a lock */ 1519 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 1520 reserved = 1; 1521 } 1522 1523 /* Otherwise see if some other process holds it. 1524 */ 1525 #ifndef __DJGPP__ 1526 if( !reserved && !pFile->pInode->bProcessLock ){ 1527 struct flock lock; 1528 lock.l_whence = SEEK_SET; 1529 lock.l_start = RESERVED_BYTE; 1530 lock.l_len = 1; 1531 lock.l_type = F_WRLCK; 1532 if( osFcntl(pFile->h, F_GETLK, &lock) ){ 1533 rc = SQLITE_IOERR_CHECKRESERVEDLOCK; 1534 storeLastErrno(pFile, errno); 1535 } else if( lock.l_type!=F_UNLCK ){ 1536 reserved = 1; 1537 } 1538 } 1539 #endif 1540 1541 sqlite3_mutex_leave(pFile->pInode->pLockMutex); 1542 OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); 1543 1544 *pResOut = reserved; 1545 return rc; 1546 } 1547 1548 /* Forward declaration*/ 1549 static int unixSleep(sqlite3_vfs*,int); 1550 1551 /* 1552 ** Set a posix-advisory-lock. 1553 ** 1554 ** There are two versions of this routine. If compiled with 1555 ** SQLITE_ENABLE_SETLK_TIMEOUT then the routine has an extra parameter 1556 ** which is a pointer to a unixFile. If the unixFile->iBusyTimeout 1557 ** value is set, then it is the number of milliseconds to wait before 1558 ** failing the lock. The iBusyTimeout value is always reset back to 1559 ** zero on each call. 1560 ** 1561 ** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking 1562 ** attempt to set the lock. 1563 */ 1564 #ifndef SQLITE_ENABLE_SETLK_TIMEOUT 1565 # define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x) 1566 #else 1567 static int osSetPosixAdvisoryLock( 1568 int h, /* The file descriptor on which to take the lock */ 1569 struct flock *pLock, /* The description of the lock */ 1570 unixFile *pFile /* Structure holding timeout value */ 1571 ){ 1572 int tm = pFile->iBusyTimeout; 1573 int rc = osFcntl(h,F_SETLK,pLock); 1574 while( rc<0 && tm>0 ){ 1575 /* On systems that support some kind of blocking file lock with a timeout, 1576 ** make appropriate changes here to invoke that blocking file lock. On 1577 ** generic posix, however, there is no such API. So we simply try the 1578 ** lock once every millisecond until either the timeout expires, or until 1579 ** the lock is obtained. */ 1580 unixSleep(0,1000); 1581 rc = osFcntl(h,F_SETLK,pLock); 1582 tm--; 1583 } 1584 return rc; 1585 } 1586 #endif /* SQLITE_ENABLE_SETLK_TIMEOUT */ 1587 1588 1589 /* 1590 ** Attempt to set a system-lock on the file pFile. The lock is 1591 ** described by pLock. 1592 ** 1593 ** If the pFile was opened read/write from unix-excl, then the only lock 1594 ** ever obtained is an exclusive lock, and it is obtained exactly once 1595 ** the first time any lock is attempted. All subsequent system locking 1596 ** operations become no-ops. Locking operations still happen internally, 1597 ** in order to coordinate access between separate database connections 1598 ** within this process, but all of that is handled in memory and the 1599 ** operating system does not participate. 1600 ** 1601 ** This function is a pass-through to fcntl(F_SETLK) if pFile is using 1602 ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" 1603 ** and is read-only. 1604 ** 1605 ** Zero is returned if the call completes successfully, or -1 if a call 1606 ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). 1607 */ 1608 static int unixFileLock(unixFile *pFile, struct flock *pLock){ 1609 int rc; 1610 unixInodeInfo *pInode = pFile->pInode; 1611 assert( pInode!=0 ); 1612 assert( sqlite3_mutex_held(pInode->pLockMutex) ); 1613 if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ 1614 if( pInode->bProcessLock==0 ){ 1615 struct flock lock; 1616 assert( pInode->nLock==0 ); 1617 lock.l_whence = SEEK_SET; 1618 lock.l_start = SHARED_FIRST; 1619 lock.l_len = SHARED_SIZE; 1620 lock.l_type = F_WRLCK; 1621 rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); 1622 if( rc<0 ) return rc; 1623 pInode->bProcessLock = 1; 1624 pInode->nLock++; 1625 }else{ 1626 rc = 0; 1627 } 1628 }else{ 1629 rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); 1630 } 1631 return rc; 1632 } 1633 1634 /* 1635 ** Lock the file with the lock specified by parameter eFileLock - one 1636 ** of the following: 1637 ** 1638 ** (1) SHARED_LOCK 1639 ** (2) RESERVED_LOCK 1640 ** (3) PENDING_LOCK 1641 ** (4) EXCLUSIVE_LOCK 1642 ** 1643 ** Sometimes when requesting one lock state, additional lock states 1644 ** are inserted in between. The locking might fail on one of the later 1645 ** transitions leaving the lock state different from what it started but 1646 ** still short of its goal. The following chart shows the allowed 1647 ** transitions and the inserted intermediate states: 1648 ** 1649 ** UNLOCKED -> SHARED 1650 ** SHARED -> RESERVED 1651 ** SHARED -> (PENDING) -> EXCLUSIVE 1652 ** RESERVED -> (PENDING) -> EXCLUSIVE 1653 ** PENDING -> EXCLUSIVE 1654 ** 1655 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 1656 ** routine to lower a locking level. 1657 */ 1658 static int unixLock(sqlite3_file *id, int eFileLock){ 1659 /* The following describes the implementation of the various locks and 1660 ** lock transitions in terms of the POSIX advisory shared and exclusive 1661 ** lock primitives (called read-locks and write-locks below, to avoid 1662 ** confusion with SQLite lock names). The algorithms are complicated 1663 ** slightly in order to be compatible with Windows95 systems simultaneously 1664 ** accessing the same database file, in case that is ever required. 1665 ** 1666 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved 1667 ** byte', each single bytes at well known offsets, and the 'shared byte 1668 ** range', a range of 510 bytes at a well known offset. 1669 ** 1670 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending 1671 ** byte'. If this is successful, 'shared byte range' is read-locked 1672 ** and the lock on the 'pending byte' released. (Legacy note: When 1673 ** SQLite was first developed, Windows95 systems were still very common, 1674 ** and Widnows95 lacks a shared-lock capability. So on Windows95, a 1675 ** single randomly selected by from the 'shared byte range' is locked. 1676 ** Windows95 is now pretty much extinct, but this work-around for the 1677 ** lack of shared-locks on Windows95 lives on, for backwards 1678 ** compatibility.) 1679 ** 1680 ** A process may only obtain a RESERVED lock after it has a SHARED lock. 1681 ** A RESERVED lock is implemented by grabbing a write-lock on the 1682 ** 'reserved byte'. 1683 ** 1684 ** A process may only obtain a PENDING lock after it has obtained a 1685 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock 1686 ** on the 'pending byte'. This ensures that no new SHARED locks can be 1687 ** obtained, but existing SHARED locks are allowed to persist. A process 1688 ** does not have to obtain a RESERVED lock on the way to a PENDING lock. 1689 ** This property is used by the algorithm for rolling back a journal file 1690 ** after a crash. 1691 ** 1692 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is 1693 ** implemented by obtaining a write-lock on the entire 'shared byte 1694 ** range'. Since all other locks require a read-lock on one of the bytes 1695 ** within this range, this ensures that no other locks are held on the 1696 ** database. 1697 */ 1698 int rc = SQLITE_OK; 1699 unixFile *pFile = (unixFile*)id; 1700 unixInodeInfo *pInode; 1701 struct flock lock; 1702 int tErrno = 0; 1703 1704 assert( pFile ); 1705 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, 1706 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 1707 azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared, 1708 osGetpid(0))); 1709 1710 /* If there is already a lock of this type or more restrictive on the 1711 ** unixFile, do nothing. Don't use the end_lock: exit path, as 1712 ** unixEnterMutex() hasn't been called yet. 1713 */ 1714 if( pFile->eFileLock>=eFileLock ){ 1715 OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h, 1716 azFileLock(eFileLock))); 1717 return SQLITE_OK; 1718 } 1719 1720 /* Make sure the locking sequence is correct. 1721 ** (1) We never move from unlocked to anything higher than shared lock. 1722 ** (2) SQLite never explicitly requests a pendig lock. 1723 ** (3) A shared lock is always held when a reserve lock is requested. 1724 */ 1725 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 1726 assert( eFileLock!=PENDING_LOCK ); 1727 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 1728 1729 /* This mutex is needed because pFile->pInode is shared across threads 1730 */ 1731 pInode = pFile->pInode; 1732 sqlite3_mutex_enter(pInode->pLockMutex); 1733 1734 /* If some thread using this PID has a lock via a different unixFile* 1735 ** handle that precludes the requested lock, return BUSY. 1736 */ 1737 if( (pFile->eFileLock!=pInode->eFileLock && 1738 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 1739 ){ 1740 rc = SQLITE_BUSY; 1741 goto end_lock; 1742 } 1743 1744 /* If a SHARED lock is requested, and some thread using this PID already 1745 ** has a SHARED or RESERVED lock, then increment reference counts and 1746 ** return SQLITE_OK. 1747 */ 1748 if( eFileLock==SHARED_LOCK && 1749 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 1750 assert( eFileLock==SHARED_LOCK ); 1751 assert( pFile->eFileLock==0 ); 1752 assert( pInode->nShared>0 ); 1753 pFile->eFileLock = SHARED_LOCK; 1754 pInode->nShared++; 1755 pInode->nLock++; 1756 goto end_lock; 1757 } 1758 1759 1760 /* A PENDING lock is needed before acquiring a SHARED lock and before 1761 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 1762 ** be released. 1763 */ 1764 lock.l_len = 1L; 1765 lock.l_whence = SEEK_SET; 1766 if( eFileLock==SHARED_LOCK 1767 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 1768 ){ 1769 lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK); 1770 lock.l_start = PENDING_BYTE; 1771 if( unixFileLock(pFile, &lock) ){ 1772 tErrno = errno; 1773 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1774 if( rc!=SQLITE_BUSY ){ 1775 storeLastErrno(pFile, tErrno); 1776 } 1777 goto end_lock; 1778 } 1779 } 1780 1781 1782 /* If control gets to this point, then actually go ahead and make 1783 ** operating system calls for the specified lock. 1784 */ 1785 if( eFileLock==SHARED_LOCK ){ 1786 assert( pInode->nShared==0 ); 1787 assert( pInode->eFileLock==0 ); 1788 assert( rc==SQLITE_OK ); 1789 1790 /* Now get the read-lock */ 1791 lock.l_start = SHARED_FIRST; 1792 lock.l_len = SHARED_SIZE; 1793 if( unixFileLock(pFile, &lock) ){ 1794 tErrno = errno; 1795 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1796 } 1797 1798 /* Drop the temporary PENDING lock */ 1799 lock.l_start = PENDING_BYTE; 1800 lock.l_len = 1L; 1801 lock.l_type = F_UNLCK; 1802 if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){ 1803 /* This could happen with a network mount */ 1804 tErrno = errno; 1805 rc = SQLITE_IOERR_UNLOCK; 1806 } 1807 1808 if( rc ){ 1809 if( rc!=SQLITE_BUSY ){ 1810 storeLastErrno(pFile, tErrno); 1811 } 1812 goto end_lock; 1813 }else{ 1814 pFile->eFileLock = SHARED_LOCK; 1815 pInode->nLock++; 1816 pInode->nShared = 1; 1817 } 1818 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 1819 /* We are trying for an exclusive lock but another thread in this 1820 ** same process is still holding a shared lock. */ 1821 rc = SQLITE_BUSY; 1822 }else{ 1823 /* The request was for a RESERVED or EXCLUSIVE lock. It is 1824 ** assumed that there is a SHARED or greater lock on the file 1825 ** already. 1826 */ 1827 assert( 0!=pFile->eFileLock ); 1828 lock.l_type = F_WRLCK; 1829 1830 assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK ); 1831 if( eFileLock==RESERVED_LOCK ){ 1832 lock.l_start = RESERVED_BYTE; 1833 lock.l_len = 1L; 1834 }else{ 1835 lock.l_start = SHARED_FIRST; 1836 lock.l_len = SHARED_SIZE; 1837 } 1838 1839 if( unixFileLock(pFile, &lock) ){ 1840 tErrno = errno; 1841 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1842 if( rc!=SQLITE_BUSY ){ 1843 storeLastErrno(pFile, tErrno); 1844 } 1845 } 1846 } 1847 1848 1849 #ifdef SQLITE_DEBUG 1850 /* Set up the transaction-counter change checking flags when 1851 ** transitioning from a SHARED to a RESERVED lock. The change 1852 ** from SHARED to RESERVED marks the beginning of a normal 1853 ** write operation (not a hot journal rollback). 1854 */ 1855 if( rc==SQLITE_OK 1856 && pFile->eFileLock<=SHARED_LOCK 1857 && eFileLock==RESERVED_LOCK 1858 ){ 1859 pFile->transCntrChng = 0; 1860 pFile->dbUpdate = 0; 1861 pFile->inNormalWrite = 1; 1862 } 1863 #endif 1864 1865 1866 if( rc==SQLITE_OK ){ 1867 pFile->eFileLock = eFileLock; 1868 pInode->eFileLock = eFileLock; 1869 }else if( eFileLock==EXCLUSIVE_LOCK ){ 1870 pFile->eFileLock = PENDING_LOCK; 1871 pInode->eFileLock = PENDING_LOCK; 1872 } 1873 1874 end_lock: 1875 sqlite3_mutex_leave(pInode->pLockMutex); 1876 OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), 1877 rc==SQLITE_OK ? "ok" : "failed")); 1878 return rc; 1879 } 1880 1881 /* 1882 ** Add the file descriptor used by file handle pFile to the corresponding 1883 ** pUnused list. 1884 */ 1885 static void setPendingFd(unixFile *pFile){ 1886 unixInodeInfo *pInode = pFile->pInode; 1887 UnixUnusedFd *p = pFile->pPreallocatedUnused; 1888 assert( unixFileMutexHeld(pFile) ); 1889 p->pNext = pInode->pUnused; 1890 pInode->pUnused = p; 1891 pFile->h = -1; 1892 pFile->pPreallocatedUnused = 0; 1893 } 1894 1895 /* 1896 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 1897 ** must be either NO_LOCK or SHARED_LOCK. 1898 ** 1899 ** If the locking level of the file descriptor is already at or below 1900 ** the requested locking level, this routine is a no-op. 1901 ** 1902 ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED 1903 ** the byte range is divided into 2 parts and the first part is unlocked then 1904 ** set to a read lock, then the other part is simply unlocked. This works 1905 ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to 1906 ** remove the write lock on a region when a read lock is set. 1907 */ 1908 static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ 1909 unixFile *pFile = (unixFile*)id; 1910 unixInodeInfo *pInode; 1911 struct flock lock; 1912 int rc = SQLITE_OK; 1913 1914 assert( pFile ); 1915 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock, 1916 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 1917 osGetpid(0))); 1918 1919 assert( eFileLock<=SHARED_LOCK ); 1920 if( pFile->eFileLock<=eFileLock ){ 1921 return SQLITE_OK; 1922 } 1923 pInode = pFile->pInode; 1924 sqlite3_mutex_enter(pInode->pLockMutex); 1925 assert( pInode->nShared!=0 ); 1926 if( pFile->eFileLock>SHARED_LOCK ){ 1927 assert( pInode->eFileLock==pFile->eFileLock ); 1928 1929 #ifdef SQLITE_DEBUG 1930 /* When reducing a lock such that other processes can start 1931 ** reading the database file again, make sure that the 1932 ** transaction counter was updated if any part of the database 1933 ** file changed. If the transaction counter is not updated, 1934 ** other connections to the same file might not realize that 1935 ** the file has changed and hence might not know to flush their 1936 ** cache. The use of a stale cache can lead to database corruption. 1937 */ 1938 pFile->inNormalWrite = 0; 1939 #endif 1940 1941 /* downgrading to a shared lock on NFS involves clearing the write lock 1942 ** before establishing the readlock - to avoid a race condition we downgrade 1943 ** the lock in 2 blocks, so that part of the range will be covered by a 1944 ** write lock until the rest is covered by a read lock: 1945 ** 1: [WWWWW] 1946 ** 2: [....W] 1947 ** 3: [RRRRW] 1948 ** 4: [RRRR.] 1949 */ 1950 if( eFileLock==SHARED_LOCK ){ 1951 #if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE 1952 (void)handleNFSUnlock; 1953 assert( handleNFSUnlock==0 ); 1954 #endif 1955 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 1956 if( handleNFSUnlock ){ 1957 int tErrno; /* Error code from system call errors */ 1958 off_t divSize = SHARED_SIZE - 1; 1959 1960 lock.l_type = F_UNLCK; 1961 lock.l_whence = SEEK_SET; 1962 lock.l_start = SHARED_FIRST; 1963 lock.l_len = divSize; 1964 if( unixFileLock(pFile, &lock)==(-1) ){ 1965 tErrno = errno; 1966 rc = SQLITE_IOERR_UNLOCK; 1967 storeLastErrno(pFile, tErrno); 1968 goto end_unlock; 1969 } 1970 lock.l_type = F_RDLCK; 1971 lock.l_whence = SEEK_SET; 1972 lock.l_start = SHARED_FIRST; 1973 lock.l_len = divSize; 1974 if( unixFileLock(pFile, &lock)==(-1) ){ 1975 tErrno = errno; 1976 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); 1977 if( IS_LOCK_ERROR(rc) ){ 1978 storeLastErrno(pFile, tErrno); 1979 } 1980 goto end_unlock; 1981 } 1982 lock.l_type = F_UNLCK; 1983 lock.l_whence = SEEK_SET; 1984 lock.l_start = SHARED_FIRST+divSize; 1985 lock.l_len = SHARED_SIZE-divSize; 1986 if( unixFileLock(pFile, &lock)==(-1) ){ 1987 tErrno = errno; 1988 rc = SQLITE_IOERR_UNLOCK; 1989 storeLastErrno(pFile, tErrno); 1990 goto end_unlock; 1991 } 1992 }else 1993 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 1994 { 1995 lock.l_type = F_RDLCK; 1996 lock.l_whence = SEEK_SET; 1997 lock.l_start = SHARED_FIRST; 1998 lock.l_len = SHARED_SIZE; 1999 if( unixFileLock(pFile, &lock) ){ 2000 /* In theory, the call to unixFileLock() cannot fail because another 2001 ** process is holding an incompatible lock. If it does, this 2002 ** indicates that the other process is not following the locking 2003 ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning 2004 ** SQLITE_BUSY would confuse the upper layer (in practice it causes 2005 ** an assert to fail). */ 2006 rc = SQLITE_IOERR_RDLOCK; 2007 storeLastErrno(pFile, errno); 2008 goto end_unlock; 2009 } 2010 } 2011 } 2012 lock.l_type = F_UNLCK; 2013 lock.l_whence = SEEK_SET; 2014 lock.l_start = PENDING_BYTE; 2015 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); 2016 if( unixFileLock(pFile, &lock)==0 ){ 2017 pInode->eFileLock = SHARED_LOCK; 2018 }else{ 2019 rc = SQLITE_IOERR_UNLOCK; 2020 storeLastErrno(pFile, errno); 2021 goto end_unlock; 2022 } 2023 } 2024 if( eFileLock==NO_LOCK ){ 2025 /* Decrement the shared lock counter. Release the lock using an 2026 ** OS call only when all threads in this same process have released 2027 ** the lock. 2028 */ 2029 pInode->nShared--; 2030 if( pInode->nShared==0 ){ 2031 lock.l_type = F_UNLCK; 2032 lock.l_whence = SEEK_SET; 2033 lock.l_start = lock.l_len = 0L; 2034 if( unixFileLock(pFile, &lock)==0 ){ 2035 pInode->eFileLock = NO_LOCK; 2036 }else{ 2037 rc = SQLITE_IOERR_UNLOCK; 2038 storeLastErrno(pFile, errno); 2039 pInode->eFileLock = NO_LOCK; 2040 pFile->eFileLock = NO_LOCK; 2041 } 2042 } 2043 2044 /* Decrement the count of locks against this same file. When the 2045 ** count reaches zero, close any other file descriptors whose close 2046 ** was deferred because of outstanding locks. 2047 */ 2048 pInode->nLock--; 2049 assert( pInode->nLock>=0 ); 2050 if( pInode->nLock==0 ) closePendingFds(pFile); 2051 } 2052 2053 end_unlock: 2054 sqlite3_mutex_leave(pInode->pLockMutex); 2055 if( rc==SQLITE_OK ){ 2056 pFile->eFileLock = eFileLock; 2057 } 2058 return rc; 2059 } 2060 2061 /* 2062 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2063 ** must be either NO_LOCK or SHARED_LOCK. 2064 ** 2065 ** If the locking level of the file descriptor is already at or below 2066 ** the requested locking level, this routine is a no-op. 2067 */ 2068 static int unixUnlock(sqlite3_file *id, int eFileLock){ 2069 #if SQLITE_MAX_MMAP_SIZE>0 2070 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); 2071 #endif 2072 return posixUnlock(id, eFileLock, 0); 2073 } 2074 2075 #if SQLITE_MAX_MMAP_SIZE>0 2076 static int unixMapfile(unixFile *pFd, i64 nByte); 2077 static void unixUnmapfile(unixFile *pFd); 2078 #endif 2079 2080 /* 2081 ** This function performs the parts of the "close file" operation 2082 ** common to all locking schemes. It closes the directory and file 2083 ** handles, if they are valid, and sets all fields of the unixFile 2084 ** structure to 0. 2085 ** 2086 ** It is *not* necessary to hold the mutex when this routine is called, 2087 ** even on VxWorks. A mutex will be acquired on VxWorks by the 2088 ** vxworksReleaseFileId() routine. 2089 */ 2090 static int closeUnixFile(sqlite3_file *id){ 2091 unixFile *pFile = (unixFile*)id; 2092 #if SQLITE_MAX_MMAP_SIZE>0 2093 unixUnmapfile(pFile); 2094 #endif 2095 if( pFile->h>=0 ){ 2096 robust_close(pFile, pFile->h, __LINE__); 2097 pFile->h = -1; 2098 } 2099 #if OS_VXWORKS 2100 if( pFile->pId ){ 2101 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2102 osUnlink(pFile->pId->zCanonicalName); 2103 } 2104 vxworksReleaseFileId(pFile->pId); 2105 pFile->pId = 0; 2106 } 2107 #endif 2108 #ifdef SQLITE_UNLINK_AFTER_CLOSE 2109 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2110 osUnlink(pFile->zPath); 2111 sqlite3_free(*(char**)&pFile->zPath); 2112 pFile->zPath = 0; 2113 } 2114 #endif 2115 OSTRACE(("CLOSE %-3d\n", pFile->h)); 2116 OpenCounter(-1); 2117 sqlite3_free(pFile->pPreallocatedUnused); 2118 memset(pFile, 0, sizeof(unixFile)); 2119 return SQLITE_OK; 2120 } 2121 2122 /* 2123 ** Close a file. 2124 */ 2125 static int unixClose(sqlite3_file *id){ 2126 int rc = SQLITE_OK; 2127 unixFile *pFile = (unixFile *)id; 2128 unixInodeInfo *pInode = pFile->pInode; 2129 2130 assert( pInode!=0 ); 2131 verifyDbFile(pFile); 2132 unixUnlock(id, NO_LOCK); 2133 assert( unixFileMutexNotheld(pFile) ); 2134 unixEnterMutex(); 2135 2136 /* unixFile.pInode is always valid here. Otherwise, a different close 2137 ** routine (e.g. nolockClose()) would be called instead. 2138 */ 2139 assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); 2140 sqlite3_mutex_enter(pInode->pLockMutex); 2141 if( pInode->nLock ){ 2142 /* If there are outstanding locks, do not actually close the file just 2143 ** yet because that would clear those locks. Instead, add the file 2144 ** descriptor to pInode->pUnused list. It will be automatically closed 2145 ** when the last lock is cleared. 2146 */ 2147 setPendingFd(pFile); 2148 } 2149 sqlite3_mutex_leave(pInode->pLockMutex); 2150 releaseInodeInfo(pFile); 2151 assert( pFile->pShm==0 ); 2152 rc = closeUnixFile(id); 2153 unixLeaveMutex(); 2154 return rc; 2155 } 2156 2157 /************** End of the posix advisory lock implementation ***************** 2158 ******************************************************************************/ 2159 2160 /****************************************************************************** 2161 ****************************** No-op Locking ********************************** 2162 ** 2163 ** Of the various locking implementations available, this is by far the 2164 ** simplest: locking is ignored. No attempt is made to lock the database 2165 ** file for reading or writing. 2166 ** 2167 ** This locking mode is appropriate for use on read-only databases 2168 ** (ex: databases that are burned into CD-ROM, for example.) It can 2169 ** also be used if the application employs some external mechanism to 2170 ** prevent simultaneous access of the same database by two or more 2171 ** database connections. But there is a serious risk of database 2172 ** corruption if this locking mode is used in situations where multiple 2173 ** database connections are accessing the same database file at the same 2174 ** time and one or more of those connections are writing. 2175 */ 2176 2177 static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ 2178 UNUSED_PARAMETER(NotUsed); 2179 *pResOut = 0; 2180 return SQLITE_OK; 2181 } 2182 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ 2183 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2184 return SQLITE_OK; 2185 } 2186 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ 2187 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2188 return SQLITE_OK; 2189 } 2190 2191 /* 2192 ** Close the file. 2193 */ 2194 static int nolockClose(sqlite3_file *id) { 2195 return closeUnixFile(id); 2196 } 2197 2198 /******************* End of the no-op lock implementation ********************* 2199 ******************************************************************************/ 2200 2201 /****************************************************************************** 2202 ************************* Begin dot-file Locking ****************************** 2203 ** 2204 ** The dotfile locking implementation uses the existence of separate lock 2205 ** files (really a directory) to control access to the database. This works 2206 ** on just about every filesystem imaginable. But there are serious downsides: 2207 ** 2208 ** (1) There is zero concurrency. A single reader blocks all other 2209 ** connections from reading or writing the database. 2210 ** 2211 ** (2) An application crash or power loss can leave stale lock files 2212 ** sitting around that need to be cleared manually. 2213 ** 2214 ** Nevertheless, a dotlock is an appropriate locking mode for use if no 2215 ** other locking strategy is available. 2216 ** 2217 ** Dotfile locking works by creating a subdirectory in the same directory as 2218 ** the database and with the same name but with a ".lock" extension added. 2219 ** The existence of a lock directory implies an EXCLUSIVE lock. All other 2220 ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. 2221 */ 2222 2223 /* 2224 ** The file suffix added to the data base filename in order to create the 2225 ** lock directory. 2226 */ 2227 #define DOTLOCK_SUFFIX ".lock" 2228 2229 /* 2230 ** This routine checks if there is a RESERVED lock held on the specified 2231 ** file by this or any other process. If such a lock is held, set *pResOut 2232 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2233 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2234 ** 2235 ** In dotfile locking, either a lock exists or it does not. So in this 2236 ** variation of CheckReservedLock(), *pResOut is set to true if any lock 2237 ** is held on the file and false if the file is unlocked. 2238 */ 2239 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 2240 int rc = SQLITE_OK; 2241 int reserved = 0; 2242 unixFile *pFile = (unixFile*)id; 2243 2244 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2245 2246 assert( pFile ); 2247 reserved = osAccess((const char*)pFile->lockingContext, 0)==0; 2248 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); 2249 *pResOut = reserved; 2250 return rc; 2251 } 2252 2253 /* 2254 ** Lock the file with the lock specified by parameter eFileLock - one 2255 ** of the following: 2256 ** 2257 ** (1) SHARED_LOCK 2258 ** (2) RESERVED_LOCK 2259 ** (3) PENDING_LOCK 2260 ** (4) EXCLUSIVE_LOCK 2261 ** 2262 ** Sometimes when requesting one lock state, additional lock states 2263 ** are inserted in between. The locking might fail on one of the later 2264 ** transitions leaving the lock state different from what it started but 2265 ** still short of its goal. The following chart shows the allowed 2266 ** transitions and the inserted intermediate states: 2267 ** 2268 ** UNLOCKED -> SHARED 2269 ** SHARED -> RESERVED 2270 ** SHARED -> (PENDING) -> EXCLUSIVE 2271 ** RESERVED -> (PENDING) -> EXCLUSIVE 2272 ** PENDING -> EXCLUSIVE 2273 ** 2274 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2275 ** routine to lower a locking level. 2276 ** 2277 ** With dotfile locking, we really only support state (4): EXCLUSIVE. 2278 ** But we track the other locking levels internally. 2279 */ 2280 static int dotlockLock(sqlite3_file *id, int eFileLock) { 2281 unixFile *pFile = (unixFile*)id; 2282 char *zLockFile = (char *)pFile->lockingContext; 2283 int rc = SQLITE_OK; 2284 2285 2286 /* If we have any lock, then the lock file already exists. All we have 2287 ** to do is adjust our internal record of the lock level. 2288 */ 2289 if( pFile->eFileLock > NO_LOCK ){ 2290 pFile->eFileLock = eFileLock; 2291 /* Always update the timestamp on the old file */ 2292 #ifdef HAVE_UTIME 2293 utime(zLockFile, NULL); 2294 #else 2295 utimes(zLockFile, NULL); 2296 #endif 2297 return SQLITE_OK; 2298 } 2299 2300 /* grab an exclusive lock */ 2301 rc = osMkdir(zLockFile, 0777); 2302 if( rc<0 ){ 2303 /* failed to open/create the lock directory */ 2304 int tErrno = errno; 2305 if( EEXIST == tErrno ){ 2306 rc = SQLITE_BUSY; 2307 } else { 2308 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2309 if( rc!=SQLITE_BUSY ){ 2310 storeLastErrno(pFile, tErrno); 2311 } 2312 } 2313 return rc; 2314 } 2315 2316 /* got it, set the type and return ok */ 2317 pFile->eFileLock = eFileLock; 2318 return rc; 2319 } 2320 2321 /* 2322 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2323 ** must be either NO_LOCK or SHARED_LOCK. 2324 ** 2325 ** If the locking level of the file descriptor is already at or below 2326 ** the requested locking level, this routine is a no-op. 2327 ** 2328 ** When the locking level reaches NO_LOCK, delete the lock file. 2329 */ 2330 static int dotlockUnlock(sqlite3_file *id, int eFileLock) { 2331 unixFile *pFile = (unixFile*)id; 2332 char *zLockFile = (char *)pFile->lockingContext; 2333 int rc; 2334 2335 assert( pFile ); 2336 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, 2337 pFile->eFileLock, osGetpid(0))); 2338 assert( eFileLock<=SHARED_LOCK ); 2339 2340 /* no-op if possible */ 2341 if( pFile->eFileLock==eFileLock ){ 2342 return SQLITE_OK; 2343 } 2344 2345 /* To downgrade to shared, simply update our internal notion of the 2346 ** lock state. No need to mess with the file on disk. 2347 */ 2348 if( eFileLock==SHARED_LOCK ){ 2349 pFile->eFileLock = SHARED_LOCK; 2350 return SQLITE_OK; 2351 } 2352 2353 /* To fully unlock the database, delete the lock file */ 2354 assert( eFileLock==NO_LOCK ); 2355 rc = osRmdir(zLockFile); 2356 if( rc<0 ){ 2357 int tErrno = errno; 2358 if( tErrno==ENOENT ){ 2359 rc = SQLITE_OK; 2360 }else{ 2361 rc = SQLITE_IOERR_UNLOCK; 2362 storeLastErrno(pFile, tErrno); 2363 } 2364 return rc; 2365 } 2366 pFile->eFileLock = NO_LOCK; 2367 return SQLITE_OK; 2368 } 2369 2370 /* 2371 ** Close a file. Make sure the lock has been released before closing. 2372 */ 2373 static int dotlockClose(sqlite3_file *id) { 2374 unixFile *pFile = (unixFile*)id; 2375 assert( id!=0 ); 2376 dotlockUnlock(id, NO_LOCK); 2377 sqlite3_free(pFile->lockingContext); 2378 return closeUnixFile(id); 2379 } 2380 /****************** End of the dot-file lock implementation ******************* 2381 ******************************************************************************/ 2382 2383 /****************************************************************************** 2384 ************************** Begin flock Locking ******************************** 2385 ** 2386 ** Use the flock() system call to do file locking. 2387 ** 2388 ** flock() locking is like dot-file locking in that the various 2389 ** fine-grain locking levels supported by SQLite are collapsed into 2390 ** a single exclusive lock. In other words, SHARED, RESERVED, and 2391 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite 2392 ** still works when you do this, but concurrency is reduced since 2393 ** only a single process can be reading the database at a time. 2394 ** 2395 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off 2396 */ 2397 #if SQLITE_ENABLE_LOCKING_STYLE 2398 2399 /* 2400 ** Retry flock() calls that fail with EINTR 2401 */ 2402 #ifdef EINTR 2403 static int robust_flock(int fd, int op){ 2404 int rc; 2405 do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); 2406 return rc; 2407 } 2408 #else 2409 # define robust_flock(a,b) flock(a,b) 2410 #endif 2411 2412 2413 /* 2414 ** This routine checks if there is a RESERVED lock held on the specified 2415 ** file by this or any other process. If such a lock is held, set *pResOut 2416 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2417 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2418 */ 2419 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 2420 int rc = SQLITE_OK; 2421 int reserved = 0; 2422 unixFile *pFile = (unixFile*)id; 2423 2424 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2425 2426 assert( pFile ); 2427 2428 /* Check if a thread in this process holds such a lock */ 2429 if( pFile->eFileLock>SHARED_LOCK ){ 2430 reserved = 1; 2431 } 2432 2433 /* Otherwise see if some other process holds it. */ 2434 if( !reserved ){ 2435 /* attempt to get the lock */ 2436 int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); 2437 if( !lrc ){ 2438 /* got the lock, unlock it */ 2439 lrc = robust_flock(pFile->h, LOCK_UN); 2440 if ( lrc ) { 2441 int tErrno = errno; 2442 /* unlock failed with an error */ 2443 lrc = SQLITE_IOERR_UNLOCK; 2444 storeLastErrno(pFile, tErrno); 2445 rc = lrc; 2446 } 2447 } else { 2448 int tErrno = errno; 2449 reserved = 1; 2450 /* someone else might have it reserved */ 2451 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2452 if( IS_LOCK_ERROR(lrc) ){ 2453 storeLastErrno(pFile, tErrno); 2454 rc = lrc; 2455 } 2456 } 2457 } 2458 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); 2459 2460 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2461 if( (rc & 0xff) == SQLITE_IOERR ){ 2462 rc = SQLITE_OK; 2463 reserved=1; 2464 } 2465 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2466 *pResOut = reserved; 2467 return rc; 2468 } 2469 2470 /* 2471 ** Lock the file with the lock specified by parameter eFileLock - one 2472 ** of the following: 2473 ** 2474 ** (1) SHARED_LOCK 2475 ** (2) RESERVED_LOCK 2476 ** (3) PENDING_LOCK 2477 ** (4) EXCLUSIVE_LOCK 2478 ** 2479 ** Sometimes when requesting one lock state, additional lock states 2480 ** are inserted in between. The locking might fail on one of the later 2481 ** transitions leaving the lock state different from what it started but 2482 ** still short of its goal. The following chart shows the allowed 2483 ** transitions and the inserted intermediate states: 2484 ** 2485 ** UNLOCKED -> SHARED 2486 ** SHARED -> RESERVED 2487 ** SHARED -> (PENDING) -> EXCLUSIVE 2488 ** RESERVED -> (PENDING) -> EXCLUSIVE 2489 ** PENDING -> EXCLUSIVE 2490 ** 2491 ** flock() only really support EXCLUSIVE locks. We track intermediate 2492 ** lock states in the sqlite3_file structure, but all locks SHARED or 2493 ** above are really EXCLUSIVE locks and exclude all other processes from 2494 ** access the file. 2495 ** 2496 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2497 ** routine to lower a locking level. 2498 */ 2499 static int flockLock(sqlite3_file *id, int eFileLock) { 2500 int rc = SQLITE_OK; 2501 unixFile *pFile = (unixFile*)id; 2502 2503 assert( pFile ); 2504 2505 /* if we already have a lock, it is exclusive. 2506 ** Just adjust level and punt on outta here. */ 2507 if (pFile->eFileLock > NO_LOCK) { 2508 pFile->eFileLock = eFileLock; 2509 return SQLITE_OK; 2510 } 2511 2512 /* grab an exclusive lock */ 2513 2514 if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { 2515 int tErrno = errno; 2516 /* didn't get, must be busy */ 2517 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2518 if( IS_LOCK_ERROR(rc) ){ 2519 storeLastErrno(pFile, tErrno); 2520 } 2521 } else { 2522 /* got it, set the type and return ok */ 2523 pFile->eFileLock = eFileLock; 2524 } 2525 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 2526 rc==SQLITE_OK ? "ok" : "failed")); 2527 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2528 if( (rc & 0xff) == SQLITE_IOERR ){ 2529 rc = SQLITE_BUSY; 2530 } 2531 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2532 return rc; 2533 } 2534 2535 2536 /* 2537 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2538 ** must be either NO_LOCK or SHARED_LOCK. 2539 ** 2540 ** If the locking level of the file descriptor is already at or below 2541 ** the requested locking level, this routine is a no-op. 2542 */ 2543 static int flockUnlock(sqlite3_file *id, int eFileLock) { 2544 unixFile *pFile = (unixFile*)id; 2545 2546 assert( pFile ); 2547 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, 2548 pFile->eFileLock, osGetpid(0))); 2549 assert( eFileLock<=SHARED_LOCK ); 2550 2551 /* no-op if possible */ 2552 if( pFile->eFileLock==eFileLock ){ 2553 return SQLITE_OK; 2554 } 2555 2556 /* shared can just be set because we always have an exclusive */ 2557 if (eFileLock==SHARED_LOCK) { 2558 pFile->eFileLock = eFileLock; 2559 return SQLITE_OK; 2560 } 2561 2562 /* no, really, unlock. */ 2563 if( robust_flock(pFile->h, LOCK_UN) ){ 2564 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2565 return SQLITE_OK; 2566 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2567 return SQLITE_IOERR_UNLOCK; 2568 }else{ 2569 pFile->eFileLock = NO_LOCK; 2570 return SQLITE_OK; 2571 } 2572 } 2573 2574 /* 2575 ** Close a file. 2576 */ 2577 static int flockClose(sqlite3_file *id) { 2578 assert( id!=0 ); 2579 flockUnlock(id, NO_LOCK); 2580 return closeUnixFile(id); 2581 } 2582 2583 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ 2584 2585 /******************* End of the flock lock implementation ********************* 2586 ******************************************************************************/ 2587 2588 /****************************************************************************** 2589 ************************ Begin Named Semaphore Locking ************************ 2590 ** 2591 ** Named semaphore locking is only supported on VxWorks. 2592 ** 2593 ** Semaphore locking is like dot-lock and flock in that it really only 2594 ** supports EXCLUSIVE locking. Only a single process can read or write 2595 ** the database file at a time. This reduces potential concurrency, but 2596 ** makes the lock implementation much easier. 2597 */ 2598 #if OS_VXWORKS 2599 2600 /* 2601 ** This routine checks if there is a RESERVED lock held on the specified 2602 ** file by this or any other process. If such a lock is held, set *pResOut 2603 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2604 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2605 */ 2606 static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { 2607 int rc = SQLITE_OK; 2608 int reserved = 0; 2609 unixFile *pFile = (unixFile*)id; 2610 2611 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2612 2613 assert( pFile ); 2614 2615 /* Check if a thread in this process holds such a lock */ 2616 if( pFile->eFileLock>SHARED_LOCK ){ 2617 reserved = 1; 2618 } 2619 2620 /* Otherwise see if some other process holds it. */ 2621 if( !reserved ){ 2622 sem_t *pSem = pFile->pInode->pSem; 2623 2624 if( sem_trywait(pSem)==-1 ){ 2625 int tErrno = errno; 2626 if( EAGAIN != tErrno ){ 2627 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); 2628 storeLastErrno(pFile, tErrno); 2629 } else { 2630 /* someone else has the lock when we are in NO_LOCK */ 2631 reserved = (pFile->eFileLock < SHARED_LOCK); 2632 } 2633 }else{ 2634 /* we could have it if we want it */ 2635 sem_post(pSem); 2636 } 2637 } 2638 OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); 2639 2640 *pResOut = reserved; 2641 return rc; 2642 } 2643 2644 /* 2645 ** Lock the file with the lock specified by parameter eFileLock - one 2646 ** of the following: 2647 ** 2648 ** (1) SHARED_LOCK 2649 ** (2) RESERVED_LOCK 2650 ** (3) PENDING_LOCK 2651 ** (4) EXCLUSIVE_LOCK 2652 ** 2653 ** Sometimes when requesting one lock state, additional lock states 2654 ** are inserted in between. The locking might fail on one of the later 2655 ** transitions leaving the lock state different from what it started but 2656 ** still short of its goal. The following chart shows the allowed 2657 ** transitions and the inserted intermediate states: 2658 ** 2659 ** UNLOCKED -> SHARED 2660 ** SHARED -> RESERVED 2661 ** SHARED -> (PENDING) -> EXCLUSIVE 2662 ** RESERVED -> (PENDING) -> EXCLUSIVE 2663 ** PENDING -> EXCLUSIVE 2664 ** 2665 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate 2666 ** lock states in the sqlite3_file structure, but all locks SHARED or 2667 ** above are really EXCLUSIVE locks and exclude all other processes from 2668 ** access the file. 2669 ** 2670 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2671 ** routine to lower a locking level. 2672 */ 2673 static int semXLock(sqlite3_file *id, int eFileLock) { 2674 unixFile *pFile = (unixFile*)id; 2675 sem_t *pSem = pFile->pInode->pSem; 2676 int rc = SQLITE_OK; 2677 2678 /* if we already have a lock, it is exclusive. 2679 ** Just adjust level and punt on outta here. */ 2680 if (pFile->eFileLock > NO_LOCK) { 2681 pFile->eFileLock = eFileLock; 2682 rc = SQLITE_OK; 2683 goto sem_end_lock; 2684 } 2685 2686 /* lock semaphore now but bail out when already locked. */ 2687 if( sem_trywait(pSem)==-1 ){ 2688 rc = SQLITE_BUSY; 2689 goto sem_end_lock; 2690 } 2691 2692 /* got it, set the type and return ok */ 2693 pFile->eFileLock = eFileLock; 2694 2695 sem_end_lock: 2696 return rc; 2697 } 2698 2699 /* 2700 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2701 ** must be either NO_LOCK or SHARED_LOCK. 2702 ** 2703 ** If the locking level of the file descriptor is already at or below 2704 ** the requested locking level, this routine is a no-op. 2705 */ 2706 static int semXUnlock(sqlite3_file *id, int eFileLock) { 2707 unixFile *pFile = (unixFile*)id; 2708 sem_t *pSem = pFile->pInode->pSem; 2709 2710 assert( pFile ); 2711 assert( pSem ); 2712 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, 2713 pFile->eFileLock, osGetpid(0))); 2714 assert( eFileLock<=SHARED_LOCK ); 2715 2716 /* no-op if possible */ 2717 if( pFile->eFileLock==eFileLock ){ 2718 return SQLITE_OK; 2719 } 2720 2721 /* shared can just be set because we always have an exclusive */ 2722 if (eFileLock==SHARED_LOCK) { 2723 pFile->eFileLock = eFileLock; 2724 return SQLITE_OK; 2725 } 2726 2727 /* no, really unlock. */ 2728 if ( sem_post(pSem)==-1 ) { 2729 int rc, tErrno = errno; 2730 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 2731 if( IS_LOCK_ERROR(rc) ){ 2732 storeLastErrno(pFile, tErrno); 2733 } 2734 return rc; 2735 } 2736 pFile->eFileLock = NO_LOCK; 2737 return SQLITE_OK; 2738 } 2739 2740 /* 2741 ** Close a file. 2742 */ 2743 static int semXClose(sqlite3_file *id) { 2744 if( id ){ 2745 unixFile *pFile = (unixFile*)id; 2746 semXUnlock(id, NO_LOCK); 2747 assert( pFile ); 2748 assert( unixFileMutexNotheld(pFile) ); 2749 unixEnterMutex(); 2750 releaseInodeInfo(pFile); 2751 unixLeaveMutex(); 2752 closeUnixFile(id); 2753 } 2754 return SQLITE_OK; 2755 } 2756 2757 #endif /* OS_VXWORKS */ 2758 /* 2759 ** Named semaphore locking is only available on VxWorks. 2760 ** 2761 *************** End of the named semaphore lock implementation **************** 2762 ******************************************************************************/ 2763 2764 2765 /****************************************************************************** 2766 *************************** Begin AFP Locking ********************************* 2767 ** 2768 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found 2769 ** on Apple Macintosh computers - both OS9 and OSX. 2770 ** 2771 ** Third-party implementations of AFP are available. But this code here 2772 ** only works on OSX. 2773 */ 2774 2775 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2776 /* 2777 ** The afpLockingContext structure contains all afp lock specific state 2778 */ 2779 typedef struct afpLockingContext afpLockingContext; 2780 struct afpLockingContext { 2781 int reserved; 2782 const char *dbPath; /* Name of the open file */ 2783 }; 2784 2785 struct ByteRangeLockPB2 2786 { 2787 unsigned long long offset; /* offset to first byte to lock */ 2788 unsigned long long length; /* nbr of bytes to lock */ 2789 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 2790 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 2791 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 2792 int fd; /* file desc to assoc this lock with */ 2793 }; 2794 2795 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 2796 2797 /* 2798 ** This is a utility for setting or clearing a bit-range lock on an 2799 ** AFP filesystem. 2800 ** 2801 ** Return SQLITE_OK on success, SQLITE_BUSY on failure. 2802 */ 2803 static int afpSetLock( 2804 const char *path, /* Name of the file to be locked or unlocked */ 2805 unixFile *pFile, /* Open file descriptor on path */ 2806 unsigned long long offset, /* First byte to be locked */ 2807 unsigned long long length, /* Number of bytes to lock */ 2808 int setLockFlag /* True to set lock. False to clear lock */ 2809 ){ 2810 struct ByteRangeLockPB2 pb; 2811 int err; 2812 2813 pb.unLockFlag = setLockFlag ? 0 : 1; 2814 pb.startEndFlag = 0; 2815 pb.offset = offset; 2816 pb.length = length; 2817 pb.fd = pFile->h; 2818 2819 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 2820 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), 2821 offset, length)); 2822 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 2823 if ( err==-1 ) { 2824 int rc; 2825 int tErrno = errno; 2826 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", 2827 path, tErrno, strerror(tErrno))); 2828 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS 2829 rc = SQLITE_BUSY; 2830 #else 2831 rc = sqliteErrorFromPosixError(tErrno, 2832 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); 2833 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ 2834 if( IS_LOCK_ERROR(rc) ){ 2835 storeLastErrno(pFile, tErrno); 2836 } 2837 return rc; 2838 } else { 2839 return SQLITE_OK; 2840 } 2841 } 2842 2843 /* 2844 ** This routine checks if there is a RESERVED lock held on the specified 2845 ** file by this or any other process. If such a lock is held, set *pResOut 2846 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2847 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2848 */ 2849 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 2850 int rc = SQLITE_OK; 2851 int reserved = 0; 2852 unixFile *pFile = (unixFile*)id; 2853 afpLockingContext *context; 2854 2855 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2856 2857 assert( pFile ); 2858 context = (afpLockingContext *) pFile->lockingContext; 2859 if( context->reserved ){ 2860 *pResOut = 1; 2861 return SQLITE_OK; 2862 } 2863 sqlite3_mutex_enter(pFile->pInode->pLockMutex); 2864 /* Check if a thread in this process holds such a lock */ 2865 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 2866 reserved = 1; 2867 } 2868 2869 /* Otherwise see if some other process holds it. 2870 */ 2871 if( !reserved ){ 2872 /* lock the RESERVED byte */ 2873 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2874 if( SQLITE_OK==lrc ){ 2875 /* if we succeeded in taking the reserved lock, unlock it to restore 2876 ** the original state */ 2877 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2878 } else { 2879 /* if we failed to get the lock then someone else must have it */ 2880 reserved = 1; 2881 } 2882 if( IS_LOCK_ERROR(lrc) ){ 2883 rc=lrc; 2884 } 2885 } 2886 2887 sqlite3_mutex_leave(pFile->pInode->pLockMutex); 2888 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); 2889 2890 *pResOut = reserved; 2891 return rc; 2892 } 2893 2894 /* 2895 ** Lock the file with the lock specified by parameter eFileLock - one 2896 ** of the following: 2897 ** 2898 ** (1) SHARED_LOCK 2899 ** (2) RESERVED_LOCK 2900 ** (3) PENDING_LOCK 2901 ** (4) EXCLUSIVE_LOCK 2902 ** 2903 ** Sometimes when requesting one lock state, additional lock states 2904 ** are inserted in between. The locking might fail on one of the later 2905 ** transitions leaving the lock state different from what it started but 2906 ** still short of its goal. The following chart shows the allowed 2907 ** transitions and the inserted intermediate states: 2908 ** 2909 ** UNLOCKED -> SHARED 2910 ** SHARED -> RESERVED 2911 ** SHARED -> (PENDING) -> EXCLUSIVE 2912 ** RESERVED -> (PENDING) -> EXCLUSIVE 2913 ** PENDING -> EXCLUSIVE 2914 ** 2915 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2916 ** routine to lower a locking level. 2917 */ 2918 static int afpLock(sqlite3_file *id, int eFileLock){ 2919 int rc = SQLITE_OK; 2920 unixFile *pFile = (unixFile*)id; 2921 unixInodeInfo *pInode = pFile->pInode; 2922 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2923 2924 assert( pFile ); 2925 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, 2926 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 2927 azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); 2928 2929 /* If there is already a lock of this type or more restrictive on the 2930 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 2931 ** unixEnterMutex() hasn't been called yet. 2932 */ 2933 if( pFile->eFileLock>=eFileLock ){ 2934 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, 2935 azFileLock(eFileLock))); 2936 return SQLITE_OK; 2937 } 2938 2939 /* Make sure the locking sequence is correct 2940 ** (1) We never move from unlocked to anything higher than shared lock. 2941 ** (2) SQLite never explicitly requests a pendig lock. 2942 ** (3) A shared lock is always held when a reserve lock is requested. 2943 */ 2944 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 2945 assert( eFileLock!=PENDING_LOCK ); 2946 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 2947 2948 /* This mutex is needed because pFile->pInode is shared across threads 2949 */ 2950 pInode = pFile->pInode; 2951 sqlite3_mutex_enter(pInode->pLockMutex); 2952 2953 /* If some thread using this PID has a lock via a different unixFile* 2954 ** handle that precludes the requested lock, return BUSY. 2955 */ 2956 if( (pFile->eFileLock!=pInode->eFileLock && 2957 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 2958 ){ 2959 rc = SQLITE_BUSY; 2960 goto afp_end_lock; 2961 } 2962 2963 /* If a SHARED lock is requested, and some thread using this PID already 2964 ** has a SHARED or RESERVED lock, then increment reference counts and 2965 ** return SQLITE_OK. 2966 */ 2967 if( eFileLock==SHARED_LOCK && 2968 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 2969 assert( eFileLock==SHARED_LOCK ); 2970 assert( pFile->eFileLock==0 ); 2971 assert( pInode->nShared>0 ); 2972 pFile->eFileLock = SHARED_LOCK; 2973 pInode->nShared++; 2974 pInode->nLock++; 2975 goto afp_end_lock; 2976 } 2977 2978 /* A PENDING lock is needed before acquiring a SHARED lock and before 2979 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 2980 ** be released. 2981 */ 2982 if( eFileLock==SHARED_LOCK 2983 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 2984 ){ 2985 int failed; 2986 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); 2987 if (failed) { 2988 rc = failed; 2989 goto afp_end_lock; 2990 } 2991 } 2992 2993 /* If control gets to this point, then actually go ahead and make 2994 ** operating system calls for the specified lock. 2995 */ 2996 if( eFileLock==SHARED_LOCK ){ 2997 int lrc1, lrc2, lrc1Errno = 0; 2998 long lk, mask; 2999 3000 assert( pInode->nShared==0 ); 3001 assert( pInode->eFileLock==0 ); 3002 3003 mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff; 3004 /* Now get the read-lock SHARED_LOCK */ 3005 /* note that the quality of the randomness doesn't matter that much */ 3006 lk = random(); 3007 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); 3008 lrc1 = afpSetLock(context->dbPath, pFile, 3009 SHARED_FIRST+pInode->sharedByte, 1, 1); 3010 if( IS_LOCK_ERROR(lrc1) ){ 3011 lrc1Errno = pFile->lastErrno; 3012 } 3013 /* Drop the temporary PENDING lock */ 3014 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3015 3016 if( IS_LOCK_ERROR(lrc1) ) { 3017 storeLastErrno(pFile, lrc1Errno); 3018 rc = lrc1; 3019 goto afp_end_lock; 3020 } else if( IS_LOCK_ERROR(lrc2) ){ 3021 rc = lrc2; 3022 goto afp_end_lock; 3023 } else if( lrc1 != SQLITE_OK ) { 3024 rc = lrc1; 3025 } else { 3026 pFile->eFileLock = SHARED_LOCK; 3027 pInode->nLock++; 3028 pInode->nShared = 1; 3029 } 3030 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 3031 /* We are trying for an exclusive lock but another thread in this 3032 ** same process is still holding a shared lock. */ 3033 rc = SQLITE_BUSY; 3034 }else{ 3035 /* The request was for a RESERVED or EXCLUSIVE lock. It is 3036 ** assumed that there is a SHARED or greater lock on the file 3037 ** already. 3038 */ 3039 int failed = 0; 3040 assert( 0!=pFile->eFileLock ); 3041 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { 3042 /* Acquire a RESERVED lock */ 3043 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 3044 if( !failed ){ 3045 context->reserved = 1; 3046 } 3047 } 3048 if (!failed && eFileLock == EXCLUSIVE_LOCK) { 3049 /* Acquire an EXCLUSIVE lock */ 3050 3051 /* Remove the shared lock before trying the range. we'll need to 3052 ** reestablish the shared lock if we can't get the afpUnlock 3053 */ 3054 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + 3055 pInode->sharedByte, 1, 0)) ){ 3056 int failed2 = SQLITE_OK; 3057 /* now attemmpt to get the exclusive lock range */ 3058 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 3059 SHARED_SIZE, 1); 3060 if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 3061 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ 3062 /* Can't reestablish the shared lock. Sqlite can't deal, this is 3063 ** a critical I/O error 3064 */ 3065 rc = ((failed & 0xff) == SQLITE_IOERR) ? failed2 : 3066 SQLITE_IOERR_LOCK; 3067 goto afp_end_lock; 3068 } 3069 }else{ 3070 rc = failed; 3071 } 3072 } 3073 if( failed ){ 3074 rc = failed; 3075 } 3076 } 3077 3078 if( rc==SQLITE_OK ){ 3079 pFile->eFileLock = eFileLock; 3080 pInode->eFileLock = eFileLock; 3081 }else if( eFileLock==EXCLUSIVE_LOCK ){ 3082 pFile->eFileLock = PENDING_LOCK; 3083 pInode->eFileLock = PENDING_LOCK; 3084 } 3085 3086 afp_end_lock: 3087 sqlite3_mutex_leave(pInode->pLockMutex); 3088 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 3089 rc==SQLITE_OK ? "ok" : "failed")); 3090 return rc; 3091 } 3092 3093 /* 3094 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3095 ** must be either NO_LOCK or SHARED_LOCK. 3096 ** 3097 ** If the locking level of the file descriptor is already at or below 3098 ** the requested locking level, this routine is a no-op. 3099 */ 3100 static int afpUnlock(sqlite3_file *id, int eFileLock) { 3101 int rc = SQLITE_OK; 3102 unixFile *pFile = (unixFile*)id; 3103 unixInodeInfo *pInode; 3104 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 3105 int skipShared = 0; 3106 #ifdef SQLITE_TEST 3107 int h = pFile->h; 3108 #endif 3109 3110 assert( pFile ); 3111 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, 3112 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 3113 osGetpid(0))); 3114 3115 assert( eFileLock<=SHARED_LOCK ); 3116 if( pFile->eFileLock<=eFileLock ){ 3117 return SQLITE_OK; 3118 } 3119 pInode = pFile->pInode; 3120 sqlite3_mutex_enter(pInode->pLockMutex); 3121 assert( pInode->nShared!=0 ); 3122 if( pFile->eFileLock>SHARED_LOCK ){ 3123 assert( pInode->eFileLock==pFile->eFileLock ); 3124 SimulateIOErrorBenign(1); 3125 SimulateIOError( h=(-1) ) 3126 SimulateIOErrorBenign(0); 3127 3128 #ifdef SQLITE_DEBUG 3129 /* When reducing a lock such that other processes can start 3130 ** reading the database file again, make sure that the 3131 ** transaction counter was updated if any part of the database 3132 ** file changed. If the transaction counter is not updated, 3133 ** other connections to the same file might not realize that 3134 ** the file has changed and hence might not know to flush their 3135 ** cache. The use of a stale cache can lead to database corruption. 3136 */ 3137 assert( pFile->inNormalWrite==0 3138 || pFile->dbUpdate==0 3139 || pFile->transCntrChng==1 ); 3140 pFile->inNormalWrite = 0; 3141 #endif 3142 3143 if( pFile->eFileLock==EXCLUSIVE_LOCK ){ 3144 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); 3145 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ 3146 /* only re-establish the shared lock if necessary */ 3147 int sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3148 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); 3149 } else { 3150 skipShared = 1; 3151 } 3152 } 3153 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ 3154 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3155 } 3156 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ 3157 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 3158 if( !rc ){ 3159 context->reserved = 0; 3160 } 3161 } 3162 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ 3163 pInode->eFileLock = SHARED_LOCK; 3164 } 3165 } 3166 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ 3167 3168 /* Decrement the shared lock counter. Release the lock using an 3169 ** OS call only when all threads in this same process have released 3170 ** the lock. 3171 */ 3172 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3173 pInode->nShared--; 3174 if( pInode->nShared==0 ){ 3175 SimulateIOErrorBenign(1); 3176 SimulateIOError( h=(-1) ) 3177 SimulateIOErrorBenign(0); 3178 if( !skipShared ){ 3179 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); 3180 } 3181 if( !rc ){ 3182 pInode->eFileLock = NO_LOCK; 3183 pFile->eFileLock = NO_LOCK; 3184 } 3185 } 3186 if( rc==SQLITE_OK ){ 3187 pInode->nLock--; 3188 assert( pInode->nLock>=0 ); 3189 if( pInode->nLock==0 ) closePendingFds(pFile); 3190 } 3191 } 3192 3193 sqlite3_mutex_leave(pInode->pLockMutex); 3194 if( rc==SQLITE_OK ){ 3195 pFile->eFileLock = eFileLock; 3196 } 3197 return rc; 3198 } 3199 3200 /* 3201 ** Close a file & cleanup AFP specific locking context 3202 */ 3203 static int afpClose(sqlite3_file *id) { 3204 int rc = SQLITE_OK; 3205 unixFile *pFile = (unixFile*)id; 3206 assert( id!=0 ); 3207 afpUnlock(id, NO_LOCK); 3208 assert( unixFileMutexNotheld(pFile) ); 3209 unixEnterMutex(); 3210 if( pFile->pInode ){ 3211 unixInodeInfo *pInode = pFile->pInode; 3212 sqlite3_mutex_enter(pInode->pLockMutex); 3213 if( pInode->nLock ){ 3214 /* If there are outstanding locks, do not actually close the file just 3215 ** yet because that would clear those locks. Instead, add the file 3216 ** descriptor to pInode->aPending. It will be automatically closed when 3217 ** the last lock is cleared. 3218 */ 3219 setPendingFd(pFile); 3220 } 3221 sqlite3_mutex_leave(pInode->pLockMutex); 3222 } 3223 releaseInodeInfo(pFile); 3224 sqlite3_free(pFile->lockingContext); 3225 rc = closeUnixFile(id); 3226 unixLeaveMutex(); 3227 return rc; 3228 } 3229 3230 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3231 /* 3232 ** The code above is the AFP lock implementation. The code is specific 3233 ** to MacOSX and does not work on other unix platforms. No alternative 3234 ** is available. If you don't compile for a mac, then the "unix-afp" 3235 ** VFS is not available. 3236 ** 3237 ********************* End of the AFP lock implementation ********************** 3238 ******************************************************************************/ 3239 3240 /****************************************************************************** 3241 *************************** Begin NFS Locking ********************************/ 3242 3243 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 3244 /* 3245 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3246 ** must be either NO_LOCK or SHARED_LOCK. 3247 ** 3248 ** If the locking level of the file descriptor is already at or below 3249 ** the requested locking level, this routine is a no-op. 3250 */ 3251 static int nfsUnlock(sqlite3_file *id, int eFileLock){ 3252 return posixUnlock(id, eFileLock, 1); 3253 } 3254 3255 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3256 /* 3257 ** The code above is the NFS lock implementation. The code is specific 3258 ** to MacOSX and does not work on other unix platforms. No alternative 3259 ** is available. 3260 ** 3261 ********************* End of the NFS lock implementation ********************** 3262 ******************************************************************************/ 3263 3264 /****************************************************************************** 3265 **************** Non-locking sqlite3_file methods ***************************** 3266 ** 3267 ** The next division contains implementations for all methods of the 3268 ** sqlite3_file object other than the locking methods. The locking 3269 ** methods were defined in divisions above (one locking method per 3270 ** division). Those methods that are common to all locking modes 3271 ** are gather together into this division. 3272 */ 3273 3274 /* 3275 ** Seek to the offset passed as the second argument, then read cnt 3276 ** bytes into pBuf. Return the number of bytes actually read. 3277 ** 3278 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 3279 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 3280 ** one system to another. Since SQLite does not define USE_PREAD 3281 ** in any form by default, we will not attempt to define _XOPEN_SOURCE. 3282 ** See tickets #2741 and #2681. 3283 ** 3284 ** To avoid stomping the errno value on a failed read the lastErrno value 3285 ** is set before returning. 3286 */ 3287 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 3288 int got; 3289 int prior = 0; 3290 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 3291 i64 newOffset; 3292 #endif 3293 TIMER_START; 3294 assert( cnt==(cnt&0x1ffff) ); 3295 assert( id->h>2 ); 3296 do{ 3297 #if defined(USE_PREAD) 3298 got = osPread(id->h, pBuf, cnt, offset); 3299 SimulateIOError( got = -1 ); 3300 #elif defined(USE_PREAD64) 3301 got = osPread64(id->h, pBuf, cnt, offset); 3302 SimulateIOError( got = -1 ); 3303 #else 3304 newOffset = lseek(id->h, offset, SEEK_SET); 3305 SimulateIOError( newOffset = -1 ); 3306 if( newOffset<0 ){ 3307 storeLastErrno((unixFile*)id, errno); 3308 return -1; 3309 } 3310 got = osRead(id->h, pBuf, cnt); 3311 #endif 3312 if( got==cnt ) break; 3313 if( got<0 ){ 3314 if( errno==EINTR ){ got = 1; continue; } 3315 prior = 0; 3316 storeLastErrno((unixFile*)id, errno); 3317 break; 3318 }else if( got>0 ){ 3319 cnt -= got; 3320 offset += got; 3321 prior += got; 3322 pBuf = (void*)(got + (char*)pBuf); 3323 } 3324 }while( got>0 ); 3325 TIMER_END; 3326 OSTRACE(("READ %-3d %5d %7lld %llu\n", 3327 id->h, got+prior, offset-prior, TIMER_ELAPSED)); 3328 return got+prior; 3329 } 3330 3331 /* 3332 ** Read data from a file into a buffer. Return SQLITE_OK if all 3333 ** bytes were read successfully and SQLITE_IOERR if anything goes 3334 ** wrong. 3335 */ 3336 static int unixRead( 3337 sqlite3_file *id, 3338 void *pBuf, 3339 int amt, 3340 sqlite3_int64 offset 3341 ){ 3342 unixFile *pFile = (unixFile *)id; 3343 int got; 3344 assert( id ); 3345 assert( offset>=0 ); 3346 assert( amt>0 ); 3347 3348 /* If this is a database file (not a journal, super-journal or temp 3349 ** file), the bytes in the locking range should never be read or written. */ 3350 #if 0 3351 assert( pFile->pPreallocatedUnused==0 3352 || offset>=PENDING_BYTE+512 3353 || offset+amt<=PENDING_BYTE 3354 ); 3355 #endif 3356 3357 #if SQLITE_MAX_MMAP_SIZE>0 3358 /* Deal with as much of this read request as possible by transfering 3359 ** data from the memory mapping using memcpy(). */ 3360 if( offset<pFile->mmapSize ){ 3361 if( offset+amt <= pFile->mmapSize ){ 3362 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); 3363 return SQLITE_OK; 3364 }else{ 3365 int nCopy = pFile->mmapSize - offset; 3366 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); 3367 pBuf = &((u8 *)pBuf)[nCopy]; 3368 amt -= nCopy; 3369 offset += nCopy; 3370 } 3371 } 3372 #endif 3373 3374 got = seekAndRead(pFile, offset, pBuf, amt); 3375 if( got==amt ){ 3376 return SQLITE_OK; 3377 }else if( got<0 ){ 3378 /* pFile->lastErrno has been set by seekAndRead(). 3379 ** Usually we return SQLITE_IOERR_READ here, though for some 3380 ** kinds of errors we return SQLITE_IOERR_CORRUPTFS. The 3381 ** SQLITE_IOERR_CORRUPTFS will be converted into SQLITE_CORRUPT 3382 ** prior to returning to the application by the sqlite3ApiExit() 3383 ** routine. 3384 */ 3385 switch( pFile->lastErrno ){ 3386 case ERANGE: 3387 case EIO: 3388 #ifdef ENXIO 3389 case ENXIO: 3390 #endif 3391 #ifdef EDEVERR 3392 case EDEVERR: 3393 #endif 3394 return SQLITE_IOERR_CORRUPTFS; 3395 } 3396 return SQLITE_IOERR_READ; 3397 }else{ 3398 storeLastErrno(pFile, 0); /* not a system error */ 3399 /* Unread parts of the buffer must be zero-filled */ 3400 memset(&((char*)pBuf)[got], 0, amt-got); 3401 return SQLITE_IOERR_SHORT_READ; 3402 } 3403 } 3404 3405 /* 3406 ** Attempt to seek the file-descriptor passed as the first argument to 3407 ** absolute offset iOff, then attempt to write nBuf bytes of data from 3408 ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 3409 ** return the actual number of bytes written (which may be less than 3410 ** nBuf). 3411 */ 3412 static int seekAndWriteFd( 3413 int fd, /* File descriptor to write to */ 3414 i64 iOff, /* File offset to begin writing at */ 3415 const void *pBuf, /* Copy data from this buffer to the file */ 3416 int nBuf, /* Size of buffer pBuf in bytes */ 3417 int *piErrno /* OUT: Error number if error occurs */ 3418 ){ 3419 int rc = 0; /* Value returned by system call */ 3420 3421 assert( nBuf==(nBuf&0x1ffff) ); 3422 assert( fd>2 ); 3423 assert( piErrno!=0 ); 3424 nBuf &= 0x1ffff; 3425 TIMER_START; 3426 3427 #if defined(USE_PREAD) 3428 do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); 3429 #elif defined(USE_PREAD64) 3430 do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); 3431 #else 3432 do{ 3433 i64 iSeek = lseek(fd, iOff, SEEK_SET); 3434 SimulateIOError( iSeek = -1 ); 3435 if( iSeek<0 ){ 3436 rc = -1; 3437 break; 3438 } 3439 rc = osWrite(fd, pBuf, nBuf); 3440 }while( rc<0 && errno==EINTR ); 3441 #endif 3442 3443 TIMER_END; 3444 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); 3445 3446 if( rc<0 ) *piErrno = errno; 3447 return rc; 3448 } 3449 3450 3451 /* 3452 ** Seek to the offset in id->offset then read cnt bytes into pBuf. 3453 ** Return the number of bytes actually read. Update the offset. 3454 ** 3455 ** To avoid stomping the errno value on a failed write the lastErrno value 3456 ** is set before returning. 3457 */ 3458 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 3459 return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); 3460 } 3461 3462 3463 /* 3464 ** Write data from a buffer into a file. Return SQLITE_OK on success 3465 ** or some other error code on failure. 3466 */ 3467 static int unixWrite( 3468 sqlite3_file *id, 3469 const void *pBuf, 3470 int amt, 3471 sqlite3_int64 offset 3472 ){ 3473 unixFile *pFile = (unixFile*)id; 3474 int wrote = 0; 3475 assert( id ); 3476 assert( amt>0 ); 3477 3478 /* If this is a database file (not a journal, super-journal or temp 3479 ** file), the bytes in the locking range should never be read or written. */ 3480 #if 0 3481 assert( pFile->pPreallocatedUnused==0 3482 || offset>=PENDING_BYTE+512 3483 || offset+amt<=PENDING_BYTE 3484 ); 3485 #endif 3486 3487 #ifdef SQLITE_DEBUG 3488 /* If we are doing a normal write to a database file (as opposed to 3489 ** doing a hot-journal rollback or a write to some file other than a 3490 ** normal database file) then record the fact that the database 3491 ** has changed. If the transaction counter is modified, record that 3492 ** fact too. 3493 */ 3494 if( pFile->inNormalWrite ){ 3495 pFile->dbUpdate = 1; /* The database has been modified */ 3496 if( offset<=24 && offset+amt>=27 ){ 3497 int rc; 3498 char oldCntr[4]; 3499 SimulateIOErrorBenign(1); 3500 rc = seekAndRead(pFile, 24, oldCntr, 4); 3501 SimulateIOErrorBenign(0); 3502 if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ 3503 pFile->transCntrChng = 1; /* The transaction counter has changed */ 3504 } 3505 } 3506 } 3507 #endif 3508 3509 #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 3510 /* Deal with as much of this write request as possible by transfering 3511 ** data from the memory mapping using memcpy(). */ 3512 if( offset<pFile->mmapSize ){ 3513 if( offset+amt <= pFile->mmapSize ){ 3514 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); 3515 return SQLITE_OK; 3516 }else{ 3517 int nCopy = pFile->mmapSize - offset; 3518 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); 3519 pBuf = &((u8 *)pBuf)[nCopy]; 3520 amt -= nCopy; 3521 offset += nCopy; 3522 } 3523 } 3524 #endif 3525 3526 while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ 3527 amt -= wrote; 3528 offset += wrote; 3529 pBuf = &((char*)pBuf)[wrote]; 3530 } 3531 SimulateIOError(( wrote=(-1), amt=1 )); 3532 SimulateDiskfullError(( wrote=0, amt=1 )); 3533 3534 if( amt>wrote ){ 3535 if( wrote<0 && pFile->lastErrno!=ENOSPC ){ 3536 /* lastErrno set by seekAndWrite */ 3537 return SQLITE_IOERR_WRITE; 3538 }else{ 3539 storeLastErrno(pFile, 0); /* not a system error */ 3540 return SQLITE_FULL; 3541 } 3542 } 3543 3544 return SQLITE_OK; 3545 } 3546 3547 #ifdef SQLITE_TEST 3548 /* 3549 ** Count the number of fullsyncs and normal syncs. This is used to test 3550 ** that syncs and fullsyncs are occurring at the right times. 3551 */ 3552 int sqlite3_sync_count = 0; 3553 int sqlite3_fullsync_count = 0; 3554 #endif 3555 3556 /* 3557 ** We do not trust systems to provide a working fdatasync(). Some do. 3558 ** Others do no. To be safe, we will stick with the (slightly slower) 3559 ** fsync(). If you know that your system does support fdatasync() correctly, 3560 ** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC 3561 */ 3562 #if !defined(fdatasync) && !HAVE_FDATASYNC 3563 # define fdatasync fsync 3564 #endif 3565 3566 /* 3567 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 3568 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 3569 ** only available on Mac OS X. But that could change. 3570 */ 3571 #ifdef F_FULLFSYNC 3572 # define HAVE_FULLFSYNC 1 3573 #else 3574 # define HAVE_FULLFSYNC 0 3575 #endif 3576 3577 3578 /* 3579 ** The fsync() system call does not work as advertised on many 3580 ** unix systems. The following procedure is an attempt to make 3581 ** it work better. 3582 ** 3583 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 3584 ** for testing when we want to run through the test suite quickly. 3585 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 3586 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 3587 ** or power failure will likely corrupt the database file. 3588 ** 3589 ** SQLite sets the dataOnly flag if the size of the file is unchanged. 3590 ** The idea behind dataOnly is that it should only write the file content 3591 ** to disk, not the inode. We only set dataOnly if the file size is 3592 ** unchanged since the file size is part of the inode. However, 3593 ** Ted Ts'o tells us that fdatasync() will also write the inode if the 3594 ** file size has changed. The only real difference between fdatasync() 3595 ** and fsync(), Ted tells us, is that fdatasync() will not flush the 3596 ** inode if the mtime or owner or other inode attributes have changed. 3597 ** We only care about the file size, not the other file attributes, so 3598 ** as far as SQLite is concerned, an fdatasync() is always adequate. 3599 ** So, we always use fdatasync() if it is available, regardless of 3600 ** the value of the dataOnly flag. 3601 */ 3602 static int full_fsync(int fd, int fullSync, int dataOnly){ 3603 int rc; 3604 3605 /* The following "ifdef/elif/else/" block has the same structure as 3606 ** the one below. It is replicated here solely to avoid cluttering 3607 ** up the real code with the UNUSED_PARAMETER() macros. 3608 */ 3609 #ifdef SQLITE_NO_SYNC 3610 UNUSED_PARAMETER(fd); 3611 UNUSED_PARAMETER(fullSync); 3612 UNUSED_PARAMETER(dataOnly); 3613 #elif HAVE_FULLFSYNC 3614 UNUSED_PARAMETER(dataOnly); 3615 #else 3616 UNUSED_PARAMETER(fullSync); 3617 UNUSED_PARAMETER(dataOnly); 3618 #endif 3619 3620 /* Record the number of times that we do a normal fsync() and 3621 ** FULLSYNC. This is used during testing to verify that this procedure 3622 ** gets called with the correct arguments. 3623 */ 3624 #ifdef SQLITE_TEST 3625 if( fullSync ) sqlite3_fullsync_count++; 3626 sqlite3_sync_count++; 3627 #endif 3628 3629 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 3630 ** no-op. But go ahead and call fstat() to validate the file 3631 ** descriptor as we need a method to provoke a failure during 3632 ** coverate testing. 3633 */ 3634 #ifdef SQLITE_NO_SYNC 3635 { 3636 struct stat buf; 3637 rc = osFstat(fd, &buf); 3638 } 3639 #elif HAVE_FULLFSYNC 3640 if( fullSync ){ 3641 rc = osFcntl(fd, F_FULLFSYNC, 0); 3642 }else{ 3643 rc = 1; 3644 } 3645 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 3646 ** It shouldn't be possible for fullfsync to fail on the local 3647 ** file system (on OSX), so failure indicates that FULLFSYNC 3648 ** isn't supported for this file system. So, attempt an fsync 3649 ** and (for now) ignore the overhead of a superfluous fcntl call. 3650 ** It'd be better to detect fullfsync support once and avoid 3651 ** the fcntl call every time sync is called. 3652 */ 3653 if( rc ) rc = fsync(fd); 3654 3655 #elif defined(__APPLE__) 3656 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly 3657 ** so currently we default to the macro that redefines fdatasync to fsync 3658 */ 3659 rc = fsync(fd); 3660 #else 3661 rc = fdatasync(fd); 3662 #if OS_VXWORKS 3663 if( rc==-1 && errno==ENOTSUP ){ 3664 rc = fsync(fd); 3665 } 3666 #endif /* OS_VXWORKS */ 3667 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ 3668 3669 if( OS_VXWORKS && rc!= -1 ){ 3670 rc = 0; 3671 } 3672 return rc; 3673 } 3674 3675 /* 3676 ** Open a file descriptor to the directory containing file zFilename. 3677 ** If successful, *pFd is set to the opened file descriptor and 3678 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 3679 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 3680 ** value. 3681 ** 3682 ** The directory file descriptor is used for only one thing - to 3683 ** fsync() a directory to make sure file creation and deletion events 3684 ** are flushed to disk. Such fsyncs are not needed on newer 3685 ** journaling filesystems, but are required on older filesystems. 3686 ** 3687 ** This routine can be overridden using the xSetSysCall interface. 3688 ** The ability to override this routine was added in support of the 3689 ** chromium sandbox. Opening a directory is a security risk (we are 3690 ** told) so making it overrideable allows the chromium sandbox to 3691 ** replace this routine with a harmless no-op. To make this routine 3692 ** a no-op, replace it with a stub that returns SQLITE_OK but leaves 3693 ** *pFd set to a negative number. 3694 ** 3695 ** If SQLITE_OK is returned, the caller is responsible for closing 3696 ** the file descriptor *pFd using close(). 3697 */ 3698 static int openDirectory(const char *zFilename, int *pFd){ 3699 int ii; 3700 int fd = -1; 3701 char zDirname[MAX_PATHNAME+1]; 3702 3703 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 3704 for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); 3705 if( ii>0 ){ 3706 zDirname[ii] = '\0'; 3707 }else{ 3708 if( zDirname[0]!='/' ) zDirname[0] = '.'; 3709 zDirname[1] = 0; 3710 } 3711 fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); 3712 if( fd>=0 ){ 3713 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); 3714 } 3715 *pFd = fd; 3716 if( fd>=0 ) return SQLITE_OK; 3717 return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); 3718 } 3719 3720 /* 3721 ** Make sure all writes to a particular file are committed to disk. 3722 ** 3723 ** If dataOnly==0 then both the file itself and its metadata (file 3724 ** size, access time, etc) are synced. If dataOnly!=0 then only the 3725 ** file data is synced. 3726 ** 3727 ** Under Unix, also make sure that the directory entry for the file 3728 ** has been created by fsync-ing the directory that contains the file. 3729 ** If we do not do this and we encounter a power failure, the directory 3730 ** entry for the journal might not exist after we reboot. The next 3731 ** SQLite to access the file will not know that the journal exists (because 3732 ** the directory entry for the journal was never created) and the transaction 3733 ** will not roll back - possibly leading to database corruption. 3734 */ 3735 static int unixSync(sqlite3_file *id, int flags){ 3736 int rc; 3737 unixFile *pFile = (unixFile*)id; 3738 3739 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 3740 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 3741 3742 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 3743 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 3744 || (flags&0x0F)==SQLITE_SYNC_FULL 3745 ); 3746 3747 /* Unix cannot, but some systems may return SQLITE_FULL from here. This 3748 ** line is to test that doing so does not cause any problems. 3749 */ 3750 SimulateDiskfullError( return SQLITE_FULL ); 3751 3752 assert( pFile ); 3753 OSTRACE(("SYNC %-3d\n", pFile->h)); 3754 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 3755 SimulateIOError( rc=1 ); 3756 if( rc ){ 3757 storeLastErrno(pFile, errno); 3758 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); 3759 } 3760 3761 /* Also fsync the directory containing the file if the DIRSYNC flag 3762 ** is set. This is a one-time occurrence. Many systems (examples: AIX) 3763 ** are unable to fsync a directory, so ignore errors on the fsync. 3764 */ 3765 if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ 3766 int dirfd; 3767 OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, 3768 HAVE_FULLFSYNC, isFullsync)); 3769 rc = osOpenDirectory(pFile->zPath, &dirfd); 3770 if( rc==SQLITE_OK ){ 3771 full_fsync(dirfd, 0, 0); 3772 robust_close(pFile, dirfd, __LINE__); 3773 }else{ 3774 assert( rc==SQLITE_CANTOPEN ); 3775 rc = SQLITE_OK; 3776 } 3777 pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; 3778 } 3779 return rc; 3780 } 3781 3782 /* 3783 ** Truncate an open file to a specified size 3784 */ 3785 static int unixTruncate(sqlite3_file *id, i64 nByte){ 3786 unixFile *pFile = (unixFile *)id; 3787 int rc; 3788 assert( pFile ); 3789 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 3790 3791 /* If the user has configured a chunk-size for this file, truncate the 3792 ** file so that it consists of an integer number of chunks (i.e. the 3793 ** actual file size after the operation may be larger than the requested 3794 ** size). 3795 */ 3796 if( pFile->szChunk>0 ){ 3797 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; 3798 } 3799 3800 rc = robust_ftruncate(pFile->h, nByte); 3801 if( rc ){ 3802 storeLastErrno(pFile, errno); 3803 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3804 }else{ 3805 #ifdef SQLITE_DEBUG 3806 /* If we are doing a normal write to a database file (as opposed to 3807 ** doing a hot-journal rollback or a write to some file other than a 3808 ** normal database file) and we truncate the file to zero length, 3809 ** that effectively updates the change counter. This might happen 3810 ** when restoring a database using the backup API from a zero-length 3811 ** source. 3812 */ 3813 if( pFile->inNormalWrite && nByte==0 ){ 3814 pFile->transCntrChng = 1; 3815 } 3816 #endif 3817 3818 #if SQLITE_MAX_MMAP_SIZE>0 3819 /* If the file was just truncated to a size smaller than the currently 3820 ** mapped region, reduce the effective mapping size as well. SQLite will 3821 ** use read() and write() to access data beyond this point from now on. 3822 */ 3823 if( nByte<pFile->mmapSize ){ 3824 pFile->mmapSize = nByte; 3825 } 3826 #endif 3827 3828 return SQLITE_OK; 3829 } 3830 } 3831 3832 /* 3833 ** Determine the current size of a file in bytes 3834 */ 3835 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 3836 int rc; 3837 struct stat buf; 3838 assert( id ); 3839 rc = osFstat(((unixFile*)id)->h, &buf); 3840 SimulateIOError( rc=1 ); 3841 if( rc!=0 ){ 3842 storeLastErrno((unixFile*)id, errno); 3843 return SQLITE_IOERR_FSTAT; 3844 } 3845 *pSize = buf.st_size; 3846 3847 /* When opening a zero-size database, the findInodeInfo() procedure 3848 ** writes a single byte into that file in order to work around a bug 3849 ** in the OS-X msdos filesystem. In order to avoid problems with upper 3850 ** layers, we need to report this file size as zero even though it is 3851 ** really 1. Ticket #3260. 3852 */ 3853 if( *pSize==1 ) *pSize = 0; 3854 3855 3856 return SQLITE_OK; 3857 } 3858 3859 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3860 /* 3861 ** Handler for proxy-locking file-control verbs. Defined below in the 3862 ** proxying locking division. 3863 */ 3864 static int proxyFileControl(sqlite3_file*,int,void*); 3865 #endif 3866 3867 /* 3868 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 3869 ** file-control operation. Enlarge the database to nBytes in size 3870 ** (rounded up to the next chunk-size). If the database is already 3871 ** nBytes or larger, this routine is a no-op. 3872 */ 3873 static int fcntlSizeHint(unixFile *pFile, i64 nByte){ 3874 if( pFile->szChunk>0 ){ 3875 i64 nSize; /* Required file size */ 3876 struct stat buf; /* Used to hold return values of fstat() */ 3877 3878 if( osFstat(pFile->h, &buf) ){ 3879 return SQLITE_IOERR_FSTAT; 3880 } 3881 3882 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; 3883 if( nSize>(i64)buf.st_size ){ 3884 3885 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 3886 /* The code below is handling the return value of osFallocate() 3887 ** correctly. posix_fallocate() is defined to "returns zero on success, 3888 ** or an error number on failure". See the manpage for details. */ 3889 int err; 3890 do{ 3891 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); 3892 }while( err==EINTR ); 3893 if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE; 3894 #else 3895 /* If the OS does not have posix_fallocate(), fake it. Write a 3896 ** single byte to the last byte in each block that falls entirely 3897 ** within the extended region. Then, if required, a single byte 3898 ** at offset (nSize-1), to set the size of the file correctly. 3899 ** This is a similar technique to that used by glibc on systems 3900 ** that do not have a real fallocate() call. 3901 */ 3902 int nBlk = buf.st_blksize; /* File-system block size */ 3903 int nWrite = 0; /* Number of bytes written by seekAndWrite */ 3904 i64 iWrite; /* Next offset to write to */ 3905 3906 iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; 3907 assert( iWrite>=buf.st_size ); 3908 assert( ((iWrite+1)%nBlk)==0 ); 3909 for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){ 3910 if( iWrite>=nSize ) iWrite = nSize - 1; 3911 nWrite = seekAndWrite(pFile, iWrite, "", 1); 3912 if( nWrite!=1 ) return SQLITE_IOERR_WRITE; 3913 } 3914 #endif 3915 } 3916 } 3917 3918 #if SQLITE_MAX_MMAP_SIZE>0 3919 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ 3920 int rc; 3921 if( pFile->szChunk<=0 ){ 3922 if( robust_ftruncate(pFile->h, nByte) ){ 3923 storeLastErrno(pFile, errno); 3924 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3925 } 3926 } 3927 3928 rc = unixMapfile(pFile, nByte); 3929 return rc; 3930 } 3931 #endif 3932 3933 return SQLITE_OK; 3934 } 3935 3936 /* 3937 ** If *pArg is initially negative then this is a query. Set *pArg to 3938 ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. 3939 ** 3940 ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. 3941 */ 3942 static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ 3943 if( *pArg<0 ){ 3944 *pArg = (pFile->ctrlFlags & mask)!=0; 3945 }else if( (*pArg)==0 ){ 3946 pFile->ctrlFlags &= ~mask; 3947 }else{ 3948 pFile->ctrlFlags |= mask; 3949 } 3950 } 3951 3952 /* Forward declaration */ 3953 static int unixGetTempname(int nBuf, char *zBuf); 3954 #ifndef SQLITE_OMIT_WAL 3955 static int unixFcntlExternalReader(unixFile*, int*); 3956 #endif 3957 3958 /* 3959 ** Information and control of an open file handle. 3960 */ 3961 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 3962 unixFile *pFile = (unixFile*)id; 3963 switch( op ){ 3964 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 3965 case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { 3966 int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); 3967 return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; 3968 } 3969 case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { 3970 int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); 3971 return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; 3972 } 3973 case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { 3974 int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); 3975 return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; 3976 } 3977 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 3978 3979 case SQLITE_FCNTL_LOCKSTATE: { 3980 *(int*)pArg = pFile->eFileLock; 3981 return SQLITE_OK; 3982 } 3983 case SQLITE_FCNTL_LAST_ERRNO: { 3984 *(int*)pArg = pFile->lastErrno; 3985 return SQLITE_OK; 3986 } 3987 case SQLITE_FCNTL_CHUNK_SIZE: { 3988 pFile->szChunk = *(int *)pArg; 3989 return SQLITE_OK; 3990 } 3991 case SQLITE_FCNTL_SIZE_HINT: { 3992 int rc; 3993 SimulateIOErrorBenign(1); 3994 rc = fcntlSizeHint(pFile, *(i64 *)pArg); 3995 SimulateIOErrorBenign(0); 3996 return rc; 3997 } 3998 case SQLITE_FCNTL_PERSIST_WAL: { 3999 unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); 4000 return SQLITE_OK; 4001 } 4002 case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { 4003 unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); 4004 return SQLITE_OK; 4005 } 4006 case SQLITE_FCNTL_VFSNAME: { 4007 *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); 4008 return SQLITE_OK; 4009 } 4010 case SQLITE_FCNTL_TEMPFILENAME: { 4011 char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); 4012 if( zTFile ){ 4013 unixGetTempname(pFile->pVfs->mxPathname, zTFile); 4014 *(char**)pArg = zTFile; 4015 } 4016 return SQLITE_OK; 4017 } 4018 case SQLITE_FCNTL_HAS_MOVED: { 4019 *(int*)pArg = fileHasMoved(pFile); 4020 return SQLITE_OK; 4021 } 4022 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4023 case SQLITE_FCNTL_LOCK_TIMEOUT: { 4024 int iOld = pFile->iBusyTimeout; 4025 pFile->iBusyTimeout = *(int*)pArg; 4026 *(int*)pArg = iOld; 4027 return SQLITE_OK; 4028 } 4029 #endif 4030 #if SQLITE_MAX_MMAP_SIZE>0 4031 case SQLITE_FCNTL_MMAP_SIZE: { 4032 i64 newLimit = *(i64*)pArg; 4033 int rc = SQLITE_OK; 4034 if( newLimit>sqlite3GlobalConfig.mxMmap ){ 4035 newLimit = sqlite3GlobalConfig.mxMmap; 4036 } 4037 4038 /* The value of newLimit may be eventually cast to (size_t) and passed 4039 ** to mmap(). Restrict its value to 2GB if (size_t) is not at least a 4040 ** 64-bit type. */ 4041 if( newLimit>0 && sizeof(size_t)<8 ){ 4042 newLimit = (newLimit & 0x7FFFFFFF); 4043 } 4044 4045 *(i64*)pArg = pFile->mmapSizeMax; 4046 if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ 4047 pFile->mmapSizeMax = newLimit; 4048 if( pFile->mmapSize>0 ){ 4049 unixUnmapfile(pFile); 4050 rc = unixMapfile(pFile, -1); 4051 } 4052 } 4053 return rc; 4054 } 4055 #endif 4056 #ifdef SQLITE_DEBUG 4057 /* The pager calls this method to signal that it has done 4058 ** a rollback and that the database is therefore unchanged and 4059 ** it hence it is OK for the transaction change counter to be 4060 ** unchanged. 4061 */ 4062 case SQLITE_FCNTL_DB_UNCHANGED: { 4063 ((unixFile*)id)->dbUpdate = 0; 4064 return SQLITE_OK; 4065 } 4066 #endif 4067 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 4068 case SQLITE_FCNTL_SET_LOCKPROXYFILE: 4069 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 4070 return proxyFileControl(id,op,pArg); 4071 } 4072 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 4073 4074 case SQLITE_FCNTL_EXTERNAL_READER: { 4075 #ifndef SQLITE_OMIT_WAL 4076 return unixFcntlExternalReader((unixFile*)id, (int*)pArg); 4077 #else 4078 *(int*)pArg = 0; 4079 return SQLITE_OK; 4080 #endif 4081 } 4082 } 4083 return SQLITE_NOTFOUND; 4084 } 4085 4086 /* 4087 ** If pFd->sectorSize is non-zero when this function is called, it is a 4088 ** no-op. Otherwise, the values of pFd->sectorSize and 4089 ** pFd->deviceCharacteristics are set according to the file-system 4090 ** characteristics. 4091 ** 4092 ** There are two versions of this function. One for QNX and one for all 4093 ** other systems. 4094 */ 4095 #ifndef __QNXNTO__ 4096 static void setDeviceCharacteristics(unixFile *pFd){ 4097 assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); 4098 if( pFd->sectorSize==0 ){ 4099 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 4100 int res; 4101 u32 f = 0; 4102 4103 /* Check for support for F2FS atomic batch writes. */ 4104 res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); 4105 if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ 4106 pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; 4107 } 4108 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 4109 4110 /* Set the POWERSAFE_OVERWRITE flag if requested. */ 4111 if( pFd->ctrlFlags & UNIXFILE_PSOW ){ 4112 pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; 4113 } 4114 4115 pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4116 } 4117 } 4118 #else 4119 #include <sys/dcmd_blk.h> 4120 #include <sys/statvfs.h> 4121 static void setDeviceCharacteristics(unixFile *pFile){ 4122 if( pFile->sectorSize == 0 ){ 4123 struct statvfs fsInfo; 4124 4125 /* Set defaults for non-supported filesystems */ 4126 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4127 pFile->deviceCharacteristics = 0; 4128 if( fstatvfs(pFile->h, &fsInfo) == -1 ) { 4129 return; 4130 } 4131 4132 if( !strcmp(fsInfo.f_basetype, "tmp") ) { 4133 pFile->sectorSize = fsInfo.f_bsize; 4134 pFile->deviceCharacteristics = 4135 SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ 4136 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4137 ** the write succeeds */ 4138 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4139 ** so it is ordered */ 4140 0; 4141 }else if( strstr(fsInfo.f_basetype, "etfs") ){ 4142 pFile->sectorSize = fsInfo.f_bsize; 4143 pFile->deviceCharacteristics = 4144 /* etfs cluster size writes are atomic */ 4145 (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | 4146 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4147 ** the write succeeds */ 4148 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4149 ** so it is ordered */ 4150 0; 4151 }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ 4152 pFile->sectorSize = fsInfo.f_bsize; 4153 pFile->deviceCharacteristics = 4154 SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ 4155 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4156 ** the write succeeds */ 4157 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4158 ** so it is ordered */ 4159 0; 4160 }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ 4161 pFile->sectorSize = fsInfo.f_bsize; 4162 pFile->deviceCharacteristics = 4163 /* full bitset of atomics from max sector size and smaller */ 4164 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4165 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4166 ** so it is ordered */ 4167 0; 4168 }else if( strstr(fsInfo.f_basetype, "dos") ){ 4169 pFile->sectorSize = fsInfo.f_bsize; 4170 pFile->deviceCharacteristics = 4171 /* full bitset of atomics from max sector size and smaller */ 4172 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4173 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4174 ** so it is ordered */ 4175 0; 4176 }else{ 4177 pFile->deviceCharacteristics = 4178 SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ 4179 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4180 ** the write succeeds */ 4181 0; 4182 } 4183 } 4184 /* Last chance verification. If the sector size isn't a multiple of 512 4185 ** then it isn't valid.*/ 4186 if( pFile->sectorSize % 512 != 0 ){ 4187 pFile->deviceCharacteristics = 0; 4188 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4189 } 4190 } 4191 #endif 4192 4193 /* 4194 ** Return the sector size in bytes of the underlying block device for 4195 ** the specified file. This is almost always 512 bytes, but may be 4196 ** larger for some devices. 4197 ** 4198 ** SQLite code assumes this function cannot fail. It also assumes that 4199 ** if two files are created in the same file-system directory (i.e. 4200 ** a database and its journal file) that the sector size will be the 4201 ** same for both. 4202 */ 4203 static int unixSectorSize(sqlite3_file *id){ 4204 unixFile *pFd = (unixFile*)id; 4205 setDeviceCharacteristics(pFd); 4206 return pFd->sectorSize; 4207 } 4208 4209 /* 4210 ** Return the device characteristics for the file. 4211 ** 4212 ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. 4213 ** However, that choice is controversial since technically the underlying 4214 ** file system does not always provide powersafe overwrites. (In other 4215 ** words, after a power-loss event, parts of the file that were never 4216 ** written might end up being altered.) However, non-PSOW behavior is very, 4217 ** very rare. And asserting PSOW makes a large reduction in the amount 4218 ** of required I/O for journaling, since a lot of padding is eliminated. 4219 ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control 4220 ** available to turn it off and URI query parameter available to turn it off. 4221 */ 4222 static int unixDeviceCharacteristics(sqlite3_file *id){ 4223 unixFile *pFd = (unixFile*)id; 4224 setDeviceCharacteristics(pFd); 4225 return pFd->deviceCharacteristics; 4226 } 4227 4228 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 4229 4230 /* 4231 ** Return the system page size. 4232 ** 4233 ** This function should not be called directly by other code in this file. 4234 ** Instead, it should be called via macro osGetpagesize(). 4235 */ 4236 static int unixGetpagesize(void){ 4237 #if OS_VXWORKS 4238 return 1024; 4239 #elif defined(_BSD_SOURCE) 4240 return getpagesize(); 4241 #else 4242 return (int)sysconf(_SC_PAGESIZE); 4243 #endif 4244 } 4245 4246 #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ 4247 4248 #ifndef SQLITE_OMIT_WAL 4249 4250 /* 4251 ** Object used to represent an shared memory buffer. 4252 ** 4253 ** When multiple threads all reference the same wal-index, each thread 4254 ** has its own unixShm object, but they all point to a single instance 4255 ** of this unixShmNode object. In other words, each wal-index is opened 4256 ** only once per process. 4257 ** 4258 ** Each unixShmNode object is connected to a single unixInodeInfo object. 4259 ** We could coalesce this object into unixInodeInfo, but that would mean 4260 ** every open file that does not use shared memory (in other words, most 4261 ** open files) would have to carry around this extra information. So 4262 ** the unixInodeInfo object contains a pointer to this unixShmNode object 4263 ** and the unixShmNode object is created only when needed. 4264 ** 4265 ** unixMutexHeld() must be true when creating or destroying 4266 ** this object or while reading or writing the following fields: 4267 ** 4268 ** nRef 4269 ** 4270 ** The following fields are read-only after the object is created: 4271 ** 4272 ** hShm 4273 ** zFilename 4274 ** 4275 ** Either unixShmNode.pShmMutex must be held or unixShmNode.nRef==0 and 4276 ** unixMutexHeld() is true when reading or writing any other field 4277 ** in this structure. 4278 */ 4279 struct unixShmNode { 4280 unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ 4281 sqlite3_mutex *pShmMutex; /* Mutex to access this object */ 4282 char *zFilename; /* Name of the mmapped file */ 4283 int hShm; /* Open file descriptor */ 4284 int szRegion; /* Size of shared-memory regions */ 4285 u16 nRegion; /* Size of array apRegion */ 4286 u8 isReadonly; /* True if read-only */ 4287 u8 isUnlocked; /* True if no DMS lock held */ 4288 char **apRegion; /* Array of mapped shared-memory regions */ 4289 int nRef; /* Number of unixShm objects pointing to this */ 4290 unixShm *pFirst; /* All unixShm objects pointing to this */ 4291 int aLock[SQLITE_SHM_NLOCK]; /* # shared locks on slot, -1==excl lock */ 4292 #ifdef SQLITE_DEBUG 4293 u8 exclMask; /* Mask of exclusive locks held */ 4294 u8 sharedMask; /* Mask of shared locks held */ 4295 u8 nextShmId; /* Next available unixShm.id value */ 4296 #endif 4297 }; 4298 4299 /* 4300 ** Structure used internally by this VFS to record the state of an 4301 ** open shared memory connection. 4302 ** 4303 ** The following fields are initialized when this object is created and 4304 ** are read-only thereafter: 4305 ** 4306 ** unixShm.pShmNode 4307 ** unixShm.id 4308 ** 4309 ** All other fields are read/write. The unixShm.pShmNode->pShmMutex must 4310 ** be held while accessing any read/write fields. 4311 */ 4312 struct unixShm { 4313 unixShmNode *pShmNode; /* The underlying unixShmNode object */ 4314 unixShm *pNext; /* Next unixShm with the same unixShmNode */ 4315 u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ 4316 u8 id; /* Id of this connection within its unixShmNode */ 4317 u16 sharedMask; /* Mask of shared locks held */ 4318 u16 exclMask; /* Mask of exclusive locks held */ 4319 }; 4320 4321 /* 4322 ** Constants used for locking 4323 */ 4324 #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ 4325 #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ 4326 4327 /* 4328 ** Use F_GETLK to check whether or not there are any readers with open 4329 ** wal-mode transactions in other processes on database file pFile. If 4330 ** no error occurs, return SQLITE_OK and set (*piOut) to 1 if there are 4331 ** such transactions, or 0 otherwise. If an error occurs, return an 4332 ** SQLite error code. The final value of *piOut is undefined in this 4333 ** case. 4334 */ 4335 static int unixFcntlExternalReader(unixFile *pFile, int *piOut){ 4336 int rc = SQLITE_OK; 4337 *piOut = 0; 4338 if( pFile->pShm){ 4339 unixShmNode *pShmNode = pFile->pShm->pShmNode; 4340 struct flock f; 4341 4342 memset(&f, 0, sizeof(f)); 4343 f.l_type = F_WRLCK; 4344 f.l_whence = SEEK_SET; 4345 f.l_start = UNIX_SHM_BASE + 3; 4346 f.l_len = SQLITE_SHM_NLOCK - 3; 4347 4348 sqlite3_mutex_enter(pShmNode->pShmMutex); 4349 if( osFcntl(pShmNode->hShm, F_GETLK, &f)<0 ){ 4350 rc = SQLITE_IOERR_LOCK; 4351 }else{ 4352 *piOut = (f.l_type!=F_UNLCK); 4353 } 4354 sqlite3_mutex_leave(pShmNode->pShmMutex); 4355 } 4356 4357 return rc; 4358 } 4359 4360 4361 /* 4362 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 4363 ** 4364 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 4365 ** otherwise. 4366 */ 4367 static int unixShmSystemLock( 4368 unixFile *pFile, /* Open connection to the WAL file */ 4369 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 4370 int ofst, /* First byte of the locking range */ 4371 int n /* Number of bytes to lock */ 4372 ){ 4373 unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ 4374 struct flock f; /* The posix advisory locking structure */ 4375 int rc = SQLITE_OK; /* Result code form fcntl() */ 4376 4377 /* Access to the unixShmNode object is serialized by the caller */ 4378 pShmNode = pFile->pInode->pShmNode; 4379 assert( pShmNode->nRef==0 || sqlite3_mutex_held(pShmNode->pShmMutex) ); 4380 assert( pShmNode->nRef>0 || unixMutexHeld() ); 4381 4382 /* Shared locks never span more than one byte */ 4383 assert( n==1 || lockType!=F_RDLCK ); 4384 4385 /* Locks are within range */ 4386 assert( n>=1 && n<=SQLITE_SHM_NLOCK ); 4387 4388 if( pShmNode->hShm>=0 ){ 4389 int res; 4390 /* Initialize the locking parameters */ 4391 f.l_type = lockType; 4392 f.l_whence = SEEK_SET; 4393 f.l_start = ofst; 4394 f.l_len = n; 4395 res = osSetPosixAdvisoryLock(pShmNode->hShm, &f, pFile); 4396 if( res==-1 ){ 4397 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4398 rc = (pFile->iBusyTimeout ? SQLITE_BUSY_TIMEOUT : SQLITE_BUSY); 4399 #else 4400 rc = SQLITE_BUSY; 4401 #endif 4402 } 4403 } 4404 4405 /* Update the global lock state and do debug tracing */ 4406 #ifdef SQLITE_DEBUG 4407 { u16 mask; 4408 OSTRACE(("SHM-LOCK ")); 4409 mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); 4410 if( rc==SQLITE_OK ){ 4411 if( lockType==F_UNLCK ){ 4412 OSTRACE(("unlock %d ok", ofst)); 4413 pShmNode->exclMask &= ~mask; 4414 pShmNode->sharedMask &= ~mask; 4415 }else if( lockType==F_RDLCK ){ 4416 OSTRACE(("read-lock %d ok", ofst)); 4417 pShmNode->exclMask &= ~mask; 4418 pShmNode->sharedMask |= mask; 4419 }else{ 4420 assert( lockType==F_WRLCK ); 4421 OSTRACE(("write-lock %d ok", ofst)); 4422 pShmNode->exclMask |= mask; 4423 pShmNode->sharedMask &= ~mask; 4424 } 4425 }else{ 4426 if( lockType==F_UNLCK ){ 4427 OSTRACE(("unlock %d failed", ofst)); 4428 }else if( lockType==F_RDLCK ){ 4429 OSTRACE(("read-lock failed")); 4430 }else{ 4431 assert( lockType==F_WRLCK ); 4432 OSTRACE(("write-lock %d failed", ofst)); 4433 } 4434 } 4435 OSTRACE((" - afterwards %03x,%03x\n", 4436 pShmNode->sharedMask, pShmNode->exclMask)); 4437 } 4438 #endif 4439 4440 return rc; 4441 } 4442 4443 /* 4444 ** Return the minimum number of 32KB shm regions that should be mapped at 4445 ** a time, assuming that each mapping must be an integer multiple of the 4446 ** current system page-size. 4447 ** 4448 ** Usually, this is 1. The exception seems to be systems that are configured 4449 ** to use 64KB pages - in this case each mapping must cover at least two 4450 ** shm regions. 4451 */ 4452 static int unixShmRegionPerMap(void){ 4453 int shmsz = 32*1024; /* SHM region size */ 4454 int pgsz = osGetpagesize(); /* System page size */ 4455 assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ 4456 if( pgsz<shmsz ) return 1; 4457 return pgsz/shmsz; 4458 } 4459 4460 /* 4461 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. 4462 ** 4463 ** This is not a VFS shared-memory method; it is a utility function called 4464 ** by VFS shared-memory methods. 4465 */ 4466 static void unixShmPurge(unixFile *pFd){ 4467 unixShmNode *p = pFd->pInode->pShmNode; 4468 assert( unixMutexHeld() ); 4469 if( p && ALWAYS(p->nRef==0) ){ 4470 int nShmPerMap = unixShmRegionPerMap(); 4471 int i; 4472 assert( p->pInode==pFd->pInode ); 4473 sqlite3_mutex_free(p->pShmMutex); 4474 for(i=0; i<p->nRegion; i+=nShmPerMap){ 4475 if( p->hShm>=0 ){ 4476 osMunmap(p->apRegion[i], p->szRegion); 4477 }else{ 4478 sqlite3_free(p->apRegion[i]); 4479 } 4480 } 4481 sqlite3_free(p->apRegion); 4482 if( p->hShm>=0 ){ 4483 robust_close(pFd, p->hShm, __LINE__); 4484 p->hShm = -1; 4485 } 4486 p->pInode->pShmNode = 0; 4487 sqlite3_free(p); 4488 } 4489 } 4490 4491 /* 4492 ** The DMS lock has not yet been taken on shm file pShmNode. Attempt to 4493 ** take it now. Return SQLITE_OK if successful, or an SQLite error 4494 ** code otherwise. 4495 ** 4496 ** If the DMS cannot be locked because this is a readonly_shm=1 4497 ** connection and no other process already holds a lock, return 4498 ** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. 4499 */ 4500 static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ 4501 struct flock lock; 4502 int rc = SQLITE_OK; 4503 4504 /* Use F_GETLK to determine the locks other processes are holding 4505 ** on the DMS byte. If it indicates that another process is holding 4506 ** a SHARED lock, then this process may also take a SHARED lock 4507 ** and proceed with opening the *-shm file. 4508 ** 4509 ** Or, if no other process is holding any lock, then this process 4510 ** is the first to open it. In this case take an EXCLUSIVE lock on the 4511 ** DMS byte and truncate the *-shm file to zero bytes in size. Then 4512 ** downgrade to a SHARED lock on the DMS byte. 4513 ** 4514 ** If another process is holding an EXCLUSIVE lock on the DMS byte, 4515 ** return SQLITE_BUSY to the caller (it will try again). An earlier 4516 ** version of this code attempted the SHARED lock at this point. But 4517 ** this introduced a subtle race condition: if the process holding 4518 ** EXCLUSIVE failed just before truncating the *-shm file, then this 4519 ** process might open and use the *-shm file without truncating it. 4520 ** And if the *-shm file has been corrupted by a power failure or 4521 ** system crash, the database itself may also become corrupt. */ 4522 lock.l_whence = SEEK_SET; 4523 lock.l_start = UNIX_SHM_DMS; 4524 lock.l_len = 1; 4525 lock.l_type = F_WRLCK; 4526 if( osFcntl(pShmNode->hShm, F_GETLK, &lock)!=0 ) { 4527 rc = SQLITE_IOERR_LOCK; 4528 }else if( lock.l_type==F_UNLCK ){ 4529 if( pShmNode->isReadonly ){ 4530 pShmNode->isUnlocked = 1; 4531 rc = SQLITE_READONLY_CANTINIT; 4532 }else{ 4533 rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1); 4534 /* The first connection to attach must truncate the -shm file. We 4535 ** truncate to 3 bytes (an arbitrary small number, less than the 4536 ** -shm header size) rather than 0 as a system debugging aid, to 4537 ** help detect if a -shm file truncation is legitimate or is the work 4538 ** or a rogue process. */ 4539 if( rc==SQLITE_OK && robust_ftruncate(pShmNode->hShm, 3) ){ 4540 rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate",pShmNode->zFilename); 4541 } 4542 } 4543 }else if( lock.l_type==F_WRLCK ){ 4544 rc = SQLITE_BUSY; 4545 } 4546 4547 if( rc==SQLITE_OK ){ 4548 assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK ); 4549 rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); 4550 } 4551 return rc; 4552 } 4553 4554 /* 4555 ** Open a shared-memory area associated with open database file pDbFd. 4556 ** This particular implementation uses mmapped files. 4557 ** 4558 ** The file used to implement shared-memory is in the same directory 4559 ** as the open database file and has the same name as the open database 4560 ** file with the "-shm" suffix added. For example, if the database file 4561 ** is "/home/user1/config.db" then the file that is created and mmapped 4562 ** for shared memory will be called "/home/user1/config.db-shm". 4563 ** 4564 ** Another approach to is to use files in /dev/shm or /dev/tmp or an 4565 ** some other tmpfs mount. But if a file in a different directory 4566 ** from the database file is used, then differing access permissions 4567 ** or a chroot() might cause two different processes on the same 4568 ** database to end up using different files for shared memory - 4569 ** meaning that their memory would not really be shared - resulting 4570 ** in database corruption. Nevertheless, this tmpfs file usage 4571 ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" 4572 ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time 4573 ** option results in an incompatible build of SQLite; builds of SQLite 4574 ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the 4575 ** same database file at the same time, database corruption will likely 4576 ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered 4577 ** "unsupported" and may go away in a future SQLite release. 4578 ** 4579 ** When opening a new shared-memory file, if no other instances of that 4580 ** file are currently open, in this process or in other processes, then 4581 ** the file must be truncated to zero length or have its header cleared. 4582 ** 4583 ** If the original database file (pDbFd) is using the "unix-excl" VFS 4584 ** that means that an exclusive lock is held on the database file and 4585 ** that no other processes are able to read or write the database. In 4586 ** that case, we do not really need shared memory. No shared memory 4587 ** file is created. The shared memory will be simulated with heap memory. 4588 */ 4589 static int unixOpenSharedMemory(unixFile *pDbFd){ 4590 struct unixShm *p = 0; /* The connection to be opened */ 4591 struct unixShmNode *pShmNode; /* The underlying mmapped file */ 4592 int rc = SQLITE_OK; /* Result code */ 4593 unixInodeInfo *pInode; /* The inode of fd */ 4594 char *zShm; /* Name of the file used for SHM */ 4595 int nShmFilename; /* Size of the SHM filename in bytes */ 4596 4597 /* Allocate space for the new unixShm object. */ 4598 p = sqlite3_malloc64( sizeof(*p) ); 4599 if( p==0 ) return SQLITE_NOMEM_BKPT; 4600 memset(p, 0, sizeof(*p)); 4601 assert( pDbFd->pShm==0 ); 4602 4603 /* Check to see if a unixShmNode object already exists. Reuse an existing 4604 ** one if present. Create a new one if necessary. 4605 */ 4606 assert( unixFileMutexNotheld(pDbFd) ); 4607 unixEnterMutex(); 4608 pInode = pDbFd->pInode; 4609 pShmNode = pInode->pShmNode; 4610 if( pShmNode==0 ){ 4611 struct stat sStat; /* fstat() info for database file */ 4612 #ifndef SQLITE_SHM_DIRECTORY 4613 const char *zBasePath = pDbFd->zPath; 4614 #endif 4615 4616 /* Call fstat() to figure out the permissions on the database file. If 4617 ** a new *-shm file is created, an attempt will be made to create it 4618 ** with the same permissions. 4619 */ 4620 if( osFstat(pDbFd->h, &sStat) ){ 4621 rc = SQLITE_IOERR_FSTAT; 4622 goto shm_open_err; 4623 } 4624 4625 #ifdef SQLITE_SHM_DIRECTORY 4626 nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; 4627 #else 4628 nShmFilename = 6 + (int)strlen(zBasePath); 4629 #endif 4630 pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); 4631 if( pShmNode==0 ){ 4632 rc = SQLITE_NOMEM_BKPT; 4633 goto shm_open_err; 4634 } 4635 memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); 4636 zShm = pShmNode->zFilename = (char*)&pShmNode[1]; 4637 #ifdef SQLITE_SHM_DIRECTORY 4638 sqlite3_snprintf(nShmFilename, zShm, 4639 SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", 4640 (u32)sStat.st_ino, (u32)sStat.st_dev); 4641 #else 4642 sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath); 4643 sqlite3FileSuffix3(pDbFd->zPath, zShm); 4644 #endif 4645 pShmNode->hShm = -1; 4646 pDbFd->pInode->pShmNode = pShmNode; 4647 pShmNode->pInode = pDbFd->pInode; 4648 if( sqlite3GlobalConfig.bCoreMutex ){ 4649 pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 4650 if( pShmNode->pShmMutex==0 ){ 4651 rc = SQLITE_NOMEM_BKPT; 4652 goto shm_open_err; 4653 } 4654 } 4655 4656 if( pInode->bProcessLock==0 ){ 4657 if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ 4658 pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT|O_NOFOLLOW, 4659 (sStat.st_mode&0777)); 4660 } 4661 if( pShmNode->hShm<0 ){ 4662 pShmNode->hShm = robust_open(zShm, O_RDONLY|O_NOFOLLOW, 4663 (sStat.st_mode&0777)); 4664 if( pShmNode->hShm<0 ){ 4665 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm); 4666 goto shm_open_err; 4667 } 4668 pShmNode->isReadonly = 1; 4669 } 4670 4671 /* If this process is running as root, make sure that the SHM file 4672 ** is owned by the same user that owns the original database. Otherwise, 4673 ** the original owner will not be able to connect. 4674 */ 4675 robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid); 4676 4677 rc = unixLockSharedMemory(pDbFd, pShmNode); 4678 if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err; 4679 } 4680 } 4681 4682 /* Make the new connection a child of the unixShmNode */ 4683 p->pShmNode = pShmNode; 4684 #ifdef SQLITE_DEBUG 4685 p->id = pShmNode->nextShmId++; 4686 #endif 4687 pShmNode->nRef++; 4688 pDbFd->pShm = p; 4689 unixLeaveMutex(); 4690 4691 /* The reference count on pShmNode has already been incremented under 4692 ** the cover of the unixEnterMutex() mutex and the pointer from the 4693 ** new (struct unixShm) object to the pShmNode has been set. All that is 4694 ** left to do is to link the new object into the linked list starting 4695 ** at pShmNode->pFirst. This must be done while holding the 4696 ** pShmNode->pShmMutex. 4697 */ 4698 sqlite3_mutex_enter(pShmNode->pShmMutex); 4699 p->pNext = pShmNode->pFirst; 4700 pShmNode->pFirst = p; 4701 sqlite3_mutex_leave(pShmNode->pShmMutex); 4702 return rc; 4703 4704 /* Jump here on any error */ 4705 shm_open_err: 4706 unixShmPurge(pDbFd); /* This call frees pShmNode if required */ 4707 sqlite3_free(p); 4708 unixLeaveMutex(); 4709 return rc; 4710 } 4711 4712 /* 4713 ** This function is called to obtain a pointer to region iRegion of the 4714 ** shared-memory associated with the database file fd. Shared-memory regions 4715 ** are numbered starting from zero. Each shared-memory region is szRegion 4716 ** bytes in size. 4717 ** 4718 ** If an error occurs, an error code is returned and *pp is set to NULL. 4719 ** 4720 ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory 4721 ** region has not been allocated (by any client, including one running in a 4722 ** separate process), then *pp is set to NULL and SQLITE_OK returned. If 4723 ** bExtend is non-zero and the requested shared-memory region has not yet 4724 ** been allocated, it is allocated by this function. 4725 ** 4726 ** If the shared-memory region has already been allocated or is allocated by 4727 ** this call as described above, then it is mapped into this processes 4728 ** address space (if it is not already), *pp is set to point to the mapped 4729 ** memory and SQLITE_OK returned. 4730 */ 4731 static int unixShmMap( 4732 sqlite3_file *fd, /* Handle open on database file */ 4733 int iRegion, /* Region to retrieve */ 4734 int szRegion, /* Size of regions */ 4735 int bExtend, /* True to extend file if necessary */ 4736 void volatile **pp /* OUT: Mapped memory */ 4737 ){ 4738 unixFile *pDbFd = (unixFile*)fd; 4739 unixShm *p; 4740 unixShmNode *pShmNode; 4741 int rc = SQLITE_OK; 4742 int nShmPerMap = unixShmRegionPerMap(); 4743 int nReqRegion; 4744 4745 /* If the shared-memory file has not yet been opened, open it now. */ 4746 if( pDbFd->pShm==0 ){ 4747 rc = unixOpenSharedMemory(pDbFd); 4748 if( rc!=SQLITE_OK ) return rc; 4749 } 4750 4751 p = pDbFd->pShm; 4752 pShmNode = p->pShmNode; 4753 sqlite3_mutex_enter(pShmNode->pShmMutex); 4754 if( pShmNode->isUnlocked ){ 4755 rc = unixLockSharedMemory(pDbFd, pShmNode); 4756 if( rc!=SQLITE_OK ) goto shmpage_out; 4757 pShmNode->isUnlocked = 0; 4758 } 4759 assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); 4760 assert( pShmNode->pInode==pDbFd->pInode ); 4761 assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); 4762 assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); 4763 4764 /* Minimum number of regions required to be mapped. */ 4765 nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; 4766 4767 if( pShmNode->nRegion<nReqRegion ){ 4768 char **apNew; /* New apRegion[] array */ 4769 int nByte = nReqRegion*szRegion; /* Minimum required file size */ 4770 struct stat sStat; /* Used by fstat() */ 4771 4772 pShmNode->szRegion = szRegion; 4773 4774 if( pShmNode->hShm>=0 ){ 4775 /* The requested region is not mapped into this processes address space. 4776 ** Check to see if it has been allocated (i.e. if the wal-index file is 4777 ** large enough to contain the requested region). 4778 */ 4779 if( osFstat(pShmNode->hShm, &sStat) ){ 4780 rc = SQLITE_IOERR_SHMSIZE; 4781 goto shmpage_out; 4782 } 4783 4784 if( sStat.st_size<nByte ){ 4785 /* The requested memory region does not exist. If bExtend is set to 4786 ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. 4787 */ 4788 if( !bExtend ){ 4789 goto shmpage_out; 4790 } 4791 4792 /* Alternatively, if bExtend is true, extend the file. Do this by 4793 ** writing a single byte to the end of each (OS) page being 4794 ** allocated or extended. Technically, we need only write to the 4795 ** last page in order to extend the file. But writing to all new 4796 ** pages forces the OS to allocate them immediately, which reduces 4797 ** the chances of SIGBUS while accessing the mapped region later on. 4798 */ 4799 else{ 4800 static const int pgsz = 4096; 4801 int iPg; 4802 4803 /* Write to the last byte of each newly allocated or extended page */ 4804 assert( (nByte % pgsz)==0 ); 4805 for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ 4806 int x = 0; 4807 if( seekAndWriteFd(pShmNode->hShm, iPg*pgsz + pgsz-1,"",1,&x)!=1 ){ 4808 const char *zFile = pShmNode->zFilename; 4809 rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); 4810 goto shmpage_out; 4811 } 4812 } 4813 } 4814 } 4815 } 4816 4817 /* Map the requested memory region into this processes address space. */ 4818 apNew = (char **)sqlite3_realloc( 4819 pShmNode->apRegion, nReqRegion*sizeof(char *) 4820 ); 4821 if( !apNew ){ 4822 rc = SQLITE_IOERR_NOMEM_BKPT; 4823 goto shmpage_out; 4824 } 4825 pShmNode->apRegion = apNew; 4826 while( pShmNode->nRegion<nReqRegion ){ 4827 int nMap = szRegion*nShmPerMap; 4828 int i; 4829 void *pMem; 4830 if( pShmNode->hShm>=0 ){ 4831 pMem = osMmap(0, nMap, 4832 pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 4833 MAP_SHARED, pShmNode->hShm, szRegion*(i64)pShmNode->nRegion 4834 ); 4835 if( pMem==MAP_FAILED ){ 4836 rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); 4837 goto shmpage_out; 4838 } 4839 }else{ 4840 pMem = sqlite3_malloc64(nMap); 4841 if( pMem==0 ){ 4842 rc = SQLITE_NOMEM_BKPT; 4843 goto shmpage_out; 4844 } 4845 memset(pMem, 0, nMap); 4846 } 4847 4848 for(i=0; i<nShmPerMap; i++){ 4849 pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; 4850 } 4851 pShmNode->nRegion += nShmPerMap; 4852 } 4853 } 4854 4855 shmpage_out: 4856 if( pShmNode->nRegion>iRegion ){ 4857 *pp = pShmNode->apRegion[iRegion]; 4858 }else{ 4859 *pp = 0; 4860 } 4861 if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; 4862 sqlite3_mutex_leave(pShmNode->pShmMutex); 4863 return rc; 4864 } 4865 4866 /* 4867 ** Check that the pShmNode->aLock[] array comports with the locking bitmasks 4868 ** held by each client. Return true if it does, or false otherwise. This 4869 ** is to be used in an assert(). e.g. 4870 ** 4871 ** assert( assertLockingArrayOk(pShmNode) ); 4872 */ 4873 #ifdef SQLITE_DEBUG 4874 static int assertLockingArrayOk(unixShmNode *pShmNode){ 4875 unixShm *pX; 4876 int aLock[SQLITE_SHM_NLOCK]; 4877 assert( sqlite3_mutex_held(pShmNode->pShmMutex) ); 4878 4879 memset(aLock, 0, sizeof(aLock)); 4880 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4881 int i; 4882 for(i=0; i<SQLITE_SHM_NLOCK; i++){ 4883 if( pX->exclMask & (1<<i) ){ 4884 assert( aLock[i]==0 ); 4885 aLock[i] = -1; 4886 }else if( pX->sharedMask & (1<<i) ){ 4887 assert( aLock[i]>=0 ); 4888 aLock[i]++; 4889 } 4890 } 4891 } 4892 4893 assert( 0==memcmp(pShmNode->aLock, aLock, sizeof(aLock)) ); 4894 return (memcmp(pShmNode->aLock, aLock, sizeof(aLock))==0); 4895 } 4896 #endif 4897 4898 /* 4899 ** Change the lock state for a shared-memory segment. 4900 ** 4901 ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little 4902 ** different here than in posix. In xShmLock(), one can go from unlocked 4903 ** to shared and back or from unlocked to exclusive and back. But one may 4904 ** not go from shared to exclusive or from exclusive to shared. 4905 */ 4906 static int unixShmLock( 4907 sqlite3_file *fd, /* Database file holding the shared memory */ 4908 int ofst, /* First lock to acquire or release */ 4909 int n, /* Number of locks to acquire or release */ 4910 int flags /* What to do with the lock */ 4911 ){ 4912 unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ 4913 unixShm *p; /* The shared memory being locked */ 4914 unixShmNode *pShmNode; /* The underlying file iNode */ 4915 int rc = SQLITE_OK; /* Result code */ 4916 u16 mask; /* Mask of locks to take or release */ 4917 int *aLock; 4918 4919 p = pDbFd->pShm; 4920 if( p==0 ) return SQLITE_IOERR_SHMLOCK; 4921 pShmNode = p->pShmNode; 4922 if( NEVER(pShmNode==0) ) return SQLITE_IOERR_SHMLOCK; 4923 aLock = pShmNode->aLock; 4924 4925 assert( pShmNode==pDbFd->pInode->pShmNode ); 4926 assert( pShmNode->pInode==pDbFd->pInode ); 4927 assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); 4928 assert( n>=1 ); 4929 assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) 4930 || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) 4931 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) 4932 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); 4933 assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); 4934 assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); 4935 assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); 4936 4937 /* Check that, if this to be a blocking lock, no locks that occur later 4938 ** in the following list than the lock being obtained are already held: 4939 ** 4940 ** 1. Checkpointer lock (ofst==1). 4941 ** 2. Write lock (ofst==0). 4942 ** 3. Read locks (ofst>=3 && ofst<SQLITE_SHM_NLOCK). 4943 ** 4944 ** In other words, if this is a blocking lock, none of the locks that 4945 ** occur later in the above list than the lock being obtained may be 4946 ** held. 4947 ** 4948 ** It is not permitted to block on the RECOVER lock. 4949 */ 4950 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4951 assert( (flags & SQLITE_SHM_UNLOCK) || pDbFd->iBusyTimeout==0 || ( 4952 (ofst!=2) /* not RECOVER */ 4953 && (ofst!=1 || (p->exclMask|p->sharedMask)==0) 4954 && (ofst!=0 || (p->exclMask|p->sharedMask)<3) 4955 && (ofst<3 || (p->exclMask|p->sharedMask)<(1<<ofst)) 4956 )); 4957 #endif 4958 4959 mask = (1<<(ofst+n)) - (1<<ofst); 4960 assert( n>1 || mask==(1<<ofst) ); 4961 sqlite3_mutex_enter(pShmNode->pShmMutex); 4962 assert( assertLockingArrayOk(pShmNode) ); 4963 if( flags & SQLITE_SHM_UNLOCK ){ 4964 if( (p->exclMask|p->sharedMask) & mask ){ 4965 int ii; 4966 int bUnlock = 1; 4967 4968 for(ii=ofst; ii<ofst+n; ii++){ 4969 if( aLock[ii]>((p->sharedMask & (1<<ii)) ? 1 : 0) ){ 4970 bUnlock = 0; 4971 } 4972 } 4973 4974 if( bUnlock ){ 4975 rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); 4976 if( rc==SQLITE_OK ){ 4977 memset(&aLock[ofst], 0, sizeof(int)*n); 4978 } 4979 }else if( ALWAYS(p->sharedMask & (1<<ofst)) ){ 4980 assert( n==1 && aLock[ofst]>1 ); 4981 aLock[ofst]--; 4982 } 4983 4984 /* Undo the local locks */ 4985 if( rc==SQLITE_OK ){ 4986 p->exclMask &= ~mask; 4987 p->sharedMask &= ~mask; 4988 } 4989 } 4990 }else if( flags & SQLITE_SHM_SHARED ){ 4991 assert( n==1 ); 4992 assert( (p->exclMask & (1<<ofst))==0 ); 4993 if( (p->sharedMask & mask)==0 ){ 4994 if( aLock[ofst]<0 ){ 4995 rc = SQLITE_BUSY; 4996 }else if( aLock[ofst]==0 ){ 4997 rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4998 } 4999 5000 /* Get the local shared locks */ 5001 if( rc==SQLITE_OK ){ 5002 p->sharedMask |= mask; 5003 aLock[ofst]++; 5004 } 5005 } 5006 }else{ 5007 /* Make sure no sibling connections hold locks that will block this 5008 ** lock. If any do, return SQLITE_BUSY right away. */ 5009 int ii; 5010 for(ii=ofst; ii<ofst+n; ii++){ 5011 assert( (p->sharedMask & mask)==0 ); 5012 if( ALWAYS((p->exclMask & (1<<ii))==0) && aLock[ii] ){ 5013 rc = SQLITE_BUSY; 5014 break; 5015 } 5016 } 5017 5018 /* Get the exclusive locks at the system level. Then if successful 5019 ** also update the in-memory values. */ 5020 if( rc==SQLITE_OK ){ 5021 rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); 5022 if( rc==SQLITE_OK ){ 5023 assert( (p->sharedMask & mask)==0 ); 5024 p->exclMask |= mask; 5025 for(ii=ofst; ii<ofst+n; ii++){ 5026 aLock[ii] = -1; 5027 } 5028 } 5029 } 5030 } 5031 assert( assertLockingArrayOk(pShmNode) ); 5032 sqlite3_mutex_leave(pShmNode->pShmMutex); 5033 OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", 5034 p->id, osGetpid(0), p->sharedMask, p->exclMask)); 5035 return rc; 5036 } 5037 5038 /* 5039 ** Implement a memory barrier or memory fence on shared memory. 5040 ** 5041 ** All loads and stores begun before the barrier must complete before 5042 ** any load or store begun after the barrier. 5043 */ 5044 static void unixShmBarrier( 5045 sqlite3_file *fd /* Database file holding the shared memory */ 5046 ){ 5047 UNUSED_PARAMETER(fd); 5048 sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ 5049 assert( fd->pMethods->xLock==nolockLock 5050 || unixFileMutexNotheld((unixFile*)fd) 5051 ); 5052 unixEnterMutex(); /* Also mutex, for redundancy */ 5053 unixLeaveMutex(); 5054 } 5055 5056 /* 5057 ** Close a connection to shared-memory. Delete the underlying 5058 ** storage if deleteFlag is true. 5059 ** 5060 ** If there is no shared memory associated with the connection then this 5061 ** routine is a harmless no-op. 5062 */ 5063 static int unixShmUnmap( 5064 sqlite3_file *fd, /* The underlying database file */ 5065 int deleteFlag /* Delete shared-memory if true */ 5066 ){ 5067 unixShm *p; /* The connection to be closed */ 5068 unixShmNode *pShmNode; /* The underlying shared-memory file */ 5069 unixShm **pp; /* For looping over sibling connections */ 5070 unixFile *pDbFd; /* The underlying database file */ 5071 5072 pDbFd = (unixFile*)fd; 5073 p = pDbFd->pShm; 5074 if( p==0 ) return SQLITE_OK; 5075 pShmNode = p->pShmNode; 5076 5077 assert( pShmNode==pDbFd->pInode->pShmNode ); 5078 assert( pShmNode->pInode==pDbFd->pInode ); 5079 5080 /* Remove connection p from the set of connections associated 5081 ** with pShmNode */ 5082 sqlite3_mutex_enter(pShmNode->pShmMutex); 5083 for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} 5084 *pp = p->pNext; 5085 5086 /* Free the connection p */ 5087 sqlite3_free(p); 5088 pDbFd->pShm = 0; 5089 sqlite3_mutex_leave(pShmNode->pShmMutex); 5090 5091 /* If pShmNode->nRef has reached 0, then close the underlying 5092 ** shared-memory file, too */ 5093 assert( unixFileMutexNotheld(pDbFd) ); 5094 unixEnterMutex(); 5095 assert( pShmNode->nRef>0 ); 5096 pShmNode->nRef--; 5097 if( pShmNode->nRef==0 ){ 5098 if( deleteFlag && pShmNode->hShm>=0 ){ 5099 osUnlink(pShmNode->zFilename); 5100 } 5101 unixShmPurge(pDbFd); 5102 } 5103 unixLeaveMutex(); 5104 5105 return SQLITE_OK; 5106 } 5107 5108 5109 #else 5110 # define unixShmMap 0 5111 # define unixShmLock 0 5112 # define unixShmBarrier 0 5113 # define unixShmUnmap 0 5114 #endif /* #ifndef SQLITE_OMIT_WAL */ 5115 5116 #if SQLITE_MAX_MMAP_SIZE>0 5117 /* 5118 ** If it is currently memory mapped, unmap file pFd. 5119 */ 5120 static void unixUnmapfile(unixFile *pFd){ 5121 assert( pFd->nFetchOut==0 ); 5122 if( pFd->pMapRegion ){ 5123 osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); 5124 pFd->pMapRegion = 0; 5125 pFd->mmapSize = 0; 5126 pFd->mmapSizeActual = 0; 5127 } 5128 } 5129 5130 /* 5131 ** Attempt to set the size of the memory mapping maintained by file 5132 ** descriptor pFd to nNew bytes. Any existing mapping is discarded. 5133 ** 5134 ** If successful, this function sets the following variables: 5135 ** 5136 ** unixFile.pMapRegion 5137 ** unixFile.mmapSize 5138 ** unixFile.mmapSizeActual 5139 ** 5140 ** If unsuccessful, an error message is logged via sqlite3_log() and 5141 ** the three variables above are zeroed. In this case SQLite should 5142 ** continue accessing the database using the xRead() and xWrite() 5143 ** methods. 5144 */ 5145 static void unixRemapfile( 5146 unixFile *pFd, /* File descriptor object */ 5147 i64 nNew /* Required mapping size */ 5148 ){ 5149 const char *zErr = "mmap"; 5150 int h = pFd->h; /* File descriptor open on db file */ 5151 u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ 5152 i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ 5153 u8 *pNew = 0; /* Location of new mapping */ 5154 int flags = PROT_READ; /* Flags to pass to mmap() */ 5155 5156 assert( pFd->nFetchOut==0 ); 5157 assert( nNew>pFd->mmapSize ); 5158 assert( nNew<=pFd->mmapSizeMax ); 5159 assert( nNew>0 ); 5160 assert( pFd->mmapSizeActual>=pFd->mmapSize ); 5161 assert( MAP_FAILED!=0 ); 5162 5163 #ifdef SQLITE_MMAP_READWRITE 5164 if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; 5165 #endif 5166 5167 if( pOrig ){ 5168 #if HAVE_MREMAP 5169 i64 nReuse = pFd->mmapSize; 5170 #else 5171 const int szSyspage = osGetpagesize(); 5172 i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); 5173 #endif 5174 u8 *pReq = &pOrig[nReuse]; 5175 5176 /* Unmap any pages of the existing mapping that cannot be reused. */ 5177 if( nReuse!=nOrig ){ 5178 osMunmap(pReq, nOrig-nReuse); 5179 } 5180 5181 #if HAVE_MREMAP 5182 pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); 5183 zErr = "mremap"; 5184 #else 5185 pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); 5186 if( pNew!=MAP_FAILED ){ 5187 if( pNew!=pReq ){ 5188 osMunmap(pNew, nNew - nReuse); 5189 pNew = 0; 5190 }else{ 5191 pNew = pOrig; 5192 } 5193 } 5194 #endif 5195 5196 /* The attempt to extend the existing mapping failed. Free it. */ 5197 if( pNew==MAP_FAILED || pNew==0 ){ 5198 osMunmap(pOrig, nReuse); 5199 } 5200 } 5201 5202 /* If pNew is still NULL, try to create an entirely new mapping. */ 5203 if( pNew==0 ){ 5204 pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); 5205 } 5206 5207 if( pNew==MAP_FAILED ){ 5208 pNew = 0; 5209 nNew = 0; 5210 unixLogError(SQLITE_OK, zErr, pFd->zPath); 5211 5212 /* If the mmap() above failed, assume that all subsequent mmap() calls 5213 ** will probably fail too. Fall back to using xRead/xWrite exclusively 5214 ** in this case. */ 5215 pFd->mmapSizeMax = 0; 5216 } 5217 pFd->pMapRegion = (void *)pNew; 5218 pFd->mmapSize = pFd->mmapSizeActual = nNew; 5219 } 5220 5221 /* 5222 ** Memory map or remap the file opened by file-descriptor pFd (if the file 5223 ** is already mapped, the existing mapping is replaced by the new). Or, if 5224 ** there already exists a mapping for this file, and there are still 5225 ** outstanding xFetch() references to it, this function is a no-op. 5226 ** 5227 ** If parameter nByte is non-negative, then it is the requested size of 5228 ** the mapping to create. Otherwise, if nByte is less than zero, then the 5229 ** requested size is the size of the file on disk. The actual size of the 5230 ** created mapping is either the requested size or the value configured 5231 ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. 5232 ** 5233 ** SQLITE_OK is returned if no error occurs (even if the mapping is not 5234 ** recreated as a result of outstanding references) or an SQLite error 5235 ** code otherwise. 5236 */ 5237 static int unixMapfile(unixFile *pFd, i64 nMap){ 5238 assert( nMap>=0 || pFd->nFetchOut==0 ); 5239 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5240 if( pFd->nFetchOut>0 ) return SQLITE_OK; 5241 5242 if( nMap<0 ){ 5243 struct stat statbuf; /* Low-level file information */ 5244 if( osFstat(pFd->h, &statbuf) ){ 5245 return SQLITE_IOERR_FSTAT; 5246 } 5247 nMap = statbuf.st_size; 5248 } 5249 if( nMap>pFd->mmapSizeMax ){ 5250 nMap = pFd->mmapSizeMax; 5251 } 5252 5253 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5254 if( nMap!=pFd->mmapSize ){ 5255 unixRemapfile(pFd, nMap); 5256 } 5257 5258 return SQLITE_OK; 5259 } 5260 #endif /* SQLITE_MAX_MMAP_SIZE>0 */ 5261 5262 /* 5263 ** If possible, return a pointer to a mapping of file fd starting at offset 5264 ** iOff. The mapping must be valid for at least nAmt bytes. 5265 ** 5266 ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. 5267 ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. 5268 ** Finally, if an error does occur, return an SQLite error code. The final 5269 ** value of *pp is undefined in this case. 5270 ** 5271 ** If this function does return a pointer, the caller must eventually 5272 ** release the reference by calling unixUnfetch(). 5273 */ 5274 static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ 5275 #if SQLITE_MAX_MMAP_SIZE>0 5276 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5277 #endif 5278 *pp = 0; 5279 5280 #if SQLITE_MAX_MMAP_SIZE>0 5281 if( pFd->mmapSizeMax>0 ){ 5282 if( pFd->pMapRegion==0 ){ 5283 int rc = unixMapfile(pFd, -1); 5284 if( rc!=SQLITE_OK ) return rc; 5285 } 5286 if( pFd->mmapSize >= iOff+nAmt ){ 5287 *pp = &((u8 *)pFd->pMapRegion)[iOff]; 5288 pFd->nFetchOut++; 5289 } 5290 } 5291 #endif 5292 return SQLITE_OK; 5293 } 5294 5295 /* 5296 ** If the third argument is non-NULL, then this function releases a 5297 ** reference obtained by an earlier call to unixFetch(). The second 5298 ** argument passed to this function must be the same as the corresponding 5299 ** argument that was passed to the unixFetch() invocation. 5300 ** 5301 ** Or, if the third argument is NULL, then this function is being called 5302 ** to inform the VFS layer that, according to POSIX, any existing mapping 5303 ** may now be invalid and should be unmapped. 5304 */ 5305 static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ 5306 #if SQLITE_MAX_MMAP_SIZE>0 5307 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5308 UNUSED_PARAMETER(iOff); 5309 5310 /* If p==0 (unmap the entire file) then there must be no outstanding 5311 ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), 5312 ** then there must be at least one outstanding. */ 5313 assert( (p==0)==(pFd->nFetchOut==0) ); 5314 5315 /* If p!=0, it must match the iOff value. */ 5316 assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); 5317 5318 if( p ){ 5319 pFd->nFetchOut--; 5320 }else{ 5321 unixUnmapfile(pFd); 5322 } 5323 5324 assert( pFd->nFetchOut>=0 ); 5325 #else 5326 UNUSED_PARAMETER(fd); 5327 UNUSED_PARAMETER(p); 5328 UNUSED_PARAMETER(iOff); 5329 #endif 5330 return SQLITE_OK; 5331 } 5332 5333 /* 5334 ** Here ends the implementation of all sqlite3_file methods. 5335 ** 5336 ********************** End sqlite3_file Methods ******************************* 5337 ******************************************************************************/ 5338 5339 /* 5340 ** This division contains definitions of sqlite3_io_methods objects that 5341 ** implement various file locking strategies. It also contains definitions 5342 ** of "finder" functions. A finder-function is used to locate the appropriate 5343 ** sqlite3_io_methods object for a particular database file. The pAppData 5344 ** field of the sqlite3_vfs VFS objects are initialized to be pointers to 5345 ** the correct finder-function for that VFS. 5346 ** 5347 ** Most finder functions return a pointer to a fixed sqlite3_io_methods 5348 ** object. The only interesting finder-function is autolockIoFinder, which 5349 ** looks at the filesystem type and tries to guess the best locking 5350 ** strategy from that. 5351 ** 5352 ** For finder-function F, two objects are created: 5353 ** 5354 ** (1) The real finder-function named "FImpt()". 5355 ** 5356 ** (2) A constant pointer to this function named just "F". 5357 ** 5358 ** 5359 ** A pointer to the F pointer is used as the pAppData value for VFS 5360 ** objects. We have to do this instead of letting pAppData point 5361 ** directly at the finder-function since C90 rules prevent a void* 5362 ** from be cast into a function pointer. 5363 ** 5364 ** 5365 ** Each instance of this macro generates two objects: 5366 ** 5367 ** * A constant sqlite3_io_methods object call METHOD that has locking 5368 ** methods CLOSE, LOCK, UNLOCK, CKRESLOCK. 5369 ** 5370 ** * An I/O method finder function called FINDER that returns a pointer 5371 ** to the METHOD object in the previous bullet. 5372 */ 5373 #define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ 5374 static const sqlite3_io_methods METHOD = { \ 5375 VERSION, /* iVersion */ \ 5376 CLOSE, /* xClose */ \ 5377 unixRead, /* xRead */ \ 5378 unixWrite, /* xWrite */ \ 5379 unixTruncate, /* xTruncate */ \ 5380 unixSync, /* xSync */ \ 5381 unixFileSize, /* xFileSize */ \ 5382 LOCK, /* xLock */ \ 5383 UNLOCK, /* xUnlock */ \ 5384 CKLOCK, /* xCheckReservedLock */ \ 5385 unixFileControl, /* xFileControl */ \ 5386 unixSectorSize, /* xSectorSize */ \ 5387 unixDeviceCharacteristics, /* xDeviceCapabilities */ \ 5388 SHMMAP, /* xShmMap */ \ 5389 unixShmLock, /* xShmLock */ \ 5390 unixShmBarrier, /* xShmBarrier */ \ 5391 unixShmUnmap, /* xShmUnmap */ \ 5392 unixFetch, /* xFetch */ \ 5393 unixUnfetch, /* xUnfetch */ \ 5394 }; \ 5395 static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ 5396 UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ 5397 return &METHOD; \ 5398 } \ 5399 static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ 5400 = FINDER##Impl; 5401 5402 /* 5403 ** Here are all of the sqlite3_io_methods objects for each of the 5404 ** locking strategies. Functions that return pointers to these methods 5405 ** are also created. 5406 */ 5407 IOMETHODS( 5408 posixIoFinder, /* Finder function name */ 5409 posixIoMethods, /* sqlite3_io_methods object name */ 5410 3, /* shared memory and mmap are enabled */ 5411 unixClose, /* xClose method */ 5412 unixLock, /* xLock method */ 5413 unixUnlock, /* xUnlock method */ 5414 unixCheckReservedLock, /* xCheckReservedLock method */ 5415 unixShmMap /* xShmMap method */ 5416 ) 5417 IOMETHODS( 5418 nolockIoFinder, /* Finder function name */ 5419 nolockIoMethods, /* sqlite3_io_methods object name */ 5420 3, /* shared memory and mmap are enabled */ 5421 nolockClose, /* xClose method */ 5422 nolockLock, /* xLock method */ 5423 nolockUnlock, /* xUnlock method */ 5424 nolockCheckReservedLock, /* xCheckReservedLock method */ 5425 0 /* xShmMap method */ 5426 ) 5427 IOMETHODS( 5428 dotlockIoFinder, /* Finder function name */ 5429 dotlockIoMethods, /* sqlite3_io_methods object name */ 5430 1, /* shared memory is disabled */ 5431 dotlockClose, /* xClose method */ 5432 dotlockLock, /* xLock method */ 5433 dotlockUnlock, /* xUnlock method */ 5434 dotlockCheckReservedLock, /* xCheckReservedLock method */ 5435 0 /* xShmMap method */ 5436 ) 5437 5438 #if SQLITE_ENABLE_LOCKING_STYLE 5439 IOMETHODS( 5440 flockIoFinder, /* Finder function name */ 5441 flockIoMethods, /* sqlite3_io_methods object name */ 5442 1, /* shared memory is disabled */ 5443 flockClose, /* xClose method */ 5444 flockLock, /* xLock method */ 5445 flockUnlock, /* xUnlock method */ 5446 flockCheckReservedLock, /* xCheckReservedLock method */ 5447 0 /* xShmMap method */ 5448 ) 5449 #endif 5450 5451 #if OS_VXWORKS 5452 IOMETHODS( 5453 semIoFinder, /* Finder function name */ 5454 semIoMethods, /* sqlite3_io_methods object name */ 5455 1, /* shared memory is disabled */ 5456 semXClose, /* xClose method */ 5457 semXLock, /* xLock method */ 5458 semXUnlock, /* xUnlock method */ 5459 semXCheckReservedLock, /* xCheckReservedLock method */ 5460 0 /* xShmMap method */ 5461 ) 5462 #endif 5463 5464 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5465 IOMETHODS( 5466 afpIoFinder, /* Finder function name */ 5467 afpIoMethods, /* sqlite3_io_methods object name */ 5468 1, /* shared memory is disabled */ 5469 afpClose, /* xClose method */ 5470 afpLock, /* xLock method */ 5471 afpUnlock, /* xUnlock method */ 5472 afpCheckReservedLock, /* xCheckReservedLock method */ 5473 0 /* xShmMap method */ 5474 ) 5475 #endif 5476 5477 /* 5478 ** The proxy locking method is a "super-method" in the sense that it 5479 ** opens secondary file descriptors for the conch and lock files and 5480 ** it uses proxy, dot-file, AFP, and flock() locking methods on those 5481 ** secondary files. For this reason, the division that implements 5482 ** proxy locking is located much further down in the file. But we need 5483 ** to go ahead and define the sqlite3_io_methods and finder function 5484 ** for proxy locking here. So we forward declare the I/O methods. 5485 */ 5486 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5487 static int proxyClose(sqlite3_file*); 5488 static int proxyLock(sqlite3_file*, int); 5489 static int proxyUnlock(sqlite3_file*, int); 5490 static int proxyCheckReservedLock(sqlite3_file*, int*); 5491 IOMETHODS( 5492 proxyIoFinder, /* Finder function name */ 5493 proxyIoMethods, /* sqlite3_io_methods object name */ 5494 1, /* shared memory is disabled */ 5495 proxyClose, /* xClose method */ 5496 proxyLock, /* xLock method */ 5497 proxyUnlock, /* xUnlock method */ 5498 proxyCheckReservedLock, /* xCheckReservedLock method */ 5499 0 /* xShmMap method */ 5500 ) 5501 #endif 5502 5503 /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ 5504 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5505 IOMETHODS( 5506 nfsIoFinder, /* Finder function name */ 5507 nfsIoMethods, /* sqlite3_io_methods object name */ 5508 1, /* shared memory is disabled */ 5509 unixClose, /* xClose method */ 5510 unixLock, /* xLock method */ 5511 nfsUnlock, /* xUnlock method */ 5512 unixCheckReservedLock, /* xCheckReservedLock method */ 5513 0 /* xShmMap method */ 5514 ) 5515 #endif 5516 5517 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5518 /* 5519 ** This "finder" function attempts to determine the best locking strategy 5520 ** for the database file "filePath". It then returns the sqlite3_io_methods 5521 ** object that implements that strategy. 5522 ** 5523 ** This is for MacOSX only. 5524 */ 5525 static const sqlite3_io_methods *autolockIoFinderImpl( 5526 const char *filePath, /* name of the database file */ 5527 unixFile *pNew /* open file object for the database file */ 5528 ){ 5529 static const struct Mapping { 5530 const char *zFilesystem; /* Filesystem type name */ 5531 const sqlite3_io_methods *pMethods; /* Appropriate locking method */ 5532 } aMap[] = { 5533 { "hfs", &posixIoMethods }, 5534 { "ufs", &posixIoMethods }, 5535 { "afpfs", &afpIoMethods }, 5536 { "smbfs", &afpIoMethods }, 5537 { "webdav", &nolockIoMethods }, 5538 { 0, 0 } 5539 }; 5540 int i; 5541 struct statfs fsInfo; 5542 struct flock lockInfo; 5543 5544 if( !filePath ){ 5545 /* If filePath==NULL that means we are dealing with a transient file 5546 ** that does not need to be locked. */ 5547 return &nolockIoMethods; 5548 } 5549 if( statfs(filePath, &fsInfo) != -1 ){ 5550 if( fsInfo.f_flags & MNT_RDONLY ){ 5551 return &nolockIoMethods; 5552 } 5553 for(i=0; aMap[i].zFilesystem; i++){ 5554 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 5555 return aMap[i].pMethods; 5556 } 5557 } 5558 } 5559 5560 /* Default case. Handles, amongst others, "nfs". 5561 ** Test byte-range lock using fcntl(). If the call succeeds, 5562 ** assume that the file-system supports POSIX style locks. 5563 */ 5564 lockInfo.l_len = 1; 5565 lockInfo.l_start = 0; 5566 lockInfo.l_whence = SEEK_SET; 5567 lockInfo.l_type = F_RDLCK; 5568 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5569 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ 5570 return &nfsIoMethods; 5571 } else { 5572 return &posixIoMethods; 5573 } 5574 }else{ 5575 return &dotlockIoMethods; 5576 } 5577 } 5578 static const sqlite3_io_methods 5579 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 5580 5581 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 5582 5583 #if OS_VXWORKS 5584 /* 5585 ** This "finder" function for VxWorks checks to see if posix advisory 5586 ** locking works. If it does, then that is what is used. If it does not 5587 ** work, then fallback to named semaphore locking. 5588 */ 5589 static const sqlite3_io_methods *vxworksIoFinderImpl( 5590 const char *filePath, /* name of the database file */ 5591 unixFile *pNew /* the open file object */ 5592 ){ 5593 struct flock lockInfo; 5594 5595 if( !filePath ){ 5596 /* If filePath==NULL that means we are dealing with a transient file 5597 ** that does not need to be locked. */ 5598 return &nolockIoMethods; 5599 } 5600 5601 /* Test if fcntl() is supported and use POSIX style locks. 5602 ** Otherwise fall back to the named semaphore method. 5603 */ 5604 lockInfo.l_len = 1; 5605 lockInfo.l_start = 0; 5606 lockInfo.l_whence = SEEK_SET; 5607 lockInfo.l_type = F_RDLCK; 5608 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5609 return &posixIoMethods; 5610 }else{ 5611 return &semIoMethods; 5612 } 5613 } 5614 static const sqlite3_io_methods 5615 *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; 5616 5617 #endif /* OS_VXWORKS */ 5618 5619 /* 5620 ** An abstract type for a pointer to an IO method finder function: 5621 */ 5622 typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); 5623 5624 5625 /**************************************************************************** 5626 **************************** sqlite3_vfs methods **************************** 5627 ** 5628 ** This division contains the implementation of methods on the 5629 ** sqlite3_vfs object. 5630 */ 5631 5632 /* 5633 ** Initialize the contents of the unixFile structure pointed to by pId. 5634 */ 5635 static int fillInUnixFile( 5636 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 5637 int h, /* Open file descriptor of file being opened */ 5638 sqlite3_file *pId, /* Write to the unixFile structure here */ 5639 const char *zFilename, /* Name of the file being opened */ 5640 int ctrlFlags /* Zero or more UNIXFILE_* values */ 5641 ){ 5642 const sqlite3_io_methods *pLockingStyle; 5643 unixFile *pNew = (unixFile *)pId; 5644 int rc = SQLITE_OK; 5645 5646 assert( pNew->pInode==NULL ); 5647 5648 /* No locking occurs in temporary files */ 5649 assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); 5650 5651 OSTRACE(("OPEN %-3d %s\n", h, zFilename)); 5652 pNew->h = h; 5653 pNew->pVfs = pVfs; 5654 pNew->zPath = zFilename; 5655 pNew->ctrlFlags = (u8)ctrlFlags; 5656 #if SQLITE_MAX_MMAP_SIZE>0 5657 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; 5658 #endif 5659 if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), 5660 "psow", SQLITE_POWERSAFE_OVERWRITE) ){ 5661 pNew->ctrlFlags |= UNIXFILE_PSOW; 5662 } 5663 if( strcmp(pVfs->zName,"unix-excl")==0 ){ 5664 pNew->ctrlFlags |= UNIXFILE_EXCL; 5665 } 5666 5667 #if OS_VXWORKS 5668 pNew->pId = vxworksFindFileId(zFilename); 5669 if( pNew->pId==0 ){ 5670 ctrlFlags |= UNIXFILE_NOLOCK; 5671 rc = SQLITE_NOMEM_BKPT; 5672 } 5673 #endif 5674 5675 if( ctrlFlags & UNIXFILE_NOLOCK ){ 5676 pLockingStyle = &nolockIoMethods; 5677 }else{ 5678 pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); 5679 #if SQLITE_ENABLE_LOCKING_STYLE 5680 /* Cache zFilename in the locking context (AFP and dotlock override) for 5681 ** proxyLock activation is possible (remote proxy is based on db name) 5682 ** zFilename remains valid until file is closed, to support */ 5683 pNew->lockingContext = (void*)zFilename; 5684 #endif 5685 } 5686 5687 if( pLockingStyle == &posixIoMethods 5688 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5689 || pLockingStyle == &nfsIoMethods 5690 #endif 5691 ){ 5692 unixEnterMutex(); 5693 rc = findInodeInfo(pNew, &pNew->pInode); 5694 if( rc!=SQLITE_OK ){ 5695 /* If an error occurred in findInodeInfo(), close the file descriptor 5696 ** immediately, before releasing the mutex. findInodeInfo() may fail 5697 ** in two scenarios: 5698 ** 5699 ** (a) A call to fstat() failed. 5700 ** (b) A malloc failed. 5701 ** 5702 ** Scenario (b) may only occur if the process is holding no other 5703 ** file descriptors open on the same file. If there were other file 5704 ** descriptors on this file, then no malloc would be required by 5705 ** findInodeInfo(). If this is the case, it is quite safe to close 5706 ** handle h - as it is guaranteed that no posix locks will be released 5707 ** by doing so. 5708 ** 5709 ** If scenario (a) caused the error then things are not so safe. The 5710 ** implicit assumption here is that if fstat() fails, things are in 5711 ** such bad shape that dropping a lock or two doesn't matter much. 5712 */ 5713 robust_close(pNew, h, __LINE__); 5714 h = -1; 5715 } 5716 unixLeaveMutex(); 5717 } 5718 5719 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5720 else if( pLockingStyle == &afpIoMethods ){ 5721 /* AFP locking uses the file path so it needs to be included in 5722 ** the afpLockingContext. 5723 */ 5724 afpLockingContext *pCtx; 5725 pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 5726 if( pCtx==0 ){ 5727 rc = SQLITE_NOMEM_BKPT; 5728 }else{ 5729 /* NB: zFilename exists and remains valid until the file is closed 5730 ** according to requirement F11141. So we do not need to make a 5731 ** copy of the filename. */ 5732 pCtx->dbPath = zFilename; 5733 pCtx->reserved = 0; 5734 srandomdev(); 5735 unixEnterMutex(); 5736 rc = findInodeInfo(pNew, &pNew->pInode); 5737 if( rc!=SQLITE_OK ){ 5738 sqlite3_free(pNew->lockingContext); 5739 robust_close(pNew, h, __LINE__); 5740 h = -1; 5741 } 5742 unixLeaveMutex(); 5743 } 5744 } 5745 #endif 5746 5747 else if( pLockingStyle == &dotlockIoMethods ){ 5748 /* Dotfile locking uses the file path so it needs to be included in 5749 ** the dotlockLockingContext 5750 */ 5751 char *zLockFile; 5752 int nFilename; 5753 assert( zFilename!=0 ); 5754 nFilename = (int)strlen(zFilename) + 6; 5755 zLockFile = (char *)sqlite3_malloc64(nFilename); 5756 if( zLockFile==0 ){ 5757 rc = SQLITE_NOMEM_BKPT; 5758 }else{ 5759 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); 5760 } 5761 pNew->lockingContext = zLockFile; 5762 } 5763 5764 #if OS_VXWORKS 5765 else if( pLockingStyle == &semIoMethods ){ 5766 /* Named semaphore locking uses the file path so it needs to be 5767 ** included in the semLockingContext 5768 */ 5769 unixEnterMutex(); 5770 rc = findInodeInfo(pNew, &pNew->pInode); 5771 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ 5772 char *zSemName = pNew->pInode->aSemName; 5773 int n; 5774 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", 5775 pNew->pId->zCanonicalName); 5776 for( n=1; zSemName[n]; n++ ) 5777 if( zSemName[n]=='/' ) zSemName[n] = '_'; 5778 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); 5779 if( pNew->pInode->pSem == SEM_FAILED ){ 5780 rc = SQLITE_NOMEM_BKPT; 5781 pNew->pInode->aSemName[0] = '\0'; 5782 } 5783 } 5784 unixLeaveMutex(); 5785 } 5786 #endif 5787 5788 storeLastErrno(pNew, 0); 5789 #if OS_VXWORKS 5790 if( rc!=SQLITE_OK ){ 5791 if( h>=0 ) robust_close(pNew, h, __LINE__); 5792 h = -1; 5793 osUnlink(zFilename); 5794 pNew->ctrlFlags |= UNIXFILE_DELETE; 5795 } 5796 #endif 5797 if( rc!=SQLITE_OK ){ 5798 if( h>=0 ) robust_close(pNew, h, __LINE__); 5799 }else{ 5800 pId->pMethods = pLockingStyle; 5801 OpenCounter(+1); 5802 verifyDbFile(pNew); 5803 } 5804 return rc; 5805 } 5806 5807 /* 5808 ** Directories to consider for temp files. 5809 */ 5810 static const char *azTempDirs[] = { 5811 0, 5812 0, 5813 "/var/tmp", 5814 "/usr/tmp", 5815 "/tmp", 5816 "." 5817 }; 5818 5819 /* 5820 ** Initialize first two members of azTempDirs[] array. 5821 */ 5822 static void unixTempFileInit(void){ 5823 azTempDirs[0] = getenv("SQLITE_TMPDIR"); 5824 azTempDirs[1] = getenv("TMPDIR"); 5825 } 5826 5827 /* 5828 ** Return the name of a directory in which to put temporary files. 5829 ** If no suitable temporary file directory can be found, return NULL. 5830 */ 5831 static const char *unixTempFileDir(void){ 5832 unsigned int i = 0; 5833 struct stat buf; 5834 const char *zDir = sqlite3_temp_directory; 5835 5836 while(1){ 5837 if( zDir!=0 5838 && osStat(zDir, &buf)==0 5839 && S_ISDIR(buf.st_mode) 5840 && osAccess(zDir, 03)==0 5841 ){ 5842 return zDir; 5843 } 5844 if( i>=sizeof(azTempDirs)/sizeof(azTempDirs[0]) ) break; 5845 zDir = azTempDirs[i++]; 5846 } 5847 return 0; 5848 } 5849 5850 /* 5851 ** Create a temporary file name in zBuf. zBuf must be allocated 5852 ** by the calling process and must be big enough to hold at least 5853 ** pVfs->mxPathname bytes. 5854 */ 5855 static int unixGetTempname(int nBuf, char *zBuf){ 5856 const char *zDir; 5857 int iLimit = 0; 5858 5859 /* It's odd to simulate an io-error here, but really this is just 5860 ** using the io-error infrastructure to test that SQLite handles this 5861 ** function failing. 5862 */ 5863 zBuf[0] = 0; 5864 SimulateIOError( return SQLITE_IOERR ); 5865 5866 zDir = unixTempFileDir(); 5867 if( zDir==0 ) return SQLITE_IOERR_GETTEMPPATH; 5868 do{ 5869 u64 r; 5870 sqlite3_randomness(sizeof(r), &r); 5871 assert( nBuf>2 ); 5872 zBuf[nBuf-2] = 0; 5873 sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c", 5874 zDir, r, 0); 5875 if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR; 5876 }while( osAccess(zBuf,0)==0 ); 5877 return SQLITE_OK; 5878 } 5879 5880 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5881 /* 5882 ** Routine to transform a unixFile into a proxy-locking unixFile. 5883 ** Implementation in the proxy-lock division, but used by unixOpen() 5884 ** if SQLITE_PREFER_PROXY_LOCKING is defined. 5885 */ 5886 static int proxyTransformUnixFile(unixFile*, const char*); 5887 #endif 5888 5889 /* 5890 ** Search for an unused file descriptor that was opened on the database 5891 ** file (not a journal or super-journal file) identified by pathname 5892 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second 5893 ** argument to this function. 5894 ** 5895 ** Such a file descriptor may exist if a database connection was closed 5896 ** but the associated file descriptor could not be closed because some 5897 ** other file descriptor open on the same file is holding a file-lock. 5898 ** Refer to comments in the unixClose() function and the lengthy comment 5899 ** describing "Posix Advisory Locking" at the start of this file for 5900 ** further details. Also, ticket #4018. 5901 ** 5902 ** If a suitable file descriptor is found, then it is returned. If no 5903 ** such file descriptor is located, -1 is returned. 5904 */ 5905 static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ 5906 UnixUnusedFd *pUnused = 0; 5907 5908 /* Do not search for an unused file descriptor on vxworks. Not because 5909 ** vxworks would not benefit from the change (it might, we're not sure), 5910 ** but because no way to test it is currently available. It is better 5911 ** not to risk breaking vxworks support for the sake of such an obscure 5912 ** feature. */ 5913 #if !OS_VXWORKS 5914 struct stat sStat; /* Results of stat() call */ 5915 5916 unixEnterMutex(); 5917 5918 /* A stat() call may fail for various reasons. If this happens, it is 5919 ** almost certain that an open() call on the same path will also fail. 5920 ** For this reason, if an error occurs in the stat() call here, it is 5921 ** ignored and -1 is returned. The caller will try to open a new file 5922 ** descriptor on the same path, fail, and return an error to SQLite. 5923 ** 5924 ** Even if a subsequent open() call does succeed, the consequences of 5925 ** not searching for a reusable file descriptor are not dire. */ 5926 if( inodeList!=0 && 0==osStat(zPath, &sStat) ){ 5927 unixInodeInfo *pInode; 5928 5929 pInode = inodeList; 5930 while( pInode && (pInode->fileId.dev!=sStat.st_dev 5931 || pInode->fileId.ino!=(u64)sStat.st_ino) ){ 5932 pInode = pInode->pNext; 5933 } 5934 if( pInode ){ 5935 UnixUnusedFd **pp; 5936 assert( sqlite3_mutex_notheld(pInode->pLockMutex) ); 5937 sqlite3_mutex_enter(pInode->pLockMutex); 5938 flags &= (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); 5939 for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); 5940 pUnused = *pp; 5941 if( pUnused ){ 5942 *pp = pUnused->pNext; 5943 } 5944 sqlite3_mutex_leave(pInode->pLockMutex); 5945 } 5946 } 5947 unixLeaveMutex(); 5948 #endif /* if !OS_VXWORKS */ 5949 return pUnused; 5950 } 5951 5952 /* 5953 ** Find the mode, uid and gid of file zFile. 5954 */ 5955 static int getFileMode( 5956 const char *zFile, /* File name */ 5957 mode_t *pMode, /* OUT: Permissions of zFile */ 5958 uid_t *pUid, /* OUT: uid of zFile. */ 5959 gid_t *pGid /* OUT: gid of zFile. */ 5960 ){ 5961 struct stat sStat; /* Output of stat() on database file */ 5962 int rc = SQLITE_OK; 5963 if( 0==osStat(zFile, &sStat) ){ 5964 *pMode = sStat.st_mode & 0777; 5965 *pUid = sStat.st_uid; 5966 *pGid = sStat.st_gid; 5967 }else{ 5968 rc = SQLITE_IOERR_FSTAT; 5969 } 5970 return rc; 5971 } 5972 5973 /* 5974 ** This function is called by unixOpen() to determine the unix permissions 5975 ** to create new files with. If no error occurs, then SQLITE_OK is returned 5976 ** and a value suitable for passing as the third argument to open(2) is 5977 ** written to *pMode. If an IO error occurs, an SQLite error code is 5978 ** returned and the value of *pMode is not modified. 5979 ** 5980 ** In most cases, this routine sets *pMode to 0, which will become 5981 ** an indication to robust_open() to create the file using 5982 ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. 5983 ** But if the file being opened is a WAL or regular journal file, then 5984 ** this function queries the file-system for the permissions on the 5985 ** corresponding database file and sets *pMode to this value. Whenever 5986 ** possible, WAL and journal files are created using the same permissions 5987 ** as the associated database file. 5988 ** 5989 ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the 5990 ** original filename is unavailable. But 8_3_NAMES is only used for 5991 ** FAT filesystems and permissions do not matter there, so just use 5992 ** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero. 5993 */ 5994 static int findCreateFileMode( 5995 const char *zPath, /* Path of file (possibly) being created */ 5996 int flags, /* Flags passed as 4th argument to xOpen() */ 5997 mode_t *pMode, /* OUT: Permissions to open file with */ 5998 uid_t *pUid, /* OUT: uid to set on the file */ 5999 gid_t *pGid /* OUT: gid to set on the file */ 6000 ){ 6001 int rc = SQLITE_OK; /* Return Code */ 6002 *pMode = 0; 6003 *pUid = 0; 6004 *pGid = 0; 6005 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 6006 char zDb[MAX_PATHNAME+1]; /* Database file path */ 6007 int nDb; /* Number of valid bytes in zDb */ 6008 6009 /* zPath is a path to a WAL or journal file. The following block derives 6010 ** the path to the associated database file from zPath. This block handles 6011 ** the following naming conventions: 6012 ** 6013 ** "<path to db>-journal" 6014 ** "<path to db>-wal" 6015 ** "<path to db>-journalNN" 6016 ** "<path to db>-walNN" 6017 ** 6018 ** where NN is a decimal number. The NN naming schemes are 6019 ** used by the test_multiplex.c module. 6020 ** 6021 ** In normal operation, the journal file name will always contain 6022 ** a '-' character. However in 8+3 filename mode, or if a corrupt 6023 ** rollback journal specifies a super-journal with a goofy name, then 6024 ** the '-' might be missing or the '-' might be the first character in 6025 ** the filename. In that case, just return SQLITE_OK with *pMode==0. 6026 */ 6027 nDb = sqlite3Strlen30(zPath) - 1; 6028 while( nDb>0 && zPath[nDb]!='.' ){ 6029 if( zPath[nDb]=='-' ){ 6030 memcpy(zDb, zPath, nDb); 6031 zDb[nDb] = '\0'; 6032 rc = getFileMode(zDb, pMode, pUid, pGid); 6033 break; 6034 } 6035 nDb--; 6036 } 6037 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ 6038 *pMode = 0600; 6039 }else if( flags & SQLITE_OPEN_URI ){ 6040 /* If this is a main database file and the file was opened using a URI 6041 ** filename, check for the "modeof" parameter. If present, interpret 6042 ** its value as a filename and try to copy the mode, uid and gid from 6043 ** that file. */ 6044 const char *z = sqlite3_uri_parameter(zPath, "modeof"); 6045 if( z ){ 6046 rc = getFileMode(z, pMode, pUid, pGid); 6047 } 6048 } 6049 return rc; 6050 } 6051 6052 /* 6053 ** Open the file zPath. 6054 ** 6055 ** Previously, the SQLite OS layer used three functions in place of this 6056 ** one: 6057 ** 6058 ** sqlite3OsOpenReadWrite(); 6059 ** sqlite3OsOpenReadOnly(); 6060 ** sqlite3OsOpenExclusive(); 6061 ** 6062 ** These calls correspond to the following combinations of flags: 6063 ** 6064 ** ReadWrite() -> (READWRITE | CREATE) 6065 ** ReadOnly() -> (READONLY) 6066 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 6067 ** 6068 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 6069 ** true, the file was configured to be automatically deleted when the 6070 ** file handle closed. To achieve the same effect using this new 6071 ** interface, add the DELETEONCLOSE flag to those specified above for 6072 ** OpenExclusive(). 6073 */ 6074 static int unixOpen( 6075 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ 6076 const char *zPath, /* Pathname of file to be opened */ 6077 sqlite3_file *pFile, /* The file descriptor to be filled in */ 6078 int flags, /* Input flags to control the opening */ 6079 int *pOutFlags /* Output flags returned to SQLite core */ 6080 ){ 6081 unixFile *p = (unixFile *)pFile; 6082 int fd = -1; /* File descriptor returned by open() */ 6083 int openFlags = 0; /* Flags to pass to open() */ 6084 int eType = flags&0x0FFF00; /* Type of file to open */ 6085 int noLock; /* True to omit locking primitives */ 6086 int rc = SQLITE_OK; /* Function Return Code */ 6087 int ctrlFlags = 0; /* UNIXFILE_* flags */ 6088 6089 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 6090 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 6091 int isCreate = (flags & SQLITE_OPEN_CREATE); 6092 int isReadonly = (flags & SQLITE_OPEN_READONLY); 6093 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 6094 #if SQLITE_ENABLE_LOCKING_STYLE 6095 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); 6096 #endif 6097 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6098 struct statfs fsInfo; 6099 #endif 6100 6101 /* If creating a super- or main-file journal, this function will open 6102 ** a file-descriptor on the directory too. The first time unixSync() 6103 ** is called the directory file descriptor will be fsync()ed and close()d. 6104 */ 6105 int isNewJrnl = (isCreate && ( 6106 eType==SQLITE_OPEN_SUPER_JOURNAL 6107 || eType==SQLITE_OPEN_MAIN_JOURNAL 6108 || eType==SQLITE_OPEN_WAL 6109 )); 6110 6111 /* If argument zPath is a NULL pointer, this function is required to open 6112 ** a temporary file. Use this buffer to store the file name in. 6113 */ 6114 char zTmpname[MAX_PATHNAME+2]; 6115 const char *zName = zPath; 6116 6117 /* Check the following statements are true: 6118 ** 6119 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 6120 ** (b) if CREATE is set, then READWRITE must also be set, and 6121 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 6122 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 6123 */ 6124 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 6125 assert(isCreate==0 || isReadWrite); 6126 assert(isExclusive==0 || isCreate); 6127 assert(isDelete==0 || isCreate); 6128 6129 /* The main DB, main journal, WAL file and super-journal are never 6130 ** automatically deleted. Nor are they ever temporary files. */ 6131 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); 6132 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); 6133 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_SUPER_JOURNAL ); 6134 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); 6135 6136 /* Assert that the upper layer has set one of the "file-type" flags. */ 6137 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 6138 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 6139 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_SUPER_JOURNAL 6140 || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL 6141 ); 6142 6143 /* Detect a pid change and reset the PRNG. There is a race condition 6144 ** here such that two or more threads all trying to open databases at 6145 ** the same instant might all reset the PRNG. But multiple resets 6146 ** are harmless. 6147 */ 6148 if( randomnessPid!=osGetpid(0) ){ 6149 randomnessPid = osGetpid(0); 6150 sqlite3_randomness(0,0); 6151 } 6152 memset(p, 0, sizeof(unixFile)); 6153 6154 #ifdef SQLITE_ASSERT_NO_FILES 6155 /* Applications that never read or write a persistent disk files */ 6156 assert( zName==0 ); 6157 #endif 6158 6159 if( eType==SQLITE_OPEN_MAIN_DB ){ 6160 UnixUnusedFd *pUnused; 6161 pUnused = findReusableFd(zName, flags); 6162 if( pUnused ){ 6163 fd = pUnused->fd; 6164 }else{ 6165 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 6166 if( !pUnused ){ 6167 return SQLITE_NOMEM_BKPT; 6168 } 6169 } 6170 p->pPreallocatedUnused = pUnused; 6171 6172 /* Database filenames are double-zero terminated if they are not 6173 ** URIs with parameters. Hence, they can always be passed into 6174 ** sqlite3_uri_parameter(). */ 6175 assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); 6176 6177 }else if( !zName ){ 6178 /* If zName is NULL, the upper layer is requesting a temp file. */ 6179 assert(isDelete && !isNewJrnl); 6180 rc = unixGetTempname(pVfs->mxPathname, zTmpname); 6181 if( rc!=SQLITE_OK ){ 6182 return rc; 6183 } 6184 zName = zTmpname; 6185 6186 /* Generated temporary filenames are always double-zero terminated 6187 ** for use by sqlite3_uri_parameter(). */ 6188 assert( zName[strlen(zName)+1]==0 ); 6189 } 6190 6191 /* Determine the value of the flags parameter passed to POSIX function 6192 ** open(). These must be calculated even if open() is not called, as 6193 ** they may be stored as part of the file handle and used by the 6194 ** 'conch file' locking functions later on. */ 6195 if( isReadonly ) openFlags |= O_RDONLY; 6196 if( isReadWrite ) openFlags |= O_RDWR; 6197 if( isCreate ) openFlags |= O_CREAT; 6198 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); 6199 openFlags |= (O_LARGEFILE|O_BINARY|O_NOFOLLOW); 6200 6201 if( fd<0 ){ 6202 mode_t openMode; /* Permissions to create file with */ 6203 uid_t uid; /* Userid for the file */ 6204 gid_t gid; /* Groupid for the file */ 6205 rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); 6206 if( rc!=SQLITE_OK ){ 6207 assert( !p->pPreallocatedUnused ); 6208 assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); 6209 return rc; 6210 } 6211 fd = robust_open(zName, openFlags, openMode); 6212 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); 6213 assert( !isExclusive || (openFlags & O_CREAT)!=0 ); 6214 if( fd<0 ){ 6215 if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){ 6216 /* If unable to create a journal because the directory is not 6217 ** writable, change the error code to indicate that. */ 6218 rc = SQLITE_READONLY_DIRECTORY; 6219 }else if( errno!=EISDIR && isReadWrite ){ 6220 /* Failed to open the file for read/write access. Try read-only. */ 6221 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 6222 openFlags &= ~(O_RDWR|O_CREAT); 6223 flags |= SQLITE_OPEN_READONLY; 6224 openFlags |= O_RDONLY; 6225 isReadonly = 1; 6226 fd = robust_open(zName, openFlags, openMode); 6227 } 6228 } 6229 if( fd<0 ){ 6230 int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); 6231 if( rc==SQLITE_OK ) rc = rc2; 6232 goto open_finished; 6233 } 6234 6235 /* The owner of the rollback journal or WAL file should always be the 6236 ** same as the owner of the database file. Try to ensure that this is 6237 ** the case. The chown() system call will be a no-op if the current 6238 ** process lacks root privileges, be we should at least try. Without 6239 ** this step, if a root process opens a database file, it can leave 6240 ** behinds a journal/WAL that is owned by root and hence make the 6241 ** database inaccessible to unprivileged processes. 6242 ** 6243 ** If openMode==0, then that means uid and gid are not set correctly 6244 ** (probably because SQLite is configured to use 8+3 filename mode) and 6245 ** in that case we do not want to attempt the chown(). 6246 */ 6247 if( openMode && (flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL))!=0 ){ 6248 robustFchown(fd, uid, gid); 6249 } 6250 } 6251 assert( fd>=0 ); 6252 if( pOutFlags ){ 6253 *pOutFlags = flags; 6254 } 6255 6256 if( p->pPreallocatedUnused ){ 6257 p->pPreallocatedUnused->fd = fd; 6258 p->pPreallocatedUnused->flags = 6259 flags & (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); 6260 } 6261 6262 if( isDelete ){ 6263 #if OS_VXWORKS 6264 zPath = zName; 6265 #elif defined(SQLITE_UNLINK_AFTER_CLOSE) 6266 zPath = sqlite3_mprintf("%s", zName); 6267 if( zPath==0 ){ 6268 robust_close(p, fd, __LINE__); 6269 return SQLITE_NOMEM_BKPT; 6270 } 6271 #else 6272 osUnlink(zName); 6273 #endif 6274 } 6275 #if SQLITE_ENABLE_LOCKING_STYLE 6276 else{ 6277 p->openFlags = openFlags; 6278 } 6279 #endif 6280 6281 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6282 if( fstatfs(fd, &fsInfo) == -1 ){ 6283 storeLastErrno(p, errno); 6284 robust_close(p, fd, __LINE__); 6285 return SQLITE_IOERR_ACCESS; 6286 } 6287 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { 6288 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6289 } 6290 if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { 6291 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6292 } 6293 #endif 6294 6295 /* Set up appropriate ctrlFlags */ 6296 if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; 6297 if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; 6298 noLock = eType!=SQLITE_OPEN_MAIN_DB; 6299 if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; 6300 if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC; 6301 if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; 6302 6303 #if SQLITE_ENABLE_LOCKING_STYLE 6304 #if SQLITE_PREFER_PROXY_LOCKING 6305 isAutoProxy = 1; 6306 #endif 6307 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ 6308 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); 6309 int useProxy = 0; 6310 6311 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 6312 ** never use proxy, NULL means use proxy for non-local files only. */ 6313 if( envforce!=NULL ){ 6314 useProxy = atoi(envforce)>0; 6315 }else{ 6316 useProxy = !(fsInfo.f_flags&MNT_LOCAL); 6317 } 6318 if( useProxy ){ 6319 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6320 if( rc==SQLITE_OK ){ 6321 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); 6322 if( rc!=SQLITE_OK ){ 6323 /* Use unixClose to clean up the resources added in fillInUnixFile 6324 ** and clear all the structure's references. Specifically, 6325 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 6326 */ 6327 unixClose(pFile); 6328 return rc; 6329 } 6330 } 6331 goto open_finished; 6332 } 6333 } 6334 #endif 6335 6336 assert( zPath==0 || zPath[0]=='/' 6337 || eType==SQLITE_OPEN_SUPER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL 6338 ); 6339 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6340 6341 open_finished: 6342 if( rc!=SQLITE_OK ){ 6343 sqlite3_free(p->pPreallocatedUnused); 6344 } 6345 return rc; 6346 } 6347 6348 6349 /* 6350 ** Delete the file at zPath. If the dirSync argument is true, fsync() 6351 ** the directory after deleting the file. 6352 */ 6353 static int unixDelete( 6354 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ 6355 const char *zPath, /* Name of file to be deleted */ 6356 int dirSync /* If true, fsync() directory after deleting file */ 6357 ){ 6358 int rc = SQLITE_OK; 6359 UNUSED_PARAMETER(NotUsed); 6360 SimulateIOError(return SQLITE_IOERR_DELETE); 6361 if( osUnlink(zPath)==(-1) ){ 6362 if( errno==ENOENT 6363 #if OS_VXWORKS 6364 || osAccess(zPath,0)!=0 6365 #endif 6366 ){ 6367 rc = SQLITE_IOERR_DELETE_NOENT; 6368 }else{ 6369 rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); 6370 } 6371 return rc; 6372 } 6373 #ifndef SQLITE_DISABLE_DIRSYNC 6374 if( (dirSync & 1)!=0 ){ 6375 int fd; 6376 rc = osOpenDirectory(zPath, &fd); 6377 if( rc==SQLITE_OK ){ 6378 if( full_fsync(fd,0,0) ){ 6379 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); 6380 } 6381 robust_close(0, fd, __LINE__); 6382 }else{ 6383 assert( rc==SQLITE_CANTOPEN ); 6384 rc = SQLITE_OK; 6385 } 6386 } 6387 #endif 6388 return rc; 6389 } 6390 6391 /* 6392 ** Test the existence of or access permissions of file zPath. The 6393 ** test performed depends on the value of flags: 6394 ** 6395 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 6396 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 6397 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 6398 ** 6399 ** Otherwise return 0. 6400 */ 6401 static int unixAccess( 6402 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ 6403 const char *zPath, /* Path of the file to examine */ 6404 int flags, /* What do we want to learn about the zPath file? */ 6405 int *pResOut /* Write result boolean here */ 6406 ){ 6407 UNUSED_PARAMETER(NotUsed); 6408 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 6409 assert( pResOut!=0 ); 6410 6411 /* The spec says there are three possible values for flags. But only 6412 ** two of them are actually used */ 6413 assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); 6414 6415 if( flags==SQLITE_ACCESS_EXISTS ){ 6416 struct stat buf; 6417 *pResOut = 0==osStat(zPath, &buf) && 6418 (!S_ISREG(buf.st_mode) || buf.st_size>0); 6419 }else{ 6420 *pResOut = osAccess(zPath, W_OK|R_OK)==0; 6421 } 6422 return SQLITE_OK; 6423 } 6424 6425 /* 6426 ** A pathname under construction 6427 */ 6428 typedef struct DbPath DbPath; 6429 struct DbPath { 6430 int rc; /* Non-zero following any error */ 6431 int nSymlink; /* Number of symlinks resolved */ 6432 char *zOut; /* Write the pathname here */ 6433 int nOut; /* Bytes of space available to zOut[] */ 6434 int nUsed; /* Bytes of zOut[] currently being used */ 6435 }; 6436 6437 /* Forward reference */ 6438 static void appendAllPathElements(DbPath*,const char*); 6439 6440 /* 6441 ** Append a single path element to the DbPath under construction 6442 */ 6443 static void appendOnePathElement( 6444 DbPath *pPath, /* Path under construction, to which to append zName */ 6445 const char *zName, /* Name to append to pPath. Not zero-terminated */ 6446 int nName /* Number of significant bytes in zName */ 6447 ){ 6448 assert( nName>0 ); 6449 assert( zName!=0 ); 6450 if( zName[0]=='.' ){ 6451 if( nName==1 ) return; 6452 if( zName[1]=='.' && nName==2 ){ 6453 if( pPath->nUsed<=1 ){ 6454 pPath->rc = SQLITE_ERROR; 6455 return; 6456 } 6457 assert( pPath->zOut[0]=='/' ); 6458 while( pPath->zOut[--pPath->nUsed]!='/' ){} 6459 return; 6460 } 6461 } 6462 if( pPath->nUsed + nName + 2 >= pPath->nOut ){ 6463 pPath->rc = SQLITE_ERROR; 6464 return; 6465 } 6466 pPath->zOut[pPath->nUsed++] = '/'; 6467 memcpy(&pPath->zOut[pPath->nUsed], zName, nName); 6468 pPath->nUsed += nName; 6469 #if defined(HAVE_READLINK) && defined(HAVE_LSTAT) 6470 if( pPath->rc==SQLITE_OK ){ 6471 const char *zIn; 6472 struct stat buf; 6473 pPath->zOut[pPath->nUsed] = 0; 6474 zIn = pPath->zOut; 6475 if( osLstat(zIn, &buf)!=0 ){ 6476 if( errno!=ENOENT ){ 6477 pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn); 6478 } 6479 }else if( S_ISLNK(buf.st_mode) ){ 6480 ssize_t got; 6481 char zLnk[SQLITE_MAX_PATHLEN+2]; 6482 if( pPath->nSymlink++ > SQLITE_MAX_SYMLINK ){ 6483 pPath->rc = SQLITE_CANTOPEN_BKPT; 6484 return; 6485 } 6486 got = osReadlink(zIn, zLnk, sizeof(zLnk)-2); 6487 if( got<=0 || got>=(ssize_t)sizeof(zLnk)-2 ){ 6488 pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn); 6489 return; 6490 } 6491 zLnk[got] = 0; 6492 if( zLnk[0]=='/' ){ 6493 pPath->nUsed = 0; 6494 }else{ 6495 pPath->nUsed -= nName + 1; 6496 } 6497 appendAllPathElements(pPath, zLnk); 6498 } 6499 } 6500 #endif 6501 } 6502 6503 /* 6504 ** Append all path elements in zPath to the DbPath under construction. 6505 */ 6506 static void appendAllPathElements( 6507 DbPath *pPath, /* Path under construction, to which to append zName */ 6508 const char *zPath /* Path to append to pPath. Is zero-terminated */ 6509 ){ 6510 int i = 0; 6511 int j = 0; 6512 do{ 6513 while( zPath[i] && zPath[i]!='/' ){ i++; } 6514 if( i>j ){ 6515 appendOnePathElement(pPath, &zPath[j], i-j); 6516 } 6517 j = i+1; 6518 }while( zPath[i++] ); 6519 } 6520 6521 /* 6522 ** Turn a relative pathname into a full pathname. The relative path 6523 ** is stored as a nul-terminated string in the buffer pointed to by 6524 ** zPath. 6525 ** 6526 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 6527 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 6528 ** this buffer before returning. 6529 */ 6530 static int unixFullPathname( 6531 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 6532 const char *zPath, /* Possibly relative input path */ 6533 int nOut, /* Size of output buffer in bytes */ 6534 char *zOut /* Output buffer */ 6535 ){ 6536 DbPath path; 6537 UNUSED_PARAMETER(pVfs); 6538 path.rc = 0; 6539 path.nUsed = 0; 6540 path.nSymlink = 0; 6541 path.nOut = nOut; 6542 path.zOut = zOut; 6543 if( zPath[0]!='/' ){ 6544 char zPwd[SQLITE_MAX_PATHLEN+2]; 6545 if( osGetcwd(zPwd, sizeof(zPwd)-2)==0 ){ 6546 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); 6547 } 6548 appendAllPathElements(&path, zPwd); 6549 } 6550 appendAllPathElements(&path, zPath); 6551 zOut[path.nUsed] = 0; 6552 if( path.rc || path.nUsed<2 ) return SQLITE_CANTOPEN_BKPT; 6553 if( path.nSymlink ) return SQLITE_OK_SYMLINK; 6554 return SQLITE_OK; 6555 } 6556 6557 #ifndef SQLITE_OMIT_LOAD_EXTENSION 6558 /* 6559 ** Interfaces for opening a shared library, finding entry points 6560 ** within the shared library, and closing the shared library. 6561 */ 6562 #include <dlfcn.h> 6563 static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ 6564 UNUSED_PARAMETER(NotUsed); 6565 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 6566 } 6567 6568 /* 6569 ** SQLite calls this function immediately after a call to unixDlSym() or 6570 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 6571 ** message is available, it is written to zBufOut. If no error message 6572 ** is available, zBufOut is left unmodified and SQLite uses a default 6573 ** error message. 6574 */ 6575 static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ 6576 const char *zErr; 6577 UNUSED_PARAMETER(NotUsed); 6578 unixEnterMutex(); 6579 zErr = dlerror(); 6580 if( zErr ){ 6581 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 6582 } 6583 unixLeaveMutex(); 6584 } 6585 static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ 6586 /* 6587 ** GCC with -pedantic-errors says that C90 does not allow a void* to be 6588 ** cast into a pointer to a function. And yet the library dlsym() routine 6589 ** returns a void* which is really a pointer to a function. So how do we 6590 ** use dlsym() with -pedantic-errors? 6591 ** 6592 ** Variable x below is defined to be a pointer to a function taking 6593 ** parameters void* and const char* and returning a pointer to a function. 6594 ** We initialize x by assigning it a pointer to the dlsym() function. 6595 ** (That assignment requires a cast.) Then we call the function that 6596 ** x points to. 6597 ** 6598 ** This work-around is unlikely to work correctly on any system where 6599 ** you really cannot cast a function pointer into void*. But then, on the 6600 ** other hand, dlsym() will not work on such a system either, so we have 6601 ** not really lost anything. 6602 */ 6603 void (*(*x)(void*,const char*))(void); 6604 UNUSED_PARAMETER(NotUsed); 6605 x = (void(*(*)(void*,const char*))(void))dlsym; 6606 return (*x)(p, zSym); 6607 } 6608 static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ 6609 UNUSED_PARAMETER(NotUsed); 6610 dlclose(pHandle); 6611 } 6612 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 6613 #define unixDlOpen 0 6614 #define unixDlError 0 6615 #define unixDlSym 0 6616 #define unixDlClose 0 6617 #endif 6618 6619 /* 6620 ** Write nBuf bytes of random data to the supplied buffer zBuf. 6621 */ 6622 static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ 6623 UNUSED_PARAMETER(NotUsed); 6624 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); 6625 6626 /* We have to initialize zBuf to prevent valgrind from reporting 6627 ** errors. The reports issued by valgrind are incorrect - we would 6628 ** prefer that the randomness be increased by making use of the 6629 ** uninitialized space in zBuf - but valgrind errors tend to worry 6630 ** some users. Rather than argue, it seems easier just to initialize 6631 ** the whole array and silence valgrind, even if that means less randomness 6632 ** in the random seed. 6633 ** 6634 ** When testing, initializing zBuf[] to zero is all we do. That means 6635 ** that we always use the same random number sequence. This makes the 6636 ** tests repeatable. 6637 */ 6638 memset(zBuf, 0, nBuf); 6639 randomnessPid = osGetpid(0); 6640 #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) 6641 { 6642 int fd, got; 6643 fd = robust_open("/dev/urandom", O_RDONLY, 0); 6644 if( fd<0 ){ 6645 time_t t; 6646 time(&t); 6647 memcpy(zBuf, &t, sizeof(t)); 6648 memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); 6649 assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); 6650 nBuf = sizeof(t) + sizeof(randomnessPid); 6651 }else{ 6652 do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); 6653 robust_close(0, fd, __LINE__); 6654 } 6655 } 6656 #endif 6657 return nBuf; 6658 } 6659 6660 6661 /* 6662 ** Sleep for a little while. Return the amount of time slept. 6663 ** The argument is the number of microseconds we want to sleep. 6664 ** The return value is the number of microseconds of sleep actually 6665 ** requested from the underlying operating system, a number which 6666 ** might be greater than or equal to the argument, but not less 6667 ** than the argument. 6668 */ 6669 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ 6670 #if OS_VXWORKS 6671 struct timespec sp; 6672 6673 sp.tv_sec = microseconds / 1000000; 6674 sp.tv_nsec = (microseconds % 1000000) * 1000; 6675 nanosleep(&sp, NULL); 6676 UNUSED_PARAMETER(NotUsed); 6677 return microseconds; 6678 #elif defined(HAVE_USLEEP) && HAVE_USLEEP 6679 if( microseconds>=1000000 ) sleep(microseconds/1000000); 6680 if( microseconds%1000000 ) usleep(microseconds%1000000); 6681 UNUSED_PARAMETER(NotUsed); 6682 return microseconds; 6683 #else 6684 int seconds = (microseconds+999999)/1000000; 6685 sleep(seconds); 6686 UNUSED_PARAMETER(NotUsed); 6687 return seconds*1000000; 6688 #endif 6689 } 6690 6691 /* 6692 ** The following variable, if set to a non-zero value, is interpreted as 6693 ** the number of seconds since 1970 and is used to set the result of 6694 ** sqlite3OsCurrentTime() during testing. 6695 */ 6696 #ifdef SQLITE_TEST 6697 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ 6698 #endif 6699 6700 /* 6701 ** Find the current time (in Universal Coordinated Time). Write into *piNow 6702 ** the current time and date as a Julian Day number times 86_400_000. In 6703 ** other words, write into *piNow the number of milliseconds since the Julian 6704 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the 6705 ** proleptic Gregorian calendar. 6706 ** 6707 ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date 6708 ** cannot be found. 6709 */ 6710 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ 6711 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; 6712 int rc = SQLITE_OK; 6713 #if defined(NO_GETTOD) 6714 time_t t; 6715 time(&t); 6716 *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; 6717 #elif OS_VXWORKS 6718 struct timespec sNow; 6719 clock_gettime(CLOCK_REALTIME, &sNow); 6720 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; 6721 #else 6722 struct timeval sNow; 6723 (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ 6724 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; 6725 #endif 6726 6727 #ifdef SQLITE_TEST 6728 if( sqlite3_current_time ){ 6729 *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; 6730 } 6731 #endif 6732 UNUSED_PARAMETER(NotUsed); 6733 return rc; 6734 } 6735 6736 #ifndef SQLITE_OMIT_DEPRECATED 6737 /* 6738 ** Find the current time (in Universal Coordinated Time). Write the 6739 ** current time and date as a Julian Day number into *prNow and 6740 ** return 0. Return 1 if the time and date cannot be found. 6741 */ 6742 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ 6743 sqlite3_int64 i = 0; 6744 int rc; 6745 UNUSED_PARAMETER(NotUsed); 6746 rc = unixCurrentTimeInt64(0, &i); 6747 *prNow = i/86400000.0; 6748 return rc; 6749 } 6750 #else 6751 # define unixCurrentTime 0 6752 #endif 6753 6754 /* 6755 ** The xGetLastError() method is designed to return a better 6756 ** low-level error message when operating-system problems come up 6757 ** during SQLite operation. Only the integer return code is currently 6758 ** used. 6759 */ 6760 static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ 6761 UNUSED_PARAMETER(NotUsed); 6762 UNUSED_PARAMETER(NotUsed2); 6763 UNUSED_PARAMETER(NotUsed3); 6764 return errno; 6765 } 6766 6767 6768 /* 6769 ************************ End of sqlite3_vfs methods *************************** 6770 ******************************************************************************/ 6771 6772 /****************************************************************************** 6773 ************************** Begin Proxy Locking ******************************** 6774 ** 6775 ** Proxy locking is a "uber-locking-method" in this sense: It uses the 6776 ** other locking methods on secondary lock files. Proxy locking is a 6777 ** meta-layer over top of the primitive locking implemented above. For 6778 ** this reason, the division that implements of proxy locking is deferred 6779 ** until late in the file (here) after all of the other I/O methods have 6780 ** been defined - so that the primitive locking methods are available 6781 ** as services to help with the implementation of proxy locking. 6782 ** 6783 **** 6784 ** 6785 ** The default locking schemes in SQLite use byte-range locks on the 6786 ** database file to coordinate safe, concurrent access by multiple readers 6787 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking 6788 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented 6789 ** as POSIX read & write locks over fixed set of locations (via fsctl), 6790 ** on AFP and SMB only exclusive byte-range locks are available via fsctl 6791 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. 6792 ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected 6793 ** address in the shared range is taken for a SHARED lock, the entire 6794 ** shared range is taken for an EXCLUSIVE lock): 6795 ** 6796 ** PENDING_BYTE 0x40000000 6797 ** RESERVED_BYTE 0x40000001 6798 ** SHARED_RANGE 0x40000002 -> 0x40000200 6799 ** 6800 ** This works well on the local file system, but shows a nearly 100x 6801 ** slowdown in read performance on AFP because the AFP client disables 6802 ** the read cache when byte-range locks are present. Enabling the read 6803 ** cache exposes a cache coherency problem that is present on all OS X 6804 ** supported network file systems. NFS and AFP both observe the 6805 ** close-to-open semantics for ensuring cache coherency 6806 ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively 6807 ** address the requirements for concurrent database access by multiple 6808 ** readers and writers 6809 ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. 6810 ** 6811 ** To address the performance and cache coherency issues, proxy file locking 6812 ** changes the way database access is controlled by limiting access to a 6813 ** single host at a time and moving file locks off of the database file 6814 ** and onto a proxy file on the local file system. 6815 ** 6816 ** 6817 ** Using proxy locks 6818 ** ----------------- 6819 ** 6820 ** C APIs 6821 ** 6822 ** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE, 6823 ** <proxy_path> | ":auto:"); 6824 ** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE, 6825 ** &<proxy_path>); 6826 ** 6827 ** 6828 ** SQL pragmas 6829 ** 6830 ** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto: 6831 ** PRAGMA [database.]lock_proxy_file 6832 ** 6833 ** Specifying ":auto:" means that if there is a conch file with a matching 6834 ** host ID in it, the proxy path in the conch file will be used, otherwise 6835 ** a proxy path based on the user's temp dir 6836 ** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the 6837 ** actual proxy file name is generated from the name and path of the 6838 ** database file. For example: 6839 ** 6840 ** For database path "/Users/me/foo.db" 6841 ** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:") 6842 ** 6843 ** Once a lock proxy is configured for a database connection, it can not 6844 ** be removed, however it may be switched to a different proxy path via 6845 ** the above APIs (assuming the conch file is not being held by another 6846 ** connection or process). 6847 ** 6848 ** 6849 ** How proxy locking works 6850 ** ----------------------- 6851 ** 6852 ** Proxy file locking relies primarily on two new supporting files: 6853 ** 6854 ** * conch file to limit access to the database file to a single host 6855 ** at a time 6856 ** 6857 ** * proxy file to act as a proxy for the advisory locks normally 6858 ** taken on the database 6859 ** 6860 ** The conch file - to use a proxy file, sqlite must first "hold the conch" 6861 ** by taking an sqlite-style shared lock on the conch file, reading the 6862 ** contents and comparing the host's unique host ID (see below) and lock 6863 ** proxy path against the values stored in the conch. The conch file is 6864 ** stored in the same directory as the database file and the file name 6865 ** is patterned after the database file name as ".<databasename>-conch". 6866 ** If the conch file does not exist, or its contents do not match the 6867 ** host ID and/or proxy path, then the lock is escalated to an exclusive 6868 ** lock and the conch file contents is updated with the host ID and proxy 6869 ** path and the lock is downgraded to a shared lock again. If the conch 6870 ** is held by another process (with a shared lock), the exclusive lock 6871 ** will fail and SQLITE_BUSY is returned. 6872 ** 6873 ** The proxy file - a single-byte file used for all advisory file locks 6874 ** normally taken on the database file. This allows for safe sharing 6875 ** of the database file for multiple readers and writers on the same 6876 ** host (the conch ensures that they all use the same local lock file). 6877 ** 6878 ** Requesting the lock proxy does not immediately take the conch, it is 6879 ** only taken when the first request to lock database file is made. 6880 ** This matches the semantics of the traditional locking behavior, where 6881 ** opening a connection to a database file does not take a lock on it. 6882 ** The shared lock and an open file descriptor are maintained until 6883 ** the connection to the database is closed. 6884 ** 6885 ** The proxy file and the lock file are never deleted so they only need 6886 ** to be created the first time they are used. 6887 ** 6888 ** Configuration options 6889 ** --------------------- 6890 ** 6891 ** SQLITE_PREFER_PROXY_LOCKING 6892 ** 6893 ** Database files accessed on non-local file systems are 6894 ** automatically configured for proxy locking, lock files are 6895 ** named automatically using the same logic as 6896 ** PRAGMA lock_proxy_file=":auto:" 6897 ** 6898 ** SQLITE_PROXY_DEBUG 6899 ** 6900 ** Enables the logging of error messages during host id file 6901 ** retrieval and creation 6902 ** 6903 ** LOCKPROXYDIR 6904 ** 6905 ** Overrides the default directory used for lock proxy files that 6906 ** are named automatically via the ":auto:" setting 6907 ** 6908 ** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 6909 ** 6910 ** Permissions to use when creating a directory for storing the 6911 ** lock proxy files, only used when LOCKPROXYDIR is not set. 6912 ** 6913 ** 6914 ** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING, 6915 ** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will 6916 ** force proxy locking to be used for every database file opened, and 0 6917 ** will force automatic proxy locking to be disabled for all database 6918 ** files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or 6919 ** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING). 6920 */ 6921 6922 /* 6923 ** Proxy locking is only available on MacOSX 6924 */ 6925 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 6926 6927 /* 6928 ** The proxyLockingContext has the path and file structures for the remote 6929 ** and local proxy files in it 6930 */ 6931 typedef struct proxyLockingContext proxyLockingContext; 6932 struct proxyLockingContext { 6933 unixFile *conchFile; /* Open conch file */ 6934 char *conchFilePath; /* Name of the conch file */ 6935 unixFile *lockProxy; /* Open proxy lock file */ 6936 char *lockProxyPath; /* Name of the proxy lock file */ 6937 char *dbPath; /* Name of the open file */ 6938 int conchHeld; /* 1 if the conch is held, -1 if lockless */ 6939 int nFails; /* Number of conch taking failures */ 6940 void *oldLockingContext; /* Original lockingcontext to restore on close */ 6941 sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ 6942 }; 6943 6944 /* 6945 ** The proxy lock file path for the database at dbPath is written into lPath, 6946 ** which must point to valid, writable memory large enough for a maxLen length 6947 ** file path. 6948 */ 6949 static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ 6950 int len; 6951 int dbLen; 6952 int i; 6953 6954 #ifdef LOCKPROXYDIR 6955 len = strlcpy(lPath, LOCKPROXYDIR, maxLen); 6956 #else 6957 # ifdef _CS_DARWIN_USER_TEMP_DIR 6958 { 6959 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ 6960 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", 6961 lPath, errno, osGetpid(0))); 6962 return SQLITE_IOERR_LOCK; 6963 } 6964 len = strlcat(lPath, "sqliteplocks", maxLen); 6965 } 6966 # else 6967 len = strlcpy(lPath, "/tmp/", maxLen); 6968 # endif 6969 #endif 6970 6971 if( lPath[len-1]!='/' ){ 6972 len = strlcat(lPath, "/", maxLen); 6973 } 6974 6975 /* transform the db path to a unique cache name */ 6976 dbLen = (int)strlen(dbPath); 6977 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ 6978 char c = dbPath[i]; 6979 lPath[i+len] = (c=='/')?'_':c; 6980 } 6981 lPath[i+len]='\0'; 6982 strlcat(lPath, ":auto:", maxLen); 6983 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0))); 6984 return SQLITE_OK; 6985 } 6986 6987 /* 6988 ** Creates the lock file and any missing directories in lockPath 6989 */ 6990 static int proxyCreateLockPath(const char *lockPath){ 6991 int i, len; 6992 char buf[MAXPATHLEN]; 6993 int start = 0; 6994 6995 assert(lockPath!=NULL); 6996 /* try to create all the intermediate directories */ 6997 len = (int)strlen(lockPath); 6998 buf[0] = lockPath[0]; 6999 for( i=1; i<len; i++ ){ 7000 if( lockPath[i] == '/' && (i - start > 0) ){ 7001 /* only mkdir if leaf dir != "." or "/" or ".." */ 7002 if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 7003 || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ 7004 buf[i]='\0'; 7005 if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ 7006 int err=errno; 7007 if( err!=EEXIST ) { 7008 OSTRACE(("CREATELOCKPATH FAILED creating %s, " 7009 "'%s' proxy lock path=%s pid=%d\n", 7010 buf, strerror(err), lockPath, osGetpid(0))); 7011 return err; 7012 } 7013 } 7014 } 7015 start=i+1; 7016 } 7017 buf[i] = lockPath[i]; 7018 } 7019 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0))); 7020 return 0; 7021 } 7022 7023 /* 7024 ** Create a new VFS file descriptor (stored in memory obtained from 7025 ** sqlite3_malloc) and open the file named "path" in the file descriptor. 7026 ** 7027 ** The caller is responsible not only for closing the file descriptor 7028 ** but also for freeing the memory associated with the file descriptor. 7029 */ 7030 static int proxyCreateUnixFile( 7031 const char *path, /* path for the new unixFile */ 7032 unixFile **ppFile, /* unixFile created and returned by ref */ 7033 int islockfile /* if non zero missing dirs will be created */ 7034 ) { 7035 int fd = -1; 7036 unixFile *pNew; 7037 int rc = SQLITE_OK; 7038 int openFlags = O_RDWR | O_CREAT | O_NOFOLLOW; 7039 sqlite3_vfs dummyVfs; 7040 int terrno = 0; 7041 UnixUnusedFd *pUnused = NULL; 7042 7043 /* 1. first try to open/create the file 7044 ** 2. if that fails, and this is a lock file (not-conch), try creating 7045 ** the parent directories and then try again. 7046 ** 3. if that fails, try to open the file read-only 7047 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file 7048 */ 7049 pUnused = findReusableFd(path, openFlags); 7050 if( pUnused ){ 7051 fd = pUnused->fd; 7052 }else{ 7053 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 7054 if( !pUnused ){ 7055 return SQLITE_NOMEM_BKPT; 7056 } 7057 } 7058 if( fd<0 ){ 7059 fd = robust_open(path, openFlags, 0); 7060 terrno = errno; 7061 if( fd<0 && errno==ENOENT && islockfile ){ 7062 if( proxyCreateLockPath(path) == SQLITE_OK ){ 7063 fd = robust_open(path, openFlags, 0); 7064 } 7065 } 7066 } 7067 if( fd<0 ){ 7068 openFlags = O_RDONLY | O_NOFOLLOW; 7069 fd = robust_open(path, openFlags, 0); 7070 terrno = errno; 7071 } 7072 if( fd<0 ){ 7073 if( islockfile ){ 7074 return SQLITE_BUSY; 7075 } 7076 switch (terrno) { 7077 case EACCES: 7078 return SQLITE_PERM; 7079 case EIO: 7080 return SQLITE_IOERR_LOCK; /* even though it is the conch */ 7081 default: 7082 return SQLITE_CANTOPEN_BKPT; 7083 } 7084 } 7085 7086 pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); 7087 if( pNew==NULL ){ 7088 rc = SQLITE_NOMEM_BKPT; 7089 goto end_create_proxy; 7090 } 7091 memset(pNew, 0, sizeof(unixFile)); 7092 pNew->openFlags = openFlags; 7093 memset(&dummyVfs, 0, sizeof(dummyVfs)); 7094 dummyVfs.pAppData = (void*)&autolockIoFinder; 7095 dummyVfs.zName = "dummy"; 7096 pUnused->fd = fd; 7097 pUnused->flags = openFlags; 7098 pNew->pPreallocatedUnused = pUnused; 7099 7100 rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); 7101 if( rc==SQLITE_OK ){ 7102 *ppFile = pNew; 7103 return SQLITE_OK; 7104 } 7105 end_create_proxy: 7106 robust_close(pNew, fd, __LINE__); 7107 sqlite3_free(pNew); 7108 sqlite3_free(pUnused); 7109 return rc; 7110 } 7111 7112 #ifdef SQLITE_TEST 7113 /* simulate multiple hosts by creating unique hostid file paths */ 7114 int sqlite3_hostid_num = 0; 7115 #endif 7116 7117 #define PROXY_HOSTIDLEN 16 /* conch file host id length */ 7118 7119 #if HAVE_GETHOSTUUID 7120 /* Not always defined in the headers as it ought to be */ 7121 extern int gethostuuid(uuid_t id, const struct timespec *wait); 7122 #endif 7123 7124 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 7125 ** bytes of writable memory. 7126 */ 7127 static int proxyGetHostID(unsigned char *pHostID, int *pError){ 7128 assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); 7129 memset(pHostID, 0, PROXY_HOSTIDLEN); 7130 #if HAVE_GETHOSTUUID 7131 { 7132 struct timespec timeout = {1, 0}; /* 1 sec timeout */ 7133 if( gethostuuid(pHostID, &timeout) ){ 7134 int err = errno; 7135 if( pError ){ 7136 *pError = err; 7137 } 7138 return SQLITE_IOERR; 7139 } 7140 } 7141 #else 7142 UNUSED_PARAMETER(pError); 7143 #endif 7144 #ifdef SQLITE_TEST 7145 /* simulate multiple hosts by creating unique hostid file paths */ 7146 if( sqlite3_hostid_num != 0){ 7147 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); 7148 } 7149 #endif 7150 7151 return SQLITE_OK; 7152 } 7153 7154 /* The conch file contains the header, host id and lock file path 7155 */ 7156 #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ 7157 #define PROXY_HEADERLEN 1 /* conch file header length */ 7158 #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) 7159 #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) 7160 7161 /* 7162 ** Takes an open conch file, copies the contents to a new path and then moves 7163 ** it back. The newly created file's file descriptor is assigned to the 7164 ** conch file structure and finally the original conch file descriptor is 7165 ** closed. Returns zero if successful. 7166 */ 7167 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ 7168 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7169 unixFile *conchFile = pCtx->conchFile; 7170 char tPath[MAXPATHLEN]; 7171 char buf[PROXY_MAXCONCHLEN]; 7172 char *cPath = pCtx->conchFilePath; 7173 size_t readLen = 0; 7174 size_t pathLen = 0; 7175 char errmsg[64] = ""; 7176 int fd = -1; 7177 int rc = -1; 7178 UNUSED_PARAMETER(myHostID); 7179 7180 /* create a new path by replace the trailing '-conch' with '-break' */ 7181 pathLen = strlcpy(tPath, cPath, MAXPATHLEN); 7182 if( pathLen>MAXPATHLEN || pathLen<6 || 7183 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ 7184 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); 7185 goto end_breaklock; 7186 } 7187 /* read the conch content */ 7188 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); 7189 if( readLen<PROXY_PATHINDEX ){ 7190 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); 7191 goto end_breaklock; 7192 } 7193 /* write it out to the temporary break file */ 7194 fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW), 0); 7195 if( fd<0 ){ 7196 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); 7197 goto end_breaklock; 7198 } 7199 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ 7200 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); 7201 goto end_breaklock; 7202 } 7203 if( rename(tPath, cPath) ){ 7204 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); 7205 goto end_breaklock; 7206 } 7207 rc = 0; 7208 fprintf(stderr, "broke stale lock on %s\n", cPath); 7209 robust_close(pFile, conchFile->h, __LINE__); 7210 conchFile->h = fd; 7211 conchFile->openFlags = O_RDWR | O_CREAT; 7212 7213 end_breaklock: 7214 if( rc ){ 7215 if( fd>=0 ){ 7216 osUnlink(tPath); 7217 robust_close(pFile, fd, __LINE__); 7218 } 7219 fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); 7220 } 7221 return rc; 7222 } 7223 7224 /* Take the requested lock on the conch file and break a stale lock if the 7225 ** host id matches. 7226 */ 7227 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ 7228 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7229 unixFile *conchFile = pCtx->conchFile; 7230 int rc = SQLITE_OK; 7231 int nTries = 0; 7232 struct timespec conchModTime; 7233 7234 memset(&conchModTime, 0, sizeof(conchModTime)); 7235 do { 7236 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 7237 nTries ++; 7238 if( rc==SQLITE_BUSY ){ 7239 /* If the lock failed (busy): 7240 * 1st try: get the mod time of the conch, wait 0.5s and try again. 7241 * 2nd try: fail if the mod time changed or host id is different, wait 7242 * 10 sec and try again 7243 * 3rd try: break the lock unless the mod time has changed. 7244 */ 7245 struct stat buf; 7246 if( osFstat(conchFile->h, &buf) ){ 7247 storeLastErrno(pFile, errno); 7248 return SQLITE_IOERR_LOCK; 7249 } 7250 7251 if( nTries==1 ){ 7252 conchModTime = buf.st_mtimespec; 7253 unixSleep(0,500000); /* wait 0.5 sec and try the lock again*/ 7254 continue; 7255 } 7256 7257 assert( nTries>1 ); 7258 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 7259 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ 7260 return SQLITE_BUSY; 7261 } 7262 7263 if( nTries==2 ){ 7264 char tBuf[PROXY_MAXCONCHLEN]; 7265 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); 7266 if( len<0 ){ 7267 storeLastErrno(pFile, errno); 7268 return SQLITE_IOERR_LOCK; 7269 } 7270 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ 7271 /* don't break the lock if the host id doesn't match */ 7272 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ 7273 return SQLITE_BUSY; 7274 } 7275 }else{ 7276 /* don't break the lock on short read or a version mismatch */ 7277 return SQLITE_BUSY; 7278 } 7279 unixSleep(0,10000000); /* wait 10 sec and try the lock again */ 7280 continue; 7281 } 7282 7283 assert( nTries==3 ); 7284 if( 0==proxyBreakConchLock(pFile, myHostID) ){ 7285 rc = SQLITE_OK; 7286 if( lockType==EXCLUSIVE_LOCK ){ 7287 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); 7288 } 7289 if( !rc ){ 7290 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 7291 } 7292 } 7293 } 7294 } while( rc==SQLITE_BUSY && nTries<3 ); 7295 7296 return rc; 7297 } 7298 7299 /* Takes the conch by taking a shared lock and read the contents conch, if 7300 ** lockPath is non-NULL, the host ID and lock file path must match. A NULL 7301 ** lockPath means that the lockPath in the conch file will be used if the 7302 ** host IDs match, or a new lock path will be generated automatically 7303 ** and written to the conch file. 7304 */ 7305 static int proxyTakeConch(unixFile *pFile){ 7306 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7307 7308 if( pCtx->conchHeld!=0 ){ 7309 return SQLITE_OK; 7310 }else{ 7311 unixFile *conchFile = pCtx->conchFile; 7312 uuid_t myHostID; 7313 int pError = 0; 7314 char readBuf[PROXY_MAXCONCHLEN]; 7315 char lockPath[MAXPATHLEN]; 7316 char *tempLockPath = NULL; 7317 int rc = SQLITE_OK; 7318 int createConch = 0; 7319 int hostIdMatch = 0; 7320 int readLen = 0; 7321 int tryOldLockPath = 0; 7322 int forceNewLockPath = 0; 7323 7324 OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, 7325 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 7326 osGetpid(0))); 7327 7328 rc = proxyGetHostID(myHostID, &pError); 7329 if( (rc&0xff)==SQLITE_IOERR ){ 7330 storeLastErrno(pFile, pError); 7331 goto end_takeconch; 7332 } 7333 rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); 7334 if( rc!=SQLITE_OK ){ 7335 goto end_takeconch; 7336 } 7337 /* read the existing conch file */ 7338 readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); 7339 if( readLen<0 ){ 7340 /* I/O error: lastErrno set by seekAndRead */ 7341 storeLastErrno(pFile, conchFile->lastErrno); 7342 rc = SQLITE_IOERR_READ; 7343 goto end_takeconch; 7344 }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || 7345 readBuf[0]!=(char)PROXY_CONCHVERSION ){ 7346 /* a short read or version format mismatch means we need to create a new 7347 ** conch file. 7348 */ 7349 createConch = 1; 7350 } 7351 /* if the host id matches and the lock path already exists in the conch 7352 ** we'll try to use the path there, if we can't open that path, we'll 7353 ** retry with a new auto-generated path 7354 */ 7355 do { /* in case we need to try again for an :auto: named lock file */ 7356 7357 if( !createConch && !forceNewLockPath ){ 7358 hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, 7359 PROXY_HOSTIDLEN); 7360 /* if the conch has data compare the contents */ 7361 if( !pCtx->lockProxyPath ){ 7362 /* for auto-named local lock file, just check the host ID and we'll 7363 ** use the local lock file path that's already in there 7364 */ 7365 if( hostIdMatch ){ 7366 size_t pathLen = (readLen - PROXY_PATHINDEX); 7367 7368 if( pathLen>=MAXPATHLEN ){ 7369 pathLen=MAXPATHLEN-1; 7370 } 7371 memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); 7372 lockPath[pathLen] = 0; 7373 tempLockPath = lockPath; 7374 tryOldLockPath = 1; 7375 /* create a copy of the lock path if the conch is taken */ 7376 goto end_takeconch; 7377 } 7378 }else if( hostIdMatch 7379 && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], 7380 readLen-PROXY_PATHINDEX) 7381 ){ 7382 /* conch host and lock path match */ 7383 goto end_takeconch; 7384 } 7385 } 7386 7387 /* if the conch isn't writable and doesn't match, we can't take it */ 7388 if( (conchFile->openFlags&O_RDWR) == 0 ){ 7389 rc = SQLITE_BUSY; 7390 goto end_takeconch; 7391 } 7392 7393 /* either the conch didn't match or we need to create a new one */ 7394 if( !pCtx->lockProxyPath ){ 7395 proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); 7396 tempLockPath = lockPath; 7397 /* create a copy of the lock path _only_ if the conch is taken */ 7398 } 7399 7400 /* update conch with host and path (this will fail if other process 7401 ** has a shared lock already), if the host id matches, use the big 7402 ** stick. 7403 */ 7404 futimes(conchFile->h, NULL); 7405 if( hostIdMatch && !createConch ){ 7406 if( conchFile->pInode && conchFile->pInode->nShared>1 ){ 7407 /* We are trying for an exclusive lock but another thread in this 7408 ** same process is still holding a shared lock. */ 7409 rc = SQLITE_BUSY; 7410 } else { 7411 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 7412 } 7413 }else{ 7414 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 7415 } 7416 if( rc==SQLITE_OK ){ 7417 char writeBuffer[PROXY_MAXCONCHLEN]; 7418 int writeSize = 0; 7419 7420 writeBuffer[0] = (char)PROXY_CONCHVERSION; 7421 memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); 7422 if( pCtx->lockProxyPath!=NULL ){ 7423 strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, 7424 MAXPATHLEN); 7425 }else{ 7426 strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); 7427 } 7428 writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); 7429 robust_ftruncate(conchFile->h, writeSize); 7430 rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); 7431 full_fsync(conchFile->h,0,0); 7432 /* If we created a new conch file (not just updated the contents of a 7433 ** valid conch file), try to match the permissions of the database 7434 */ 7435 if( rc==SQLITE_OK && createConch ){ 7436 struct stat buf; 7437 int err = osFstat(pFile->h, &buf); 7438 if( err==0 ){ 7439 mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | 7440 S_IROTH|S_IWOTH); 7441 /* try to match the database file R/W permissions, ignore failure */ 7442 #ifndef SQLITE_PROXY_DEBUG 7443 osFchmod(conchFile->h, cmode); 7444 #else 7445 do{ 7446 rc = osFchmod(conchFile->h, cmode); 7447 }while( rc==(-1) && errno==EINTR ); 7448 if( rc!=0 ){ 7449 int code = errno; 7450 fprintf(stderr, "fchmod %o FAILED with %d %s\n", 7451 cmode, code, strerror(code)); 7452 } else { 7453 fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); 7454 } 7455 }else{ 7456 int code = errno; 7457 fprintf(stderr, "STAT FAILED[%d] with %d %s\n", 7458 err, code, strerror(code)); 7459 #endif 7460 } 7461 } 7462 } 7463 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); 7464 7465 end_takeconch: 7466 OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); 7467 if( rc==SQLITE_OK && pFile->openFlags ){ 7468 int fd; 7469 if( pFile->h>=0 ){ 7470 robust_close(pFile, pFile->h, __LINE__); 7471 } 7472 pFile->h = -1; 7473 fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); 7474 OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); 7475 if( fd>=0 ){ 7476 pFile->h = fd; 7477 }else{ 7478 rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called 7479 during locking */ 7480 } 7481 } 7482 if( rc==SQLITE_OK && !pCtx->lockProxy ){ 7483 char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath; 7484 rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); 7485 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ 7486 /* we couldn't create the proxy lock file with the old lock file path 7487 ** so try again via auto-naming 7488 */ 7489 forceNewLockPath = 1; 7490 tryOldLockPath = 0; 7491 continue; /* go back to the do {} while start point, try again */ 7492 } 7493 } 7494 if( rc==SQLITE_OK ){ 7495 /* Need to make a copy of path if we extracted the value 7496 ** from the conch file or the path was allocated on the stack 7497 */ 7498 if( tempLockPath ){ 7499 pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); 7500 if( !pCtx->lockProxyPath ){ 7501 rc = SQLITE_NOMEM_BKPT; 7502 } 7503 } 7504 } 7505 if( rc==SQLITE_OK ){ 7506 pCtx->conchHeld = 1; 7507 7508 if( pCtx->lockProxy->pMethod == &afpIoMethods ){ 7509 afpLockingContext *afpCtx; 7510 afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; 7511 afpCtx->dbPath = pCtx->lockProxyPath; 7512 } 7513 } else { 7514 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7515 } 7516 OSTRACE(("TAKECONCH %d %s\n", conchFile->h, 7517 rc==SQLITE_OK?"ok":"failed")); 7518 return rc; 7519 } while (1); /* in case we need to retry the :auto: lock file - 7520 ** we should never get here except via the 'continue' call. */ 7521 } 7522 } 7523 7524 /* 7525 ** If pFile holds a lock on a conch file, then release that lock. 7526 */ 7527 static int proxyReleaseConch(unixFile *pFile){ 7528 int rc = SQLITE_OK; /* Subroutine return code */ 7529 proxyLockingContext *pCtx; /* The locking context for the proxy lock */ 7530 unixFile *conchFile; /* Name of the conch file */ 7531 7532 pCtx = (proxyLockingContext *)pFile->lockingContext; 7533 conchFile = pCtx->conchFile; 7534 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, 7535 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 7536 osGetpid(0))); 7537 if( pCtx->conchHeld>0 ){ 7538 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7539 } 7540 pCtx->conchHeld = 0; 7541 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, 7542 (rc==SQLITE_OK ? "ok" : "failed"))); 7543 return rc; 7544 } 7545 7546 /* 7547 ** Given the name of a database file, compute the name of its conch file. 7548 ** Store the conch filename in memory obtained from sqlite3_malloc64(). 7549 ** Make *pConchPath point to the new name. Return SQLITE_OK on success 7550 ** or SQLITE_NOMEM if unable to obtain memory. 7551 ** 7552 ** The caller is responsible for ensuring that the allocated memory 7553 ** space is eventually freed. 7554 ** 7555 ** *pConchPath is set to NULL if a memory allocation error occurs. 7556 */ 7557 static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ 7558 int i; /* Loop counter */ 7559 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ 7560 char *conchPath; /* buffer in which to construct conch name */ 7561 7562 /* Allocate space for the conch filename and initialize the name to 7563 ** the name of the original database file. */ 7564 *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); 7565 if( conchPath==0 ){ 7566 return SQLITE_NOMEM_BKPT; 7567 } 7568 memcpy(conchPath, dbPath, len+1); 7569 7570 /* now insert a "." before the last / character */ 7571 for( i=(len-1); i>=0; i-- ){ 7572 if( conchPath[i]=='/' ){ 7573 i++; 7574 break; 7575 } 7576 } 7577 conchPath[i]='.'; 7578 while ( i<len ){ 7579 conchPath[i+1]=dbPath[i]; 7580 i++; 7581 } 7582 7583 /* append the "-conch" suffix to the file */ 7584 memcpy(&conchPath[i+1], "-conch", 7); 7585 assert( (int)strlen(conchPath) == len+7 ); 7586 7587 return SQLITE_OK; 7588 } 7589 7590 7591 /* Takes a fully configured proxy locking-style unix file and switches 7592 ** the local lock file path 7593 */ 7594 static int switchLockProxyPath(unixFile *pFile, const char *path) { 7595 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7596 char *oldPath = pCtx->lockProxyPath; 7597 int rc = SQLITE_OK; 7598 7599 if( pFile->eFileLock!=NO_LOCK ){ 7600 return SQLITE_BUSY; 7601 } 7602 7603 /* nothing to do if the path is NULL, :auto: or matches the existing path */ 7604 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || 7605 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ 7606 return SQLITE_OK; 7607 }else{ 7608 unixFile *lockProxy = pCtx->lockProxy; 7609 pCtx->lockProxy=NULL; 7610 pCtx->conchHeld = 0; 7611 if( lockProxy!=NULL ){ 7612 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); 7613 if( rc ) return rc; 7614 sqlite3_free(lockProxy); 7615 } 7616 sqlite3_free(oldPath); 7617 pCtx->lockProxyPath = sqlite3DbStrDup(0, path); 7618 } 7619 7620 return rc; 7621 } 7622 7623 /* 7624 ** pFile is a file that has been opened by a prior xOpen call. dbPath 7625 ** is a string buffer at least MAXPATHLEN+1 characters in size. 7626 ** 7627 ** This routine find the filename associated with pFile and writes it 7628 ** int dbPath. 7629 */ 7630 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ 7631 #if defined(__APPLE__) 7632 if( pFile->pMethod == &afpIoMethods ){ 7633 /* afp style keeps a reference to the db path in the filePath field 7634 ** of the struct */ 7635 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7636 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, 7637 MAXPATHLEN); 7638 } else 7639 #endif 7640 if( pFile->pMethod == &dotlockIoMethods ){ 7641 /* dot lock style uses the locking context to store the dot lock 7642 ** file path */ 7643 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); 7644 memcpy(dbPath, (char *)pFile->lockingContext, len + 1); 7645 }else{ 7646 /* all other styles use the locking context to store the db file path */ 7647 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7648 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); 7649 } 7650 return SQLITE_OK; 7651 } 7652 7653 /* 7654 ** Takes an already filled in unix file and alters it so all file locking 7655 ** will be performed on the local proxy lock file. The following fields 7656 ** are preserved in the locking context so that they can be restored and 7657 ** the unix structure properly cleaned up at close time: 7658 ** ->lockingContext 7659 ** ->pMethod 7660 */ 7661 static int proxyTransformUnixFile(unixFile *pFile, const char *path) { 7662 proxyLockingContext *pCtx; 7663 char dbPath[MAXPATHLEN+1]; /* Name of the database file */ 7664 char *lockPath=NULL; 7665 int rc = SQLITE_OK; 7666 7667 if( pFile->eFileLock!=NO_LOCK ){ 7668 return SQLITE_BUSY; 7669 } 7670 proxyGetDbPathForUnixFile(pFile, dbPath); 7671 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ 7672 lockPath=NULL; 7673 }else{ 7674 lockPath=(char *)path; 7675 } 7676 7677 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, 7678 (lockPath ? lockPath : ":auto:"), osGetpid(0))); 7679 7680 pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 7681 if( pCtx==0 ){ 7682 return SQLITE_NOMEM_BKPT; 7683 } 7684 memset(pCtx, 0, sizeof(*pCtx)); 7685 7686 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); 7687 if( rc==SQLITE_OK ){ 7688 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); 7689 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ 7690 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and 7691 ** (c) the file system is read-only, then enable no-locking access. 7692 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts 7693 ** that openFlags will have only one of O_RDONLY or O_RDWR. 7694 */ 7695 struct statfs fsInfo; 7696 struct stat conchInfo; 7697 int goLockless = 0; 7698 7699 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { 7700 int err = errno; 7701 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ 7702 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; 7703 } 7704 } 7705 if( goLockless ){ 7706 pCtx->conchHeld = -1; /* read only FS/ lockless */ 7707 rc = SQLITE_OK; 7708 } 7709 } 7710 } 7711 if( rc==SQLITE_OK && lockPath ){ 7712 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); 7713 } 7714 7715 if( rc==SQLITE_OK ){ 7716 pCtx->dbPath = sqlite3DbStrDup(0, dbPath); 7717 if( pCtx->dbPath==NULL ){ 7718 rc = SQLITE_NOMEM_BKPT; 7719 } 7720 } 7721 if( rc==SQLITE_OK ){ 7722 /* all memory is allocated, proxys are created and assigned, 7723 ** switch the locking context and pMethod then return. 7724 */ 7725 pCtx->oldLockingContext = pFile->lockingContext; 7726 pFile->lockingContext = pCtx; 7727 pCtx->pOldMethod = pFile->pMethod; 7728 pFile->pMethod = &proxyIoMethods; 7729 }else{ 7730 if( pCtx->conchFile ){ 7731 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); 7732 sqlite3_free(pCtx->conchFile); 7733 } 7734 sqlite3DbFree(0, pCtx->lockProxyPath); 7735 sqlite3_free(pCtx->conchFilePath); 7736 sqlite3_free(pCtx); 7737 } 7738 OSTRACE(("TRANSPROXY %d %s\n", pFile->h, 7739 (rc==SQLITE_OK ? "ok" : "failed"))); 7740 return rc; 7741 } 7742 7743 7744 /* 7745 ** This routine handles sqlite3_file_control() calls that are specific 7746 ** to proxy locking. 7747 */ 7748 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 7749 switch( op ){ 7750 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 7751 unixFile *pFile = (unixFile*)id; 7752 if( pFile->pMethod == &proxyIoMethods ){ 7753 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7754 proxyTakeConch(pFile); 7755 if( pCtx->lockProxyPath ){ 7756 *(const char **)pArg = pCtx->lockProxyPath; 7757 }else{ 7758 *(const char **)pArg = ":auto: (not held)"; 7759 } 7760 } else { 7761 *(const char **)pArg = NULL; 7762 } 7763 return SQLITE_OK; 7764 } 7765 case SQLITE_FCNTL_SET_LOCKPROXYFILE: { 7766 unixFile *pFile = (unixFile*)id; 7767 int rc = SQLITE_OK; 7768 int isProxyStyle = (pFile->pMethod == &proxyIoMethods); 7769 if( pArg==NULL || (const char *)pArg==0 ){ 7770 if( isProxyStyle ){ 7771 /* turn off proxy locking - not supported. If support is added for 7772 ** switching proxy locking mode off then it will need to fail if 7773 ** the journal mode is WAL mode. 7774 */ 7775 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/; 7776 }else{ 7777 /* turn off proxy locking - already off - NOOP */ 7778 rc = SQLITE_OK; 7779 } 7780 }else{ 7781 const char *proxyPath = (const char *)pArg; 7782 if( isProxyStyle ){ 7783 proxyLockingContext *pCtx = 7784 (proxyLockingContext*)pFile->lockingContext; 7785 if( !strcmp(pArg, ":auto:") 7786 || (pCtx->lockProxyPath && 7787 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) 7788 ){ 7789 rc = SQLITE_OK; 7790 }else{ 7791 rc = switchLockProxyPath(pFile, proxyPath); 7792 } 7793 }else{ 7794 /* turn on proxy file locking */ 7795 rc = proxyTransformUnixFile(pFile, proxyPath); 7796 } 7797 } 7798 return rc; 7799 } 7800 default: { 7801 assert( 0 ); /* The call assures that only valid opcodes are sent */ 7802 } 7803 } 7804 /*NOTREACHED*/ assert(0); 7805 return SQLITE_ERROR; 7806 } 7807 7808 /* 7809 ** Within this division (the proxying locking implementation) the procedures 7810 ** above this point are all utilities. The lock-related methods of the 7811 ** proxy-locking sqlite3_io_method object follow. 7812 */ 7813 7814 7815 /* 7816 ** This routine checks if there is a RESERVED lock held on the specified 7817 ** file by this or any other process. If such a lock is held, set *pResOut 7818 ** to a non-zero value otherwise *pResOut is set to zero. The return value 7819 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 7820 */ 7821 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { 7822 unixFile *pFile = (unixFile*)id; 7823 int rc = proxyTakeConch(pFile); 7824 if( rc==SQLITE_OK ){ 7825 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7826 if( pCtx->conchHeld>0 ){ 7827 unixFile *proxy = pCtx->lockProxy; 7828 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); 7829 }else{ /* conchHeld < 0 is lockless */ 7830 pResOut=0; 7831 } 7832 } 7833 return rc; 7834 } 7835 7836 /* 7837 ** Lock the file with the lock specified by parameter eFileLock - one 7838 ** of the following: 7839 ** 7840 ** (1) SHARED_LOCK 7841 ** (2) RESERVED_LOCK 7842 ** (3) PENDING_LOCK 7843 ** (4) EXCLUSIVE_LOCK 7844 ** 7845 ** Sometimes when requesting one lock state, additional lock states 7846 ** are inserted in between. The locking might fail on one of the later 7847 ** transitions leaving the lock state different from what it started but 7848 ** still short of its goal. The following chart shows the allowed 7849 ** transitions and the inserted intermediate states: 7850 ** 7851 ** UNLOCKED -> SHARED 7852 ** SHARED -> RESERVED 7853 ** SHARED -> (PENDING) -> EXCLUSIVE 7854 ** RESERVED -> (PENDING) -> EXCLUSIVE 7855 ** PENDING -> EXCLUSIVE 7856 ** 7857 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 7858 ** routine to lower a locking level. 7859 */ 7860 static int proxyLock(sqlite3_file *id, int eFileLock) { 7861 unixFile *pFile = (unixFile*)id; 7862 int rc = proxyTakeConch(pFile); 7863 if( rc==SQLITE_OK ){ 7864 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7865 if( pCtx->conchHeld>0 ){ 7866 unixFile *proxy = pCtx->lockProxy; 7867 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); 7868 pFile->eFileLock = proxy->eFileLock; 7869 }else{ 7870 /* conchHeld < 0 is lockless */ 7871 } 7872 } 7873 return rc; 7874 } 7875 7876 7877 /* 7878 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 7879 ** must be either NO_LOCK or SHARED_LOCK. 7880 ** 7881 ** If the locking level of the file descriptor is already at or below 7882 ** the requested locking level, this routine is a no-op. 7883 */ 7884 static int proxyUnlock(sqlite3_file *id, int eFileLock) { 7885 unixFile *pFile = (unixFile*)id; 7886 int rc = proxyTakeConch(pFile); 7887 if( rc==SQLITE_OK ){ 7888 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7889 if( pCtx->conchHeld>0 ){ 7890 unixFile *proxy = pCtx->lockProxy; 7891 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); 7892 pFile->eFileLock = proxy->eFileLock; 7893 }else{ 7894 /* conchHeld < 0 is lockless */ 7895 } 7896 } 7897 return rc; 7898 } 7899 7900 /* 7901 ** Close a file that uses proxy locks. 7902 */ 7903 static int proxyClose(sqlite3_file *id) { 7904 if( ALWAYS(id) ){ 7905 unixFile *pFile = (unixFile*)id; 7906 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7907 unixFile *lockProxy = pCtx->lockProxy; 7908 unixFile *conchFile = pCtx->conchFile; 7909 int rc = SQLITE_OK; 7910 7911 if( lockProxy ){ 7912 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); 7913 if( rc ) return rc; 7914 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); 7915 if( rc ) return rc; 7916 sqlite3_free(lockProxy); 7917 pCtx->lockProxy = 0; 7918 } 7919 if( conchFile ){ 7920 if( pCtx->conchHeld ){ 7921 rc = proxyReleaseConch(pFile); 7922 if( rc ) return rc; 7923 } 7924 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); 7925 if( rc ) return rc; 7926 sqlite3_free(conchFile); 7927 } 7928 sqlite3DbFree(0, pCtx->lockProxyPath); 7929 sqlite3_free(pCtx->conchFilePath); 7930 sqlite3DbFree(0, pCtx->dbPath); 7931 /* restore the original locking context and pMethod then close it */ 7932 pFile->lockingContext = pCtx->oldLockingContext; 7933 pFile->pMethod = pCtx->pOldMethod; 7934 sqlite3_free(pCtx); 7935 return pFile->pMethod->xClose(id); 7936 } 7937 return SQLITE_OK; 7938 } 7939 7940 7941 7942 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 7943 /* 7944 ** The proxy locking style is intended for use with AFP filesystems. 7945 ** And since AFP is only supported on MacOSX, the proxy locking is also 7946 ** restricted to MacOSX. 7947 ** 7948 ** 7949 ******************* End of the proxy lock implementation ********************** 7950 ******************************************************************************/ 7951 7952 /* 7953 ** Initialize the operating system interface. 7954 ** 7955 ** This routine registers all VFS implementations for unix-like operating 7956 ** systems. This routine, and the sqlite3_os_end() routine that follows, 7957 ** should be the only routines in this file that are visible from other 7958 ** files. 7959 ** 7960 ** This routine is called once during SQLite initialization and by a 7961 ** single thread. The memory allocation and mutex subsystems have not 7962 ** necessarily been initialized when this routine is called, and so they 7963 ** should not be used. 7964 */ 7965 int sqlite3_os_init(void){ 7966 /* 7967 ** The following macro defines an initializer for an sqlite3_vfs object. 7968 ** The name of the VFS is NAME. The pAppData is a pointer to a pointer 7969 ** to the "finder" function. (pAppData is a pointer to a pointer because 7970 ** silly C90 rules prohibit a void* from being cast to a function pointer 7971 ** and so we have to go through the intermediate pointer to avoid problems 7972 ** when compiling with -pedantic-errors on GCC.) 7973 ** 7974 ** The FINDER parameter to this macro is the name of the pointer to the 7975 ** finder-function. The finder-function returns a pointer to the 7976 ** sqlite_io_methods object that implements the desired locking 7977 ** behaviors. See the division above that contains the IOMETHODS 7978 ** macro for addition information on finder-functions. 7979 ** 7980 ** Most finders simply return a pointer to a fixed sqlite3_io_methods 7981 ** object. But the "autolockIoFinder" available on MacOSX does a little 7982 ** more than that; it looks at the filesystem type that hosts the 7983 ** database file and tries to choose an locking method appropriate for 7984 ** that filesystem time. 7985 */ 7986 #define UNIXVFS(VFSNAME, FINDER) { \ 7987 3, /* iVersion */ \ 7988 sizeof(unixFile), /* szOsFile */ \ 7989 MAX_PATHNAME, /* mxPathname */ \ 7990 0, /* pNext */ \ 7991 VFSNAME, /* zName */ \ 7992 (void*)&FINDER, /* pAppData */ \ 7993 unixOpen, /* xOpen */ \ 7994 unixDelete, /* xDelete */ \ 7995 unixAccess, /* xAccess */ \ 7996 unixFullPathname, /* xFullPathname */ \ 7997 unixDlOpen, /* xDlOpen */ \ 7998 unixDlError, /* xDlError */ \ 7999 unixDlSym, /* xDlSym */ \ 8000 unixDlClose, /* xDlClose */ \ 8001 unixRandomness, /* xRandomness */ \ 8002 unixSleep, /* xSleep */ \ 8003 unixCurrentTime, /* xCurrentTime */ \ 8004 unixGetLastError, /* xGetLastError */ \ 8005 unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ 8006 unixSetSystemCall, /* xSetSystemCall */ \ 8007 unixGetSystemCall, /* xGetSystemCall */ \ 8008 unixNextSystemCall, /* xNextSystemCall */ \ 8009 } 8010 8011 /* 8012 ** All default VFSes for unix are contained in the following array. 8013 ** 8014 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified 8015 ** by the SQLite core when the VFS is registered. So the following 8016 ** array cannot be const. 8017 */ 8018 static sqlite3_vfs aVfs[] = { 8019 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 8020 UNIXVFS("unix", autolockIoFinder ), 8021 #elif OS_VXWORKS 8022 UNIXVFS("unix", vxworksIoFinder ), 8023 #else 8024 UNIXVFS("unix", posixIoFinder ), 8025 #endif 8026 UNIXVFS("unix-none", nolockIoFinder ), 8027 UNIXVFS("unix-dotfile", dotlockIoFinder ), 8028 UNIXVFS("unix-excl", posixIoFinder ), 8029 #if OS_VXWORKS 8030 UNIXVFS("unix-namedsem", semIoFinder ), 8031 #endif 8032 #if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS 8033 UNIXVFS("unix-posix", posixIoFinder ), 8034 #endif 8035 #if SQLITE_ENABLE_LOCKING_STYLE 8036 UNIXVFS("unix-flock", flockIoFinder ), 8037 #endif 8038 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 8039 UNIXVFS("unix-afp", afpIoFinder ), 8040 UNIXVFS("unix-nfs", nfsIoFinder ), 8041 UNIXVFS("unix-proxy", proxyIoFinder ), 8042 #endif 8043 }; 8044 unsigned int i; /* Loop counter */ 8045 8046 /* Double-check that the aSyscall[] array has been constructed 8047 ** correctly. See ticket [bb3a86e890c8e96ab] */ 8048 assert( ArraySize(aSyscall)==29 ); 8049 8050 /* Register all VFSes defined in the aVfs[] array */ 8051 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ 8052 sqlite3_vfs_register(&aVfs[i], i==0); 8053 } 8054 unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); 8055 8056 #ifndef SQLITE_OMIT_WAL 8057 /* Validate lock assumptions */ 8058 assert( SQLITE_SHM_NLOCK==8 ); /* Number of available locks */ 8059 assert( UNIX_SHM_BASE==120 ); /* Start of locking area */ 8060 /* Locks: 8061 ** WRITE UNIX_SHM_BASE 120 8062 ** CKPT UNIX_SHM_BASE+1 121 8063 ** RECOVER UNIX_SHM_BASE+2 122 8064 ** READ-0 UNIX_SHM_BASE+3 123 8065 ** READ-1 UNIX_SHM_BASE+4 124 8066 ** READ-2 UNIX_SHM_BASE+5 125 8067 ** READ-3 UNIX_SHM_BASE+6 126 8068 ** READ-4 UNIX_SHM_BASE+7 127 8069 ** DMS UNIX_SHM_BASE+8 128 8070 */ 8071 assert( UNIX_SHM_DMS==128 ); /* Byte offset of the deadman-switch */ 8072 #endif 8073 8074 /* Initialize temp file dir array. */ 8075 unixTempFileInit(); 8076 8077 return SQLITE_OK; 8078 } 8079 8080 /* 8081 ** Shutdown the operating system interface. 8082 ** 8083 ** Some operating systems might need to do some cleanup in this routine, 8084 ** to release dynamically allocated objects. But not on unix. 8085 ** This routine is a no-op for unix. 8086 */ 8087 int sqlite3_os_end(void){ 8088 unixBigLock = 0; 8089 return SQLITE_OK; 8090 } 8091 8092 #endif /* SQLITE_OS_UNIX */ 8093