1 /* 2 ** 2004 May 22 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the VFS implementation for unix-like operating systems 14 ** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. 15 ** 16 ** There are actually several different VFS implementations in this file. 17 ** The differences are in the way that file locking is done. The default 18 ** implementation uses Posix Advisory Locks. Alternative implementations 19 ** use flock(), dot-files, various proprietary locking schemas, or simply 20 ** skip locking all together. 21 ** 22 ** This source file is organized into divisions where the logic for various 23 ** subfunctions is contained within the appropriate division. PLEASE 24 ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed 25 ** in the correct division and should be clearly labeled. 26 ** 27 ** The layout of divisions is as follows: 28 ** 29 ** * General-purpose declarations and utility functions. 30 ** * Unique file ID logic used by VxWorks. 31 ** * Various locking primitive implementations (all except proxy locking): 32 ** + for Posix Advisory Locks 33 ** + for no-op locks 34 ** + for dot-file locks 35 ** + for flock() locking 36 ** + for named semaphore locks (VxWorks only) 37 ** + for AFP filesystem locks (MacOSX only) 38 ** * sqlite3_file methods not associated with locking. 39 ** * Definitions of sqlite3_io_methods objects for all locking 40 ** methods plus "finder" functions for each locking method. 41 ** * sqlite3_vfs method implementations. 42 ** * Locking primitives for the proxy uber-locking-method. 
(MacOSX only) 43 ** * Definitions of sqlite3_vfs objects for all locking methods 44 ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). 45 */ 46 #include "sqliteInt.h" 47 #if SQLITE_OS_UNIX /* This file is used on unix only */ 48 49 /* 50 ** There are various methods for file locking used for concurrency 51 ** control: 52 ** 53 ** 1. POSIX locking (the default), 54 ** 2. No locking, 55 ** 3. Dot-file locking, 56 ** 4. flock() locking, 57 ** 5. AFP locking (OSX only), 58 ** 6. Named POSIX semaphores (VXWorks only), 59 ** 7. proxy locking. (OSX only) 60 ** 61 ** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE 62 ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic 63 ** selection of the appropriate locking style based on the filesystem 64 ** where the database is located. 65 */ 66 #if !defined(SQLITE_ENABLE_LOCKING_STYLE) 67 # if defined(__APPLE__) 68 # define SQLITE_ENABLE_LOCKING_STYLE 1 69 # else 70 # define SQLITE_ENABLE_LOCKING_STYLE 0 71 # endif 72 #endif 73 74 /* Use pread() and pwrite() if they are available */ 75 #if defined(__APPLE__) 76 # define HAVE_PREAD 1 77 # define HAVE_PWRITE 1 78 #endif 79 #if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64) 80 # undef USE_PREAD 81 # define USE_PREAD64 1 82 #elif defined(HAVE_PREAD) && defined(HAVE_PWRITE) 83 # undef USE_PREAD64 84 # define USE_PREAD 1 85 #endif 86 87 /* 88 ** standard include files. 89 */ 90 #include <sys/types.h> 91 #include <sys/stat.h> 92 #include <fcntl.h> 93 #include <sys/ioctl.h> 94 #include <unistd.h> 95 #include <time.h> 96 #include <sys/time.h> 97 #include <errno.h> 98 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 99 # include <sys/mman.h> 100 #endif 101 102 #if SQLITE_ENABLE_LOCKING_STYLE 103 # include <sys/ioctl.h> 104 # include <sys/file.h> 105 # include <sys/param.h> 106 #endif /* SQLITE_ENABLE_LOCKING_STYLE */ 107 108 /* 109 ** Try to determine if gethostuuid() is available based on standard 110 ** macros. 
This might sometimes compute the wrong value for some 111 ** obscure platforms. For those cases, simply compile with one of 112 ** the following: 113 ** 114 ** -DHAVE_GETHOSTUUID=0 115 ** -DHAVE_GETHOSTUUID=1 116 ** 117 ** None if this matters except when building on Apple products with 118 ** -DSQLITE_ENABLE_LOCKING_STYLE. 119 */ 120 #ifndef HAVE_GETHOSTUUID 121 # define HAVE_GETHOSTUUID 0 122 # if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ 123 (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) 124 # if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ 125 && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))\ 126 && (!defined(TARGET_OS_MACCATALYST) || (TARGET_OS_MACCATALYST==0)) 127 # undef HAVE_GETHOSTUUID 128 # define HAVE_GETHOSTUUID 1 129 # else 130 # warning "gethostuuid() is disabled." 131 # endif 132 # endif 133 #endif 134 135 136 #if OS_VXWORKS 137 # include <sys/ioctl.h> 138 # include <semaphore.h> 139 # include <limits.h> 140 #endif /* OS_VXWORKS */ 141 142 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 143 # include <sys/mount.h> 144 #endif 145 146 #ifdef HAVE_UTIME 147 # include <utime.h> 148 #endif 149 150 /* 151 ** Allowed values of unixFile.fsFlags 152 */ 153 #define SQLITE_FSFLAGS_IS_MSDOS 0x1 154 155 /* 156 ** If we are to be thread-safe, include the pthreads header. 157 */ 158 #if SQLITE_THREADSAFE 159 # include <pthread.h> 160 #endif 161 162 /* 163 ** Default permissions when creating a new file 164 */ 165 #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS 166 # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 167 #endif 168 169 /* 170 ** Default permissions when creating auto proxy dir 171 */ 172 #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 173 # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 174 #endif 175 176 /* 177 ** Maximum supported path-length. 
178 */ 179 #define MAX_PATHNAME 512 180 181 /* 182 ** Maximum supported symbolic links 183 */ 184 #define SQLITE_MAX_SYMLINKS 100 185 186 /* Always cast the getpid() return type for compatibility with 187 ** kernel modules in VxWorks. */ 188 #define osGetpid(X) (pid_t)getpid() 189 190 /* 191 ** Only set the lastErrno if the error code is a real error and not 192 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK 193 */ 194 #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) 195 196 /* Forward references */ 197 typedef struct unixShm unixShm; /* Connection shared memory */ 198 typedef struct unixShmNode unixShmNode; /* Shared memory instance */ 199 typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ 200 typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ 201 202 /* 203 ** Sometimes, after a file handle is closed by SQLite, the file descriptor 204 ** cannot be closed immediately. In these cases, instances of the following 205 ** structure are used to store the file descriptor while waiting for an 206 ** opportunity to either close or reuse it. 207 */ 208 struct UnixUnusedFd { 209 int fd; /* File descriptor to close */ 210 int flags; /* Flags this file descriptor was opened with */ 211 UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ 212 }; 213 214 /* 215 ** The unixFile structure is subclass of sqlite3_file specific to the unix 216 ** VFS implementations. 217 */ 218 typedef struct unixFile unixFile; 219 struct unixFile { 220 sqlite3_io_methods const *pMethod; /* Always the first entry */ 221 sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ 222 unixInodeInfo *pInode; /* Info about locks on this inode */ 223 int h; /* The file descriptor */ 224 unsigned char eFileLock; /* The type of lock held on this fd */ 225 unsigned short int ctrlFlags; /* Behavioral bits. 
UNIXFILE_* flags */ 226 int lastErrno; /* The unix errno from last I/O error */ 227 void *lockingContext; /* Locking style specific state */ 228 UnixUnusedFd *pPreallocatedUnused; /* Pre-allocated UnixUnusedFd */ 229 const char *zPath; /* Name of the file */ 230 unixShm *pShm; /* Shared memory segment information */ 231 int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ 232 #if SQLITE_MAX_MMAP_SIZE>0 233 int nFetchOut; /* Number of outstanding xFetch refs */ 234 sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ 235 sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ 236 sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ 237 void *pMapRegion; /* Memory mapped region */ 238 #endif 239 int sectorSize; /* Device sector size */ 240 int deviceCharacteristics; /* Precomputed device characteristics */ 241 #if SQLITE_ENABLE_LOCKING_STYLE 242 int openFlags; /* The flags specified at open() */ 243 #endif 244 #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) 245 unsigned fsFlags; /* cached details from statfs() */ 246 #endif 247 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 248 unsigned iBusyTimeout; /* Wait this many millisec on locks */ 249 #endif 250 #if OS_VXWORKS 251 struct vxworksFileId *pId; /* Unique file ID */ 252 #endif 253 #ifdef SQLITE_DEBUG 254 /* The next group of variables are used to track whether or not the 255 ** transaction counter in bytes 24-27 of database files are updated 256 ** whenever any part of the database changes. An assertion fault will 257 ** occur if a file is updated without also updating the transaction 258 ** counter. This test is made to avoid new problems similar to the 259 ** one described by ticket #3584. 
260 */ 261 unsigned char transCntrChng; /* True if the transaction counter changed */ 262 unsigned char dbUpdate; /* True if any part of database file changed */ 263 unsigned char inNormalWrite; /* True if in a normal write operation */ 264 265 #endif 266 267 #ifdef SQLITE_TEST 268 /* In test mode, increase the size of this structure a bit so that 269 ** it is larger than the struct CrashFile defined in test6.c. 270 */ 271 char aPadding[32]; 272 #endif 273 }; 274 275 /* This variable holds the process id (pid) from when the xRandomness() 276 ** method was called. If xOpen() is called from a different process id, 277 ** indicating that a fork() has occurred, the PRNG will be reset. 278 */ 279 static pid_t randomnessPid = 0; 280 281 /* 282 ** Allowed values for the unixFile.ctrlFlags bitmask: 283 */ 284 #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ 285 #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ 286 #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ 287 #ifndef SQLITE_DISABLE_DIRSYNC 288 # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ 289 #else 290 # define UNIXFILE_DIRSYNC 0x00 291 #endif 292 #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ 293 #define UNIXFILE_DELETE 0x20 /* Delete on close */ 294 #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ 295 #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ 296 297 /* 298 ** Include code that is common to all os_*.c files 299 */ 300 #include "os_common.h" 301 302 /* 303 ** Define various macros that are missing from some systems. 304 */ 305 #ifndef O_LARGEFILE 306 # define O_LARGEFILE 0 307 #endif 308 #ifdef SQLITE_DISABLE_LFS 309 # undef O_LARGEFILE 310 # define O_LARGEFILE 0 311 #endif 312 #ifndef O_NOFOLLOW 313 # define O_NOFOLLOW 0 314 #endif 315 #ifndef O_BINARY 316 # define O_BINARY 0 317 #endif 318 319 /* 320 ** The threadid macro resolves to the thread-id or to 0. Used for 321 ** testing and debugging only. 
322 */ 323 #if SQLITE_THREADSAFE 324 #define threadid pthread_self() 325 #else 326 #define threadid 0 327 #endif 328 329 /* 330 ** HAVE_MREMAP defaults to true on Linux and false everywhere else. 331 */ 332 #if !defined(HAVE_MREMAP) 333 # if defined(__linux__) && defined(_GNU_SOURCE) 334 # define HAVE_MREMAP 1 335 # else 336 # define HAVE_MREMAP 0 337 # endif 338 #endif 339 340 /* 341 ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() 342 ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. 343 */ 344 #ifdef __ANDROID__ 345 # define lseek lseek64 346 #endif 347 348 #ifdef __linux__ 349 /* 350 ** Linux-specific IOCTL magic numbers used for controlling F2FS 351 */ 352 #define F2FS_IOCTL_MAGIC 0xf5 353 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) 354 #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) 355 #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 356 #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 357 #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) 358 #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 359 #endif /* __linux__ */ 360 361 362 /* 363 ** Different Unix systems declare open() in different ways. Same use 364 ** open(const char*,int,mode_t). Others use open(const char*,int,...). 365 ** The difference is important when using a pointer to the function. 366 ** 367 ** The safest way to deal with the problem is to always use this wrapper 368 ** which always has the same well-defined interface. 369 */ 370 static int posixOpen(const char *zFile, int flags, int mode){ 371 return open(zFile, flags, mode); 372 } 373 374 /* Forward reference */ 375 static int openDirectory(const char*, int*); 376 static int unixGetpagesize(void); 377 378 /* 379 ** Many system calls are accessed through pointer-to-functions so that 380 ** they may be overridden at runtime to facilitate fault injection during 381 ** testing and sandboxing. 
The following array holds the names and pointers 382 ** to all overrideable system calls. 383 */ 384 static struct unix_syscall { 385 const char *zName; /* Name of the system call */ 386 sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ 387 sqlite3_syscall_ptr pDefault; /* Default value */ 388 } aSyscall[] = { 389 { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, 390 #define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) 391 392 { "close", (sqlite3_syscall_ptr)close, 0 }, 393 #define osClose ((int(*)(int))aSyscall[1].pCurrent) 394 395 { "access", (sqlite3_syscall_ptr)access, 0 }, 396 #define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) 397 398 { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, 399 #define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) 400 401 { "stat", (sqlite3_syscall_ptr)stat, 0 }, 402 #define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) 403 404 /* 405 ** The DJGPP compiler environment looks mostly like Unix, but it 406 ** lacks the fcntl() system call. So redefine fcntl() to be something 407 ** that always succeeds. This means that locking does not occur under 408 ** DJGPP. But it is DOS - what did you expect? 
409 */ 410 #ifdef __DJGPP__ 411 { "fstat", 0, 0 }, 412 #define osFstat(a,b,c) 0 413 #else 414 { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, 415 #define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) 416 #endif 417 418 { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, 419 #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) 420 421 { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, 422 #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) 423 424 { "read", (sqlite3_syscall_ptr)read, 0 }, 425 #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) 426 427 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 428 { "pread", (sqlite3_syscall_ptr)pread, 0 }, 429 #else 430 { "pread", (sqlite3_syscall_ptr)0, 0 }, 431 #endif 432 #define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) 433 434 #if defined(USE_PREAD64) 435 { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, 436 #else 437 { "pread64", (sqlite3_syscall_ptr)0, 0 }, 438 #endif 439 #define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent) 440 441 { "write", (sqlite3_syscall_ptr)write, 0 }, 442 #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) 443 444 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 445 { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, 446 #else 447 { "pwrite", (sqlite3_syscall_ptr)0, 0 }, 448 #endif 449 #define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ 450 aSyscall[12].pCurrent) 451 452 #if defined(USE_PREAD64) 453 { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, 454 #else 455 { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, 456 #endif 457 #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ 458 aSyscall[13].pCurrent) 459 460 { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, 461 #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) 462 463 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 464 { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, 465 #else 466 { 
"fallocate", (sqlite3_syscall_ptr)0, 0 }, 467 #endif 468 #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) 469 470 { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, 471 #define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) 472 473 { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, 474 #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) 475 476 { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, 477 #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) 478 479 { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, 480 #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) 481 482 #if defined(HAVE_FCHOWN) 483 { "fchown", (sqlite3_syscall_ptr)fchown, 0 }, 484 #else 485 { "fchown", (sqlite3_syscall_ptr)0, 0 }, 486 #endif 487 #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) 488 489 #if defined(HAVE_FCHOWN) 490 { "geteuid", (sqlite3_syscall_ptr)geteuid, 0 }, 491 #else 492 { "geteuid", (sqlite3_syscall_ptr)0, 0 }, 493 #endif 494 #define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) 495 496 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 497 { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, 498 #else 499 { "mmap", (sqlite3_syscall_ptr)0, 0 }, 500 #endif 501 #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) 502 503 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 504 { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, 505 #else 506 { "munmap", (sqlite3_syscall_ptr)0, 0 }, 507 #endif 508 #define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent) 509 510 #if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) 511 { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, 512 #else 513 { "mremap", (sqlite3_syscall_ptr)0, 0 }, 514 #endif 515 #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent) 516 517 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 518 { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, 519 #else 520 { 
"getpagesize", (sqlite3_syscall_ptr)0, 0 }, 521 #endif 522 #define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent) 523 524 #if defined(HAVE_READLINK) 525 { "readlink", (sqlite3_syscall_ptr)readlink, 0 }, 526 #else 527 { "readlink", (sqlite3_syscall_ptr)0, 0 }, 528 #endif 529 #define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent) 530 531 #if defined(HAVE_LSTAT) 532 { "lstat", (sqlite3_syscall_ptr)lstat, 0 }, 533 #else 534 { "lstat", (sqlite3_syscall_ptr)0, 0 }, 535 #endif 536 #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) 537 538 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 539 # ifdef __ANDROID__ 540 { "ioctl", (sqlite3_syscall_ptr)(int(*)(int, int, ...))ioctl, 0 }, 541 #define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) 542 # else 543 { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, 544 #define osIoctl ((int(*)(int,unsigned long,...))aSyscall[28].pCurrent) 545 # endif 546 #else 547 { "ioctl", (sqlite3_syscall_ptr)0, 0 }, 548 #endif 549 550 }; /* End of the overrideable system calls */ 551 552 553 /* 554 ** On some systems, calls to fchown() will trigger a message in a security 555 ** log if they come from non-root processes. So avoid calling fchown() if 556 ** we are not running as root. 557 */ 558 static int robustFchown(int fd, uid_t uid, gid_t gid){ 559 #if defined(HAVE_FCHOWN) 560 return osGeteuid() ? 0 : osFchown(fd,uid,gid); 561 #else 562 return 0; 563 #endif 564 } 565 566 /* 567 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the 568 ** "unix" VFSes. Return SQLITE_OK opon successfully updating the 569 ** system call pointer, or SQLITE_NOTFOUND if there is no configurable 570 ** system call named zName. 571 */ 572 static int unixSetSystemCall( 573 sqlite3_vfs *pNotUsed, /* The VFS pointer. 
Not used */ 574 const char *zName, /* Name of system call to override */ 575 sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ 576 ){ 577 unsigned int i; 578 int rc = SQLITE_NOTFOUND; 579 580 UNUSED_PARAMETER(pNotUsed); 581 if( zName==0 ){ 582 /* If no zName is given, restore all system calls to their default 583 ** settings and return NULL 584 */ 585 rc = SQLITE_OK; 586 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 587 if( aSyscall[i].pDefault ){ 588 aSyscall[i].pCurrent = aSyscall[i].pDefault; 589 } 590 } 591 }else{ 592 /* If zName is specified, operate on only the one system call 593 ** specified. 594 */ 595 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 596 if( strcmp(zName, aSyscall[i].zName)==0 ){ 597 if( aSyscall[i].pDefault==0 ){ 598 aSyscall[i].pDefault = aSyscall[i].pCurrent; 599 } 600 rc = SQLITE_OK; 601 if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; 602 aSyscall[i].pCurrent = pNewFunc; 603 break; 604 } 605 } 606 } 607 return rc; 608 } 609 610 /* 611 ** Return the value of a system call. Return NULL if zName is not a 612 ** recognized system call name. NULL is also returned if the system call 613 ** is currently undefined. 614 */ 615 static sqlite3_syscall_ptr unixGetSystemCall( 616 sqlite3_vfs *pNotUsed, 617 const char *zName 618 ){ 619 unsigned int i; 620 621 UNUSED_PARAMETER(pNotUsed); 622 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 623 if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; 624 } 625 return 0; 626 } 627 628 /* 629 ** Return the name of the first system call after zName. If zName==NULL 630 ** then return the name of the first system call. Return NULL if zName 631 ** is the last system call or if zName is not the name of a valid 632 ** system call. 
633 */ 634 static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ 635 int i = -1; 636 637 UNUSED_PARAMETER(p); 638 if( zName ){ 639 for(i=0; i<ArraySize(aSyscall)-1; i++){ 640 if( strcmp(zName, aSyscall[i].zName)==0 ) break; 641 } 642 } 643 for(i++; i<ArraySize(aSyscall); i++){ 644 if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; 645 } 646 return 0; 647 } 648 649 /* 650 ** Do not accept any file descriptor less than this value, in order to avoid 651 ** opening database file using file descriptors that are commonly used for 652 ** standard input, output, and error. 653 */ 654 #ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR 655 # define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 656 #endif 657 658 /* 659 ** Invoke open(). Do so multiple times, until it either succeeds or 660 ** fails for some reason other than EINTR. 661 ** 662 ** If the file creation mode "m" is 0 then set it to the default for 663 ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally 664 ** 0644) as modified by the system umask. If m is not 0, then 665 ** make the file creation mode be exactly m ignoring the umask. 666 ** 667 ** The m parameter will be non-zero only when creating -wal, -journal, 668 ** and -shm files. We want those files to have *exactly* the same 669 ** permissions as their original database, unadulterated by the umask. 670 ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a 671 ** transaction crashes and leaves behind hot journals, then any 672 ** process that is able to write to the database will also be able to 673 ** recover the hot journals. 674 */ 675 static int robust_open(const char *z, int f, mode_t m){ 676 int fd; 677 mode_t m2 = m ? 
m : SQLITE_DEFAULT_FILE_PERMISSIONS; 678 while(1){ 679 #if defined(O_CLOEXEC) 680 fd = osOpen(z,f|O_CLOEXEC,m2); 681 #else 682 fd = osOpen(z,f,m2); 683 #endif 684 if( fd<0 ){ 685 if( errno==EINTR ) continue; 686 break; 687 } 688 if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; 689 osClose(fd); 690 sqlite3_log(SQLITE_WARNING, 691 "attempt to open \"%s\" as file descriptor %d", z, fd); 692 fd = -1; 693 if( osOpen("/dev/null", O_RDONLY, m)<0 ) break; 694 } 695 if( fd>=0 ){ 696 if( m!=0 ){ 697 struct stat statbuf; 698 if( osFstat(fd, &statbuf)==0 699 && statbuf.st_size==0 700 && (statbuf.st_mode&0777)!=m 701 ){ 702 osFchmod(fd, m); 703 } 704 } 705 #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) 706 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 707 #endif 708 } 709 return fd; 710 } 711 712 /* 713 ** Helper functions to obtain and relinquish the global mutex. The 714 ** global mutex is used to protect the unixInodeInfo and 715 ** vxworksFileId objects used by this file, all of which may be 716 ** shared by multiple threads. 717 ** 718 ** Function unixMutexHeld() is used to assert() that the global mutex 719 ** is held when required. This function is only used as part of assert() 720 ** statements. e.g. 721 ** 722 ** unixEnterMutex() 723 ** assert( unixMutexHeld() ); 724 ** unixEnterLeave() 725 ** 726 ** To prevent deadlock, the global unixBigLock must must be acquired 727 ** before the unixInodeInfo.pLockMutex mutex, if both are held. It is 728 ** OK to get the pLockMutex without holding unixBigLock first, but if 729 ** that happens, the unixBigLock mutex must not be acquired until after 730 ** pLockMutex is released. 
731 ** 732 ** OK: enter(unixBigLock), enter(pLockInfo) 733 ** OK: enter(unixBigLock) 734 ** OK: enter(pLockInfo) 735 ** ERROR: enter(pLockInfo), enter(unixBigLock) 736 */ 737 static sqlite3_mutex *unixBigLock = 0; 738 static void unixEnterMutex(void){ 739 assert( sqlite3_mutex_notheld(unixBigLock) ); /* Not a recursive mutex */ 740 sqlite3_mutex_enter(unixBigLock); 741 } 742 static void unixLeaveMutex(void){ 743 assert( sqlite3_mutex_held(unixBigLock) ); 744 sqlite3_mutex_leave(unixBigLock); 745 } 746 #ifdef SQLITE_DEBUG 747 static int unixMutexHeld(void) { 748 return sqlite3_mutex_held(unixBigLock); 749 } 750 #endif 751 752 753 #ifdef SQLITE_HAVE_OS_TRACE 754 /* 755 ** Helper function for printing out trace information from debugging 756 ** binaries. This returns the string representation of the supplied 757 ** integer lock-type. 758 */ 759 static const char *azFileLock(int eFileLock){ 760 switch( eFileLock ){ 761 case NO_LOCK: return "NONE"; 762 case SHARED_LOCK: return "SHARED"; 763 case RESERVED_LOCK: return "RESERVED"; 764 case PENDING_LOCK: return "PENDING"; 765 case EXCLUSIVE_LOCK: return "EXCLUSIVE"; 766 } 767 return "ERROR"; 768 } 769 #endif 770 771 #ifdef SQLITE_LOCK_TRACE 772 /* 773 ** Print out information about all locking operations. 774 ** 775 ** This routine is used for troubleshooting locks on multithreaded 776 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE 777 ** command-line option on the compiler. This code is normally 778 ** turned off. 
779 */ 780 static int lockTrace(int fd, int op, struct flock *p){ 781 char *zOpName, *zType; 782 int s; 783 int savedErrno; 784 if( op==F_GETLK ){ 785 zOpName = "GETLK"; 786 }else if( op==F_SETLK ){ 787 zOpName = "SETLK"; 788 }else{ 789 s = osFcntl(fd, op, p); 790 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); 791 return s; 792 } 793 if( p->l_type==F_RDLCK ){ 794 zType = "RDLCK"; 795 }else if( p->l_type==F_WRLCK ){ 796 zType = "WRLCK"; 797 }else if( p->l_type==F_UNLCK ){ 798 zType = "UNLCK"; 799 }else{ 800 assert( 0 ); 801 } 802 assert( p->l_whence==SEEK_SET ); 803 s = osFcntl(fd, op, p); 804 savedErrno = errno; 805 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", 806 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, 807 (int)p->l_pid, s); 808 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ 809 struct flock l2; 810 l2 = *p; 811 osFcntl(fd, F_GETLK, &l2); 812 if( l2.l_type==F_RDLCK ){ 813 zType = "RDLCK"; 814 }else if( l2.l_type==F_WRLCK ){ 815 zType = "WRLCK"; 816 }else if( l2.l_type==F_UNLCK ){ 817 zType = "UNLCK"; 818 }else{ 819 assert( 0 ); 820 } 821 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", 822 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); 823 } 824 errno = savedErrno; 825 return s; 826 } 827 #undef osFcntl 828 #define osFcntl lockTrace 829 #endif /* SQLITE_LOCK_TRACE */ 830 831 /* 832 ** Retry ftruncate() calls that fail due to EINTR 833 ** 834 ** All calls to ftruncate() within this file should be made through 835 ** this wrapper. On the Android platform, bypassing the logic below 836 ** could lead to a corrupt database. 837 */ 838 static int robust_ftruncate(int h, sqlite3_int64 sz){ 839 int rc; 840 #ifdef __ANDROID__ 841 /* On Android, ftruncate() always uses 32-bit offsets, even if 842 ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to 843 ** truncate a file to any size larger than 2GiB. Silently ignore any 844 ** such attempts. 
*/ 845 if( sz>(sqlite3_int64)0x7FFFFFFF ){ 846 rc = SQLITE_OK; 847 }else 848 #endif 849 do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); 850 return rc; 851 } 852 853 /* 854 ** This routine translates a standard POSIX errno code into something 855 ** useful to the clients of the sqlite3 functions. Specifically, it is 856 ** intended to translate a variety of "try again" errors into SQLITE_BUSY 857 ** and a variety of "please close the file descriptor NOW" errors into 858 ** SQLITE_IOERR 859 ** 860 ** Errors during initialization of locks, or file system support for locks, 861 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. 862 */ 863 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { 864 assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || 865 (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 866 (sqliteIOErr == SQLITE_IOERR_RDLOCK) || 867 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ); 868 switch (posixError) { 869 case EACCES: 870 case EAGAIN: 871 case ETIMEDOUT: 872 case EBUSY: 873 case EINTR: 874 case ENOLCK: 875 /* random NFS retry error, unless during file system support 876 * introspection, in which it actually means what it says */ 877 return SQLITE_BUSY; 878 879 case EPERM: 880 return SQLITE_PERM; 881 882 default: 883 return sqliteIOErr; 884 } 885 } 886 887 888 /****************************************************************************** 889 ****************** Begin Unique File ID Utility Used By VxWorks *************** 890 ** 891 ** On most versions of unix, we can get a unique ID for a file by concatenating 892 ** the device number and the inode number. But this does not work on VxWorks. 893 ** On VxWorks, a unique file id must be based on the canonical filename. 894 ** 895 ** A pointer to an instance of the following structure can be used as a 896 ** unique file ID in VxWorks. Each instance of this structure contains 897 ** a copy of the canonical filename. There is also a reference count. 
898 ** The structure is reclaimed when the number of pointers to it drops to 899 ** zero. 900 ** 901 ** There are never very many files open at one time and lookups are not 902 ** a performance-critical path, so it is sufficient to put these 903 ** structures on a linked list. 904 */ 905 struct vxworksFileId { 906 struct vxworksFileId *pNext; /* Next in a list of them all */ 907 int nRef; /* Number of references to this one */ 908 int nName; /* Length of the zCanonicalName[] string */ 909 char *zCanonicalName; /* Canonical filename */ 910 }; 911 912 #if OS_VXWORKS 913 /* 914 ** All unique filenames are held on a linked list headed by this 915 ** variable: 916 */ 917 static struct vxworksFileId *vxworksFileList = 0; 918 919 /* 920 ** Simplify a filename into its canonical form 921 ** by making the following changes: 922 ** 923 ** * removing any trailing and duplicate / 924 ** * convert /./ into just / 925 ** * convert /A/../ where A is any simple name into just / 926 ** 927 ** Changes are made in-place. Return the new name length. 928 ** 929 ** The original filename is in z[0..n-1]. Return the number of 930 ** characters in the simplified name. 931 */ 932 static int vxworksSimplifyName(char *z, int n){ 933 int i, j; 934 while( n>1 && z[n-1]=='/' ){ n--; } 935 for(i=j=0; i<n; i++){ 936 if( z[i]=='/' ){ 937 if( z[i+1]=='/' ) continue; 938 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 939 i += 1; 940 continue; 941 } 942 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ 943 while( j>0 && z[j-1]!='/' ){ j--; } 944 if( j>0 ){ j--; } 945 i += 2; 946 continue; 947 } 948 } 949 z[j++] = z[i]; 950 } 951 z[j] = 0; 952 return j; 953 } 954 955 /* 956 ** Find a unique file ID for the given absolute pathname. Return 957 ** a pointer to the vxworksFileId object. This pointer is the unique 958 ** file ID. 959 ** 960 ** The nRef field of the vxworksFileId object is incremented before 961 ** the object is returned. 
A new vxworksFileId object is created 962 ** and added to the global list if necessary. 963 ** 964 ** If a memory allocation error occurs, return NULL. 965 */ 966 static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ 967 struct vxworksFileId *pNew; /* search key and new file ID */ 968 struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ 969 int n; /* Length of zAbsoluteName string */ 970 971 assert( zAbsoluteName[0]=='/' ); 972 n = (int)strlen(zAbsoluteName); 973 pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); 974 if( pNew==0 ) return 0; 975 pNew->zCanonicalName = (char*)&pNew[1]; 976 memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); 977 n = vxworksSimplifyName(pNew->zCanonicalName, n); 978 979 /* Search for an existing entry that matching the canonical name. 980 ** If found, increment the reference count and return a pointer to 981 ** the existing file ID. 982 */ 983 unixEnterMutex(); 984 for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ 985 if( pCandidate->nName==n 986 && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 987 ){ 988 sqlite3_free(pNew); 989 pCandidate->nRef++; 990 unixLeaveMutex(); 991 return pCandidate; 992 } 993 } 994 995 /* No match was found. We will make a new file ID */ 996 pNew->nRef = 1; 997 pNew->nName = n; 998 pNew->pNext = vxworksFileList; 999 vxworksFileList = pNew; 1000 unixLeaveMutex(); 1001 return pNew; 1002 } 1003 1004 /* 1005 ** Decrement the reference count on a vxworksFileId object. Free 1006 ** the object when the reference count reaches zero. 
1007 */ 1008 static void vxworksReleaseFileId(struct vxworksFileId *pId){ 1009 unixEnterMutex(); 1010 assert( pId->nRef>0 ); 1011 pId->nRef--; 1012 if( pId->nRef==0 ){ 1013 struct vxworksFileId **pp; 1014 for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} 1015 assert( *pp==pId ); 1016 *pp = pId->pNext; 1017 sqlite3_free(pId); 1018 } 1019 unixLeaveMutex(); 1020 } 1021 #endif /* OS_VXWORKS */ 1022 /*************** End of Unique File ID Utility Used By VxWorks **************** 1023 ******************************************************************************/ 1024 1025 1026 /****************************************************************************** 1027 *************************** Posix Advisory Locking **************************** 1028 ** 1029 ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) 1030 ** section 6.5.2.2 lines 483 through 490 specify that when a process 1031 ** sets or clears a lock, that operation overrides any prior locks set 1032 ** by the same process. It does not explicitly say so, but this implies 1033 ** that it overrides locks set by the same process using a different 1034 ** file descriptor. Consider this test case: 1035 ** 1036 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); 1037 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); 1038 ** 1039 ** Suppose ./file1 and ./file2 are really the same file (because 1040 ** one is a hard or symbolic link to the other) then if you set 1041 ** an exclusive lock on fd1, then try to get an exclusive lock 1042 ** on fd2, it works. I would have expected the second lock to 1043 ** fail since there was already a lock on the file due to fd1. 1044 ** But not so. Since both locks came from the same process, the 1045 ** second overrides the first, even though they were on different 1046 ** file descriptors opened on different file names. 1047 ** 1048 ** This means that we cannot use POSIX locks to synchronize file access 1049 ** among competing threads of the same process. 
** POSIX locks will work fine
** to synchronize access for threads in separate processes, but not
** threads within the same process.
**
** To work around the problem, SQLite has to manage file locks internally
** on its own.  Whenever a new database is opened, we have to find the
** specific inode of the database file (the inode is determined by the
** st_dev and st_ino fields of the stat structure that fstat() fills in)
** and check for locks already existing on that inode.  When locks are
** created or removed, we have to look at our own internal record of the
** locks to see if another thread has previously set a lock on that same
** inode.
**
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
** For VxWorks, we have to use the alternative unique ID system based on
** canonical filename and implemented in the previous division.)
**
** The sqlite3_file structure for POSIX is no longer just an integer file
** descriptor.  It is now a structure that holds the integer file
** descriptor and a pointer to a structure that describes the internal
** locks on the corresponding inode.  There is one locking structure
** per inode, so if the same inode is opened twice, both unixFile structures
** point to the same locking structure.  The locking structure keeps
** a reference count (so we will know when to delete it) and a "cnt"
** field that tells us its internal lock status.  cnt==0 means the
** file is unlocked.  cnt==-1 means the file has an exclusive lock.
** cnt>0 means there are cnt shared locks on the file.
**
** Any attempt to lock or unlock a file first checks the locking
** structure.  The fcntl() system call is only invoked to set a
** POSIX lock if the internal lock structure transitions between
** a locked and an unlocked state.
**
** But wait:  there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close a unixFile, if there are
** other unixFile objects open on the same inode that are holding locks,
** the call to close() the file descriptor is deferred until all of the
** locks clear.  The unixInodeInfo structure keeps a list of file
** descriptors that need to be closed and that list is walked (and cleared)
** when the last lock clears.
**
** Yet another problem:  LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B.  But there is no way to know at compile-time which
** threading library is being used.  So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
*/

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
**
** Keys are compared bytewise: findInodeInfo() zero-fills its lookup key
** with memset() before setting the fields and then matches it against
** unixInodeInfo.fileId using memcmp() over sizeof(struct unixFileId).
*/
struct unixFileId {
  dev_t dev;                  /* Device number */
#if OS_VXWORKS
  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
#else
  /* We are told that some versions of Android contain a bug that
  ** sizes ino_t at only 32-bits instead of 64-bits. (See
  ** https://android-review.googlesource.com/#/c/115351/3/dist/sqlite3.c)
  ** To work around this, always allocate 64-bits for the inode number.
  ** On small machines that only have 32-bit inodes, this wastes 4 bytes,
  ** but that should not be a big deal. */
  /* WAS:  ino_t ino;   */
  u64 ino;                   /* Inode number */
#endif
};

/*
** An instance of the following structure is allocated for each open
** inode.
**
** A single inode can have multiple file descriptors, so each unixFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of unixFile pointing to it.
**
** Mutex rules:
**
**  (1) Only the pLockMutex mutex must be held in order to read or write
**      any of the locking fields:
**          nShared, nLock, eFileLock, bProcessLock, pUnused
**
**  (2) When nRef>0, then the following fields are unchanging and can
**      be read (but not written) without holding any mutex:
**          fileId, pLockMutex
**
**  (3) With the exceptions above, all the fields may only be read
**      or written while holding the global unixBigLock mutex.
**
** Deadlock prevention:  The global unixBigLock mutex may not
** be acquired while holding the pLockMutex mutex.  If both unixBigLock
** and pLockMutex are needed, then unixBigLock must be acquired first.
*/
struct unixInodeInfo {
  struct unixFileId fileId;       /* The lookup key */
  sqlite3_mutex *pLockMutex;      /* Hold this mutex to access the locking
                                  ** fields: nShared, nLock, eFileLock,
                                  ** bProcessLock, pUnused */
  int nShared;                      /* Number of SHARED locks held */
  int nLock;                        /* Number of outstanding file locks */
  unsigned char eFileLock;          /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  unsigned char bProcessLock;       /* An exclusive process lock is held */
  UnixUnusedFd *pUnused;            /* Unused file descriptors to close */
  int nRef;                       /* Number of pointers to this structure */
  unixShmNode *pShmNode;          /* Shared memory associated with this inode */
  unixInodeInfo *pNext;           /* List of all unixInodeInfo objects */
  unixInodeInfo *pPrev;           /*    .... doubly linked */
#if SQLITE_ENABLE_LOCKING_STYLE
  unsigned long long sharedByte;  /* for AFP simulated shared lock */
#endif
#if OS_VXWORKS
  sem_t *pSem;                    /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+2];  /* Name of that semaphore */
#endif
};

/*
** A list of all unixInodeInfo objects.
**
** Must hold unixBigLock in order to read or write this variable.
*/
static unixInodeInfo *inodeList = 0;  /* All unixInodeInfo objects */

#ifdef SQLITE_DEBUG
/*
** unixFileMutexHeld() reports whether the inode mutex of pFile (that is,
** pFile->pInode->pLockMutex) is held, and unixFileMutexNotheld() reports
** whether it is not held.  The answers come from sqlite3_mutex_held() and
** sqlite3_mutex_notheld() respectively.  pFile->pInode must be non-NULL.
**
** These routines are used only within assert() to help verify correct mutex
** usage.
*/
int unixFileMutexHeld(unixFile *pFile){
  assert( pFile->pInode );
  return sqlite3_mutex_held(pFile->pInode->pLockMutex);
}
int unixFileMutexNotheld(unixFile *pFile){
  assert( pFile->pInode );
  return sqlite3_mutex_notheld(pFile->pInode->pLockMutex);
}
#endif

/*
**
** This function - unixLogErrorAtLine(), is only ever called via the macro
** unixLogError().
**
** It is invoked after an error occurs in an OS function and errno has been
** set.
It logs a message using sqlite3_log() containing the current value of 1212 ** errno and, if possible, the human-readable equivalent from strerror() or 1213 ** strerror_r(). 1214 ** 1215 ** The first argument passed to the macro should be the error code that 1216 ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). 1217 ** The two subsequent arguments should be the name of the OS function that 1218 ** failed (e.g. "unlink", "open") and the associated file-system path, 1219 ** if any. 1220 */ 1221 #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) 1222 static int unixLogErrorAtLine( 1223 int errcode, /* SQLite error code */ 1224 const char *zFunc, /* Name of OS function that failed */ 1225 const char *zPath, /* File path associated with error */ 1226 int iLine /* Source line number where error occurred */ 1227 ){ 1228 char *zErr; /* Message from strerror() or equivalent */ 1229 int iErrno = errno; /* Saved syscall error number */ 1230 1231 /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use 1232 ** the strerror() function to obtain the human-readable error message 1233 ** equivalent to errno. Otherwise, use strerror_r(). 1234 */ 1235 #if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) 1236 char aErr[80]; 1237 memset(aErr, 0, sizeof(aErr)); 1238 zErr = aErr; 1239 1240 /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, 1241 ** assume that the system provides the GNU version of strerror_r() that 1242 ** returns a pointer to a buffer containing the error message. That pointer 1243 ** may point to aErr[], or it may point to some static storage somewhere. 1244 ** Otherwise, assume that the system provides the POSIX version of 1245 ** strerror_r(), which always writes an error message into aErr[]. 1246 ** 1247 ** If the code incorrectly assumes that it is the POSIX version that is 1248 ** available, the error message will often be an empty string. Not a 1249 ** huge problem. 
Incorrectly concluding that the GNU version is available 1250 ** could lead to a segfault though. 1251 */ 1252 #if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) 1253 zErr = 1254 # endif 1255 strerror_r(iErrno, aErr, sizeof(aErr)-1); 1256 1257 #elif SQLITE_THREADSAFE 1258 /* This is a threadsafe build, but strerror_r() is not available. */ 1259 zErr = ""; 1260 #else 1261 /* Non-threadsafe build, use strerror(). */ 1262 zErr = strerror(iErrno); 1263 #endif 1264 1265 if( zPath==0 ) zPath = ""; 1266 sqlite3_log(errcode, 1267 "os_unix.c:%d: (%d) %s(%s) - %s", 1268 iLine, iErrno, zFunc, zPath, zErr 1269 ); 1270 1271 return errcode; 1272 } 1273 1274 /* 1275 ** Close a file descriptor. 1276 ** 1277 ** We assume that close() almost always works, since it is only in a 1278 ** very sick application or on a very sick platform that it might fail. 1279 ** If it does fail, simply leak the file descriptor, but do log the 1280 ** error. 1281 ** 1282 ** Note that it is not safe to retry close() after EINTR since the 1283 ** file descriptor might have already been reused by another thread. 1284 ** So we don't even try to recover from an EINTR. Just log the error 1285 ** and move on. 1286 */ 1287 static void robust_close(unixFile *pFile, int h, int lineno){ 1288 if( osClose(h) ){ 1289 unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", 1290 pFile ? pFile->zPath : 0, lineno); 1291 } 1292 } 1293 1294 /* 1295 ** Set the pFile->lastErrno. Do this in a subroutine as that provides 1296 ** a convenient place to set a breakpoint. 1297 */ 1298 static void storeLastErrno(unixFile *pFile, int error){ 1299 pFile->lastErrno = error; 1300 } 1301 1302 /* 1303 ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. 
1304 */ 1305 static void closePendingFds(unixFile *pFile){ 1306 unixInodeInfo *pInode = pFile->pInode; 1307 UnixUnusedFd *p; 1308 UnixUnusedFd *pNext; 1309 assert( unixFileMutexHeld(pFile) ); 1310 for(p=pInode->pUnused; p; p=pNext){ 1311 pNext = p->pNext; 1312 robust_close(pFile, p->fd, __LINE__); 1313 sqlite3_free(p); 1314 } 1315 pInode->pUnused = 0; 1316 } 1317 1318 /* 1319 ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). 1320 ** 1321 ** The global mutex must be held when this routine is called, but the mutex 1322 ** on the inode being deleted must NOT be held. 1323 */ 1324 static void releaseInodeInfo(unixFile *pFile){ 1325 unixInodeInfo *pInode = pFile->pInode; 1326 assert( unixMutexHeld() ); 1327 assert( unixFileMutexNotheld(pFile) ); 1328 if( ALWAYS(pInode) ){ 1329 pInode->nRef--; 1330 if( pInode->nRef==0 ){ 1331 assert( pInode->pShmNode==0 ); 1332 sqlite3_mutex_enter(pInode->pLockMutex); 1333 closePendingFds(pFile); 1334 sqlite3_mutex_leave(pInode->pLockMutex); 1335 if( pInode->pPrev ){ 1336 assert( pInode->pPrev->pNext==pInode ); 1337 pInode->pPrev->pNext = pInode->pNext; 1338 }else{ 1339 assert( inodeList==pInode ); 1340 inodeList = pInode->pNext; 1341 } 1342 if( pInode->pNext ){ 1343 assert( pInode->pNext->pPrev==pInode ); 1344 pInode->pNext->pPrev = pInode->pPrev; 1345 } 1346 sqlite3_mutex_free(pInode->pLockMutex); 1347 sqlite3_free(pInode); 1348 } 1349 } 1350 } 1351 1352 /* 1353 ** Given a file descriptor, locate the unixInodeInfo object that 1354 ** describes that file descriptor. Create a new one if necessary. The 1355 ** return value might be uninitialized if an error occurs. 1356 ** 1357 ** The global mutex must held when calling this routine. 1358 ** 1359 ** Return an appropriate error code. 
1360 */ 1361 static int findInodeInfo( 1362 unixFile *pFile, /* Unix file with file desc used in the key */ 1363 unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ 1364 ){ 1365 int rc; /* System call return code */ 1366 int fd; /* The file descriptor for pFile */ 1367 struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ 1368 struct stat statbuf; /* Low-level file information */ 1369 unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ 1370 1371 assert( unixMutexHeld() ); 1372 1373 /* Get low-level information about the file that we can used to 1374 ** create a unique name for the file. 1375 */ 1376 fd = pFile->h; 1377 rc = osFstat(fd, &statbuf); 1378 if( rc!=0 ){ 1379 storeLastErrno(pFile, errno); 1380 #if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS) 1381 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; 1382 #endif 1383 return SQLITE_IOERR; 1384 } 1385 1386 #ifdef __APPLE__ 1387 /* On OS X on an msdos filesystem, the inode number is reported 1388 ** incorrectly for zero-size files. See ticket #3260. To work 1389 ** around this problem (we consider it a bug in OS X, not SQLite) 1390 ** we always increase the file size to 1 by writing a single byte 1391 ** prior to accessing the inode number. The one byte written is 1392 ** an ASCII 'S' character which also happens to be the first byte 1393 ** in the header of every SQLite database. In this way, if there 1394 ** is a race condition such that another thread has already populated 1395 ** the first page of the database, no damage is done. 
1396 */ 1397 if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ 1398 do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); 1399 if( rc!=1 ){ 1400 storeLastErrno(pFile, errno); 1401 return SQLITE_IOERR; 1402 } 1403 rc = osFstat(fd, &statbuf); 1404 if( rc!=0 ){ 1405 storeLastErrno(pFile, errno); 1406 return SQLITE_IOERR; 1407 } 1408 } 1409 #endif 1410 1411 memset(&fileId, 0, sizeof(fileId)); 1412 fileId.dev = statbuf.st_dev; 1413 #if OS_VXWORKS 1414 fileId.pId = pFile->pId; 1415 #else 1416 fileId.ino = (u64)statbuf.st_ino; 1417 #endif 1418 assert( unixMutexHeld() ); 1419 pInode = inodeList; 1420 while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ 1421 pInode = pInode->pNext; 1422 } 1423 if( pInode==0 ){ 1424 pInode = sqlite3_malloc64( sizeof(*pInode) ); 1425 if( pInode==0 ){ 1426 return SQLITE_NOMEM_BKPT; 1427 } 1428 memset(pInode, 0, sizeof(*pInode)); 1429 memcpy(&pInode->fileId, &fileId, sizeof(fileId)); 1430 if( sqlite3GlobalConfig.bCoreMutex ){ 1431 pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 1432 if( pInode->pLockMutex==0 ){ 1433 sqlite3_free(pInode); 1434 return SQLITE_NOMEM_BKPT; 1435 } 1436 } 1437 pInode->nRef = 1; 1438 assert( unixMutexHeld() ); 1439 pInode->pNext = inodeList; 1440 pInode->pPrev = 0; 1441 if( inodeList ) inodeList->pPrev = pInode; 1442 inodeList = pInode; 1443 }else{ 1444 pInode->nRef++; 1445 } 1446 *ppInode = pInode; 1447 return SQLITE_OK; 1448 } 1449 1450 /* 1451 ** Return TRUE if pFile has been renamed or unlinked since it was first opened. 1452 */ 1453 static int fileHasMoved(unixFile *pFile){ 1454 #if OS_VXWORKS 1455 return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; 1456 #else 1457 struct stat buf; 1458 return pFile->pInode!=0 && 1459 (osStat(pFile->zPath, &buf)!=0 1460 || (u64)buf.st_ino!=pFile->pInode->fileId.ino); 1461 #endif 1462 } 1463 1464 1465 /* 1466 ** Check a unixFile that is a database. 
Verify the following: 1467 ** 1468 ** (1) There is exactly one hard link on the file 1469 ** (2) The file is not a symbolic link 1470 ** (3) The file has not been renamed or unlinked 1471 ** 1472 ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. 1473 */ 1474 static void verifyDbFile(unixFile *pFile){ 1475 struct stat buf; 1476 int rc; 1477 1478 /* These verifications occurs for the main database only */ 1479 if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return; 1480 1481 rc = osFstat(pFile->h, &buf); 1482 if( rc!=0 ){ 1483 sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); 1484 return; 1485 } 1486 if( buf.st_nlink==0 ){ 1487 sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); 1488 return; 1489 } 1490 if( buf.st_nlink>1 ){ 1491 sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); 1492 return; 1493 } 1494 if( fileHasMoved(pFile) ){ 1495 sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); 1496 return; 1497 } 1498 } 1499 1500 1501 /* 1502 ** This routine checks if there is a RESERVED lock held on the specified 1503 ** file by this or any other process. If such a lock is held, set *pResOut 1504 ** to a non-zero value otherwise *pResOut is set to zero. The return value 1505 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 1506 */ 1507 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ 1508 int rc = SQLITE_OK; 1509 int reserved = 0; 1510 unixFile *pFile = (unixFile*)id; 1511 1512 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 1513 1514 assert( pFile ); 1515 assert( pFile->eFileLock<=SHARED_LOCK ); 1516 sqlite3_mutex_enter(pFile->pInode->pLockMutex); 1517 1518 /* Check if a thread in this process holds such a lock */ 1519 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 1520 reserved = 1; 1521 } 1522 1523 /* Otherwise see if some other process holds it. 
1524 */ 1525 #ifndef __DJGPP__ 1526 if( !reserved && !pFile->pInode->bProcessLock ){ 1527 struct flock lock; 1528 lock.l_whence = SEEK_SET; 1529 lock.l_start = RESERVED_BYTE; 1530 lock.l_len = 1; 1531 lock.l_type = F_WRLCK; 1532 if( osFcntl(pFile->h, F_GETLK, &lock) ){ 1533 rc = SQLITE_IOERR_CHECKRESERVEDLOCK; 1534 storeLastErrno(pFile, errno); 1535 } else if( lock.l_type!=F_UNLCK ){ 1536 reserved = 1; 1537 } 1538 } 1539 #endif 1540 1541 sqlite3_mutex_leave(pFile->pInode->pLockMutex); 1542 OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); 1543 1544 *pResOut = reserved; 1545 return rc; 1546 } 1547 1548 /* Forward declaration*/ 1549 static int unixSleep(sqlite3_vfs*,int); 1550 1551 /* 1552 ** Set a posix-advisory-lock. 1553 ** 1554 ** There are two versions of this routine. If compiled with 1555 ** SQLITE_ENABLE_SETLK_TIMEOUT then the routine has an extra parameter 1556 ** which is a pointer to a unixFile. If the unixFile->iBusyTimeout 1557 ** value is set, then it is the number of milliseconds to wait before 1558 ** failing the lock. The iBusyTimeout value is always reset back to 1559 ** zero on each call. 1560 ** 1561 ** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking 1562 ** attempt to set the lock. 1563 */ 1564 #ifndef SQLITE_ENABLE_SETLK_TIMEOUT 1565 # define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x) 1566 #else 1567 static int osSetPosixAdvisoryLock( 1568 int h, /* The file descriptor on which to take the lock */ 1569 struct flock *pLock, /* The description of the lock */ 1570 unixFile *pFile /* Structure holding timeout value */ 1571 ){ 1572 int tm = pFile->iBusyTimeout; 1573 int rc = osFcntl(h,F_SETLK,pLock); 1574 while( rc<0 && tm>0 ){ 1575 /* On systems that support some kind of blocking file lock with a timeout, 1576 ** make appropriate changes here to invoke that blocking file lock. On 1577 ** generic posix, however, there is no such API. 
So we simply try the 1578 ** lock once every millisecond until either the timeout expires, or until 1579 ** the lock is obtained. */ 1580 unixSleep(0,1000); 1581 rc = osFcntl(h,F_SETLK,pLock); 1582 tm--; 1583 } 1584 return rc; 1585 } 1586 #endif /* SQLITE_ENABLE_SETLK_TIMEOUT */ 1587 1588 1589 /* 1590 ** Attempt to set a system-lock on the file pFile. The lock is 1591 ** described by pLock. 1592 ** 1593 ** If the pFile was opened read/write from unix-excl, then the only lock 1594 ** ever obtained is an exclusive lock, and it is obtained exactly once 1595 ** the first time any lock is attempted. All subsequent system locking 1596 ** operations become no-ops. Locking operations still happen internally, 1597 ** in order to coordinate access between separate database connections 1598 ** within this process, but all of that is handled in memory and the 1599 ** operating system does not participate. 1600 ** 1601 ** This function is a pass-through to fcntl(F_SETLK) if pFile is using 1602 ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" 1603 ** and is read-only. 1604 ** 1605 ** Zero is returned if the call completes successfully, or -1 if a call 1606 ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). 
1607 */ 1608 static int unixFileLock(unixFile *pFile, struct flock *pLock){ 1609 int rc; 1610 unixInodeInfo *pInode = pFile->pInode; 1611 assert( pInode!=0 ); 1612 assert( sqlite3_mutex_held(pInode->pLockMutex) ); 1613 if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ 1614 if( pInode->bProcessLock==0 ){ 1615 struct flock lock; 1616 assert( pInode->nLock==0 ); 1617 lock.l_whence = SEEK_SET; 1618 lock.l_start = SHARED_FIRST; 1619 lock.l_len = SHARED_SIZE; 1620 lock.l_type = F_WRLCK; 1621 rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); 1622 if( rc<0 ) return rc; 1623 pInode->bProcessLock = 1; 1624 pInode->nLock++; 1625 }else{ 1626 rc = 0; 1627 } 1628 }else{ 1629 rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); 1630 } 1631 return rc; 1632 } 1633 1634 /* 1635 ** Lock the file with the lock specified by parameter eFileLock - one 1636 ** of the following: 1637 ** 1638 ** (1) SHARED_LOCK 1639 ** (2) RESERVED_LOCK 1640 ** (3) PENDING_LOCK 1641 ** (4) EXCLUSIVE_LOCK 1642 ** 1643 ** Sometimes when requesting one lock state, additional lock states 1644 ** are inserted in between. The locking might fail on one of the later 1645 ** transitions leaving the lock state different from what it started but 1646 ** still short of its goal. The following chart shows the allowed 1647 ** transitions and the inserted intermediate states: 1648 ** 1649 ** UNLOCKED -> SHARED 1650 ** SHARED -> RESERVED 1651 ** SHARED -> (PENDING) -> EXCLUSIVE 1652 ** RESERVED -> (PENDING) -> EXCLUSIVE 1653 ** PENDING -> EXCLUSIVE 1654 ** 1655 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 1656 ** routine to lower a locking level. 
**
** Returns SQLITE_OK on success (including when a lock at least as strong
** as eFileLock is already held by this unixFile), SQLITE_BUSY when the
** request conflicts with another holder, or another result code derived
** from the failing fcntl() call.  For failures other than SQLITE_BUSY the
** errno value is saved in the unixFile via storeLastErrno().
*/
static int unixLock(sqlite3_file *id, int eFileLock){
  /* The following describes the implementation of the various locks and
  ** lock transitions in terms of the POSIX advisory shared and exclusive
  ** lock primitives (called read-locks and write-locks below, to avoid
  ** confusion with SQLite lock names). The algorithms are complicated
  ** slightly in order to be compatible with Windows95 systems simultaneously
  ** accessing the same database file, in case that is ever required.
  **
  ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
  ** byte', each single bytes at well known offsets, and the 'shared byte
  ** range', a range of 510 bytes at a well known offset.
  **
  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  ** byte'.  If this is successful, 'shared byte range' is read-locked
  ** and the lock on the 'pending byte' released.  (Legacy note:  When
  ** SQLite was first developed, Windows95 systems were still very common,
  ** and Windows95 lacks a shared-lock capability.  So on Windows95, a
  ** single randomly selected byte from the 'shared byte range' is locked.
  ** Windows95 is now pretty much extinct, but this work-around for the
  ** lack of shared-locks on Windows95 lives on, for backwards
  ** compatibility.)
  **
  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  ** A RESERVED lock is implemented by grabbing a write-lock on the
  ** 'reserved byte'.
  **
  ** A process may only obtain a PENDING lock after it has obtained a
  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  ** on the 'pending byte'. This ensures that no new SHARED locks can be
  ** obtained, but existing SHARED locks are allowed to persist. A process
  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  ** This property is used by the algorithm for rolling back a journal file
  ** after a crash.
  **
  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  ** implemented by obtaining a write-lock on the entire 'shared byte
  ** range'. Since all other locks require a read-lock on one of the bytes
  ** within this range, this ensures that no other locks are held on the
  ** database.
  */
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int tErrno = 0;

  assert( pFile );
  OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
      azFileLock(eFileLock), azFileLock(pFile->eFileLock),
      azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared,
      osGetpid(0)));

  /* If there is already a lock of this type or more restrictive on the
  ** unixFile, do nothing. Don't use the end_lock: exit path, because the
  ** inode mutex (pInode->pLockMutex), which end_lock: releases, has not
  ** been entered yet.
  */
  if( pFile->eFileLock>=eFileLock ){
    OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,
            azFileLock(eFileLock)));
    return SQLITE_OK;
  }

  /* Make sure the locking sequence is correct.
  **  (1) We never move from unlocked to anything higher than shared lock.
  **  (2) SQLite never explicitly requests a pending lock.
  **  (3) A shared lock is always held when a reserve lock is requested.
  */
  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  assert( eFileLock!=PENDING_LOCK );
  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );

  /* This mutex is needed because pFile->pInode is shared across threads
  */
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);

  /* If some thread using this PID has a lock via a different unixFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (pFile->eFileLock!=pInode->eFileLock &&
          (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  ){
    rc = SQLITE_BUSY;
    goto end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.  No system call is made on this path.
  */
  if( eFileLock==SHARED_LOCK &&
      (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto end_lock;
  }


  /* A PENDING lock is needed before acquiring a SHARED lock and before
  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  ** be released.
  */
  lock.l_len = 1L;
  lock.l_whence = SEEK_SET;
  if( eFileLock==SHARED_LOCK
      || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  ){
    lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
    lock.l_start = PENDING_BYTE;
    if( unixFileLock(pFile, &lock) ){
      /* Capture errno immediately, before any other call can change it */
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }
  }


  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( eFileLock==SHARED_LOCK ){
    assert( pInode->nShared==0 );
    assert( pInode->eFileLock==0 );
    assert( rc==SQLITE_OK );

    /* Now get the read-lock.  lock.l_type is still F_RDLCK from the
    ** PENDING step above. */
    lock.l_start = SHARED_FIRST;
    lock.l_len = SHARED_SIZE;
    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
    }

    /* Drop the temporary PENDING lock.  This is attempted whether or not
    ** the read-lock above succeeded; a failure here is only reported if
    ** no earlier error has been recorded in rc. */
    lock.l_start = PENDING_BYTE;
    lock.l_len = 1L;
    lock.l_type = F_UNLCK;
    if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
      /* This could happen with a network mount */
      tErrno = errno;
      rc = SQLITE_IOERR_UNLOCK;
    }

    if( rc ){
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }else{
      pFile->eFileLock = SHARED_LOCK;
      pInode->nLock++;
      pInode->nShared = 1;
    }
  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
    /* We are trying for an exclusive lock but another thread in this
    ** same process is still holding a shared lock. */
    rc = SQLITE_BUSY;
  }else{
    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    assert( 0!=pFile->eFileLock );
    lock.l_type = F_WRLCK;

    assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
    if( eFileLock==RESERVED_LOCK ){
      lock.l_start = RESERVED_BYTE;
      lock.l_len = 1L;
    }else{
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
    }

    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
    }
  }


#ifdef SQLITE_DEBUG
  /* Set up the transaction-counter change checking flags when
  ** transitioning from a SHARED to a RESERVED lock.  The change
  ** from SHARED to RESERVED marks the beginning of a normal
  ** write operation (not a hot journal rollback).
  */
  if( rc==SQLITE_OK
   && pFile->eFileLock<=SHARED_LOCK
   && eFileLock==RESERVED_LOCK
  ){
    pFile->transCntrChng = 0;
    pFile->dbUpdate = 0;
    pFile->inNormalWrite = 1;
  }
#endif


  /* Record the new lock level on both the file handle and the inode.  If a
  ** request for EXCLUSIVE failed after getting this far, the recorded level
  ** becomes PENDING rather than staying where it started. */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
    pInode->eFileLock = eFileLock;
  }else if( eFileLock==EXCLUSIVE_LOCK ){
    pFile->eFileLock = PENDING_LOCK;
    pInode->eFileLock = PENDING_LOCK;
  }

end_lock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),
      rc==SQLITE_OK ? "ok" : "failed"));
  return rc;
}

/*
** Add the file descriptor used by file handle pFile to the corresponding
** pUnused list.
**
** The entry that is linked in is pFile->pPreallocatedUnused, which is
** pushed onto the head of pFile->pInode->pUnused.  Afterwards pFile->h is
** set to -1 and pFile->pPreallocatedUnused to NULL.  The inode mutex of
** pFile must be held.
*/
static void setPendingFd(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  UnixUnusedFd *p = pFile->pPreallocatedUnused;
  assert( unixFileMutexHeld(pFile) );
  p->pNext = pInode->pUnused;
  pInode->pUnused = p;
  pFile->h = -1;
  pFile->pPreallocatedUnused = 0;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
** the byte range is divided into 2 parts and the first part is unlocked then
** set to a read lock, then the other part is simply unlocked.  This works
** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
** remove the write lock on a region when a read lock is set.
**
** Returns SQLITE_OK on success.  If an operating-system lock call fails,
** SQLITE_IOERR_UNLOCK or SQLITE_IOERR_RDLOCK is returned (in the
** handleNFSUnlock path the read-lock failure is mapped through
** sqliteErrorFromPosixError()).  On an error return pFile->eFileLock is
** left unchanged, except when the final whole-file unlock fails, in which
** case both the inode and the file are forced to NO_LOCK.
*/
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int rc = SQLITE_OK;

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
      pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
      osGetpid(0)));

  assert( eFileLock<=SHARED_LOCK );
  /* Already at or below the requested level: nothing to do.  This exit is
  ** taken before the inode mutex is entered, so it must not go through
  ** end_unlock. */
  if( pFile->eFileLock<=eFileLock ){
    return SQLITE_OK;
  }
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);
  assert( pInode->nShared!=0 );
  if( pFile->eFileLock>SHARED_LOCK ){
    assert( pInode->eFileLock==pFile->eFileLock );

#ifdef SQLITE_DEBUG
    /* When reducing a lock such that other processes can start
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    **
    ** NOTE(review): the only action taken here is clearing the
    ** inNormalWrite flag; any actual checking happens elsewhere.
    */
    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a
    ** write lock until the rest is covered by a read lock:
    **  1:   [WWWWW]
    **  2:   [....W]
    **  3:   [RRRRW]
    **  4:   [RRRR.]
    */
    if( eFileLock==SHARED_LOCK ){
#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
      (void)handleNFSUnlock;
      assert( handleNFSUnlock==0 );
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
      if( handleNFSUnlock ){
        int tErrno;               /* Error code from system call errors */
        off_t divSize = SHARED_SIZE - 1;

        /* Step 2 of the diagram: unlock all but the last shared byte */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
        /* Step 3: read-lock the part that was just unlocked */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
          if( IS_LOCK_ERROR(rc) ){
            storeLastErrno(pFile, tErrno);
          }
          goto end_unlock;
        }
        /* Step 4: release the write lock on the remaining byte(s) */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST+divSize;
        lock.l_len = SHARED_SIZE-divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
      }else
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
      {
        /* Ordinary case: replace the write lock on the whole shared
        ** range with a read lock in a single call. */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = SHARED_SIZE;
        if( unixFileLock(pFile, &lock) ){
          /* In theory, the call to unixFileLock() cannot fail because another
          ** process is holding an incompatible lock. If it does, this
          ** indicates that the other process is not following the locking
          ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
          ** SQLITE_BUSY would confuse the upper layer (in practice it causes
          ** an assert to fail). */
          rc = SQLITE_IOERR_RDLOCK;
          storeLastErrno(pFile, errno);
          goto end_unlock;
        }
      }
    }
    /* Release the PENDING and RESERVED bytes with one two-byte unlock.
    ** This relies on the two bytes being adjacent, as asserted below. */
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = PENDING_BYTE;
    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
    if( unixFileLock(pFile, &lock)==0 ){
      pInode->eFileLock = SHARED_LOCK;
    }else{
      rc = SQLITE_IOERR_UNLOCK;
      storeLastErrno(pFile, errno);
      goto end_unlock;
    }
  }
  if( eFileLock==NO_LOCK ){
    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pInode->nShared--;
    if( pInode->nShared==0 ){
      /* l_start==0 and l_len==0 together describe the entire file */
      lock.l_type = F_UNLCK;
      lock.l_whence = SEEK_SET;
      lock.l_start = lock.l_len = 0L;
      if( unixFileLock(pFile, &lock)==0 ){
        pInode->eFileLock = NO_LOCK;
      }else{
        /* The unlock failed.  Report the error, but still record both
        ** the inode and this file as unlocked. */
        rc = SQLITE_IOERR_UNLOCK;
        storeLastErrno(pFile, errno);
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }

    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pInode->nLock--;
    assert( pInode->nLock>=0 );
    if( pInode->nLock==0 ) closePendingFds(pFile);
  }

end_unlock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  /* The per-file lock level is only lowered when everything succeeded */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
  }
  return rc;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
2067 */ 2068 static int unixUnlock(sqlite3_file *id, int eFileLock){ 2069 #if SQLITE_MAX_MMAP_SIZE>0 2070 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); 2071 #endif 2072 return posixUnlock(id, eFileLock, 0); 2073 } 2074 2075 #if SQLITE_MAX_MMAP_SIZE>0 2076 static int unixMapfile(unixFile *pFd, i64 nByte); 2077 static void unixUnmapfile(unixFile *pFd); 2078 #endif 2079 2080 /* 2081 ** This function performs the parts of the "close file" operation 2082 ** common to all locking schemes. It closes the directory and file 2083 ** handles, if they are valid, and sets all fields of the unixFile 2084 ** structure to 0. 2085 ** 2086 ** It is *not* necessary to hold the mutex when this routine is called, 2087 ** even on VxWorks. A mutex will be acquired on VxWorks by the 2088 ** vxworksReleaseFileId() routine. 2089 */ 2090 static int closeUnixFile(sqlite3_file *id){ 2091 unixFile *pFile = (unixFile*)id; 2092 #if SQLITE_MAX_MMAP_SIZE>0 2093 unixUnmapfile(pFile); 2094 #endif 2095 if( pFile->h>=0 ){ 2096 robust_close(pFile, pFile->h, __LINE__); 2097 pFile->h = -1; 2098 } 2099 #if OS_VXWORKS 2100 if( pFile->pId ){ 2101 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2102 osUnlink(pFile->pId->zCanonicalName); 2103 } 2104 vxworksReleaseFileId(pFile->pId); 2105 pFile->pId = 0; 2106 } 2107 #endif 2108 #ifdef SQLITE_UNLINK_AFTER_CLOSE 2109 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2110 osUnlink(pFile->zPath); 2111 sqlite3_free(*(char**)&pFile->zPath); 2112 pFile->zPath = 0; 2113 } 2114 #endif 2115 OSTRACE(("CLOSE %-3d\n", pFile->h)); 2116 OpenCounter(-1); 2117 sqlite3_free(pFile->pPreallocatedUnused); 2118 memset(pFile, 0, sizeof(unixFile)); 2119 return SQLITE_OK; 2120 } 2121 2122 /* 2123 ** Close a file. 
2124 */ 2125 static int unixClose(sqlite3_file *id){ 2126 int rc = SQLITE_OK; 2127 unixFile *pFile = (unixFile *)id; 2128 unixInodeInfo *pInode = pFile->pInode; 2129 2130 assert( pInode!=0 ); 2131 verifyDbFile(pFile); 2132 unixUnlock(id, NO_LOCK); 2133 assert( unixFileMutexNotheld(pFile) ); 2134 unixEnterMutex(); 2135 2136 /* unixFile.pInode is always valid here. Otherwise, a different close 2137 ** routine (e.g. nolockClose()) would be called instead. 2138 */ 2139 assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); 2140 sqlite3_mutex_enter(pInode->pLockMutex); 2141 if( pInode->nLock ){ 2142 /* If there are outstanding locks, do not actually close the file just 2143 ** yet because that would clear those locks. Instead, add the file 2144 ** descriptor to pInode->pUnused list. It will be automatically closed 2145 ** when the last lock is cleared. 2146 */ 2147 setPendingFd(pFile); 2148 } 2149 sqlite3_mutex_leave(pInode->pLockMutex); 2150 releaseInodeInfo(pFile); 2151 assert( pFile->pShm==0 ); 2152 rc = closeUnixFile(id); 2153 unixLeaveMutex(); 2154 return rc; 2155 } 2156 2157 /************** End of the posix advisory lock implementation ***************** 2158 ******************************************************************************/ 2159 2160 /****************************************************************************** 2161 ****************************** No-op Locking ********************************** 2162 ** 2163 ** Of the various locking implementations available, this is by far the 2164 ** simplest: locking is ignored. No attempt is made to lock the database 2165 ** file for reading or writing. 2166 ** 2167 ** This locking mode is appropriate for use on read-only databases 2168 ** (ex: databases that are burned into CD-ROM, for example.) It can 2169 ** also be used if the application employs some external mechanism to 2170 ** prevent simultaneous access of the same database by two or more 2171 ** database connections. 
But there is a serious risk of database 2172 ** corruption if this locking mode is used in situations where multiple 2173 ** database connections are accessing the same database file at the same 2174 ** time and one or more of those connections are writing. 2175 */ 2176 2177 static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ 2178 UNUSED_PARAMETER(NotUsed); 2179 *pResOut = 0; 2180 return SQLITE_OK; 2181 } 2182 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ 2183 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2184 return SQLITE_OK; 2185 } 2186 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ 2187 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2188 return SQLITE_OK; 2189 } 2190 2191 /* 2192 ** Close the file. 2193 */ 2194 static int nolockClose(sqlite3_file *id) { 2195 return closeUnixFile(id); 2196 } 2197 2198 /******************* End of the no-op lock implementation ********************* 2199 ******************************************************************************/ 2200 2201 /****************************************************************************** 2202 ************************* Begin dot-file Locking ****************************** 2203 ** 2204 ** The dotfile locking implementation uses the existence of separate lock 2205 ** files (really a directory) to control access to the database. This works 2206 ** on just about every filesystem imaginable. But there are serious downsides: 2207 ** 2208 ** (1) There is zero concurrency. A single reader blocks all other 2209 ** connections from reading or writing the database. 2210 ** 2211 ** (2) An application crash or power loss can leave stale lock files 2212 ** sitting around that need to be cleared manually. 2213 ** 2214 ** Nevertheless, a dotlock is an appropriate locking mode for use if no 2215 ** other locking strategy is available. 
2216 ** 2217 ** Dotfile locking works by creating a subdirectory in the same directory as 2218 ** the database and with the same name but with a ".lock" extension added. 2219 ** The existence of a lock directory implies an EXCLUSIVE lock. All other 2220 ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. 2221 */ 2222 2223 /* 2224 ** The file suffix added to the data base filename in order to create the 2225 ** lock directory. 2226 */ 2227 #define DOTLOCK_SUFFIX ".lock" 2228 2229 /* 2230 ** This routine checks if there is a RESERVED lock held on the specified 2231 ** file by this or any other process. If such a lock is held, set *pResOut 2232 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2233 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2234 ** 2235 ** In dotfile locking, either a lock exists or it does not. So in this 2236 ** variation of CheckReservedLock(), *pResOut is set to true if any lock 2237 ** is held on the file and false if the file is unlocked. 2238 */ 2239 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 2240 int rc = SQLITE_OK; 2241 int reserved = 0; 2242 unixFile *pFile = (unixFile*)id; 2243 2244 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2245 2246 assert( pFile ); 2247 reserved = osAccess((const char*)pFile->lockingContext, 0)==0; 2248 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); 2249 *pResOut = reserved; 2250 return rc; 2251 } 2252 2253 /* 2254 ** Lock the file with the lock specified by parameter eFileLock - one 2255 ** of the following: 2256 ** 2257 ** (1) SHARED_LOCK 2258 ** (2) RESERVED_LOCK 2259 ** (3) PENDING_LOCK 2260 ** (4) EXCLUSIVE_LOCK 2261 ** 2262 ** Sometimes when requesting one lock state, additional lock states 2263 ** are inserted in between. The locking might fail on one of the later 2264 ** transitions leaving the lock state different from what it started but 2265 ** still short of its goal. 
The following chart shows the allowed 2266 ** transitions and the inserted intermediate states: 2267 ** 2268 ** UNLOCKED -> SHARED 2269 ** SHARED -> RESERVED 2270 ** SHARED -> (PENDING) -> EXCLUSIVE 2271 ** RESERVED -> (PENDING) -> EXCLUSIVE 2272 ** PENDING -> EXCLUSIVE 2273 ** 2274 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2275 ** routine to lower a locking level. 2276 ** 2277 ** With dotfile locking, we really only support state (4): EXCLUSIVE. 2278 ** But we track the other locking levels internally. 2279 */ 2280 static int dotlockLock(sqlite3_file *id, int eFileLock) { 2281 unixFile *pFile = (unixFile*)id; 2282 char *zLockFile = (char *)pFile->lockingContext; 2283 int rc = SQLITE_OK; 2284 2285 2286 /* If we have any lock, then the lock file already exists. All we have 2287 ** to do is adjust our internal record of the lock level. 2288 */ 2289 if( pFile->eFileLock > NO_LOCK ){ 2290 pFile->eFileLock = eFileLock; 2291 /* Always update the timestamp on the old file */ 2292 #ifdef HAVE_UTIME 2293 utime(zLockFile, NULL); 2294 #else 2295 utimes(zLockFile, NULL); 2296 #endif 2297 return SQLITE_OK; 2298 } 2299 2300 /* grab an exclusive lock */ 2301 rc = osMkdir(zLockFile, 0777); 2302 if( rc<0 ){ 2303 /* failed to open/create the lock directory */ 2304 int tErrno = errno; 2305 if( EEXIST == tErrno ){ 2306 rc = SQLITE_BUSY; 2307 } else { 2308 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2309 if( rc!=SQLITE_BUSY ){ 2310 storeLastErrno(pFile, tErrno); 2311 } 2312 } 2313 return rc; 2314 } 2315 2316 /* got it, set the type and return ok */ 2317 pFile->eFileLock = eFileLock; 2318 return rc; 2319 } 2320 2321 /* 2322 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2323 ** must be either NO_LOCK or SHARED_LOCK. 2324 ** 2325 ** If the locking level of the file descriptor is already at or below 2326 ** the requested locking level, this routine is a no-op. 
2327 ** 2328 ** When the locking level reaches NO_LOCK, delete the lock file. 2329 */ 2330 static int dotlockUnlock(sqlite3_file *id, int eFileLock) { 2331 unixFile *pFile = (unixFile*)id; 2332 char *zLockFile = (char *)pFile->lockingContext; 2333 int rc; 2334 2335 assert( pFile ); 2336 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, 2337 pFile->eFileLock, osGetpid(0))); 2338 assert( eFileLock<=SHARED_LOCK ); 2339 2340 /* no-op if possible */ 2341 if( pFile->eFileLock==eFileLock ){ 2342 return SQLITE_OK; 2343 } 2344 2345 /* To downgrade to shared, simply update our internal notion of the 2346 ** lock state. No need to mess with the file on disk. 2347 */ 2348 if( eFileLock==SHARED_LOCK ){ 2349 pFile->eFileLock = SHARED_LOCK; 2350 return SQLITE_OK; 2351 } 2352 2353 /* To fully unlock the database, delete the lock file */ 2354 assert( eFileLock==NO_LOCK ); 2355 rc = osRmdir(zLockFile); 2356 if( rc<0 ){ 2357 int tErrno = errno; 2358 if( tErrno==ENOENT ){ 2359 rc = SQLITE_OK; 2360 }else{ 2361 rc = SQLITE_IOERR_UNLOCK; 2362 storeLastErrno(pFile, tErrno); 2363 } 2364 return rc; 2365 } 2366 pFile->eFileLock = NO_LOCK; 2367 return SQLITE_OK; 2368 } 2369 2370 /* 2371 ** Close a file. Make sure the lock has been released before closing. 2372 */ 2373 static int dotlockClose(sqlite3_file *id) { 2374 unixFile *pFile = (unixFile*)id; 2375 assert( id!=0 ); 2376 dotlockUnlock(id, NO_LOCK); 2377 sqlite3_free(pFile->lockingContext); 2378 return closeUnixFile(id); 2379 } 2380 /****************** End of the dot-file lock implementation ******************* 2381 ******************************************************************************/ 2382 2383 /****************************************************************************** 2384 ************************** Begin flock Locking ******************************** 2385 ** 2386 ** Use the flock() system call to do file locking. 
2387 ** 2388 ** flock() locking is like dot-file locking in that the various 2389 ** fine-grain locking levels supported by SQLite are collapsed into 2390 ** a single exclusive lock. In other words, SHARED, RESERVED, and 2391 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite 2392 ** still works when you do this, but concurrency is reduced since 2393 ** only a single process can be reading the database at a time. 2394 ** 2395 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off 2396 */ 2397 #if SQLITE_ENABLE_LOCKING_STYLE 2398 2399 /* 2400 ** Retry flock() calls that fail with EINTR 2401 */ 2402 #ifdef EINTR 2403 static int robust_flock(int fd, int op){ 2404 int rc; 2405 do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); 2406 return rc; 2407 } 2408 #else 2409 # define robust_flock(a,b) flock(a,b) 2410 #endif 2411 2412 2413 /* 2414 ** This routine checks if there is a RESERVED lock held on the specified 2415 ** file by this or any other process. If such a lock is held, set *pResOut 2416 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2417 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2418 */ 2419 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 2420 int rc = SQLITE_OK; 2421 int reserved = 0; 2422 unixFile *pFile = (unixFile*)id; 2423 2424 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2425 2426 assert( pFile ); 2427 2428 /* Check if a thread in this process holds such a lock */ 2429 if( pFile->eFileLock>SHARED_LOCK ){ 2430 reserved = 1; 2431 } 2432 2433 /* Otherwise see if some other process holds it. 
*/ 2434 if( !reserved ){ 2435 /* attempt to get the lock */ 2436 int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); 2437 if( !lrc ){ 2438 /* got the lock, unlock it */ 2439 lrc = robust_flock(pFile->h, LOCK_UN); 2440 if ( lrc ) { 2441 int tErrno = errno; 2442 /* unlock failed with an error */ 2443 lrc = SQLITE_IOERR_UNLOCK; 2444 storeLastErrno(pFile, tErrno); 2445 rc = lrc; 2446 } 2447 } else { 2448 int tErrno = errno; 2449 reserved = 1; 2450 /* someone else might have it reserved */ 2451 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2452 if( IS_LOCK_ERROR(lrc) ){ 2453 storeLastErrno(pFile, tErrno); 2454 rc = lrc; 2455 } 2456 } 2457 } 2458 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); 2459 2460 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2461 if( (rc & 0xff) == SQLITE_IOERR ){ 2462 rc = SQLITE_OK; 2463 reserved=1; 2464 } 2465 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2466 *pResOut = reserved; 2467 return rc; 2468 } 2469 2470 /* 2471 ** Lock the file with the lock specified by parameter eFileLock - one 2472 ** of the following: 2473 ** 2474 ** (1) SHARED_LOCK 2475 ** (2) RESERVED_LOCK 2476 ** (3) PENDING_LOCK 2477 ** (4) EXCLUSIVE_LOCK 2478 ** 2479 ** Sometimes when requesting one lock state, additional lock states 2480 ** are inserted in between. The locking might fail on one of the later 2481 ** transitions leaving the lock state different from what it started but 2482 ** still short of its goal. The following chart shows the allowed 2483 ** transitions and the inserted intermediate states: 2484 ** 2485 ** UNLOCKED -> SHARED 2486 ** SHARED -> RESERVED 2487 ** SHARED -> (PENDING) -> EXCLUSIVE 2488 ** RESERVED -> (PENDING) -> EXCLUSIVE 2489 ** PENDING -> EXCLUSIVE 2490 ** 2491 ** flock() only really support EXCLUSIVE locks. We track intermediate 2492 ** lock states in the sqlite3_file structure, but all locks SHARED or 2493 ** above are really EXCLUSIVE locks and exclude all other processes from 2494 ** access the file. 
2495 ** 2496 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2497 ** routine to lower a locking level. 2498 */ 2499 static int flockLock(sqlite3_file *id, int eFileLock) { 2500 int rc = SQLITE_OK; 2501 unixFile *pFile = (unixFile*)id; 2502 2503 assert( pFile ); 2504 2505 /* if we already have a lock, it is exclusive. 2506 ** Just adjust level and punt on outta here. */ 2507 if (pFile->eFileLock > NO_LOCK) { 2508 pFile->eFileLock = eFileLock; 2509 return SQLITE_OK; 2510 } 2511 2512 /* grab an exclusive lock */ 2513 2514 if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { 2515 int tErrno = errno; 2516 /* didn't get, must be busy */ 2517 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2518 if( IS_LOCK_ERROR(rc) ){ 2519 storeLastErrno(pFile, tErrno); 2520 } 2521 } else { 2522 /* got it, set the type and return ok */ 2523 pFile->eFileLock = eFileLock; 2524 } 2525 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 2526 rc==SQLITE_OK ? "ok" : "failed")); 2527 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2528 if( (rc & 0xff) == SQLITE_IOERR ){ 2529 rc = SQLITE_BUSY; 2530 } 2531 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2532 return rc; 2533 } 2534 2535 2536 /* 2537 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2538 ** must be either NO_LOCK or SHARED_LOCK. 2539 ** 2540 ** If the locking level of the file descriptor is already at or below 2541 ** the requested locking level, this routine is a no-op. 
2542 */ 2543 static int flockUnlock(sqlite3_file *id, int eFileLock) { 2544 unixFile *pFile = (unixFile*)id; 2545 2546 assert( pFile ); 2547 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, 2548 pFile->eFileLock, osGetpid(0))); 2549 assert( eFileLock<=SHARED_LOCK ); 2550 2551 /* no-op if possible */ 2552 if( pFile->eFileLock==eFileLock ){ 2553 return SQLITE_OK; 2554 } 2555 2556 /* shared can just be set because we always have an exclusive */ 2557 if (eFileLock==SHARED_LOCK) { 2558 pFile->eFileLock = eFileLock; 2559 return SQLITE_OK; 2560 } 2561 2562 /* no, really, unlock. */ 2563 if( robust_flock(pFile->h, LOCK_UN) ){ 2564 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2565 return SQLITE_OK; 2566 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2567 return SQLITE_IOERR_UNLOCK; 2568 }else{ 2569 pFile->eFileLock = NO_LOCK; 2570 return SQLITE_OK; 2571 } 2572 } 2573 2574 /* 2575 ** Close a file. 2576 */ 2577 static int flockClose(sqlite3_file *id) { 2578 assert( id!=0 ); 2579 flockUnlock(id, NO_LOCK); 2580 return closeUnixFile(id); 2581 } 2582 2583 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ 2584 2585 /******************* End of the flock lock implementation ********************* 2586 ******************************************************************************/ 2587 2588 /****************************************************************************** 2589 ************************ Begin Named Semaphore Locking ************************ 2590 ** 2591 ** Named semaphore locking is only supported on VxWorks. 2592 ** 2593 ** Semaphore locking is like dot-lock and flock in that it really only 2594 ** supports EXCLUSIVE locking. Only a single process can read or write 2595 ** the database file at a time. This reduces potential concurrency, but 2596 ** makes the lock implementation much easier. 2597 */ 2598 #if OS_VXWORKS 2599 2600 /* 2601 ** This routine checks if there is a RESERVED lock held on the specified 2602 ** file by this or any other process. 
If such a lock is held, set *pResOut 2603 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2604 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2605 */ 2606 static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { 2607 int rc = SQLITE_OK; 2608 int reserved = 0; 2609 unixFile *pFile = (unixFile*)id; 2610 2611 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2612 2613 assert( pFile ); 2614 2615 /* Check if a thread in this process holds such a lock */ 2616 if( pFile->eFileLock>SHARED_LOCK ){ 2617 reserved = 1; 2618 } 2619 2620 /* Otherwise see if some other process holds it. */ 2621 if( !reserved ){ 2622 sem_t *pSem = pFile->pInode->pSem; 2623 2624 if( sem_trywait(pSem)==-1 ){ 2625 int tErrno = errno; 2626 if( EAGAIN != tErrno ){ 2627 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); 2628 storeLastErrno(pFile, tErrno); 2629 } else { 2630 /* someone else has the lock when we are in NO_LOCK */ 2631 reserved = (pFile->eFileLock < SHARED_LOCK); 2632 } 2633 }else{ 2634 /* we could have it if we want it */ 2635 sem_post(pSem); 2636 } 2637 } 2638 OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); 2639 2640 *pResOut = reserved; 2641 return rc; 2642 } 2643 2644 /* 2645 ** Lock the file with the lock specified by parameter eFileLock - one 2646 ** of the following: 2647 ** 2648 ** (1) SHARED_LOCK 2649 ** (2) RESERVED_LOCK 2650 ** (3) PENDING_LOCK 2651 ** (4) EXCLUSIVE_LOCK 2652 ** 2653 ** Sometimes when requesting one lock state, additional lock states 2654 ** are inserted in between. The locking might fail on one of the later 2655 ** transitions leaving the lock state different from what it started but 2656 ** still short of its goal. 
The following chart shows the allowed 2657 ** transitions and the inserted intermediate states: 2658 ** 2659 ** UNLOCKED -> SHARED 2660 ** SHARED -> RESERVED 2661 ** SHARED -> (PENDING) -> EXCLUSIVE 2662 ** RESERVED -> (PENDING) -> EXCLUSIVE 2663 ** PENDING -> EXCLUSIVE 2664 ** 2665 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate 2666 ** lock states in the sqlite3_file structure, but all locks SHARED or 2667 ** above are really EXCLUSIVE locks and exclude all other processes from 2668 ** access the file. 2669 ** 2670 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2671 ** routine to lower a locking level. 2672 */ 2673 static int semXLock(sqlite3_file *id, int eFileLock) { 2674 unixFile *pFile = (unixFile*)id; 2675 sem_t *pSem = pFile->pInode->pSem; 2676 int rc = SQLITE_OK; 2677 2678 /* if we already have a lock, it is exclusive. 2679 ** Just adjust level and punt on outta here. */ 2680 if (pFile->eFileLock > NO_LOCK) { 2681 pFile->eFileLock = eFileLock; 2682 rc = SQLITE_OK; 2683 goto sem_end_lock; 2684 } 2685 2686 /* lock semaphore now but bail out when already locked. */ 2687 if( sem_trywait(pSem)==-1 ){ 2688 rc = SQLITE_BUSY; 2689 goto sem_end_lock; 2690 } 2691 2692 /* got it, set the type and return ok */ 2693 pFile->eFileLock = eFileLock; 2694 2695 sem_end_lock: 2696 return rc; 2697 } 2698 2699 /* 2700 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2701 ** must be either NO_LOCK or SHARED_LOCK. 2702 ** 2703 ** If the locking level of the file descriptor is already at or below 2704 ** the requested locking level, this routine is a no-op. 
2705 */ 2706 static int semXUnlock(sqlite3_file *id, int eFileLock) { 2707 unixFile *pFile = (unixFile*)id; 2708 sem_t *pSem = pFile->pInode->pSem; 2709 2710 assert( pFile ); 2711 assert( pSem ); 2712 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, 2713 pFile->eFileLock, osGetpid(0))); 2714 assert( eFileLock<=SHARED_LOCK ); 2715 2716 /* no-op if possible */ 2717 if( pFile->eFileLock==eFileLock ){ 2718 return SQLITE_OK; 2719 } 2720 2721 /* shared can just be set because we always have an exclusive */ 2722 if (eFileLock==SHARED_LOCK) { 2723 pFile->eFileLock = eFileLock; 2724 return SQLITE_OK; 2725 } 2726 2727 /* no, really unlock. */ 2728 if ( sem_post(pSem)==-1 ) { 2729 int rc, tErrno = errno; 2730 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 2731 if( IS_LOCK_ERROR(rc) ){ 2732 storeLastErrno(pFile, tErrno); 2733 } 2734 return rc; 2735 } 2736 pFile->eFileLock = NO_LOCK; 2737 return SQLITE_OK; 2738 } 2739 2740 /* 2741 ** Close a file. 2742 */ 2743 static int semXClose(sqlite3_file *id) { 2744 if( id ){ 2745 unixFile *pFile = (unixFile*)id; 2746 semXUnlock(id, NO_LOCK); 2747 assert( pFile ); 2748 assert( unixFileMutexNotheld(pFile) ); 2749 unixEnterMutex(); 2750 releaseInodeInfo(pFile); 2751 unixLeaveMutex(); 2752 closeUnixFile(id); 2753 } 2754 return SQLITE_OK; 2755 } 2756 2757 #endif /* OS_VXWORKS */ 2758 /* 2759 ** Named semaphore locking is only available on VxWorks. 2760 ** 2761 *************** End of the named semaphore lock implementation **************** 2762 ******************************************************************************/ 2763 2764 2765 /****************************************************************************** 2766 *************************** Begin AFP Locking ********************************* 2767 ** 2768 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found 2769 ** on Apple Macintosh computers - both OS9 and OSX. 2770 ** 2771 ** Third-party implementations of AFP are available. 
But this code here 2772 ** only works on OSX. 2773 */ 2774 2775 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2776 /* 2777 ** The afpLockingContext structure contains all afp lock specific state 2778 */ 2779 typedef struct afpLockingContext afpLockingContext; 2780 struct afpLockingContext { 2781 int reserved; 2782 const char *dbPath; /* Name of the open file */ 2783 }; 2784 2785 struct ByteRangeLockPB2 2786 { 2787 unsigned long long offset; /* offset to first byte to lock */ 2788 unsigned long long length; /* nbr of bytes to lock */ 2789 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 2790 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 2791 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 2792 int fd; /* file desc to assoc this lock with */ 2793 }; 2794 2795 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 2796 2797 /* 2798 ** This is a utility for setting or clearing a bit-range lock on an 2799 ** AFP filesystem. 2800 ** 2801 ** Return SQLITE_OK on success, SQLITE_BUSY on failure. 2802 */ 2803 static int afpSetLock( 2804 const char *path, /* Name of the file to be locked or unlocked */ 2805 unixFile *pFile, /* Open file descriptor on path */ 2806 unsigned long long offset, /* First byte to be locked */ 2807 unsigned long long length, /* Number of bytes to lock */ 2808 int setLockFlag /* True to set lock. False to clear lock */ 2809 ){ 2810 struct ByteRangeLockPB2 pb; 2811 int err; 2812 2813 pb.unLockFlag = setLockFlag ? 
0 : 1; 2814 pb.startEndFlag = 0; 2815 pb.offset = offset; 2816 pb.length = length; 2817 pb.fd = pFile->h; 2818 2819 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 2820 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), 2821 offset, length)); 2822 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 2823 if ( err==-1 ) { 2824 int rc; 2825 int tErrno = errno; 2826 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", 2827 path, tErrno, strerror(tErrno))); 2828 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS 2829 rc = SQLITE_BUSY; 2830 #else 2831 rc = sqliteErrorFromPosixError(tErrno, 2832 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); 2833 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ 2834 if( IS_LOCK_ERROR(rc) ){ 2835 storeLastErrno(pFile, tErrno); 2836 } 2837 return rc; 2838 } else { 2839 return SQLITE_OK; 2840 } 2841 } 2842 2843 /* 2844 ** This routine checks if there is a RESERVED lock held on the specified 2845 ** file by this or any other process. If such a lock is held, set *pResOut 2846 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2847 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2848 */ 2849 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 2850 int rc = SQLITE_OK; 2851 int reserved = 0; 2852 unixFile *pFile = (unixFile*)id; 2853 afpLockingContext *context; 2854 2855 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2856 2857 assert( pFile ); 2858 context = (afpLockingContext *) pFile->lockingContext; 2859 if( context->reserved ){ 2860 *pResOut = 1; 2861 return SQLITE_OK; 2862 } 2863 sqlite3_mutex_enter(pFile->pInode->pLockMutex); 2864 /* Check if a thread in this process holds such a lock */ 2865 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 2866 reserved = 1; 2867 } 2868 2869 /* Otherwise see if some other process holds it. 
2870 */ 2871 if( !reserved ){ 2872 /* lock the RESERVED byte */ 2873 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2874 if( SQLITE_OK==lrc ){ 2875 /* if we succeeded in taking the reserved lock, unlock it to restore 2876 ** the original state */ 2877 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2878 } else { 2879 /* if we failed to get the lock then someone else must have it */ 2880 reserved = 1; 2881 } 2882 if( IS_LOCK_ERROR(lrc) ){ 2883 rc=lrc; 2884 } 2885 } 2886 2887 sqlite3_mutex_leave(pFile->pInode->pLockMutex); 2888 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); 2889 2890 *pResOut = reserved; 2891 return rc; 2892 } 2893 2894 /* 2895 ** Lock the file with the lock specified by parameter eFileLock - one 2896 ** of the following: 2897 ** 2898 ** (1) SHARED_LOCK 2899 ** (2) RESERVED_LOCK 2900 ** (3) PENDING_LOCK 2901 ** (4) EXCLUSIVE_LOCK 2902 ** 2903 ** Sometimes when requesting one lock state, additional lock states 2904 ** are inserted in between. The locking might fail on one of the later 2905 ** transitions leaving the lock state different from what it started but 2906 ** still short of its goal. The following chart shows the allowed 2907 ** transitions and the inserted intermediate states: 2908 ** 2909 ** UNLOCKED -> SHARED 2910 ** SHARED -> RESERVED 2911 ** SHARED -> (PENDING) -> EXCLUSIVE 2912 ** RESERVED -> (PENDING) -> EXCLUSIVE 2913 ** PENDING -> EXCLUSIVE 2914 ** 2915 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2916 ** routine to lower a locking level. 
2917 */ 2918 static int afpLock(sqlite3_file *id, int eFileLock){ 2919 int rc = SQLITE_OK; 2920 unixFile *pFile = (unixFile*)id; 2921 unixInodeInfo *pInode = pFile->pInode; 2922 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2923 2924 assert( pFile ); 2925 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, 2926 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 2927 azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); 2928 2929 /* If there is already a lock of this type or more restrictive on the 2930 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 2931 ** unixEnterMutex() hasn't been called yet. 2932 */ 2933 if( pFile->eFileLock>=eFileLock ){ 2934 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, 2935 azFileLock(eFileLock))); 2936 return SQLITE_OK; 2937 } 2938 2939 /* Make sure the locking sequence is correct 2940 ** (1) We never move from unlocked to anything higher than shared lock. 2941 ** (2) SQLite never explicitly requests a pendig lock. 2942 ** (3) A shared lock is always held when a reserve lock is requested. 2943 */ 2944 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 2945 assert( eFileLock!=PENDING_LOCK ); 2946 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 2947 2948 /* This mutex is needed because pFile->pInode is shared across threads 2949 */ 2950 pInode = pFile->pInode; 2951 sqlite3_mutex_enter(pInode->pLockMutex); 2952 2953 /* If some thread using this PID has a lock via a different unixFile* 2954 ** handle that precludes the requested lock, return BUSY. 2955 */ 2956 if( (pFile->eFileLock!=pInode->eFileLock && 2957 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 2958 ){ 2959 rc = SQLITE_BUSY; 2960 goto afp_end_lock; 2961 } 2962 2963 /* If a SHARED lock is requested, and some thread using this PID already 2964 ** has a SHARED or RESERVED lock, then increment reference counts and 2965 ** return SQLITE_OK. 
2966 */ 2967 if( eFileLock==SHARED_LOCK && 2968 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 2969 assert( eFileLock==SHARED_LOCK ); 2970 assert( pFile->eFileLock==0 ); 2971 assert( pInode->nShared>0 ); 2972 pFile->eFileLock = SHARED_LOCK; 2973 pInode->nShared++; 2974 pInode->nLock++; 2975 goto afp_end_lock; 2976 } 2977 2978 /* A PENDING lock is needed before acquiring a SHARED lock and before 2979 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 2980 ** be released. 2981 */ 2982 if( eFileLock==SHARED_LOCK 2983 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 2984 ){ 2985 int failed; 2986 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); 2987 if (failed) { 2988 rc = failed; 2989 goto afp_end_lock; 2990 } 2991 } 2992 2993 /* If control gets to this point, then actually go ahead and make 2994 ** operating system calls for the specified lock. 2995 */ 2996 if( eFileLock==SHARED_LOCK ){ 2997 int lrc1, lrc2, lrc1Errno = 0; 2998 long lk, mask; 2999 3000 assert( pInode->nShared==0 ); 3001 assert( pInode->eFileLock==0 ); 3002 3003 mask = (sizeof(long)==8) ? 
LARGEST_INT64 : 0x7fffffff; 3004 /* Now get the read-lock SHARED_LOCK */ 3005 /* note that the quality of the randomness doesn't matter that much */ 3006 lk = random(); 3007 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); 3008 lrc1 = afpSetLock(context->dbPath, pFile, 3009 SHARED_FIRST+pInode->sharedByte, 1, 1); 3010 if( IS_LOCK_ERROR(lrc1) ){ 3011 lrc1Errno = pFile->lastErrno; 3012 } 3013 /* Drop the temporary PENDING lock */ 3014 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3015 3016 if( IS_LOCK_ERROR(lrc1) ) { 3017 storeLastErrno(pFile, lrc1Errno); 3018 rc = lrc1; 3019 goto afp_end_lock; 3020 } else if( IS_LOCK_ERROR(lrc2) ){ 3021 rc = lrc2; 3022 goto afp_end_lock; 3023 } else if( lrc1 != SQLITE_OK ) { 3024 rc = lrc1; 3025 } else { 3026 pFile->eFileLock = SHARED_LOCK; 3027 pInode->nLock++; 3028 pInode->nShared = 1; 3029 } 3030 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 3031 /* We are trying for an exclusive lock but another thread in this 3032 ** same process is still holding a shared lock. */ 3033 rc = SQLITE_BUSY; 3034 }else{ 3035 /* The request was for a RESERVED or EXCLUSIVE lock. It is 3036 ** assumed that there is a SHARED or greater lock on the file 3037 ** already. 3038 */ 3039 int failed = 0; 3040 assert( 0!=pFile->eFileLock ); 3041 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { 3042 /* Acquire a RESERVED lock */ 3043 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 3044 if( !failed ){ 3045 context->reserved = 1; 3046 } 3047 } 3048 if (!failed && eFileLock == EXCLUSIVE_LOCK) { 3049 /* Acquire an EXCLUSIVE lock */ 3050 3051 /* Remove the shared lock before trying the range. 
we'll need to 3052 ** reestablish the shared lock if we can't get the afpUnlock 3053 */ 3054 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + 3055 pInode->sharedByte, 1, 0)) ){ 3056 int failed2 = SQLITE_OK; 3057 /* now attemmpt to get the exclusive lock range */ 3058 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 3059 SHARED_SIZE, 1); 3060 if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 3061 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ 3062 /* Can't reestablish the shared lock. Sqlite can't deal, this is 3063 ** a critical I/O error 3064 */ 3065 rc = ((failed & 0xff) == SQLITE_IOERR) ? failed2 : 3066 SQLITE_IOERR_LOCK; 3067 goto afp_end_lock; 3068 } 3069 }else{ 3070 rc = failed; 3071 } 3072 } 3073 if( failed ){ 3074 rc = failed; 3075 } 3076 } 3077 3078 if( rc==SQLITE_OK ){ 3079 pFile->eFileLock = eFileLock; 3080 pInode->eFileLock = eFileLock; 3081 }else if( eFileLock==EXCLUSIVE_LOCK ){ 3082 pFile->eFileLock = PENDING_LOCK; 3083 pInode->eFileLock = PENDING_LOCK; 3084 } 3085 3086 afp_end_lock: 3087 sqlite3_mutex_leave(pInode->pLockMutex); 3088 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 3089 rc==SQLITE_OK ? "ok" : "failed")); 3090 return rc; 3091 } 3092 3093 /* 3094 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3095 ** must be either NO_LOCK or SHARED_LOCK. 3096 ** 3097 ** If the locking level of the file descriptor is already at or below 3098 ** the requested locking level, this routine is a no-op. 
3099 */ 3100 static int afpUnlock(sqlite3_file *id, int eFileLock) { 3101 int rc = SQLITE_OK; 3102 unixFile *pFile = (unixFile*)id; 3103 unixInodeInfo *pInode; 3104 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 3105 int skipShared = 0; 3106 #ifdef SQLITE_TEST 3107 int h = pFile->h; 3108 #endif 3109 3110 assert( pFile ); 3111 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, 3112 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 3113 osGetpid(0))); 3114 3115 assert( eFileLock<=SHARED_LOCK ); 3116 if( pFile->eFileLock<=eFileLock ){ 3117 return SQLITE_OK; 3118 } 3119 pInode = pFile->pInode; 3120 sqlite3_mutex_enter(pInode->pLockMutex); 3121 assert( pInode->nShared!=0 ); 3122 if( pFile->eFileLock>SHARED_LOCK ){ 3123 assert( pInode->eFileLock==pFile->eFileLock ); 3124 SimulateIOErrorBenign(1); 3125 SimulateIOError( h=(-1) ) 3126 SimulateIOErrorBenign(0); 3127 3128 #ifdef SQLITE_DEBUG 3129 /* When reducing a lock such that other processes can start 3130 ** reading the database file again, make sure that the 3131 ** transaction counter was updated if any part of the database 3132 ** file changed. If the transaction counter is not updated, 3133 ** other connections to the same file might not realize that 3134 ** the file has changed and hence might not know to flush their 3135 ** cache. The use of a stale cache can lead to database corruption. 
3136 */ 3137 assert( pFile->inNormalWrite==0 3138 || pFile->dbUpdate==0 3139 || pFile->transCntrChng==1 ); 3140 pFile->inNormalWrite = 0; 3141 #endif 3142 3143 if( pFile->eFileLock==EXCLUSIVE_LOCK ){ 3144 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); 3145 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ 3146 /* only re-establish the shared lock if necessary */ 3147 int sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3148 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); 3149 } else { 3150 skipShared = 1; 3151 } 3152 } 3153 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ 3154 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3155 } 3156 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ 3157 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 3158 if( !rc ){ 3159 context->reserved = 0; 3160 } 3161 } 3162 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ 3163 pInode->eFileLock = SHARED_LOCK; 3164 } 3165 } 3166 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ 3167 3168 /* Decrement the shared lock counter. Release the lock using an 3169 ** OS call only when all threads in this same process have released 3170 ** the lock. 
3171 */ 3172 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3173 pInode->nShared--; 3174 if( pInode->nShared==0 ){ 3175 SimulateIOErrorBenign(1); 3176 SimulateIOError( h=(-1) ) 3177 SimulateIOErrorBenign(0); 3178 if( !skipShared ){ 3179 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); 3180 } 3181 if( !rc ){ 3182 pInode->eFileLock = NO_LOCK; 3183 pFile->eFileLock = NO_LOCK; 3184 } 3185 } 3186 if( rc==SQLITE_OK ){ 3187 pInode->nLock--; 3188 assert( pInode->nLock>=0 ); 3189 if( pInode->nLock==0 ) closePendingFds(pFile); 3190 } 3191 } 3192 3193 sqlite3_mutex_leave(pInode->pLockMutex); 3194 if( rc==SQLITE_OK ){ 3195 pFile->eFileLock = eFileLock; 3196 } 3197 return rc; 3198 } 3199 3200 /* 3201 ** Close a file & cleanup AFP specific locking context 3202 */ 3203 static int afpClose(sqlite3_file *id) { 3204 int rc = SQLITE_OK; 3205 unixFile *pFile = (unixFile*)id; 3206 assert( id!=0 ); 3207 afpUnlock(id, NO_LOCK); 3208 assert( unixFileMutexNotheld(pFile) ); 3209 unixEnterMutex(); 3210 if( pFile->pInode ){ 3211 unixInodeInfo *pInode = pFile->pInode; 3212 sqlite3_mutex_enter(pInode->pLockMutex); 3213 if( pInode->nLock ){ 3214 /* If there are outstanding locks, do not actually close the file just 3215 ** yet because that would clear those locks. Instead, add the file 3216 ** descriptor to pInode->aPending. It will be automatically closed when 3217 ** the last lock is cleared. 3218 */ 3219 setPendingFd(pFile); 3220 } 3221 sqlite3_mutex_leave(pInode->pLockMutex); 3222 } 3223 releaseInodeInfo(pFile); 3224 sqlite3_free(pFile->lockingContext); 3225 rc = closeUnixFile(id); 3226 unixLeaveMutex(); 3227 return rc; 3228 } 3229 3230 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3231 /* 3232 ** The code above is the AFP lock implementation. The code is specific 3233 ** to MacOSX and does not work on other unix platforms. No alternative 3234 ** is available. 
If you don't compile for a mac, then the "unix-afp" 3235 ** VFS is not available. 3236 ** 3237 ********************* End of the AFP lock implementation ********************** 3238 ******************************************************************************/ 3239 3240 /****************************************************************************** 3241 *************************** Begin NFS Locking ********************************/ 3242 3243 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 3244 /* 3245 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3246 ** must be either NO_LOCK or SHARED_LOCK. 3247 ** 3248 ** If the locking level of the file descriptor is already at or below 3249 ** the requested locking level, this routine is a no-op. 3250 */ 3251 static int nfsUnlock(sqlite3_file *id, int eFileLock){ 3252 return posixUnlock(id, eFileLock, 1); 3253 } 3254 3255 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3256 /* 3257 ** The code above is the NFS lock implementation. The code is specific 3258 ** to MacOSX and does not work on other unix platforms. No alternative 3259 ** is available. 3260 ** 3261 ********************* End of the NFS lock implementation ********************** 3262 ******************************************************************************/ 3263 3264 /****************************************************************************** 3265 **************** Non-locking sqlite3_file methods ***************************** 3266 ** 3267 ** The next division contains implementations for all methods of the 3268 ** sqlite3_file object other than the locking methods. The locking 3269 ** methods were defined in divisions above (one locking method per 3270 ** division). Those methods that are common to all locking modes 3271 ** are gather together into this division. 3272 */ 3273 3274 /* 3275 ** Seek to the offset passed as the second argument, then read cnt 3276 ** bytes into pBuf. 
Return the number of bytes actually read. 3277 ** 3278 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 3279 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 3280 ** one system to another. Since SQLite does not define USE_PREAD 3281 ** in any form by default, we will not attempt to define _XOPEN_SOURCE. 3282 ** See tickets #2741 and #2681. 3283 ** 3284 ** To avoid stomping the errno value on a failed read the lastErrno value 3285 ** is set before returning. 3286 */ 3287 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 3288 int got; 3289 int prior = 0; 3290 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 3291 i64 newOffset; 3292 #endif 3293 TIMER_START; 3294 assert( cnt==(cnt&0x1ffff) ); 3295 assert( id->h>2 ); 3296 do{ 3297 #if defined(USE_PREAD) 3298 got = osPread(id->h, pBuf, cnt, offset); 3299 SimulateIOError( got = -1 ); 3300 #elif defined(USE_PREAD64) 3301 got = osPread64(id->h, pBuf, cnt, offset); 3302 SimulateIOError( got = -1 ); 3303 #else 3304 newOffset = lseek(id->h, offset, SEEK_SET); 3305 SimulateIOError( newOffset = -1 ); 3306 if( newOffset<0 ){ 3307 storeLastErrno((unixFile*)id, errno); 3308 return -1; 3309 } 3310 got = osRead(id->h, pBuf, cnt); 3311 #endif 3312 if( got==cnt ) break; 3313 if( got<0 ){ 3314 if( errno==EINTR ){ got = 1; continue; } 3315 prior = 0; 3316 storeLastErrno((unixFile*)id, errno); 3317 break; 3318 }else if( got>0 ){ 3319 cnt -= got; 3320 offset += got; 3321 prior += got; 3322 pBuf = (void*)(got + (char*)pBuf); 3323 } 3324 }while( got>0 ); 3325 TIMER_END; 3326 OSTRACE(("READ %-3d %5d %7lld %llu\n", 3327 id->h, got+prior, offset-prior, TIMER_ELAPSED)); 3328 return got+prior; 3329 } 3330 3331 /* 3332 ** Read data from a file into a buffer. Return SQLITE_OK if all 3333 ** bytes were read successfully and SQLITE_IOERR if anything goes 3334 ** wrong. 
3335 */ 3336 static int unixRead( 3337 sqlite3_file *id, 3338 void *pBuf, 3339 int amt, 3340 sqlite3_int64 offset 3341 ){ 3342 unixFile *pFile = (unixFile *)id; 3343 int got; 3344 assert( id ); 3345 assert( offset>=0 ); 3346 assert( amt>0 ); 3347 3348 /* If this is a database file (not a journal, super-journal or temp 3349 ** file), the bytes in the locking range should never be read or written. */ 3350 #if 0 3351 assert( pFile->pPreallocatedUnused==0 3352 || offset>=PENDING_BYTE+512 3353 || offset+amt<=PENDING_BYTE 3354 ); 3355 #endif 3356 3357 #if SQLITE_MAX_MMAP_SIZE>0 3358 /* Deal with as much of this read request as possible by transfering 3359 ** data from the memory mapping using memcpy(). */ 3360 if( offset<pFile->mmapSize ){ 3361 if( offset+amt <= pFile->mmapSize ){ 3362 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); 3363 return SQLITE_OK; 3364 }else{ 3365 int nCopy = pFile->mmapSize - offset; 3366 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); 3367 pBuf = &((u8 *)pBuf)[nCopy]; 3368 amt -= nCopy; 3369 offset += nCopy; 3370 } 3371 } 3372 #endif 3373 3374 got = seekAndRead(pFile, offset, pBuf, amt); 3375 if( got==amt ){ 3376 return SQLITE_OK; 3377 }else if( got<0 ){ 3378 /* pFile->lastErrno has been set by seekAndRead(). 3379 ** Usually we return SQLITE_IOERR_READ here, though for some 3380 ** kinds of errors we return SQLITE_IOERR_CORRUPTFS. The 3381 ** SQLITE_IOERR_CORRUPTFS will be converted into SQLITE_CORRUPT 3382 ** prior to returning to the application by the sqlite3ApiExit() 3383 ** routine. 
3384 */ 3385 switch( pFile->lastErrno ){ 3386 case ERANGE: 3387 case EIO: 3388 #ifdef ENXIO 3389 case ENXIO: 3390 #endif 3391 #ifdef EDEVERR 3392 case EDEVERR: 3393 #endif 3394 return SQLITE_IOERR_CORRUPTFS; 3395 } 3396 return SQLITE_IOERR_READ; 3397 }else{ 3398 storeLastErrno(pFile, 0); /* not a system error */ 3399 /* Unread parts of the buffer must be zero-filled */ 3400 memset(&((char*)pBuf)[got], 0, amt-got); 3401 return SQLITE_IOERR_SHORT_READ; 3402 } 3403 } 3404 3405 /* 3406 ** Attempt to seek the file-descriptor passed as the first argument to 3407 ** absolute offset iOff, then attempt to write nBuf bytes of data from 3408 ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 3409 ** return the actual number of bytes written (which may be less than 3410 ** nBuf). 3411 */ 3412 static int seekAndWriteFd( 3413 int fd, /* File descriptor to write to */ 3414 i64 iOff, /* File offset to begin writing at */ 3415 const void *pBuf, /* Copy data from this buffer to the file */ 3416 int nBuf, /* Size of buffer pBuf in bytes */ 3417 int *piErrno /* OUT: Error number if error occurs */ 3418 ){ 3419 int rc = 0; /* Value returned by system call */ 3420 3421 assert( nBuf==(nBuf&0x1ffff) ); 3422 assert( fd>2 ); 3423 assert( piErrno!=0 ); 3424 nBuf &= 0x1ffff; 3425 TIMER_START; 3426 3427 #if defined(USE_PREAD) 3428 do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); 3429 #elif defined(USE_PREAD64) 3430 do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); 3431 #else 3432 do{ 3433 i64 iSeek = lseek(fd, iOff, SEEK_SET); 3434 SimulateIOError( iSeek = -1 ); 3435 if( iSeek<0 ){ 3436 rc = -1; 3437 break; 3438 } 3439 rc = osWrite(fd, pBuf, nBuf); 3440 }while( rc<0 && errno==EINTR ); 3441 #endif 3442 3443 TIMER_END; 3444 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); 3445 3446 if( rc<0 ) *piErrno = errno; 3447 return rc; 3448 } 3449 3450 3451 /* 3452 ** Seek to the offset in id->offset then 
read cnt bytes into pBuf. 3453 ** Return the number of bytes actually read. Update the offset. 3454 ** 3455 ** To avoid stomping the errno value on a failed write the lastErrno value 3456 ** is set before returning. 3457 */ 3458 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 3459 return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); 3460 } 3461 3462 3463 /* 3464 ** Write data from a buffer into a file. Return SQLITE_OK on success 3465 ** or some other error code on failure. 3466 */ 3467 static int unixWrite( 3468 sqlite3_file *id, 3469 const void *pBuf, 3470 int amt, 3471 sqlite3_int64 offset 3472 ){ 3473 unixFile *pFile = (unixFile*)id; 3474 int wrote = 0; 3475 assert( id ); 3476 assert( amt>0 ); 3477 3478 /* If this is a database file (not a journal, super-journal or temp 3479 ** file), the bytes in the locking range should never be read or written. */ 3480 #if 0 3481 assert( pFile->pPreallocatedUnused==0 3482 || offset>=PENDING_BYTE+512 3483 || offset+amt<=PENDING_BYTE 3484 ); 3485 #endif 3486 3487 #ifdef SQLITE_DEBUG 3488 /* If we are doing a normal write to a database file (as opposed to 3489 ** doing a hot-journal rollback or a write to some file other than a 3490 ** normal database file) then record the fact that the database 3491 ** has changed. If the transaction counter is modified, record that 3492 ** fact too. 
3493 */ 3494 if( pFile->inNormalWrite ){ 3495 pFile->dbUpdate = 1; /* The database has been modified */ 3496 if( offset<=24 && offset+amt>=27 ){ 3497 int rc; 3498 char oldCntr[4]; 3499 SimulateIOErrorBenign(1); 3500 rc = seekAndRead(pFile, 24, oldCntr, 4); 3501 SimulateIOErrorBenign(0); 3502 if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ 3503 pFile->transCntrChng = 1; /* The transaction counter has changed */ 3504 } 3505 } 3506 } 3507 #endif 3508 3509 #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 3510 /* Deal with as much of this write request as possible by transfering 3511 ** data from the memory mapping using memcpy(). */ 3512 if( offset<pFile->mmapSize ){ 3513 if( offset+amt <= pFile->mmapSize ){ 3514 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); 3515 return SQLITE_OK; 3516 }else{ 3517 int nCopy = pFile->mmapSize - offset; 3518 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); 3519 pBuf = &((u8 *)pBuf)[nCopy]; 3520 amt -= nCopy; 3521 offset += nCopy; 3522 } 3523 } 3524 #endif 3525 3526 while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ 3527 amt -= wrote; 3528 offset += wrote; 3529 pBuf = &((char*)pBuf)[wrote]; 3530 } 3531 SimulateIOError(( wrote=(-1), amt=1 )); 3532 SimulateDiskfullError(( wrote=0, amt=1 )); 3533 3534 if( amt>wrote ){ 3535 if( wrote<0 && pFile->lastErrno!=ENOSPC ){ 3536 /* lastErrno set by seekAndWrite */ 3537 return SQLITE_IOERR_WRITE; 3538 }else{ 3539 storeLastErrno(pFile, 0); /* not a system error */ 3540 return SQLITE_FULL; 3541 } 3542 } 3543 3544 return SQLITE_OK; 3545 } 3546 3547 #ifdef SQLITE_TEST 3548 /* 3549 ** Count the number of fullsyncs and normal syncs. This is used to test 3550 ** that syncs and fullsyncs are occurring at the right times. 3551 */ 3552 int sqlite3_sync_count = 0; 3553 int sqlite3_fullsync_count = 0; 3554 #endif 3555 3556 /* 3557 ** We do not trust systems to provide a working fdatasync(). Some do. 3558 ** Others do no. 
To be safe, we will stick with the (slightly slower) 3559 ** fsync(). If you know that your system does support fdatasync() correctly, 3560 ** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC 3561 */ 3562 #if !defined(fdatasync) && !HAVE_FDATASYNC 3563 # define fdatasync fsync 3564 #endif 3565 3566 /* 3567 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 3568 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 3569 ** only available on Mac OS X. But that could change. 3570 */ 3571 #ifdef F_FULLFSYNC 3572 # define HAVE_FULLFSYNC 1 3573 #else 3574 # define HAVE_FULLFSYNC 0 3575 #endif 3576 3577 3578 /* 3579 ** The fsync() system call does not work as advertised on many 3580 ** unix systems. The following procedure is an attempt to make 3581 ** it work better. 3582 ** 3583 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 3584 ** for testing when we want to run through the test suite quickly. 3585 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 3586 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 3587 ** or power failure will likely corrupt the database file. 3588 ** 3589 ** SQLite sets the dataOnly flag if the size of the file is unchanged. 3590 ** The idea behind dataOnly is that it should only write the file content 3591 ** to disk, not the inode. We only set dataOnly if the file size is 3592 ** unchanged since the file size is part of the inode. However, 3593 ** Ted Ts'o tells us that fdatasync() will also write the inode if the 3594 ** file size has changed. The only real difference between fdatasync() 3595 ** and fsync(), Ted tells us, is that fdatasync() will not flush the 3596 ** inode if the mtime or owner or other inode attributes have changed. 3597 ** We only care about the file size, not the other file attributes, so 3598 ** as far as SQLite is concerned, an fdatasync() is always adequate. 
3599 ** So, we always use fdatasync() if it is available, regardless of 3600 ** the value of the dataOnly flag. 3601 */ 3602 static int full_fsync(int fd, int fullSync, int dataOnly){ 3603 int rc; 3604 3605 /* The following "ifdef/elif/else/" block has the same structure as 3606 ** the one below. It is replicated here solely to avoid cluttering 3607 ** up the real code with the UNUSED_PARAMETER() macros. 3608 */ 3609 #ifdef SQLITE_NO_SYNC 3610 UNUSED_PARAMETER(fd); 3611 UNUSED_PARAMETER(fullSync); 3612 UNUSED_PARAMETER(dataOnly); 3613 #elif HAVE_FULLFSYNC 3614 UNUSED_PARAMETER(dataOnly); 3615 #else 3616 UNUSED_PARAMETER(fullSync); 3617 UNUSED_PARAMETER(dataOnly); 3618 #endif 3619 3620 /* Record the number of times that we do a normal fsync() and 3621 ** FULLSYNC. This is used during testing to verify that this procedure 3622 ** gets called with the correct arguments. 3623 */ 3624 #ifdef SQLITE_TEST 3625 if( fullSync ) sqlite3_fullsync_count++; 3626 sqlite3_sync_count++; 3627 #endif 3628 3629 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 3630 ** no-op. But go ahead and call fstat() to validate the file 3631 ** descriptor as we need a method to provoke a failure during 3632 ** coverate testing. 3633 */ 3634 #ifdef SQLITE_NO_SYNC 3635 { 3636 struct stat buf; 3637 rc = osFstat(fd, &buf); 3638 } 3639 #elif HAVE_FULLFSYNC 3640 if( fullSync ){ 3641 rc = osFcntl(fd, F_FULLFSYNC, 0); 3642 }else{ 3643 rc = 1; 3644 } 3645 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 3646 ** It shouldn't be possible for fullfsync to fail on the local 3647 ** file system (on OSX), so failure indicates that FULLFSYNC 3648 ** isn't supported for this file system. So, attempt an fsync 3649 ** and (for now) ignore the overhead of a superfluous fcntl call. 3650 ** It'd be better to detect fullfsync support once and avoid 3651 ** the fcntl call every time sync is called. 
3652 */ 3653 if( rc ) rc = fsync(fd); 3654 3655 #elif defined(__APPLE__) 3656 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly 3657 ** so currently we default to the macro that redefines fdatasync to fsync 3658 */ 3659 rc = fsync(fd); 3660 #else 3661 rc = fdatasync(fd); 3662 #if OS_VXWORKS 3663 if( rc==-1 && errno==ENOTSUP ){ 3664 rc = fsync(fd); 3665 } 3666 #endif /* OS_VXWORKS */ 3667 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ 3668 3669 if( OS_VXWORKS && rc!= -1 ){ 3670 rc = 0; 3671 } 3672 return rc; 3673 } 3674 3675 /* 3676 ** Open a file descriptor to the directory containing file zFilename. 3677 ** If successful, *pFd is set to the opened file descriptor and 3678 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 3679 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 3680 ** value. 3681 ** 3682 ** The directory file descriptor is used for only one thing - to 3683 ** fsync() a directory to make sure file creation and deletion events 3684 ** are flushed to disk. Such fsyncs are not needed on newer 3685 ** journaling filesystems, but are required on older filesystems. 3686 ** 3687 ** This routine can be overridden using the xSetSysCall interface. 3688 ** The ability to override this routine was added in support of the 3689 ** chromium sandbox. Opening a directory is a security risk (we are 3690 ** told) so making it overrideable allows the chromium sandbox to 3691 ** replace this routine with a harmless no-op. To make this routine 3692 ** a no-op, replace it with a stub that returns SQLITE_OK but leaves 3693 ** *pFd set to a negative number. 3694 ** 3695 ** If SQLITE_OK is returned, the caller is responsible for closing 3696 ** the file descriptor *pFd using close(). 
3697 */ 3698 static int openDirectory(const char *zFilename, int *pFd){ 3699 int ii; 3700 int fd = -1; 3701 char zDirname[MAX_PATHNAME+1]; 3702 3703 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 3704 for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); 3705 if( ii>0 ){ 3706 zDirname[ii] = '\0'; 3707 }else{ 3708 if( zDirname[0]!='/' ) zDirname[0] = '.'; 3709 zDirname[1] = 0; 3710 } 3711 fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); 3712 if( fd>=0 ){ 3713 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); 3714 } 3715 *pFd = fd; 3716 if( fd>=0 ) return SQLITE_OK; 3717 return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); 3718 } 3719 3720 /* 3721 ** Make sure all writes to a particular file are committed to disk. 3722 ** 3723 ** If dataOnly==0 then both the file itself and its metadata (file 3724 ** size, access time, etc) are synced. If dataOnly!=0 then only the 3725 ** file data is synced. 3726 ** 3727 ** Under Unix, also make sure that the directory entry for the file 3728 ** has been created by fsync-ing the directory that contains the file. 3729 ** If we do not do this and we encounter a power failure, the directory 3730 ** entry for the journal might not exist after we reboot. The next 3731 ** SQLite to access the file will not know that the journal exists (because 3732 ** the directory entry for the journal was never created) and the transaction 3733 ** will not roll back - possibly leading to database corruption. 3734 */ 3735 static int unixSync(sqlite3_file *id, int flags){ 3736 int rc; 3737 unixFile *pFile = (unixFile*)id; 3738 3739 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 3740 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 3741 3742 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 3743 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 3744 || (flags&0x0F)==SQLITE_SYNC_FULL 3745 ); 3746 3747 /* Unix cannot, but some systems may return SQLITE_FULL from here. 
This 3748 ** line is to test that doing so does not cause any problems. 3749 */ 3750 SimulateDiskfullError( return SQLITE_FULL ); 3751 3752 assert( pFile ); 3753 OSTRACE(("SYNC %-3d\n", pFile->h)); 3754 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 3755 SimulateIOError( rc=1 ); 3756 if( rc ){ 3757 storeLastErrno(pFile, errno); 3758 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); 3759 } 3760 3761 /* Also fsync the directory containing the file if the DIRSYNC flag 3762 ** is set. This is a one-time occurrence. Many systems (examples: AIX) 3763 ** are unable to fsync a directory, so ignore errors on the fsync. 3764 */ 3765 if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ 3766 int dirfd; 3767 OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, 3768 HAVE_FULLFSYNC, isFullsync)); 3769 rc = osOpenDirectory(pFile->zPath, &dirfd); 3770 if( rc==SQLITE_OK ){ 3771 full_fsync(dirfd, 0, 0); 3772 robust_close(pFile, dirfd, __LINE__); 3773 }else{ 3774 assert( rc==SQLITE_CANTOPEN ); 3775 rc = SQLITE_OK; 3776 } 3777 pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; 3778 } 3779 return rc; 3780 } 3781 3782 /* 3783 ** Truncate an open file to a specified size 3784 */ 3785 static int unixTruncate(sqlite3_file *id, i64 nByte){ 3786 unixFile *pFile = (unixFile *)id; 3787 int rc; 3788 assert( pFile ); 3789 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 3790 3791 /* If the user has configured a chunk-size for this file, truncate the 3792 ** file so that it consists of an integer number of chunks (i.e. the 3793 ** actual file size after the operation may be larger than the requested 3794 ** size). 
3795 */ 3796 if( pFile->szChunk>0 ){ 3797 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; 3798 } 3799 3800 rc = robust_ftruncate(pFile->h, nByte); 3801 if( rc ){ 3802 storeLastErrno(pFile, errno); 3803 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3804 }else{ 3805 #ifdef SQLITE_DEBUG 3806 /* If we are doing a normal write to a database file (as opposed to 3807 ** doing a hot-journal rollback or a write to some file other than a 3808 ** normal database file) and we truncate the file to zero length, 3809 ** that effectively updates the change counter. This might happen 3810 ** when restoring a database using the backup API from a zero-length 3811 ** source. 3812 */ 3813 if( pFile->inNormalWrite && nByte==0 ){ 3814 pFile->transCntrChng = 1; 3815 } 3816 #endif 3817 3818 #if SQLITE_MAX_MMAP_SIZE>0 3819 /* If the file was just truncated to a size smaller than the currently 3820 ** mapped region, reduce the effective mapping size as well. SQLite will 3821 ** use read() and write() to access data beyond this point from now on. 3822 */ 3823 if( nByte<pFile->mmapSize ){ 3824 pFile->mmapSize = nByte; 3825 } 3826 #endif 3827 3828 return SQLITE_OK; 3829 } 3830 } 3831 3832 /* 3833 ** Determine the current size of a file in bytes 3834 */ 3835 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 3836 int rc; 3837 struct stat buf; 3838 assert( id ); 3839 rc = osFstat(((unixFile*)id)->h, &buf); 3840 SimulateIOError( rc=1 ); 3841 if( rc!=0 ){ 3842 storeLastErrno((unixFile*)id, errno); 3843 return SQLITE_IOERR_FSTAT; 3844 } 3845 *pSize = buf.st_size; 3846 3847 /* When opening a zero-size database, the findInodeInfo() procedure 3848 ** writes a single byte into that file in order to work around a bug 3849 ** in the OS-X msdos filesystem. In order to avoid problems with upper 3850 ** layers, we need to report this file size as zero even though it is 3851 ** really 1. Ticket #3260. 
3852 */ 3853 if( *pSize==1 ) *pSize = 0; 3854 3855 3856 return SQLITE_OK; 3857 } 3858 3859 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3860 /* 3861 ** Handler for proxy-locking file-control verbs. Defined below in the 3862 ** proxying locking division. 3863 */ 3864 static int proxyFileControl(sqlite3_file*,int,void*); 3865 #endif 3866 3867 /* 3868 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 3869 ** file-control operation. Enlarge the database to nBytes in size 3870 ** (rounded up to the next chunk-size). If the database is already 3871 ** nBytes or larger, this routine is a no-op. 3872 */ 3873 static int fcntlSizeHint(unixFile *pFile, i64 nByte){ 3874 if( pFile->szChunk>0 ){ 3875 i64 nSize; /* Required file size */ 3876 struct stat buf; /* Used to hold return values of fstat() */ 3877 3878 if( osFstat(pFile->h, &buf) ){ 3879 return SQLITE_IOERR_FSTAT; 3880 } 3881 3882 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; 3883 if( nSize>(i64)buf.st_size ){ 3884 3885 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 3886 /* The code below is handling the return value of osFallocate() 3887 ** correctly. posix_fallocate() is defined to "returns zero on success, 3888 ** or an error number on failure". See the manpage for details. */ 3889 int err; 3890 do{ 3891 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); 3892 }while( err==EINTR ); 3893 if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE; 3894 #else 3895 /* If the OS does not have posix_fallocate(), fake it. Write a 3896 ** single byte to the last byte in each block that falls entirely 3897 ** within the extended region. Then, if required, a single byte 3898 ** at offset (nSize-1), to set the size of the file correctly. 3899 ** This is a similar technique to that used by glibc on systems 3900 ** that do not have a real fallocate() call. 
3901 */ 3902 int nBlk = buf.st_blksize; /* File-system block size */ 3903 int nWrite = 0; /* Number of bytes written by seekAndWrite */ 3904 i64 iWrite; /* Next offset to write to */ 3905 3906 iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; 3907 assert( iWrite>=buf.st_size ); 3908 assert( ((iWrite+1)%nBlk)==0 ); 3909 for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){ 3910 if( iWrite>=nSize ) iWrite = nSize - 1; 3911 nWrite = seekAndWrite(pFile, iWrite, "", 1); 3912 if( nWrite!=1 ) return SQLITE_IOERR_WRITE; 3913 } 3914 #endif 3915 } 3916 } 3917 3918 #if SQLITE_MAX_MMAP_SIZE>0 3919 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ 3920 int rc; 3921 if( pFile->szChunk<=0 ){ 3922 if( robust_ftruncate(pFile->h, nByte) ){ 3923 storeLastErrno(pFile, errno); 3924 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3925 } 3926 } 3927 3928 rc = unixMapfile(pFile, nByte); 3929 return rc; 3930 } 3931 #endif 3932 3933 return SQLITE_OK; 3934 } 3935 3936 /* 3937 ** If *pArg is initially negative then this is a query. Set *pArg to 3938 ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. 3939 ** 3940 ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. 3941 */ 3942 static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ 3943 if( *pArg<0 ){ 3944 *pArg = (pFile->ctrlFlags & mask)!=0; 3945 }else if( (*pArg)==0 ){ 3946 pFile->ctrlFlags &= ~mask; 3947 }else{ 3948 pFile->ctrlFlags |= mask; 3949 } 3950 } 3951 3952 /* Forward declaration */ 3953 static int unixGetTempname(int nBuf, char *zBuf); 3954 3955 /* 3956 ** Information and control of an open file handle. 3957 */ 3958 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 3959 unixFile *pFile = (unixFile*)id; 3960 switch( op ){ 3961 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 3962 case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { 3963 int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); 3964 return rc ? 
SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; 3965 } 3966 case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { 3967 int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); 3968 return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; 3969 } 3970 case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { 3971 int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); 3972 return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; 3973 } 3974 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 3975 3976 case SQLITE_FCNTL_LOCKSTATE: { 3977 *(int*)pArg = pFile->eFileLock; 3978 return SQLITE_OK; 3979 } 3980 case SQLITE_FCNTL_LAST_ERRNO: { 3981 *(int*)pArg = pFile->lastErrno; 3982 return SQLITE_OK; 3983 } 3984 case SQLITE_FCNTL_CHUNK_SIZE: { 3985 pFile->szChunk = *(int *)pArg; 3986 return SQLITE_OK; 3987 } 3988 case SQLITE_FCNTL_SIZE_HINT: { 3989 int rc; 3990 SimulateIOErrorBenign(1); 3991 rc = fcntlSizeHint(pFile, *(i64 *)pArg); 3992 SimulateIOErrorBenign(0); 3993 return rc; 3994 } 3995 case SQLITE_FCNTL_PERSIST_WAL: { 3996 unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); 3997 return SQLITE_OK; 3998 } 3999 case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { 4000 unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); 4001 return SQLITE_OK; 4002 } 4003 case SQLITE_FCNTL_VFSNAME: { 4004 *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); 4005 return SQLITE_OK; 4006 } 4007 case SQLITE_FCNTL_TEMPFILENAME: { 4008 char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); 4009 if( zTFile ){ 4010 unixGetTempname(pFile->pVfs->mxPathname, zTFile); 4011 *(char**)pArg = zTFile; 4012 } 4013 return SQLITE_OK; 4014 } 4015 case SQLITE_FCNTL_HAS_MOVED: { 4016 *(int*)pArg = fileHasMoved(pFile); 4017 return SQLITE_OK; 4018 } 4019 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4020 case SQLITE_FCNTL_LOCK_TIMEOUT: { 4021 int iOld = pFile->iBusyTimeout; 4022 pFile->iBusyTimeout = *(int*)pArg; 4023 *(int*)pArg = iOld; 4024 return SQLITE_OK; 4025 } 4026 #endif 4027 #if SQLITE_MAX_MMAP_SIZE>0 4028 case SQLITE_FCNTL_MMAP_SIZE: { 4029 i64 newLimit = 
*(i64*)pArg; 4030 int rc = SQLITE_OK; 4031 if( newLimit>sqlite3GlobalConfig.mxMmap ){ 4032 newLimit = sqlite3GlobalConfig.mxMmap; 4033 } 4034 4035 /* The value of newLimit may be eventually cast to (size_t) and passed 4036 ** to mmap(). Restrict its value to 2GB if (size_t) is not at least a 4037 ** 64-bit type. */ 4038 if( newLimit>0 && sizeof(size_t)<8 ){ 4039 newLimit = (newLimit & 0x7FFFFFFF); 4040 } 4041 4042 *(i64*)pArg = pFile->mmapSizeMax; 4043 if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ 4044 pFile->mmapSizeMax = newLimit; 4045 if( pFile->mmapSize>0 ){ 4046 unixUnmapfile(pFile); 4047 rc = unixMapfile(pFile, -1); 4048 } 4049 } 4050 return rc; 4051 } 4052 #endif 4053 #ifdef SQLITE_DEBUG 4054 /* The pager calls this method to signal that it has done 4055 ** a rollback and that the database is therefore unchanged and 4056 ** it hence it is OK for the transaction change counter to be 4057 ** unchanged. 4058 */ 4059 case SQLITE_FCNTL_DB_UNCHANGED: { 4060 ((unixFile*)id)->dbUpdate = 0; 4061 return SQLITE_OK; 4062 } 4063 #endif 4064 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 4065 case SQLITE_FCNTL_SET_LOCKPROXYFILE: 4066 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 4067 return proxyFileControl(id,op,pArg); 4068 } 4069 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 4070 } 4071 return SQLITE_NOTFOUND; 4072 } 4073 4074 /* 4075 ** If pFd->sectorSize is non-zero when this function is called, it is a 4076 ** no-op. Otherwise, the values of pFd->sectorSize and 4077 ** pFd->deviceCharacteristics are set according to the file-system 4078 ** characteristics. 4079 ** 4080 ** There are two versions of this function. One for QNX and one for all 4081 ** other systems. 
4082 */ 4083 #ifndef __QNXNTO__ 4084 static void setDeviceCharacteristics(unixFile *pFd){ 4085 assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); 4086 if( pFd->sectorSize==0 ){ 4087 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 4088 int res; 4089 u32 f = 0; 4090 4091 /* Check for support for F2FS atomic batch writes. */ 4092 res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); 4093 if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ 4094 pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; 4095 } 4096 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 4097 4098 /* Set the POWERSAFE_OVERWRITE flag if requested. */ 4099 if( pFd->ctrlFlags & UNIXFILE_PSOW ){ 4100 pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; 4101 } 4102 4103 pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4104 } 4105 } 4106 #else 4107 #include <sys/dcmd_blk.h> 4108 #include <sys/statvfs.h> 4109 static void setDeviceCharacteristics(unixFile *pFile){ 4110 if( pFile->sectorSize == 0 ){ 4111 struct statvfs fsInfo; 4112 4113 /* Set defaults for non-supported filesystems */ 4114 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4115 pFile->deviceCharacteristics = 0; 4116 if( fstatvfs(pFile->h, &fsInfo) == -1 ) { 4117 return; 4118 } 4119 4120 if( !strcmp(fsInfo.f_basetype, "tmp") ) { 4121 pFile->sectorSize = fsInfo.f_bsize; 4122 pFile->deviceCharacteristics = 4123 SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ 4124 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4125 ** the write succeeds */ 4126 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4127 ** so it is ordered */ 4128 0; 4129 }else if( strstr(fsInfo.f_basetype, "etfs") ){ 4130 pFile->sectorSize = fsInfo.f_bsize; 4131 pFile->deviceCharacteristics = 4132 /* etfs cluster size writes are atomic */ 4133 (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | 4134 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4135 ** the write 
succeeds */ 4136 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4137 ** so it is ordered */ 4138 0; 4139 }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ 4140 pFile->sectorSize = fsInfo.f_bsize; 4141 pFile->deviceCharacteristics = 4142 SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ 4143 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4144 ** the write succeeds */ 4145 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4146 ** so it is ordered */ 4147 0; 4148 }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ 4149 pFile->sectorSize = fsInfo.f_bsize; 4150 pFile->deviceCharacteristics = 4151 /* full bitset of atomics from max sector size and smaller */ 4152 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4153 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4154 ** so it is ordered */ 4155 0; 4156 }else if( strstr(fsInfo.f_basetype, "dos") ){ 4157 pFile->sectorSize = fsInfo.f_bsize; 4158 pFile->deviceCharacteristics = 4159 /* full bitset of atomics from max sector size and smaller */ 4160 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4161 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4162 ** so it is ordered */ 4163 0; 4164 }else{ 4165 pFile->deviceCharacteristics = 4166 SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ 4167 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4168 ** the write succeeds */ 4169 0; 4170 } 4171 } 4172 /* Last chance verification. If the sector size isn't a multiple of 512 4173 ** then it isn't valid.*/ 4174 if( pFile->sectorSize % 512 != 0 ){ 4175 pFile->deviceCharacteristics = 0; 4176 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4177 } 4178 } 4179 #endif 4180 4181 /* 4182 ** Return the sector size in bytes of the underlying block device for 4183 ** the specified file. This is almost always 512 bytes, but may be 4184 ** larger for some devices. 
4185 ** 4186 ** SQLite code assumes this function cannot fail. It also assumes that 4187 ** if two files are created in the same file-system directory (i.e. 4188 ** a database and its journal file) that the sector size will be the 4189 ** same for both. 4190 */ 4191 static int unixSectorSize(sqlite3_file *id){ 4192 unixFile *pFd = (unixFile*)id; 4193 setDeviceCharacteristics(pFd); 4194 return pFd->sectorSize; 4195 } 4196 4197 /* 4198 ** Return the device characteristics for the file. 4199 ** 4200 ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. 4201 ** However, that choice is controversial since technically the underlying 4202 ** file system does not always provide powersafe overwrites. (In other 4203 ** words, after a power-loss event, parts of the file that were never 4204 ** written might end up being altered.) However, non-PSOW behavior is very, 4205 ** very rare. And asserting PSOW makes a large reduction in the amount 4206 ** of required I/O for journaling, since a lot of padding is eliminated. 4207 ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control 4208 ** available to turn it off and URI query parameter available to turn it off. 4209 */ 4210 static int unixDeviceCharacteristics(sqlite3_file *id){ 4211 unixFile *pFd = (unixFile*)id; 4212 setDeviceCharacteristics(pFd); 4213 return pFd->deviceCharacteristics; 4214 } 4215 4216 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 4217 4218 /* 4219 ** Return the system page size. 4220 ** 4221 ** This function should not be called directly by other code in this file. 4222 ** Instead, it should be called via macro osGetpagesize(). 
4223 */ 4224 static int unixGetpagesize(void){ 4225 #if OS_VXWORKS 4226 return 1024; 4227 #elif defined(_BSD_SOURCE) 4228 return getpagesize(); 4229 #else 4230 return (int)sysconf(_SC_PAGESIZE); 4231 #endif 4232 } 4233 4234 #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ 4235 4236 #ifndef SQLITE_OMIT_WAL 4237 4238 /* 4239 ** Object used to represent an shared memory buffer. 4240 ** 4241 ** When multiple threads all reference the same wal-index, each thread 4242 ** has its own unixShm object, but they all point to a single instance 4243 ** of this unixShmNode object. In other words, each wal-index is opened 4244 ** only once per process. 4245 ** 4246 ** Each unixShmNode object is connected to a single unixInodeInfo object. 4247 ** We could coalesce this object into unixInodeInfo, but that would mean 4248 ** every open file that does not use shared memory (in other words, most 4249 ** open files) would have to carry around this extra information. So 4250 ** the unixInodeInfo object contains a pointer to this unixShmNode object 4251 ** and the unixShmNode object is created only when needed. 4252 ** 4253 ** unixMutexHeld() must be true when creating or destroying 4254 ** this object or while reading or writing the following fields: 4255 ** 4256 ** nRef 4257 ** 4258 ** The following fields are read-only after the object is created: 4259 ** 4260 ** hShm 4261 ** zFilename 4262 ** 4263 ** Either unixShmNode.pShmMutex must be held or unixShmNode.nRef==0 and 4264 ** unixMutexHeld() is true when reading or writing any other field 4265 ** in this structure. 
4266 */ 4267 struct unixShmNode { 4268 unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ 4269 sqlite3_mutex *pShmMutex; /* Mutex to access this object */ 4270 char *zFilename; /* Name of the mmapped file */ 4271 int hShm; /* Open file descriptor */ 4272 int szRegion; /* Size of shared-memory regions */ 4273 u16 nRegion; /* Size of array apRegion */ 4274 u8 isReadonly; /* True if read-only */ 4275 u8 isUnlocked; /* True if no DMS lock held */ 4276 char **apRegion; /* Array of mapped shared-memory regions */ 4277 int nRef; /* Number of unixShm objects pointing to this */ 4278 unixShm *pFirst; /* All unixShm objects pointing to this */ 4279 int aLock[SQLITE_SHM_NLOCK]; /* # shared locks on slot, -1==excl lock */ 4280 #ifdef SQLITE_DEBUG 4281 u8 exclMask; /* Mask of exclusive locks held */ 4282 u8 sharedMask; /* Mask of shared locks held */ 4283 u8 nextShmId; /* Next available unixShm.id value */ 4284 #endif 4285 }; 4286 4287 /* 4288 ** Structure used internally by this VFS to record the state of an 4289 ** open shared memory connection. 4290 ** 4291 ** The following fields are initialized when this object is created and 4292 ** are read-only thereafter: 4293 ** 4294 ** unixShm.pShmNode 4295 ** unixShm.id 4296 ** 4297 ** All other fields are read/write. The unixShm.pShmNode->pShmMutex must 4298 ** be held while accessing any read/write fields. 
4299 */ 4300 struct unixShm { 4301 unixShmNode *pShmNode; /* The underlying unixShmNode object */ 4302 unixShm *pNext; /* Next unixShm with the same unixShmNode */ 4303 u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ 4304 u8 id; /* Id of this connection within its unixShmNode */ 4305 u16 sharedMask; /* Mask of shared locks held */ 4306 u16 exclMask; /* Mask of exclusive locks held */ 4307 }; 4308 4309 /* 4310 ** Constants used for locking 4311 */ 4312 #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ 4313 #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ 4314 4315 /* 4316 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 4317 ** 4318 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 4319 ** otherwise. 4320 */ 4321 static int unixShmSystemLock( 4322 unixFile *pFile, /* Open connection to the WAL file */ 4323 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 4324 int ofst, /* First byte of the locking range */ 4325 int n /* Number of bytes to lock */ 4326 ){ 4327 unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ 4328 struct flock f; /* The posix advisory locking structure */ 4329 int rc = SQLITE_OK; /* Result code form fcntl() */ 4330 4331 /* Access to the unixShmNode object is serialized by the caller */ 4332 pShmNode = pFile->pInode->pShmNode; 4333 assert( pShmNode->nRef==0 || sqlite3_mutex_held(pShmNode->pShmMutex) ); 4334 assert( pShmNode->nRef>0 || unixMutexHeld() ); 4335 4336 /* Shared locks never span more than one byte */ 4337 assert( n==1 || lockType!=F_RDLCK ); 4338 4339 /* Locks are within range */ 4340 assert( n>=1 && n<=SQLITE_SHM_NLOCK ); 4341 4342 if( pShmNode->hShm>=0 ){ 4343 int res; 4344 /* Initialize the locking parameters */ 4345 f.l_type = lockType; 4346 f.l_whence = SEEK_SET; 4347 f.l_start = ofst; 4348 f.l_len = n; 4349 res = osSetPosixAdvisoryLock(pShmNode->hShm, &f, pFile); 4350 if( res==-1 ){ 4351 #ifdef 
SQLITE_ENABLE_SETLK_TIMEOUT 4352 rc = (pFile->iBusyTimeout ? SQLITE_BUSY_TIMEOUT : SQLITE_BUSY); 4353 #else 4354 rc = SQLITE_BUSY; 4355 #endif 4356 } 4357 } 4358 4359 /* Update the global lock state and do debug tracing */ 4360 #ifdef SQLITE_DEBUG 4361 { u16 mask; 4362 OSTRACE(("SHM-LOCK ")); 4363 mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); 4364 if( rc==SQLITE_OK ){ 4365 if( lockType==F_UNLCK ){ 4366 OSTRACE(("unlock %d ok", ofst)); 4367 pShmNode->exclMask &= ~mask; 4368 pShmNode->sharedMask &= ~mask; 4369 }else if( lockType==F_RDLCK ){ 4370 OSTRACE(("read-lock %d ok", ofst)); 4371 pShmNode->exclMask &= ~mask; 4372 pShmNode->sharedMask |= mask; 4373 }else{ 4374 assert( lockType==F_WRLCK ); 4375 OSTRACE(("write-lock %d ok", ofst)); 4376 pShmNode->exclMask |= mask; 4377 pShmNode->sharedMask &= ~mask; 4378 } 4379 }else{ 4380 if( lockType==F_UNLCK ){ 4381 OSTRACE(("unlock %d failed", ofst)); 4382 }else if( lockType==F_RDLCK ){ 4383 OSTRACE(("read-lock failed")); 4384 }else{ 4385 assert( lockType==F_WRLCK ); 4386 OSTRACE(("write-lock %d failed", ofst)); 4387 } 4388 } 4389 OSTRACE((" - afterwards %03x,%03x\n", 4390 pShmNode->sharedMask, pShmNode->exclMask)); 4391 } 4392 #endif 4393 4394 return rc; 4395 } 4396 4397 /* 4398 ** Return the minimum number of 32KB shm regions that should be mapped at 4399 ** a time, assuming that each mapping must be an integer multiple of the 4400 ** current system page-size. 4401 ** 4402 ** Usually, this is 1. The exception seems to be systems that are configured 4403 ** to use 64KB pages - in this case each mapping must cover at least two 4404 ** shm regions. 
4405 */ 4406 static int unixShmRegionPerMap(void){ 4407 int shmsz = 32*1024; /* SHM region size */ 4408 int pgsz = osGetpagesize(); /* System page size */ 4409 assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ 4410 if( pgsz<shmsz ) return 1; 4411 return pgsz/shmsz; 4412 } 4413 4414 /* 4415 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. 4416 ** 4417 ** This is not a VFS shared-memory method; it is a utility function called 4418 ** by VFS shared-memory methods. 4419 */ 4420 static void unixShmPurge(unixFile *pFd){ 4421 unixShmNode *p = pFd->pInode->pShmNode; 4422 assert( unixMutexHeld() ); 4423 if( p && ALWAYS(p->nRef==0) ){ 4424 int nShmPerMap = unixShmRegionPerMap(); 4425 int i; 4426 assert( p->pInode==pFd->pInode ); 4427 sqlite3_mutex_free(p->pShmMutex); 4428 for(i=0; i<p->nRegion; i+=nShmPerMap){ 4429 if( p->hShm>=0 ){ 4430 osMunmap(p->apRegion[i], p->szRegion); 4431 }else{ 4432 sqlite3_free(p->apRegion[i]); 4433 } 4434 } 4435 sqlite3_free(p->apRegion); 4436 if( p->hShm>=0 ){ 4437 robust_close(pFd, p->hShm, __LINE__); 4438 p->hShm = -1; 4439 } 4440 p->pInode->pShmNode = 0; 4441 sqlite3_free(p); 4442 } 4443 } 4444 4445 /* 4446 ** The DMS lock has not yet been taken on shm file pShmNode. Attempt to 4447 ** take it now. Return SQLITE_OK if successful, or an SQLite error 4448 ** code otherwise. 4449 ** 4450 ** If the DMS cannot be locked because this is a readonly_shm=1 4451 ** connection and no other process already holds a lock, return 4452 ** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. 4453 */ 4454 static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ 4455 struct flock lock; 4456 int rc = SQLITE_OK; 4457 4458 /* Use F_GETLK to determine the locks other processes are holding 4459 ** on the DMS byte. If it indicates that another process is holding 4460 ** a SHARED lock, then this process may also take a SHARED lock 4461 ** and proceed with opening the *-shm file. 
4462 ** 4463 ** Or, if no other process is holding any lock, then this process 4464 ** is the first to open it. In this case take an EXCLUSIVE lock on the 4465 ** DMS byte and truncate the *-shm file to zero bytes in size. Then 4466 ** downgrade to a SHARED lock on the DMS byte. 4467 ** 4468 ** If another process is holding an EXCLUSIVE lock on the DMS byte, 4469 ** return SQLITE_BUSY to the caller (it will try again). An earlier 4470 ** version of this code attempted the SHARED lock at this point. But 4471 ** this introduced a subtle race condition: if the process holding 4472 ** EXCLUSIVE failed just before truncating the *-shm file, then this 4473 ** process might open and use the *-shm file without truncating it. 4474 ** And if the *-shm file has been corrupted by a power failure or 4475 ** system crash, the database itself may also become corrupt. */ 4476 lock.l_whence = SEEK_SET; 4477 lock.l_start = UNIX_SHM_DMS; 4478 lock.l_len = 1; 4479 lock.l_type = F_WRLCK; 4480 if( osFcntl(pShmNode->hShm, F_GETLK, &lock)!=0 ) { 4481 rc = SQLITE_IOERR_LOCK; 4482 }else if( lock.l_type==F_UNLCK ){ 4483 if( pShmNode->isReadonly ){ 4484 pShmNode->isUnlocked = 1; 4485 rc = SQLITE_READONLY_CANTINIT; 4486 }else{ 4487 rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1); 4488 /* The first connection to attach must truncate the -shm file. We 4489 ** truncate to 3 bytes (an arbitrary small number, less than the 4490 ** -shm header size) rather than 0 as a system debugging aid, to 4491 ** help detect if a -shm file truncation is legitimate or is the work 4492 ** or a rogue process. 
*/ 4493 if( rc==SQLITE_OK && robust_ftruncate(pShmNode->hShm, 3) ){ 4494 rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate",pShmNode->zFilename); 4495 } 4496 } 4497 }else if( lock.l_type==F_WRLCK ){ 4498 rc = SQLITE_BUSY; 4499 } 4500 4501 if( rc==SQLITE_OK ){ 4502 assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK ); 4503 rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); 4504 } 4505 return rc; 4506 } 4507 4508 /* 4509 ** Open a shared-memory area associated with open database file pDbFd. 4510 ** This particular implementation uses mmapped files. 4511 ** 4512 ** The file used to implement shared-memory is in the same directory 4513 ** as the open database file and has the same name as the open database 4514 ** file with the "-shm" suffix added. For example, if the database file 4515 ** is "/home/user1/config.db" then the file that is created and mmapped 4516 ** for shared memory will be called "/home/user1/config.db-shm". 4517 ** 4518 ** Another approach to is to use files in /dev/shm or /dev/tmp or an 4519 ** some other tmpfs mount. But if a file in a different directory 4520 ** from the database file is used, then differing access permissions 4521 ** or a chroot() might cause two different processes on the same 4522 ** database to end up using different files for shared memory - 4523 ** meaning that their memory would not really be shared - resulting 4524 ** in database corruption. Nevertheless, this tmpfs file usage 4525 ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" 4526 ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time 4527 ** option results in an incompatible build of SQLite; builds of SQLite 4528 ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the 4529 ** same database file at the same time, database corruption will likely 4530 ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered 4531 ** "unsupported" and may go away in a future SQLite release. 
4532 ** 4533 ** When opening a new shared-memory file, if no other instances of that 4534 ** file are currently open, in this process or in other processes, then 4535 ** the file must be truncated to zero length or have its header cleared. 4536 ** 4537 ** If the original database file (pDbFd) is using the "unix-excl" VFS 4538 ** that means that an exclusive lock is held on the database file and 4539 ** that no other processes are able to read or write the database. In 4540 ** that case, we do not really need shared memory. No shared memory 4541 ** file is created. The shared memory will be simulated with heap memory. 4542 */ 4543 static int unixOpenSharedMemory(unixFile *pDbFd){ 4544 struct unixShm *p = 0; /* The connection to be opened */ 4545 struct unixShmNode *pShmNode; /* The underlying mmapped file */ 4546 int rc = SQLITE_OK; /* Result code */ 4547 unixInodeInfo *pInode; /* The inode of fd */ 4548 char *zShm; /* Name of the file used for SHM */ 4549 int nShmFilename; /* Size of the SHM filename in bytes */ 4550 4551 /* Allocate space for the new unixShm object. */ 4552 p = sqlite3_malloc64( sizeof(*p) ); 4553 if( p==0 ) return SQLITE_NOMEM_BKPT; 4554 memset(p, 0, sizeof(*p)); 4555 assert( pDbFd->pShm==0 ); 4556 4557 /* Check to see if a unixShmNode object already exists. Reuse an existing 4558 ** one if present. Create a new one if necessary. 4559 */ 4560 assert( unixFileMutexNotheld(pDbFd) ); 4561 unixEnterMutex(); 4562 pInode = pDbFd->pInode; 4563 pShmNode = pInode->pShmNode; 4564 if( pShmNode==0 ){ 4565 struct stat sStat; /* fstat() info for database file */ 4566 #ifndef SQLITE_SHM_DIRECTORY 4567 const char *zBasePath = pDbFd->zPath; 4568 #endif 4569 4570 /* Call fstat() to figure out the permissions on the database file. If 4571 ** a new *-shm file is created, an attempt will be made to create it 4572 ** with the same permissions. 
4573 */ 4574 if( osFstat(pDbFd->h, &sStat) ){ 4575 rc = SQLITE_IOERR_FSTAT; 4576 goto shm_open_err; 4577 } 4578 4579 #ifdef SQLITE_SHM_DIRECTORY 4580 nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; 4581 #else 4582 nShmFilename = 6 + (int)strlen(zBasePath); 4583 #endif 4584 pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); 4585 if( pShmNode==0 ){ 4586 rc = SQLITE_NOMEM_BKPT; 4587 goto shm_open_err; 4588 } 4589 memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); 4590 zShm = pShmNode->zFilename = (char*)&pShmNode[1]; 4591 #ifdef SQLITE_SHM_DIRECTORY 4592 sqlite3_snprintf(nShmFilename, zShm, 4593 SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", 4594 (u32)sStat.st_ino, (u32)sStat.st_dev); 4595 #else 4596 sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath); 4597 sqlite3FileSuffix3(pDbFd->zPath, zShm); 4598 #endif 4599 pShmNode->hShm = -1; 4600 pDbFd->pInode->pShmNode = pShmNode; 4601 pShmNode->pInode = pDbFd->pInode; 4602 if( sqlite3GlobalConfig.bCoreMutex ){ 4603 pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 4604 if( pShmNode->pShmMutex==0 ){ 4605 rc = SQLITE_NOMEM_BKPT; 4606 goto shm_open_err; 4607 } 4608 } 4609 4610 if( pInode->bProcessLock==0 ){ 4611 if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ 4612 pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT|O_NOFOLLOW, 4613 (sStat.st_mode&0777)); 4614 } 4615 if( pShmNode->hShm<0 ){ 4616 pShmNode->hShm = robust_open(zShm, O_RDONLY|O_NOFOLLOW, 4617 (sStat.st_mode&0777)); 4618 if( pShmNode->hShm<0 ){ 4619 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm); 4620 goto shm_open_err; 4621 } 4622 pShmNode->isReadonly = 1; 4623 } 4624 4625 /* If this process is running as root, make sure that the SHM file 4626 ** is owned by the same user that owns the original database. Otherwise, 4627 ** the original owner will not be able to connect. 
4628 */ 4629 robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid); 4630 4631 rc = unixLockSharedMemory(pDbFd, pShmNode); 4632 if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err; 4633 } 4634 } 4635 4636 /* Make the new connection a child of the unixShmNode */ 4637 p->pShmNode = pShmNode; 4638 #ifdef SQLITE_DEBUG 4639 p->id = pShmNode->nextShmId++; 4640 #endif 4641 pShmNode->nRef++; 4642 pDbFd->pShm = p; 4643 unixLeaveMutex(); 4644 4645 /* The reference count on pShmNode has already been incremented under 4646 ** the cover of the unixEnterMutex() mutex and the pointer from the 4647 ** new (struct unixShm) object to the pShmNode has been set. All that is 4648 ** left to do is to link the new object into the linked list starting 4649 ** at pShmNode->pFirst. This must be done while holding the 4650 ** pShmNode->pShmMutex. 4651 */ 4652 sqlite3_mutex_enter(pShmNode->pShmMutex); 4653 p->pNext = pShmNode->pFirst; 4654 pShmNode->pFirst = p; 4655 sqlite3_mutex_leave(pShmNode->pShmMutex); 4656 return rc; 4657 4658 /* Jump here on any error */ 4659 shm_open_err: 4660 unixShmPurge(pDbFd); /* This call frees pShmNode if required */ 4661 sqlite3_free(p); 4662 unixLeaveMutex(); 4663 return rc; 4664 } 4665 4666 /* 4667 ** This function is called to obtain a pointer to region iRegion of the 4668 ** shared-memory associated with the database file fd. Shared-memory regions 4669 ** are numbered starting from zero. Each shared-memory region is szRegion 4670 ** bytes in size. 4671 ** 4672 ** If an error occurs, an error code is returned and *pp is set to NULL. 4673 ** 4674 ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory 4675 ** region has not been allocated (by any client, including one running in a 4676 ** separate process), then *pp is set to NULL and SQLITE_OK returned. If 4677 ** bExtend is non-zero and the requested shared-memory region has not yet 4678 ** been allocated, it is allocated by this function. 
4679 ** 4680 ** If the shared-memory region has already been allocated or is allocated by 4681 ** this call as described above, then it is mapped into this processes 4682 ** address space (if it is not already), *pp is set to point to the mapped 4683 ** memory and SQLITE_OK returned. 4684 */ 4685 static int unixShmMap( 4686 sqlite3_file *fd, /* Handle open on database file */ 4687 int iRegion, /* Region to retrieve */ 4688 int szRegion, /* Size of regions */ 4689 int bExtend, /* True to extend file if necessary */ 4690 void volatile **pp /* OUT: Mapped memory */ 4691 ){ 4692 unixFile *pDbFd = (unixFile*)fd; 4693 unixShm *p; 4694 unixShmNode *pShmNode; 4695 int rc = SQLITE_OK; 4696 int nShmPerMap = unixShmRegionPerMap(); 4697 int nReqRegion; 4698 4699 /* If the shared-memory file has not yet been opened, open it now. */ 4700 if( pDbFd->pShm==0 ){ 4701 rc = unixOpenSharedMemory(pDbFd); 4702 if( rc!=SQLITE_OK ) return rc; 4703 } 4704 4705 p = pDbFd->pShm; 4706 pShmNode = p->pShmNode; 4707 sqlite3_mutex_enter(pShmNode->pShmMutex); 4708 if( pShmNode->isUnlocked ){ 4709 rc = unixLockSharedMemory(pDbFd, pShmNode); 4710 if( rc!=SQLITE_OK ) goto shmpage_out; 4711 pShmNode->isUnlocked = 0; 4712 } 4713 assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); 4714 assert( pShmNode->pInode==pDbFd->pInode ); 4715 assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); 4716 assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); 4717 4718 /* Minimum number of regions required to be mapped. */ 4719 nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; 4720 4721 if( pShmNode->nRegion<nReqRegion ){ 4722 char **apNew; /* New apRegion[] array */ 4723 int nByte = nReqRegion*szRegion; /* Minimum required file size */ 4724 struct stat sStat; /* Used by fstat() */ 4725 4726 pShmNode->szRegion = szRegion; 4727 4728 if( pShmNode->hShm>=0 ){ 4729 /* The requested region is not mapped into this processes address space. 
4730 ** Check to see if it has been allocated (i.e. if the wal-index file is 4731 ** large enough to contain the requested region). 4732 */ 4733 if( osFstat(pShmNode->hShm, &sStat) ){ 4734 rc = SQLITE_IOERR_SHMSIZE; 4735 goto shmpage_out; 4736 } 4737 4738 if( sStat.st_size<nByte ){ 4739 /* The requested memory region does not exist. If bExtend is set to 4740 ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. 4741 */ 4742 if( !bExtend ){ 4743 goto shmpage_out; 4744 } 4745 4746 /* Alternatively, if bExtend is true, extend the file. Do this by 4747 ** writing a single byte to the end of each (OS) page being 4748 ** allocated or extended. Technically, we need only write to the 4749 ** last page in order to extend the file. But writing to all new 4750 ** pages forces the OS to allocate them immediately, which reduces 4751 ** the chances of SIGBUS while accessing the mapped region later on. 4752 */ 4753 else{ 4754 static const int pgsz = 4096; 4755 int iPg; 4756 4757 /* Write to the last byte of each newly allocated or extended page */ 4758 assert( (nByte % pgsz)==0 ); 4759 for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ 4760 int x = 0; 4761 if( seekAndWriteFd(pShmNode->hShm, iPg*pgsz + pgsz-1,"",1,&x)!=1 ){ 4762 const char *zFile = pShmNode->zFilename; 4763 rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); 4764 goto shmpage_out; 4765 } 4766 } 4767 } 4768 } 4769 } 4770 4771 /* Map the requested memory region into this processes address space. */ 4772 apNew = (char **)sqlite3_realloc( 4773 pShmNode->apRegion, nReqRegion*sizeof(char *) 4774 ); 4775 if( !apNew ){ 4776 rc = SQLITE_IOERR_NOMEM_BKPT; 4777 goto shmpage_out; 4778 } 4779 pShmNode->apRegion = apNew; 4780 while( pShmNode->nRegion<nReqRegion ){ 4781 int nMap = szRegion*nShmPerMap; 4782 int i; 4783 void *pMem; 4784 if( pShmNode->hShm>=0 ){ 4785 pMem = osMmap(0, nMap, 4786 pShmNode->isReadonly ? 
PROT_READ : PROT_READ|PROT_WRITE, 4787 MAP_SHARED, pShmNode->hShm, szRegion*(i64)pShmNode->nRegion 4788 ); 4789 if( pMem==MAP_FAILED ){ 4790 rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); 4791 goto shmpage_out; 4792 } 4793 }else{ 4794 pMem = sqlite3_malloc64(nMap); 4795 if( pMem==0 ){ 4796 rc = SQLITE_NOMEM_BKPT; 4797 goto shmpage_out; 4798 } 4799 memset(pMem, 0, nMap); 4800 } 4801 4802 for(i=0; i<nShmPerMap; i++){ 4803 pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; 4804 } 4805 pShmNode->nRegion += nShmPerMap; 4806 } 4807 } 4808 4809 shmpage_out: 4810 if( pShmNode->nRegion>iRegion ){ 4811 *pp = pShmNode->apRegion[iRegion]; 4812 }else{ 4813 *pp = 0; 4814 } 4815 if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; 4816 sqlite3_mutex_leave(pShmNode->pShmMutex); 4817 return rc; 4818 } 4819 4820 /* 4821 ** Check that the pShmNode->aLock[] array comports with the locking bitmasks 4822 ** held by each client. Return true if it does, or false otherwise. This 4823 ** is to be used in an assert(). e.g. 4824 ** 4825 ** assert( assertLockingArrayOk(pShmNode) ); 4826 */ 4827 #ifdef SQLITE_DEBUG 4828 static int assertLockingArrayOk(unixShmNode *pShmNode){ 4829 unixShm *pX; 4830 int aLock[SQLITE_SHM_NLOCK]; 4831 assert( sqlite3_mutex_held(pShmNode->pShmMutex) ); 4832 4833 memset(aLock, 0, sizeof(aLock)); 4834 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4835 int i; 4836 for(i=0; i<SQLITE_SHM_NLOCK; i++){ 4837 if( pX->exclMask & (1<<i) ){ 4838 assert( aLock[i]==0 ); 4839 aLock[i] = -1; 4840 }else if( pX->sharedMask & (1<<i) ){ 4841 assert( aLock[i]>=0 ); 4842 aLock[i]++; 4843 } 4844 } 4845 } 4846 4847 assert( 0==memcmp(pShmNode->aLock, aLock, sizeof(aLock)) ); 4848 return (memcmp(pShmNode->aLock, aLock, sizeof(aLock))==0); 4849 } 4850 #endif 4851 4852 /* 4853 ** Change the lock state for a shared-memory segment. 
4854 ** 4855 ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little 4856 ** different here than in posix. In xShmLock(), one can go from unlocked 4857 ** to shared and back or from unlocked to exclusive and back. But one may 4858 ** not go from shared to exclusive or from exclusive to shared. 4859 */ 4860 static int unixShmLock( 4861 sqlite3_file *fd, /* Database file holding the shared memory */ 4862 int ofst, /* First lock to acquire or release */ 4863 int n, /* Number of locks to acquire or release */ 4864 int flags /* What to do with the lock */ 4865 ){ 4866 unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ 4867 unixShm *p = pDbFd->pShm; /* The shared memory being locked */ 4868 unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ 4869 int rc = SQLITE_OK; /* Result code */ 4870 u16 mask; /* Mask of locks to take or release */ 4871 int *aLock = pShmNode->aLock; 4872 4873 assert( pShmNode==pDbFd->pInode->pShmNode ); 4874 assert( pShmNode->pInode==pDbFd->pInode ); 4875 assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); 4876 assert( n>=1 ); 4877 assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) 4878 || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) 4879 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) 4880 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); 4881 assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); 4882 assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); 4883 assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); 4884 4885 /* Check that, if this to be a blocking lock, no locks that occur later 4886 ** in the following list than the lock being obtained are already held: 4887 ** 4888 ** 1. Checkpointer lock (ofst==1). 4889 ** 2. Write lock (ofst==0). 4890 ** 3. Read locks (ofst>=3 && ofst<SQLITE_SHM_NLOCK). 
4891 ** 4892 ** In other words, if this is a blocking lock, none of the locks that 4893 ** occur later in the above list than the lock being obtained may be 4894 ** held. 4895 ** 4896 ** It is not permitted to block on the RECOVER lock. 4897 */ 4898 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4899 assert( (flags & SQLITE_SHM_UNLOCK) || pDbFd->iBusyTimeout==0 || ( 4900 (ofst!=2) /* not RECOVER */ 4901 && (ofst!=1 || (p->exclMask|p->sharedMask)==0) 4902 && (ofst!=0 || (p->exclMask|p->sharedMask)<3) 4903 && (ofst<3 || (p->exclMask|p->sharedMask)<(1<<ofst)) 4904 )); 4905 #endif 4906 4907 mask = (1<<(ofst+n)) - (1<<ofst); 4908 assert( n>1 || mask==(1<<ofst) ); 4909 sqlite3_mutex_enter(pShmNode->pShmMutex); 4910 assert( assertLockingArrayOk(pShmNode) ); 4911 if( flags & SQLITE_SHM_UNLOCK ){ 4912 if( (p->exclMask|p->sharedMask) & mask ){ 4913 int ii; 4914 int bUnlock = 1; 4915 4916 for(ii=ofst; ii<ofst+n; ii++){ 4917 if( aLock[ii]>((p->sharedMask & (1<<ii)) ? 1 : 0) ){ 4918 bUnlock = 0; 4919 } 4920 } 4921 4922 if( bUnlock ){ 4923 rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); 4924 if( rc==SQLITE_OK ){ 4925 memset(&aLock[ofst], 0, sizeof(int)*n); 4926 } 4927 }else if( ALWAYS(p->sharedMask & (1<<ofst)) ){ 4928 assert( n==1 && aLock[ofst]>1 ); 4929 aLock[ofst]--; 4930 } 4931 4932 /* Undo the local locks */ 4933 if( rc==SQLITE_OK ){ 4934 p->exclMask &= ~mask; 4935 p->sharedMask &= ~mask; 4936 } 4937 } 4938 }else if( flags & SQLITE_SHM_SHARED ){ 4939 assert( n==1 ); 4940 assert( (p->exclMask & (1<<ofst))==0 ); 4941 if( (p->sharedMask & mask)==0 ){ 4942 if( aLock[ofst]<0 ){ 4943 rc = SQLITE_BUSY; 4944 }else if( aLock[ofst]==0 ){ 4945 rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4946 } 4947 4948 /* Get the local shared locks */ 4949 if( rc==SQLITE_OK ){ 4950 p->sharedMask |= mask; 4951 aLock[ofst]++; 4952 } 4953 } 4954 }else{ 4955 /* Make sure no sibling connections hold locks that will block this 4956 ** lock. If any do, return SQLITE_BUSY right away. 
*/ 4957 int ii; 4958 for(ii=ofst; ii<ofst+n; ii++){ 4959 assert( (p->sharedMask & mask)==0 ); 4960 if( ALWAYS((p->exclMask & (1<<ii))==0) && aLock[ii] ){ 4961 rc = SQLITE_BUSY; 4962 break; 4963 } 4964 } 4965 4966 /* Get the exclusive locks at the system level. Then if successful 4967 ** also update the in-memory values. */ 4968 if( rc==SQLITE_OK ){ 4969 rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); 4970 if( rc==SQLITE_OK ){ 4971 assert( (p->sharedMask & mask)==0 ); 4972 p->exclMask |= mask; 4973 for(ii=ofst; ii<ofst+n; ii++){ 4974 aLock[ii] = -1; 4975 } 4976 } 4977 } 4978 } 4979 assert( assertLockingArrayOk(pShmNode) ); 4980 sqlite3_mutex_leave(pShmNode->pShmMutex); 4981 OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", 4982 p->id, osGetpid(0), p->sharedMask, p->exclMask)); 4983 return rc; 4984 } 4985 4986 /* 4987 ** Implement a memory barrier or memory fence on shared memory. 4988 ** 4989 ** All loads and stores begun before the barrier must complete before 4990 ** any load or store begun after the barrier. 4991 */ 4992 static void unixShmBarrier( 4993 sqlite3_file *fd /* Database file holding the shared memory */ 4994 ){ 4995 UNUSED_PARAMETER(fd); 4996 sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ 4997 assert( fd->pMethods->xLock==nolockLock 4998 || unixFileMutexNotheld((unixFile*)fd) 4999 ); 5000 unixEnterMutex(); /* Also mutex, for redundancy */ 5001 unixLeaveMutex(); 5002 } 5003 5004 /* 5005 ** Close a connection to shared-memory. Delete the underlying 5006 ** storage if deleteFlag is true. 5007 ** 5008 ** If there is no shared memory associated with the connection then this 5009 ** routine is a harmless no-op. 
5010 */ 5011 static int unixShmUnmap( 5012 sqlite3_file *fd, /* The underlying database file */ 5013 int deleteFlag /* Delete shared-memory if true */ 5014 ){ 5015 unixShm *p; /* The connection to be closed */ 5016 unixShmNode *pShmNode; /* The underlying shared-memory file */ 5017 unixShm **pp; /* For looping over sibling connections */ 5018 unixFile *pDbFd; /* The underlying database file */ 5019 5020 pDbFd = (unixFile*)fd; 5021 p = pDbFd->pShm; 5022 if( p==0 ) return SQLITE_OK; 5023 pShmNode = p->pShmNode; 5024 5025 assert( pShmNode==pDbFd->pInode->pShmNode ); 5026 assert( pShmNode->pInode==pDbFd->pInode ); 5027 5028 /* Remove connection p from the set of connections associated 5029 ** with pShmNode */ 5030 sqlite3_mutex_enter(pShmNode->pShmMutex); 5031 for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} 5032 *pp = p->pNext; 5033 5034 /* Free the connection p */ 5035 sqlite3_free(p); 5036 pDbFd->pShm = 0; 5037 sqlite3_mutex_leave(pShmNode->pShmMutex); 5038 5039 /* If pShmNode->nRef has reached 0, then close the underlying 5040 ** shared-memory file, too */ 5041 assert( unixFileMutexNotheld(pDbFd) ); 5042 unixEnterMutex(); 5043 assert( pShmNode->nRef>0 ); 5044 pShmNode->nRef--; 5045 if( pShmNode->nRef==0 ){ 5046 if( deleteFlag && pShmNode->hShm>=0 ){ 5047 osUnlink(pShmNode->zFilename); 5048 } 5049 unixShmPurge(pDbFd); 5050 } 5051 unixLeaveMutex(); 5052 5053 return SQLITE_OK; 5054 } 5055 5056 5057 #else 5058 # define unixShmMap 0 5059 # define unixShmLock 0 5060 # define unixShmBarrier 0 5061 # define unixShmUnmap 0 5062 #endif /* #ifndef SQLITE_OMIT_WAL */ 5063 5064 #if SQLITE_MAX_MMAP_SIZE>0 5065 /* 5066 ** If it is currently memory mapped, unmap file pFd. 
5067 */ 5068 static void unixUnmapfile(unixFile *pFd){ 5069 assert( pFd->nFetchOut==0 ); 5070 if( pFd->pMapRegion ){ 5071 osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); 5072 pFd->pMapRegion = 0; 5073 pFd->mmapSize = 0; 5074 pFd->mmapSizeActual = 0; 5075 } 5076 } 5077 5078 /* 5079 ** Attempt to set the size of the memory mapping maintained by file 5080 ** descriptor pFd to nNew bytes. Any existing mapping is discarded. 5081 ** 5082 ** If successful, this function sets the following variables: 5083 ** 5084 ** unixFile.pMapRegion 5085 ** unixFile.mmapSize 5086 ** unixFile.mmapSizeActual 5087 ** 5088 ** If unsuccessful, an error message is logged via sqlite3_log() and 5089 ** the three variables above are zeroed. In this case SQLite should 5090 ** continue accessing the database using the xRead() and xWrite() 5091 ** methods. 5092 */ 5093 static void unixRemapfile( 5094 unixFile *pFd, /* File descriptor object */ 5095 i64 nNew /* Required mapping size */ 5096 ){ 5097 const char *zErr = "mmap"; 5098 int h = pFd->h; /* File descriptor open on db file */ 5099 u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ 5100 i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ 5101 u8 *pNew = 0; /* Location of new mapping */ 5102 int flags = PROT_READ; /* Flags to pass to mmap() */ 5103 5104 assert( pFd->nFetchOut==0 ); 5105 assert( nNew>pFd->mmapSize ); 5106 assert( nNew<=pFd->mmapSizeMax ); 5107 assert( nNew>0 ); 5108 assert( pFd->mmapSizeActual>=pFd->mmapSize ); 5109 assert( MAP_FAILED!=0 ); 5110 5111 #ifdef SQLITE_MMAP_READWRITE 5112 if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; 5113 #endif 5114 5115 if( pOrig ){ 5116 #if HAVE_MREMAP 5117 i64 nReuse = pFd->mmapSize; 5118 #else 5119 const int szSyspage = osGetpagesize(); 5120 i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); 5121 #endif 5122 u8 *pReq = &pOrig[nReuse]; 5123 5124 /* Unmap any pages of the existing mapping that cannot be reused. 
*/ 5125 if( nReuse!=nOrig ){ 5126 osMunmap(pReq, nOrig-nReuse); 5127 } 5128 5129 #if HAVE_MREMAP 5130 pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); 5131 zErr = "mremap"; 5132 #else 5133 pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); 5134 if( pNew!=MAP_FAILED ){ 5135 if( pNew!=pReq ){ 5136 osMunmap(pNew, nNew - nReuse); 5137 pNew = 0; 5138 }else{ 5139 pNew = pOrig; 5140 } 5141 } 5142 #endif 5143 5144 /* The attempt to extend the existing mapping failed. Free it. */ 5145 if( pNew==MAP_FAILED || pNew==0 ){ 5146 osMunmap(pOrig, nReuse); 5147 } 5148 } 5149 5150 /* If pNew is still NULL, try to create an entirely new mapping. */ 5151 if( pNew==0 ){ 5152 pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); 5153 } 5154 5155 if( pNew==MAP_FAILED ){ 5156 pNew = 0; 5157 nNew = 0; 5158 unixLogError(SQLITE_OK, zErr, pFd->zPath); 5159 5160 /* If the mmap() above failed, assume that all subsequent mmap() calls 5161 ** will probably fail too. Fall back to using xRead/xWrite exclusively 5162 ** in this case. */ 5163 pFd->mmapSizeMax = 0; 5164 } 5165 pFd->pMapRegion = (void *)pNew; 5166 pFd->mmapSize = pFd->mmapSizeActual = nNew; 5167 } 5168 5169 /* 5170 ** Memory map or remap the file opened by file-descriptor pFd (if the file 5171 ** is already mapped, the existing mapping is replaced by the new). Or, if 5172 ** there already exists a mapping for this file, and there are still 5173 ** outstanding xFetch() references to it, this function is a no-op. 5174 ** 5175 ** If parameter nByte is non-negative, then it is the requested size of 5176 ** the mapping to create. Otherwise, if nByte is less than zero, then the 5177 ** requested size is the size of the file on disk. The actual size of the 5178 ** created mapping is either the requested size or the value configured 5179 ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. 
5180 ** 5181 ** SQLITE_OK is returned if no error occurs (even if the mapping is not 5182 ** recreated as a result of outstanding references) or an SQLite error 5183 ** code otherwise. 5184 */ 5185 static int unixMapfile(unixFile *pFd, i64 nMap){ 5186 assert( nMap>=0 || pFd->nFetchOut==0 ); 5187 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5188 if( pFd->nFetchOut>0 ) return SQLITE_OK; 5189 5190 if( nMap<0 ){ 5191 struct stat statbuf; /* Low-level file information */ 5192 if( osFstat(pFd->h, &statbuf) ){ 5193 return SQLITE_IOERR_FSTAT; 5194 } 5195 nMap = statbuf.st_size; 5196 } 5197 if( nMap>pFd->mmapSizeMax ){ 5198 nMap = pFd->mmapSizeMax; 5199 } 5200 5201 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5202 if( nMap!=pFd->mmapSize ){ 5203 unixRemapfile(pFd, nMap); 5204 } 5205 5206 return SQLITE_OK; 5207 } 5208 #endif /* SQLITE_MAX_MMAP_SIZE>0 */ 5209 5210 /* 5211 ** If possible, return a pointer to a mapping of file fd starting at offset 5212 ** iOff. The mapping must be valid for at least nAmt bytes. 5213 ** 5214 ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. 5215 ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. 5216 ** Finally, if an error does occur, return an SQLite error code. The final 5217 ** value of *pp is undefined in this case. 5218 ** 5219 ** If this function does return a pointer, the caller must eventually 5220 ** release the reference by calling unixUnfetch(). 
5221 */ 5222 static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ 5223 #if SQLITE_MAX_MMAP_SIZE>0 5224 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5225 #endif 5226 *pp = 0; 5227 5228 #if SQLITE_MAX_MMAP_SIZE>0 5229 if( pFd->mmapSizeMax>0 ){ 5230 if( pFd->pMapRegion==0 ){ 5231 int rc = unixMapfile(pFd, -1); 5232 if( rc!=SQLITE_OK ) return rc; 5233 } 5234 if( pFd->mmapSize >= iOff+nAmt ){ 5235 *pp = &((u8 *)pFd->pMapRegion)[iOff]; 5236 pFd->nFetchOut++; 5237 } 5238 } 5239 #endif 5240 return SQLITE_OK; 5241 } 5242 5243 /* 5244 ** If the third argument is non-NULL, then this function releases a 5245 ** reference obtained by an earlier call to unixFetch(). The second 5246 ** argument passed to this function must be the same as the corresponding 5247 ** argument that was passed to the unixFetch() invocation. 5248 ** 5249 ** Or, if the third argument is NULL, then this function is being called 5250 ** to inform the VFS layer that, according to POSIX, any existing mapping 5251 ** may now be invalid and should be unmapped. 5252 */ 5253 static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ 5254 #if SQLITE_MAX_MMAP_SIZE>0 5255 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5256 UNUSED_PARAMETER(iOff); 5257 5258 /* If p==0 (unmap the entire file) then there must be no outstanding 5259 ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), 5260 ** then there must be at least one outstanding. */ 5261 assert( (p==0)==(pFd->nFetchOut==0) ); 5262 5263 /* If p!=0, it must match the iOff value. */ 5264 assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); 5265 5266 if( p ){ 5267 pFd->nFetchOut--; 5268 }else{ 5269 unixUnmapfile(pFd); 5270 } 5271 5272 assert( pFd->nFetchOut>=0 ); 5273 #else 5274 UNUSED_PARAMETER(fd); 5275 UNUSED_PARAMETER(p); 5276 UNUSED_PARAMETER(iOff); 5277 #endif 5278 return SQLITE_OK; 5279 } 5280 5281 /* 5282 ** Here ends the implementation of all sqlite3_file methods. 
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/

/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies.  It also contains definitions
** of "finder" functions.  A finder-function is used to locate the appropriate
** sqlite3_io_methods object for a particular database file.  The pAppData
** field of each sqlite3_vfs VFS object is initialized to be a pointer to
** the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object.  The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from being cast into a function pointer.
**
**
** Each instance of the IOMETHODS macro generates two objects:
**
**   * A constant sqlite3_io_methods object called METHOD that has version
**     number VERSION, locking methods CLOSE, LOCK, UNLOCK and CKLOCK, and
**     xShmMap method SHMMAP.
**
**   * An I/O method finder function called FINDER that returns a pointer
**     to the METHOD object in the previous bullet.
*/
/* IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) expands
** to three definitions:
**
**   * METHOD:        a static const sqlite3_io_methods object.  VERSION,
**                    CLOSE, LOCK, UNLOCK, CKLOCK and SHMMAP fill the
**                    iVersion, xClose, xLock, xUnlock, xCheckReservedLock
**                    and xShmMap slots.  Every other slot is a fixed unix*
**                    routine.
**   * FINDER##Impl:  a function that ignores both arguments and returns
**                    &METHOD.
**   * FINDER:        a const function pointer initialized to FINDER##Impl.
*/
#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP)     \
static const sqlite3_io_methods METHOD = {                                   \
   VERSION,                    /* iVersion */                                \
   CLOSE,                      /* xClose */                                  \
   unixRead,                   /* xRead */                                   \
   unixWrite,                  /* xWrite */                                  \
   unixTruncate,               /* xTruncate */                               \
   unixSync,                   /* xSync */                                   \
   unixFileSize,               /* xFileSize */                               \
   LOCK,                       /* xLock */                                   \
   UNLOCK,                     /* xUnlock */                                 \
   CKLOCK,                     /* xCheckReservedLock */                      \
   unixFileControl,            /* xFileControl */                            \
   unixSectorSize,             /* xSectorSize */                             \
   unixDeviceCharacteristics,  /* xDeviceCharacteristics */                  \
   SHMMAP,                     /* xShmMap */                                 \
   unixShmLock,                /* xShmLock */                                \
   unixShmBarrier,             /* xShmBarrier */                             \
   unixShmUnmap,               /* xShmUnmap */                               \
   unixFetch,                  /* xFetch */                                  \
   unixUnfetch,                /* xUnfetch */                                \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;

/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies.  Functions that return pointers to these methods
** are also created.
5354 */ 5355 IOMETHODS( 5356 posixIoFinder, /* Finder function name */ 5357 posixIoMethods, /* sqlite3_io_methods object name */ 5358 3, /* shared memory and mmap are enabled */ 5359 unixClose, /* xClose method */ 5360 unixLock, /* xLock method */ 5361 unixUnlock, /* xUnlock method */ 5362 unixCheckReservedLock, /* xCheckReservedLock method */ 5363 unixShmMap /* xShmMap method */ 5364 ) 5365 IOMETHODS( 5366 nolockIoFinder, /* Finder function name */ 5367 nolockIoMethods, /* sqlite3_io_methods object name */ 5368 3, /* shared memory and mmap are enabled */ 5369 nolockClose, /* xClose method */ 5370 nolockLock, /* xLock method */ 5371 nolockUnlock, /* xUnlock method */ 5372 nolockCheckReservedLock, /* xCheckReservedLock method */ 5373 0 /* xShmMap method */ 5374 ) 5375 IOMETHODS( 5376 dotlockIoFinder, /* Finder function name */ 5377 dotlockIoMethods, /* sqlite3_io_methods object name */ 5378 1, /* shared memory is disabled */ 5379 dotlockClose, /* xClose method */ 5380 dotlockLock, /* xLock method */ 5381 dotlockUnlock, /* xUnlock method */ 5382 dotlockCheckReservedLock, /* xCheckReservedLock method */ 5383 0 /* xShmMap method */ 5384 ) 5385 5386 #if SQLITE_ENABLE_LOCKING_STYLE 5387 IOMETHODS( 5388 flockIoFinder, /* Finder function name */ 5389 flockIoMethods, /* sqlite3_io_methods object name */ 5390 1, /* shared memory is disabled */ 5391 flockClose, /* xClose method */ 5392 flockLock, /* xLock method */ 5393 flockUnlock, /* xUnlock method */ 5394 flockCheckReservedLock, /* xCheckReservedLock method */ 5395 0 /* xShmMap method */ 5396 ) 5397 #endif 5398 5399 #if OS_VXWORKS 5400 IOMETHODS( 5401 semIoFinder, /* Finder function name */ 5402 semIoMethods, /* sqlite3_io_methods object name */ 5403 1, /* shared memory is disabled */ 5404 semXClose, /* xClose method */ 5405 semXLock, /* xLock method */ 5406 semXUnlock, /* xUnlock method */ 5407 semXCheckReservedLock, /* xCheckReservedLock method */ 5408 0 /* xShmMap method */ 5409 ) 5410 #endif 5411 5412 #if 
defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5413 IOMETHODS( 5414 afpIoFinder, /* Finder function name */ 5415 afpIoMethods, /* sqlite3_io_methods object name */ 5416 1, /* shared memory is disabled */ 5417 afpClose, /* xClose method */ 5418 afpLock, /* xLock method */ 5419 afpUnlock, /* xUnlock method */ 5420 afpCheckReservedLock, /* xCheckReservedLock method */ 5421 0 /* xShmMap method */ 5422 ) 5423 #endif 5424 5425 /* 5426 ** The proxy locking method is a "super-method" in the sense that it 5427 ** opens secondary file descriptors for the conch and lock files and 5428 ** it uses proxy, dot-file, AFP, and flock() locking methods on those 5429 ** secondary files. For this reason, the division that implements 5430 ** proxy locking is located much further down in the file. But we need 5431 ** to go ahead and define the sqlite3_io_methods and finder function 5432 ** for proxy locking here. So we forward declare the I/O methods. 5433 */ 5434 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5435 static int proxyClose(sqlite3_file*); 5436 static int proxyLock(sqlite3_file*, int); 5437 static int proxyUnlock(sqlite3_file*, int); 5438 static int proxyCheckReservedLock(sqlite3_file*, int*); 5439 IOMETHODS( 5440 proxyIoFinder, /* Finder function name */ 5441 proxyIoMethods, /* sqlite3_io_methods object name */ 5442 1, /* shared memory is disabled */ 5443 proxyClose, /* xClose method */ 5444 proxyLock, /* xLock method */ 5445 proxyUnlock, /* xUnlock method */ 5446 proxyCheckReservedLock, /* xCheckReservedLock method */ 5447 0 /* xShmMap method */ 5448 ) 5449 #endif 5450 5451 /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ 5452 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5453 IOMETHODS( 5454 nfsIoFinder, /* Finder function name */ 5455 nfsIoMethods, /* sqlite3_io_methods object name */ 5456 1, /* shared memory is disabled */ 5457 unixClose, /* xClose method */ 5458 unixLock, /* xLock method */ 5459 nfsUnlock, /* xUnlock 
method */ 5460 unixCheckReservedLock, /* xCheckReservedLock method */ 5461 0 /* xShmMap method */ 5462 ) 5463 #endif 5464 5465 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5466 /* 5467 ** This "finder" function attempts to determine the best locking strategy 5468 ** for the database file "filePath". It then returns the sqlite3_io_methods 5469 ** object that implements that strategy. 5470 ** 5471 ** This is for MacOSX only. 5472 */ 5473 static const sqlite3_io_methods *autolockIoFinderImpl( 5474 const char *filePath, /* name of the database file */ 5475 unixFile *pNew /* open file object for the database file */ 5476 ){ 5477 static const struct Mapping { 5478 const char *zFilesystem; /* Filesystem type name */ 5479 const sqlite3_io_methods *pMethods; /* Appropriate locking method */ 5480 } aMap[] = { 5481 { "hfs", &posixIoMethods }, 5482 { "ufs", &posixIoMethods }, 5483 { "afpfs", &afpIoMethods }, 5484 { "smbfs", &afpIoMethods }, 5485 { "webdav", &nolockIoMethods }, 5486 { 0, 0 } 5487 }; 5488 int i; 5489 struct statfs fsInfo; 5490 struct flock lockInfo; 5491 5492 if( !filePath ){ 5493 /* If filePath==NULL that means we are dealing with a transient file 5494 ** that does not need to be locked. */ 5495 return &nolockIoMethods; 5496 } 5497 if( statfs(filePath, &fsInfo) != -1 ){ 5498 if( fsInfo.f_flags & MNT_RDONLY ){ 5499 return &nolockIoMethods; 5500 } 5501 for(i=0; aMap[i].zFilesystem; i++){ 5502 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 5503 return aMap[i].pMethods; 5504 } 5505 } 5506 } 5507 5508 /* Default case. Handles, amongst others, "nfs". 5509 ** Test byte-range lock using fcntl(). If the call succeeds, 5510 ** assume that the file-system supports POSIX style locks. 
5511 */ 5512 lockInfo.l_len = 1; 5513 lockInfo.l_start = 0; 5514 lockInfo.l_whence = SEEK_SET; 5515 lockInfo.l_type = F_RDLCK; 5516 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5517 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ 5518 return &nfsIoMethods; 5519 } else { 5520 return &posixIoMethods; 5521 } 5522 }else{ 5523 return &dotlockIoMethods; 5524 } 5525 } 5526 static const sqlite3_io_methods 5527 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 5528 5529 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 5530 5531 #if OS_VXWORKS 5532 /* 5533 ** This "finder" function for VxWorks checks to see if posix advisory 5534 ** locking works. If it does, then that is what is used. If it does not 5535 ** work, then fallback to named semaphore locking. 5536 */ 5537 static const sqlite3_io_methods *vxworksIoFinderImpl( 5538 const char *filePath, /* name of the database file */ 5539 unixFile *pNew /* the open file object */ 5540 ){ 5541 struct flock lockInfo; 5542 5543 if( !filePath ){ 5544 /* If filePath==NULL that means we are dealing with a transient file 5545 ** that does not need to be locked. */ 5546 return &nolockIoMethods; 5547 } 5548 5549 /* Test if fcntl() is supported and use POSIX style locks. 5550 ** Otherwise fall back to the named semaphore method. 
5551 */ 5552 lockInfo.l_len = 1; 5553 lockInfo.l_start = 0; 5554 lockInfo.l_whence = SEEK_SET; 5555 lockInfo.l_type = F_RDLCK; 5556 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5557 return &posixIoMethods; 5558 }else{ 5559 return &semIoMethods; 5560 } 5561 } 5562 static const sqlite3_io_methods 5563 *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; 5564 5565 #endif /* OS_VXWORKS */ 5566 5567 /* 5568 ** An abstract type for a pointer to an IO method finder function: 5569 */ 5570 typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); 5571 5572 5573 /**************************************************************************** 5574 **************************** sqlite3_vfs methods **************************** 5575 ** 5576 ** This division contains the implementation of methods on the 5577 ** sqlite3_vfs object. 5578 */ 5579 5580 /* 5581 ** Initialize the contents of the unixFile structure pointed to by pId. 5582 */ 5583 static int fillInUnixFile( 5584 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 5585 int h, /* Open file descriptor of file being opened */ 5586 sqlite3_file *pId, /* Write to the unixFile structure here */ 5587 const char *zFilename, /* Name of the file being opened */ 5588 int ctrlFlags /* Zero or more UNIXFILE_* values */ 5589 ){ 5590 const sqlite3_io_methods *pLockingStyle; 5591 unixFile *pNew = (unixFile *)pId; 5592 int rc = SQLITE_OK; 5593 5594 assert( pNew->pInode==NULL ); 5595 5596 /* No locking occurs in temporary files */ 5597 assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); 5598 5599 OSTRACE(("OPEN %-3d %s\n", h, zFilename)); 5600 pNew->h = h; 5601 pNew->pVfs = pVfs; 5602 pNew->zPath = zFilename; 5603 pNew->ctrlFlags = (u8)ctrlFlags; 5604 #if SQLITE_MAX_MMAP_SIZE>0 5605 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; 5606 #endif 5607 if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? 
zFilename : 0), 5608 "psow", SQLITE_POWERSAFE_OVERWRITE) ){ 5609 pNew->ctrlFlags |= UNIXFILE_PSOW; 5610 } 5611 if( strcmp(pVfs->zName,"unix-excl")==0 ){ 5612 pNew->ctrlFlags |= UNIXFILE_EXCL; 5613 } 5614 5615 #if OS_VXWORKS 5616 pNew->pId = vxworksFindFileId(zFilename); 5617 if( pNew->pId==0 ){ 5618 ctrlFlags |= UNIXFILE_NOLOCK; 5619 rc = SQLITE_NOMEM_BKPT; 5620 } 5621 #endif 5622 5623 if( ctrlFlags & UNIXFILE_NOLOCK ){ 5624 pLockingStyle = &nolockIoMethods; 5625 }else{ 5626 pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); 5627 #if SQLITE_ENABLE_LOCKING_STYLE 5628 /* Cache zFilename in the locking context (AFP and dotlock override) for 5629 ** proxyLock activation is possible (remote proxy is based on db name) 5630 ** zFilename remains valid until file is closed, to support */ 5631 pNew->lockingContext = (void*)zFilename; 5632 #endif 5633 } 5634 5635 if( pLockingStyle == &posixIoMethods 5636 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5637 || pLockingStyle == &nfsIoMethods 5638 #endif 5639 ){ 5640 unixEnterMutex(); 5641 rc = findInodeInfo(pNew, &pNew->pInode); 5642 if( rc!=SQLITE_OK ){ 5643 /* If an error occurred in findInodeInfo(), close the file descriptor 5644 ** immediately, before releasing the mutex. findInodeInfo() may fail 5645 ** in two scenarios: 5646 ** 5647 ** (a) A call to fstat() failed. 5648 ** (b) A malloc failed. 5649 ** 5650 ** Scenario (b) may only occur if the process is holding no other 5651 ** file descriptors open on the same file. If there were other file 5652 ** descriptors on this file, then no malloc would be required by 5653 ** findInodeInfo(). If this is the case, it is quite safe to close 5654 ** handle h - as it is guaranteed that no posix locks will be released 5655 ** by doing so. 5656 ** 5657 ** If scenario (a) caused the error then things are not so safe. The 5658 ** implicit assumption here is that if fstat() fails, things are in 5659 ** such bad shape that dropping a lock or two doesn't matter much. 
5660 */ 5661 robust_close(pNew, h, __LINE__); 5662 h = -1; 5663 } 5664 unixLeaveMutex(); 5665 } 5666 5667 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5668 else if( pLockingStyle == &afpIoMethods ){ 5669 /* AFP locking uses the file path so it needs to be included in 5670 ** the afpLockingContext. 5671 */ 5672 afpLockingContext *pCtx; 5673 pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 5674 if( pCtx==0 ){ 5675 rc = SQLITE_NOMEM_BKPT; 5676 }else{ 5677 /* NB: zFilename exists and remains valid until the file is closed 5678 ** according to requirement F11141. So we do not need to make a 5679 ** copy of the filename. */ 5680 pCtx->dbPath = zFilename; 5681 pCtx->reserved = 0; 5682 srandomdev(); 5683 unixEnterMutex(); 5684 rc = findInodeInfo(pNew, &pNew->pInode); 5685 if( rc!=SQLITE_OK ){ 5686 sqlite3_free(pNew->lockingContext); 5687 robust_close(pNew, h, __LINE__); 5688 h = -1; 5689 } 5690 unixLeaveMutex(); 5691 } 5692 } 5693 #endif 5694 5695 else if( pLockingStyle == &dotlockIoMethods ){ 5696 /* Dotfile locking uses the file path so it needs to be included in 5697 ** the dotlockLockingContext 5698 */ 5699 char *zLockFile; 5700 int nFilename; 5701 assert( zFilename!=0 ); 5702 nFilename = (int)strlen(zFilename) + 6; 5703 zLockFile = (char *)sqlite3_malloc64(nFilename); 5704 if( zLockFile==0 ){ 5705 rc = SQLITE_NOMEM_BKPT; 5706 }else{ 5707 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); 5708 } 5709 pNew->lockingContext = zLockFile; 5710 } 5711 5712 #if OS_VXWORKS 5713 else if( pLockingStyle == &semIoMethods ){ 5714 /* Named semaphore locking uses the file path so it needs to be 5715 ** included in the semLockingContext 5716 */ 5717 unixEnterMutex(); 5718 rc = findInodeInfo(pNew, &pNew->pInode); 5719 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ 5720 char *zSemName = pNew->pInode->aSemName; 5721 int n; 5722 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", 5723 pNew->pId->zCanonicalName); 5724 for( n=1; 
zSemName[n]; n++ ) 5725 if( zSemName[n]=='/' ) zSemName[n] = '_'; 5726 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); 5727 if( pNew->pInode->pSem == SEM_FAILED ){ 5728 rc = SQLITE_NOMEM_BKPT; 5729 pNew->pInode->aSemName[0] = '\0'; 5730 } 5731 } 5732 unixLeaveMutex(); 5733 } 5734 #endif 5735 5736 storeLastErrno(pNew, 0); 5737 #if OS_VXWORKS 5738 if( rc!=SQLITE_OK ){ 5739 if( h>=0 ) robust_close(pNew, h, __LINE__); 5740 h = -1; 5741 osUnlink(zFilename); 5742 pNew->ctrlFlags |= UNIXFILE_DELETE; 5743 } 5744 #endif 5745 if( rc!=SQLITE_OK ){ 5746 if( h>=0 ) robust_close(pNew, h, __LINE__); 5747 }else{ 5748 pId->pMethods = pLockingStyle; 5749 OpenCounter(+1); 5750 verifyDbFile(pNew); 5751 } 5752 return rc; 5753 } 5754 5755 /* 5756 ** Return the name of a directory in which to put temporary files. 5757 ** If no suitable temporary file directory can be found, return NULL. 5758 */ 5759 static const char *unixTempFileDir(void){ 5760 static const char *azDirs[] = { 5761 0, 5762 0, 5763 "/var/tmp", 5764 "/usr/tmp", 5765 "/tmp", 5766 "." 5767 }; 5768 unsigned int i = 0; 5769 struct stat buf; 5770 const char *zDir = sqlite3_temp_directory; 5771 5772 if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR"); 5773 if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); 5774 while(1){ 5775 if( zDir!=0 5776 && osStat(zDir, &buf)==0 5777 && S_ISDIR(buf.st_mode) 5778 && osAccess(zDir, 03)==0 5779 ){ 5780 return zDir; 5781 } 5782 if( i>=sizeof(azDirs)/sizeof(azDirs[0]) ) break; 5783 zDir = azDirs[i++]; 5784 } 5785 return 0; 5786 } 5787 5788 /* 5789 ** Create a temporary file name in zBuf. zBuf must be allocated 5790 ** by the calling process and must be big enough to hold at least 5791 ** pVfs->mxPathname bytes. 
5792 */ 5793 static int unixGetTempname(int nBuf, char *zBuf){ 5794 const char *zDir; 5795 int iLimit = 0; 5796 5797 /* It's odd to simulate an io-error here, but really this is just 5798 ** using the io-error infrastructure to test that SQLite handles this 5799 ** function failing. 5800 */ 5801 zBuf[0] = 0; 5802 SimulateIOError( return SQLITE_IOERR ); 5803 5804 zDir = unixTempFileDir(); 5805 if( zDir==0 ) return SQLITE_IOERR_GETTEMPPATH; 5806 do{ 5807 u64 r; 5808 sqlite3_randomness(sizeof(r), &r); 5809 assert( nBuf>2 ); 5810 zBuf[nBuf-2] = 0; 5811 sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c", 5812 zDir, r, 0); 5813 if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR; 5814 }while( osAccess(zBuf,0)==0 ); 5815 return SQLITE_OK; 5816 } 5817 5818 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5819 /* 5820 ** Routine to transform a unixFile into a proxy-locking unixFile. 5821 ** Implementation in the proxy-lock division, but used by unixOpen() 5822 ** if SQLITE_PREFER_PROXY_LOCKING is defined. 5823 */ 5824 static int proxyTransformUnixFile(unixFile*, const char*); 5825 #endif 5826 5827 /* 5828 ** Search for an unused file descriptor that was opened on the database 5829 ** file (not a journal or super-journal file) identified by pathname 5830 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second 5831 ** argument to this function. 5832 ** 5833 ** Such a file descriptor may exist if a database connection was closed 5834 ** but the associated file descriptor could not be closed because some 5835 ** other file descriptor open on the same file is holding a file-lock. 5836 ** Refer to comments in the unixClose() function and the lengthy comment 5837 ** describing "Posix Advisory Locking" at the start of this file for 5838 ** further details. Also, ticket #4018. 5839 ** 5840 ** If a suitable file descriptor is found, then it is returned. If no 5841 ** such file descriptor is located, -1 is returned. 
5842 */ 5843 static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ 5844 UnixUnusedFd *pUnused = 0; 5845 5846 /* Do not search for an unused file descriptor on vxworks. Not because 5847 ** vxworks would not benefit from the change (it might, we're not sure), 5848 ** but because no way to test it is currently available. It is better 5849 ** not to risk breaking vxworks support for the sake of such an obscure 5850 ** feature. */ 5851 #if !OS_VXWORKS 5852 struct stat sStat; /* Results of stat() call */ 5853 5854 unixEnterMutex(); 5855 5856 /* A stat() call may fail for various reasons. If this happens, it is 5857 ** almost certain that an open() call on the same path will also fail. 5858 ** For this reason, if an error occurs in the stat() call here, it is 5859 ** ignored and -1 is returned. The caller will try to open a new file 5860 ** descriptor on the same path, fail, and return an error to SQLite. 5861 ** 5862 ** Even if a subsequent open() call does succeed, the consequences of 5863 ** not searching for a reusable file descriptor are not dire. */ 5864 if( inodeList!=0 && 0==osStat(zPath, &sStat) ){ 5865 unixInodeInfo *pInode; 5866 5867 pInode = inodeList; 5868 while( pInode && (pInode->fileId.dev!=sStat.st_dev 5869 || pInode->fileId.ino!=(u64)sStat.st_ino) ){ 5870 pInode = pInode->pNext; 5871 } 5872 if( pInode ){ 5873 UnixUnusedFd **pp; 5874 assert( sqlite3_mutex_notheld(pInode->pLockMutex) ); 5875 sqlite3_mutex_enter(pInode->pLockMutex); 5876 flags &= (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); 5877 for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); 5878 pUnused = *pp; 5879 if( pUnused ){ 5880 *pp = pUnused->pNext; 5881 } 5882 sqlite3_mutex_leave(pInode->pLockMutex); 5883 } 5884 } 5885 unixLeaveMutex(); 5886 #endif /* if !OS_VXWORKS */ 5887 return pUnused; 5888 } 5889 5890 /* 5891 ** Find the mode, uid and gid of file zFile. 
5892 */ 5893 static int getFileMode( 5894 const char *zFile, /* File name */ 5895 mode_t *pMode, /* OUT: Permissions of zFile */ 5896 uid_t *pUid, /* OUT: uid of zFile. */ 5897 gid_t *pGid /* OUT: gid of zFile. */ 5898 ){ 5899 struct stat sStat; /* Output of stat() on database file */ 5900 int rc = SQLITE_OK; 5901 if( 0==osStat(zFile, &sStat) ){ 5902 *pMode = sStat.st_mode & 0777; 5903 *pUid = sStat.st_uid; 5904 *pGid = sStat.st_gid; 5905 }else{ 5906 rc = SQLITE_IOERR_FSTAT; 5907 } 5908 return rc; 5909 } 5910 5911 /* 5912 ** This function is called by unixOpen() to determine the unix permissions 5913 ** to create new files with. If no error occurs, then SQLITE_OK is returned 5914 ** and a value suitable for passing as the third argument to open(2) is 5915 ** written to *pMode. If an IO error occurs, an SQLite error code is 5916 ** returned and the value of *pMode is not modified. 5917 ** 5918 ** In most cases, this routine sets *pMode to 0, which will become 5919 ** an indication to robust_open() to create the file using 5920 ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. 5921 ** But if the file being opened is a WAL or regular journal file, then 5922 ** this function queries the file-system for the permissions on the 5923 ** corresponding database file and sets *pMode to this value. Whenever 5924 ** possible, WAL and journal files are created using the same permissions 5925 ** as the associated database file. 5926 ** 5927 ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the 5928 ** original filename is unavailable. But 8_3_NAMES is only used for 5929 ** FAT filesystems and permissions do not matter there, so just use 5930 ** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero. 
5931 */ 5932 static int findCreateFileMode( 5933 const char *zPath, /* Path of file (possibly) being created */ 5934 int flags, /* Flags passed as 4th argument to xOpen() */ 5935 mode_t *pMode, /* OUT: Permissions to open file with */ 5936 uid_t *pUid, /* OUT: uid to set on the file */ 5937 gid_t *pGid /* OUT: gid to set on the file */ 5938 ){ 5939 int rc = SQLITE_OK; /* Return Code */ 5940 *pMode = 0; 5941 *pUid = 0; 5942 *pGid = 0; 5943 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 5944 char zDb[MAX_PATHNAME+1]; /* Database file path */ 5945 int nDb; /* Number of valid bytes in zDb */ 5946 5947 /* zPath is a path to a WAL or journal file. The following block derives 5948 ** the path to the associated database file from zPath. This block handles 5949 ** the following naming conventions: 5950 ** 5951 ** "<path to db>-journal" 5952 ** "<path to db>-wal" 5953 ** "<path to db>-journalNN" 5954 ** "<path to db>-walNN" 5955 ** 5956 ** where NN is a decimal number. The NN naming schemes are 5957 ** used by the test_multiplex.c module. 5958 */ 5959 nDb = sqlite3Strlen30(zPath) - 1; 5960 while( zPath[nDb]!='-' ){ 5961 /* In normal operation, the journal file name will always contain 5962 ** a '-' character. However in 8+3 filename mode, or if a corrupt 5963 ** rollback journal specifies a super-journal with a goofy name, then 5964 ** the '-' might be missing. */ 5965 if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK; 5966 nDb--; 5967 } 5968 memcpy(zDb, zPath, nDb); 5969 zDb[nDb] = '\0'; 5970 5971 rc = getFileMode(zDb, pMode, pUid, pGid); 5972 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ 5973 *pMode = 0600; 5974 }else if( flags & SQLITE_OPEN_URI ){ 5975 /* If this is a main database file and the file was opened using a URI 5976 ** filename, check for the "modeof" parameter. If present, interpret 5977 ** its value as a filename and try to copy the mode, uid and gid from 5978 ** that file. 
*/ 5979 const char *z = sqlite3_uri_parameter(zPath, "modeof"); 5980 if( z ){ 5981 rc = getFileMode(z, pMode, pUid, pGid); 5982 } 5983 } 5984 return rc; 5985 } 5986 5987 /* 5988 ** Open the file zPath. 5989 ** 5990 ** Previously, the SQLite OS layer used three functions in place of this 5991 ** one: 5992 ** 5993 ** sqlite3OsOpenReadWrite(); 5994 ** sqlite3OsOpenReadOnly(); 5995 ** sqlite3OsOpenExclusive(); 5996 ** 5997 ** These calls correspond to the following combinations of flags: 5998 ** 5999 ** ReadWrite() -> (READWRITE | CREATE) 6000 ** ReadOnly() -> (READONLY) 6001 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 6002 ** 6003 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 6004 ** true, the file was configured to be automatically deleted when the 6005 ** file handle closed. To achieve the same effect using this new 6006 ** interface, add the DELETEONCLOSE flag to those specified above for 6007 ** OpenExclusive(). 6008 */ 6009 static int unixOpen( 6010 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ 6011 const char *zPath, /* Pathname of file to be opened */ 6012 sqlite3_file *pFile, /* The file descriptor to be filled in */ 6013 int flags, /* Input flags to control the opening */ 6014 int *pOutFlags /* Output flags returned to SQLite core */ 6015 ){ 6016 unixFile *p = (unixFile *)pFile; 6017 int fd = -1; /* File descriptor returned by open() */ 6018 int openFlags = 0; /* Flags to pass to open() */ 6019 int eType = flags&0x0FFF00; /* Type of file to open */ 6020 int noLock; /* True to omit locking primitives */ 6021 int rc = SQLITE_OK; /* Function Return Code */ 6022 int ctrlFlags = 0; /* UNIXFILE_* flags */ 6023 6024 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 6025 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 6026 int isCreate = (flags & SQLITE_OPEN_CREATE); 6027 int isReadonly = (flags & SQLITE_OPEN_READONLY); 6028 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 6029 #if 
SQLITE_ENABLE_LOCKING_STYLE 6030 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); 6031 #endif 6032 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6033 struct statfs fsInfo; 6034 #endif 6035 6036 /* If creating a super- or main-file journal, this function will open 6037 ** a file-descriptor on the directory too. The first time unixSync() 6038 ** is called the directory file descriptor will be fsync()ed and close()d. 6039 */ 6040 int isNewJrnl = (isCreate && ( 6041 eType==SQLITE_OPEN_SUPER_JOURNAL 6042 || eType==SQLITE_OPEN_MAIN_JOURNAL 6043 || eType==SQLITE_OPEN_WAL 6044 )); 6045 6046 /* If argument zPath is a NULL pointer, this function is required to open 6047 ** a temporary file. Use this buffer to store the file name in. 6048 */ 6049 char zTmpname[MAX_PATHNAME+2]; 6050 const char *zName = zPath; 6051 6052 /* Check the following statements are true: 6053 ** 6054 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 6055 ** (b) if CREATE is set, then READWRITE must also be set, and 6056 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 6057 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 6058 */ 6059 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 6060 assert(isCreate==0 || isReadWrite); 6061 assert(isExclusive==0 || isCreate); 6062 assert(isDelete==0 || isCreate); 6063 6064 /* The main DB, main journal, WAL file and super-journal are never 6065 ** automatically deleted. Nor are they ever temporary files. */ 6066 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); 6067 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); 6068 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_SUPER_JOURNAL ); 6069 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); 6070 6071 /* Assert that the upper layer has set one of the "file-type" flags. 
*/ 6072 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 6073 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 6074 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_SUPER_JOURNAL 6075 || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL 6076 ); 6077 6078 /* Detect a pid change and reset the PRNG. There is a race condition 6079 ** here such that two or more threads all trying to open databases at 6080 ** the same instant might all reset the PRNG. But multiple resets 6081 ** are harmless. 6082 */ 6083 if( randomnessPid!=osGetpid(0) ){ 6084 randomnessPid = osGetpid(0); 6085 sqlite3_randomness(0,0); 6086 } 6087 memset(p, 0, sizeof(unixFile)); 6088 6089 if( eType==SQLITE_OPEN_MAIN_DB ){ 6090 UnixUnusedFd *pUnused; 6091 pUnused = findReusableFd(zName, flags); 6092 if( pUnused ){ 6093 fd = pUnused->fd; 6094 }else{ 6095 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 6096 if( !pUnused ){ 6097 return SQLITE_NOMEM_BKPT; 6098 } 6099 } 6100 p->pPreallocatedUnused = pUnused; 6101 6102 /* Database filenames are double-zero terminated if they are not 6103 ** URIs with parameters. Hence, they can always be passed into 6104 ** sqlite3_uri_parameter(). */ 6105 assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); 6106 6107 }else if( !zName ){ 6108 /* If zName is NULL, the upper layer is requesting a temp file. */ 6109 assert(isDelete && !isNewJrnl); 6110 rc = unixGetTempname(pVfs->mxPathname, zTmpname); 6111 if( rc!=SQLITE_OK ){ 6112 return rc; 6113 } 6114 zName = zTmpname; 6115 6116 /* Generated temporary filenames are always double-zero terminated 6117 ** for use by sqlite3_uri_parameter(). */ 6118 assert( zName[strlen(zName)+1]==0 ); 6119 } 6120 6121 /* Determine the value of the flags parameter passed to POSIX function 6122 ** open(). These must be calculated even if open() is not called, as 6123 ** they may be stored as part of the file handle and used by the 6124 ** 'conch file' locking functions later on. 
*/ 6125 if( isReadonly ) openFlags |= O_RDONLY; 6126 if( isReadWrite ) openFlags |= O_RDWR; 6127 if( isCreate ) openFlags |= O_CREAT; 6128 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); 6129 openFlags |= (O_LARGEFILE|O_BINARY|O_NOFOLLOW); 6130 6131 if( fd<0 ){ 6132 mode_t openMode; /* Permissions to create file with */ 6133 uid_t uid; /* Userid for the file */ 6134 gid_t gid; /* Groupid for the file */ 6135 rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); 6136 if( rc!=SQLITE_OK ){ 6137 assert( !p->pPreallocatedUnused ); 6138 assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); 6139 return rc; 6140 } 6141 fd = robust_open(zName, openFlags, openMode); 6142 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); 6143 assert( !isExclusive || (openFlags & O_CREAT)!=0 ); 6144 if( fd<0 ){ 6145 if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){ 6146 /* If unable to create a journal because the directory is not 6147 ** writable, change the error code to indicate that. */ 6148 rc = SQLITE_READONLY_DIRECTORY; 6149 }else if( errno!=EISDIR && isReadWrite ){ 6150 /* Failed to open the file for read/write access. Try read-only. */ 6151 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 6152 openFlags &= ~(O_RDWR|O_CREAT); 6153 flags |= SQLITE_OPEN_READONLY; 6154 openFlags |= O_RDONLY; 6155 isReadonly = 1; 6156 fd = robust_open(zName, openFlags, openMode); 6157 } 6158 } 6159 if( fd<0 ){ 6160 int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); 6161 if( rc==SQLITE_OK ) rc = rc2; 6162 goto open_finished; 6163 } 6164 6165 /* The owner of the rollback journal or WAL file should always be the 6166 ** same as the owner of the database file. Try to ensure that this is 6167 ** the case. The chown() system call will be a no-op if the current 6168 ** process lacks root privileges, be we should at least try. 
Without 6169 ** this step, if a root process opens a database file, it can leave 6170 ** behinds a journal/WAL that is owned by root and hence make the 6171 ** database inaccessible to unprivileged processes. 6172 ** 6173 ** If openMode==0, then that means uid and gid are not set correctly 6174 ** (probably because SQLite is configured to use 8+3 filename mode) and 6175 ** in that case we do not want to attempt the chown(). 6176 */ 6177 if( openMode && (flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL))!=0 ){ 6178 robustFchown(fd, uid, gid); 6179 } 6180 } 6181 assert( fd>=0 ); 6182 if( pOutFlags ){ 6183 *pOutFlags = flags; 6184 } 6185 6186 if( p->pPreallocatedUnused ){ 6187 p->pPreallocatedUnused->fd = fd; 6188 p->pPreallocatedUnused->flags = 6189 flags & (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); 6190 } 6191 6192 if( isDelete ){ 6193 #if OS_VXWORKS 6194 zPath = zName; 6195 #elif defined(SQLITE_UNLINK_AFTER_CLOSE) 6196 zPath = sqlite3_mprintf("%s", zName); 6197 if( zPath==0 ){ 6198 robust_close(p, fd, __LINE__); 6199 return SQLITE_NOMEM_BKPT; 6200 } 6201 #else 6202 osUnlink(zName); 6203 #endif 6204 } 6205 #if SQLITE_ENABLE_LOCKING_STYLE 6206 else{ 6207 p->openFlags = openFlags; 6208 } 6209 #endif 6210 6211 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6212 if( fstatfs(fd, &fsInfo) == -1 ){ 6213 storeLastErrno(p, errno); 6214 robust_close(p, fd, __LINE__); 6215 return SQLITE_IOERR_ACCESS; 6216 } 6217 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { 6218 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6219 } 6220 if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { 6221 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6222 } 6223 #endif 6224 6225 /* Set up appropriate ctrlFlags */ 6226 if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; 6227 if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; 6228 noLock = eType!=SQLITE_OPEN_MAIN_DB; 6229 if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; 6230 if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC; 6231 if( flags & 
SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; 6232 6233 #if SQLITE_ENABLE_LOCKING_STYLE 6234 #if SQLITE_PREFER_PROXY_LOCKING 6235 isAutoProxy = 1; 6236 #endif 6237 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ 6238 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); 6239 int useProxy = 0; 6240 6241 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 6242 ** never use proxy, NULL means use proxy for non-local files only. */ 6243 if( envforce!=NULL ){ 6244 useProxy = atoi(envforce)>0; 6245 }else{ 6246 useProxy = !(fsInfo.f_flags&MNT_LOCAL); 6247 } 6248 if( useProxy ){ 6249 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6250 if( rc==SQLITE_OK ){ 6251 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); 6252 if( rc!=SQLITE_OK ){ 6253 /* Use unixClose to clean up the resources added in fillInUnixFile 6254 ** and clear all the structure's references. Specifically, 6255 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 6256 */ 6257 unixClose(pFile); 6258 return rc; 6259 } 6260 } 6261 goto open_finished; 6262 } 6263 } 6264 #endif 6265 6266 assert( zPath==0 || zPath[0]=='/' 6267 || eType==SQLITE_OPEN_SUPER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL 6268 ); 6269 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6270 6271 open_finished: 6272 if( rc!=SQLITE_OK ){ 6273 sqlite3_free(p->pPreallocatedUnused); 6274 } 6275 return rc; 6276 } 6277 6278 6279 /* 6280 ** Delete the file at zPath. If the dirSync argument is true, fsync() 6281 ** the directory after deleting the file. 
6282 */ 6283 static int unixDelete( 6284 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ 6285 const char *zPath, /* Name of file to be deleted */ 6286 int dirSync /* If true, fsync() directory after deleting file */ 6287 ){ 6288 int rc = SQLITE_OK; 6289 UNUSED_PARAMETER(NotUsed); 6290 SimulateIOError(return SQLITE_IOERR_DELETE); 6291 if( osUnlink(zPath)==(-1) ){ 6292 if( errno==ENOENT 6293 #if OS_VXWORKS 6294 || osAccess(zPath,0)!=0 6295 #endif 6296 ){ 6297 rc = SQLITE_IOERR_DELETE_NOENT; 6298 }else{ 6299 rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); 6300 } 6301 return rc; 6302 } 6303 #ifndef SQLITE_DISABLE_DIRSYNC 6304 if( (dirSync & 1)!=0 ){ 6305 int fd; 6306 rc = osOpenDirectory(zPath, &fd); 6307 if( rc==SQLITE_OK ){ 6308 if( full_fsync(fd,0,0) ){ 6309 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); 6310 } 6311 robust_close(0, fd, __LINE__); 6312 }else{ 6313 assert( rc==SQLITE_CANTOPEN ); 6314 rc = SQLITE_OK; 6315 } 6316 } 6317 #endif 6318 return rc; 6319 } 6320 6321 /* 6322 ** Test the existence of or access permissions of file zPath. The 6323 ** test performed depends on the value of flags: 6324 ** 6325 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 6326 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 6327 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 6328 ** 6329 ** Otherwise return 0. 6330 */ 6331 static int unixAccess( 6332 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ 6333 const char *zPath, /* Path of the file to examine */ 6334 int flags, /* What do we want to learn about the zPath file? */ 6335 int *pResOut /* Write result boolean here */ 6336 ){ 6337 UNUSED_PARAMETER(NotUsed); 6338 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 6339 assert( pResOut!=0 ); 6340 6341 /* The spec says there are three possible values for flags. 
But only 6342 ** two of them are actually used */ 6343 assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); 6344 6345 if( flags==SQLITE_ACCESS_EXISTS ){ 6346 struct stat buf; 6347 *pResOut = 0==osStat(zPath, &buf) && 6348 (!S_ISREG(buf.st_mode) || buf.st_size>0); 6349 }else{ 6350 *pResOut = osAccess(zPath, W_OK|R_OK)==0; 6351 } 6352 return SQLITE_OK; 6353 } 6354 6355 /* 6356 ** If the last component of the pathname in z[0]..z[j-1] is something 6357 ** other than ".." then back it out and return true. If the last 6358 ** component is empty or if it is ".." then return false. 6359 */ 6360 static int unixBackupDir(const char *z, int *pJ){ 6361 int j = *pJ; 6362 int i; 6363 if( j<=0 ) return 0; 6364 for(i=j-1; ALWAYS(i>0) && z[i-1]!='/'; i--){} 6365 if( z[i]=='.' && i==j-2 && z[i+1]=='.' ) return 0; 6366 *pJ = i-1; 6367 return 1; 6368 } 6369 6370 /* 6371 ** Convert a relative pathname into a full pathname. Also 6372 ** simplify the pathname as follows: 6373 ** 6374 ** Remove all instances of /./ 6375 ** Remove all isntances of /X/../ for any X 6376 */ 6377 static int mkFullPathname( 6378 const char *zPath, /* Input path */ 6379 char *zOut, /* Output buffer */ 6380 int nOut /* Allocated size of buffer zOut */ 6381 ){ 6382 int nPath = sqlite3Strlen30(zPath); 6383 int iOff = 0; 6384 int i, j; 6385 if( zPath[0]!='/' ){ 6386 if( osGetcwd(zOut, nOut-2)==0 ){ 6387 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); 6388 } 6389 iOff = sqlite3Strlen30(zOut); 6390 zOut[iOff++] = '/'; 6391 } 6392 if( (iOff+nPath+1)>nOut ){ 6393 /* SQLite assumes that xFullPathname() nul-terminates the output buffer 6394 ** even if it returns an error. */ 6395 zOut[iOff] = '\0'; 6396 return SQLITE_CANTOPEN_BKPT; 6397 } 6398 sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath); 6399 6400 /* Remove duplicate '/' characters. Except, two // at the beginning 6401 ** of a pathname is allowed since this is important on windows. 
*/ 6402 for(i=j=1; zOut[i]; i++){ 6403 zOut[j++] = zOut[i]; 6404 while( zOut[i]=='/' && zOut[i+1]=='/' ) i++; 6405 } 6406 zOut[j] = 0; 6407 6408 assert( zOut[0]=='/' ); 6409 for(i=j=0; zOut[i]; i++){ 6410 if( zOut[i]=='/' ){ 6411 /* Skip over internal "/." directory components */ 6412 if( zOut[i+1]=='.' && zOut[i+2]=='/' ){ 6413 i += 1; 6414 continue; 6415 } 6416 6417 /* If this is a "/.." directory component then back out the 6418 ** previous term of the directory if it is something other than "..". 6419 */ 6420 if( zOut[i+1]=='.' 6421 && zOut[i+2]=='.' 6422 && zOut[i+3]=='/' 6423 && unixBackupDir(zOut, &j) 6424 ){ 6425 i += 2; 6426 continue; 6427 } 6428 } 6429 if( ALWAYS(j>=0) ) zOut[j] = zOut[i]; 6430 j++; 6431 } 6432 if( NEVER(j==0) ) zOut[j++] = '/'; 6433 zOut[j] = 0; 6434 return SQLITE_OK; 6435 } 6436 6437 /* 6438 ** Turn a relative pathname into a full pathname. The relative path 6439 ** is stored as a nul-terminated string in the buffer pointed to by 6440 ** zPath. 6441 ** 6442 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 6443 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 6444 ** this buffer before returning. 
6445 */ 6446 static int unixFullPathname( 6447 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 6448 const char *zPath, /* Possibly relative input path */ 6449 int nOut, /* Size of output buffer in bytes */ 6450 char *zOut /* Output buffer */ 6451 ){ 6452 #if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT) 6453 return mkFullPathname(zPath, zOut, nOut); 6454 #else 6455 int rc = SQLITE_OK; 6456 int nByte; 6457 int nLink = 0; /* Number of symbolic links followed so far */ 6458 const char *zIn = zPath; /* Input path for each iteration of loop */ 6459 char *zDel = 0; 6460 6461 assert( pVfs->mxPathname==MAX_PATHNAME ); 6462 UNUSED_PARAMETER(pVfs); 6463 6464 /* It's odd to simulate an io-error here, but really this is just 6465 ** using the io-error infrastructure to test that SQLite handles this 6466 ** function failing. This function could fail if, for example, the 6467 ** current working directory has been unlinked. 6468 */ 6469 SimulateIOError( return SQLITE_ERROR ); 6470 6471 do { 6472 6473 /* Call stat() on path zIn. Set bLink to true if the path is a symbolic 6474 ** link, or false otherwise. 
*/ 6475 int bLink = 0; 6476 struct stat buf; 6477 if( osLstat(zIn, &buf)!=0 ){ 6478 if( errno!=ENOENT ){ 6479 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn); 6480 } 6481 }else{ 6482 bLink = S_ISLNK(buf.st_mode); 6483 } 6484 6485 if( bLink ){ 6486 nLink++; 6487 if( zDel==0 ){ 6488 zDel = sqlite3_malloc(nOut); 6489 if( zDel==0 ) rc = SQLITE_NOMEM_BKPT; 6490 }else if( nLink>=SQLITE_MAX_SYMLINKS ){ 6491 rc = SQLITE_CANTOPEN_BKPT; 6492 } 6493 6494 if( rc==SQLITE_OK ){ 6495 nByte = osReadlink(zIn, zDel, nOut-1); 6496 if( nByte<0 ){ 6497 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn); 6498 }else{ 6499 if( zDel[0]!='/' ){ 6500 int n; 6501 for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--); 6502 if( nByte+n+1>nOut ){ 6503 rc = SQLITE_CANTOPEN_BKPT; 6504 }else{ 6505 memmove(&zDel[n], zDel, nByte+1); 6506 memcpy(zDel, zIn, n); 6507 nByte += n; 6508 } 6509 } 6510 zDel[nByte] = '\0'; 6511 } 6512 } 6513 6514 zIn = zDel; 6515 } 6516 6517 assert( rc!=SQLITE_OK || zIn!=zOut || zIn[0]=='/' ); 6518 if( rc==SQLITE_OK && zIn!=zOut ){ 6519 rc = mkFullPathname(zIn, zOut, nOut); 6520 } 6521 if( bLink==0 ) break; 6522 zIn = zOut; 6523 }while( rc==SQLITE_OK ); 6524 6525 sqlite3_free(zDel); 6526 if( rc==SQLITE_OK && nLink ) rc = SQLITE_OK_SYMLINK; 6527 return rc; 6528 #endif /* HAVE_READLINK && HAVE_LSTAT */ 6529 } 6530 6531 6532 #ifndef SQLITE_OMIT_LOAD_EXTENSION 6533 /* 6534 ** Interfaces for opening a shared library, finding entry points 6535 ** within the shared library, and closing the shared library. 6536 */ 6537 #include <dlfcn.h> 6538 static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ 6539 UNUSED_PARAMETER(NotUsed); 6540 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 6541 } 6542 6543 /* 6544 ** SQLite calls this function immediately after a call to unixDlSym() or 6545 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 6546 ** message is available, it is written to zBufOut. 
If no error message 6547 ** is available, zBufOut is left unmodified and SQLite uses a default 6548 ** error message. 6549 */ 6550 static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ 6551 const char *zErr; 6552 UNUSED_PARAMETER(NotUsed); 6553 unixEnterMutex(); 6554 zErr = dlerror(); 6555 if( zErr ){ 6556 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 6557 } 6558 unixLeaveMutex(); 6559 } 6560 static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ 6561 /* 6562 ** GCC with -pedantic-errors says that C90 does not allow a void* to be 6563 ** cast into a pointer to a function. And yet the library dlsym() routine 6564 ** returns a void* which is really a pointer to a function. So how do we 6565 ** use dlsym() with -pedantic-errors? 6566 ** 6567 ** Variable x below is defined to be a pointer to a function taking 6568 ** parameters void* and const char* and returning a pointer to a function. 6569 ** We initialize x by assigning it a pointer to the dlsym() function. 6570 ** (That assignment requires a cast.) Then we call the function that 6571 ** x points to. 6572 ** 6573 ** This work-around is unlikely to work correctly on any system where 6574 ** you really cannot cast a function pointer into void*. But then, on the 6575 ** other hand, dlsym() will not work on such a system either, so we have 6576 ** not really lost anything. 6577 */ 6578 void (*(*x)(void*,const char*))(void); 6579 UNUSED_PARAMETER(NotUsed); 6580 x = (void(*(*)(void*,const char*))(void))dlsym; 6581 return (*x)(p, zSym); 6582 } 6583 static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ 6584 UNUSED_PARAMETER(NotUsed); 6585 dlclose(pHandle); 6586 } 6587 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 6588 #define unixDlOpen 0 6589 #define unixDlError 0 6590 #define unixDlSym 0 6591 #define unixDlClose 0 6592 #endif 6593 6594 /* 6595 ** Write nBuf bytes of random data to the supplied buffer zBuf. 
6596 */ 6597 static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ 6598 UNUSED_PARAMETER(NotUsed); 6599 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); 6600 6601 /* We have to initialize zBuf to prevent valgrind from reporting 6602 ** errors. The reports issued by valgrind are incorrect - we would 6603 ** prefer that the randomness be increased by making use of the 6604 ** uninitialized space in zBuf - but valgrind errors tend to worry 6605 ** some users. Rather than argue, it seems easier just to initialize 6606 ** the whole array and silence valgrind, even if that means less randomness 6607 ** in the random seed. 6608 ** 6609 ** When testing, initializing zBuf[] to zero is all we do. That means 6610 ** that we always use the same random number sequence. This makes the 6611 ** tests repeatable. 6612 */ 6613 memset(zBuf, 0, nBuf); 6614 randomnessPid = osGetpid(0); 6615 #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) 6616 { 6617 int fd, got; 6618 fd = robust_open("/dev/urandom", O_RDONLY, 0); 6619 if( fd<0 ){ 6620 time_t t; 6621 time(&t); 6622 memcpy(zBuf, &t, sizeof(t)); 6623 memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); 6624 assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); 6625 nBuf = sizeof(t) + sizeof(randomnessPid); 6626 }else{ 6627 do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); 6628 robust_close(0, fd, __LINE__); 6629 } 6630 } 6631 #endif 6632 return nBuf; 6633 } 6634 6635 6636 /* 6637 ** Sleep for a little while. Return the amount of time slept. 6638 ** The argument is the number of microseconds we want to sleep. 6639 ** The return value is the number of microseconds of sleep actually 6640 ** requested from the underlying operating system, a number which 6641 ** might be greater than or equal to the argument, but not less 6642 ** than the argument. 
6643 */ 6644 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ 6645 #if OS_VXWORKS 6646 struct timespec sp; 6647 6648 sp.tv_sec = microseconds / 1000000; 6649 sp.tv_nsec = (microseconds % 1000000) * 1000; 6650 nanosleep(&sp, NULL); 6651 UNUSED_PARAMETER(NotUsed); 6652 return microseconds; 6653 #elif defined(HAVE_USLEEP) && HAVE_USLEEP 6654 if( microseconds>=1000000 ) sleep(microseconds/1000000); 6655 if( microseconds%1000000 ) usleep(microseconds%1000000); 6656 UNUSED_PARAMETER(NotUsed); 6657 return microseconds; 6658 #else 6659 int seconds = (microseconds+999999)/1000000; 6660 sleep(seconds); 6661 UNUSED_PARAMETER(NotUsed); 6662 return seconds*1000000; 6663 #endif 6664 } 6665 6666 /* 6667 ** The following variable, if set to a non-zero value, is interpreted as 6668 ** the number of seconds since 1970 and is used to set the result of 6669 ** sqlite3OsCurrentTime() during testing. 6670 */ 6671 #ifdef SQLITE_TEST 6672 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ 6673 #endif 6674 6675 /* 6676 ** Find the current time (in Universal Coordinated Time). Write into *piNow 6677 ** the current time and date as a Julian Day number times 86_400_000. In 6678 ** other words, write into *piNow the number of milliseconds since the Julian 6679 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the 6680 ** proleptic Gregorian calendar. 6681 ** 6682 ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date 6683 ** cannot be found. 
6684 */ 6685 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ 6686 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; 6687 int rc = SQLITE_OK; 6688 #if defined(NO_GETTOD) 6689 time_t t; 6690 time(&t); 6691 *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; 6692 #elif OS_VXWORKS 6693 struct timespec sNow; 6694 clock_gettime(CLOCK_REALTIME, &sNow); 6695 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; 6696 #else 6697 struct timeval sNow; 6698 (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ 6699 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; 6700 #endif 6701 6702 #ifdef SQLITE_TEST 6703 if( sqlite3_current_time ){ 6704 *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; 6705 } 6706 #endif 6707 UNUSED_PARAMETER(NotUsed); 6708 return rc; 6709 } 6710 6711 #ifndef SQLITE_OMIT_DEPRECATED 6712 /* 6713 ** Find the current time (in Universal Coordinated Time). Write the 6714 ** current time and date as a Julian Day number into *prNow and 6715 ** return 0. Return 1 if the time and date cannot be found. 6716 */ 6717 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ 6718 sqlite3_int64 i = 0; 6719 int rc; 6720 UNUSED_PARAMETER(NotUsed); 6721 rc = unixCurrentTimeInt64(0, &i); 6722 *prNow = i/86400000.0; 6723 return rc; 6724 } 6725 #else 6726 # define unixCurrentTime 0 6727 #endif 6728 6729 /* 6730 ** The xGetLastError() method is designed to return a better 6731 ** low-level error message when operating-system problems come up 6732 ** during SQLite operation. Only the integer return code is currently 6733 ** used. 
6734 */ 6735 static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ 6736 UNUSED_PARAMETER(NotUsed); 6737 UNUSED_PARAMETER(NotUsed2); 6738 UNUSED_PARAMETER(NotUsed3); 6739 return errno; 6740 } 6741 6742 6743 /* 6744 ************************ End of sqlite3_vfs methods *************************** 6745 ******************************************************************************/ 6746 6747 /****************************************************************************** 6748 ************************** Begin Proxy Locking ******************************** 6749 ** 6750 ** Proxy locking is a "uber-locking-method" in this sense: It uses the 6751 ** other locking methods on secondary lock files. Proxy locking is a 6752 ** meta-layer over top of the primitive locking implemented above. For 6753 ** this reason, the division that implements of proxy locking is deferred 6754 ** until late in the file (here) after all of the other I/O methods have 6755 ** been defined - so that the primitive locking methods are available 6756 ** as services to help with the implementation of proxy locking. 6757 ** 6758 **** 6759 ** 6760 ** The default locking schemes in SQLite use byte-range locks on the 6761 ** database file to coordinate safe, concurrent access by multiple readers 6762 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking 6763 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented 6764 ** as POSIX read & write locks over fixed set of locations (via fsctl), 6765 ** on AFP and SMB only exclusive byte-range locks are available via fsctl 6766 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. 
** To simulate a F_RDLCK on the shared range, on AFP a randomly selected
** address in the shared range is taken for a SHARED lock, and the entire
** shared range is taken for an EXCLUSIVE lock:
**
**      PENDING_BYTE        0x40000000
**      RESERVED_BYTE       0x40000001
**      SHARED_RANGE        0x40000002 -> 0x40000200
**
** This works well on the local file system, but shows a nearly 100x
** slowdown in read performance on AFP because the AFP client disables
** the read cache when byte-range locks are present.  Enabling the read
** cache exposes a cache coherency problem that is present on all OS X
** supported network file systems.  NFS and AFP both observe the
** close-to-open semantics for ensuring cache coherency
** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
** address the requirements for concurrent database access by multiple
** readers and writers
** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
**
** To address the performance and cache coherency issues, proxy file locking
** changes the way database access is controlled by limiting access to a
** single host at a time and moving file locks off of the database file
** and onto a proxy file on the local file system.
**
**
** Using proxy locks
** -----------------
**
** C APIs
**
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE,
**                       <proxy_path> | ":auto:");
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE,
**                       &<proxy_path>);
**
**
** SQL pragmas
**
**  PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
**  PRAGMA [database.]lock_proxy_file
**
** Specifying ":auto:" means that if there is a conch file with a matching
** host ID in it, the proxy path in the conch file will be used, otherwise
** a proxy path based on the user's temp dir
** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
** actual proxy file name is generated from the name and path of the
** database file.  For example:
**
**       For database path "/Users/me/foo.db"
**       the lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
**
** Once a lock proxy is configured for a database connection, it can not
** be removed, however it may be switched to a different proxy path via
** the above APIs (assuming the conch file is not being held by another
** connection or process).
**
**
** How proxy locking works
** -----------------------
**
** Proxy file locking relies primarily on two new supporting files:
**
**   *  conch file to limit access to the database file to a single host
**      at a time
**
**   *  proxy file to act as a proxy for the advisory locks normally
**      taken on the database
**
** The conch file - to use a proxy file, sqlite must first "hold the conch"
** by taking an sqlite-style shared lock on the conch file, reading the
** contents and comparing the host's unique host ID (see below) and lock
** proxy path against the values stored in the conch.
** The conch file is stored in the same directory as the database file
** and the file name is patterned after the database file name as
** ".<databasename>-conch".
** If the conch file does not exist, or its contents do not match the
** host ID and/or proxy path, then the lock is escalated to an exclusive
** lock, the conch file contents are updated with the host ID and proxy
** path, and the lock is downgraded to a shared lock again.  If the conch
** is held by another process (with a shared lock), the exclusive lock
** will fail and SQLITE_BUSY is returned.
**
** The proxy file - a single-byte file used for all advisory file locks
** normally taken on the database file.   This allows for safe sharing
** of the database file for multiple readers and writers on the same
** host (the conch ensures that they all use the same local lock file).
**
** Requesting the lock proxy does not immediately take the conch, it is
** only taken when the first request to lock the database file is made.
** This matches the semantics of the traditional locking behavior, where
** opening a connection to a database file does not take a lock on it.
** The shared lock and an open file descriptor are maintained until
** the connection to the database is closed.
**
** The proxy file and the lock file are never deleted so they only need
** to be created the first time they are used.
**
** Configuration options
** ---------------------
**
**  SQLITE_PREFER_PROXY_LOCKING
**
**       Database files accessed on non-local file systems are
**       automatically configured for proxy locking, lock files are
**       named automatically using the same logic as
**       PRAGMA lock_proxy_file=":auto:"
**
**  SQLITE_PROXY_DEBUG
**
**       Enables the logging of error messages during host id file
**       retrieval and creation
**
**  LOCKPROXYDIR
**
**       Overrides the default directory used for lock proxy files that
**       are named automatically via the ":auto:" setting
**
**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
**
**       Permissions to use when creating a directory for storing the
**       lock proxy files, only used when LOCKPROXYDIR is not set.
**
**
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files (explicit use of the lock_proxy_file pragma, or of
** sqlite3_file_control() with SQLITE_FCNTL_SET_LOCKPROXYFILE, is not
** affected by SQLITE_FORCE_PROXY_LOCKING).
*/

/*
** Proxy locking is only available on MacOSX
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

/*
** The proxyLockingContext has the path and file structures for the remote
** and local proxy files in it.
**
** An instance is reached through unixFile.lockingContext while proxy
** locking is active.  The last two fields remember the locking context and
** I/O method table that were in place beforehand, so that they can be
** restored when the file is closed.
*/
typedef struct proxyLockingContext proxyLockingContext;
struct proxyLockingContext {
  unixFile *conchFile;         /* Open conch file */
  char *conchFilePath;         /* Name of the conch file */
  unixFile *lockProxy;         /* Open proxy lock file */
  char *lockProxyPath;         /* Name of the proxy lock file */
  char *dbPath;                /* Name of the open file */
  int conchHeld;               /* 1 if the conch is held, -1 if lockless */
  int nFails;                  /* Number of conch taking failures */
  void *oldLockingContext;     /* Original lockingcontext to restore on close */
  sqlite3_io_methods const *pOldMethod;     /* Original I/O methods for close */
};

/*
** The proxy lock file path for the database at dbPath is written into lPath,
** which must point to valid, writable memory large enough for a maxLen length
** file path.
6923 */ 6924 static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ 6925 int len; 6926 int dbLen; 6927 int i; 6928 6929 #ifdef LOCKPROXYDIR 6930 len = strlcpy(lPath, LOCKPROXYDIR, maxLen); 6931 #else 6932 # ifdef _CS_DARWIN_USER_TEMP_DIR 6933 { 6934 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ 6935 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", 6936 lPath, errno, osGetpid(0))); 6937 return SQLITE_IOERR_LOCK; 6938 } 6939 len = strlcat(lPath, "sqliteplocks", maxLen); 6940 } 6941 # else 6942 len = strlcpy(lPath, "/tmp/", maxLen); 6943 # endif 6944 #endif 6945 6946 if( lPath[len-1]!='/' ){ 6947 len = strlcat(lPath, "/", maxLen); 6948 } 6949 6950 /* transform the db path to a unique cache name */ 6951 dbLen = (int)strlen(dbPath); 6952 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ 6953 char c = dbPath[i]; 6954 lPath[i+len] = (c=='/')?'_':c; 6955 } 6956 lPath[i+len]='\0'; 6957 strlcat(lPath, ":auto:", maxLen); 6958 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0))); 6959 return SQLITE_OK; 6960 } 6961 6962 /* 6963 ** Creates the lock file and any missing directories in lockPath 6964 */ 6965 static int proxyCreateLockPath(const char *lockPath){ 6966 int i, len; 6967 char buf[MAXPATHLEN]; 6968 int start = 0; 6969 6970 assert(lockPath!=NULL); 6971 /* try to create all the intermediate directories */ 6972 len = (int)strlen(lockPath); 6973 buf[0] = lockPath[0]; 6974 for( i=1; i<len; i++ ){ 6975 if( lockPath[i] == '/' && (i - start > 0) ){ 6976 /* only mkdir if leaf dir != "." or "/" or ".." */ 6977 if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 6978 || (i-start==2 && buf[start] != '.' 
&& buf[start+1] != '.') ){ 6979 buf[i]='\0'; 6980 if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ 6981 int err=errno; 6982 if( err!=EEXIST ) { 6983 OSTRACE(("CREATELOCKPATH FAILED creating %s, " 6984 "'%s' proxy lock path=%s pid=%d\n", 6985 buf, strerror(err), lockPath, osGetpid(0))); 6986 return err; 6987 } 6988 } 6989 } 6990 start=i+1; 6991 } 6992 buf[i] = lockPath[i]; 6993 } 6994 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0))); 6995 return 0; 6996 } 6997 6998 /* 6999 ** Create a new VFS file descriptor (stored in memory obtained from 7000 ** sqlite3_malloc) and open the file named "path" in the file descriptor. 7001 ** 7002 ** The caller is responsible not only for closing the file descriptor 7003 ** but also for freeing the memory associated with the file descriptor. 7004 */ 7005 static int proxyCreateUnixFile( 7006 const char *path, /* path for the new unixFile */ 7007 unixFile **ppFile, /* unixFile created and returned by ref */ 7008 int islockfile /* if non zero missing dirs will be created */ 7009 ) { 7010 int fd = -1; 7011 unixFile *pNew; 7012 int rc = SQLITE_OK; 7013 int openFlags = O_RDWR | O_CREAT | O_NOFOLLOW; 7014 sqlite3_vfs dummyVfs; 7015 int terrno = 0; 7016 UnixUnusedFd *pUnused = NULL; 7017 7018 /* 1. first try to open/create the file 7019 ** 2. if that fails, and this is a lock file (not-conch), try creating 7020 ** the parent directories and then try again. 7021 ** 3. 
if that fails, try to open the file read-only 7022 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file 7023 */ 7024 pUnused = findReusableFd(path, openFlags); 7025 if( pUnused ){ 7026 fd = pUnused->fd; 7027 }else{ 7028 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 7029 if( !pUnused ){ 7030 return SQLITE_NOMEM_BKPT; 7031 } 7032 } 7033 if( fd<0 ){ 7034 fd = robust_open(path, openFlags, 0); 7035 terrno = errno; 7036 if( fd<0 && errno==ENOENT && islockfile ){ 7037 if( proxyCreateLockPath(path) == SQLITE_OK ){ 7038 fd = robust_open(path, openFlags, 0); 7039 } 7040 } 7041 } 7042 if( fd<0 ){ 7043 openFlags = O_RDONLY | O_NOFOLLOW; 7044 fd = robust_open(path, openFlags, 0); 7045 terrno = errno; 7046 } 7047 if( fd<0 ){ 7048 if( islockfile ){ 7049 return SQLITE_BUSY; 7050 } 7051 switch (terrno) { 7052 case EACCES: 7053 return SQLITE_PERM; 7054 case EIO: 7055 return SQLITE_IOERR_LOCK; /* even though it is the conch */ 7056 default: 7057 return SQLITE_CANTOPEN_BKPT; 7058 } 7059 } 7060 7061 pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); 7062 if( pNew==NULL ){ 7063 rc = SQLITE_NOMEM_BKPT; 7064 goto end_create_proxy; 7065 } 7066 memset(pNew, 0, sizeof(unixFile)); 7067 pNew->openFlags = openFlags; 7068 memset(&dummyVfs, 0, sizeof(dummyVfs)); 7069 dummyVfs.pAppData = (void*)&autolockIoFinder; 7070 dummyVfs.zName = "dummy"; 7071 pUnused->fd = fd; 7072 pUnused->flags = openFlags; 7073 pNew->pPreallocatedUnused = pUnused; 7074 7075 rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); 7076 if( rc==SQLITE_OK ){ 7077 *ppFile = pNew; 7078 return SQLITE_OK; 7079 } 7080 end_create_proxy: 7081 robust_close(pNew, fd, __LINE__); 7082 sqlite3_free(pNew); 7083 sqlite3_free(pUnused); 7084 return rc; 7085 } 7086 7087 #ifdef SQLITE_TEST 7088 /* simulate multiple hosts by creating unique hostid file paths */ 7089 int sqlite3_hostid_num = 0; 7090 #endif 7091 7092 #define PROXY_HOSTIDLEN 16 /* conch file host id length */ 7093 7094 #if HAVE_GETHOSTUUID 7095 /* Not 
always defined in the headers as it ought to be */ 7096 extern int gethostuuid(uuid_t id, const struct timespec *wait); 7097 #endif 7098 7099 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 7100 ** bytes of writable memory. 7101 */ 7102 static int proxyGetHostID(unsigned char *pHostID, int *pError){ 7103 assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); 7104 memset(pHostID, 0, PROXY_HOSTIDLEN); 7105 #if HAVE_GETHOSTUUID 7106 { 7107 struct timespec timeout = {1, 0}; /* 1 sec timeout */ 7108 if( gethostuuid(pHostID, &timeout) ){ 7109 int err = errno; 7110 if( pError ){ 7111 *pError = err; 7112 } 7113 return SQLITE_IOERR; 7114 } 7115 } 7116 #else 7117 UNUSED_PARAMETER(pError); 7118 #endif 7119 #ifdef SQLITE_TEST 7120 /* simulate multiple hosts by creating unique hostid file paths */ 7121 if( sqlite3_hostid_num != 0){ 7122 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); 7123 } 7124 #endif 7125 7126 return SQLITE_OK; 7127 } 7128 7129 /* The conch file contains the header, host id and lock file path 7130 */ 7131 #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ 7132 #define PROXY_HEADERLEN 1 /* conch file header length */ 7133 #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) 7134 #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) 7135 7136 /* 7137 ** Takes an open conch file, copies the contents to a new path and then moves 7138 ** it back. The newly created file's file descriptor is assigned to the 7139 ** conch file structure and finally the original conch file descriptor is 7140 ** closed. Returns zero if successful. 
7141 */ 7142 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ 7143 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7144 unixFile *conchFile = pCtx->conchFile; 7145 char tPath[MAXPATHLEN]; 7146 char buf[PROXY_MAXCONCHLEN]; 7147 char *cPath = pCtx->conchFilePath; 7148 size_t readLen = 0; 7149 size_t pathLen = 0; 7150 char errmsg[64] = ""; 7151 int fd = -1; 7152 int rc = -1; 7153 UNUSED_PARAMETER(myHostID); 7154 7155 /* create a new path by replace the trailing '-conch' with '-break' */ 7156 pathLen = strlcpy(tPath, cPath, MAXPATHLEN); 7157 if( pathLen>MAXPATHLEN || pathLen<6 || 7158 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ 7159 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); 7160 goto end_breaklock; 7161 } 7162 /* read the conch content */ 7163 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); 7164 if( readLen<PROXY_PATHINDEX ){ 7165 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); 7166 goto end_breaklock; 7167 } 7168 /* write it out to the temporary break file */ 7169 fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW), 0); 7170 if( fd<0 ){ 7171 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); 7172 goto end_breaklock; 7173 } 7174 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ 7175 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); 7176 goto end_breaklock; 7177 } 7178 if( rename(tPath, cPath) ){ 7179 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); 7180 goto end_breaklock; 7181 } 7182 rc = 0; 7183 fprintf(stderr, "broke stale lock on %s\n", cPath); 7184 robust_close(pFile, conchFile->h, __LINE__); 7185 conchFile->h = fd; 7186 conchFile->openFlags = O_RDWR | O_CREAT; 7187 7188 end_breaklock: 7189 if( rc ){ 7190 if( fd>=0 ){ 7191 osUnlink(tPath); 7192 robust_close(pFile, fd, __LINE__); 7193 } 7194 fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); 7195 } 
7196 return rc; 7197 } 7198 7199 /* Take the requested lock on the conch file and break a stale lock if the 7200 ** host id matches. 7201 */ 7202 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ 7203 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7204 unixFile *conchFile = pCtx->conchFile; 7205 int rc = SQLITE_OK; 7206 int nTries = 0; 7207 struct timespec conchModTime; 7208 7209 memset(&conchModTime, 0, sizeof(conchModTime)); 7210 do { 7211 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 7212 nTries ++; 7213 if( rc==SQLITE_BUSY ){ 7214 /* If the lock failed (busy): 7215 * 1st try: get the mod time of the conch, wait 0.5s and try again. 7216 * 2nd try: fail if the mod time changed or host id is different, wait 7217 * 10 sec and try again 7218 * 3rd try: break the lock unless the mod time has changed. 7219 */ 7220 struct stat buf; 7221 if( osFstat(conchFile->h, &buf) ){ 7222 storeLastErrno(pFile, errno); 7223 return SQLITE_IOERR_LOCK; 7224 } 7225 7226 if( nTries==1 ){ 7227 conchModTime = buf.st_mtimespec; 7228 unixSleep(0,500000); /* wait 0.5 sec and try the lock again*/ 7229 continue; 7230 } 7231 7232 assert( nTries>1 ); 7233 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 7234 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ 7235 return SQLITE_BUSY; 7236 } 7237 7238 if( nTries==2 ){ 7239 char tBuf[PROXY_MAXCONCHLEN]; 7240 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); 7241 if( len<0 ){ 7242 storeLastErrno(pFile, errno); 7243 return SQLITE_IOERR_LOCK; 7244 } 7245 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ 7246 /* don't break the lock if the host id doesn't match */ 7247 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ 7248 return SQLITE_BUSY; 7249 } 7250 }else{ 7251 /* don't break the lock on short read or a version mismatch */ 7252 return SQLITE_BUSY; 7253 } 7254 unixSleep(0,10000000); /* wait 10 sec and try the lock again */ 7255 
continue;
      }

      assert( nTries==3 );
      if( 0==proxyBreakConchLock(pFile, myHostID) ){
        rc = SQLITE_OK;
        if( lockType==EXCLUSIVE_LOCK ){
          rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
        }
        if( !rc ){
          rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
        }
      }
    }
  } while( rc==SQLITE_BUSY && nTries<3 );

  return rc;
}

/*
** Take the conch for pFile unless pCtx->conchHeld is already non-zero
** (in which case SQLITE_OK is returned immediately).
**
** The conch file is locked SHARED and its contents are read.  The existing
** contents are accepted when the read is long enough, the first byte is
** PROXY_CONCHVERSION, the stored host ID equals this host's ID, and either:
**
**   (a) pCtx->lockProxyPath is NULL, in which case the path stored in the
**       conch is adopted as the lock proxy path, or
**   (b) pCtx->lockProxyPath matches the path stored in the conch (strncmp
**       over the number of path bytes that were read).
**
** Otherwise the conch is rewritten under an EXCLUSIVE conch lock with this
** host's ID and with either pCtx->lockProxyPath or, when that is NULL, a
** path produced by proxyGetLockPath().
**
** On success the database file descriptor pFile->h is closed and reopened
** from pCtx->dbPath (only when pFile->openFlags is non-zero), the lock proxy
** file pCtx->lockProxy is created if it does not exist yet, a heap copy of
** any stack-resident path is stored in pCtx->lockProxyPath, and
** pCtx->conchHeld is set to 1.  On failure the conch file is unlocked to
** NO_LOCK and the error code is returned.
*/
static int proxyTakeConch(unixFile *pFile){
  proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

  if( pCtx->conchHeld!=0 ){
    /* Either the conch is already held or conchHeld is negative, which the
    ** rest of this file treats as "lockless" mode. */
    return SQLITE_OK;
  }else{
    unixFile *conchFile = pCtx->conchFile;
    uuid_t myHostID;                 /* ID of this host, from proxyGetHostID() */
    int pError = 0;                  /* errno-style detail from proxyGetHostID() */
    char readBuf[PROXY_MAXCONCHLEN]; /* Raw contents read from the conch file */
    char lockPath[MAXPATHLEN];       /* Stack buffer for an adopted/generated path */
    char *tempLockPath = NULL;       /* Points at lockPath[] when it is in use */
    int rc = SQLITE_OK;
    int createConch = 0;             /* True if conch content must be rebuilt */
    int hostIdMatch = 0;             /* True if conch host ID equals myHostID */
    int readLen = 0;                 /* Number of bytes read from the conch */
    int tryOldLockPath = 0;          /* True while using the path from the conch */
    int forceNewLockPath = 0;        /* True on the retry pass: skip the match test */

    OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h,
             (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
             osGetpid(0)));

    /* NOTE: the "goto end_takeconch" statements below that precede the
    ** do-loop jump INTO the loop body, straight to the common exit code. */
    rc = proxyGetHostID(myHostID, &pError);
    if( (rc&0xff)==SQLITE_IOERR ){
      storeLastErrno(pFile, pError);
      goto end_takeconch;
    }
    rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
      goto end_takeconch;
    }
    /* read the existing conch file */
    readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
    if( readLen<0 ){
      /* I/O error: lastErrno set by seekAndRead */
      storeLastErrno(pFile, conchFile->lastErrno);
      rc = SQLITE_IOERR_READ;
      goto end_takeconch;
    }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) ||
             readBuf[0]!=(char)PROXY_CONCHVERSION ){
      /* a short read or version format mismatch means we need to create a new
      ** conch file.
      */
      createConch = 1;
    }
    /* if the host id matches and the lock path already exists in the conch
    ** we'll try to use the path there, if we can't open that path, we'll
    ** retry with a new auto-generated path
    */
    do { /* in case we need to try again for an :auto: named lock file */

      if( !createConch && !forceNewLockPath ){
        hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,
                                  PROXY_HOSTIDLEN);
        /* if the conch has data compare the contents */
        if( !pCtx->lockProxyPath ){
          /* for auto-named local lock file, just check the host ID and we'll
          ** use the local lock file path that's already in there
          */
          if( hostIdMatch ){
            size_t pathLen = (readLen - PROXY_PATHINDEX);

            /* Clamp so that the copy plus terminator fits in lockPath[] */
            if( pathLen>=MAXPATHLEN ){
              pathLen=MAXPATHLEN-1;
            }
            memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
            lockPath[pathLen] = 0;
            tempLockPath = lockPath;
            tryOldLockPath = 1;
            /* create a copy of the lock path if the conch is taken */
            goto end_takeconch;
          }
        }else if( hostIdMatch
               && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
                           readLen-PROXY_PATHINDEX)
        ){
          /* conch host and lock path match */
          goto end_takeconch;
        }
      }

      /* if the conch isn't writable and doesn't match, we can't take it */
      if( (conchFile->openFlags&O_RDWR) == 0 ){
        rc = SQLITE_BUSY;
        goto end_takeconch;
      }

      /* either the conch didn't match or we need to create a new one */
      if( !pCtx->lockProxyPath ){
        proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
        tempLockPath = lockPath;
        /* create a copy of the lock path _only_ if the conch is taken */
      }

      /* update conch with host and path (this will fail if other process
      ** has a shared lock already), if the host id matches, use the big
      ** stick.
      */
      futimes(conchFile->h, NULL);   /* return value is deliberately ignored */
      if( hostIdMatch && !createConch ){
        if( conchFile->pInode && conchFile->pInode->nShared>1 ){
          /* We are trying for an exclusive lock but another thread in this
          ** same process is still holding a shared lock. */
          rc = SQLITE_BUSY;
        } else {
          rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
        }
      }else{
        rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
      }
      if( rc==SQLITE_OK ){
        /* Conch layout as written here: version byte at offset 0, host ID
        ** at PROXY_HEADERLEN, lock proxy path at PROXY_PATHINDEX. */
        char writeBuffer[PROXY_MAXCONCHLEN];
        int writeSize = 0;

        writeBuffer[0] = (char)PROXY_CONCHVERSION;
        memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
        if( pCtx->lockProxyPath!=NULL ){
          strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath,
                  MAXPATHLEN);
        }else{
          strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
        }
        writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
        /* Results of the truncate and fsync are not checked; only the
        ** result of the write itself is kept in rc. */
        robust_ftruncate(conchFile->h, writeSize);
        rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
        full_fsync(conchFile->h,0,0);
        /* If we created a new conch file (not just updated the contents of a
        ** valid conch file), try to match the permissions of the database
        */
        if( rc==SQLITE_OK && createConch ){
          struct stat buf;
          int err = osFstat(pFile->h, &buf);
          if( err==0 ){
            mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
                                        S_IROTH|S_IWOTH);
            /* try to match the database file R/W permissions, ignore failure */
            /* NOTE: the #ifndef/#else/#endif below straddle the closing
            ** brace of "if( err==0 )": the debug build adds an else-branch,
            ** the normal build does not.  Note also that the debug build
            ** overwrites rc with the result of osFchmod(). */
#ifndef SQLITE_PROXY_DEBUG
            osFchmod(conchFile->h, cmode);
#else
            do{
              rc = osFchmod(conchFile->h, cmode);
            }while( rc==(-1) && errno==EINTR );
            if( rc!=0 ){
              int code = errno;
              fprintf(stderr, "fchmod %o FAILED with %d %s\n",
                      cmode, code, strerror(code));
            } else {
              fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
            }
          }else{
            int code = errno;
            fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
                    err, code, strerror(code));
#endif
          }
        }
      }
      /* Drop back from EXCLUSIVE to SHARED on the conch file */
      conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);

    end_takeconch:
      /* Common exit path.  Reached by falling through from above and by
      ** every "goto end_takeconch", including those before the do-loop. */
      OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
      if( rc==SQLITE_OK && pFile->openFlags ){
        /* Close and reopen the database file descriptor from pCtx->dbPath */
        int fd;
        if( pFile->h>=0 ){
          robust_close(pFile, pFile->h, __LINE__);
        }
        pFile->h = -1;
        fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
        OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
        if( fd>=0 ){
          pFile->h = fd;
        }else{
          rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
           during locking */
        }
      }
      if( rc==SQLITE_OK && !pCtx->lockProxy ){
        char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
        rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
        if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
          /* we couldn't create the proxy lock file with the old lock file path
          ** so try again via auto-naming
          */
          forceNewLockPath = 1;
          tryOldLockPath = 0;
          continue; /* go back to the do {} while start point, try again */
        }
      }
      if( rc==SQLITE_OK ){
        /* Need to make a copy of path if we extracted the value
        ** from the conch file or the path was allocated on the stack
        */
        if( tempLockPath ){
          pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
          if( !pCtx->lockProxyPath ){
            rc = SQLITE_NOMEM_BKPT;
          }
        }
      }
      if( rc==SQLITE_OK ){
        pCtx->conchHeld = 1;

        if( pCtx->lockProxy->pMethod == &afpIoMethods ){
          /* An AFP-style lock proxy keeps its own pointer to the path;
          ** point it at the copy owned by pCtx. */
          afpLockingContext *afpCtx;
          afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
          afpCtx->dbPath = pCtx->lockProxyPath;
        }
      } else {
        /* Failure: release whatever lock is held on the conch file */
        conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
      }
      OSTRACE(("TAKECONCH %d %s\n", conchFile->h,
               rc==SQLITE_OK?"ok":"failed"));
      return rc;
    } while (1); /* in case we need to retry the :auto: lock file -
                 ** we should never get here except via the 'continue' call. */
  }
}

/*
** If pFile holds a lock on a conch file, then release that lock.
7501 */ 7502 static int proxyReleaseConch(unixFile *pFile){ 7503 int rc = SQLITE_OK; /* Subroutine return code */ 7504 proxyLockingContext *pCtx; /* The locking context for the proxy lock */ 7505 unixFile *conchFile; /* Name of the conch file */ 7506 7507 pCtx = (proxyLockingContext *)pFile->lockingContext; 7508 conchFile = pCtx->conchFile; 7509 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, 7510 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 7511 osGetpid(0))); 7512 if( pCtx->conchHeld>0 ){ 7513 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7514 } 7515 pCtx->conchHeld = 0; 7516 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, 7517 (rc==SQLITE_OK ? "ok" : "failed"))); 7518 return rc; 7519 } 7520 7521 /* 7522 ** Given the name of a database file, compute the name of its conch file. 7523 ** Store the conch filename in memory obtained from sqlite3_malloc64(). 7524 ** Make *pConchPath point to the new name. Return SQLITE_OK on success 7525 ** or SQLITE_NOMEM if unable to obtain memory. 7526 ** 7527 ** The caller is responsible for ensuring that the allocated memory 7528 ** space is eventually freed. 7529 ** 7530 ** *pConchPath is set to NULL if a memory allocation error occurs. 7531 */ 7532 static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ 7533 int i; /* Loop counter */ 7534 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ 7535 char *conchPath; /* buffer in which to construct conch name */ 7536 7537 /* Allocate space for the conch filename and initialize the name to 7538 ** the name of the original database file. */ 7539 *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); 7540 if( conchPath==0 ){ 7541 return SQLITE_NOMEM_BKPT; 7542 } 7543 memcpy(conchPath, dbPath, len+1); 7544 7545 /* now insert a "." 
before the last / character */ 7546 for( i=(len-1); i>=0; i-- ){ 7547 if( conchPath[i]=='/' ){ 7548 i++; 7549 break; 7550 } 7551 } 7552 conchPath[i]='.'; 7553 while ( i<len ){ 7554 conchPath[i+1]=dbPath[i]; 7555 i++; 7556 } 7557 7558 /* append the "-conch" suffix to the file */ 7559 memcpy(&conchPath[i+1], "-conch", 7); 7560 assert( (int)strlen(conchPath) == len+7 ); 7561 7562 return SQLITE_OK; 7563 } 7564 7565 7566 /* Takes a fully configured proxy locking-style unix file and switches 7567 ** the local lock file path 7568 */ 7569 static int switchLockProxyPath(unixFile *pFile, const char *path) { 7570 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7571 char *oldPath = pCtx->lockProxyPath; 7572 int rc = SQLITE_OK; 7573 7574 if( pFile->eFileLock!=NO_LOCK ){ 7575 return SQLITE_BUSY; 7576 } 7577 7578 /* nothing to do if the path is NULL, :auto: or matches the existing path */ 7579 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || 7580 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ 7581 return SQLITE_OK; 7582 }else{ 7583 unixFile *lockProxy = pCtx->lockProxy; 7584 pCtx->lockProxy=NULL; 7585 pCtx->conchHeld = 0; 7586 if( lockProxy!=NULL ){ 7587 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); 7588 if( rc ) return rc; 7589 sqlite3_free(lockProxy); 7590 } 7591 sqlite3_free(oldPath); 7592 pCtx->lockProxyPath = sqlite3DbStrDup(0, path); 7593 } 7594 7595 return rc; 7596 } 7597 7598 /* 7599 ** pFile is a file that has been opened by a prior xOpen call. dbPath 7600 ** is a string buffer at least MAXPATHLEN+1 characters in size. 7601 ** 7602 ** This routine find the filename associated with pFile and writes it 7603 ** int dbPath. 
7604 */ 7605 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ 7606 #if defined(__APPLE__) 7607 if( pFile->pMethod == &afpIoMethods ){ 7608 /* afp style keeps a reference to the db path in the filePath field 7609 ** of the struct */ 7610 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7611 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, 7612 MAXPATHLEN); 7613 } else 7614 #endif 7615 if( pFile->pMethod == &dotlockIoMethods ){ 7616 /* dot lock style uses the locking context to store the dot lock 7617 ** file path */ 7618 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); 7619 memcpy(dbPath, (char *)pFile->lockingContext, len + 1); 7620 }else{ 7621 /* all other styles use the locking context to store the db file path */ 7622 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7623 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); 7624 } 7625 return SQLITE_OK; 7626 } 7627 7628 /* 7629 ** Takes an already filled in unix file and alters it so all file locking 7630 ** will be performed on the local proxy lock file. The following fields 7631 ** are preserved in the locking context so that they can be restored and 7632 ** the unix structure properly cleaned up at close time: 7633 ** ->lockingContext 7634 ** ->pMethod 7635 */ 7636 static int proxyTransformUnixFile(unixFile *pFile, const char *path) { 7637 proxyLockingContext *pCtx; 7638 char dbPath[MAXPATHLEN+1]; /* Name of the database file */ 7639 char *lockPath=NULL; 7640 int rc = SQLITE_OK; 7641 7642 if( pFile->eFileLock!=NO_LOCK ){ 7643 return SQLITE_BUSY; 7644 } 7645 proxyGetDbPathForUnixFile(pFile, dbPath); 7646 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ 7647 lockPath=NULL; 7648 }else{ 7649 lockPath=(char *)path; 7650 } 7651 7652 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, 7653 (lockPath ? 
lockPath : ":auto:"), osGetpid(0))); 7654 7655 pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 7656 if( pCtx==0 ){ 7657 return SQLITE_NOMEM_BKPT; 7658 } 7659 memset(pCtx, 0, sizeof(*pCtx)); 7660 7661 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); 7662 if( rc==SQLITE_OK ){ 7663 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); 7664 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ 7665 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and 7666 ** (c) the file system is read-only, then enable no-locking access. 7667 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts 7668 ** that openFlags will have only one of O_RDONLY or O_RDWR. 7669 */ 7670 struct statfs fsInfo; 7671 struct stat conchInfo; 7672 int goLockless = 0; 7673 7674 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { 7675 int err = errno; 7676 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ 7677 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; 7678 } 7679 } 7680 if( goLockless ){ 7681 pCtx->conchHeld = -1; /* read only FS/ lockless */ 7682 rc = SQLITE_OK; 7683 } 7684 } 7685 } 7686 if( rc==SQLITE_OK && lockPath ){ 7687 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); 7688 } 7689 7690 if( rc==SQLITE_OK ){ 7691 pCtx->dbPath = sqlite3DbStrDup(0, dbPath); 7692 if( pCtx->dbPath==NULL ){ 7693 rc = SQLITE_NOMEM_BKPT; 7694 } 7695 } 7696 if( rc==SQLITE_OK ){ 7697 /* all memory is allocated, proxys are created and assigned, 7698 ** switch the locking context and pMethod then return. 
7699 */ 7700 pCtx->oldLockingContext = pFile->lockingContext; 7701 pFile->lockingContext = pCtx; 7702 pCtx->pOldMethod = pFile->pMethod; 7703 pFile->pMethod = &proxyIoMethods; 7704 }else{ 7705 if( pCtx->conchFile ){ 7706 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); 7707 sqlite3_free(pCtx->conchFile); 7708 } 7709 sqlite3DbFree(0, pCtx->lockProxyPath); 7710 sqlite3_free(pCtx->conchFilePath); 7711 sqlite3_free(pCtx); 7712 } 7713 OSTRACE(("TRANSPROXY %d %s\n", pFile->h, 7714 (rc==SQLITE_OK ? "ok" : "failed"))); 7715 return rc; 7716 } 7717 7718 7719 /* 7720 ** This routine handles sqlite3_file_control() calls that are specific 7721 ** to proxy locking. 7722 */ 7723 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 7724 switch( op ){ 7725 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 7726 unixFile *pFile = (unixFile*)id; 7727 if( pFile->pMethod == &proxyIoMethods ){ 7728 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7729 proxyTakeConch(pFile); 7730 if( pCtx->lockProxyPath ){ 7731 *(const char **)pArg = pCtx->lockProxyPath; 7732 }else{ 7733 *(const char **)pArg = ":auto: (not held)"; 7734 } 7735 } else { 7736 *(const char **)pArg = NULL; 7737 } 7738 return SQLITE_OK; 7739 } 7740 case SQLITE_FCNTL_SET_LOCKPROXYFILE: { 7741 unixFile *pFile = (unixFile*)id; 7742 int rc = SQLITE_OK; 7743 int isProxyStyle = (pFile->pMethod == &proxyIoMethods); 7744 if( pArg==NULL || (const char *)pArg==0 ){ 7745 if( isProxyStyle ){ 7746 /* turn off proxy locking - not supported. If support is added for 7747 ** switching proxy locking mode off then it will need to fail if 7748 ** the journal mode is WAL mode. 7749 */ 7750 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? 
SQLITE_MISUSE?*/; 7751 }else{ 7752 /* turn off proxy locking - already off - NOOP */ 7753 rc = SQLITE_OK; 7754 } 7755 }else{ 7756 const char *proxyPath = (const char *)pArg; 7757 if( isProxyStyle ){ 7758 proxyLockingContext *pCtx = 7759 (proxyLockingContext*)pFile->lockingContext; 7760 if( !strcmp(pArg, ":auto:") 7761 || (pCtx->lockProxyPath && 7762 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) 7763 ){ 7764 rc = SQLITE_OK; 7765 }else{ 7766 rc = switchLockProxyPath(pFile, proxyPath); 7767 } 7768 }else{ 7769 /* turn on proxy file locking */ 7770 rc = proxyTransformUnixFile(pFile, proxyPath); 7771 } 7772 } 7773 return rc; 7774 } 7775 default: { 7776 assert( 0 ); /* The call assures that only valid opcodes are sent */ 7777 } 7778 } 7779 /*NOTREACHED*/ assert(0); 7780 return SQLITE_ERROR; 7781 } 7782 7783 /* 7784 ** Within this division (the proxying locking implementation) the procedures 7785 ** above this point are all utilities. The lock-related methods of the 7786 ** proxy-locking sqlite3_io_method object follow. 7787 */ 7788 7789 7790 /* 7791 ** This routine checks if there is a RESERVED lock held on the specified 7792 ** file by this or any other process. If such a lock is held, set *pResOut 7793 ** to a non-zero value otherwise *pResOut is set to zero. The return value 7794 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 
7795 */ 7796 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { 7797 unixFile *pFile = (unixFile*)id; 7798 int rc = proxyTakeConch(pFile); 7799 if( rc==SQLITE_OK ){ 7800 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7801 if( pCtx->conchHeld>0 ){ 7802 unixFile *proxy = pCtx->lockProxy; 7803 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); 7804 }else{ /* conchHeld < 0 is lockless */ 7805 pResOut=0; 7806 } 7807 } 7808 return rc; 7809 } 7810 7811 /* 7812 ** Lock the file with the lock specified by parameter eFileLock - one 7813 ** of the following: 7814 ** 7815 ** (1) SHARED_LOCK 7816 ** (2) RESERVED_LOCK 7817 ** (3) PENDING_LOCK 7818 ** (4) EXCLUSIVE_LOCK 7819 ** 7820 ** Sometimes when requesting one lock state, additional lock states 7821 ** are inserted in between. The locking might fail on one of the later 7822 ** transitions leaving the lock state different from what it started but 7823 ** still short of its goal. The following chart shows the allowed 7824 ** transitions and the inserted intermediate states: 7825 ** 7826 ** UNLOCKED -> SHARED 7827 ** SHARED -> RESERVED 7828 ** SHARED -> (PENDING) -> EXCLUSIVE 7829 ** RESERVED -> (PENDING) -> EXCLUSIVE 7830 ** PENDING -> EXCLUSIVE 7831 ** 7832 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 7833 ** routine to lower a locking level. 7834 */ 7835 static int proxyLock(sqlite3_file *id, int eFileLock) { 7836 unixFile *pFile = (unixFile*)id; 7837 int rc = proxyTakeConch(pFile); 7838 if( rc==SQLITE_OK ){ 7839 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7840 if( pCtx->conchHeld>0 ){ 7841 unixFile *proxy = pCtx->lockProxy; 7842 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); 7843 pFile->eFileLock = proxy->eFileLock; 7844 }else{ 7845 /* conchHeld < 0 is lockless */ 7846 } 7847 } 7848 return rc; 7849 } 7850 7851 7852 /* 7853 ** Lower the locking level on file descriptor pFile to eFileLock. 
eFileLock 7854 ** must be either NO_LOCK or SHARED_LOCK. 7855 ** 7856 ** If the locking level of the file descriptor is already at or below 7857 ** the requested locking level, this routine is a no-op. 7858 */ 7859 static int proxyUnlock(sqlite3_file *id, int eFileLock) { 7860 unixFile *pFile = (unixFile*)id; 7861 int rc = proxyTakeConch(pFile); 7862 if( rc==SQLITE_OK ){ 7863 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7864 if( pCtx->conchHeld>0 ){ 7865 unixFile *proxy = pCtx->lockProxy; 7866 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); 7867 pFile->eFileLock = proxy->eFileLock; 7868 }else{ 7869 /* conchHeld < 0 is lockless */ 7870 } 7871 } 7872 return rc; 7873 } 7874 7875 /* 7876 ** Close a file that uses proxy locks. 7877 */ 7878 static int proxyClose(sqlite3_file *id) { 7879 if( ALWAYS(id) ){ 7880 unixFile *pFile = (unixFile*)id; 7881 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7882 unixFile *lockProxy = pCtx->lockProxy; 7883 unixFile *conchFile = pCtx->conchFile; 7884 int rc = SQLITE_OK; 7885 7886 if( lockProxy ){ 7887 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); 7888 if( rc ) return rc; 7889 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); 7890 if( rc ) return rc; 7891 sqlite3_free(lockProxy); 7892 pCtx->lockProxy = 0; 7893 } 7894 if( conchFile ){ 7895 if( pCtx->conchHeld ){ 7896 rc = proxyReleaseConch(pFile); 7897 if( rc ) return rc; 7898 } 7899 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); 7900 if( rc ) return rc; 7901 sqlite3_free(conchFile); 7902 } 7903 sqlite3DbFree(0, pCtx->lockProxyPath); 7904 sqlite3_free(pCtx->conchFilePath); 7905 sqlite3DbFree(0, pCtx->dbPath); 7906 /* restore the original locking context and pMethod then close it */ 7907 pFile->lockingContext = pCtx->oldLockingContext; 7908 pFile->pMethod = pCtx->pOldMethod; 7909 sqlite3_free(pCtx); 7910 return pFile->pMethod->xClose(id); 7911 } 7912 return SQLITE_OK; 7913 } 
7914 7915 7916 7917 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 7918 /* 7919 ** The proxy locking style is intended for use with AFP filesystems. 7920 ** And since AFP is only supported on MacOSX, the proxy locking is also 7921 ** restricted to MacOSX. 7922 ** 7923 ** 7924 ******************* End of the proxy lock implementation ********************** 7925 ******************************************************************************/ 7926 7927 /* 7928 ** Initialize the operating system interface. 7929 ** 7930 ** This routine registers all VFS implementations for unix-like operating 7931 ** systems. This routine, and the sqlite3_os_end() routine that follows, 7932 ** should be the only routines in this file that are visible from other 7933 ** files. 7934 ** 7935 ** This routine is called once during SQLite initialization and by a 7936 ** single thread. The memory allocation and mutex subsystems have not 7937 ** necessarily been initialized when this routine is called, and so they 7938 ** should not be used. 7939 */ 7940 int sqlite3_os_init(void){ 7941 /* 7942 ** The following macro defines an initializer for an sqlite3_vfs object. 7943 ** The name of the VFS is NAME. The pAppData is a pointer to a pointer 7944 ** to the "finder" function. (pAppData is a pointer to a pointer because 7945 ** silly C90 rules prohibit a void* from being cast to a function pointer 7946 ** and so we have to go through the intermediate pointer to avoid problems 7947 ** when compiling with -pedantic-errors on GCC.) 7948 ** 7949 ** The FINDER parameter to this macro is the name of the pointer to the 7950 ** finder-function. The finder-function returns a pointer to the 7951 ** sqlite_io_methods object that implements the desired locking 7952 ** behaviors. See the division above that contains the IOMETHODS 7953 ** macro for addition information on finder-functions. 7954 ** 7955 ** Most finders simply return a pointer to a fixed sqlite3_io_methods 7956 ** object. 
But the "autolockIoFinder" available on MacOSX does a little 7957 ** more than that; it looks at the filesystem type that hosts the 7958 ** database file and tries to choose an locking method appropriate for 7959 ** that filesystem time. 7960 */ 7961 #define UNIXVFS(VFSNAME, FINDER) { \ 7962 3, /* iVersion */ \ 7963 sizeof(unixFile), /* szOsFile */ \ 7964 MAX_PATHNAME, /* mxPathname */ \ 7965 0, /* pNext */ \ 7966 VFSNAME, /* zName */ \ 7967 (void*)&FINDER, /* pAppData */ \ 7968 unixOpen, /* xOpen */ \ 7969 unixDelete, /* xDelete */ \ 7970 unixAccess, /* xAccess */ \ 7971 unixFullPathname, /* xFullPathname */ \ 7972 unixDlOpen, /* xDlOpen */ \ 7973 unixDlError, /* xDlError */ \ 7974 unixDlSym, /* xDlSym */ \ 7975 unixDlClose, /* xDlClose */ \ 7976 unixRandomness, /* xRandomness */ \ 7977 unixSleep, /* xSleep */ \ 7978 unixCurrentTime, /* xCurrentTime */ \ 7979 unixGetLastError, /* xGetLastError */ \ 7980 unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ 7981 unixSetSystemCall, /* xSetSystemCall */ \ 7982 unixGetSystemCall, /* xGetSystemCall */ \ 7983 unixNextSystemCall, /* xNextSystemCall */ \ 7984 } 7985 7986 /* 7987 ** All default VFSes for unix are contained in the following array. 7988 ** 7989 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified 7990 ** by the SQLite core when the VFS is registered. So the following 7991 ** array cannot be const. 
7992 */ 7993 static sqlite3_vfs aVfs[] = { 7994 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 7995 UNIXVFS("unix", autolockIoFinder ), 7996 #elif OS_VXWORKS 7997 UNIXVFS("unix", vxworksIoFinder ), 7998 #else 7999 UNIXVFS("unix", posixIoFinder ), 8000 #endif 8001 UNIXVFS("unix-none", nolockIoFinder ), 8002 UNIXVFS("unix-dotfile", dotlockIoFinder ), 8003 UNIXVFS("unix-excl", posixIoFinder ), 8004 #if OS_VXWORKS 8005 UNIXVFS("unix-namedsem", semIoFinder ), 8006 #endif 8007 #if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS 8008 UNIXVFS("unix-posix", posixIoFinder ), 8009 #endif 8010 #if SQLITE_ENABLE_LOCKING_STYLE 8011 UNIXVFS("unix-flock", flockIoFinder ), 8012 #endif 8013 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 8014 UNIXVFS("unix-afp", afpIoFinder ), 8015 UNIXVFS("unix-nfs", nfsIoFinder ), 8016 UNIXVFS("unix-proxy", proxyIoFinder ), 8017 #endif 8018 }; 8019 unsigned int i; /* Loop counter */ 8020 8021 /* Double-check that the aSyscall[] array has been constructed 8022 ** correctly. See ticket [bb3a86e890c8e96ab] */ 8023 assert( ArraySize(aSyscall)==29 ); 8024 8025 /* Register all VFSes defined in the aVfs[] array */ 8026 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ 8027 sqlite3_vfs_register(&aVfs[i], i==0); 8028 } 8029 unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); 8030 return SQLITE_OK; 8031 } 8032 8033 /* 8034 ** Shutdown the operating system interface. 8035 ** 8036 ** Some operating systems might need to do some cleanup in this routine, 8037 ** to release dynamically allocated objects. But not on unix. 8038 ** This routine is a no-op for unix. 8039 */ 8040 int sqlite3_os_end(void){ 8041 unixBigLock = 0; 8042 return SQLITE_OK; 8043 } 8044 8045 #endif /* SQLITE_OS_UNIX */ 8046