1 /* 2 ** 2004 May 22 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the VFS implementation for unix-like operating systems 14 ** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. 15 ** 16 ** There are actually several different VFS implementations in this file. 17 ** The differences are in the way that file locking is done. The default 18 ** implementation uses Posix Advisory Locks. Alternative implementations 19 ** use flock(), dot-files, various proprietary locking schemas, or simply 20 ** skip locking all together. 21 ** 22 ** This source file is organized into divisions where the logic for various 23 ** subfunctions is contained within the appropriate division. PLEASE 24 ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed 25 ** in the correct division and should be clearly labeled. 26 ** 27 ** The layout of divisions is as follows: 28 ** 29 ** * General-purpose declarations and utility functions. 30 ** * Unique file ID logic used by VxWorks. 31 ** * Various locking primitive implementations (all except proxy locking): 32 ** + for Posix Advisory Locks 33 ** + for no-op locks 34 ** + for dot-file locks 35 ** + for flock() locking 36 ** + for named semaphore locks (VxWorks only) 37 ** + for AFP filesystem locks (MacOSX only) 38 ** * sqlite3_file methods not associated with locking. 39 ** * Definitions of sqlite3_io_methods objects for all locking 40 ** methods plus "finder" functions for each locking method. 41 ** * sqlite3_vfs method implementations. 42 ** * Locking primitives for the proxy uber-locking-method. 
(MacOSX only) 43 ** * Definitions of sqlite3_vfs objects for all locking methods 44 ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). 45 */ 46 #include "sqliteInt.h" 47 #if SQLITE_OS_UNIX /* This file is used on unix only */ 48 49 /* 50 ** There are various methods for file locking used for concurrency 51 ** control: 52 ** 53 ** 1. POSIX locking (the default), 54 ** 2. No locking, 55 ** 3. Dot-file locking, 56 ** 4. flock() locking, 57 ** 5. AFP locking (OSX only), 58 ** 6. Named POSIX semaphores (VXWorks only), 59 ** 7. proxy locking. (OSX only) 60 ** 61 ** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE 62 ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic 63 ** selection of the appropriate locking style based on the filesystem 64 ** where the database is located. 65 */ 66 #if !defined(SQLITE_ENABLE_LOCKING_STYLE) 67 # if defined(__APPLE__) 68 # define SQLITE_ENABLE_LOCKING_STYLE 1 69 # else 70 # define SQLITE_ENABLE_LOCKING_STYLE 0 71 # endif 72 #endif 73 74 /* Use pread() and pwrite() if they are available */ 75 #if defined(__APPLE__) 76 # define HAVE_PREAD 1 77 # define HAVE_PWRITE 1 78 #endif 79 #if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64) 80 # undef USE_PREAD 81 # define USE_PREAD64 1 82 #elif defined(HAVE_PREAD) && defined(HAVE_PWRITE) 83 # undef USE_PREAD64 84 # define USE_PREAD 1 85 #endif 86 87 /* 88 ** standard include files. 
89 */ 90 #include <sys/types.h> /* amalgamator: keep */ 91 #include <sys/stat.h> /* amalgamator: keep */ 92 #include <fcntl.h> 93 #include <sys/ioctl.h> 94 #include <unistd.h> /* amalgamator: keep */ 95 #include <time.h> 96 #include <sys/time.h> /* amalgamator: keep */ 97 #include <errno.h> 98 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 99 # include <sys/mman.h> 100 #endif 101 102 #if SQLITE_ENABLE_LOCKING_STYLE 103 # include <sys/ioctl.h> 104 # include <sys/file.h> 105 # include <sys/param.h> 106 #endif /* SQLITE_ENABLE_LOCKING_STYLE */ 107 108 /* 109 ** Try to determine if gethostuuid() is available based on standard 110 ** macros. This might sometimes compute the wrong value for some 111 ** obscure platforms. For those cases, simply compile with one of 112 ** the following: 113 ** 114 ** -DHAVE_GETHOSTUUID=0 115 ** -DHAVE_GETHOSTUUID=1 116 ** 117 ** None if this matters except when building on Apple products with 118 ** -DSQLITE_ENABLE_LOCKING_STYLE. 119 */ 120 #ifndef HAVE_GETHOSTUUID 121 # define HAVE_GETHOSTUUID 0 122 # if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ 123 (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) 124 # if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ 125 && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))\ 126 && (!defined(TARGET_OS_MACCATALYST) || (TARGET_OS_MACCATALYST==0)) 127 # undef HAVE_GETHOSTUUID 128 # define HAVE_GETHOSTUUID 1 129 # else 130 # warning "gethostuuid() is disabled." 
131 # endif 132 # endif 133 #endif 134 135 136 #if OS_VXWORKS 137 # include <sys/ioctl.h> 138 # include <semaphore.h> 139 # include <limits.h> 140 #endif /* OS_VXWORKS */ 141 142 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 143 # include <sys/mount.h> 144 #endif 145 146 #ifdef HAVE_UTIME 147 # include <utime.h> 148 #endif 149 150 /* 151 ** Allowed values of unixFile.fsFlags 152 */ 153 #define SQLITE_FSFLAGS_IS_MSDOS 0x1 154 155 /* 156 ** If we are to be thread-safe, include the pthreads header. 157 */ 158 #if SQLITE_THREADSAFE 159 # include <pthread.h> 160 #endif 161 162 /* 163 ** Default permissions when creating a new file 164 */ 165 #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS 166 # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 167 #endif 168 169 /* 170 ** Default permissions when creating auto proxy dir 171 */ 172 #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 173 # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 174 #endif 175 176 /* 177 ** Maximum supported path-length. 178 */ 179 #define MAX_PATHNAME 512 180 181 /* 182 ** Maximum supported symbolic links 183 */ 184 #define SQLITE_MAX_SYMLINKS 100 185 186 /* Always cast the getpid() return type for compatibility with 187 ** kernel modules in VxWorks. */ 188 #define osGetpid(X) (pid_t)getpid() 189 190 /* 191 ** Only set the lastErrno if the error code is a real error and not 192 ** a normal expected return code of SQLITE_BUSY or SQLITE_OK 193 */ 194 #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) 195 196 /* Forward references */ 197 typedef struct unixShm unixShm; /* Connection shared memory */ 198 typedef struct unixShmNode unixShmNode; /* Shared memory instance */ 199 typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ 200 typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ 201 202 /* 203 ** Sometimes, after a file handle is closed by SQLite, the file descriptor 204 ** cannot be closed immediately. 
In these cases, instances of the following 205 ** structure are used to store the file descriptor while waiting for an 206 ** opportunity to either close or reuse it. 207 */ 208 struct UnixUnusedFd { 209 int fd; /* File descriptor to close */ 210 int flags; /* Flags this file descriptor was opened with */ 211 UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ 212 }; 213 214 /* 215 ** The unixFile structure is subclass of sqlite3_file specific to the unix 216 ** VFS implementations. 217 */ 218 typedef struct unixFile unixFile; 219 struct unixFile { 220 sqlite3_io_methods const *pMethod; /* Always the first entry */ 221 sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ 222 unixInodeInfo *pInode; /* Info about locks on this inode */ 223 int h; /* The file descriptor */ 224 unsigned char eFileLock; /* The type of lock held on this fd */ 225 unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ 226 int lastErrno; /* The unix errno from last I/O error */ 227 void *lockingContext; /* Locking style specific state */ 228 UnixUnusedFd *pPreallocatedUnused; /* Pre-allocated UnixUnusedFd */ 229 const char *zPath; /* Name of the file */ 230 unixShm *pShm; /* Shared memory segment information */ 231 int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ 232 #if SQLITE_MAX_MMAP_SIZE>0 233 int nFetchOut; /* Number of outstanding xFetch refs */ 234 sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ 235 sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ 236 sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ 237 void *pMapRegion; /* Memory mapped region */ 238 #endif 239 int sectorSize; /* Device sector size */ 240 int deviceCharacteristics; /* Precomputed device characteristics */ 241 #if SQLITE_ENABLE_LOCKING_STYLE 242 int openFlags; /* The flags specified at open() */ 243 #endif 244 #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) 245 unsigned fsFlags; /* cached details from statfs() */ 
246 #endif 247 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 248 unsigned iBusyTimeout; /* Wait this many millisec on locks */ 249 #endif 250 #if OS_VXWORKS 251 struct vxworksFileId *pId; /* Unique file ID */ 252 #endif 253 #ifdef SQLITE_DEBUG 254 /* The next group of variables are used to track whether or not the 255 ** transaction counter in bytes 24-27 of database files are updated 256 ** whenever any part of the database changes. An assertion fault will 257 ** occur if a file is updated without also updating the transaction 258 ** counter. This test is made to avoid new problems similar to the 259 ** one described by ticket #3584. 260 */ 261 unsigned char transCntrChng; /* True if the transaction counter changed */ 262 unsigned char dbUpdate; /* True if any part of database file changed */ 263 unsigned char inNormalWrite; /* True if in a normal write operation */ 264 265 #endif 266 267 #ifdef SQLITE_TEST 268 /* In test mode, increase the size of this structure a bit so that 269 ** it is larger than the struct CrashFile defined in test6.c. 270 */ 271 char aPadding[32]; 272 #endif 273 }; 274 275 /* This variable holds the process id (pid) from when the xRandomness() 276 ** method was called. If xOpen() is called from a different process id, 277 ** indicating that a fork() has occurred, the PRNG will be reset. 
278 */ 279 static pid_t randomnessPid = 0; 280 281 /* 282 ** Allowed values for the unixFile.ctrlFlags bitmask: 283 */ 284 #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ 285 #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ 286 #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ 287 #ifndef SQLITE_DISABLE_DIRSYNC 288 # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ 289 #else 290 # define UNIXFILE_DIRSYNC 0x00 291 #endif 292 #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ 293 #define UNIXFILE_DELETE 0x20 /* Delete on close */ 294 #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ 295 #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ 296 297 /* 298 ** Include code that is common to all os_*.c files 299 */ 300 #include "os_common.h" 301 302 /* 303 ** Define various macros that are missing from some systems. 304 */ 305 #ifndef O_LARGEFILE 306 # define O_LARGEFILE 0 307 #endif 308 #ifdef SQLITE_DISABLE_LFS 309 # undef O_LARGEFILE 310 # define O_LARGEFILE 0 311 #endif 312 #ifndef O_NOFOLLOW 313 # define O_NOFOLLOW 0 314 #endif 315 #ifndef O_BINARY 316 # define O_BINARY 0 317 #endif 318 319 /* 320 ** The threadid macro resolves to the thread-id or to 0. Used for 321 ** testing and debugging only. 322 */ 323 #if SQLITE_THREADSAFE 324 #define threadid pthread_self() 325 #else 326 #define threadid 0 327 #endif 328 329 /* 330 ** HAVE_MREMAP defaults to true on Linux and false everywhere else. 331 */ 332 #if !defined(HAVE_MREMAP) 333 # if defined(__linux__) && defined(_GNU_SOURCE) 334 # define HAVE_MREMAP 1 335 # else 336 # define HAVE_MREMAP 0 337 # endif 338 #endif 339 340 /* 341 ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() 342 ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. 
343 */ 344 #ifdef __ANDROID__ 345 # define lseek lseek64 346 #endif 347 348 #ifdef __linux__ 349 /* 350 ** Linux-specific IOCTL magic numbers used for controlling F2FS 351 */ 352 #define F2FS_IOCTL_MAGIC 0xf5 353 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) 354 #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) 355 #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 356 #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 357 #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) 358 #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 359 #endif /* __linux__ */ 360 361 362 /* 363 ** Different Unix systems declare open() in different ways. Same use 364 ** open(const char*,int,mode_t). Others use open(const char*,int,...). 365 ** The difference is important when using a pointer to the function. 366 ** 367 ** The safest way to deal with the problem is to always use this wrapper 368 ** which always has the same well-defined interface. 369 */ 370 static int posixOpen(const char *zFile, int flags, int mode){ 371 return open(zFile, flags, mode); 372 } 373 374 /* Forward reference */ 375 static int openDirectory(const char*, int*); 376 static int unixGetpagesize(void); 377 378 /* 379 ** Many system calls are accessed through pointer-to-functions so that 380 ** they may be overridden at runtime to facilitate fault injection during 381 ** testing and sandboxing. The following array holds the names and pointers 382 ** to all overrideable system calls. 
383 */ 384 static struct unix_syscall { 385 const char *zName; /* Name of the system call */ 386 sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ 387 sqlite3_syscall_ptr pDefault; /* Default value */ 388 } aSyscall[] = { 389 { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, 390 #define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) 391 392 { "close", (sqlite3_syscall_ptr)close, 0 }, 393 #define osClose ((int(*)(int))aSyscall[1].pCurrent) 394 395 { "access", (sqlite3_syscall_ptr)access, 0 }, 396 #define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) 397 398 { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, 399 #define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) 400 401 { "stat", (sqlite3_syscall_ptr)stat, 0 }, 402 #define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) 403 404 /* 405 ** The DJGPP compiler environment looks mostly like Unix, but it 406 ** lacks the fcntl() system call. So redefine fcntl() to be something 407 ** that always succeeds. This means that locking does not occur under 408 ** DJGPP. But it is DOS - what did you expect? 
409 */ 410 #ifdef __DJGPP__ 411 { "fstat", 0, 0 }, 412 #define osFstat(a,b,c) 0 413 #else 414 { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, 415 #define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) 416 #endif 417 418 { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, 419 #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) 420 421 { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, 422 #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) 423 424 { "read", (sqlite3_syscall_ptr)read, 0 }, 425 #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) 426 427 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 428 { "pread", (sqlite3_syscall_ptr)pread, 0 }, 429 #else 430 { "pread", (sqlite3_syscall_ptr)0, 0 }, 431 #endif 432 #define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) 433 434 #if defined(USE_PREAD64) 435 { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, 436 #else 437 { "pread64", (sqlite3_syscall_ptr)0, 0 }, 438 #endif 439 #define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent) 440 441 { "write", (sqlite3_syscall_ptr)write, 0 }, 442 #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) 443 444 #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE 445 { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, 446 #else 447 { "pwrite", (sqlite3_syscall_ptr)0, 0 }, 448 #endif 449 #define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ 450 aSyscall[12].pCurrent) 451 452 #if defined(USE_PREAD64) 453 { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, 454 #else 455 { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, 456 #endif 457 #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ 458 aSyscall[13].pCurrent) 459 460 { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, 461 #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) 462 463 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 464 { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, 465 #else 466 { 
"fallocate", (sqlite3_syscall_ptr)0, 0 }, 467 #endif 468 #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) 469 470 { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, 471 #define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) 472 473 { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, 474 #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) 475 476 { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, 477 #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) 478 479 { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, 480 #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) 481 482 #if defined(HAVE_FCHOWN) 483 { "fchown", (sqlite3_syscall_ptr)fchown, 0 }, 484 #else 485 { "fchown", (sqlite3_syscall_ptr)0, 0 }, 486 #endif 487 #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) 488 489 #if defined(HAVE_FCHOWN) 490 { "geteuid", (sqlite3_syscall_ptr)geteuid, 0 }, 491 #else 492 { "geteuid", (sqlite3_syscall_ptr)0, 0 }, 493 #endif 494 #define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) 495 496 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 497 { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, 498 #else 499 { "mmap", (sqlite3_syscall_ptr)0, 0 }, 500 #endif 501 #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) 502 503 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 504 { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, 505 #else 506 { "munmap", (sqlite3_syscall_ptr)0, 0 }, 507 #endif 508 #define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent) 509 510 #if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) 511 { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, 512 #else 513 { "mremap", (sqlite3_syscall_ptr)0, 0 }, 514 #endif 515 #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent) 516 517 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 518 { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, 519 #else 520 { 
"getpagesize", (sqlite3_syscall_ptr)0, 0 }, 521 #endif 522 #define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent) 523 524 #if defined(HAVE_READLINK) 525 { "readlink", (sqlite3_syscall_ptr)readlink, 0 }, 526 #else 527 { "readlink", (sqlite3_syscall_ptr)0, 0 }, 528 #endif 529 #define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent) 530 531 #if defined(HAVE_LSTAT) 532 { "lstat", (sqlite3_syscall_ptr)lstat, 0 }, 533 #else 534 { "lstat", (sqlite3_syscall_ptr)0, 0 }, 535 #endif 536 #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) 537 538 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 539 # ifdef __ANDROID__ 540 { "ioctl", (sqlite3_syscall_ptr)(int(*)(int, int, ...))ioctl, 0 }, 541 #define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) 542 # else 543 { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, 544 #define osIoctl ((int(*)(int,unsigned long,...))aSyscall[28].pCurrent) 545 # endif 546 #else 547 { "ioctl", (sqlite3_syscall_ptr)0, 0 }, 548 #endif 549 550 }; /* End of the overrideable system calls */ 551 552 553 /* 554 ** On some systems, calls to fchown() will trigger a message in a security 555 ** log if they come from non-root processes. So avoid calling fchown() if 556 ** we are not running as root. 557 */ 558 static int robustFchown(int fd, uid_t uid, gid_t gid){ 559 #if defined(HAVE_FCHOWN) 560 return osGeteuid() ? 0 : osFchown(fd,uid,gid); 561 #else 562 return 0; 563 #endif 564 } 565 566 /* 567 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the 568 ** "unix" VFSes. Return SQLITE_OK opon successfully updating the 569 ** system call pointer, or SQLITE_NOTFOUND if there is no configurable 570 ** system call named zName. 571 */ 572 static int unixSetSystemCall( 573 sqlite3_vfs *pNotUsed, /* The VFS pointer. 
Not used */ 574 const char *zName, /* Name of system call to override */ 575 sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ 576 ){ 577 unsigned int i; 578 int rc = SQLITE_NOTFOUND; 579 580 UNUSED_PARAMETER(pNotUsed); 581 if( zName==0 ){ 582 /* If no zName is given, restore all system calls to their default 583 ** settings and return NULL 584 */ 585 rc = SQLITE_OK; 586 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 587 if( aSyscall[i].pDefault ){ 588 aSyscall[i].pCurrent = aSyscall[i].pDefault; 589 } 590 } 591 }else{ 592 /* If zName is specified, operate on only the one system call 593 ** specified. 594 */ 595 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 596 if( strcmp(zName, aSyscall[i].zName)==0 ){ 597 if( aSyscall[i].pDefault==0 ){ 598 aSyscall[i].pDefault = aSyscall[i].pCurrent; 599 } 600 rc = SQLITE_OK; 601 if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; 602 aSyscall[i].pCurrent = pNewFunc; 603 break; 604 } 605 } 606 } 607 return rc; 608 } 609 610 /* 611 ** Return the value of a system call. Return NULL if zName is not a 612 ** recognized system call name. NULL is also returned if the system call 613 ** is currently undefined. 614 */ 615 static sqlite3_syscall_ptr unixGetSystemCall( 616 sqlite3_vfs *pNotUsed, 617 const char *zName 618 ){ 619 unsigned int i; 620 621 UNUSED_PARAMETER(pNotUsed); 622 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ 623 if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; 624 } 625 return 0; 626 } 627 628 /* 629 ** Return the name of the first system call after zName. If zName==NULL 630 ** then return the name of the first system call. Return NULL if zName 631 ** is the last system call or if zName is not the name of a valid 632 ** system call. 
633 */ 634 static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ 635 int i = -1; 636 637 UNUSED_PARAMETER(p); 638 if( zName ){ 639 for(i=0; i<ArraySize(aSyscall)-1; i++){ 640 if( strcmp(zName, aSyscall[i].zName)==0 ) break; 641 } 642 } 643 for(i++; i<ArraySize(aSyscall); i++){ 644 if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; 645 } 646 return 0; 647 } 648 649 /* 650 ** Do not accept any file descriptor less than this value, in order to avoid 651 ** opening database file using file descriptors that are commonly used for 652 ** standard input, output, and error. 653 */ 654 #ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR 655 # define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 656 #endif 657 658 /* 659 ** Invoke open(). Do so multiple times, until it either succeeds or 660 ** fails for some reason other than EINTR. 661 ** 662 ** If the file creation mode "m" is 0 then set it to the default for 663 ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally 664 ** 0644) as modified by the system umask. If m is not 0, then 665 ** make the file creation mode be exactly m ignoring the umask. 666 ** 667 ** The m parameter will be non-zero only when creating -wal, -journal, 668 ** and -shm files. We want those files to have *exactly* the same 669 ** permissions as their original database, unadulterated by the umask. 670 ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a 671 ** transaction crashes and leaves behind hot journals, then any 672 ** process that is able to write to the database will also be able to 673 ** recover the hot journals. 674 */ 675 static int robust_open(const char *z, int f, mode_t m){ 676 int fd; 677 mode_t m2 = m ? 
m : SQLITE_DEFAULT_FILE_PERMISSIONS; 678 while(1){ 679 #if defined(O_CLOEXEC) 680 fd = osOpen(z,f|O_CLOEXEC,m2); 681 #else 682 fd = osOpen(z,f,m2); 683 #endif 684 if( fd<0 ){ 685 if( errno==EINTR ) continue; 686 break; 687 } 688 if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; 689 osClose(fd); 690 sqlite3_log(SQLITE_WARNING, 691 "attempt to open \"%s\" as file descriptor %d", z, fd); 692 fd = -1; 693 if( osOpen("/dev/null", O_RDONLY, m)<0 ) break; 694 } 695 if( fd>=0 ){ 696 if( m!=0 ){ 697 struct stat statbuf; 698 if( osFstat(fd, &statbuf)==0 699 && statbuf.st_size==0 700 && (statbuf.st_mode&0777)!=m 701 ){ 702 osFchmod(fd, m); 703 } 704 } 705 #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) 706 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); 707 #endif 708 } 709 return fd; 710 } 711 712 /* 713 ** Helper functions to obtain and relinquish the global mutex. The 714 ** global mutex is used to protect the unixInodeInfo and 715 ** vxworksFileId objects used by this file, all of which may be 716 ** shared by multiple threads. 717 ** 718 ** Function unixMutexHeld() is used to assert() that the global mutex 719 ** is held when required. This function is only used as part of assert() 720 ** statements. e.g. 721 ** 722 ** unixEnterMutex() 723 ** assert( unixMutexHeld() ); 724 ** unixEnterLeave() 725 ** 726 ** To prevent deadlock, the global unixBigLock must must be acquired 727 ** before the unixInodeInfo.pLockMutex mutex, if both are held. It is 728 ** OK to get the pLockMutex without holding unixBigLock first, but if 729 ** that happens, the unixBigLock mutex must not be acquired until after 730 ** pLockMutex is released. 
731 ** 732 ** OK: enter(unixBigLock), enter(pLockInfo) 733 ** OK: enter(unixBigLock) 734 ** OK: enter(pLockInfo) 735 ** ERROR: enter(pLockInfo), enter(unixBigLock) 736 */ 737 static sqlite3_mutex *unixBigLock = 0; 738 static void unixEnterMutex(void){ 739 assert( sqlite3_mutex_notheld(unixBigLock) ); /* Not a recursive mutex */ 740 sqlite3_mutex_enter(unixBigLock); 741 } 742 static void unixLeaveMutex(void){ 743 assert( sqlite3_mutex_held(unixBigLock) ); 744 sqlite3_mutex_leave(unixBigLock); 745 } 746 #ifdef SQLITE_DEBUG 747 static int unixMutexHeld(void) { 748 return sqlite3_mutex_held(unixBigLock); 749 } 750 #endif 751 752 753 #ifdef SQLITE_HAVE_OS_TRACE 754 /* 755 ** Helper function for printing out trace information from debugging 756 ** binaries. This returns the string representation of the supplied 757 ** integer lock-type. 758 */ 759 static const char *azFileLock(int eFileLock){ 760 switch( eFileLock ){ 761 case NO_LOCK: return "NONE"; 762 case SHARED_LOCK: return "SHARED"; 763 case RESERVED_LOCK: return "RESERVED"; 764 case PENDING_LOCK: return "PENDING"; 765 case EXCLUSIVE_LOCK: return "EXCLUSIVE"; 766 } 767 return "ERROR"; 768 } 769 #endif 770 771 #ifdef SQLITE_LOCK_TRACE 772 /* 773 ** Print out information about all locking operations. 774 ** 775 ** This routine is used for troubleshooting locks on multithreaded 776 ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE 777 ** command-line option on the compiler. This code is normally 778 ** turned off. 
779 */ 780 static int lockTrace(int fd, int op, struct flock *p){ 781 char *zOpName, *zType; 782 int s; 783 int savedErrno; 784 if( op==F_GETLK ){ 785 zOpName = "GETLK"; 786 }else if( op==F_SETLK ){ 787 zOpName = "SETLK"; 788 }else{ 789 s = osFcntl(fd, op, p); 790 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); 791 return s; 792 } 793 if( p->l_type==F_RDLCK ){ 794 zType = "RDLCK"; 795 }else if( p->l_type==F_WRLCK ){ 796 zType = "WRLCK"; 797 }else if( p->l_type==F_UNLCK ){ 798 zType = "UNLCK"; 799 }else{ 800 assert( 0 ); 801 } 802 assert( p->l_whence==SEEK_SET ); 803 s = osFcntl(fd, op, p); 804 savedErrno = errno; 805 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", 806 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, 807 (int)p->l_pid, s); 808 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ 809 struct flock l2; 810 l2 = *p; 811 osFcntl(fd, F_GETLK, &l2); 812 if( l2.l_type==F_RDLCK ){ 813 zType = "RDLCK"; 814 }else if( l2.l_type==F_WRLCK ){ 815 zType = "WRLCK"; 816 }else if( l2.l_type==F_UNLCK ){ 817 zType = "UNLCK"; 818 }else{ 819 assert( 0 ); 820 } 821 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", 822 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); 823 } 824 errno = savedErrno; 825 return s; 826 } 827 #undef osFcntl 828 #define osFcntl lockTrace 829 #endif /* SQLITE_LOCK_TRACE */ 830 831 /* 832 ** Retry ftruncate() calls that fail due to EINTR 833 ** 834 ** All calls to ftruncate() within this file should be made through 835 ** this wrapper. On the Android platform, bypassing the logic below 836 ** could lead to a corrupt database. 837 */ 838 static int robust_ftruncate(int h, sqlite3_int64 sz){ 839 int rc; 840 #ifdef __ANDROID__ 841 /* On Android, ftruncate() always uses 32-bit offsets, even if 842 ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to 843 ** truncate a file to any size larger than 2GiB. Silently ignore any 844 ** such attempts. 
*/ 845 if( sz>(sqlite3_int64)0x7FFFFFFF ){ 846 rc = SQLITE_OK; 847 }else 848 #endif 849 do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); 850 return rc; 851 } 852 853 /* 854 ** This routine translates a standard POSIX errno code into something 855 ** useful to the clients of the sqlite3 functions. Specifically, it is 856 ** intended to translate a variety of "try again" errors into SQLITE_BUSY 857 ** and a variety of "please close the file descriptor NOW" errors into 858 ** SQLITE_IOERR 859 ** 860 ** Errors during initialization of locks, or file system support for locks, 861 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. 862 */ 863 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { 864 assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || 865 (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 866 (sqliteIOErr == SQLITE_IOERR_RDLOCK) || 867 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ); 868 switch (posixError) { 869 case EACCES: 870 case EAGAIN: 871 case ETIMEDOUT: 872 case EBUSY: 873 case EINTR: 874 case ENOLCK: 875 /* random NFS retry error, unless during file system support 876 * introspection, in which it actually means what it says */ 877 return SQLITE_BUSY; 878 879 case EPERM: 880 return SQLITE_PERM; 881 882 default: 883 return sqliteIOErr; 884 } 885 } 886 887 888 /****************************************************************************** 889 ****************** Begin Unique File ID Utility Used By VxWorks *************** 890 ** 891 ** On most versions of unix, we can get a unique ID for a file by concatenating 892 ** the device number and the inode number. But this does not work on VxWorks. 893 ** On VxWorks, a unique file id must be based on the canonical filename. 894 ** 895 ** A pointer to an instance of the following structure can be used as a 896 ** unique file ID in VxWorks. Each instance of this structure contains 897 ** a copy of the canonical filename. There is also a reference count. 
898 ** The structure is reclaimed when the number of pointers to it drops to 899 ** zero. 900 ** 901 ** There are never very many files open at one time and lookups are not 902 ** a performance-critical path, so it is sufficient to put these 903 ** structures on a linked list. 904 */ 905 struct vxworksFileId { 906 struct vxworksFileId *pNext; /* Next in a list of them all */ 907 int nRef; /* Number of references to this one */ 908 int nName; /* Length of the zCanonicalName[] string */ 909 char *zCanonicalName; /* Canonical filename */ 910 }; 911 912 #if OS_VXWORKS 913 /* 914 ** All unique filenames are held on a linked list headed by this 915 ** variable: 916 */ 917 static struct vxworksFileId *vxworksFileList = 0; 918 919 /* 920 ** Simplify a filename into its canonical form 921 ** by making the following changes: 922 ** 923 ** * removing any trailing and duplicate / 924 ** * convert /./ into just / 925 ** * convert /A/../ where A is any simple name into just / 926 ** 927 ** Changes are made in-place. Return the new name length. 928 ** 929 ** The original filename is in z[0..n-1]. Return the number of 930 ** characters in the simplified name. 931 */ 932 static int vxworksSimplifyName(char *z, int n){ 933 int i, j; 934 while( n>1 && z[n-1]=='/' ){ n--; } 935 for(i=j=0; i<n; i++){ 936 if( z[i]=='/' ){ 937 if( z[i+1]=='/' ) continue; 938 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){ 939 i += 1; 940 continue; 941 } 942 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){ 943 while( j>0 && z[j-1]!='/' ){ j--; } 944 if( j>0 ){ j--; } 945 i += 2; 946 continue; 947 } 948 } 949 z[j++] = z[i]; 950 } 951 z[j] = 0; 952 return j; 953 } 954 955 /* 956 ** Find a unique file ID for the given absolute pathname. Return 957 ** a pointer to the vxworksFileId object. This pointer is the unique 958 ** file ID. 959 ** 960 ** The nRef field of the vxworksFileId object is incremented before 961 ** the object is returned. 
A new vxworksFileId object is created 962 ** and added to the global list if necessary. 963 ** 964 ** If a memory allocation error occurs, return NULL. 965 */ 966 static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ 967 struct vxworksFileId *pNew; /* search key and new file ID */ 968 struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ 969 int n; /* Length of zAbsoluteName string */ 970 971 assert( zAbsoluteName[0]=='/' ); 972 n = (int)strlen(zAbsoluteName); 973 pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); 974 if( pNew==0 ) return 0; 975 pNew->zCanonicalName = (char*)&pNew[1]; 976 memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); 977 n = vxworksSimplifyName(pNew->zCanonicalName, n); 978 979 /* Search for an existing entry that matching the canonical name. 980 ** If found, increment the reference count and return a pointer to 981 ** the existing file ID. 982 */ 983 unixEnterMutex(); 984 for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ 985 if( pCandidate->nName==n 986 && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 987 ){ 988 sqlite3_free(pNew); 989 pCandidate->nRef++; 990 unixLeaveMutex(); 991 return pCandidate; 992 } 993 } 994 995 /* No match was found. We will make a new file ID */ 996 pNew->nRef = 1; 997 pNew->nName = n; 998 pNew->pNext = vxworksFileList; 999 vxworksFileList = pNew; 1000 unixLeaveMutex(); 1001 return pNew; 1002 } 1003 1004 /* 1005 ** Decrement the reference count on a vxworksFileId object. Free 1006 ** the object when the reference count reaches zero. 
1007 */ 1008 static void vxworksReleaseFileId(struct vxworksFileId *pId){ 1009 unixEnterMutex(); 1010 assert( pId->nRef>0 ); 1011 pId->nRef--; 1012 if( pId->nRef==0 ){ 1013 struct vxworksFileId **pp; 1014 for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} 1015 assert( *pp==pId ); 1016 *pp = pId->pNext; 1017 sqlite3_free(pId); 1018 } 1019 unixLeaveMutex(); 1020 } 1021 #endif /* OS_VXWORKS */ 1022 /*************** End of Unique File ID Utility Used By VxWorks **************** 1023 ******************************************************************************/ 1024 1025 1026 /****************************************************************************** 1027 *************************** Posix Advisory Locking **************************** 1028 ** 1029 ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) 1030 ** section 6.5.2.2 lines 483 through 490 specify that when a process 1031 ** sets or clears a lock, that operation overrides any prior locks set 1032 ** by the same process. It does not explicitly say so, but this implies 1033 ** that it overrides locks set by the same process using a different 1034 ** file descriptor. Consider this test case: 1035 ** 1036 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); 1037 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); 1038 ** 1039 ** Suppose ./file1 and ./file2 are really the same file (because 1040 ** one is a hard or symbolic link to the other) then if you set 1041 ** an exclusive lock on fd1, then try to get an exclusive lock 1042 ** on fd2, it works. I would have expected the second lock to 1043 ** fail since there was already a lock on the file due to fd1. 1044 ** But not so. Since both locks came from the same process, the 1045 ** second overrides the first, even though they were on different 1046 ** file descriptors opened on different file names. 1047 ** 1048 ** This means that we cannot use POSIX locks to synchronize file access 1049 ** among competing threads of the same process. 
POSIX locks will work fine 1050 ** to synchronize access for threads in separate processes, but not 1051 ** threads within the same process. 1052 ** 1053 ** To work around the problem, SQLite has to manage file locks internally 1054 ** on its own. Whenever a new database is opened, we have to find the 1055 ** specific inode of the database file (the inode is determined by the 1056 ** st_dev and st_ino fields of the stat structure that fstat() fills in) 1057 ** and check for locks already existing on that inode. When locks are 1058 ** created or removed, we have to look at our own internal record of the 1059 ** locks to see if another thread has previously set a lock on that same 1060 ** inode. 1061 ** 1062 ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. 1063 ** For VxWorks, we have to use the alternative unique ID system based on 1064 ** canonical filename and implemented in the previous division.) 1065 ** 1066 ** The sqlite3_file structure for POSIX is no longer just an integer file 1067 ** descriptor. It is now a structure that holds the integer file 1068 ** descriptor and a pointer to a structure that describes the internal 1069 ** locks on the corresponding inode. There is one locking structure 1070 ** per inode, so if the same inode is opened twice, both unixFile structures 1071 ** point to the same locking structure. The locking structure keeps 1072 ** a reference count (so we will know when to delete it) and a "cnt" 1073 ** field that tells us its internal lock status. cnt==0 means the 1074 ** file is unlocked. cnt==-1 means the file has an exclusive lock. 1075 ** cnt>0 means there are cnt shared locks on the file. 1076 ** 1077 ** Any attempt to lock or unlock a file first checks the locking 1078 ** structure. The fcntl() system call is only invoked to set a 1079 ** POSIX lock if the internal lock structure transitions between 1080 ** a locked and an unlocked state. 
**
** But wait:  there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close a unixFile, if there are
** other unixFile objects open on the same inode that are holding locks,
** the call to close() the file descriptor is deferred until all of the
** locks clear.  The unixInodeInfo structure keeps a list of file
** descriptors that need to be closed and that list is walked (and
** cleared) when the last lock clears.
**
** Yet another problem:  LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B.  But there is no way to know at compile-time which
** threading library is being used.  So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
*/

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
**
** findInodeInfo() compares keys bytewise with memcmp(), so a key must be
** zero-filled (memset) before its fields are assigned; otherwise any
** padding bytes inside the structure would take part in the comparison
** with indeterminate values.
*/
struct unixFileId {
  dev_t dev;                  /* Device number */
#if OS_VXWORKS
  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
#else
  /* We are told that some versions of Android contain a bug that
  ** sizes ino_t at only 32-bits instead of 64-bits. (See
  ** https://android-review.googlesource.com/#/c/115351/3/dist/sqlite3.c)
  ** To work around this, always allocate 64-bits for the inode number.
  ** On small machines that only have 32-bit inodes, this wastes 4 bytes,
  ** but that should not be a big deal. */
  /* WAS:  ino_t ino;   */
  u64 ino;                   /* Inode number */
#endif
};

/*
** An instance of the following structure is allocated for each open
** inode.
**
** A single inode can have multiple file descriptors, so each unixFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of unixFile pointing to it.
**
** Mutex rules:
**
**  (1) Only the pLockMutex mutex must be held in order to read or write
**      any of the locking fields:
**          nShared, nLock, eFileLock, bProcessLock, pUnused
**
**  (2) When nRef>0, then the following fields are unchanging and can
**      be read (but not written) without holding any mutex:
**          fileId, pLockMutex
**
**  (3) With the exceptions above, all the fields may only be read
**      or written while holding the global unixBigLock mutex.
**
** Deadlock prevention:  The global unixBigLock mutex may not
** be acquired while holding the pLockMutex mutex.  If both unixBigLock
** and pLockMutex are needed, then unixBigLock must be acquired first.
*/
struct unixInodeInfo {
  struct unixFileId fileId;       /* The lookup key */
  sqlite3_mutex *pLockMutex;      /* Hold this mutex to access the locking
                                  ** fields: nShared, nLock, eFileLock,
                                  ** bProcessLock and pUnused */
  int nShared;                    /* Number of SHARED locks held */
  int nLock;                      /* Number of outstanding file locks */
  unsigned char eFileLock;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  unsigned char bProcessLock;     /* An exclusive process lock is held */
  UnixUnusedFd *pUnused;          /* Unused file descriptors to close */
  int nRef;                       /* Number of pointers to this structure */
  unixShmNode *pShmNode;          /* Shared memory associated with this inode */
  unixInodeInfo *pNext;           /* List of all unixInodeInfo objects */
  unixInodeInfo *pPrev;           /*    .... doubly linked */
#if SQLITE_ENABLE_LOCKING_STYLE
  unsigned long long sharedByte;  /* for AFP simulated shared lock */
#endif
#if OS_VXWORKS
  sem_t *pSem;                    /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+2];  /* Name of that semaphore */
#endif
};

/*
** A list of all unixInodeInfo objects.  New objects are inserted at the
** head; the list is doubly linked through pNext/pPrev.
**
** Must hold unixBigLock in order to read or write this variable.
*/
static unixInodeInfo *inodeList = 0;  /* All unixInodeInfo objects */

#ifdef SQLITE_DEBUG
/*
** unixFileMutexHeld() is true if the inode mutex of pFile (that is,
** pFile->pInode->pLockMutex) is held; unixFileMutexNotheld() is true if it
** is not held.  Both require pFile->pInode to be set.
** These routines are used only within assert() to help verify correct mutex
** usage.
*/
int unixFileMutexHeld(unixFile *pFile){
  assert( pFile->pInode );
  return sqlite3_mutex_held(pFile->pInode->pLockMutex);
}
int unixFileMutexNotheld(unixFile *pFile){
  assert( pFile->pInode );
  return sqlite3_mutex_notheld(pFile->pInode->pLockMutex);
}
#endif

/*
**
** This function - unixLogErrorAtLine(), is normally called via the macro
** unixLogError(), which supplies __LINE__ as the final argument.  (The
** robust_close() routine calls it directly so that it can pass along a
** line number received from its own caller.)
**
** It is invoked after an error occurs in an OS function and errno has been
** set.
It logs a message using sqlite3_log() containing the current value of 1212 ** errno and, if possible, the human-readable equivalent from strerror() or 1213 ** strerror_r(). 1214 ** 1215 ** The first argument passed to the macro should be the error code that 1216 ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). 1217 ** The two subsequent arguments should be the name of the OS function that 1218 ** failed (e.g. "unlink", "open") and the associated file-system path, 1219 ** if any. 1220 */ 1221 #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) 1222 static int unixLogErrorAtLine( 1223 int errcode, /* SQLite error code */ 1224 const char *zFunc, /* Name of OS function that failed */ 1225 const char *zPath, /* File path associated with error */ 1226 int iLine /* Source line number where error occurred */ 1227 ){ 1228 char *zErr; /* Message from strerror() or equivalent */ 1229 int iErrno = errno; /* Saved syscall error number */ 1230 1231 /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use 1232 ** the strerror() function to obtain the human-readable error message 1233 ** equivalent to errno. Otherwise, use strerror_r(). 1234 */ 1235 #if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) 1236 char aErr[80]; 1237 memset(aErr, 0, sizeof(aErr)); 1238 zErr = aErr; 1239 1240 /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, 1241 ** assume that the system provides the GNU version of strerror_r() that 1242 ** returns a pointer to a buffer containing the error message. That pointer 1243 ** may point to aErr[], or it may point to some static storage somewhere. 1244 ** Otherwise, assume that the system provides the POSIX version of 1245 ** strerror_r(), which always writes an error message into aErr[]. 1246 ** 1247 ** If the code incorrectly assumes that it is the POSIX version that is 1248 ** available, the error message will often be an empty string. Not a 1249 ** huge problem. 
Incorrectly concluding that the GNU version is available 1250 ** could lead to a segfault though. 1251 */ 1252 #if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) 1253 zErr = 1254 # endif 1255 strerror_r(iErrno, aErr, sizeof(aErr)-1); 1256 1257 #elif SQLITE_THREADSAFE 1258 /* This is a threadsafe build, but strerror_r() is not available. */ 1259 zErr = ""; 1260 #else 1261 /* Non-threadsafe build, use strerror(). */ 1262 zErr = strerror(iErrno); 1263 #endif 1264 1265 if( zPath==0 ) zPath = ""; 1266 sqlite3_log(errcode, 1267 "os_unix.c:%d: (%d) %s(%s) - %s", 1268 iLine, iErrno, zFunc, zPath, zErr 1269 ); 1270 1271 return errcode; 1272 } 1273 1274 /* 1275 ** Close a file descriptor. 1276 ** 1277 ** We assume that close() almost always works, since it is only in a 1278 ** very sick application or on a very sick platform that it might fail. 1279 ** If it does fail, simply leak the file descriptor, but do log the 1280 ** error. 1281 ** 1282 ** Note that it is not safe to retry close() after EINTR since the 1283 ** file descriptor might have already been reused by another thread. 1284 ** So we don't even try to recover from an EINTR. Just log the error 1285 ** and move on. 1286 */ 1287 static void robust_close(unixFile *pFile, int h, int lineno){ 1288 if( osClose(h) ){ 1289 unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", 1290 pFile ? pFile->zPath : 0, lineno); 1291 } 1292 } 1293 1294 /* 1295 ** Set the pFile->lastErrno. Do this in a subroutine as that provides 1296 ** a convenient place to set a breakpoint. 1297 */ 1298 static void storeLastErrno(unixFile *pFile, int error){ 1299 pFile->lastErrno = error; 1300 } 1301 1302 /* 1303 ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. 
1304 */ 1305 static void closePendingFds(unixFile *pFile){ 1306 unixInodeInfo *pInode = pFile->pInode; 1307 UnixUnusedFd *p; 1308 UnixUnusedFd *pNext; 1309 assert( unixFileMutexHeld(pFile) ); 1310 for(p=pInode->pUnused; p; p=pNext){ 1311 pNext = p->pNext; 1312 robust_close(pFile, p->fd, __LINE__); 1313 sqlite3_free(p); 1314 } 1315 pInode->pUnused = 0; 1316 } 1317 1318 /* 1319 ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). 1320 ** 1321 ** The global mutex must be held when this routine is called, but the mutex 1322 ** on the inode being deleted must NOT be held. 1323 */ 1324 static void releaseInodeInfo(unixFile *pFile){ 1325 unixInodeInfo *pInode = pFile->pInode; 1326 assert( unixMutexHeld() ); 1327 assert( unixFileMutexNotheld(pFile) ); 1328 if( ALWAYS(pInode) ){ 1329 pInode->nRef--; 1330 if( pInode->nRef==0 ){ 1331 assert( pInode->pShmNode==0 ); 1332 sqlite3_mutex_enter(pInode->pLockMutex); 1333 closePendingFds(pFile); 1334 sqlite3_mutex_leave(pInode->pLockMutex); 1335 if( pInode->pPrev ){ 1336 assert( pInode->pPrev->pNext==pInode ); 1337 pInode->pPrev->pNext = pInode->pNext; 1338 }else{ 1339 assert( inodeList==pInode ); 1340 inodeList = pInode->pNext; 1341 } 1342 if( pInode->pNext ){ 1343 assert( pInode->pNext->pPrev==pInode ); 1344 pInode->pNext->pPrev = pInode->pPrev; 1345 } 1346 sqlite3_mutex_free(pInode->pLockMutex); 1347 sqlite3_free(pInode); 1348 } 1349 } 1350 } 1351 1352 /* 1353 ** Given a file descriptor, locate the unixInodeInfo object that 1354 ** describes that file descriptor. Create a new one if necessary. The 1355 ** return value might be uninitialized if an error occurs. 1356 ** 1357 ** The global mutex must held when calling this routine. 1358 ** 1359 ** Return an appropriate error code. 
1360 */ 1361 static int findInodeInfo( 1362 unixFile *pFile, /* Unix file with file desc used in the key */ 1363 unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ 1364 ){ 1365 int rc; /* System call return code */ 1366 int fd; /* The file descriptor for pFile */ 1367 struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ 1368 struct stat statbuf; /* Low-level file information */ 1369 unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ 1370 1371 assert( unixMutexHeld() ); 1372 1373 /* Get low-level information about the file that we can used to 1374 ** create a unique name for the file. 1375 */ 1376 fd = pFile->h; 1377 rc = osFstat(fd, &statbuf); 1378 if( rc!=0 ){ 1379 storeLastErrno(pFile, errno); 1380 #if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS) 1381 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; 1382 #endif 1383 return SQLITE_IOERR; 1384 } 1385 1386 #ifdef __APPLE__ 1387 /* On OS X on an msdos filesystem, the inode number is reported 1388 ** incorrectly for zero-size files. See ticket #3260. To work 1389 ** around this problem (we consider it a bug in OS X, not SQLite) 1390 ** we always increase the file size to 1 by writing a single byte 1391 ** prior to accessing the inode number. The one byte written is 1392 ** an ASCII 'S' character which also happens to be the first byte 1393 ** in the header of every SQLite database. In this way, if there 1394 ** is a race condition such that another thread has already populated 1395 ** the first page of the database, no damage is done. 
1396 */ 1397 if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ 1398 do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); 1399 if( rc!=1 ){ 1400 storeLastErrno(pFile, errno); 1401 return SQLITE_IOERR; 1402 } 1403 rc = osFstat(fd, &statbuf); 1404 if( rc!=0 ){ 1405 storeLastErrno(pFile, errno); 1406 return SQLITE_IOERR; 1407 } 1408 } 1409 #endif 1410 1411 memset(&fileId, 0, sizeof(fileId)); 1412 fileId.dev = statbuf.st_dev; 1413 #if OS_VXWORKS 1414 fileId.pId = pFile->pId; 1415 #else 1416 fileId.ino = (u64)statbuf.st_ino; 1417 #endif 1418 assert( unixMutexHeld() ); 1419 pInode = inodeList; 1420 while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ 1421 pInode = pInode->pNext; 1422 } 1423 if( pInode==0 ){ 1424 pInode = sqlite3_malloc64( sizeof(*pInode) ); 1425 if( pInode==0 ){ 1426 return SQLITE_NOMEM_BKPT; 1427 } 1428 memset(pInode, 0, sizeof(*pInode)); 1429 memcpy(&pInode->fileId, &fileId, sizeof(fileId)); 1430 if( sqlite3GlobalConfig.bCoreMutex ){ 1431 pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 1432 if( pInode->pLockMutex==0 ){ 1433 sqlite3_free(pInode); 1434 return SQLITE_NOMEM_BKPT; 1435 } 1436 } 1437 pInode->nRef = 1; 1438 assert( unixMutexHeld() ); 1439 pInode->pNext = inodeList; 1440 pInode->pPrev = 0; 1441 if( inodeList ) inodeList->pPrev = pInode; 1442 inodeList = pInode; 1443 }else{ 1444 pInode->nRef++; 1445 } 1446 *ppInode = pInode; 1447 return SQLITE_OK; 1448 } 1449 1450 /* 1451 ** Return TRUE if pFile has been renamed or unlinked since it was first opened. 1452 */ 1453 static int fileHasMoved(unixFile *pFile){ 1454 #if OS_VXWORKS 1455 return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; 1456 #else 1457 struct stat buf; 1458 return pFile->pInode!=0 && 1459 (osStat(pFile->zPath, &buf)!=0 1460 || (u64)buf.st_ino!=pFile->pInode->fileId.ino); 1461 #endif 1462 } 1463 1464 1465 /* 1466 ** Check a unixFile that is a database. 
Verify the following: 1467 ** 1468 ** (1) There is exactly one hard link on the file 1469 ** (2) The file is not a symbolic link 1470 ** (3) The file has not been renamed or unlinked 1471 ** 1472 ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. 1473 */ 1474 static void verifyDbFile(unixFile *pFile){ 1475 struct stat buf; 1476 int rc; 1477 1478 /* These verifications occurs for the main database only */ 1479 if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return; 1480 1481 rc = osFstat(pFile->h, &buf); 1482 if( rc!=0 ){ 1483 sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); 1484 return; 1485 } 1486 if( buf.st_nlink==0 ){ 1487 sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); 1488 return; 1489 } 1490 if( buf.st_nlink>1 ){ 1491 sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); 1492 return; 1493 } 1494 if( fileHasMoved(pFile) ){ 1495 sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); 1496 return; 1497 } 1498 } 1499 1500 1501 /* 1502 ** This routine checks if there is a RESERVED lock held on the specified 1503 ** file by this or any other process. If such a lock is held, set *pResOut 1504 ** to a non-zero value otherwise *pResOut is set to zero. The return value 1505 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 1506 */ 1507 static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ 1508 int rc = SQLITE_OK; 1509 int reserved = 0; 1510 unixFile *pFile = (unixFile*)id; 1511 1512 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 1513 1514 assert( pFile ); 1515 assert( pFile->eFileLock<=SHARED_LOCK ); 1516 sqlite3_mutex_enter(pFile->pInode->pLockMutex); 1517 1518 /* Check if a thread in this process holds such a lock */ 1519 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 1520 reserved = 1; 1521 } 1522 1523 /* Otherwise see if some other process holds it. 
1524 */ 1525 #ifndef __DJGPP__ 1526 if( !reserved && !pFile->pInode->bProcessLock ){ 1527 struct flock lock; 1528 lock.l_whence = SEEK_SET; 1529 lock.l_start = RESERVED_BYTE; 1530 lock.l_len = 1; 1531 lock.l_type = F_WRLCK; 1532 if( osFcntl(pFile->h, F_GETLK, &lock) ){ 1533 rc = SQLITE_IOERR_CHECKRESERVEDLOCK; 1534 storeLastErrno(pFile, errno); 1535 } else if( lock.l_type!=F_UNLCK ){ 1536 reserved = 1; 1537 } 1538 } 1539 #endif 1540 1541 sqlite3_mutex_leave(pFile->pInode->pLockMutex); 1542 OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); 1543 1544 *pResOut = reserved; 1545 return rc; 1546 } 1547 1548 /* Forward declaration*/ 1549 static int unixSleep(sqlite3_vfs*,int); 1550 1551 /* 1552 ** Set a posix-advisory-lock. 1553 ** 1554 ** There are two versions of this routine. If compiled with 1555 ** SQLITE_ENABLE_SETLK_TIMEOUT then the routine has an extra parameter 1556 ** which is a pointer to a unixFile. If the unixFile->iBusyTimeout 1557 ** value is set, then it is the number of milliseconds to wait before 1558 ** failing the lock. The iBusyTimeout value is always reset back to 1559 ** zero on each call. 1560 ** 1561 ** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking 1562 ** attempt to set the lock. 1563 */ 1564 #ifndef SQLITE_ENABLE_SETLK_TIMEOUT 1565 # define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x) 1566 #else 1567 static int osSetPosixAdvisoryLock( 1568 int h, /* The file descriptor on which to take the lock */ 1569 struct flock *pLock, /* The description of the lock */ 1570 unixFile *pFile /* Structure holding timeout value */ 1571 ){ 1572 int tm = pFile->iBusyTimeout; 1573 int rc = osFcntl(h,F_SETLK,pLock); 1574 while( rc<0 && tm>0 ){ 1575 /* On systems that support some kind of blocking file lock with a timeout, 1576 ** make appropriate changes here to invoke that blocking file lock. On 1577 ** generic posix, however, there is no such API. 
So we simply try the 1578 ** lock once every millisecond until either the timeout expires, or until 1579 ** the lock is obtained. */ 1580 unixSleep(0,1000); 1581 rc = osFcntl(h,F_SETLK,pLock); 1582 tm--; 1583 } 1584 return rc; 1585 } 1586 #endif /* SQLITE_ENABLE_SETLK_TIMEOUT */ 1587 1588 1589 /* 1590 ** Attempt to set a system-lock on the file pFile. The lock is 1591 ** described by pLock. 1592 ** 1593 ** If the pFile was opened read/write from unix-excl, then the only lock 1594 ** ever obtained is an exclusive lock, and it is obtained exactly once 1595 ** the first time any lock is attempted. All subsequent system locking 1596 ** operations become no-ops. Locking operations still happen internally, 1597 ** in order to coordinate access between separate database connections 1598 ** within this process, but all of that is handled in memory and the 1599 ** operating system does not participate. 1600 ** 1601 ** This function is a pass-through to fcntl(F_SETLK) if pFile is using 1602 ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" 1603 ** and is read-only. 1604 ** 1605 ** Zero is returned if the call completes successfully, or -1 if a call 1606 ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). 
1607 */ 1608 static int unixFileLock(unixFile *pFile, struct flock *pLock){ 1609 int rc; 1610 unixInodeInfo *pInode = pFile->pInode; 1611 assert( pInode!=0 ); 1612 assert( sqlite3_mutex_held(pInode->pLockMutex) ); 1613 if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ 1614 if( pInode->bProcessLock==0 ){ 1615 struct flock lock; 1616 assert( pInode->nLock==0 ); 1617 lock.l_whence = SEEK_SET; 1618 lock.l_start = SHARED_FIRST; 1619 lock.l_len = SHARED_SIZE; 1620 lock.l_type = F_WRLCK; 1621 rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); 1622 if( rc<0 ) return rc; 1623 pInode->bProcessLock = 1; 1624 pInode->nLock++; 1625 }else{ 1626 rc = 0; 1627 } 1628 }else{ 1629 rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); 1630 } 1631 return rc; 1632 } 1633 1634 /* 1635 ** Lock the file with the lock specified by parameter eFileLock - one 1636 ** of the following: 1637 ** 1638 ** (1) SHARED_LOCK 1639 ** (2) RESERVED_LOCK 1640 ** (3) PENDING_LOCK 1641 ** (4) EXCLUSIVE_LOCK 1642 ** 1643 ** Sometimes when requesting one lock state, additional lock states 1644 ** are inserted in between. The locking might fail on one of the later 1645 ** transitions leaving the lock state different from what it started but 1646 ** still short of its goal. The following chart shows the allowed 1647 ** transitions and the inserted intermediate states: 1648 ** 1649 ** UNLOCKED -> SHARED 1650 ** SHARED -> RESERVED 1651 ** SHARED -> (PENDING) -> EXCLUSIVE 1652 ** RESERVED -> (PENDING) -> EXCLUSIVE 1653 ** PENDING -> EXCLUSIVE 1654 ** 1655 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 1656 ** routine to lower a locking level. 
*/
static int unixLock(sqlite3_file *id, int eFileLock){
  /* The following describes the implementation of the various locks and
  ** lock transitions in terms of the POSIX advisory shared and exclusive
  ** lock primitives (called read-locks and write-locks below, to avoid
  ** confusion with SQLite lock names). The algorithms are complicated
  ** slightly in order to be compatible with Windows95 systems simultaneously
  ** accessing the same database file, in case that is ever required.
  **
  ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
  ** byte', each single bytes at well known offsets, and the 'shared byte
  ** range', a range of 510 bytes at a well known offset.
  **
  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  ** byte'.  If this is successful, 'shared byte range' is read-locked
  ** and the lock on the 'pending byte' released.  (Legacy note:  When
  ** SQLite was first developed, Windows95 systems were still very common,
  ** and Windows95 lacks a shared-lock capability.  So on Windows95, a
  ** single randomly selected byte from the 'shared byte range' is locked.
  ** Windows95 is now pretty much extinct, but this work-around for the
  ** lack of shared-locks on Windows95 lives on, for backwards
  ** compatibility.)
  **
  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  ** A RESERVED lock is implemented by grabbing a write-lock on the
  ** 'reserved byte'.
  **
  ** A process may only obtain a PENDING lock after it has obtained a
  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  ** on the 'pending byte'. This ensures that no new SHARED locks can be
  ** obtained, but existing SHARED locks are allowed to persist. A process
  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  ** This property is used by the algorithm for rolling back a journal file
  ** after a crash.
  **
  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  ** implemented by obtaining a write-lock on the entire 'shared byte
  ** range'. Since all other locks require a read-lock on one of the bytes
  ** within this range, this ensures that no other locks are held on the
  ** database.
  */
  int rc = SQLITE_OK;              /* Result code returned to the caller */
  unixFile *pFile = (unixFile*)id; /* The file being locked */
  unixInodeInfo *pInode;           /* Lock state shared by all handles on the inode */
  struct flock lock;               /* Scratch descriptor for each system lock call */
  int tErrno = 0;                  /* errno saved from a failed lock call */

  assert( pFile );
  OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
      azFileLock(eFileLock), azFileLock(pFile->eFileLock),
      azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared,
      osGetpid(0)));

  /* If there is already a lock of this type or more restrictive on the
  ** unixFile, do nothing. Don't use the end_lock: exit path, as
  ** the inode mutex (pInode->pLockMutex) hasn't been entered yet.
  */
  if( pFile->eFileLock>=eFileLock ){
    OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,
            azFileLock(eFileLock)));
    return SQLITE_OK;
  }

  /* Make sure the locking sequence is correct.
  **  (1) We never move from unlocked to anything higher than shared lock.
  **  (2) SQLite never explicitly requests a pending lock.
  **  (3) A shared lock is always held when a reserve lock is requested.
  */
  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  assert( eFileLock!=PENDING_LOCK );
  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );

  /* This mutex is needed because pFile->pInode is shared across threads
  */
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);

  /* If some thread using this PID has a lock via a different unixFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (pFile->eFileLock!=pInode->eFileLock &&
          (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  ){
    rc = SQLITE_BUSY;
    goto end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.  No system call is made on this path.
  */
  if( eFileLock==SHARED_LOCK &&
      (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto end_lock;
  }


  /* A PENDING lock is needed before acquiring a SHARED lock and before
  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  ** be released.  It is taken as a read-lock for SHARED requests and as a
  ** write-lock for EXCLUSIVE requests.
  */
  lock.l_len = 1L;
  lock.l_whence = SEEK_SET;
  if( eFileLock==SHARED_LOCK
      || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  ){
    lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
    lock.l_start = PENDING_BYTE;
    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      /* errno is only recorded for genuine errors, not for SQLITE_BUSY */
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }
  }


  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( eFileLock==SHARED_LOCK ){
    assert( pInode->nShared==0 );
    assert( pInode->eFileLock==0 );
    assert( rc==SQLITE_OK );

    /* Now get the read-lock.  lock.l_type is still F_RDLCK from the
    ** PENDING step above. */
    lock.l_start = SHARED_FIRST;
    lock.l_len = SHARED_SIZE;
    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
    }

    /* Drop the temporary PENDING lock.  This is attempted even when the
    ** read-lock above failed; an unlock failure only replaces rc when the
    ** read-lock itself succeeded. */
    lock.l_start = PENDING_BYTE;
    lock.l_len = 1L;
    lock.l_type = F_UNLCK;
    if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
      /* This could happen with a network mount */
      tErrno = errno;
      rc = SQLITE_IOERR_UNLOCK;
    }

    if( rc ){
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }else{
      pFile->eFileLock = SHARED_LOCK;
      pInode->nLock++;
      pInode->nShared = 1;
    }
  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
    /* We are trying for an exclusive lock but another thread in this
    ** same process is still holding a shared lock. */
    rc = SQLITE_BUSY;
  }else{
    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    assert( 0!=pFile->eFileLock );
    lock.l_type = F_WRLCK;

    assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
    if( eFileLock==RESERVED_LOCK ){
      lock.l_start = RESERVED_BYTE;
      lock.l_len = 1L;
    }else{
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
    }

    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
    }
  }


#ifdef SQLITE_DEBUG
  /* Set up the transaction-counter change checking flags when
  ** transitioning from a SHARED to a RESERVED lock.  The change
  ** from SHARED to RESERVED marks the beginning of a normal
  ** write operation (not a hot journal rollback).
  */
  if( rc==SQLITE_OK
   && pFile->eFileLock<=SHARED_LOCK
   && eFileLock==RESERVED_LOCK
  ){
    pFile->transCntrChng = 0;
    pFile->dbUpdate = 0;
    pFile->inNormalWrite = 1;
  }
#endif


  /* Record the outcome.  On success both the handle and the inode take the
  ** requested level.  A failed EXCLUSIVE request that reached this point
  ** (the PENDING step did not fail) leaves both at PENDING_LOCK. */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
    pInode->eFileLock = eFileLock;
  }else if( eFileLock==EXCLUSIVE_LOCK ){
    pFile->eFileLock = PENDING_LOCK;
    pInode->eFileLock = PENDING_LOCK;
  }

end_lock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),
      rc==SQLITE_OK ? "ok" : "failed"));
  return rc;
}

/*
** Add the file descriptor used by file handle pFile to the corresponding
** pUnused list.
**
** The record pFile->pPreallocatedUnused is pushed onto the head of the
** inode's pUnused list, after which pFile no longer owns a descriptor
** (h is set to -1) or a preallocated record.  The inode mutex must be held.
**
** NOTE(review): p->fd is not assigned here; presumably the preallocated
** record already holds pFile->h - confirm against the code that fills it.
*/
static void setPendingFd(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  UnixUnusedFd *p = pFile->pPreallocatedUnused;
  assert( unixFileMutexHeld(pFile) );
  p->pNext = pInode->pUnused;
  pInode->pUnused = p;
  pFile->h = -1;
  pFile->pPreallocatedUnused = 0;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
** the byte range is divided into 2 parts and the first part is unlocked then
** set to a read lock, then the other part is simply unlocked.  This works
** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
** remove the write lock on a region when a read lock is set.
*/
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int rc = SQLITE_OK;

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
      pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
      osGetpid(0)));

  assert( eFileLock<=SHARED_LOCK );
  /* Already at or below the requested level: nothing to do.  This return
  ** happens before the inode mutex is taken, so it must not go through
  ** end_unlock. */
  if( pFile->eFileLock<=eFileLock ){
    return SQLITE_OK;
  }
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);
  assert( pInode->nShared!=0 );
  if( pFile->eFileLock>SHARED_LOCK ){
    assert( pInode->eFileLock==pFile->eFileLock );

#ifdef SQLITE_DEBUG
    /* When reducing a lock such that other processes can start
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    **
    ** The only action taken at this point is to clear inNormalWrite.
    */
    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a
    ** write lock until the rest is covered by a read lock:
    **  1:   [WWWWW]
    **  2:   [....W]
    **  3:   [RRRRW]
    **  4:   [RRRR.]
    */
    if( eFileLock==SHARED_LOCK ){
#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
      /* The NFS workaround is compiled out in this configuration, so the
      ** flag must be zero. */
      (void)handleNFSUnlock;
      assert( handleNFSUnlock==0 );
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
      if( handleNFSUnlock ){
        int tErrno;               /* Error code from system call errors */
        off_t divSize = SHARED_SIZE - 1;

        /* Step 2 of the diagram: unlock all but the last shared byte. */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
        /* Step 3: read-lock the part that was just unlocked. */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
          if( IS_LOCK_ERROR(rc) ){
            storeLastErrno(pFile, tErrno);
          }
          goto end_unlock;
        }
        /* Step 4: release the write lock on the remaining byte. */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST+divSize;
        lock.l_len = SHARED_SIZE-divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
      }else
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
      {
        /* Ordinary downgrade: request a read lock over the whole shared
        ** byte range in a single call. */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = SHARED_SIZE;
        if( unixFileLock(pFile, &lock) ){
          /* In theory, the call to unixFileLock() cannot fail because another
          ** process is holding an incompatible lock. If it does, this
          ** indicates that the other process is not following the locking
          ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
          ** SQLITE_BUSY would confuse the upper layer (in practice it causes
          ** an assert to fail). */
          rc = SQLITE_IOERR_RDLOCK;
          storeLastErrno(pFile, errno);
          goto end_unlock;
        }
      }
    }
    /* Release the PENDING and RESERVED bytes with one call; the assert
    ** documents that the two bytes are adjacent. */
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = PENDING_BYTE;
    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
    if( unixFileLock(pFile, &lock)==0 ){
      pInode->eFileLock = SHARED_LOCK;
    }else{
      rc = SQLITE_IOERR_UNLOCK;
      storeLastErrno(pFile, errno);
      goto end_unlock;
    }
  }
  if( eFileLock==NO_LOCK ){
    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pInode->nShared--;
    if( pInode->nShared==0 ){
      /* l_start==0 and l_len==0 describe the whole file. */
      lock.l_type = F_UNLCK;
      lock.l_whence = SEEK_SET;
      lock.l_start = lock.l_len = 0L;
      if( unixFileLock(pFile, &lock)==0 ){
        pInode->eFileLock = NO_LOCK;
      }else{
        /* The OS call failed.  Both the inode and this handle are still
        ** recorded as NO_LOCK, and the error is reported to the caller. */
        rc = SQLITE_IOERR_UNLOCK;
        storeLastErrno(pFile, errno);
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }

    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pInode->nLock--;
    assert( pInode->nLock>=0 );
    if( pInode->nLock==0 ) closePendingFds(pFile);
  }

end_unlock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  /* The handle's own lock level is updated only when everything succeeded. */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
  }
  return rc;
}

/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
2067 */ 2068 static int unixUnlock(sqlite3_file *id, int eFileLock){ 2069 #if SQLITE_MAX_MMAP_SIZE>0 2070 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); 2071 #endif 2072 return posixUnlock(id, eFileLock, 0); 2073 } 2074 2075 #if SQLITE_MAX_MMAP_SIZE>0 2076 static int unixMapfile(unixFile *pFd, i64 nByte); 2077 static void unixUnmapfile(unixFile *pFd); 2078 #endif 2079 2080 /* 2081 ** This function performs the parts of the "close file" operation 2082 ** common to all locking schemes. It closes the directory and file 2083 ** handles, if they are valid, and sets all fields of the unixFile 2084 ** structure to 0. 2085 ** 2086 ** It is *not* necessary to hold the mutex when this routine is called, 2087 ** even on VxWorks. A mutex will be acquired on VxWorks by the 2088 ** vxworksReleaseFileId() routine. 2089 */ 2090 static int closeUnixFile(sqlite3_file *id){ 2091 unixFile *pFile = (unixFile*)id; 2092 #if SQLITE_MAX_MMAP_SIZE>0 2093 unixUnmapfile(pFile); 2094 #endif 2095 if( pFile->h>=0 ){ 2096 robust_close(pFile, pFile->h, __LINE__); 2097 pFile->h = -1; 2098 } 2099 #if OS_VXWORKS 2100 if( pFile->pId ){ 2101 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2102 osUnlink(pFile->pId->zCanonicalName); 2103 } 2104 vxworksReleaseFileId(pFile->pId); 2105 pFile->pId = 0; 2106 } 2107 #endif 2108 #ifdef SQLITE_UNLINK_AFTER_CLOSE 2109 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2110 osUnlink(pFile->zPath); 2111 sqlite3_free(*(char**)&pFile->zPath); 2112 pFile->zPath = 0; 2113 } 2114 #endif 2115 OSTRACE(("CLOSE %-3d\n", pFile->h)); 2116 OpenCounter(-1); 2117 sqlite3_free(pFile->pPreallocatedUnused); 2118 memset(pFile, 0, sizeof(unixFile)); 2119 return SQLITE_OK; 2120 } 2121 2122 /* 2123 ** Close a file. 
2124 */ 2125 static int unixClose(sqlite3_file *id){ 2126 int rc = SQLITE_OK; 2127 unixFile *pFile = (unixFile *)id; 2128 unixInodeInfo *pInode = pFile->pInode; 2129 2130 assert( pInode!=0 ); 2131 verifyDbFile(pFile); 2132 unixUnlock(id, NO_LOCK); 2133 assert( unixFileMutexNotheld(pFile) ); 2134 unixEnterMutex(); 2135 2136 /* unixFile.pInode is always valid here. Otherwise, a different close 2137 ** routine (e.g. nolockClose()) would be called instead. 2138 */ 2139 assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); 2140 sqlite3_mutex_enter(pInode->pLockMutex); 2141 if( pInode->nLock ){ 2142 /* If there are outstanding locks, do not actually close the file just 2143 ** yet because that would clear those locks. Instead, add the file 2144 ** descriptor to pInode->pUnused list. It will be automatically closed 2145 ** when the last lock is cleared. 2146 */ 2147 setPendingFd(pFile); 2148 } 2149 sqlite3_mutex_leave(pInode->pLockMutex); 2150 releaseInodeInfo(pFile); 2151 assert( pFile->pShm==0 ); 2152 rc = closeUnixFile(id); 2153 unixLeaveMutex(); 2154 return rc; 2155 } 2156 2157 /************** End of the posix advisory lock implementation ***************** 2158 ******************************************************************************/ 2159 2160 /****************************************************************************** 2161 ****************************** No-op Locking ********************************** 2162 ** 2163 ** Of the various locking implementations available, this is by far the 2164 ** simplest: locking is ignored. No attempt is made to lock the database 2165 ** file for reading or writing. 2166 ** 2167 ** This locking mode is appropriate for use on read-only databases 2168 ** (ex: databases that are burned into CD-ROM, for example.) It can 2169 ** also be used if the application employs some external mechanism to 2170 ** prevent simultaneous access of the same database by two or more 2171 ** database connections. 
But there is a serious risk of database 2172 ** corruption if this locking mode is used in situations where multiple 2173 ** database connections are accessing the same database file at the same 2174 ** time and one or more of those connections are writing. 2175 */ 2176 2177 static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ 2178 UNUSED_PARAMETER(NotUsed); 2179 *pResOut = 0; 2180 return SQLITE_OK; 2181 } 2182 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ 2183 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2184 return SQLITE_OK; 2185 } 2186 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ 2187 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2188 return SQLITE_OK; 2189 } 2190 2191 /* 2192 ** Close the file. 2193 */ 2194 static int nolockClose(sqlite3_file *id) { 2195 return closeUnixFile(id); 2196 } 2197 2198 /******************* End of the no-op lock implementation ********************* 2199 ******************************************************************************/ 2200 2201 /****************************************************************************** 2202 ************************* Begin dot-file Locking ****************************** 2203 ** 2204 ** The dotfile locking implementation uses the existence of separate lock 2205 ** files (really a directory) to control access to the database. This works 2206 ** on just about every filesystem imaginable. But there are serious downsides: 2207 ** 2208 ** (1) There is zero concurrency. A single reader blocks all other 2209 ** connections from reading or writing the database. 2210 ** 2211 ** (2) An application crash or power loss can leave stale lock files 2212 ** sitting around that need to be cleared manually. 2213 ** 2214 ** Nevertheless, a dotlock is an appropriate locking mode for use if no 2215 ** other locking strategy is available. 
2216 ** 2217 ** Dotfile locking works by creating a subdirectory in the same directory as 2218 ** the database and with the same name but with a ".lock" extension added. 2219 ** The existence of a lock directory implies an EXCLUSIVE lock. All other 2220 ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. 2221 */ 2222 2223 /* 2224 ** The file suffix added to the data base filename in order to create the 2225 ** lock directory. 2226 */ 2227 #define DOTLOCK_SUFFIX ".lock" 2228 2229 /* 2230 ** This routine checks if there is a RESERVED lock held on the specified 2231 ** file by this or any other process. If such a lock is held, set *pResOut 2232 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2233 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2234 ** 2235 ** In dotfile locking, either a lock exists or it does not. So in this 2236 ** variation of CheckReservedLock(), *pResOut is set to true if any lock 2237 ** is held on the file and false if the file is unlocked. 2238 */ 2239 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 2240 int rc = SQLITE_OK; 2241 int reserved = 0; 2242 unixFile *pFile = (unixFile*)id; 2243 2244 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2245 2246 assert( pFile ); 2247 reserved = osAccess((const char*)pFile->lockingContext, 0)==0; 2248 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); 2249 *pResOut = reserved; 2250 return rc; 2251 } 2252 2253 /* 2254 ** Lock the file with the lock specified by parameter eFileLock - one 2255 ** of the following: 2256 ** 2257 ** (1) SHARED_LOCK 2258 ** (2) RESERVED_LOCK 2259 ** (3) PENDING_LOCK 2260 ** (4) EXCLUSIVE_LOCK 2261 ** 2262 ** Sometimes when requesting one lock state, additional lock states 2263 ** are inserted in between. The locking might fail on one of the later 2264 ** transitions leaving the lock state different from what it started but 2265 ** still short of its goal. 
The following chart shows the allowed 2266 ** transitions and the inserted intermediate states: 2267 ** 2268 ** UNLOCKED -> SHARED 2269 ** SHARED -> RESERVED 2270 ** SHARED -> (PENDING) -> EXCLUSIVE 2271 ** RESERVED -> (PENDING) -> EXCLUSIVE 2272 ** PENDING -> EXCLUSIVE 2273 ** 2274 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2275 ** routine to lower a locking level. 2276 ** 2277 ** With dotfile locking, we really only support state (4): EXCLUSIVE. 2278 ** But we track the other locking levels internally. 2279 */ 2280 static int dotlockLock(sqlite3_file *id, int eFileLock) { 2281 unixFile *pFile = (unixFile*)id; 2282 char *zLockFile = (char *)pFile->lockingContext; 2283 int rc = SQLITE_OK; 2284 2285 2286 /* If we have any lock, then the lock file already exists. All we have 2287 ** to do is adjust our internal record of the lock level. 2288 */ 2289 if( pFile->eFileLock > NO_LOCK ){ 2290 pFile->eFileLock = eFileLock; 2291 /* Always update the timestamp on the old file */ 2292 #ifdef HAVE_UTIME 2293 utime(zLockFile, NULL); 2294 #else 2295 utimes(zLockFile, NULL); 2296 #endif 2297 return SQLITE_OK; 2298 } 2299 2300 /* grab an exclusive lock */ 2301 rc = osMkdir(zLockFile, 0777); 2302 if( rc<0 ){ 2303 /* failed to open/create the lock directory */ 2304 int tErrno = errno; 2305 if( EEXIST == tErrno ){ 2306 rc = SQLITE_BUSY; 2307 } else { 2308 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2309 if( rc!=SQLITE_BUSY ){ 2310 storeLastErrno(pFile, tErrno); 2311 } 2312 } 2313 return rc; 2314 } 2315 2316 /* got it, set the type and return ok */ 2317 pFile->eFileLock = eFileLock; 2318 return rc; 2319 } 2320 2321 /* 2322 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2323 ** must be either NO_LOCK or SHARED_LOCK. 2324 ** 2325 ** If the locking level of the file descriptor is already at or below 2326 ** the requested locking level, this routine is a no-op. 
2327 ** 2328 ** When the locking level reaches NO_LOCK, delete the lock file. 2329 */ 2330 static int dotlockUnlock(sqlite3_file *id, int eFileLock) { 2331 unixFile *pFile = (unixFile*)id; 2332 char *zLockFile = (char *)pFile->lockingContext; 2333 int rc; 2334 2335 assert( pFile ); 2336 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, 2337 pFile->eFileLock, osGetpid(0))); 2338 assert( eFileLock<=SHARED_LOCK ); 2339 2340 /* no-op if possible */ 2341 if( pFile->eFileLock==eFileLock ){ 2342 return SQLITE_OK; 2343 } 2344 2345 /* To downgrade to shared, simply update our internal notion of the 2346 ** lock state. No need to mess with the file on disk. 2347 */ 2348 if( eFileLock==SHARED_LOCK ){ 2349 pFile->eFileLock = SHARED_LOCK; 2350 return SQLITE_OK; 2351 } 2352 2353 /* To fully unlock the database, delete the lock file */ 2354 assert( eFileLock==NO_LOCK ); 2355 rc = osRmdir(zLockFile); 2356 if( rc<0 ){ 2357 int tErrno = errno; 2358 if( tErrno==ENOENT ){ 2359 rc = SQLITE_OK; 2360 }else{ 2361 rc = SQLITE_IOERR_UNLOCK; 2362 storeLastErrno(pFile, tErrno); 2363 } 2364 return rc; 2365 } 2366 pFile->eFileLock = NO_LOCK; 2367 return SQLITE_OK; 2368 } 2369 2370 /* 2371 ** Close a file. Make sure the lock has been released before closing. 2372 */ 2373 static int dotlockClose(sqlite3_file *id) { 2374 unixFile *pFile = (unixFile*)id; 2375 assert( id!=0 ); 2376 dotlockUnlock(id, NO_LOCK); 2377 sqlite3_free(pFile->lockingContext); 2378 return closeUnixFile(id); 2379 } 2380 /****************** End of the dot-file lock implementation ******************* 2381 ******************************************************************************/ 2382 2383 /****************************************************************************** 2384 ************************** Begin flock Locking ******************************** 2385 ** 2386 ** Use the flock() system call to do file locking. 
2387 ** 2388 ** flock() locking is like dot-file locking in that the various 2389 ** fine-grain locking levels supported by SQLite are collapsed into 2390 ** a single exclusive lock. In other words, SHARED, RESERVED, and 2391 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite 2392 ** still works when you do this, but concurrency is reduced since 2393 ** only a single process can be reading the database at a time. 2394 ** 2395 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off 2396 */ 2397 #if SQLITE_ENABLE_LOCKING_STYLE 2398 2399 /* 2400 ** Retry flock() calls that fail with EINTR 2401 */ 2402 #ifdef EINTR 2403 static int robust_flock(int fd, int op){ 2404 int rc; 2405 do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); 2406 return rc; 2407 } 2408 #else 2409 # define robust_flock(a,b) flock(a,b) 2410 #endif 2411 2412 2413 /* 2414 ** This routine checks if there is a RESERVED lock held on the specified 2415 ** file by this or any other process. If such a lock is held, set *pResOut 2416 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2417 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2418 */ 2419 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 2420 int rc = SQLITE_OK; 2421 int reserved = 0; 2422 unixFile *pFile = (unixFile*)id; 2423 2424 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2425 2426 assert( pFile ); 2427 2428 /* Check if a thread in this process holds such a lock */ 2429 if( pFile->eFileLock>SHARED_LOCK ){ 2430 reserved = 1; 2431 } 2432 2433 /* Otherwise see if some other process holds it. 
*/ 2434 if( !reserved ){ 2435 /* attempt to get the lock */ 2436 int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); 2437 if( !lrc ){ 2438 /* got the lock, unlock it */ 2439 lrc = robust_flock(pFile->h, LOCK_UN); 2440 if ( lrc ) { 2441 int tErrno = errno; 2442 /* unlock failed with an error */ 2443 lrc = SQLITE_IOERR_UNLOCK; 2444 storeLastErrno(pFile, tErrno); 2445 rc = lrc; 2446 } 2447 } else { 2448 int tErrno = errno; 2449 reserved = 1; 2450 /* someone else might have it reserved */ 2451 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2452 if( IS_LOCK_ERROR(lrc) ){ 2453 storeLastErrno(pFile, tErrno); 2454 rc = lrc; 2455 } 2456 } 2457 } 2458 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); 2459 2460 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2461 if( (rc & 0xff) == SQLITE_IOERR ){ 2462 rc = SQLITE_OK; 2463 reserved=1; 2464 } 2465 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2466 *pResOut = reserved; 2467 return rc; 2468 } 2469 2470 /* 2471 ** Lock the file with the lock specified by parameter eFileLock - one 2472 ** of the following: 2473 ** 2474 ** (1) SHARED_LOCK 2475 ** (2) RESERVED_LOCK 2476 ** (3) PENDING_LOCK 2477 ** (4) EXCLUSIVE_LOCK 2478 ** 2479 ** Sometimes when requesting one lock state, additional lock states 2480 ** are inserted in between. The locking might fail on one of the later 2481 ** transitions leaving the lock state different from what it started but 2482 ** still short of its goal. The following chart shows the allowed 2483 ** transitions and the inserted intermediate states: 2484 ** 2485 ** UNLOCKED -> SHARED 2486 ** SHARED -> RESERVED 2487 ** SHARED -> (PENDING) -> EXCLUSIVE 2488 ** RESERVED -> (PENDING) -> EXCLUSIVE 2489 ** PENDING -> EXCLUSIVE 2490 ** 2491 ** flock() only really support EXCLUSIVE locks. We track intermediate 2492 ** lock states in the sqlite3_file structure, but all locks SHARED or 2493 ** above are really EXCLUSIVE locks and exclude all other processes from 2494 ** access the file. 
2495 ** 2496 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2497 ** routine to lower a locking level. 2498 */ 2499 static int flockLock(sqlite3_file *id, int eFileLock) { 2500 int rc = SQLITE_OK; 2501 unixFile *pFile = (unixFile*)id; 2502 2503 assert( pFile ); 2504 2505 /* if we already have a lock, it is exclusive. 2506 ** Just adjust level and punt on outta here. */ 2507 if (pFile->eFileLock > NO_LOCK) { 2508 pFile->eFileLock = eFileLock; 2509 return SQLITE_OK; 2510 } 2511 2512 /* grab an exclusive lock */ 2513 2514 if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { 2515 int tErrno = errno; 2516 /* didn't get, must be busy */ 2517 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2518 if( IS_LOCK_ERROR(rc) ){ 2519 storeLastErrno(pFile, tErrno); 2520 } 2521 } else { 2522 /* got it, set the type and return ok */ 2523 pFile->eFileLock = eFileLock; 2524 } 2525 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 2526 rc==SQLITE_OK ? "ok" : "failed")); 2527 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2528 if( (rc & 0xff) == SQLITE_IOERR ){ 2529 rc = SQLITE_BUSY; 2530 } 2531 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2532 return rc; 2533 } 2534 2535 2536 /* 2537 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2538 ** must be either NO_LOCK or SHARED_LOCK. 2539 ** 2540 ** If the locking level of the file descriptor is already at or below 2541 ** the requested locking level, this routine is a no-op. 
2542 */ 2543 static int flockUnlock(sqlite3_file *id, int eFileLock) { 2544 unixFile *pFile = (unixFile*)id; 2545 2546 assert( pFile ); 2547 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, 2548 pFile->eFileLock, osGetpid(0))); 2549 assert( eFileLock<=SHARED_LOCK ); 2550 2551 /* no-op if possible */ 2552 if( pFile->eFileLock==eFileLock ){ 2553 return SQLITE_OK; 2554 } 2555 2556 /* shared can just be set because we always have an exclusive */ 2557 if (eFileLock==SHARED_LOCK) { 2558 pFile->eFileLock = eFileLock; 2559 return SQLITE_OK; 2560 } 2561 2562 /* no, really, unlock. */ 2563 if( robust_flock(pFile->h, LOCK_UN) ){ 2564 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2565 return SQLITE_OK; 2566 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2567 return SQLITE_IOERR_UNLOCK; 2568 }else{ 2569 pFile->eFileLock = NO_LOCK; 2570 return SQLITE_OK; 2571 } 2572 } 2573 2574 /* 2575 ** Close a file. 2576 */ 2577 static int flockClose(sqlite3_file *id) { 2578 assert( id!=0 ); 2579 flockUnlock(id, NO_LOCK); 2580 return closeUnixFile(id); 2581 } 2582 2583 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ 2584 2585 /******************* End of the flock lock implementation ********************* 2586 ******************************************************************************/ 2587 2588 /****************************************************************************** 2589 ************************ Begin Named Semaphore Locking ************************ 2590 ** 2591 ** Named semaphore locking is only supported on VxWorks. 2592 ** 2593 ** Semaphore locking is like dot-lock and flock in that it really only 2594 ** supports EXCLUSIVE locking. Only a single process can read or write 2595 ** the database file at a time. This reduces potential concurrency, but 2596 ** makes the lock implementation much easier. 2597 */ 2598 #if OS_VXWORKS 2599 2600 /* 2601 ** This routine checks if there is a RESERVED lock held on the specified 2602 ** file by this or any other process. 
If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero.  The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) {
  unixFile *pFile = (unixFile*)id;
  int rc = SQLITE_OK;
  int isHeld;

  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

  assert( pFile );

  /* Check if a thread in this process holds such a lock */
  isHeld = ( pFile->eFileLock>SHARED_LOCK );

  /* Otherwise see if some other process holds it. */
  if( !isHeld ){
    sem_t *pSem = pFile->pInode->pSem;

    if( sem_trywait(pSem)!=-1 ){
      /* we could have it if we want it */
      sem_post(pSem);
    }else{
      int tErrno = errno;
      if( tErrno==EAGAIN ){
        /* someone else has the lock when we are in NO_LOCK */
        isHeld = ( pFile->eFileLock < SHARED_LOCK );
      }else{
        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
        storeLastErrno(pFile, tErrno);
      }
    }
  }
  OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, isHeld));

  *pResOut = isHeld;
  return rc;
}

/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
**     (1) SHARED_LOCK
**     (2) RESERVED_LOCK
**     (3) PENDING_LOCK
**     (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between.  The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal.
The following chart shows the allowed 2657 ** transitions and the inserted intermediate states: 2658 ** 2659 ** UNLOCKED -> SHARED 2660 ** SHARED -> RESERVED 2661 ** SHARED -> (PENDING) -> EXCLUSIVE 2662 ** RESERVED -> (PENDING) -> EXCLUSIVE 2663 ** PENDING -> EXCLUSIVE 2664 ** 2665 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate 2666 ** lock states in the sqlite3_file structure, but all locks SHARED or 2667 ** above are really EXCLUSIVE locks and exclude all other processes from 2668 ** access the file. 2669 ** 2670 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2671 ** routine to lower a locking level. 2672 */ 2673 static int semXLock(sqlite3_file *id, int eFileLock) { 2674 unixFile *pFile = (unixFile*)id; 2675 sem_t *pSem = pFile->pInode->pSem; 2676 int rc = SQLITE_OK; 2677 2678 /* if we already have a lock, it is exclusive. 2679 ** Just adjust level and punt on outta here. */ 2680 if (pFile->eFileLock > NO_LOCK) { 2681 pFile->eFileLock = eFileLock; 2682 rc = SQLITE_OK; 2683 goto sem_end_lock; 2684 } 2685 2686 /* lock semaphore now but bail out when already locked. */ 2687 if( sem_trywait(pSem)==-1 ){ 2688 rc = SQLITE_BUSY; 2689 goto sem_end_lock; 2690 } 2691 2692 /* got it, set the type and return ok */ 2693 pFile->eFileLock = eFileLock; 2694 2695 sem_end_lock: 2696 return rc; 2697 } 2698 2699 /* 2700 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2701 ** must be either NO_LOCK or SHARED_LOCK. 2702 ** 2703 ** If the locking level of the file descriptor is already at or below 2704 ** the requested locking level, this routine is a no-op. 
2705 */ 2706 static int semXUnlock(sqlite3_file *id, int eFileLock) { 2707 unixFile *pFile = (unixFile*)id; 2708 sem_t *pSem = pFile->pInode->pSem; 2709 2710 assert( pFile ); 2711 assert( pSem ); 2712 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, 2713 pFile->eFileLock, osGetpid(0))); 2714 assert( eFileLock<=SHARED_LOCK ); 2715 2716 /* no-op if possible */ 2717 if( pFile->eFileLock==eFileLock ){ 2718 return SQLITE_OK; 2719 } 2720 2721 /* shared can just be set because we always have an exclusive */ 2722 if (eFileLock==SHARED_LOCK) { 2723 pFile->eFileLock = eFileLock; 2724 return SQLITE_OK; 2725 } 2726 2727 /* no, really unlock. */ 2728 if ( sem_post(pSem)==-1 ) { 2729 int rc, tErrno = errno; 2730 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 2731 if( IS_LOCK_ERROR(rc) ){ 2732 storeLastErrno(pFile, tErrno); 2733 } 2734 return rc; 2735 } 2736 pFile->eFileLock = NO_LOCK; 2737 return SQLITE_OK; 2738 } 2739 2740 /* 2741 ** Close a file. 2742 */ 2743 static int semXClose(sqlite3_file *id) { 2744 if( id ){ 2745 unixFile *pFile = (unixFile*)id; 2746 semXUnlock(id, NO_LOCK); 2747 assert( pFile ); 2748 assert( unixFileMutexNotheld(pFile) ); 2749 unixEnterMutex(); 2750 releaseInodeInfo(pFile); 2751 unixLeaveMutex(); 2752 closeUnixFile(id); 2753 } 2754 return SQLITE_OK; 2755 } 2756 2757 #endif /* OS_VXWORKS */ 2758 /* 2759 ** Named semaphore locking is only available on VxWorks. 2760 ** 2761 *************** End of the named semaphore lock implementation **************** 2762 ******************************************************************************/ 2763 2764 2765 /****************************************************************************** 2766 *************************** Begin AFP Locking ********************************* 2767 ** 2768 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found 2769 ** on Apple Macintosh computers - both OS9 and OSX. 2770 ** 2771 ** Third-party implementations of AFP are available. 
But this code here 2772 ** only works on OSX. 2773 */ 2774 2775 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2776 /* 2777 ** The afpLockingContext structure contains all afp lock specific state 2778 */ 2779 typedef struct afpLockingContext afpLockingContext; 2780 struct afpLockingContext { 2781 int reserved; /* Set to 1 by afpLock() after it locks RESERVED_BYTE; cleared by afpUnlock() */ 2782 const char *dbPath; /* Name of the open file */ 2783 }; 2784 2785 /* Argument block that afpSetLock() passes to fsctl() */ struct ByteRangeLockPB2 2786 { 2787 unsigned long long offset; /* offset to first byte to lock */ 2788 unsigned long long length; /* nbr of bytes to lock */ 2789 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 2790 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 2791 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 2792 int fd; /* file desc to assoc this lock with */ 2793 }; 2794 2795 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 2796 2797 /* 2798 ** This is a utility for setting or clearing a byte-range lock on an 2799 ** AFP filesystem. 2800 ** 2801 ** Return SQLITE_OK on success. If fsctl() fails, return SQLITE_BUSY when ** SQLITE_IGNORE_AFP_LOCK_ERRORS is defined; otherwise return the code that ** sqliteErrorFromPosixError() produces for the saved errno. The errno is ** stored in pFile only when IS_LOCK_ERROR() is true for that code. 2802 */ 2803 static int afpSetLock( 2804 const char *path, /* Name of the file to be locked or unlocked */ 2805 unixFile *pFile, /* Open file descriptor on path */ 2806 unsigned long long offset, /* First byte to be locked */ 2807 unsigned long long length, /* Number of bytes to lock */ 2808 int setLockFlag /* True to set lock. False to clear lock */ 2809 ){ 2810 struct ByteRangeLockPB2 pb; 2811 int err; 2812 2813 pb.unLockFlag = setLockFlag ?
0 : 1; 2814 pb.startEndFlag = 0; 2815 pb.offset = offset; 2816 pb.length = length; 2817 pb.fd = pFile->h; 2818 2819 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 2820 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), 2821 offset, length)); 2822 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 2823 if ( err==-1 ) { 2824 int rc; 2825 int tErrno = errno; /* save errno right after the failing call */ 2826 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", 2827 path, tErrno, strerror(tErrno))); 2828 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS 2829 rc = SQLITE_BUSY; 2830 #else 2831 rc = sqliteErrorFromPosixError(tErrno, 2832 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); 2833 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ 2834 if( IS_LOCK_ERROR(rc) ){ 2835 storeLastErrno(pFile, tErrno); 2836 } 2837 return rc; 2838 } else { 2839 return SQLITE_OK; 2840 } 2841 } 2842 2843 /* 2844 ** This routine checks if there is a RESERVED lock held on the specified 2845 ** file by this or any other process. If such a lock is held, set *pResOut 2846 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2847 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2848 */ 2849 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 2850 int rc = SQLITE_OK; 2851 int reserved = 0; 2852 unixFile *pFile = (unixFile*)id; 2853 afpLockingContext *context; 2854 2855 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2856 2857 assert( pFile ); 2858 context = (afpLockingContext *) pFile->lockingContext; /* This handle's own context says it holds the RESERVED byte: answer at once, before the inode mutex is entered. */ 2859 if( context->reserved ){ 2860 *pResOut = 1; 2861 return SQLITE_OK; 2862 } 2863 sqlite3_mutex_enter(pFile->pInode->pLockMutex); 2864 /* Check if a thread in this process holds such a lock */ 2865 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 2866 reserved = 1; 2867 } 2868 2869 /* Otherwise see if some other process holds it.
2870 */ 2871 if( !reserved ){ 2872 /* lock the RESERVED byte */ 2873 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2874 if( SQLITE_OK==lrc ){ 2875 /* if we succeeded in taking the reserved lock, unlock it to restore 2876 ** the original state */ 2877 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2878 } else { 2879 /* if we failed to get the lock then someone else must have it */ 2880 reserved = 1; 2881 } /* Only a result that IS_LOCK_ERROR() flags is passed back to the caller; any other result leaves rc at SQLITE_OK. */ 2882 if( IS_LOCK_ERROR(lrc) ){ 2883 rc=lrc; 2884 } 2885 } 2886 2887 sqlite3_mutex_leave(pFile->pInode->pLockMutex); 2888 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); 2889 2890 *pResOut = reserved; 2891 return rc; 2892 } 2893 2894 /* 2895 ** Lock the file with the lock specified by parameter eFileLock - one 2896 ** of the following: 2897 ** 2898 ** (1) SHARED_LOCK 2899 ** (2) RESERVED_LOCK 2900 ** (3) PENDING_LOCK 2901 ** (4) EXCLUSIVE_LOCK 2902 ** 2903 ** Sometimes when requesting one lock state, additional lock states 2904 ** are inserted in between. The locking might fail on one of the later 2905 ** transitions leaving the lock state different from what it started but 2906 ** still short of its goal. The following chart shows the allowed 2907 ** transitions and the inserted intermediate states: 2908 ** 2909 ** UNLOCKED -> SHARED 2910 ** SHARED -> RESERVED 2911 ** SHARED -> (PENDING) -> EXCLUSIVE 2912 ** RESERVED -> (PENDING) -> EXCLUSIVE 2913 ** PENDING -> EXCLUSIVE 2914 ** 2915 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2916 ** routine to lower a locking level.
2917 */ 2918 static int afpLock(sqlite3_file *id, int eFileLock){ 2919 int rc = SQLITE_OK; 2920 unixFile *pFile = (unixFile*)id; 2921 unixInodeInfo *pInode = pFile->pInode; 2922 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2923 2924 assert( pFile ); 2925 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, 2926 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 2927 azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); 2928 2929 /* If there is already a lock of this type or more restrictive on the 2930 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 2931 ** pInode->pLockMutex has not been entered yet. 2932 */ 2933 if( pFile->eFileLock>=eFileLock ){ 2934 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, 2935 azFileLock(eFileLock))); 2936 return SQLITE_OK; 2937 } 2938 2939 /* Make sure the locking sequence is correct 2940 ** (1) We never move from unlocked to anything higher than shared lock. 2941 ** (2) SQLite never explicitly requests a pending lock. 2942 ** (3) A shared lock is always held when a reserved lock is requested. 2943 */ 2944 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 2945 assert( eFileLock!=PENDING_LOCK ); 2946 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 2947 2948 /* This mutex is needed because pFile->pInode is shared across threads 2949 */ 2950 pInode = pFile->pInode; 2951 sqlite3_mutex_enter(pInode->pLockMutex); 2952 2953 /* If some thread using this PID has a lock via a different unixFile* 2954 ** handle that precludes the requested lock, return BUSY. 2955 */ 2956 if( (pFile->eFileLock!=pInode->eFileLock && 2957 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 2958 ){ 2959 rc = SQLITE_BUSY; 2960 goto afp_end_lock; 2961 } 2962 2963 /* If a SHARED lock is requested, and some thread using this PID already 2964 ** has a SHARED or RESERVED lock, then increment reference counts and 2965 ** return SQLITE_OK.
2966 */ 2967 if( eFileLock==SHARED_LOCK && 2968 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 2969 assert( eFileLock==SHARED_LOCK ); 2970 assert( pFile->eFileLock==0 ); 2971 assert( pInode->nShared>0 ); 2972 pFile->eFileLock = SHARED_LOCK; 2973 pInode->nShared++; 2974 pInode->nLock++; 2975 goto afp_end_lock; 2976 } 2977 2978 /* A PENDING lock is needed before acquiring a SHARED lock and before 2979 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 2980 ** be released. 2981 */ 2982 if( eFileLock==SHARED_LOCK 2983 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 2984 ){ 2985 int failed; 2986 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); 2987 if (failed) { 2988 rc = failed; 2989 goto afp_end_lock; 2990 } 2991 } 2992 2993 /* If control gets to this point, then actually go ahead and make 2994 ** operating system calls for the specified lock. 2995 */ 2996 if( eFileLock==SHARED_LOCK ){ 2997 int lrc1, lrc2, lrc1Errno = 0; 2998 long lk, mask; 2999 3000 assert( pInode->nShared==0 ); 3001 assert( pInode->eFileLock==0 ); 3002 3003 mask = (sizeof(long)==8) ?
LARGEST_INT64 : 0x7fffffff; 3004 /* Now get the read-lock SHARED_LOCK */ 3005 /* note that the quality of the randomness doesn't matter that much */ 3006 lk = random(); 3007 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); 3008 lrc1 = afpSetLock(context->dbPath, pFile, 3009 SHARED_FIRST+pInode->sharedByte, 1, 1); /* Remember the errno stored for a shared-byte failure: the PENDING unlock below goes through afpSetLock() again, which may store a different errno in pFile. */ 3010 if( IS_LOCK_ERROR(lrc1) ){ 3011 lrc1Errno = pFile->lastErrno; 3012 } 3013 /* Drop the temporary PENDING lock */ 3014 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3015 3016 if( IS_LOCK_ERROR(lrc1) ) { 3017 storeLastErrno(pFile, lrc1Errno); 3018 rc = lrc1; 3019 goto afp_end_lock; 3020 } else if( IS_LOCK_ERROR(lrc2) ){ 3021 rc = lrc2; 3022 goto afp_end_lock; 3023 } else if( lrc1 != SQLITE_OK ) { 3024 rc = lrc1; 3025 } else { 3026 pFile->eFileLock = SHARED_LOCK; 3027 pInode->nLock++; 3028 pInode->nShared = 1; 3029 } 3030 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 3031 /* We are trying for an exclusive lock but another thread in this 3032 ** same process is still holding a shared lock. */ 3033 rc = SQLITE_BUSY; 3034 }else{ 3035 /* The request was for a RESERVED or EXCLUSIVE lock. It is 3036 ** assumed that there is a SHARED or greater lock on the file 3037 ** already. 3038 */ 3039 int failed = 0; 3040 assert( 0!=pFile->eFileLock ); 3041 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { 3042 /* Acquire a RESERVED lock */ 3043 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 3044 if( !failed ){ 3045 context->reserved = 1; 3046 } 3047 } 3048 if (!failed && eFileLock == EXCLUSIVE_LOCK) { 3049 /* Acquire an EXCLUSIVE lock */ 3050 3051 /* Remove the shared lock before trying the range.
We'll need to 3052 ** reestablish the shared lock if we can't get the exclusive range 3053 */ 3054 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + 3055 pInode->sharedByte, 1, 0)) ){ 3056 int failed2 = SQLITE_OK; 3057 /* now attempt to get the exclusive lock range */ 3058 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 3059 SHARED_SIZE, 1); 3060 if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 3061 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ 3062 /* Can't reestablish the shared lock. Sqlite can't deal, this is 3063 ** a critical I/O error 3064 */ 3065 rc = ((failed & 0xff) == SQLITE_IOERR) ? failed2 : 3066 SQLITE_IOERR_LOCK; 3067 goto afp_end_lock; 3068 } 3069 }else{ 3070 rc = failed; 3071 } 3072 } 3073 if( failed ){ 3074 rc = failed; 3075 } 3076 } 3077 3078 if( rc==SQLITE_OK ){ 3079 pFile->eFileLock = eFileLock; 3080 pInode->eFileLock = eFileLock; 3081 }else if( eFileLock==EXCLUSIVE_LOCK ){ /* a failed EXCLUSIVE request that reaches this point leaves both the handle and the inode recorded at PENDING */ 3082 pFile->eFileLock = PENDING_LOCK; 3083 pInode->eFileLock = PENDING_LOCK; 3084 } 3085 3086 afp_end_lock: 3087 sqlite3_mutex_leave(pInode->pLockMutex); 3088 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 3089 rc==SQLITE_OK ? "ok" : "failed")); 3090 return rc; 3091 } 3092 3093 /* 3094 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3095 ** must be either NO_LOCK or SHARED_LOCK. 3096 ** 3097 ** If the locking level of the file descriptor is already at or below 3098 ** the requested locking level, this routine is a no-op.
3099 */ 3100 static int afpUnlock(sqlite3_file *id, int eFileLock) { 3101 int rc = SQLITE_OK; 3102 unixFile *pFile = (unixFile*)id; 3103 unixInodeInfo *pInode; 3104 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 3105 int skipShared = 0; /* Set when the shared byte is not re-locked after the exclusive range is released; the NO_LOCK path then skips unlocking that byte */ 3106 #ifdef SQLITE_TEST 3107 int h = pFile->h; 3108 #endif 3109 3110 assert( pFile ); 3111 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, 3112 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 3113 osGetpid(0))); 3114 3115 assert( eFileLock<=SHARED_LOCK ); /* Already at or below the requested level: return before the inode mutex is entered. */ 3116 if( pFile->eFileLock<=eFileLock ){ 3117 return SQLITE_OK; 3118 } 3119 pInode = pFile->pInode; 3120 sqlite3_mutex_enter(pInode->pLockMutex); 3121 assert( pInode->nShared!=0 ); 3122 if( pFile->eFileLock>SHARED_LOCK ){ 3123 assert( pInode->eFileLock==pFile->eFileLock ); 3124 SimulateIOErrorBenign(1); 3125 SimulateIOError( h=(-1) ) 3126 SimulateIOErrorBenign(0); 3127 3128 #ifdef SQLITE_DEBUG 3129 /* When reducing a lock such that other processes can start 3130 ** reading the database file again, make sure that the 3131 ** transaction counter was updated if any part of the database 3132 ** file changed. If the transaction counter is not updated, 3133 ** other connections to the same file might not realize that 3134 ** the file has changed and hence might not know to flush their 3135 ** cache. The use of a stale cache can lead to database corruption.
3136 */ 3137 assert( pFile->inNormalWrite==0 3138 || pFile->dbUpdate==0 3139 || pFile->transCntrChng==1 ); 3140 pFile->inNormalWrite = 0; 3141 #endif 3142 3143 if( pFile->eFileLock==EXCLUSIVE_LOCK ){ /* release the whole exclusive byte range first */ 3144 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); 3145 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ 3146 /* only re-establish the shared lock if necessary */ 3147 int sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3148 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); 3149 } else { 3150 skipShared = 1; 3151 } 3152 } /* Each later step runs only while rc is still SQLITE_OK: release the PENDING byte, then the RESERVED byte if this handle's context recorded taking it. */ 3153 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ 3154 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3155 } 3156 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ 3157 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 3158 if( !rc ){ 3159 context->reserved = 0; 3160 } 3161 } 3162 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ 3163 pInode->eFileLock = SHARED_LOCK; 3164 } 3165 } 3166 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ 3167 3168 /* Decrement the shared lock counter. Release the lock using an 3169 ** OS call only when all threads in this same process have released 3170 ** the lock.
3171 */ 3172 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3173 pInode->nShared--; 3174 if( pInode->nShared==0 ){ 3175 SimulateIOErrorBenign(1); 3176 SimulateIOError( h=(-1) ) 3177 SimulateIOErrorBenign(0); 3178 if( !skipShared ){ 3179 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); 3180 } /* the recorded levels drop to NO_LOCK only when the unlock call succeeded or was skipped */ 3181 if( !rc ){ 3182 pInode->eFileLock = NO_LOCK; 3183 pFile->eFileLock = NO_LOCK; 3184 } 3185 } 3186 if( rc==SQLITE_OK ){ 3187 pInode->nLock--; 3188 assert( pInode->nLock>=0 ); 3189 if( pInode->nLock==0 ) closePendingFds(pFile); 3190 } 3191 } 3192 3193 sqlite3_mutex_leave(pInode->pLockMutex); 3194 if( rc==SQLITE_OK ){ 3195 pFile->eFileLock = eFileLock; 3196 } 3197 return rc; 3198 } 3199 3200 /* 3201 ** Close a file & cleanup AFP specific locking context. The result of the ** initial afpUnlock() call is not checked; the value returned is the one ** produced by closeUnixFile(). 3202 */ 3203 static int afpClose(sqlite3_file *id) { 3204 int rc = SQLITE_OK; 3205 unixFile *pFile = (unixFile*)id; 3206 assert( id!=0 ); 3207 afpUnlock(id, NO_LOCK); 3208 assert( unixFileMutexNotheld(pFile) ); 3209 unixEnterMutex(); 3210 if( pFile->pInode ){ 3211 unixInodeInfo *pInode = pFile->pInode; 3212 sqlite3_mutex_enter(pInode->pLockMutex); 3213 if( pInode->nLock ){ 3214 /* If there are outstanding locks, do not actually close the file just 3215 ** yet because that would clear those locks. Instead, add the file 3216 ** descriptor to pInode->aPending. It will be automatically closed when 3217 ** the last lock is cleared. 3218 */ 3219 setPendingFd(pFile); 3220 } 3221 sqlite3_mutex_leave(pInode->pLockMutex); 3222 } 3223 releaseInodeInfo(pFile); 3224 sqlite3_free(pFile->lockingContext); 3225 rc = closeUnixFile(id); 3226 unixLeaveMutex(); 3227 return rc; 3228 } 3229 3230 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3231 /* 3232 ** The code above is the AFP lock implementation. The code is specific 3233 ** to MacOSX and does not work on other unix platforms. No alternative 3234 ** is available.
If you don't compile for a mac, then the "unix-afp" 3235 ** VFS is not available. 3236 ** 3237 ********************* End of the AFP lock implementation ********************** 3238 ******************************************************************************/ 3239 3240 /****************************************************************************** 3241 *************************** Begin NFS Locking ********************************/ 3242 3243 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 3244 /* 3245 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3246 ** must be either NO_LOCK or SHARED_LOCK. 3247 ** 3248 ** If the locking level of the file descriptor is already at or below 3249 ** the requested locking level, this routine is a no-op. ** ** All of the work is delegated to posixUnlock(), which is called with 1 ** as its third argument; its return value is passed back unchanged. 3250 */ 3251 static int nfsUnlock(sqlite3_file *id, int eFileLock){ 3252 return posixUnlock(id, eFileLock, 1); 3253 } 3254 3255 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3256 /* 3257 ** The code above is the NFS lock implementation. The code is specific 3258 ** to MacOSX and does not work on other unix platforms. No alternative 3259 ** is available. 3260 ** 3261 ********************* End of the NFS lock implementation ********************** 3262 ******************************************************************************/ 3263 3264 /****************************************************************************** 3265 **************** Non-locking sqlite3_file methods ***************************** 3266 ** 3267 ** The next division contains implementations for all methods of the 3268 ** sqlite3_file object other than the locking methods. The locking 3269 ** methods were defined in divisions above (one locking method per 3270 ** division). Those methods that are common to all locking modes 3271 ** are gathered together into this division. 3272 */ 3273 3274 /* 3275 ** Seek to the offset passed as the second argument, then read cnt 3276 ** bytes into pBuf.
Return the number of bytes actually read. 3277 ** 3278 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 3279 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 3280 ** one system to another. Since SQLite does not define USE_PREAD 3281 ** in any form by default, we will not attempt to define _XOPEN_SOURCE. 3282 ** See tickets #2741 and #2681. 3283 ** 3284 ** To avoid stomping the errno value on a failed read the lastErrno value 3285 ** is set before returning. 3286 */ 3287 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 3288 int got; /* result of the most recent read call */ 3289 int prior = 0; /* bytes delivered by earlier passes through the loop */ 3290 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 3291 i64 newOffset; 3292 #endif 3293 TIMER_START; 3294 assert( cnt==(cnt&0x1ffff) ); 3295 assert( id->h>2 ); 3296 do{ 3297 #if defined(USE_PREAD) 3298 got = osPread(id->h, pBuf, cnt, offset); 3299 SimulateIOError( got = -1 ); 3300 #elif defined(USE_PREAD64) 3301 got = osPread64(id->h, pBuf, cnt, offset); 3302 SimulateIOError( got = -1 ); 3303 #else 3304 newOffset = lseek(id->h, offset, SEEK_SET); 3305 SimulateIOError( newOffset = -1 ); 3306 if( newOffset<0 ){ 3307 storeLastErrno((unixFile*)id, errno); 3308 return -1; 3309 } 3310 got = osRead(id->h, pBuf, cnt); 3311 #endif 3312 if( got==cnt ) break; 3313 if( got<0 ){ 3314 /* EINTR: set got to 1 so that the while(got>0) test passes and the same read is retried */ if( errno==EINTR ){ got = 1; continue; } 3315 /* any other error: forget the partial count so that the value returned is got itself, which is negative */ prior = 0; 3316 storeLastErrno((unixFile*)id, errno); 3317 break; 3318 }else if( got>0 ){ 3319 /* short read: advance past the bytes received and go around again for the remainder */ cnt -= got; 3320 offset += got; 3321 prior += got; 3322 pBuf = (void*)(got + (char*)pBuf); 3323 } 3324 }while( got>0 ); 3325 TIMER_END; 3326 OSTRACE(("READ %-3d %5d %7lld %llu\n", 3327 id->h, got+prior, offset-prior, TIMER_ELAPSED)); 3328 return got+prior; 3329 } 3330 3331 /* 3332 ** Read data from a file into a buffer. Return SQLITE_OK if all 3333 ** bytes were read successfully and SQLITE_IOERR if anything goes 3334 ** wrong.
3335 */ 3336 static int unixRead( 3337 sqlite3_file *id, 3338 void *pBuf, 3339 int amt, 3340 sqlite3_int64 offset 3341 ){ 3342 unixFile *pFile = (unixFile *)id; 3343 int got; 3344 assert( id ); 3345 assert( offset>=0 ); 3346 assert( amt>0 ); 3347 3348 /* If this is a database file (not a journal, super-journal or temp 3349 ** file), the bytes in the locking range should never be read or written. */ 3350 #if 0 3351 assert( pFile->pPreallocatedUnused==0 3352 || offset>=PENDING_BYTE+512 3353 || offset+amt<=PENDING_BYTE 3354 ); 3355 #endif 3356 3357 #if SQLITE_MAX_MMAP_SIZE>0 3358 /* Deal with as much of this read request as possible by transferring 3359 ** data from the memory mapping using memcpy(). */ 3360 if( offset<pFile->mmapSize ){ 3361 if( offset+amt <= pFile->mmapSize ){ 3362 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); 3363 return SQLITE_OK; 3364 }else{ 3365 /* only a prefix is mapped: copy it, then let the read below fetch the rest */ int nCopy = pFile->mmapSize - offset; 3366 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); 3367 pBuf = &((u8 *)pBuf)[nCopy]; 3368 amt -= nCopy; 3369 offset += nCopy; 3370 } 3371 } 3372 #endif 3373 3374 got = seekAndRead(pFile, offset, pBuf, amt); 3375 if( got==amt ){ 3376 return SQLITE_OK; 3377 }else if( got<0 ){ 3378 /* pFile->lastErrno has been set by seekAndRead(). 3379 ** Usually we return SQLITE_IOERR_READ here, though for some 3380 ** kinds of errors we return SQLITE_IOERR_CORRUPTFS. The 3381 ** SQLITE_IOERR_CORRUPTFS will be converted into SQLITE_CORRUPT 3382 ** prior to returning to the application by the sqlite3ApiExit() 3383 ** routine.
3384 */ 3385 switch( pFile->lastErrno ){ 3386 case ERANGE: 3387 case EIO: 3388 #ifdef ENXIO 3389 case ENXIO: 3390 #endif 3391 #ifdef EDEVERR 3392 case EDEVERR: 3393 #endif 3394 return SQLITE_IOERR_CORRUPTFS; 3395 } 3396 return SQLITE_IOERR_READ; 3397 }else{ 3398 storeLastErrno(pFile, 0); /* not a system error */ 3399 /* Unread parts of the buffer must be zero-filled */ 3400 memset(&((char*)pBuf)[got], 0, amt-got); 3401 return SQLITE_IOERR_SHORT_READ; 3402 } 3403 } 3404 3405 /* 3406 ** Attempt to seek the file-descriptor passed as the first argument to 3407 ** absolute offset iOff, then attempt to write nBuf bytes of data from 3408 ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 3409 ** return the actual number of bytes written (which may be less than 3410 ** nBuf). 3411 */ 3412 static int seekAndWriteFd( 3413 int fd, /* File descriptor to write to */ 3414 i64 iOff, /* File offset to begin writing at */ 3415 const void *pBuf, /* Copy data from this buffer to the file */ 3416 int nBuf, /* Size of buffer pBuf in bytes */ 3417 int *piErrno /* OUT: Error number if error occurs */ 3418 ){ 3419 int rc = 0; /* Value returned by system call */ 3420 3421 assert( nBuf==(nBuf&0x1ffff) ); 3422 assert( fd>2 ); 3423 assert( piErrno!=0 ); 3424 nBuf &= 0x1ffff; /* keep only the low 17 bits, the same range the assert above checks */ 3425 TIMER_START; 3426 3427 #if defined(USE_PREAD) 3428 do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); 3429 #elif defined(USE_PREAD64) 3430 do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); 3431 #else 3432 do{ 3433 i64 iSeek = lseek(fd, iOff, SEEK_SET); 3434 SimulateIOError( iSeek = -1 ); 3435 if( iSeek<0 ){ 3436 rc = -1; 3437 break; 3438 } 3439 rc = osWrite(fd, pBuf, nBuf); 3440 }while( rc<0 && errno==EINTR ); 3441 #endif 3442 3443 TIMER_END; 3444 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); 3445 3446 if( rc<0 ) *piErrno = errno; 3447 return rc; 3448 } 3449 3450 3451 /* 3452 ** Seek to offset, then
write cnt bytes from pBuf to the file. 3453 ** Return the number of bytes actually written. 3454 ** 3455 ** To avoid stomping the errno value on a failed write the lastErrno value 3456 ** is set before returning. 3457 */ 3458 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 3459 return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); 3460 } 3461 3462 3463 /* 3464 ** Write data from a buffer into a file. Return SQLITE_OK on success, 3465 ** or SQLITE_IOERR_WRITE or SQLITE_FULL on failure. 3466 */ 3467 static int unixWrite( 3468 sqlite3_file *id, 3469 const void *pBuf, 3470 int amt, 3471 sqlite3_int64 offset 3472 ){ 3473 unixFile *pFile = (unixFile*)id; 3474 int wrote = 0; 3475 assert( id ); 3476 assert( amt>0 ); 3477 3478 /* If this is a database file (not a journal, super-journal or temp 3479 ** file), the bytes in the locking range should never be read or written. */ 3480 #if 0 3481 assert( pFile->pPreallocatedUnused==0 3482 || offset>=PENDING_BYTE+512 3483 || offset+amt<=PENDING_BYTE 3484 ); 3485 #endif 3486 3487 #ifdef SQLITE_DEBUG 3488 /* If we are doing a normal write to a database file (as opposed to 3489 ** doing a hot-journal rollback or a write to some file other than a 3490 ** normal database file) then record the fact that the database 3491 ** has changed. If the transaction counter is modified, record that 3492 ** fact too.
3493 */ 3494 if( pFile->inNormalWrite ){ 3495 pFile->dbUpdate = 1; /* The database has been modified */ 3496 if( offset<=24 && offset+amt>=27 ){ 3497 int rc; 3498 char oldCntr[4]; 3499 SimulateIOErrorBenign(1); 3500 rc = seekAndRead(pFile, 24, oldCntr, 4); 3501 SimulateIOErrorBenign(0); 3502 if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ 3503 pFile->transCntrChng = 1; /* The transaction counter has changed */ 3504 } 3505 } 3506 } 3507 #endif 3508 3509 #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 3510 /* Deal with as much of this write request as possible by transferring 3511 ** data to the memory mapping using memcpy(). */ 3512 if( offset<pFile->mmapSize ){ 3513 if( offset+amt <= pFile->mmapSize ){ 3514 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); 3515 return SQLITE_OK; 3516 }else{ 3517 int nCopy = pFile->mmapSize - offset; 3518 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); 3519 pBuf = &((u8 *)pBuf)[nCopy]; 3520 amt -= nCopy; 3521 offset += nCopy; 3522 } 3523 } 3524 #endif 3525 3526 /* Keep writing while a call makes partial progress: the loop ends once a call writes everything that remains, or returns zero or a negative value. */ while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ 3527 amt -= wrote; 3528 offset += wrote; 3529 pBuf = &((char*)pBuf)[wrote]; 3530 } 3531 SimulateIOError(( wrote=(-1), amt=1 )); 3532 SimulateDiskfullError(( wrote=0, amt=1 )); 3533 3534 /* A negative result whose errno is not ENOSPC is an I/O error; every other shortfall is reported as SQLITE_FULL. */ if( amt>wrote ){ 3535 if( wrote<0 && pFile->lastErrno!=ENOSPC ){ 3536 /* lastErrno set by seekAndWrite */ 3537 return SQLITE_IOERR_WRITE; 3538 }else{ 3539 storeLastErrno(pFile, 0); /* not a system error */ 3540 return SQLITE_FULL; 3541 } 3542 } 3543 3544 return SQLITE_OK; 3545 } 3546 3547 #ifdef SQLITE_TEST 3548 /* 3549 ** Count the number of fullsyncs and normal syncs. This is used to test 3550 ** that syncs and fullsyncs are occurring at the right times. 3551 */ 3552 int sqlite3_sync_count = 0; 3553 int sqlite3_fullsync_count = 0; 3554 #endif 3555 3556 /* 3557 ** We do not trust systems to provide a working fdatasync(). Some do. 3558 ** Others do no.
To be safe, we will stick with the (slightly slower) 3559 ** fsync(). If you know that your system does support fdatasync() correctly, 3560 ** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC 3561 */ /* When no fdatasync macro exists and HAVE_FDATASYNC is not true, every use of the name fdatasync below compiles as fsync. */ 3562 #if !defined(fdatasync) && !HAVE_FDATASYNC 3563 # define fdatasync fsync 3564 #endif 3565 3566 /* 3567 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 3568 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 3569 ** only available on Mac OS X. But that could change. 3570 */ 3571 #ifdef F_FULLFSYNC 3572 # define HAVE_FULLFSYNC 1 3573 #else 3574 # define HAVE_FULLFSYNC 0 3575 #endif 3576 3577 3578 /* 3579 ** The fsync() system call does not work as advertised on many 3580 ** unix systems. The following procedure is an attempt to make 3581 ** it work better. 3582 ** 3583 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 3584 ** for testing when we want to run through the test suite quickly. 3585 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 3586 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 3587 ** or power failure will likely corrupt the database file. 3588 ** 3589 ** SQLite sets the dataOnly flag if the size of the file is unchanged. 3590 ** The idea behind dataOnly is that it should only write the file content 3591 ** to disk, not the inode. We only set dataOnly if the file size is 3592 ** unchanged since the file size is part of the inode. However, 3593 ** Ted Ts'o tells us that fdatasync() will also write the inode if the 3594 ** file size has changed. The only real difference between fdatasync() 3595 ** and fsync(), Ted tells us, is that fdatasync() will not flush the 3596 ** inode if the mtime or owner or other inode attributes have changed. 3597 ** We only care about the file size, not the other file attributes, so 3598 ** as far as SQLite is concerned, an fdatasync() is always adequate.
3599 ** So, we always use fdatasync() if it is available, regardless of 3600 ** the value of the dataOnly flag. 3601 */ 3602 static int full_fsync(int fd, int fullSync, int dataOnly){ 3603 int rc; 3604 3605 /* The following "ifdef/elif/else/" block has the same structure as 3606 ** the one below. It is replicated here solely to avoid cluttering 3607 ** up the real code with the UNUSED_PARAMETER() macros. 3608 */ 3609 #ifdef SQLITE_NO_SYNC 3610 UNUSED_PARAMETER(fd); 3611 UNUSED_PARAMETER(fullSync); 3612 UNUSED_PARAMETER(dataOnly); 3613 #elif HAVE_FULLFSYNC 3614 UNUSED_PARAMETER(dataOnly); 3615 #else 3616 UNUSED_PARAMETER(fullSync); 3617 UNUSED_PARAMETER(dataOnly); 3618 #endif 3619 3620 /* Record the number of times that we do a normal fsync() and 3621 ** FULLSYNC. This is used during testing to verify that this procedure 3622 ** gets called with the correct arguments. 3623 */ 3624 #ifdef SQLITE_TEST 3625 if( fullSync ) sqlite3_fullsync_count++; 3626 sqlite3_sync_count++; 3627 #endif 3628 3629 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 3630 ** no-op. But go ahead and call fstat() to validate the file 3631 ** descriptor as we need a method to provoke a failure during 3632 ** coverage testing. 3633 */ 3634 #ifdef SQLITE_NO_SYNC 3635 { 3636 struct stat buf; 3637 rc = osFstat(fd, &buf); 3638 } 3639 #elif HAVE_FULLFSYNC 3640 if( fullSync ){ 3641 rc = osFcntl(fd, F_FULLFSYNC, 0); 3642 }else{ 3643 rc = 1; /* non-zero so that the plain fsync() below runs */ 3644 } 3645 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 3646 ** It shouldn't be possible for fullfsync to fail on the local 3647 ** file system (on OSX), so failure indicates that FULLFSYNC 3648 ** isn't supported for this file system. So, attempt an fsync 3649 ** and (for now) ignore the overhead of a superfluous fcntl call. 3650 ** It'd be better to detect fullfsync support once and avoid 3651 ** the fcntl call every time sync is called.
3652 */ 3653 if( rc ) rc = fsync(fd); 3654 3655 #elif defined(__APPLE__) 3656 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly 3657 ** so currently we default to the macro that redefines fdatasync to fsync 3658 */ 3659 rc = fsync(fd); 3660 #else 3661 rc = fdatasync(fd); 3662 #if OS_VXWORKS 3663 if( rc==-1 && errno==ENOTSUP ){ 3664 rc = fsync(fd); 3665 } 3666 #endif /* OS_VXWORKS */ 3667 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ 3668 3669 /* When OS_VXWORKS is non-zero, any result other than -1 is reported as 0. */ if( OS_VXWORKS && rc!= -1 ){ 3670 rc = 0; 3671 } 3672 return rc; 3673 } 3674 3675 /* 3676 ** Open a file descriptor to the directory containing file zFilename. 3677 ** If successful, *pFd is set to the opened file descriptor and 3678 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 3679 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 3680 ** value. 3681 ** 3682 ** The directory file descriptor is used for only one thing - to 3683 ** fsync() a directory to make sure file creation and deletion events 3684 ** are flushed to disk. Such fsyncs are not needed on newer 3685 ** journaling filesystems, but are required on older filesystems. 3686 ** 3687 ** This routine can be overridden using the xSetSysCall interface. 3688 ** The ability to override this routine was added in support of the 3689 ** chromium sandbox. Opening a directory is a security risk (we are 3690 ** told) so making it overrideable allows the chromium sandbox to 3691 ** replace this routine with a harmless no-op. To make this routine 3692 ** a no-op, replace it with a stub that returns SQLITE_OK but leaves 3693 ** *pFd set to a negative number. 3694 ** 3695 ** If SQLITE_OK is returned, the caller is responsible for closing 3696 ** the file descriptor *pFd using close().
3697 */ 3698 static int openDirectory(const char *zFilename, int *pFd){ 3699 int ii; 3700 int fd = -1; 3701 char zDirname[MAX_PATHNAME+1]; 3702 3703 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 3704 for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); 3705 if( ii>0 ){ 3706 zDirname[ii] = '\0'; 3707 }else{ 3708 if( zDirname[0]!='/' ) zDirname[0] = '.'; 3709 zDirname[1] = 0; 3710 } 3711 fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); 3712 if( fd>=0 ){ 3713 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); 3714 } 3715 *pFd = fd; 3716 if( fd>=0 ) return SQLITE_OK; 3717 return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); 3718 } 3719 3720 /* 3721 ** Make sure all writes to a particular file are committed to disk. 3722 ** 3723 ** If dataOnly==0 then both the file itself and its metadata (file 3724 ** size, access time, etc) are synced. If dataOnly!=0 then only the 3725 ** file data is synced. 3726 ** 3727 ** Under Unix, also make sure that the directory entry for the file 3728 ** has been created by fsync-ing the directory that contains the file. 3729 ** If we do not do this and we encounter a power failure, the directory 3730 ** entry for the journal might not exist after we reboot. The next 3731 ** SQLite to access the file will not know that the journal exists (because 3732 ** the directory entry for the journal was never created) and the transaction 3733 ** will not roll back - possibly leading to database corruption. 3734 */ 3735 static int unixSync(sqlite3_file *id, int flags){ 3736 int rc; 3737 unixFile *pFile = (unixFile*)id; 3738 3739 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 3740 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 3741 3742 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 3743 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 3744 || (flags&0x0F)==SQLITE_SYNC_FULL 3745 ); 3746 3747 /* Unix cannot, but some systems may return SQLITE_FULL from here. 
This 3748 ** line is to test that doing so does not cause any problems. 3749 */ 3750 SimulateDiskfullError( return SQLITE_FULL ); 3751 3752 assert( pFile ); 3753 OSTRACE(("SYNC %-3d\n", pFile->h)); 3754 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 3755 SimulateIOError( rc=1 ); 3756 if( rc ){ 3757 storeLastErrno(pFile, errno); 3758 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); 3759 } 3760 3761 /* Also fsync the directory containing the file if the DIRSYNC flag 3762 ** is set. This is a one-time occurrence. Many systems (examples: AIX) 3763 ** are unable to fsync a directory, so ignore errors on the fsync. 3764 */ 3765 if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ 3766 int dirfd; 3767 OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, 3768 HAVE_FULLFSYNC, isFullsync)); 3769 rc = osOpenDirectory(pFile->zPath, &dirfd); 3770 if( rc==SQLITE_OK ){ 3771 full_fsync(dirfd, 0, 0); 3772 robust_close(pFile, dirfd, __LINE__); 3773 }else{ 3774 assert( rc==SQLITE_CANTOPEN ); 3775 rc = SQLITE_OK; 3776 } 3777 pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; 3778 } 3779 return rc; 3780 } 3781 3782 /* 3783 ** Truncate an open file to a specified size 3784 */ 3785 static int unixTruncate(sqlite3_file *id, i64 nByte){ 3786 unixFile *pFile = (unixFile *)id; 3787 int rc; 3788 assert( pFile ); 3789 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 3790 3791 /* If the user has configured a chunk-size for this file, truncate the 3792 ** file so that it consists of an integer number of chunks (i.e. the 3793 ** actual file size after the operation may be larger than the requested 3794 ** size). 
3795 */ 3796 if( pFile->szChunk>0 ){ 3797 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; 3798 } 3799 3800 rc = robust_ftruncate(pFile->h, nByte); 3801 if( rc ){ 3802 storeLastErrno(pFile, errno); 3803 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3804 }else{ 3805 #ifdef SQLITE_DEBUG 3806 /* If we are doing a normal write to a database file (as opposed to 3807 ** doing a hot-journal rollback or a write to some file other than a 3808 ** normal database file) and we truncate the file to zero length, 3809 ** that effectively updates the change counter. This might happen 3810 ** when restoring a database using the backup API from a zero-length 3811 ** source. 3812 */ 3813 if( pFile->inNormalWrite && nByte==0 ){ 3814 pFile->transCntrChng = 1; 3815 } 3816 #endif 3817 3818 #if SQLITE_MAX_MMAP_SIZE>0 3819 /* If the file was just truncated to a size smaller than the currently 3820 ** mapped region, reduce the effective mapping size as well. SQLite will 3821 ** use read() and write() to access data beyond this point from now on. 3822 */ 3823 if( nByte<pFile->mmapSize ){ 3824 pFile->mmapSize = nByte; 3825 } 3826 #endif 3827 3828 return SQLITE_OK; 3829 } 3830 } 3831 3832 /* 3833 ** Determine the current size of a file in bytes 3834 */ 3835 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 3836 int rc; 3837 struct stat buf; 3838 assert( id ); 3839 rc = osFstat(((unixFile*)id)->h, &buf); 3840 SimulateIOError( rc=1 ); 3841 if( rc!=0 ){ 3842 storeLastErrno((unixFile*)id, errno); 3843 return SQLITE_IOERR_FSTAT; 3844 } 3845 *pSize = buf.st_size; 3846 3847 /* When opening a zero-size database, the findInodeInfo() procedure 3848 ** writes a single byte into that file in order to work around a bug 3849 ** in the OS-X msdos filesystem. In order to avoid problems with upper 3850 ** layers, we need to report this file size as zero even though it is 3851 ** really 1. Ticket #3260. 
3852 */ 3853 if( *pSize==1 ) *pSize = 0; 3854 3855 3856 return SQLITE_OK; 3857 } 3858 3859 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3860 /* 3861 ** Handler for proxy-locking file-control verbs. Defined below in the 3862 ** proxying locking division. 3863 */ 3864 static int proxyFileControl(sqlite3_file*,int,void*); 3865 #endif 3866 3867 /* 3868 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 3869 ** file-control operation. Enlarge the database to nBytes in size 3870 ** (rounded up to the next chunk-size). If the database is already 3871 ** nBytes or larger, this routine is a no-op. 3872 */ 3873 static int fcntlSizeHint(unixFile *pFile, i64 nByte){ 3874 if( pFile->szChunk>0 ){ 3875 i64 nSize; /* Required file size */ 3876 struct stat buf; /* Used to hold return values of fstat() */ 3877 3878 if( osFstat(pFile->h, &buf) ){ 3879 return SQLITE_IOERR_FSTAT; 3880 } 3881 3882 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; 3883 if( nSize>(i64)buf.st_size ){ 3884 3885 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 3886 /* The code below is handling the return value of osFallocate() 3887 ** correctly. posix_fallocate() is defined to "returns zero on success, 3888 ** or an error number on failure". See the manpage for details. */ 3889 int err; 3890 do{ 3891 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); 3892 }while( err==EINTR ); 3893 if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE; 3894 #else 3895 /* If the OS does not have posix_fallocate(), fake it. Write a 3896 ** single byte to the last byte in each block that falls entirely 3897 ** within the extended region. Then, if required, a single byte 3898 ** at offset (nSize-1), to set the size of the file correctly. 3899 ** This is a similar technique to that used by glibc on systems 3900 ** that do not have a real fallocate() call. 
3901 */ 3902 int nBlk = buf.st_blksize; /* File-system block size */ 3903 int nWrite = 0; /* Number of bytes written by seekAndWrite */ 3904 i64 iWrite; /* Next offset to write to */ 3905 3906 iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; 3907 assert( iWrite>=buf.st_size ); 3908 assert( ((iWrite+1)%nBlk)==0 ); 3909 for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){ 3910 if( iWrite>=nSize ) iWrite = nSize - 1; 3911 nWrite = seekAndWrite(pFile, iWrite, "", 1); 3912 if( nWrite!=1 ) return SQLITE_IOERR_WRITE; 3913 } 3914 #endif 3915 } 3916 } 3917 3918 #if SQLITE_MAX_MMAP_SIZE>0 3919 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ 3920 int rc; 3921 if( pFile->szChunk<=0 ){ 3922 if( robust_ftruncate(pFile->h, nByte) ){ 3923 storeLastErrno(pFile, errno); 3924 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3925 } 3926 } 3927 3928 rc = unixMapfile(pFile, nByte); 3929 return rc; 3930 } 3931 #endif 3932 3933 return SQLITE_OK; 3934 } 3935 3936 /* 3937 ** If *pArg is initially negative then this is a query. Set *pArg to 3938 ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. 3939 ** 3940 ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. 3941 */ 3942 static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ 3943 if( *pArg<0 ){ 3944 *pArg = (pFile->ctrlFlags & mask)!=0; 3945 }else if( (*pArg)==0 ){ 3946 pFile->ctrlFlags &= ~mask; 3947 }else{ 3948 pFile->ctrlFlags |= mask; 3949 } 3950 } 3951 3952 /* Forward declaration */ 3953 static int unixGetTempname(int nBuf, char *zBuf); 3954 #ifndef SQLITE_OMIT_WAL 3955 static int unixFcntlExternalReader(unixFile*, int*); 3956 #endif 3957 3958 /* 3959 ** Information and control of an open file handle. 
3960 */ 3961 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 3962 unixFile *pFile = (unixFile*)id; 3963 switch( op ){ 3964 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 3965 case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { 3966 int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); 3967 return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; 3968 } 3969 case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { 3970 int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); 3971 return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; 3972 } 3973 case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { 3974 int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); 3975 return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; 3976 } 3977 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 3978 3979 case SQLITE_FCNTL_LOCKSTATE: { 3980 *(int*)pArg = pFile->eFileLock; 3981 return SQLITE_OK; 3982 } 3983 case SQLITE_FCNTL_LAST_ERRNO: { 3984 *(int*)pArg = pFile->lastErrno; 3985 return SQLITE_OK; 3986 } 3987 case SQLITE_FCNTL_CHUNK_SIZE: { 3988 pFile->szChunk = *(int *)pArg; 3989 return SQLITE_OK; 3990 } 3991 case SQLITE_FCNTL_SIZE_HINT: { 3992 int rc; 3993 SimulateIOErrorBenign(1); 3994 rc = fcntlSizeHint(pFile, *(i64 *)pArg); 3995 SimulateIOErrorBenign(0); 3996 return rc; 3997 } 3998 case SQLITE_FCNTL_PERSIST_WAL: { 3999 unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); 4000 return SQLITE_OK; 4001 } 4002 case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { 4003 unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); 4004 return SQLITE_OK; 4005 } 4006 case SQLITE_FCNTL_VFSNAME: { 4007 *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); 4008 return SQLITE_OK; 4009 } 4010 case SQLITE_FCNTL_TEMPFILENAME: { 4011 char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); 4012 if( zTFile ){ 4013 unixGetTempname(pFile->pVfs->mxPathname, zTFile); 4014 *(char**)pArg = zTFile; 4015 } 4016 return SQLITE_OK; 4017 } 4018 case SQLITE_FCNTL_HAS_MOVED: { 4019 *(int*)pArg = fileHasMoved(pFile); 4020 
return SQLITE_OK; 4021 } 4022 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4023 case SQLITE_FCNTL_LOCK_TIMEOUT: { 4024 int iOld = pFile->iBusyTimeout; 4025 pFile->iBusyTimeout = *(int*)pArg; 4026 *(int*)pArg = iOld; 4027 return SQLITE_OK; 4028 } 4029 #endif 4030 #if SQLITE_MAX_MMAP_SIZE>0 4031 case SQLITE_FCNTL_MMAP_SIZE: { 4032 i64 newLimit = *(i64*)pArg; 4033 int rc = SQLITE_OK; 4034 if( newLimit>sqlite3GlobalConfig.mxMmap ){ 4035 newLimit = sqlite3GlobalConfig.mxMmap; 4036 } 4037 4038 /* The value of newLimit may be eventually cast to (size_t) and passed 4039 ** to mmap(). Restrict its value to 2GB if (size_t) is not at least a 4040 ** 64-bit type. */ 4041 if( newLimit>0 && sizeof(size_t)<8 ){ 4042 newLimit = (newLimit & 0x7FFFFFFF); 4043 } 4044 4045 *(i64*)pArg = pFile->mmapSizeMax; 4046 if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ 4047 pFile->mmapSizeMax = newLimit; 4048 if( pFile->mmapSize>0 ){ 4049 unixUnmapfile(pFile); 4050 rc = unixMapfile(pFile, -1); 4051 } 4052 } 4053 return rc; 4054 } 4055 #endif 4056 #ifdef SQLITE_DEBUG 4057 /* The pager calls this method to signal that it has done 4058 ** a rollback and that the database is therefore unchanged and 4059 ** it hence it is OK for the transaction change counter to be 4060 ** unchanged. 
4061 */ 4062 case SQLITE_FCNTL_DB_UNCHANGED: { 4063 ((unixFile*)id)->dbUpdate = 0; 4064 return SQLITE_OK; 4065 } 4066 #endif 4067 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 4068 case SQLITE_FCNTL_SET_LOCKPROXYFILE: 4069 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 4070 return proxyFileControl(id,op,pArg); 4071 } 4072 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 4073 4074 case SQLITE_FCNTL_EXTERNAL_READER: { 4075 #ifndef SQLITE_OMIT_WAL 4076 return unixFcntlExternalReader((unixFile*)id, (int*)pArg); 4077 #else 4078 *(int*)pArg = 0; 4079 return SQLITE_OK; 4080 #endif 4081 } 4082 } 4083 return SQLITE_NOTFOUND; 4084 } 4085 4086 /* 4087 ** If pFd->sectorSize is non-zero when this function is called, it is a 4088 ** no-op. Otherwise, the values of pFd->sectorSize and 4089 ** pFd->deviceCharacteristics are set according to the file-system 4090 ** characteristics. 4091 ** 4092 ** There are two versions of this function. One for QNX and one for all 4093 ** other systems. 4094 */ 4095 #ifndef __QNXNTO__ 4096 static void setDeviceCharacteristics(unixFile *pFd){ 4097 assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); 4098 if( pFd->sectorSize==0 ){ 4099 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 4100 int res; 4101 u32 f = 0; 4102 4103 /* Check for support for F2FS atomic batch writes. */ 4104 res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); 4105 if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ 4106 pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; 4107 } 4108 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 4109 4110 /* Set the POWERSAFE_OVERWRITE flag if requested. 
*/ 4111 if( pFd->ctrlFlags & UNIXFILE_PSOW ){ 4112 pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; 4113 } 4114 4115 pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4116 } 4117 } 4118 #else 4119 #include <sys/dcmd_blk.h> 4120 #include <sys/statvfs.h> 4121 static void setDeviceCharacteristics(unixFile *pFile){ 4122 if( pFile->sectorSize == 0 ){ 4123 struct statvfs fsInfo; 4124 4125 /* Set defaults for non-supported filesystems */ 4126 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4127 pFile->deviceCharacteristics = 0; 4128 if( fstatvfs(pFile->h, &fsInfo) == -1 ) { 4129 return; 4130 } 4131 4132 if( !strcmp(fsInfo.f_basetype, "tmp") ) { 4133 pFile->sectorSize = fsInfo.f_bsize; 4134 pFile->deviceCharacteristics = 4135 SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ 4136 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4137 ** the write succeeds */ 4138 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4139 ** so it is ordered */ 4140 0; 4141 }else if( strstr(fsInfo.f_basetype, "etfs") ){ 4142 pFile->sectorSize = fsInfo.f_bsize; 4143 pFile->deviceCharacteristics = 4144 /* etfs cluster size writes are atomic */ 4145 (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | 4146 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4147 ** the write succeeds */ 4148 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4149 ** so it is ordered */ 4150 0; 4151 }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ 4152 pFile->sectorSize = fsInfo.f_bsize; 4153 pFile->deviceCharacteristics = 4154 SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ 4155 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4156 ** the write succeeds */ 4157 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4158 ** so it is ordered */ 4159 0; 4160 }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ 4161 pFile->sectorSize = fsInfo.f_bsize; 4162 pFile->deviceCharacteristics 
= 4163 /* full bitset of atomics from max sector size and smaller */ 4164 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4165 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4166 ** so it is ordered */ 4167 0; 4168 }else if( strstr(fsInfo.f_basetype, "dos") ){ 4169 pFile->sectorSize = fsInfo.f_bsize; 4170 pFile->deviceCharacteristics = 4171 /* full bitset of atomics from max sector size and smaller */ 4172 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4173 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4174 ** so it is ordered */ 4175 0; 4176 }else{ 4177 pFile->deviceCharacteristics = 4178 SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ 4179 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4180 ** the write succeeds */ 4181 0; 4182 } 4183 } 4184 /* Last chance verification. If the sector size isn't a multiple of 512 4185 ** then it isn't valid.*/ 4186 if( pFile->sectorSize % 512 != 0 ){ 4187 pFile->deviceCharacteristics = 0; 4188 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4189 } 4190 } 4191 #endif 4192 4193 /* 4194 ** Return the sector size in bytes of the underlying block device for 4195 ** the specified file. This is almost always 512 bytes, but may be 4196 ** larger for some devices. 4197 ** 4198 ** SQLite code assumes this function cannot fail. It also assumes that 4199 ** if two files are created in the same file-system directory (i.e. 4200 ** a database and its journal file) that the sector size will be the 4201 ** same for both. 4202 */ 4203 static int unixSectorSize(sqlite3_file *id){ 4204 unixFile *pFd = (unixFile*)id; 4205 setDeviceCharacteristics(pFd); 4206 return pFd->sectorSize; 4207 } 4208 4209 /* 4210 ** Return the device characteristics for the file. 4211 ** 4212 ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. 
4213 ** However, that choice is controversial since technically the underlying 4214 ** file system does not always provide powersafe overwrites. (In other 4215 ** words, after a power-loss event, parts of the file that were never 4216 ** written might end up being altered.) However, non-PSOW behavior is very, 4217 ** very rare. And asserting PSOW makes a large reduction in the amount 4218 ** of required I/O for journaling, since a lot of padding is eliminated. 4219 ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control 4220 ** available to turn it off and URI query parameter available to turn it off. 4221 */ 4222 static int unixDeviceCharacteristics(sqlite3_file *id){ 4223 unixFile *pFd = (unixFile*)id; 4224 setDeviceCharacteristics(pFd); 4225 return pFd->deviceCharacteristics; 4226 } 4227 4228 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 4229 4230 /* 4231 ** Return the system page size. 4232 ** 4233 ** This function should not be called directly by other code in this file. 4234 ** Instead, it should be called via macro osGetpagesize(). 4235 */ 4236 static int unixGetpagesize(void){ 4237 #if OS_VXWORKS 4238 return 1024; 4239 #elif defined(_BSD_SOURCE) 4240 return getpagesize(); 4241 #else 4242 return (int)sysconf(_SC_PAGESIZE); 4243 #endif 4244 } 4245 4246 #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ 4247 4248 #ifndef SQLITE_OMIT_WAL 4249 4250 /* 4251 ** Object used to represent an shared memory buffer. 4252 ** 4253 ** When multiple threads all reference the same wal-index, each thread 4254 ** has its own unixShm object, but they all point to a single instance 4255 ** of this unixShmNode object. In other words, each wal-index is opened 4256 ** only once per process. 4257 ** 4258 ** Each unixShmNode object is connected to a single unixInodeInfo object. 
4259 ** We could coalesce this object into unixInodeInfo, but that would mean 4260 ** every open file that does not use shared memory (in other words, most 4261 ** open files) would have to carry around this extra information. So 4262 ** the unixInodeInfo object contains a pointer to this unixShmNode object 4263 ** and the unixShmNode object is created only when needed. 4264 ** 4265 ** unixMutexHeld() must be true when creating or destroying 4266 ** this object or while reading or writing the following fields: 4267 ** 4268 ** nRef 4269 ** 4270 ** The following fields are read-only after the object is created: 4271 ** 4272 ** hShm 4273 ** zFilename 4274 ** 4275 ** Either unixShmNode.pShmMutex must be held or unixShmNode.nRef==0 and 4276 ** unixMutexHeld() is true when reading or writing any other field 4277 ** in this structure. 4278 */ 4279 struct unixShmNode { 4280 unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ 4281 sqlite3_mutex *pShmMutex; /* Mutex to access this object */ 4282 char *zFilename; /* Name of the mmapped file */ 4283 int hShm; /* Open file descriptor */ 4284 int szRegion; /* Size of shared-memory regions */ 4285 u16 nRegion; /* Size of array apRegion */ 4286 u8 isReadonly; /* True if read-only */ 4287 u8 isUnlocked; /* True if no DMS lock held */ 4288 char **apRegion; /* Array of mapped shared-memory regions */ 4289 int nRef; /* Number of unixShm objects pointing to this */ 4290 unixShm *pFirst; /* All unixShm objects pointing to this */ 4291 int aLock[SQLITE_SHM_NLOCK]; /* # shared locks on slot, -1==excl lock */ 4292 #ifdef SQLITE_DEBUG 4293 u8 exclMask; /* Mask of exclusive locks held */ 4294 u8 sharedMask; /* Mask of shared locks held */ 4295 u8 nextShmId; /* Next available unixShm.id value */ 4296 #endif 4297 }; 4298 4299 /* 4300 ** Structure used internally by this VFS to record the state of an 4301 ** open shared memory connection. 
4302 ** 4303 ** The following fields are initialized when this object is created and 4304 ** are read-only thereafter: 4305 ** 4306 ** unixShm.pShmNode 4307 ** unixShm.id 4308 ** 4309 ** All other fields are read/write. The unixShm.pShmNode->pShmMutex must 4310 ** be held while accessing any read/write fields. 4311 */ 4312 struct unixShm { 4313 unixShmNode *pShmNode; /* The underlying unixShmNode object */ 4314 unixShm *pNext; /* Next unixShm with the same unixShmNode */ 4315 u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ 4316 u8 id; /* Id of this connection within its unixShmNode */ 4317 u16 sharedMask; /* Mask of shared locks held */ 4318 u16 exclMask; /* Mask of exclusive locks held */ 4319 }; 4320 4321 /* 4322 ** Constants used for locking 4323 */ 4324 #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ 4325 #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ 4326 4327 /* 4328 ** Use F_GETLK to check whether or not there are any readers with open 4329 ** wal-mode transactions in other processes on database file pFile. If 4330 ** no error occurs, return SQLITE_OK and set (*piOut) to 1 if there are 4331 ** such transactions, or 0 otherwise. If an error occurs, return an 4332 ** SQLite error code. The final value of *piOut is undefined in this 4333 ** case. 
4334 */ 4335 static int unixFcntlExternalReader(unixFile *pFile, int *piOut){ 4336 int rc = SQLITE_OK; 4337 *piOut = 0; 4338 if( pFile->pShm){ 4339 unixShmNode *pShmNode = pFile->pShm->pShmNode; 4340 struct flock f; 4341 4342 memset(&f, 0, sizeof(f)); 4343 f.l_type = F_WRLCK; 4344 f.l_whence = SEEK_SET; 4345 f.l_start = UNIX_SHM_BASE + 3; 4346 f.l_len = SQLITE_SHM_NLOCK - 3; 4347 4348 sqlite3_mutex_enter(pShmNode->pShmMutex); 4349 if( osFcntl(pShmNode->hShm, F_GETLK, &f)<0 ){ 4350 rc = SQLITE_IOERR_LOCK; 4351 }else{ 4352 *piOut = (f.l_type!=F_UNLCK); 4353 } 4354 sqlite3_mutex_leave(pShmNode->pShmMutex); 4355 } 4356 4357 return rc; 4358 } 4359 4360 4361 /* 4362 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 4363 ** 4364 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 4365 ** otherwise. 4366 */ 4367 static int unixShmSystemLock( 4368 unixFile *pFile, /* Open connection to the WAL file */ 4369 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 4370 int ofst, /* First byte of the locking range */ 4371 int n /* Number of bytes to lock */ 4372 ){ 4373 unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ 4374 struct flock f; /* The posix advisory locking structure */ 4375 int rc = SQLITE_OK; /* Result code form fcntl() */ 4376 4377 /* Access to the unixShmNode object is serialized by the caller */ 4378 pShmNode = pFile->pInode->pShmNode; 4379 assert( pShmNode->nRef==0 || sqlite3_mutex_held(pShmNode->pShmMutex) ); 4380 assert( pShmNode->nRef>0 || unixMutexHeld() ); 4381 4382 /* Shared locks never span more than one byte */ 4383 assert( n==1 || lockType!=F_RDLCK ); 4384 4385 /* Locks are within range */ 4386 assert( n>=1 && n<=SQLITE_SHM_NLOCK ); 4387 4388 if( pShmNode->hShm>=0 ){ 4389 int res; 4390 /* Initialize the locking parameters */ 4391 f.l_type = lockType; 4392 f.l_whence = SEEK_SET; 4393 f.l_start = ofst; 4394 f.l_len = n; 4395 res = osSetPosixAdvisoryLock(pShmNode->hShm, &f, pFile); 4396 if( 
res==-1 ){ 4397 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 4398 rc = (pFile->iBusyTimeout ? SQLITE_BUSY_TIMEOUT : SQLITE_BUSY); 4399 #else 4400 rc = SQLITE_BUSY; 4401 #endif 4402 } 4403 } 4404 4405 /* Update the global lock state and do debug tracing */ 4406 #ifdef SQLITE_DEBUG 4407 { u16 mask; 4408 OSTRACE(("SHM-LOCK ")); 4409 mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); 4410 if( rc==SQLITE_OK ){ 4411 if( lockType==F_UNLCK ){ 4412 OSTRACE(("unlock %d ok", ofst)); 4413 pShmNode->exclMask &= ~mask; 4414 pShmNode->sharedMask &= ~mask; 4415 }else if( lockType==F_RDLCK ){ 4416 OSTRACE(("read-lock %d ok", ofst)); 4417 pShmNode->exclMask &= ~mask; 4418 pShmNode->sharedMask |= mask; 4419 }else{ 4420 assert( lockType==F_WRLCK ); 4421 OSTRACE(("write-lock %d ok", ofst)); 4422 pShmNode->exclMask |= mask; 4423 pShmNode->sharedMask &= ~mask; 4424 } 4425 }else{ 4426 if( lockType==F_UNLCK ){ 4427 OSTRACE(("unlock %d failed", ofst)); 4428 }else if( lockType==F_RDLCK ){ 4429 OSTRACE(("read-lock failed")); 4430 }else{ 4431 assert( lockType==F_WRLCK ); 4432 OSTRACE(("write-lock %d failed", ofst)); 4433 } 4434 } 4435 OSTRACE((" - afterwards %03x,%03x\n", 4436 pShmNode->sharedMask, pShmNode->exclMask)); 4437 } 4438 #endif 4439 4440 return rc; 4441 } 4442 4443 /* 4444 ** Return the minimum number of 32KB shm regions that should be mapped at 4445 ** a time, assuming that each mapping must be an integer multiple of the 4446 ** current system page-size. 4447 ** 4448 ** Usually, this is 1. The exception seems to be systems that are configured 4449 ** to use 64KB pages - in this case each mapping must cover at least two 4450 ** shm regions. 
4451 */ 4452 static int unixShmRegionPerMap(void){ 4453 int shmsz = 32*1024; /* SHM region size */ 4454 int pgsz = osGetpagesize(); /* System page size */ 4455 assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ 4456 if( pgsz<shmsz ) return 1; 4457 return pgsz/shmsz; 4458 } 4459 4460 /* 4461 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. 4462 ** 4463 ** This is not a VFS shared-memory method; it is a utility function called 4464 ** by VFS shared-memory methods. 4465 */ 4466 static void unixShmPurge(unixFile *pFd){ 4467 unixShmNode *p = pFd->pInode->pShmNode; 4468 assert( unixMutexHeld() ); 4469 if( p && ALWAYS(p->nRef==0) ){ 4470 int nShmPerMap = unixShmRegionPerMap(); 4471 int i; 4472 assert( p->pInode==pFd->pInode ); 4473 sqlite3_mutex_free(p->pShmMutex); 4474 for(i=0; i<p->nRegion; i+=nShmPerMap){ 4475 if( p->hShm>=0 ){ 4476 osMunmap(p->apRegion[i], p->szRegion); 4477 }else{ 4478 sqlite3_free(p->apRegion[i]); 4479 } 4480 } 4481 sqlite3_free(p->apRegion); 4482 if( p->hShm>=0 ){ 4483 robust_close(pFd, p->hShm, __LINE__); 4484 p->hShm = -1; 4485 } 4486 p->pInode->pShmNode = 0; 4487 sqlite3_free(p); 4488 } 4489 } 4490 4491 /* 4492 ** The DMS lock has not yet been taken on shm file pShmNode. Attempt to 4493 ** take it now. Return SQLITE_OK if successful, or an SQLite error 4494 ** code otherwise. 4495 ** 4496 ** If the DMS cannot be locked because this is a readonly_shm=1 4497 ** connection and no other process already holds a lock, return 4498 ** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. 4499 */ 4500 static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ 4501 struct flock lock; 4502 int rc = SQLITE_OK; 4503 4504 /* Use F_GETLK to determine the locks other processes are holding 4505 ** on the DMS byte. If it indicates that another process is holding 4506 ** a SHARED lock, then this process may also take a SHARED lock 4507 ** and proceed with opening the *-shm file. 
4508 ** 4509 ** Or, if no other process is holding any lock, then this process 4510 ** is the first to open it. In this case take an EXCLUSIVE lock on the 4511 ** DMS byte and truncate the *-shm file to zero bytes in size. Then 4512 ** downgrade to a SHARED lock on the DMS byte. 4513 ** 4514 ** If another process is holding an EXCLUSIVE lock on the DMS byte, 4515 ** return SQLITE_BUSY to the caller (it will try again). An earlier 4516 ** version of this code attempted the SHARED lock at this point. But 4517 ** this introduced a subtle race condition: if the process holding 4518 ** EXCLUSIVE failed just before truncating the *-shm file, then this 4519 ** process might open and use the *-shm file without truncating it. 4520 ** And if the *-shm file has been corrupted by a power failure or 4521 ** system crash, the database itself may also become corrupt. */ 4522 lock.l_whence = SEEK_SET; 4523 lock.l_start = UNIX_SHM_DMS; 4524 lock.l_len = 1; 4525 lock.l_type = F_WRLCK; 4526 if( osFcntl(pShmNode->hShm, F_GETLK, &lock)!=0 ) { 4527 rc = SQLITE_IOERR_LOCK; 4528 }else if( lock.l_type==F_UNLCK ){ 4529 if( pShmNode->isReadonly ){ 4530 pShmNode->isUnlocked = 1; 4531 rc = SQLITE_READONLY_CANTINIT; 4532 }else{ 4533 rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1); 4534 /* The first connection to attach must truncate the -shm file. We 4535 ** truncate to 3 bytes (an arbitrary small number, less than the 4536 ** -shm header size) rather than 0 as a system debugging aid, to 4537 ** help detect if a -shm file truncation is legitimate or is the work 4538 ** or a rogue process. 
*/ 4539 if( rc==SQLITE_OK && robust_ftruncate(pShmNode->hShm, 3) ){ 4540 rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate",pShmNode->zFilename); 4541 } 4542 } 4543 }else if( lock.l_type==F_WRLCK ){ 4544 rc = SQLITE_BUSY; 4545 } 4546 4547 if( rc==SQLITE_OK ){ 4548 assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK ); 4549 rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); 4550 } 4551 return rc; 4552 } 4553 4554 /* 4555 ** Open a shared-memory area associated with open database file pDbFd. 4556 ** This particular implementation uses mmapped files. 4557 ** 4558 ** The file used to implement shared-memory is in the same directory 4559 ** as the open database file and has the same name as the open database 4560 ** file with the "-shm" suffix added. For example, if the database file 4561 ** is "/home/user1/config.db" then the file that is created and mmapped 4562 ** for shared memory will be called "/home/user1/config.db-shm". 4563 ** 4564 ** Another approach to is to use files in /dev/shm or /dev/tmp or an 4565 ** some other tmpfs mount. But if a file in a different directory 4566 ** from the database file is used, then differing access permissions 4567 ** or a chroot() might cause two different processes on the same 4568 ** database to end up using different files for shared memory - 4569 ** meaning that their memory would not really be shared - resulting 4570 ** in database corruption. Nevertheless, this tmpfs file usage 4571 ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" 4572 ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time 4573 ** option results in an incompatible build of SQLite; builds of SQLite 4574 ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the 4575 ** same database file at the same time, database corruption will likely 4576 ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered 4577 ** "unsupported" and may go away in a future SQLite release. 
**
** When opening a new shared-memory file, if no other instances of that
** file are currently open, in this process or in other processes, then
** the file must be truncated to zero length or have its header cleared.
**
** If the original database file (pDbFd) is using the "unix-excl" VFS
** that means that an exclusive lock is held on the database file and
** that no other processes are able to read or write the database. In
** that case, we do not really need shared memory. No shared memory
** file is created. The shared memory will be simulated with heap memory.
**
** Return value: SQLITE_OK on success. SQLITE_READONLY_CANTINIT may also
** be returned with the connection successfully attached (pDbFd->pShm is
** set in that case). Any other result code means that nothing was attached
** and the new unixShm object has been freed.
*/
static int unixOpenSharedMemory(unixFile *pDbFd){
  struct unixShm *p = 0;          /* The connection to be opened */
  struct unixShmNode *pShmNode;   /* The underlying mmapped file */
  int rc = SQLITE_OK;             /* Result code */
  unixInodeInfo *pInode;          /* The inode of fd */
  char *zShm;                     /* Name of the file used for SHM */
  int nShmFilename;               /* Size of the SHM filename in bytes */

  /* Allocate space for the new unixShm object. */
  p = sqlite3_malloc64( sizeof(*p) );
  if( p==0 ) return SQLITE_NOMEM_BKPT;
  memset(p, 0, sizeof(*p));
  assert( pDbFd->pShm==0 );

  /* Check to see if a unixShmNode object already exists. Reuse an existing
  ** one if present. Create a new one if necessary.
  **
  ** The global unix mutex is held from here until either the connection
  ** has been attached (unixLeaveMutex() below) or the error exit is taken.
  */
  assert( unixFileMutexNotheld(pDbFd) );
  unixEnterMutex();
  pInode = pDbFd->pInode;
  pShmNode = pInode->pShmNode;
  if( pShmNode==0 ){
    struct stat sStat;                 /* fstat() info for database file */
#ifndef SQLITE_SHM_DIRECTORY
    const char *zBasePath = pDbFd->zPath;
#endif

    /* Call fstat() to figure out the permissions on the database file. If
    ** a new *-shm file is created, an attempt will be made to create it
    ** with the same permissions.
    */
    if( osFstat(pDbFd->h, &sStat) ){
      rc = SQLITE_IOERR_FSTAT;
      goto shm_open_err;
    }

    /* Size the filename buffer. In the default build this leaves room for
    ** the "-shm" suffix and the nul-terminator appended to zBasePath. */
#ifdef SQLITE_SHM_DIRECTORY
    nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
#else
    nShmFilename = 6 + (int)strlen(zBasePath);
#endif

    /* The unixShmNode and its filename are a single allocation. The
    ** filename is stored immediately after the structure. */
    pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename );
    if( pShmNode==0 ){
      rc = SQLITE_NOMEM_BKPT;
      goto shm_open_err;
    }
    memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
    zShm = pShmNode->zFilename = (char*)&pShmNode[1];
#ifdef SQLITE_SHM_DIRECTORY
    sqlite3_snprintf(nShmFilename, zShm,
                     SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
                     (u32)sStat.st_ino, (u32)sStat.st_dev);
#else
    sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath);
    sqlite3FileSuffix3(pDbFd->zPath, zShm);
#endif

    /* Link the new node to the inode before any step that can fail, so
    ** that unixShmPurge() at shm_open_err can find it. hShm==-1 marks
    ** "no file descriptor open". */
    pShmNode->hShm = -1;
    pDbFd->pInode->pShmNode = pShmNode;
    pShmNode->pInode = pDbFd->pInode;
    if( sqlite3GlobalConfig.bCoreMutex ){
      pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
      if( pShmNode->pShmMutex==0 ){
        rc = SQLITE_NOMEM_BKPT;
        goto shm_open_err;
      }
    }

    /* When bProcessLock is set no *-shm file is opened at all and hShm
    ** remains -1 (see the "unix-excl" discussion in the header comment). */
    if( pInode->bProcessLock==0 ){
      /* Try read/write first unless the "readonly_shm" URI parameter is
      ** true. Fall back to a read-only open if that is skipped or fails. */
      if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
        pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT|O_NOFOLLOW,
                                     (sStat.st_mode&0777));
      }
      if( pShmNode->hShm<0 ){
        pShmNode->hShm = robust_open(zShm, O_RDONLY|O_NOFOLLOW,
                                     (sStat.st_mode&0777));
        if( pShmNode->hShm<0 ){
          rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm);
          goto shm_open_err;
        }
        pShmNode->isReadonly = 1;
      }

      /* If this process is running as root, make sure that the SHM file
      ** is owned by the same user that owns the original database. Otherwise,
      ** the original owner will not be able to connect.
      */
      robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid);

      /* SQLITE_READONLY_CANTINIT is not treated as fatal here: the
      ** connection is still attached and the code is passed back to the
      ** caller through the normal return below. */
      rc = unixLockSharedMemory(pDbFd, pShmNode);
      if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err;
    }
  }

  /* Make the new connection a child of the unixShmNode */
  p->pShmNode = pShmNode;
#ifdef SQLITE_DEBUG
  p->id = pShmNode->nextShmId++;
#endif
  pShmNode->nRef++;
  pDbFd->pShm = p;
  unixLeaveMutex();

  /* The reference count on pShmNode has already been incremented under
  ** the cover of the unixEnterMutex() mutex and the pointer from the
  ** new (struct unixShm) object to the pShmNode has been set. All that is
  ** left to do is to link the new object into the linked list starting
  ** at pShmNode->pFirst. This must be done while holding the
  ** pShmNode->pShmMutex.
  */
  sqlite3_mutex_enter(pShmNode->pShmMutex);
  p->pNext = pShmNode->pFirst;
  pShmNode->pFirst = p;
  sqlite3_mutex_leave(pShmNode->pShmMutex);
  return rc;

  /* Jump here on any error. The global unix mutex is still held. */
shm_open_err:
  unixShmPurge(pDbFd);       /* This call frees pShmNode if required */
  sqlite3_free(p);
  unixLeaveMutex();
  return rc;
}

/*
** This function is called to obtain a pointer to region iRegion of the
** shared-memory associated with the database file fd. Shared-memory regions
** are numbered starting from zero. Each shared-memory region is szRegion
** bytes in size.
**
** If an error occurs, an error code is returned and *pp is set to NULL.
**
** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
** region has not been allocated (by any client, including one running in a
** separate process), then *pp is set to NULL and SQLITE_OK returned. If
** bExtend is non-zero and the requested shared-memory region has not yet
** been allocated, it is allocated by this function.
**
** If the shared-memory region has already been allocated or is allocated by
** this call as described above, then it is mapped into this process's
** address space (if it is not already), *pp is set to point to the mapped
** memory and SQLITE_OK returned.
**
** Two details of the implementation below that callers should know:
**
**   * If the underlying shared-memory file was opened read-only, then
**     SQLITE_READONLY is returned in place of SQLITE_OK.
**
**   * If the shared-memory connection has to be opened by this call and
**     that fails, the error is returned immediately and *pp is not
**     written at all.
*/
static int unixShmMap(
  sqlite3_file *fd,               /* Handle open on database file */
  int iRegion,                    /* Region to retrieve */
  int szRegion,                   /* Size of regions */
  int bExtend,                    /* True to extend file if necessary */
  void volatile **pp              /* OUT: Mapped memory */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p;
  unixShmNode *pShmNode;
  int rc = SQLITE_OK;
  int nShmPerMap = unixShmRegionPerMap();
  int nReqRegion;

  /* If the shared-memory file has not yet been opened, open it now. */
  if( pDbFd->pShm==0 ){
    rc = unixOpenSharedMemory(pDbFd);
    if( rc!=SQLITE_OK ) return rc;
  }

  p = pDbFd->pShm;
  pShmNode = p->pShmNode;
  sqlite3_mutex_enter(pShmNode->pShmMutex);

  /* isUnlocked means that the DMS lock has not been taken yet. Retry it
  ** now, and give up on this call if it still cannot be obtained. */
  if( pShmNode->isUnlocked ){
    rc = unixLockSharedMemory(pDbFd, pShmNode);
    if( rc!=SQLITE_OK ) goto shmpage_out;
    pShmNode->isUnlocked = 0;
  }
  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 );

  /* Minimum number of regions required to be mapped. This is iRegion+1
  ** rounded up to a multiple of nShmPerMap, because regions are always
  ** mapped nShmPerMap at a time. */
  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;

  if( pShmNode->nRegion<nReqRegion ){
    char **apNew;                      /* New apRegion[] array */
    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
    struct stat sStat;                 /* Used by fstat() */

    pShmNode->szRegion = szRegion;

    if( pShmNode->hShm>=0 ){
      /* The requested region is not mapped into this process's address
      ** space. Check to see if it has been allocated (i.e. if the wal-index
      ** file is large enough to contain the requested region).
      */
      if( osFstat(pShmNode->hShm, &sStat) ){
        rc = SQLITE_IOERR_SHMSIZE;
        goto shmpage_out;
      }

      if( sStat.st_size<nByte ){
        /* The requested memory region does not exist. If bExtend is set to
        ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
        */
        if( !bExtend ){
          goto shmpage_out;
        }

        /* Alternatively, if bExtend is true, extend the file. Do this by
        ** writing a single byte to the end of each (OS) page being
        ** allocated or extended. Technically, we need only write to the
        ** last page in order to extend the file. But writing to all new
        ** pages forces the OS to allocate them immediately, which reduces
        ** the chances of SIGBUS while accessing the mapped region later on.
        */
        else{
          static const int pgsz = 4096;
          int iPg;

          /* Write to the last byte of each newly allocated or extended page */
          assert( (nByte % pgsz)==0 );
          for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
            int x = 0;
            if( seekAndWriteFd(pShmNode->hShm, iPg*pgsz + pgsz-1,"",1,&x)!=1 ){
              const char *zFile = pShmNode->zFilename;
              rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
              goto shmpage_out;
            }
          }
        }
      }
    }

    /* Map the requested memory region into this process's address space.
    ** First grow the apRegion[] pointer array, then add mappings one group
    ** of nShmPerMap regions at a time. nRegion is only advanced after a
    ** group has been fully set up, so a failure part-way leaves the
    ** previously mapped regions intact. */
    apNew = (char **)sqlite3_realloc(
        pShmNode->apRegion, nReqRegion*sizeof(char *)
    );
    if( !apNew ){
      rc = SQLITE_IOERR_NOMEM_BKPT;
      goto shmpage_out;
    }
    pShmNode->apRegion = apNew;
    while( pShmNode->nRegion<nReqRegion ){
      int nMap = szRegion*nShmPerMap;
      int i;
      void *pMem;
      if( pShmNode->hShm>=0 ){
        /* File-backed shared memory: mmap() the next group of regions. */
        pMem = osMmap(0, nMap,
            pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
            MAP_SHARED, pShmNode->hShm, szRegion*(i64)pShmNode->nRegion
        );
        if( pMem==MAP_FAILED ){
          rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
          goto shmpage_out;
        }
      }else{
        /* No file descriptor: use zeroed heap memory in place of a
        ** file mapping. */
        pMem = sqlite3_malloc64(nMap);
        if( pMem==0 ){
          rc = SQLITE_NOMEM_BKPT;
          goto shmpage_out;
        }
        memset(pMem, 0, nMap);
      }

      for(i=0; i<nShmPerMap; i++){
        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
      }
      pShmNode->nRegion += nShmPerMap;
    }
  }

  /* Common exit. *pp is set from whatever is currently mapped, whether or
  ** not an error occurred above. */
shmpage_out:
  if( pShmNode->nRegion>iRegion ){
    *pp = pShmNode->apRegion[iRegion];
  }else{
    *pp = 0;
  }
  if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
  sqlite3_mutex_leave(pShmNode->pShmMutex);
  return rc;
}

/*
** Check that the pShmNode->aLock[] array comports with the locking bitmasks
** held by each client. Return true if it does, or false otherwise. This
** is to be used in an assert(). e.g.
**
**     assert( assertLockingArrayOk(pShmNode) );
**
** The expected value of each aLock[] slot is recomputed from the
** exclMask/sharedMask of every connection on pShmNode->pFirst: -1 if some
** connection holds the slot exclusively, otherwise the number of
** connections holding it shared.
*/
#ifdef SQLITE_DEBUG
static int assertLockingArrayOk(unixShmNode *pShmNode){
  unixShm *pX;
  int aLock[SQLITE_SHM_NLOCK];
  assert( sqlite3_mutex_held(pShmNode->pShmMutex) );

  memset(aLock, 0, sizeof(aLock));
  for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
    int i;
    for(i=0; i<SQLITE_SHM_NLOCK; i++){
      if( pX->exclMask & (1<<i) ){
        /* An exclusive holder must be the only holder of the slot */
        assert( aLock[i]==0 );
        aLock[i] = -1;
      }else if( pX->sharedMask & (1<<i) ){
        /* A shared holder may not coexist with an exclusive holder */
        assert( aLock[i]>=0 );
        aLock[i]++;
      }
    }
  }

  assert( 0==memcmp(pShmNode->aLock, aLock, sizeof(aLock)) );
  return (memcmp(pShmNode->aLock, aLock, sizeof(aLock))==0);
}
#endif

/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHARED and EXCLUSIVE locks is a little
** different here than in posix. In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back. But one may
** not go from shared to exclusive or from exclusive to shared.
**
** Bookkeeping used below: pShmNode->aLock[i] is -1 while some connection
** on this node holds lock i exclusively, and otherwise is the number of
** connections on this node holding lock i shared. p->sharedMask and
** p->exclMask record the locks held by this particular connection.
**
** Returns SQLITE_OK on success, SQLITE_BUSY if a sibling connection holds
** a conflicting lock, SQLITE_IOERR_SHMLOCK if no shared-memory connection
** is open on fd, or the result of unixShmSystemLock().
*/
static int unixShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int ofst,                  /* First lock to acquire or release */
  int n,                     /* Number of locks to acquire or release */
  int flags                  /* What to do with the lock */
){
  unixFile *pDbFd = (unixFile*)fd;      /* Connection holding shared memory */
  unixShm *p;                           /* The shared memory being locked */
  unixShmNode *pShmNode;                /* The underlying file iNode */
  int rc = SQLITE_OK;                   /* Result code */
  u16 mask;                             /* Mask of locks to take or release */
  int *aLock;                           /* Shorthand for pShmNode->aLock */

  p = pDbFd->pShm;
  if( p==0 ) return SQLITE_IOERR_SHMLOCK;
  pShmNode = p->pShmNode;
  if( NEVER(pShmNode==0) ) return SQLITE_IOERR_SHMLOCK;
  aLock = pShmNode->aLock;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  assert( n>=1 );
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
  assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 );

  /* Check that, if this is to be a blocking lock, no locks that occur later
  ** in the following list than the lock being obtained are already held:
  **
  **   1. Checkpointer lock (ofst==1).
  **   2. Write lock (ofst==0).
  **   3. Read locks (ofst>=3 && ofst<SQLITE_SHM_NLOCK).
  **
  ** In other words, if this is a blocking lock, none of the locks that
  ** occur later in the above list than the lock being obtained may be
  ** held.
  **
  ** It is not permitted to block on the RECOVER lock.
  */
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
  assert( (flags & SQLITE_SHM_UNLOCK) || pDbFd->iBusyTimeout==0 || (
         (ofst!=2)                                   /* not RECOVER */
      && (ofst!=1 || (p->exclMask|p->sharedMask)==0)
      && (ofst!=0 || (p->exclMask|p->sharedMask)<3)
      && (ofst<3  || (p->exclMask|p->sharedMask)<(1<<ofst))
  ));
#endif

  /* Bits ofst..ofst+n-1 inclusive */
  mask = (1<<(ofst+n)) - (1<<ofst);
  assert( n>1 || mask==(1<<ofst) );
  sqlite3_mutex_enter(pShmNode->pShmMutex);
  assert( assertLockingArrayOk(pShmNode) );
  if( flags & SQLITE_SHM_UNLOCK ){
    /* Unlocking is a no-op unless this connection holds at least one of
    ** the locks in the range. */
    if( (p->exclMask|p->sharedMask) & mask ){
      int ii;
      int bUnlock = 1;

      /* The system-level lock may only be dropped if no slot in the range
      ** is held by any connection other than this one. That is, each
      ** aLock[ii] must be no greater than this connection's own shared
      ** contribution to it (1 or 0). */
      for(ii=ofst; ii<ofst+n; ii++){
        if( aLock[ii]>((p->sharedMask & (1<<ii)) ? 1 : 0) ){
          bUnlock = 0;
        }
      }

      if( bUnlock ){
        rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n);
        if( rc==SQLITE_OK ){
          memset(&aLock[ofst], 0, sizeof(int)*n);
        }
      }else if( ALWAYS(p->sharedMask & (1<<ofst)) ){
        /* Other connections still hold the shared lock. Only drop this
        ** connection's reference to it. */
        assert( n==1 && aLock[ofst]>1 );
        aLock[ofst]--;
      }

      /* Undo the local locks */
      if( rc==SQLITE_OK ){
        p->exclMask &= ~mask;
        p->sharedMask &= ~mask;
      }
    }
  }else if( flags & SQLITE_SHM_SHARED ){
    assert( n==1 );
    assert( (p->exclMask & (1<<ofst))==0 );
    /* Nothing to do if this connection already holds the shared lock */
    if( (p->sharedMask & mask)==0 ){
      if( aLock[ofst]<0 ){
        /* A sibling connection holds the lock exclusively */
        rc = SQLITE_BUSY;
      }else if( aLock[ofst]==0 ){
        /* First shared holder on this node: take the system-level lock */
        rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n);
      }

      /* Get the local shared locks */
      if( rc==SQLITE_OK ){
        p->sharedMask |= mask;
        aLock[ofst]++;
      }
    }
  }else{
    /* Make sure no sibling connections hold locks that will block this
    ** lock. If any do, return SQLITE_BUSY right away. */
    int ii;
    for(ii=ofst; ii<ofst+n; ii++){
      assert( (p->sharedMask & mask)==0 );
      if( ALWAYS((p->exclMask & (1<<ii))==0) && aLock[ii] ){
        rc = SQLITE_BUSY;
        break;
      }
    }

    /* Get the exclusive locks at the system level. Then if successful
    ** also update the in-memory values. */
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n);
      if( rc==SQLITE_OK ){
        assert( (p->sharedMask & mask)==0 );
        p->exclMask |= mask;
        for(ii=ofst; ii<ofst+n; ii++){
          aLock[ii] = -1;
        }
      }
    }
  }
  assert( assertLockingArrayOk(pShmNode) );
  sqlite3_mutex_leave(pShmNode->pShmMutex);
  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",
           p->id, osGetpid(0), p->sharedMask, p->exclMask));
  return rc;
}

/*
** Implement a memory barrier or memory fence on shared memory.
**
** All loads and stores begun before the barrier must complete before
** any load or store begun after the barrier.
**
** In addition to sqlite3MemoryBarrier(), the global unix mutex is entered
** and immediately released.
*/
static void unixShmBarrier(
  sqlite3_file *fd                /* Database file holding the shared memory */
){
  UNUSED_PARAMETER(fd);
  sqlite3MemoryBarrier();         /* compiler-defined memory barrier */
  assert( fd->pMethods->xLock==nolockLock
       || unixFileMutexNotheld((unixFile*)fd)
  );
  unixEnterMutex();               /* Also mutex, for redundancy */
  unixLeaveMutex();
}

/*
** Close a connection to shared-memory. Delete the underlying
** storage if deleteFlag is true.
**
** If there is no shared memory associated with the connection then this
** routine is a harmless no-op.
5062 */ 5063 static int unixShmUnmap( 5064 sqlite3_file *fd, /* The underlying database file */ 5065 int deleteFlag /* Delete shared-memory if true */ 5066 ){ 5067 unixShm *p; /* The connection to be closed */ 5068 unixShmNode *pShmNode; /* The underlying shared-memory file */ 5069 unixShm **pp; /* For looping over sibling connections */ 5070 unixFile *pDbFd; /* The underlying database file */ 5071 5072 pDbFd = (unixFile*)fd; 5073 p = pDbFd->pShm; 5074 if( p==0 ) return SQLITE_OK; 5075 pShmNode = p->pShmNode; 5076 5077 assert( pShmNode==pDbFd->pInode->pShmNode ); 5078 assert( pShmNode->pInode==pDbFd->pInode ); 5079 5080 /* Remove connection p from the set of connections associated 5081 ** with pShmNode */ 5082 sqlite3_mutex_enter(pShmNode->pShmMutex); 5083 for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} 5084 *pp = p->pNext; 5085 5086 /* Free the connection p */ 5087 sqlite3_free(p); 5088 pDbFd->pShm = 0; 5089 sqlite3_mutex_leave(pShmNode->pShmMutex); 5090 5091 /* If pShmNode->nRef has reached 0, then close the underlying 5092 ** shared-memory file, too */ 5093 assert( unixFileMutexNotheld(pDbFd) ); 5094 unixEnterMutex(); 5095 assert( pShmNode->nRef>0 ); 5096 pShmNode->nRef--; 5097 if( pShmNode->nRef==0 ){ 5098 if( deleteFlag && pShmNode->hShm>=0 ){ 5099 osUnlink(pShmNode->zFilename); 5100 } 5101 unixShmPurge(pDbFd); 5102 } 5103 unixLeaveMutex(); 5104 5105 return SQLITE_OK; 5106 } 5107 5108 5109 #else 5110 # define unixShmMap 0 5111 # define unixShmLock 0 5112 # define unixShmBarrier 0 5113 # define unixShmUnmap 0 5114 #endif /* #ifndef SQLITE_OMIT_WAL */ 5115 5116 #if SQLITE_MAX_MMAP_SIZE>0 5117 /* 5118 ** If it is currently memory mapped, unmap file pFd. 
**
** There must be no outstanding xFetch references when this is called.
** All three mapping fields (pMapRegion, mmapSize, mmapSizeActual) are
** reset to zero.
*/
static void unixUnmapfile(unixFile *pFd){
  assert( pFd->nFetchOut==0 );
  if( pFd->pMapRegion ){
    osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
    pFd->pMapRegion = 0;
    pFd->mmapSize = 0;
    pFd->mmapSizeActual = 0;
  }
}

/*
** Attempt to set the size of the memory mapping maintained by file
** descriptor pFd to nNew bytes. Any existing mapping is discarded.
**
** If successful, this function sets the following variables:
**
**       unixFile.pMapRegion
**       unixFile.mmapSize
**       unixFile.mmapSizeActual
**
** If unsuccessful, an error message is logged via sqlite3_log() and
** the three variables above are zeroed. In this case SQLite should
** continue accessing the database using the xRead() and xWrite()
** methods. unixFile.mmapSizeMax is also set to zero on failure.
*/
static void unixRemapfile(
  unixFile *pFd,                  /* File descriptor object */
  i64 nNew                        /* Required mapping size */
){
  const char *zErr = "mmap";           /* Name of failing call, for the log */
  int h = pFd->h;                      /* File descriptor open on db file */
  u8 *pOrig = (u8 *)pFd->pMapRegion;   /* Pointer to current file mapping */
  i64 nOrig = pFd->mmapSizeActual;     /* Size of pOrig region in bytes */
  u8 *pNew = 0;                        /* Location of new mapping */
  int flags = PROT_READ;               /* Flags to pass to mmap() */

  assert( pFd->nFetchOut==0 );
  assert( nNew>pFd->mmapSize );
  assert( nNew<=pFd->mmapSizeMax );
  assert( nNew>0 );
  assert( pFd->mmapSizeActual>=pFd->mmapSize );
  assert( MAP_FAILED!=0 );

#ifdef SQLITE_MMAP_READWRITE
  if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
#endif

  if( pOrig ){
    /* nReuse is the leading part of the existing mapping that is kept.
    ** Without mremap() it is rounded down to a system page boundary. */
#if HAVE_MREMAP
    i64 nReuse = pFd->mmapSize;
#else
    const int szSyspage = osGetpagesize();
    i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
#endif
    u8 *pReq = &pOrig[nReuse];   /* First byte past the reused prefix */

    /* Unmap any pages of the existing mapping that cannot be reused. */
    if( nReuse!=nOrig ){
      osMunmap(pReq, nOrig-nReuse);
    }

#if HAVE_MREMAP
    pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
    zErr = "mremap";
#else
    /* Ask for the extension to be placed directly after the reused
    ** prefix. If the mapping lands anywhere else it is of no use, so
    ** discard it and set pNew to 0. */
    pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
    if( pNew!=MAP_FAILED ){
      if( pNew!=pReq ){
        osMunmap(pNew, nNew - nReuse);
        pNew = 0;
      }else{
        pNew = pOrig;
      }
    }
#endif

    /* The attempt to extend the existing mapping failed. Free it. */
    if( pNew==MAP_FAILED || pNew==0 ){
      osMunmap(pOrig, nReuse);
    }
  }

  /* If pNew is still NULL, try to create an entirely new mapping. */
  if( pNew==0 ){
    pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
  }

  if( pNew==MAP_FAILED ){
    pNew = 0;
    nNew = 0;
    unixLogError(SQLITE_OK, zErr, pFd->zPath);

    /* If the mmap() above failed, assume that all subsequent mmap() calls
    ** will probably fail too. Fall back to using xRead/xWrite exclusively
    ** in this case. */
    pFd->mmapSizeMax = 0;
  }
  pFd->pMapRegion = (void *)pNew;
  pFd->mmapSize = pFd->mmapSizeActual = nNew;
}

/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if
** there already exists a mapping for this file, and there are still
** outstanding xFetch() references to it, this function is a no-op.
**
** If parameter nMap is non-negative, then it is the requested size of
** the mapping to create. Otherwise, if nMap is less than zero, then the
** requested size is the size of the file on disk. The actual size of the
** created mapping is either the requested size or the value configured
** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
5232 ** 5233 ** SQLITE_OK is returned if no error occurs (even if the mapping is not 5234 ** recreated as a result of outstanding references) or an SQLite error 5235 ** code otherwise. 5236 */ 5237 static int unixMapfile(unixFile *pFd, i64 nMap){ 5238 assert( nMap>=0 || pFd->nFetchOut==0 ); 5239 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5240 if( pFd->nFetchOut>0 ) return SQLITE_OK; 5241 5242 if( nMap<0 ){ 5243 struct stat statbuf; /* Low-level file information */ 5244 if( osFstat(pFd->h, &statbuf) ){ 5245 return SQLITE_IOERR_FSTAT; 5246 } 5247 nMap = statbuf.st_size; 5248 } 5249 if( nMap>pFd->mmapSizeMax ){ 5250 nMap = pFd->mmapSizeMax; 5251 } 5252 5253 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5254 if( nMap!=pFd->mmapSize ){ 5255 unixRemapfile(pFd, nMap); 5256 } 5257 5258 return SQLITE_OK; 5259 } 5260 #endif /* SQLITE_MAX_MMAP_SIZE>0 */ 5261 5262 /* 5263 ** If possible, return a pointer to a mapping of file fd starting at offset 5264 ** iOff. The mapping must be valid for at least nAmt bytes. 5265 ** 5266 ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. 5267 ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. 5268 ** Finally, if an error does occur, return an SQLite error code. The final 5269 ** value of *pp is undefined in this case. 5270 ** 5271 ** If this function does return a pointer, the caller must eventually 5272 ** release the reference by calling unixUnfetch(). 
5273 */ 5274 static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ 5275 #if SQLITE_MAX_MMAP_SIZE>0 5276 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5277 #endif 5278 *pp = 0; 5279 5280 #if SQLITE_MAX_MMAP_SIZE>0 5281 if( pFd->mmapSizeMax>0 ){ 5282 if( pFd->pMapRegion==0 ){ 5283 int rc = unixMapfile(pFd, -1); 5284 if( rc!=SQLITE_OK ) return rc; 5285 } 5286 if( pFd->mmapSize >= iOff+nAmt ){ 5287 *pp = &((u8 *)pFd->pMapRegion)[iOff]; 5288 pFd->nFetchOut++; 5289 } 5290 } 5291 #endif 5292 return SQLITE_OK; 5293 } 5294 5295 /* 5296 ** If the third argument is non-NULL, then this function releases a 5297 ** reference obtained by an earlier call to unixFetch(). The second 5298 ** argument passed to this function must be the same as the corresponding 5299 ** argument that was passed to the unixFetch() invocation. 5300 ** 5301 ** Or, if the third argument is NULL, then this function is being called 5302 ** to inform the VFS layer that, according to POSIX, any existing mapping 5303 ** may now be invalid and should be unmapped. 5304 */ 5305 static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ 5306 #if SQLITE_MAX_MMAP_SIZE>0 5307 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5308 UNUSED_PARAMETER(iOff); 5309 5310 /* If p==0 (unmap the entire file) then there must be no outstanding 5311 ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), 5312 ** then there must be at least one outstanding. */ 5313 assert( (p==0)==(pFd->nFetchOut==0) ); 5314 5315 /* If p!=0, it must match the iOff value. */ 5316 assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); 5317 5318 if( p ){ 5319 pFd->nFetchOut--; 5320 }else{ 5321 unixUnmapfile(pFd); 5322 } 5323 5324 assert( pFd->nFetchOut>=0 ); 5325 #else 5326 UNUSED_PARAMETER(fd); 5327 UNUSED_PARAMETER(p); 5328 UNUSED_PARAMETER(iOff); 5329 #endif 5330 return SQLITE_OK; 5331 } 5332 5333 /* 5334 ** Here ends the implementation of all sqlite3_file methods. 
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/

/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies. It also contains definitions
** of "finder" functions. A finder-function is used to locate the appropriate
** sqlite3_io_methods object for a particular database file. The pAppData
** fields of the sqlite3_vfs VFS objects are initialized to be pointers to
** the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object. The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects. We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from being cast into a function pointer.
**
**
** Each instance of the IOMETHODS macro generates two objects:
**
**   * A constant sqlite3_io_methods object called METHOD that has locking
**     methods CLOSE, LOCK, UNLOCK, CKLOCK and shared-memory mapping
**     method SHMMAP.
**
**   * An I/O method finder function called FINDER that returns a pointer
**     to the METHOD object in the previous bullet.
*/
/*
** Macro parameters:
**
**   FINDER    Name of the finder-function pointer. The function itself is
**             generated with the name FINDER##Impl.
**   METHOD    Name of the generated sqlite3_io_methods object.
**   VERSION   Value stored in the iVersion field.
**   CLOSE     xClose method.
**   LOCK      xLock method.
**   UNLOCK    xUnlock method.
**   CKLOCK    xCheckReservedLock method.
**   SHMMAP    xShmMap method (or 0).
**
** All other slots are filled with the same unixXxx routines for every
** instantiation. The generated finder ignores both of its arguments and
** always returns &METHOD.
*/
#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP)     \
static const sqlite3_io_methods METHOD = {                                   \
   VERSION,                    /* iVersion */                                \
   CLOSE,                      /* xClose */                                  \
   unixRead,                   /* xRead */                                   \
   unixWrite,                  /* xWrite */                                  \
   unixTruncate,               /* xTruncate */                               \
   unixSync,                   /* xSync */                                   \
   unixFileSize,               /* xFileSize */                               \
   LOCK,                       /* xLock */                                   \
   UNLOCK,                     /* xUnlock */                                 \
   CKLOCK,                     /* xCheckReservedLock */                      \
   unixFileControl,            /* xFileControl */                            \
   unixSectorSize,             /* xSectorSize */                             \
   unixDeviceCharacteristics,  /* xDeviceCharacteristics */                  \
   SHMMAP,                     /* xShmMap */                                 \
   unixShmLock,                /* xShmLock */                                \
   unixShmBarrier,             /* xShmBarrier */                             \
   unixShmUnmap,               /* xShmUnmap */                               \
   unixFetch,                  /* xFetch */                                  \
   unixUnfetch,                /* xUnfetch */                                \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;

/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies. Functions that return pointers to these methods
** are also created.
5406 */ 5407 IOMETHODS( 5408 posixIoFinder, /* Finder function name */ 5409 posixIoMethods, /* sqlite3_io_methods object name */ 5410 3, /* shared memory and mmap are enabled */ 5411 unixClose, /* xClose method */ 5412 unixLock, /* xLock method */ 5413 unixUnlock, /* xUnlock method */ 5414 unixCheckReservedLock, /* xCheckReservedLock method */ 5415 unixShmMap /* xShmMap method */ 5416 ) 5417 IOMETHODS( 5418 nolockIoFinder, /* Finder function name */ 5419 nolockIoMethods, /* sqlite3_io_methods object name */ 5420 3, /* shared memory and mmap are enabled */ 5421 nolockClose, /* xClose method */ 5422 nolockLock, /* xLock method */ 5423 nolockUnlock, /* xUnlock method */ 5424 nolockCheckReservedLock, /* xCheckReservedLock method */ 5425 0 /* xShmMap method */ 5426 ) 5427 IOMETHODS( 5428 dotlockIoFinder, /* Finder function name */ 5429 dotlockIoMethods, /* sqlite3_io_methods object name */ 5430 1, /* shared memory is disabled */ 5431 dotlockClose, /* xClose method */ 5432 dotlockLock, /* xLock method */ 5433 dotlockUnlock, /* xUnlock method */ 5434 dotlockCheckReservedLock, /* xCheckReservedLock method */ 5435 0 /* xShmMap method */ 5436 ) 5437 5438 #if SQLITE_ENABLE_LOCKING_STYLE 5439 IOMETHODS( 5440 flockIoFinder, /* Finder function name */ 5441 flockIoMethods, /* sqlite3_io_methods object name */ 5442 1, /* shared memory is disabled */ 5443 flockClose, /* xClose method */ 5444 flockLock, /* xLock method */ 5445 flockUnlock, /* xUnlock method */ 5446 flockCheckReservedLock, /* xCheckReservedLock method */ 5447 0 /* xShmMap method */ 5448 ) 5449 #endif 5450 5451 #if OS_VXWORKS 5452 IOMETHODS( 5453 semIoFinder, /* Finder function name */ 5454 semIoMethods, /* sqlite3_io_methods object name */ 5455 1, /* shared memory is disabled */ 5456 semXClose, /* xClose method */ 5457 semXLock, /* xLock method */ 5458 semXUnlock, /* xUnlock method */ 5459 semXCheckReservedLock, /* xCheckReservedLock method */ 5460 0 /* xShmMap method */ 5461 ) 5462 #endif 5463 5464 #if 
defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5465 IOMETHODS( 5466 afpIoFinder, /* Finder function name */ 5467 afpIoMethods, /* sqlite3_io_methods object name */ 5468 1, /* shared memory is disabled */ 5469 afpClose, /* xClose method */ 5470 afpLock, /* xLock method */ 5471 afpUnlock, /* xUnlock method */ 5472 afpCheckReservedLock, /* xCheckReservedLock method */ 5473 0 /* xShmMap method */ 5474 ) 5475 #endif 5476 5477 /* 5478 ** The proxy locking method is a "super-method" in the sense that it 5479 ** opens secondary file descriptors for the conch and lock files and 5480 ** it uses proxy, dot-file, AFP, and flock() locking methods on those 5481 ** secondary files. For this reason, the division that implements 5482 ** proxy locking is located much further down in the file. But we need 5483 ** to go ahead and define the sqlite3_io_methods and finder function 5484 ** for proxy locking here. So we forward declare the I/O methods. 5485 */ 5486 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5487 static int proxyClose(sqlite3_file*); 5488 static int proxyLock(sqlite3_file*, int); 5489 static int proxyUnlock(sqlite3_file*, int); 5490 static int proxyCheckReservedLock(sqlite3_file*, int*); 5491 IOMETHODS( 5492 proxyIoFinder, /* Finder function name */ 5493 proxyIoMethods, /* sqlite3_io_methods object name */ 5494 1, /* shared memory is disabled */ 5495 proxyClose, /* xClose method */ 5496 proxyLock, /* xLock method */ 5497 proxyUnlock, /* xUnlock method */ 5498 proxyCheckReservedLock, /* xCheckReservedLock method */ 5499 0 /* xShmMap method */ 5500 ) 5501 #endif 5502 5503 /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ 5504 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5505 IOMETHODS( 5506 nfsIoFinder, /* Finder function name */ 5507 nfsIoMethods, /* sqlite3_io_methods object name */ 5508 1, /* shared memory is disabled */ 5509 unixClose, /* xClose method */ 5510 unixLock, /* xLock method */ 5511 nfsUnlock, /* xUnlock 
method */ 5512 unixCheckReservedLock, /* xCheckReservedLock method */ 5513 0 /* xShmMap method */ 5514 ) 5515 #endif 5516 5517 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5518 /* 5519 ** This "finder" function attempts to determine the best locking strategy 5520 ** for the database file "filePath". It then returns the sqlite3_io_methods 5521 ** object that implements that strategy. 5522 ** 5523 ** This is for MacOSX only. 5524 */ 5525 static const sqlite3_io_methods *autolockIoFinderImpl( 5526 const char *filePath, /* name of the database file */ 5527 unixFile *pNew /* open file object for the database file */ 5528 ){ 5529 static const struct Mapping { 5530 const char *zFilesystem; /* Filesystem type name */ 5531 const sqlite3_io_methods *pMethods; /* Appropriate locking method */ 5532 } aMap[] = { 5533 { "hfs", &posixIoMethods }, 5534 { "ufs", &posixIoMethods }, 5535 { "afpfs", &afpIoMethods }, 5536 { "smbfs", &afpIoMethods }, 5537 { "webdav", &nolockIoMethods }, 5538 { 0, 0 } 5539 }; 5540 int i; 5541 struct statfs fsInfo; 5542 struct flock lockInfo; 5543 5544 if( !filePath ){ 5545 /* If filePath==NULL that means we are dealing with a transient file 5546 ** that does not need to be locked. */ 5547 return &nolockIoMethods; 5548 } 5549 if( statfs(filePath, &fsInfo) != -1 ){ 5550 if( fsInfo.f_flags & MNT_RDONLY ){ 5551 return &nolockIoMethods; 5552 } 5553 for(i=0; aMap[i].zFilesystem; i++){ 5554 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 5555 return aMap[i].pMethods; 5556 } 5557 } 5558 } 5559 5560 /* Default case. Handles, amongst others, "nfs". 5561 ** Test byte-range lock using fcntl(). If the call succeeds, 5562 ** assume that the file-system supports POSIX style locks. 
5563 */ 5564 lockInfo.l_len = 1; 5565 lockInfo.l_start = 0; 5566 lockInfo.l_whence = SEEK_SET; 5567 lockInfo.l_type = F_RDLCK; 5568 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5569 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ 5570 return &nfsIoMethods; 5571 } else { 5572 return &posixIoMethods; 5573 } 5574 }else{ 5575 return &dotlockIoMethods; 5576 } 5577 } 5578 static const sqlite3_io_methods 5579 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 5580 5581 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 5582 5583 #if OS_VXWORKS 5584 /* 5585 ** This "finder" function for VxWorks checks to see if posix advisory 5586 ** locking works. If it does, then that is what is used. If it does not 5587 ** work, then fallback to named semaphore locking. 5588 */ 5589 static const sqlite3_io_methods *vxworksIoFinderImpl( 5590 const char *filePath, /* name of the database file */ 5591 unixFile *pNew /* the open file object */ 5592 ){ 5593 struct flock lockInfo; 5594 5595 if( !filePath ){ 5596 /* If filePath==NULL that means we are dealing with a transient file 5597 ** that does not need to be locked. */ 5598 return &nolockIoMethods; 5599 } 5600 5601 /* Test if fcntl() is supported and use POSIX style locks. 5602 ** Otherwise fall back to the named semaphore method. 
5603 */ 5604 lockInfo.l_len = 1; 5605 lockInfo.l_start = 0; 5606 lockInfo.l_whence = SEEK_SET; 5607 lockInfo.l_type = F_RDLCK; 5608 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5609 return &posixIoMethods; 5610 }else{ 5611 return &semIoMethods; 5612 } 5613 } 5614 static const sqlite3_io_methods 5615 *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; 5616 5617 #endif /* OS_VXWORKS */ 5618 5619 /* 5620 ** An abstract type for a pointer to an IO method finder function: 5621 */ 5622 typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); 5623 5624 5625 /**************************************************************************** 5626 **************************** sqlite3_vfs methods **************************** 5627 ** 5628 ** This division contains the implementation of methods on the 5629 ** sqlite3_vfs object. 5630 */ 5631 5632 /* 5633 ** Initialize the contents of the unixFile structure pointed to by pId. 5634 */ 5635 static int fillInUnixFile( 5636 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 5637 int h, /* Open file descriptor of file being opened */ 5638 sqlite3_file *pId, /* Write to the unixFile structure here */ 5639 const char *zFilename, /* Name of the file being opened */ 5640 int ctrlFlags /* Zero or more UNIXFILE_* values */ 5641 ){ 5642 const sqlite3_io_methods *pLockingStyle; 5643 unixFile *pNew = (unixFile *)pId; 5644 int rc = SQLITE_OK; 5645 5646 assert( pNew->pInode==NULL ); 5647 5648 /* No locking occurs in temporary files */ 5649 assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); 5650 5651 OSTRACE(("OPEN %-3d %s\n", h, zFilename)); 5652 pNew->h = h; 5653 pNew->pVfs = pVfs; 5654 pNew->zPath = zFilename; 5655 pNew->ctrlFlags = (u8)ctrlFlags; 5656 #if SQLITE_MAX_MMAP_SIZE>0 5657 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; 5658 #endif 5659 if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? 
zFilename : 0), 5660 "psow", SQLITE_POWERSAFE_OVERWRITE) ){ 5661 pNew->ctrlFlags |= UNIXFILE_PSOW; 5662 } 5663 if( strcmp(pVfs->zName,"unix-excl")==0 ){ 5664 pNew->ctrlFlags |= UNIXFILE_EXCL; 5665 } 5666 5667 #if OS_VXWORKS 5668 pNew->pId = vxworksFindFileId(zFilename); 5669 if( pNew->pId==0 ){ 5670 ctrlFlags |= UNIXFILE_NOLOCK; 5671 rc = SQLITE_NOMEM_BKPT; 5672 } 5673 #endif 5674 5675 if( ctrlFlags & UNIXFILE_NOLOCK ){ 5676 pLockingStyle = &nolockIoMethods; 5677 }else{ 5678 pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); 5679 #if SQLITE_ENABLE_LOCKING_STYLE 5680 /* Cache zFilename in the locking context (AFP and dotlock override) for 5681 ** proxyLock activation is possible (remote proxy is based on db name) 5682 ** zFilename remains valid until file is closed, to support */ 5683 pNew->lockingContext = (void*)zFilename; 5684 #endif 5685 } 5686 5687 if( pLockingStyle == &posixIoMethods 5688 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5689 || pLockingStyle == &nfsIoMethods 5690 #endif 5691 ){ 5692 unixEnterMutex(); 5693 rc = findInodeInfo(pNew, &pNew->pInode); 5694 if( rc!=SQLITE_OK ){ 5695 /* If an error occurred in findInodeInfo(), close the file descriptor 5696 ** immediately, before releasing the mutex. findInodeInfo() may fail 5697 ** in two scenarios: 5698 ** 5699 ** (a) A call to fstat() failed. 5700 ** (b) A malloc failed. 5701 ** 5702 ** Scenario (b) may only occur if the process is holding no other 5703 ** file descriptors open on the same file. If there were other file 5704 ** descriptors on this file, then no malloc would be required by 5705 ** findInodeInfo(). If this is the case, it is quite safe to close 5706 ** handle h - as it is guaranteed that no posix locks will be released 5707 ** by doing so. 5708 ** 5709 ** If scenario (a) caused the error then things are not so safe. The 5710 ** implicit assumption here is that if fstat() fails, things are in 5711 ** such bad shape that dropping a lock or two doesn't matter much. 
5712 */ 5713 robust_close(pNew, h, __LINE__); 5714 h = -1; 5715 } 5716 unixLeaveMutex(); 5717 } 5718 5719 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5720 else if( pLockingStyle == &afpIoMethods ){ 5721 /* AFP locking uses the file path so it needs to be included in 5722 ** the afpLockingContext. 5723 */ 5724 afpLockingContext *pCtx; 5725 pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 5726 if( pCtx==0 ){ 5727 rc = SQLITE_NOMEM_BKPT; 5728 }else{ 5729 /* NB: zFilename exists and remains valid until the file is closed 5730 ** according to requirement F11141. So we do not need to make a 5731 ** copy of the filename. */ 5732 pCtx->dbPath = zFilename; 5733 pCtx->reserved = 0; 5734 srandomdev(); 5735 unixEnterMutex(); 5736 rc = findInodeInfo(pNew, &pNew->pInode); 5737 if( rc!=SQLITE_OK ){ 5738 sqlite3_free(pNew->lockingContext); 5739 robust_close(pNew, h, __LINE__); 5740 h = -1; 5741 } 5742 unixLeaveMutex(); 5743 } 5744 } 5745 #endif 5746 5747 else if( pLockingStyle == &dotlockIoMethods ){ 5748 /* Dotfile locking uses the file path so it needs to be included in 5749 ** the dotlockLockingContext 5750 */ 5751 char *zLockFile; 5752 int nFilename; 5753 assert( zFilename!=0 ); 5754 nFilename = (int)strlen(zFilename) + 6; 5755 zLockFile = (char *)sqlite3_malloc64(nFilename); 5756 if( zLockFile==0 ){ 5757 rc = SQLITE_NOMEM_BKPT; 5758 }else{ 5759 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); 5760 } 5761 pNew->lockingContext = zLockFile; 5762 } 5763 5764 #if OS_VXWORKS 5765 else if( pLockingStyle == &semIoMethods ){ 5766 /* Named semaphore locking uses the file path so it needs to be 5767 ** included in the semLockingContext 5768 */ 5769 unixEnterMutex(); 5770 rc = findInodeInfo(pNew, &pNew->pInode); 5771 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ 5772 char *zSemName = pNew->pInode->aSemName; 5773 int n; 5774 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", 5775 pNew->pId->zCanonicalName); 5776 for( n=1; 
zSemName[n]; n++ ) 5777 if( zSemName[n]=='/' ) zSemName[n] = '_'; 5778 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); 5779 if( pNew->pInode->pSem == SEM_FAILED ){ 5780 rc = SQLITE_NOMEM_BKPT; 5781 pNew->pInode->aSemName[0] = '\0'; 5782 } 5783 } 5784 unixLeaveMutex(); 5785 } 5786 #endif 5787 5788 storeLastErrno(pNew, 0); 5789 #if OS_VXWORKS 5790 if( rc!=SQLITE_OK ){ 5791 if( h>=0 ) robust_close(pNew, h, __LINE__); 5792 h = -1; 5793 osUnlink(zFilename); 5794 pNew->ctrlFlags |= UNIXFILE_DELETE; 5795 } 5796 #endif 5797 if( rc!=SQLITE_OK ){ 5798 if( h>=0 ) robust_close(pNew, h, __LINE__); 5799 }else{ 5800 pId->pMethods = pLockingStyle; 5801 OpenCounter(+1); 5802 verifyDbFile(pNew); 5803 } 5804 return rc; 5805 } 5806 5807 /* 5808 ** Directories to consider for temp files. 5809 */ 5810 static const char *azTempDirs[] = { 5811 0, 5812 0, 5813 "/var/tmp", 5814 "/usr/tmp", 5815 "/tmp", 5816 "." 5817 }; 5818 5819 /* 5820 ** Initialize first two members of azTempDirs[] array. 5821 */ 5822 static void unixTempFileInit(void){ 5823 azTempDirs[0] = getenv("SQLITE_TMPDIR"); 5824 azTempDirs[1] = getenv("TMPDIR"); 5825 } 5826 5827 /* 5828 ** Return the name of a directory in which to put temporary files. 5829 ** If no suitable temporary file directory can be found, return NULL. 5830 */ 5831 static const char *unixTempFileDir(void){ 5832 unsigned int i = 0; 5833 struct stat buf; 5834 const char *zDir = sqlite3_temp_directory; 5835 5836 while(1){ 5837 if( zDir!=0 5838 && osStat(zDir, &buf)==0 5839 && S_ISDIR(buf.st_mode) 5840 && osAccess(zDir, 03)==0 5841 ){ 5842 return zDir; 5843 } 5844 if( i>=sizeof(azTempDirs)/sizeof(azTempDirs[0]) ) break; 5845 zDir = azTempDirs[i++]; 5846 } 5847 return 0; 5848 } 5849 5850 /* 5851 ** Create a temporary file name in zBuf. zBuf must be allocated 5852 ** by the calling process and must be big enough to hold at least 5853 ** pVfs->mxPathname bytes. 
5854 */ 5855 static int unixGetTempname(int nBuf, char *zBuf){ 5856 const char *zDir; 5857 int iLimit = 0; 5858 int rc = SQLITE_OK; 5859 5860 /* It's odd to simulate an io-error here, but really this is just 5861 ** using the io-error infrastructure to test that SQLite handles this 5862 ** function failing. 5863 */ 5864 zBuf[0] = 0; 5865 SimulateIOError( return SQLITE_IOERR ); 5866 5867 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR)); 5868 zDir = unixTempFileDir(); 5869 if( zDir==0 ){ 5870 rc = SQLITE_IOERR_GETTEMPPATH; 5871 }else{ 5872 do{ 5873 u64 r; 5874 sqlite3_randomness(sizeof(r), &r); 5875 assert( nBuf>2 ); 5876 zBuf[nBuf-2] = 0; 5877 sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c", 5878 zDir, r, 0); 5879 if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ){ 5880 rc = SQLITE_ERROR; 5881 break; 5882 } 5883 }while( osAccess(zBuf,0)==0 ); 5884 } 5885 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR)); 5886 return rc; 5887 } 5888 5889 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5890 /* 5891 ** Routine to transform a unixFile into a proxy-locking unixFile. 5892 ** Implementation in the proxy-lock division, but used by unixOpen() 5893 ** if SQLITE_PREFER_PROXY_LOCKING is defined. 5894 */ 5895 static int proxyTransformUnixFile(unixFile*, const char*); 5896 #endif 5897 5898 /* 5899 ** Search for an unused file descriptor that was opened on the database 5900 ** file (not a journal or super-journal file) identified by pathname 5901 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second 5902 ** argument to this function. 5903 ** 5904 ** Such a file descriptor may exist if a database connection was closed 5905 ** but the associated file descriptor could not be closed because some 5906 ** other file descriptor open on the same file is holding a file-lock. 
5907 ** Refer to comments in the unixClose() function and the lengthy comment 5908 ** describing "Posix Advisory Locking" at the start of this file for 5909 ** further details. Also, ticket #4018. 5910 ** 5911 ** If a suitable file descriptor is found, then it is returned. If no 5912 ** such file descriptor is located, -1 is returned. 5913 */ 5914 static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ 5915 UnixUnusedFd *pUnused = 0; 5916 5917 /* Do not search for an unused file descriptor on vxworks. Not because 5918 ** vxworks would not benefit from the change (it might, we're not sure), 5919 ** but because no way to test it is currently available. It is better 5920 ** not to risk breaking vxworks support for the sake of such an obscure 5921 ** feature. */ 5922 #if !OS_VXWORKS 5923 struct stat sStat; /* Results of stat() call */ 5924 5925 unixEnterMutex(); 5926 5927 /* A stat() call may fail for various reasons. If this happens, it is 5928 ** almost certain that an open() call on the same path will also fail. 5929 ** For this reason, if an error occurs in the stat() call here, it is 5930 ** ignored and -1 is returned. The caller will try to open a new file 5931 ** descriptor on the same path, fail, and return an error to SQLite. 5932 ** 5933 ** Even if a subsequent open() call does succeed, the consequences of 5934 ** not searching for a reusable file descriptor are not dire. 
*/ 5935 if( inodeList!=0 && 0==osStat(zPath, &sStat) ){ 5936 unixInodeInfo *pInode; 5937 5938 pInode = inodeList; 5939 while( pInode && (pInode->fileId.dev!=sStat.st_dev 5940 || pInode->fileId.ino!=(u64)sStat.st_ino) ){ 5941 pInode = pInode->pNext; 5942 } 5943 if( pInode ){ 5944 UnixUnusedFd **pp; 5945 assert( sqlite3_mutex_notheld(pInode->pLockMutex) ); 5946 sqlite3_mutex_enter(pInode->pLockMutex); 5947 flags &= (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); 5948 for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); 5949 pUnused = *pp; 5950 if( pUnused ){ 5951 *pp = pUnused->pNext; 5952 } 5953 sqlite3_mutex_leave(pInode->pLockMutex); 5954 } 5955 } 5956 unixLeaveMutex(); 5957 #endif /* if !OS_VXWORKS */ 5958 return pUnused; 5959 } 5960 5961 /* 5962 ** Find the mode, uid and gid of file zFile. 5963 */ 5964 static int getFileMode( 5965 const char *zFile, /* File name */ 5966 mode_t *pMode, /* OUT: Permissions of zFile */ 5967 uid_t *pUid, /* OUT: uid of zFile. */ 5968 gid_t *pGid /* OUT: gid of zFile. */ 5969 ){ 5970 struct stat sStat; /* Output of stat() on database file */ 5971 int rc = SQLITE_OK; 5972 if( 0==osStat(zFile, &sStat) ){ 5973 *pMode = sStat.st_mode & 0777; 5974 *pUid = sStat.st_uid; 5975 *pGid = sStat.st_gid; 5976 }else{ 5977 rc = SQLITE_IOERR_FSTAT; 5978 } 5979 return rc; 5980 } 5981 5982 /* 5983 ** This function is called by unixOpen() to determine the unix permissions 5984 ** to create new files with. If no error occurs, then SQLITE_OK is returned 5985 ** and a value suitable for passing as the third argument to open(2) is 5986 ** written to *pMode. If an IO error occurs, an SQLite error code is 5987 ** returned and the value of *pMode is not modified. 5988 ** 5989 ** In most cases, this routine sets *pMode to 0, which will become 5990 ** an indication to robust_open() to create the file using 5991 ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. 
5992 ** But if the file being opened is a WAL or regular journal file, then 5993 ** this function queries the file-system for the permissions on the 5994 ** corresponding database file and sets *pMode to this value. Whenever 5995 ** possible, WAL and journal files are created using the same permissions 5996 ** as the associated database file. 5997 ** 5998 ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the 5999 ** original filename is unavailable. But 8_3_NAMES is only used for 6000 ** FAT filesystems and permissions do not matter there, so just use 6001 ** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero. 6002 */ 6003 static int findCreateFileMode( 6004 const char *zPath, /* Path of file (possibly) being created */ 6005 int flags, /* Flags passed as 4th argument to xOpen() */ 6006 mode_t *pMode, /* OUT: Permissions to open file with */ 6007 uid_t *pUid, /* OUT: uid to set on the file */ 6008 gid_t *pGid /* OUT: gid to set on the file */ 6009 ){ 6010 int rc = SQLITE_OK; /* Return Code */ 6011 *pMode = 0; 6012 *pUid = 0; 6013 *pGid = 0; 6014 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 6015 char zDb[MAX_PATHNAME+1]; /* Database file path */ 6016 int nDb; /* Number of valid bytes in zDb */ 6017 6018 /* zPath is a path to a WAL or journal file. The following block derives 6019 ** the path to the associated database file from zPath. This block handles 6020 ** the following naming conventions: 6021 ** 6022 ** "<path to db>-journal" 6023 ** "<path to db>-wal" 6024 ** "<path to db>-journalNN" 6025 ** "<path to db>-walNN" 6026 ** 6027 ** where NN is a decimal number. The NN naming schemes are 6028 ** used by the test_multiplex.c module. 6029 ** 6030 ** In normal operation, the journal file name will always contain 6031 ** a '-' character. 
However in 8+3 filename mode, or if a corrupt 6032 ** rollback journal specifies a super-journal with a goofy name, then 6033 ** the '-' might be missing or the '-' might be the first character in 6034 ** the filename. In that case, just return SQLITE_OK with *pMode==0. 6035 */ 6036 nDb = sqlite3Strlen30(zPath) - 1; 6037 while( nDb>0 && zPath[nDb]!='.' ){ 6038 if( zPath[nDb]=='-' ){ 6039 memcpy(zDb, zPath, nDb); 6040 zDb[nDb] = '\0'; 6041 rc = getFileMode(zDb, pMode, pUid, pGid); 6042 break; 6043 } 6044 nDb--; 6045 } 6046 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ 6047 *pMode = 0600; 6048 }else if( flags & SQLITE_OPEN_URI ){ 6049 /* If this is a main database file and the file was opened using a URI 6050 ** filename, check for the "modeof" parameter. If present, interpret 6051 ** its value as a filename and try to copy the mode, uid and gid from 6052 ** that file. */ 6053 const char *z = sqlite3_uri_parameter(zPath, "modeof"); 6054 if( z ){ 6055 rc = getFileMode(z, pMode, pUid, pGid); 6056 } 6057 } 6058 return rc; 6059 } 6060 6061 /* 6062 ** Open the file zPath. 6063 ** 6064 ** Previously, the SQLite OS layer used three functions in place of this 6065 ** one: 6066 ** 6067 ** sqlite3OsOpenReadWrite(); 6068 ** sqlite3OsOpenReadOnly(); 6069 ** sqlite3OsOpenExclusive(); 6070 ** 6071 ** These calls correspond to the following combinations of flags: 6072 ** 6073 ** ReadWrite() -> (READWRITE | CREATE) 6074 ** ReadOnly() -> (READONLY) 6075 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 6076 ** 6077 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 6078 ** true, the file was configured to be automatically deleted when the 6079 ** file handle closed. To achieve the same effect using this new 6080 ** interface, add the DELETEONCLOSE flag to those specified above for 6081 ** OpenExclusive(). 
6082 */ 6083 static int unixOpen( 6084 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ 6085 const char *zPath, /* Pathname of file to be opened */ 6086 sqlite3_file *pFile, /* The file descriptor to be filled in */ 6087 int flags, /* Input flags to control the opening */ 6088 int *pOutFlags /* Output flags returned to SQLite core */ 6089 ){ 6090 unixFile *p = (unixFile *)pFile; 6091 int fd = -1; /* File descriptor returned by open() */ 6092 int openFlags = 0; /* Flags to pass to open() */ 6093 int eType = flags&0x0FFF00; /* Type of file to open */ 6094 int noLock; /* True to omit locking primitives */ 6095 int rc = SQLITE_OK; /* Function Return Code */ 6096 int ctrlFlags = 0; /* UNIXFILE_* flags */ 6097 6098 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 6099 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 6100 int isCreate = (flags & SQLITE_OPEN_CREATE); 6101 int isReadonly = (flags & SQLITE_OPEN_READONLY); 6102 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 6103 #if SQLITE_ENABLE_LOCKING_STYLE 6104 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); 6105 #endif 6106 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6107 struct statfs fsInfo; 6108 #endif 6109 6110 /* If creating a super- or main-file journal, this function will open 6111 ** a file-descriptor on the directory too. The first time unixSync() 6112 ** is called the directory file descriptor will be fsync()ed and close()d. 6113 */ 6114 int isNewJrnl = (isCreate && ( 6115 eType==SQLITE_OPEN_SUPER_JOURNAL 6116 || eType==SQLITE_OPEN_MAIN_JOURNAL 6117 || eType==SQLITE_OPEN_WAL 6118 )); 6119 6120 /* If argument zPath is a NULL pointer, this function is required to open 6121 ** a temporary file. Use this buffer to store the file name in. 
6122 */ 6123 char zTmpname[MAX_PATHNAME+2]; 6124 const char *zName = zPath; 6125 6126 /* Check the following statements are true: 6127 ** 6128 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 6129 ** (b) if CREATE is set, then READWRITE must also be set, and 6130 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 6131 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 6132 */ 6133 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 6134 assert(isCreate==0 || isReadWrite); 6135 assert(isExclusive==0 || isCreate); 6136 assert(isDelete==0 || isCreate); 6137 6138 /* The main DB, main journal, WAL file and super-journal are never 6139 ** automatically deleted. Nor are they ever temporary files. */ 6140 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); 6141 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); 6142 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_SUPER_JOURNAL ); 6143 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); 6144 6145 /* Assert that the upper layer has set one of the "file-type" flags. */ 6146 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 6147 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 6148 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_SUPER_JOURNAL 6149 || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL 6150 ); 6151 6152 /* Detect a pid change and reset the PRNG. There is a race condition 6153 ** here such that two or more threads all trying to open databases at 6154 ** the same instant might all reset the PRNG. But multiple resets 6155 ** are harmless. 
6156 */ 6157 if( randomnessPid!=osGetpid(0) ){ 6158 randomnessPid = osGetpid(0); 6159 sqlite3_randomness(0,0); 6160 } 6161 memset(p, 0, sizeof(unixFile)); 6162 6163 #ifdef SQLITE_ASSERT_NO_FILES 6164 /* Applications that never read or write a persistent disk files */ 6165 assert( zName==0 ); 6166 #endif 6167 6168 if( eType==SQLITE_OPEN_MAIN_DB ){ 6169 UnixUnusedFd *pUnused; 6170 pUnused = findReusableFd(zName, flags); 6171 if( pUnused ){ 6172 fd = pUnused->fd; 6173 }else{ 6174 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 6175 if( !pUnused ){ 6176 return SQLITE_NOMEM_BKPT; 6177 } 6178 } 6179 p->pPreallocatedUnused = pUnused; 6180 6181 /* Database filenames are double-zero terminated if they are not 6182 ** URIs with parameters. Hence, they can always be passed into 6183 ** sqlite3_uri_parameter(). */ 6184 assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); 6185 6186 }else if( !zName ){ 6187 /* If zName is NULL, the upper layer is requesting a temp file. */ 6188 assert(isDelete && !isNewJrnl); 6189 rc = unixGetTempname(pVfs->mxPathname, zTmpname); 6190 if( rc!=SQLITE_OK ){ 6191 return rc; 6192 } 6193 zName = zTmpname; 6194 6195 /* Generated temporary filenames are always double-zero terminated 6196 ** for use by sqlite3_uri_parameter(). */ 6197 assert( zName[strlen(zName)+1]==0 ); 6198 } 6199 6200 /* Determine the value of the flags parameter passed to POSIX function 6201 ** open(). These must be calculated even if open() is not called, as 6202 ** they may be stored as part of the file handle and used by the 6203 ** 'conch file' locking functions later on. 
*/ 6204 if( isReadonly ) openFlags |= O_RDONLY; 6205 if( isReadWrite ) openFlags |= O_RDWR; 6206 if( isCreate ) openFlags |= O_CREAT; 6207 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); 6208 openFlags |= (O_LARGEFILE|O_BINARY|O_NOFOLLOW); 6209 6210 if( fd<0 ){ 6211 mode_t openMode; /* Permissions to create file with */ 6212 uid_t uid; /* Userid for the file */ 6213 gid_t gid; /* Groupid for the file */ 6214 rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); 6215 if( rc!=SQLITE_OK ){ 6216 assert( !p->pPreallocatedUnused ); 6217 assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); 6218 return rc; 6219 } 6220 fd = robust_open(zName, openFlags, openMode); 6221 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); 6222 assert( !isExclusive || (openFlags & O_CREAT)!=0 ); 6223 if( fd<0 ){ 6224 if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){ 6225 /* If unable to create a journal because the directory is not 6226 ** writable, change the error code to indicate that. */ 6227 rc = SQLITE_READONLY_DIRECTORY; 6228 }else if( errno!=EISDIR && isReadWrite ){ 6229 /* Failed to open the file for read/write access. Try read-only. */ 6230 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 6231 openFlags &= ~(O_RDWR|O_CREAT); 6232 flags |= SQLITE_OPEN_READONLY; 6233 openFlags |= O_RDONLY; 6234 isReadonly = 1; 6235 fd = robust_open(zName, openFlags, openMode); 6236 } 6237 } 6238 if( fd<0 ){ 6239 int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); 6240 if( rc==SQLITE_OK ) rc = rc2; 6241 goto open_finished; 6242 } 6243 6244 /* The owner of the rollback journal or WAL file should always be the 6245 ** same as the owner of the database file. Try to ensure that this is 6246 ** the case. The chown() system call will be a no-op if the current 6247 ** process lacks root privileges, be we should at least try. 
Without 6248 ** this step, if a root process opens a database file, it can leave 6249 ** behinds a journal/WAL that is owned by root and hence make the 6250 ** database inaccessible to unprivileged processes. 6251 ** 6252 ** If openMode==0, then that means uid and gid are not set correctly 6253 ** (probably because SQLite is configured to use 8+3 filename mode) and 6254 ** in that case we do not want to attempt the chown(). 6255 */ 6256 if( openMode && (flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL))!=0 ){ 6257 robustFchown(fd, uid, gid); 6258 } 6259 } 6260 assert( fd>=0 ); 6261 if( pOutFlags ){ 6262 *pOutFlags = flags; 6263 } 6264 6265 if( p->pPreallocatedUnused ){ 6266 p->pPreallocatedUnused->fd = fd; 6267 p->pPreallocatedUnused->flags = 6268 flags & (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); 6269 } 6270 6271 if( isDelete ){ 6272 #if OS_VXWORKS 6273 zPath = zName; 6274 #elif defined(SQLITE_UNLINK_AFTER_CLOSE) 6275 zPath = sqlite3_mprintf("%s", zName); 6276 if( zPath==0 ){ 6277 robust_close(p, fd, __LINE__); 6278 return SQLITE_NOMEM_BKPT; 6279 } 6280 #else 6281 osUnlink(zName); 6282 #endif 6283 } 6284 #if SQLITE_ENABLE_LOCKING_STYLE 6285 else{ 6286 p->openFlags = openFlags; 6287 } 6288 #endif 6289 6290 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6291 if( fstatfs(fd, &fsInfo) == -1 ){ 6292 storeLastErrno(p, errno); 6293 robust_close(p, fd, __LINE__); 6294 return SQLITE_IOERR_ACCESS; 6295 } 6296 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { 6297 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6298 } 6299 if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { 6300 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6301 } 6302 #endif 6303 6304 /* Set up appropriate ctrlFlags */ 6305 if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; 6306 if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; 6307 noLock = eType!=SQLITE_OPEN_MAIN_DB; 6308 if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; 6309 if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC; 6310 if( flags & 
SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; 6311 6312 #if SQLITE_ENABLE_LOCKING_STYLE 6313 #if SQLITE_PREFER_PROXY_LOCKING 6314 isAutoProxy = 1; 6315 #endif 6316 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ 6317 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); 6318 int useProxy = 0; 6319 6320 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 6321 ** never use proxy, NULL means use proxy for non-local files only. */ 6322 if( envforce!=NULL ){ 6323 useProxy = atoi(envforce)>0; 6324 }else{ 6325 useProxy = !(fsInfo.f_flags&MNT_LOCAL); 6326 } 6327 if( useProxy ){ 6328 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6329 if( rc==SQLITE_OK ){ 6330 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); 6331 if( rc!=SQLITE_OK ){ 6332 /* Use unixClose to clean up the resources added in fillInUnixFile 6333 ** and clear all the structure's references. Specifically, 6334 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 6335 */ 6336 unixClose(pFile); 6337 return rc; 6338 } 6339 } 6340 goto open_finished; 6341 } 6342 } 6343 #endif 6344 6345 assert( zPath==0 || zPath[0]=='/' 6346 || eType==SQLITE_OPEN_SUPER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL 6347 ); 6348 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6349 6350 open_finished: 6351 if( rc!=SQLITE_OK ){ 6352 sqlite3_free(p->pPreallocatedUnused); 6353 } 6354 return rc; 6355 } 6356 6357 6358 /* 6359 ** Delete the file at zPath. If the dirSync argument is true, fsync() 6360 ** the directory after deleting the file. 
6361 */ 6362 static int unixDelete( 6363 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ 6364 const char *zPath, /* Name of file to be deleted */ 6365 int dirSync /* If true, fsync() directory after deleting file */ 6366 ){ 6367 int rc = SQLITE_OK; 6368 UNUSED_PARAMETER(NotUsed); 6369 SimulateIOError(return SQLITE_IOERR_DELETE); 6370 if( osUnlink(zPath)==(-1) ){ 6371 if( errno==ENOENT 6372 #if OS_VXWORKS 6373 || osAccess(zPath,0)!=0 6374 #endif 6375 ){ 6376 rc = SQLITE_IOERR_DELETE_NOENT; 6377 }else{ 6378 rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); 6379 } 6380 return rc; 6381 } 6382 #ifndef SQLITE_DISABLE_DIRSYNC 6383 if( (dirSync & 1)!=0 ){ 6384 int fd; 6385 rc = osOpenDirectory(zPath, &fd); 6386 if( rc==SQLITE_OK ){ 6387 if( full_fsync(fd,0,0) ){ 6388 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); 6389 } 6390 robust_close(0, fd, __LINE__); 6391 }else{ 6392 assert( rc==SQLITE_CANTOPEN ); 6393 rc = SQLITE_OK; 6394 } 6395 } 6396 #endif 6397 return rc; 6398 } 6399 6400 /* 6401 ** Test the existence of or access permissions of file zPath. The 6402 ** test performed depends on the value of flags: 6403 ** 6404 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 6405 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 6406 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 6407 ** 6408 ** Otherwise return 0. 6409 */ 6410 static int unixAccess( 6411 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ 6412 const char *zPath, /* Path of the file to examine */ 6413 int flags, /* What do we want to learn about the zPath file? */ 6414 int *pResOut /* Write result boolean here */ 6415 ){ 6416 UNUSED_PARAMETER(NotUsed); 6417 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 6418 assert( pResOut!=0 ); 6419 6420 /* The spec says there are three possible values for flags. 
But only 6421 ** two of them are actually used */ 6422 assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); 6423 6424 if( flags==SQLITE_ACCESS_EXISTS ){ 6425 struct stat buf; 6426 *pResOut = 0==osStat(zPath, &buf) && 6427 (!S_ISREG(buf.st_mode) || buf.st_size>0); 6428 }else{ 6429 *pResOut = osAccess(zPath, W_OK|R_OK)==0; 6430 } 6431 return SQLITE_OK; 6432 } 6433 6434 /* 6435 ** A pathname under construction 6436 */ 6437 typedef struct DbPath DbPath; 6438 struct DbPath { 6439 int rc; /* Non-zero following any error */ 6440 int nSymlink; /* Number of symlinks resolved */ 6441 char *zOut; /* Write the pathname here */ 6442 int nOut; /* Bytes of space available to zOut[] */ 6443 int nUsed; /* Bytes of zOut[] currently being used */ 6444 }; 6445 6446 /* Forward reference */ 6447 static void appendAllPathElements(DbPath*,const char*); 6448 6449 /* 6450 ** Append a single path element to the DbPath under construction 6451 */ 6452 static void appendOnePathElement( 6453 DbPath *pPath, /* Path under construction, to which to append zName */ 6454 const char *zName, /* Name to append to pPath. Not zero-terminated */ 6455 int nName /* Number of significant bytes in zName */ 6456 ){ 6457 assert( nName>0 ); 6458 assert( zName!=0 ); 6459 if( zName[0]=='.' ){ 6460 if( nName==1 ) return; 6461 if( zName[1]=='.' 
&& nName==2 ){ 6462 if( pPath->nUsed<=1 ){ 6463 pPath->rc = SQLITE_ERROR; 6464 return; 6465 } 6466 assert( pPath->zOut[0]=='/' ); 6467 while( pPath->zOut[--pPath->nUsed]!='/' ){} 6468 return; 6469 } 6470 } 6471 if( pPath->nUsed + nName + 2 >= pPath->nOut ){ 6472 pPath->rc = SQLITE_ERROR; 6473 return; 6474 } 6475 pPath->zOut[pPath->nUsed++] = '/'; 6476 memcpy(&pPath->zOut[pPath->nUsed], zName, nName); 6477 pPath->nUsed += nName; 6478 #if defined(HAVE_READLINK) && defined(HAVE_LSTAT) 6479 if( pPath->rc==SQLITE_OK ){ 6480 const char *zIn; 6481 struct stat buf; 6482 pPath->zOut[pPath->nUsed] = 0; 6483 zIn = pPath->zOut; 6484 if( osLstat(zIn, &buf)!=0 ){ 6485 if( errno!=ENOENT ){ 6486 pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn); 6487 } 6488 }else if( S_ISLNK(buf.st_mode) ){ 6489 ssize_t got; 6490 char zLnk[SQLITE_MAX_PATHLEN+2]; 6491 if( pPath->nSymlink++ > SQLITE_MAX_SYMLINK ){ 6492 pPath->rc = SQLITE_CANTOPEN_BKPT; 6493 return; 6494 } 6495 got = osReadlink(zIn, zLnk, sizeof(zLnk)-2); 6496 if( got<=0 || got>=(ssize_t)sizeof(zLnk)-2 ){ 6497 pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn); 6498 return; 6499 } 6500 zLnk[got] = 0; 6501 if( zLnk[0]=='/' ){ 6502 pPath->nUsed = 0; 6503 }else{ 6504 pPath->nUsed -= nName + 1; 6505 } 6506 appendAllPathElements(pPath, zLnk); 6507 } 6508 } 6509 #endif 6510 } 6511 6512 /* 6513 ** Append all path elements in zPath to the DbPath under construction. 6514 */ 6515 static void appendAllPathElements( 6516 DbPath *pPath, /* Path under construction, to which to append zName */ 6517 const char *zPath /* Path to append to pPath. Is zero-terminated */ 6518 ){ 6519 int i = 0; 6520 int j = 0; 6521 do{ 6522 while( zPath[i] && zPath[i]!='/' ){ i++; } 6523 if( i>j ){ 6524 appendOnePathElement(pPath, &zPath[j], i-j); 6525 } 6526 j = i+1; 6527 }while( zPath[i++] ); 6528 } 6529 6530 /* 6531 ** Turn a relative pathname into a full pathname. 
The relative path 6532 ** is stored as a nul-terminated string in the buffer pointed to by 6533 ** zPath. 6534 ** 6535 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 6536 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 6537 ** this buffer before returning. 6538 */ 6539 static int unixFullPathname( 6540 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 6541 const char *zPath, /* Possibly relative input path */ 6542 int nOut, /* Size of output buffer in bytes */ 6543 char *zOut /* Output buffer */ 6544 ){ 6545 DbPath path; 6546 UNUSED_PARAMETER(pVfs); 6547 path.rc = 0; 6548 path.nUsed = 0; 6549 path.nSymlink = 0; 6550 path.nOut = nOut; 6551 path.zOut = zOut; 6552 if( zPath[0]!='/' ){ 6553 char zPwd[SQLITE_MAX_PATHLEN+2]; 6554 if( osGetcwd(zPwd, sizeof(zPwd)-2)==0 ){ 6555 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); 6556 } 6557 appendAllPathElements(&path, zPwd); 6558 } 6559 appendAllPathElements(&path, zPath); 6560 zOut[path.nUsed] = 0; 6561 if( path.rc || path.nUsed<2 ) return SQLITE_CANTOPEN_BKPT; 6562 if( path.nSymlink ) return SQLITE_OK_SYMLINK; 6563 return SQLITE_OK; 6564 } 6565 6566 #ifndef SQLITE_OMIT_LOAD_EXTENSION 6567 /* 6568 ** Interfaces for opening a shared library, finding entry points 6569 ** within the shared library, and closing the shared library. 6570 */ 6571 #include <dlfcn.h> 6572 static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ 6573 UNUSED_PARAMETER(NotUsed); 6574 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 6575 } 6576 6577 /* 6578 ** SQLite calls this function immediately after a call to unixDlSym() or 6579 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 6580 ** message is available, it is written to zBufOut. If no error message 6581 ** is available, zBufOut is left unmodified and SQLite uses a default 6582 ** error message. 
6583 */ 6584 static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ 6585 const char *zErr; 6586 UNUSED_PARAMETER(NotUsed); 6587 unixEnterMutex(); 6588 zErr = dlerror(); 6589 if( zErr ){ 6590 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 6591 } 6592 unixLeaveMutex(); 6593 } 6594 static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ 6595 /* 6596 ** GCC with -pedantic-errors says that C90 does not allow a void* to be 6597 ** cast into a pointer to a function. And yet the library dlsym() routine 6598 ** returns a void* which is really a pointer to a function. So how do we 6599 ** use dlsym() with -pedantic-errors? 6600 ** 6601 ** Variable x below is defined to be a pointer to a function taking 6602 ** parameters void* and const char* and returning a pointer to a function. 6603 ** We initialize x by assigning it a pointer to the dlsym() function. 6604 ** (That assignment requires a cast.) Then we call the function that 6605 ** x points to. 6606 ** 6607 ** This work-around is unlikely to work correctly on any system where 6608 ** you really cannot cast a function pointer into void*. But then, on the 6609 ** other hand, dlsym() will not work on such a system either, so we have 6610 ** not really lost anything. 6611 */ 6612 void (*(*x)(void*,const char*))(void); 6613 UNUSED_PARAMETER(NotUsed); 6614 x = (void(*(*)(void*,const char*))(void))dlsym; 6615 return (*x)(p, zSym); 6616 } 6617 static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ 6618 UNUSED_PARAMETER(NotUsed); 6619 dlclose(pHandle); 6620 } 6621 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 6622 #define unixDlOpen 0 6623 #define unixDlError 0 6624 #define unixDlSym 0 6625 #define unixDlClose 0 6626 #endif 6627 6628 /* 6629 ** Write nBuf bytes of random data to the supplied buffer zBuf. 
6630 */ 6631 static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ 6632 UNUSED_PARAMETER(NotUsed); 6633 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); 6634 6635 /* We have to initialize zBuf to prevent valgrind from reporting 6636 ** errors. The reports issued by valgrind are incorrect - we would 6637 ** prefer that the randomness be increased by making use of the 6638 ** uninitialized space in zBuf - but valgrind errors tend to worry 6639 ** some users. Rather than argue, it seems easier just to initialize 6640 ** the whole array and silence valgrind, even if that means less randomness 6641 ** in the random seed. 6642 ** 6643 ** When testing, initializing zBuf[] to zero is all we do. That means 6644 ** that we always use the same random number sequence. This makes the 6645 ** tests repeatable. 6646 */ 6647 memset(zBuf, 0, nBuf); 6648 randomnessPid = osGetpid(0); 6649 #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) 6650 { 6651 int fd, got; 6652 fd = robust_open("/dev/urandom", O_RDONLY, 0); 6653 if( fd<0 ){ 6654 time_t t; 6655 time(&t); 6656 memcpy(zBuf, &t, sizeof(t)); 6657 memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); 6658 assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); 6659 nBuf = sizeof(t) + sizeof(randomnessPid); 6660 }else{ 6661 do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); 6662 robust_close(0, fd, __LINE__); 6663 } 6664 } 6665 #endif 6666 return nBuf; 6667 } 6668 6669 6670 /* 6671 ** Sleep for a little while. Return the amount of time slept. 6672 ** The argument is the number of microseconds we want to sleep. 6673 ** The return value is the number of microseconds of sleep actually 6674 ** requested from the underlying operating system, a number which 6675 ** might be greater than or equal to the argument, but not less 6676 ** than the argument. 
6677 */ 6678 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ 6679 #if OS_VXWORKS 6680 struct timespec sp; 6681 6682 sp.tv_sec = microseconds / 1000000; 6683 sp.tv_nsec = (microseconds % 1000000) * 1000; 6684 nanosleep(&sp, NULL); 6685 UNUSED_PARAMETER(NotUsed); 6686 return microseconds; 6687 #elif defined(HAVE_USLEEP) && HAVE_USLEEP 6688 if( microseconds>=1000000 ) sleep(microseconds/1000000); 6689 if( microseconds%1000000 ) usleep(microseconds%1000000); 6690 UNUSED_PARAMETER(NotUsed); 6691 return microseconds; 6692 #else 6693 int seconds = (microseconds+999999)/1000000; 6694 sleep(seconds); 6695 UNUSED_PARAMETER(NotUsed); 6696 return seconds*1000000; 6697 #endif 6698 } 6699 6700 /* 6701 ** The following variable, if set to a non-zero value, is interpreted as 6702 ** the number of seconds since 1970 and is used to set the result of 6703 ** sqlite3OsCurrentTime() during testing. 6704 */ 6705 #ifdef SQLITE_TEST 6706 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ 6707 #endif 6708 6709 /* 6710 ** Find the current time (in Universal Coordinated Time). Write into *piNow 6711 ** the current time and date as a Julian Day number times 86_400_000. In 6712 ** other words, write into *piNow the number of milliseconds since the Julian 6713 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the 6714 ** proleptic Gregorian calendar. 6715 ** 6716 ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date 6717 ** cannot be found. 
6718 */ 6719 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ 6720 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; 6721 int rc = SQLITE_OK; 6722 #if defined(NO_GETTOD) 6723 time_t t; 6724 time(&t); 6725 *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; 6726 #elif OS_VXWORKS 6727 struct timespec sNow; 6728 clock_gettime(CLOCK_REALTIME, &sNow); 6729 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; 6730 #else 6731 struct timeval sNow; 6732 (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ 6733 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; 6734 #endif 6735 6736 #ifdef SQLITE_TEST 6737 if( sqlite3_current_time ){ 6738 *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; 6739 } 6740 #endif 6741 UNUSED_PARAMETER(NotUsed); 6742 return rc; 6743 } 6744 6745 #ifndef SQLITE_OMIT_DEPRECATED 6746 /* 6747 ** Find the current time (in Universal Coordinated Time). Write the 6748 ** current time and date as a Julian Day number into *prNow and 6749 ** return 0. Return 1 if the time and date cannot be found. 6750 */ 6751 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ 6752 sqlite3_int64 i = 0; 6753 int rc; 6754 UNUSED_PARAMETER(NotUsed); 6755 rc = unixCurrentTimeInt64(0, &i); 6756 *prNow = i/86400000.0; 6757 return rc; 6758 } 6759 #else 6760 # define unixCurrentTime 0 6761 #endif 6762 6763 /* 6764 ** The xGetLastError() method is designed to return a better 6765 ** low-level error message when operating-system problems come up 6766 ** during SQLite operation. Only the integer return code is currently 6767 ** used. 
6768 */ 6769 static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ 6770 UNUSED_PARAMETER(NotUsed); 6771 UNUSED_PARAMETER(NotUsed2); 6772 UNUSED_PARAMETER(NotUsed3); 6773 return errno; 6774 } 6775 6776 6777 /* 6778 ************************ End of sqlite3_vfs methods *************************** 6779 ******************************************************************************/ 6780 6781 /****************************************************************************** 6782 ************************** Begin Proxy Locking ******************************** 6783 ** 6784 ** Proxy locking is a "uber-locking-method" in this sense: It uses the 6785 ** other locking methods on secondary lock files. Proxy locking is a 6786 ** meta-layer over top of the primitive locking implemented above. For 6787 ** this reason, the division that implements of proxy locking is deferred 6788 ** until late in the file (here) after all of the other I/O methods have 6789 ** been defined - so that the primitive locking methods are available 6790 ** as services to help with the implementation of proxy locking. 6791 ** 6792 **** 6793 ** 6794 ** The default locking schemes in SQLite use byte-range locks on the 6795 ** database file to coordinate safe, concurrent access by multiple readers 6796 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking 6797 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented 6798 ** as POSIX read & write locks over fixed set of locations (via fsctl), 6799 ** on AFP and SMB only exclusive byte-range locks are available via fsctl 6800 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. 
6801 ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected 6802 ** address in the shared range is taken for a SHARED lock, the entire 6803 ** shared range is taken for an EXCLUSIVE lock): 6804 ** 6805 ** PENDING_BYTE 0x40000000 6806 ** RESERVED_BYTE 0x40000001 6807 ** SHARED_RANGE 0x40000002 -> 0x40000200 6808 ** 6809 ** This works well on the local file system, but shows a nearly 100x 6810 ** slowdown in read performance on AFP because the AFP client disables 6811 ** the read cache when byte-range locks are present. Enabling the read 6812 ** cache exposes a cache coherency problem that is present on all OS X 6813 ** supported network file systems. NFS and AFP both observe the 6814 ** close-to-open semantics for ensuring cache coherency 6815 ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively 6816 ** address the requirements for concurrent database access by multiple 6817 ** readers and writers 6818 ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. 6819 ** 6820 ** To address the performance and cache coherency issues, proxy file locking 6821 ** changes the way database access is controlled by limiting access to a 6822 ** single host at a time and moving file locks off of the database file 6823 ** and onto a proxy file on the local file system. 
**
**
** Using proxy locks
** -----------------
**
** C APIs
**
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE,
**                       <proxy_path> | ":auto:");
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE,
**                       &<proxy_path>);
**
**
** SQL pragmas
**
**  PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
**  PRAGMA [database.]lock_proxy_file
**
** Specifying ":auto:" means that if there is a conch file with a matching
** host ID in it, the proxy path in the conch file will be used, otherwise
** a proxy path based on the user's temp dir
** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
** actual proxy file name is generated from the name and path of the
** database file.  For example:
**
**       For database path "/Users/me/foo.db"
**       The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
**
** Once a lock proxy is configured for a database connection, it cannot
** be removed; however, it may be switched to a different proxy path via
** the above APIs (assuming the conch file is not being held by another
** connection or process).
**
**
** How proxy locking works
** -----------------------
**
** Proxy file locking relies primarily on two new supporting files:
**
**   *  conch file to limit access to the database file to a single host
**      at a time
**
**   *  proxy file to act as a proxy for the advisory locks normally
**      taken on the database
**
** The conch file - to use a proxy file, sqlite must first "hold the conch"
** by taking an sqlite-style shared lock on the conch file, reading the
** contents and comparing the host's unique host ID (see below) and lock
** proxy path against the values stored in the conch.
The conch file is 6873 ** stored in the same directory as the database file and the file name 6874 ** is patterned after the database file name as ".<databasename>-conch". 6875 ** If the conch file does not exist, or its contents do not match the 6876 ** host ID and/or proxy path, then the lock is escalated to an exclusive 6877 ** lock and the conch file contents is updated with the host ID and proxy 6878 ** path and the lock is downgraded to a shared lock again. If the conch 6879 ** is held by another process (with a shared lock), the exclusive lock 6880 ** will fail and SQLITE_BUSY is returned. 6881 ** 6882 ** The proxy file - a single-byte file used for all advisory file locks 6883 ** normally taken on the database file. This allows for safe sharing 6884 ** of the database file for multiple readers and writers on the same 6885 ** host (the conch ensures that they all use the same local lock file). 6886 ** 6887 ** Requesting the lock proxy does not immediately take the conch, it is 6888 ** only taken when the first request to lock database file is made. 6889 ** This matches the semantics of the traditional locking behavior, where 6890 ** opening a connection to a database file does not take a lock on it. 6891 ** The shared lock and an open file descriptor are maintained until 6892 ** the connection to the database is closed. 6893 ** 6894 ** The proxy file and the lock file are never deleted so they only need 6895 ** to be created the first time they are used. 
**
** Configuration options
** ---------------------
**
**  SQLITE_PREFER_PROXY_LOCKING
**
**       Database files accessed on non-local file systems are
**       automatically configured for proxy locking, lock files are
**       named automatically using the same logic as
**       PRAGMA lock_proxy_file=":auto:"
**
**  SQLITE_PROXY_DEBUG
**
**       Enables the logging of error messages during host id file
**       retrieval and creation
**
**  LOCKPROXYDIR
**
**       Overrides the default directory used for lock proxy files that
**       are named automatically via the ":auto:" setting
**
**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
**
**       Permissions to use when creating a directory for storing the
**       lock proxy files, only used when LOCKPROXYDIR is not set.
**
**
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files (explicitly using the lock_proxy_file pragma or calling the
** sqlite3_file_control API with SQLITE_FCNTL_SET_LOCKPROXYFILE is not
** affected by SQLITE_FORCE_PROXY_LOCKING).
*/

/*
** Proxy locking is only available on MacOSX
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

/*
** The proxyLockingContext has the path and file structures for the remote
** and local proxy files in it.
**
** One of these objects hangs off unixFile.lockingContext while proxy
** locking is in effect for that file.  It keeps the previous locking
** context and I/O methods so that they can be restored on close.
*/
typedef struct proxyLockingContext proxyLockingContext;
struct proxyLockingContext {
  unixFile *conchFile;         /* Open conch file */
  char *conchFilePath;         /* Name of the conch file */
  unixFile *lockProxy;         /* Open proxy lock file */
  char *lockProxyPath;         /* Name of the proxy lock file */
  char *dbPath;                /* Name of the open file */
  int conchHeld;               /* 1 if the conch is held, -1 if lockless.
                               ** Zero means the conch has not been taken */
  int nFails;                  /* Number of conch taking failures */
  void *oldLockingContext;     /* Original lockingcontext to restore on close */
  sqlite3_io_methods const *pOldMethod;     /* Original I/O methods for close */
};

/*
** The proxy lock file path for the database at dbPath is written into lPath,
** which must point to valid, writable memory large enough for a maxLen length
** file path.
6957 */ 6958 static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ 6959 int len; 6960 int dbLen; 6961 int i; 6962 6963 #ifdef LOCKPROXYDIR 6964 len = strlcpy(lPath, LOCKPROXYDIR, maxLen); 6965 #else 6966 # ifdef _CS_DARWIN_USER_TEMP_DIR 6967 { 6968 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ 6969 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", 6970 lPath, errno, osGetpid(0))); 6971 return SQLITE_IOERR_LOCK; 6972 } 6973 len = strlcat(lPath, "sqliteplocks", maxLen); 6974 } 6975 # else 6976 len = strlcpy(lPath, "/tmp/", maxLen); 6977 # endif 6978 #endif 6979 6980 if( lPath[len-1]!='/' ){ 6981 len = strlcat(lPath, "/", maxLen); 6982 } 6983 6984 /* transform the db path to a unique cache name */ 6985 dbLen = (int)strlen(dbPath); 6986 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ 6987 char c = dbPath[i]; 6988 lPath[i+len] = (c=='/')?'_':c; 6989 } 6990 lPath[i+len]='\0'; 6991 strlcat(lPath, ":auto:", maxLen); 6992 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0))); 6993 return SQLITE_OK; 6994 } 6995 6996 /* 6997 ** Creates the lock file and any missing directories in lockPath 6998 */ 6999 static int proxyCreateLockPath(const char *lockPath){ 7000 int i, len; 7001 char buf[MAXPATHLEN]; 7002 int start = 0; 7003 7004 assert(lockPath!=NULL); 7005 /* try to create all the intermediate directories */ 7006 len = (int)strlen(lockPath); 7007 buf[0] = lockPath[0]; 7008 for( i=1; i<len; i++ ){ 7009 if( lockPath[i] == '/' && (i - start > 0) ){ 7010 /* only mkdir if leaf dir != "." or "/" or ".." */ 7011 if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 7012 || (i-start==2 && buf[start] != '.' 
&& buf[start+1] != '.') ){ 7013 buf[i]='\0'; 7014 if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ 7015 int err=errno; 7016 if( err!=EEXIST ) { 7017 OSTRACE(("CREATELOCKPATH FAILED creating %s, " 7018 "'%s' proxy lock path=%s pid=%d\n", 7019 buf, strerror(err), lockPath, osGetpid(0))); 7020 return err; 7021 } 7022 } 7023 } 7024 start=i+1; 7025 } 7026 buf[i] = lockPath[i]; 7027 } 7028 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0))); 7029 return 0; 7030 } 7031 7032 /* 7033 ** Create a new VFS file descriptor (stored in memory obtained from 7034 ** sqlite3_malloc) and open the file named "path" in the file descriptor. 7035 ** 7036 ** The caller is responsible not only for closing the file descriptor 7037 ** but also for freeing the memory associated with the file descriptor. 7038 */ 7039 static int proxyCreateUnixFile( 7040 const char *path, /* path for the new unixFile */ 7041 unixFile **ppFile, /* unixFile created and returned by ref */ 7042 int islockfile /* if non zero missing dirs will be created */ 7043 ) { 7044 int fd = -1; 7045 unixFile *pNew; 7046 int rc = SQLITE_OK; 7047 int openFlags = O_RDWR | O_CREAT | O_NOFOLLOW; 7048 sqlite3_vfs dummyVfs; 7049 int terrno = 0; 7050 UnixUnusedFd *pUnused = NULL; 7051 7052 /* 1. first try to open/create the file 7053 ** 2. if that fails, and this is a lock file (not-conch), try creating 7054 ** the parent directories and then try again. 7055 ** 3. 
if that fails, try to open the file read-only 7056 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file 7057 */ 7058 pUnused = findReusableFd(path, openFlags); 7059 if( pUnused ){ 7060 fd = pUnused->fd; 7061 }else{ 7062 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 7063 if( !pUnused ){ 7064 return SQLITE_NOMEM_BKPT; 7065 } 7066 } 7067 if( fd<0 ){ 7068 fd = robust_open(path, openFlags, 0); 7069 terrno = errno; 7070 if( fd<0 && errno==ENOENT && islockfile ){ 7071 if( proxyCreateLockPath(path) == SQLITE_OK ){ 7072 fd = robust_open(path, openFlags, 0); 7073 } 7074 } 7075 } 7076 if( fd<0 ){ 7077 openFlags = O_RDONLY | O_NOFOLLOW; 7078 fd = robust_open(path, openFlags, 0); 7079 terrno = errno; 7080 } 7081 if( fd<0 ){ 7082 if( islockfile ){ 7083 return SQLITE_BUSY; 7084 } 7085 switch (terrno) { 7086 case EACCES: 7087 return SQLITE_PERM; 7088 case EIO: 7089 return SQLITE_IOERR_LOCK; /* even though it is the conch */ 7090 default: 7091 return SQLITE_CANTOPEN_BKPT; 7092 } 7093 } 7094 7095 pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); 7096 if( pNew==NULL ){ 7097 rc = SQLITE_NOMEM_BKPT; 7098 goto end_create_proxy; 7099 } 7100 memset(pNew, 0, sizeof(unixFile)); 7101 pNew->openFlags = openFlags; 7102 memset(&dummyVfs, 0, sizeof(dummyVfs)); 7103 dummyVfs.pAppData = (void*)&autolockIoFinder; 7104 dummyVfs.zName = "dummy"; 7105 pUnused->fd = fd; 7106 pUnused->flags = openFlags; 7107 pNew->pPreallocatedUnused = pUnused; 7108 7109 rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); 7110 if( rc==SQLITE_OK ){ 7111 *ppFile = pNew; 7112 return SQLITE_OK; 7113 } 7114 end_create_proxy: 7115 robust_close(pNew, fd, __LINE__); 7116 sqlite3_free(pNew); 7117 sqlite3_free(pUnused); 7118 return rc; 7119 } 7120 7121 #ifdef SQLITE_TEST 7122 /* simulate multiple hosts by creating unique hostid file paths */ 7123 int sqlite3_hostid_num = 0; 7124 #endif 7125 7126 #define PROXY_HOSTIDLEN 16 /* conch file host id length */ 7127 7128 #if HAVE_GETHOSTUUID 7129 /* Not 
always defined in the headers as it ought to be */ 7130 extern int gethostuuid(uuid_t id, const struct timespec *wait); 7131 #endif 7132 7133 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 7134 ** bytes of writable memory. 7135 */ 7136 static int proxyGetHostID(unsigned char *pHostID, int *pError){ 7137 assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); 7138 memset(pHostID, 0, PROXY_HOSTIDLEN); 7139 #if HAVE_GETHOSTUUID 7140 { 7141 struct timespec timeout = {1, 0}; /* 1 sec timeout */ 7142 if( gethostuuid(pHostID, &timeout) ){ 7143 int err = errno; 7144 if( pError ){ 7145 *pError = err; 7146 } 7147 return SQLITE_IOERR; 7148 } 7149 } 7150 #else 7151 UNUSED_PARAMETER(pError); 7152 #endif 7153 #ifdef SQLITE_TEST 7154 /* simulate multiple hosts by creating unique hostid file paths */ 7155 if( sqlite3_hostid_num != 0){ 7156 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); 7157 } 7158 #endif 7159 7160 return SQLITE_OK; 7161 } 7162 7163 /* The conch file contains the header, host id and lock file path 7164 */ 7165 #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ 7166 #define PROXY_HEADERLEN 1 /* conch file header length */ 7167 #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) 7168 #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) 7169 7170 /* 7171 ** Takes an open conch file, copies the contents to a new path and then moves 7172 ** it back. The newly created file's file descriptor is assigned to the 7173 ** conch file structure and finally the original conch file descriptor is 7174 ** closed. Returns zero if successful. 
7175 */ 7176 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ 7177 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7178 unixFile *conchFile = pCtx->conchFile; 7179 char tPath[MAXPATHLEN]; 7180 char buf[PROXY_MAXCONCHLEN]; 7181 char *cPath = pCtx->conchFilePath; 7182 size_t readLen = 0; 7183 size_t pathLen = 0; 7184 char errmsg[64] = ""; 7185 int fd = -1; 7186 int rc = -1; 7187 UNUSED_PARAMETER(myHostID); 7188 7189 /* create a new path by replace the trailing '-conch' with '-break' */ 7190 pathLen = strlcpy(tPath, cPath, MAXPATHLEN); 7191 if( pathLen>MAXPATHLEN || pathLen<6 || 7192 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ 7193 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); 7194 goto end_breaklock; 7195 } 7196 /* read the conch content */ 7197 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); 7198 if( readLen<PROXY_PATHINDEX ){ 7199 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); 7200 goto end_breaklock; 7201 } 7202 /* write it out to the temporary break file */ 7203 fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW), 0); 7204 if( fd<0 ){ 7205 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); 7206 goto end_breaklock; 7207 } 7208 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ 7209 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); 7210 goto end_breaklock; 7211 } 7212 if( rename(tPath, cPath) ){ 7213 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); 7214 goto end_breaklock; 7215 } 7216 rc = 0; 7217 fprintf(stderr, "broke stale lock on %s\n", cPath); 7218 robust_close(pFile, conchFile->h, __LINE__); 7219 conchFile->h = fd; 7220 conchFile->openFlags = O_RDWR | O_CREAT; 7221 7222 end_breaklock: 7223 if( rc ){ 7224 if( fd>=0 ){ 7225 osUnlink(tPath); 7226 robust_close(pFile, fd, __LINE__); 7227 } 7228 fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); 7229 } 
7230 return rc; 7231 } 7232 7233 /* Take the requested lock on the conch file and break a stale lock if the 7234 ** host id matches. 7235 */ 7236 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ 7237 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7238 unixFile *conchFile = pCtx->conchFile; 7239 int rc = SQLITE_OK; 7240 int nTries = 0; 7241 struct timespec conchModTime; 7242 7243 memset(&conchModTime, 0, sizeof(conchModTime)); 7244 do { 7245 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 7246 nTries ++; 7247 if( rc==SQLITE_BUSY ){ 7248 /* If the lock failed (busy): 7249 * 1st try: get the mod time of the conch, wait 0.5s and try again. 7250 * 2nd try: fail if the mod time changed or host id is different, wait 7251 * 10 sec and try again 7252 * 3rd try: break the lock unless the mod time has changed. 7253 */ 7254 struct stat buf; 7255 if( osFstat(conchFile->h, &buf) ){ 7256 storeLastErrno(pFile, errno); 7257 return SQLITE_IOERR_LOCK; 7258 } 7259 7260 if( nTries==1 ){ 7261 conchModTime = buf.st_mtimespec; 7262 unixSleep(0,500000); /* wait 0.5 sec and try the lock again*/ 7263 continue; 7264 } 7265 7266 assert( nTries>1 ); 7267 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 7268 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ 7269 return SQLITE_BUSY; 7270 } 7271 7272 if( nTries==2 ){ 7273 char tBuf[PROXY_MAXCONCHLEN]; 7274 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); 7275 if( len<0 ){ 7276 storeLastErrno(pFile, errno); 7277 return SQLITE_IOERR_LOCK; 7278 } 7279 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ 7280 /* don't break the lock if the host id doesn't match */ 7281 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ 7282 return SQLITE_BUSY; 7283 } 7284 }else{ 7285 /* don't break the lock on short read or a version mismatch */ 7286 return SQLITE_BUSY; 7287 } 7288 unixSleep(0,10000000); /* wait 10 sec and try the lock again */ 7289 
        continue;
      }

      assert( nTries==3 );
      if( 0==proxyBreakConchLock(pFile, myHostID) ){
        rc = SQLITE_OK;
        if( lockType==EXCLUSIVE_LOCK ){
          rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
        }
        if( !rc ){
          rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
        }
      }
    }
  } while( rc==SQLITE_BUSY && nTries<3 );

  return rc;
}

/* Take the conch: obtain a shared lock on the conch file and read its
** contents.  If the context's lockProxyPath is non-NULL, the host ID and
** lock file path stored in the conch must match it.  A NULL lockProxyPath
** means that the path stored in the conch file will be used if the
** host IDs match, or a new lock path will be generated automatically
** and written to the conch file.
**
** Summary of what the code below does:
**
**   *  Returns SQLITE_OK immediately if pCtx->conchHeld is already non-zero
**      (this includes the negative "lockless" value).
**   *  On success, sets pCtx->conchHeld to 1 and makes sure that
**      pCtx->lockProxy refers to an open proxy lock file.  If the lock path
**      came from the conch file or was auto-generated, a heap copy of it is
**      stored in pCtx->lockProxyPath.
**   *  On failure, drops the conch file lock to NO_LOCK and returns the
**      error code.
**   *  When rc is SQLITE_OK and pFile->openFlags is non-zero, the database
**      file descriptor pFile->h is closed and reopened from pCtx->dbPath.
*/
static int proxyTakeConch(unixFile *pFile){
  proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

  if( pCtx->conchHeld!=0 ){
    return SQLITE_OK;
  }else{
    unixFile *conchFile = pCtx->conchFile;
    uuid_t myHostID;                 /* Host ID of this machine */
    int pError = 0;                  /* errno reported by proxyGetHostID() */
    char readBuf[PROXY_MAXCONCHLEN]; /* Raw contents read from the conch */
    char lockPath[MAXPATHLEN];       /* Stack buffer for a candidate path */
    char *tempLockPath = NULL;       /* Points at lockPath[] when it is used */
    int rc = SQLITE_OK;
    int createConch = 0;             /* True if conch must be (re)written */
    int hostIdMatch = 0;             /* True if conch host ID equals ours */
    int readLen = 0;                 /* Bytes read from the conch file */
    int tryOldLockPath = 0;          /* True while using path from the conch */
    int forceNewLockPath = 0;        /* True after the old path failed */

    OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h,
             (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
             osGetpid(0)));

    rc = proxyGetHostID(myHostID, &pError);
    if( (rc&0xff)==SQLITE_IOERR ){
      storeLastErrno(pFile, pError);
      goto end_takeconch;
    }
    rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
      goto end_takeconch;
    }
    /* read the existing conch file */
    readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
    if( readLen<0 ){
      /* I/O error: lastErrno set by seekAndRead */
      storeLastErrno(pFile, conchFile->lastErrno);
      rc = SQLITE_IOERR_READ;
      goto end_takeconch;
    }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) ||
             readBuf[0]!=(char)PROXY_CONCHVERSION ){
      /* a short read or version format mismatch means we need to create a new
      ** conch file.
      */
      createConch = 1;
    }
    /* if the host id matches and the lock path already exists in the conch
    ** we'll try to use the path there, if we can't open that path, we'll
    ** retry with a new auto-generated path
    */
    do { /* in case we need to try again for an :auto: named lock file */

      if( !createConch && !forceNewLockPath ){
        /* The host ID is stored at offset PROXY_HEADERLEN of the conch */
        hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,
                                  PROXY_HOSTIDLEN);
        /* if the conch has data compare the contents */
        if( !pCtx->lockProxyPath ){
          /* for auto-named local lock file, just check the host ID and we'll
          ** use the local lock file path that's already in there
          */
          if( hostIdMatch ){
            size_t pathLen = (readLen - PROXY_PATHINDEX);

            /* Clamp so that the terminator written below stays inside
            ** lockPath[] */
            if( pathLen>=MAXPATHLEN ){
              pathLen=MAXPATHLEN-1;
            }
            memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
            lockPath[pathLen] = 0;
            tempLockPath = lockPath;
            tryOldLockPath = 1;
            /* create a copy of the lock path if the conch is taken */
            goto end_takeconch;
          }
        }else if( hostIdMatch
               && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
                           readLen-PROXY_PATHINDEX)
        ){
          /* conch host and lock path match */
          goto end_takeconch;
        }
      }

      /* if the conch isn't writable and doesn't match, we can't take it */
      if( (conchFile->openFlags&O_RDWR) == 0 ){
        rc = SQLITE_BUSY;
        goto end_takeconch;
      }

      /* either the conch didn't match or we need to create a new one */
      if( !pCtx->lockProxyPath ){
        proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
        tempLockPath = lockPath;
        /* create a copy of the lock path _only_ if the conch is taken */
      }

      /* update conch with host and path (this will fail if other process
      ** has a shared lock already), if the host id matches, use the big
      ** stick.
      */
      futimes(conchFile->h, NULL);
      if( hostIdMatch && !createConch ){
        if( conchFile->pInode && conchFile->pInode->nShared>1 ){
          /* We are trying for an exclusive lock but another thread in this
          ** same process is still holding a shared lock. */
          rc = SQLITE_BUSY;
        } else {
          rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
        }
      }else{
        rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
      }
      if( rc==SQLITE_OK ){
        /* Conch layout as written here: version byte at offset 0, host ID
        ** at PROXY_HEADERLEN, lock file path at PROXY_PATHINDEX. */
        char writeBuffer[PROXY_MAXCONCHLEN];
        int writeSize = 0;

        writeBuffer[0] = (char)PROXY_CONCHVERSION;
        memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
        if( pCtx->lockProxyPath!=NULL ){
          strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath,
                  MAXPATHLEN);
        }else{
          strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
        }
        writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
        /* NOTE(review): the results of robust_ftruncate() and full_fsync()
        ** are not checked; only the unixWrite() result lands in rc. */
        robust_ftruncate(conchFile->h, writeSize);
        rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
        full_fsync(conchFile->h,0,0);
        /* If we created a new conch file (not just updated the contents of a
        ** valid conch file), try to match the permissions of the database
        */
        if( rc==SQLITE_OK && createConch ){
          struct stat buf;
          int err = osFstat(pFile->h, &buf);
          if( err==0 ){
            mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
                                        S_IROTH|S_IWOTH);
            /* try to match the database file R/W permissions, ignore failure */
            /* Note that the #else arm below also supplies the "else" branch
            ** of the enclosing if( err==0 ); the closing brace after #endif
            ** is shared by both preprocessor arms. */
#ifndef SQLITE_PROXY_DEBUG
            osFchmod(conchFile->h, cmode);
#else
            do{
              rc = osFchmod(conchFile->h, cmode);
            }while( rc==(-1) && errno==EINTR );
            if( rc!=0 ){
              int code = errno;
              fprintf(stderr, "fchmod %o FAILED with %d %s\n",
                      cmode, code, strerror(code));
            } else {
              fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
            }
          }else{
            int code = errno;
            fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
                    err, code, strerror(code));
#endif
          }
        }
      }
      /* Downgrade the conch lock back to SHARED after the update attempt */
      conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);

    end_takeconch:
      OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
      if( rc==SQLITE_OK && pFile->openFlags ){
        /* Close and reopen the database file descriptor */
        int fd;
        if( pFile->h>=0 ){
          robust_close(pFile, pFile->h, __LINE__);
        }
        pFile->h = -1;
        fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
        OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
        if( fd>=0 ){
          pFile->h = fd;
        }else{
          rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
                                   ** during locking */
        }
      }
      if( rc==SQLITE_OK && !pCtx->lockProxy ){
        char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
        rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
        if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
          /* we couldn't create the proxy lock file with the old lock file path
          ** so try again via auto-naming
          */
          forceNewLockPath = 1;
          tryOldLockPath = 0;
          continue; /* go back to the do {} while start point, try again */
        }
      }
      if( rc==SQLITE_OK ){
        /* Need to make a copy of path if we extracted the value
        ** from the conch file or the path was allocated on the stack
        */
        if( tempLockPath ){
          pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
          if( !pCtx->lockProxyPath ){
            rc = SQLITE_NOMEM_BKPT;
          }
        }
      }
      if( rc==SQLITE_OK ){
        pCtx->conchHeld = 1;

        /* If the proxy lock file uses AFP locking, point its locking
        ** context at the lock proxy path. */
        if( pCtx->lockProxy->pMethod == &afpIoMethods ){
          afpLockingContext *afpCtx;
          afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
          afpCtx->dbPath = pCtx->lockProxyPath;
        }
      } else {
        /* Failure: release whatever lock is held on the conch file */
        conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
      }
      OSTRACE(("TAKECONCH %d %s\n", conchFile->h,
               rc==SQLITE_OK?"ok":"failed"));
      return rc;
    } while (1); /* in case we need to retry the :auto: lock file -
                 ** we should never get here except via the 'continue' call. */
  }
}

/*
** If pFile holds a lock on a conch file, then release that lock.
7535 */ 7536 static int proxyReleaseConch(unixFile *pFile){ 7537 int rc = SQLITE_OK; /* Subroutine return code */ 7538 proxyLockingContext *pCtx; /* The locking context for the proxy lock */ 7539 unixFile *conchFile; /* Name of the conch file */ 7540 7541 pCtx = (proxyLockingContext *)pFile->lockingContext; 7542 conchFile = pCtx->conchFile; 7543 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, 7544 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 7545 osGetpid(0))); 7546 if( pCtx->conchHeld>0 ){ 7547 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7548 } 7549 pCtx->conchHeld = 0; 7550 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, 7551 (rc==SQLITE_OK ? "ok" : "failed"))); 7552 return rc; 7553 } 7554 7555 /* 7556 ** Given the name of a database file, compute the name of its conch file. 7557 ** Store the conch filename in memory obtained from sqlite3_malloc64(). 7558 ** Make *pConchPath point to the new name. Return SQLITE_OK on success 7559 ** or SQLITE_NOMEM if unable to obtain memory. 7560 ** 7561 ** The caller is responsible for ensuring that the allocated memory 7562 ** space is eventually freed. 7563 ** 7564 ** *pConchPath is set to NULL if a memory allocation error occurs. 7565 */ 7566 static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ 7567 int i; /* Loop counter */ 7568 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ 7569 char *conchPath; /* buffer in which to construct conch name */ 7570 7571 /* Allocate space for the conch filename and initialize the name to 7572 ** the name of the original database file. */ 7573 *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); 7574 if( conchPath==0 ){ 7575 return SQLITE_NOMEM_BKPT; 7576 } 7577 memcpy(conchPath, dbPath, len+1); 7578 7579 /* now insert a "." 
before the last / character */ 7580 for( i=(len-1); i>=0; i-- ){ 7581 if( conchPath[i]=='/' ){ 7582 i++; 7583 break; 7584 } 7585 } 7586 conchPath[i]='.'; 7587 while ( i<len ){ 7588 conchPath[i+1]=dbPath[i]; 7589 i++; 7590 } 7591 7592 /* append the "-conch" suffix to the file */ 7593 memcpy(&conchPath[i+1], "-conch", 7); 7594 assert( (int)strlen(conchPath) == len+7 ); 7595 7596 return SQLITE_OK; 7597 } 7598 7599 7600 /* Takes a fully configured proxy locking-style unix file and switches 7601 ** the local lock file path 7602 */ 7603 static int switchLockProxyPath(unixFile *pFile, const char *path) { 7604 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7605 char *oldPath = pCtx->lockProxyPath; 7606 int rc = SQLITE_OK; 7607 7608 if( pFile->eFileLock!=NO_LOCK ){ 7609 return SQLITE_BUSY; 7610 } 7611 7612 /* nothing to do if the path is NULL, :auto: or matches the existing path */ 7613 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || 7614 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ 7615 return SQLITE_OK; 7616 }else{ 7617 unixFile *lockProxy = pCtx->lockProxy; 7618 pCtx->lockProxy=NULL; 7619 pCtx->conchHeld = 0; 7620 if( lockProxy!=NULL ){ 7621 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); 7622 if( rc ) return rc; 7623 sqlite3_free(lockProxy); 7624 } 7625 sqlite3_free(oldPath); 7626 pCtx->lockProxyPath = sqlite3DbStrDup(0, path); 7627 } 7628 7629 return rc; 7630 } 7631 7632 /* 7633 ** pFile is a file that has been opened by a prior xOpen call. dbPath 7634 ** is a string buffer at least MAXPATHLEN+1 characters in size. 7635 ** 7636 ** This routine find the filename associated with pFile and writes it 7637 ** int dbPath. 
7638 */ 7639 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ 7640 #if defined(__APPLE__) 7641 if( pFile->pMethod == &afpIoMethods ){ 7642 /* afp style keeps a reference to the db path in the filePath field 7643 ** of the struct */ 7644 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7645 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, 7646 MAXPATHLEN); 7647 } else 7648 #endif 7649 if( pFile->pMethod == &dotlockIoMethods ){ 7650 /* dot lock style uses the locking context to store the dot lock 7651 ** file path */ 7652 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); 7653 memcpy(dbPath, (char *)pFile->lockingContext, len + 1); 7654 }else{ 7655 /* all other styles use the locking context to store the db file path */ 7656 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7657 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); 7658 } 7659 return SQLITE_OK; 7660 } 7661 7662 /* 7663 ** Takes an already filled in unix file and alters it so all file locking 7664 ** will be performed on the local proxy lock file. The following fields 7665 ** are preserved in the locking context so that they can be restored and 7666 ** the unix structure properly cleaned up at close time: 7667 ** ->lockingContext 7668 ** ->pMethod 7669 */ 7670 static int proxyTransformUnixFile(unixFile *pFile, const char *path) { 7671 proxyLockingContext *pCtx; 7672 char dbPath[MAXPATHLEN+1]; /* Name of the database file */ 7673 char *lockPath=NULL; 7674 int rc = SQLITE_OK; 7675 7676 if( pFile->eFileLock!=NO_LOCK ){ 7677 return SQLITE_BUSY; 7678 } 7679 proxyGetDbPathForUnixFile(pFile, dbPath); 7680 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ 7681 lockPath=NULL; 7682 }else{ 7683 lockPath=(char *)path; 7684 } 7685 7686 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, 7687 (lockPath ? 
lockPath : ":auto:"), osGetpid(0))); 7688 7689 pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 7690 if( pCtx==0 ){ 7691 return SQLITE_NOMEM_BKPT; 7692 } 7693 memset(pCtx, 0, sizeof(*pCtx)); 7694 7695 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); 7696 if( rc==SQLITE_OK ){ 7697 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); 7698 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ 7699 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and 7700 ** (c) the file system is read-only, then enable no-locking access. 7701 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts 7702 ** that openFlags will have only one of O_RDONLY or O_RDWR. 7703 */ 7704 struct statfs fsInfo; 7705 struct stat conchInfo; 7706 int goLockless = 0; 7707 7708 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { 7709 int err = errno; 7710 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ 7711 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; 7712 } 7713 } 7714 if( goLockless ){ 7715 pCtx->conchHeld = -1; /* read only FS/ lockless */ 7716 rc = SQLITE_OK; 7717 } 7718 } 7719 } 7720 if( rc==SQLITE_OK && lockPath ){ 7721 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); 7722 } 7723 7724 if( rc==SQLITE_OK ){ 7725 pCtx->dbPath = sqlite3DbStrDup(0, dbPath); 7726 if( pCtx->dbPath==NULL ){ 7727 rc = SQLITE_NOMEM_BKPT; 7728 } 7729 } 7730 if( rc==SQLITE_OK ){ 7731 /* all memory is allocated, proxys are created and assigned, 7732 ** switch the locking context and pMethod then return. 
7733 */ 7734 pCtx->oldLockingContext = pFile->lockingContext; 7735 pFile->lockingContext = pCtx; 7736 pCtx->pOldMethod = pFile->pMethod; 7737 pFile->pMethod = &proxyIoMethods; 7738 }else{ 7739 if( pCtx->conchFile ){ 7740 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); 7741 sqlite3_free(pCtx->conchFile); 7742 } 7743 sqlite3DbFree(0, pCtx->lockProxyPath); 7744 sqlite3_free(pCtx->conchFilePath); 7745 sqlite3_free(pCtx); 7746 } 7747 OSTRACE(("TRANSPROXY %d %s\n", pFile->h, 7748 (rc==SQLITE_OK ? "ok" : "failed"))); 7749 return rc; 7750 } 7751 7752 7753 /* 7754 ** This routine handles sqlite3_file_control() calls that are specific 7755 ** to proxy locking. 7756 */ 7757 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 7758 switch( op ){ 7759 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 7760 unixFile *pFile = (unixFile*)id; 7761 if( pFile->pMethod == &proxyIoMethods ){ 7762 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7763 proxyTakeConch(pFile); 7764 if( pCtx->lockProxyPath ){ 7765 *(const char **)pArg = pCtx->lockProxyPath; 7766 }else{ 7767 *(const char **)pArg = ":auto: (not held)"; 7768 } 7769 } else { 7770 *(const char **)pArg = NULL; 7771 } 7772 return SQLITE_OK; 7773 } 7774 case SQLITE_FCNTL_SET_LOCKPROXYFILE: { 7775 unixFile *pFile = (unixFile*)id; 7776 int rc = SQLITE_OK; 7777 int isProxyStyle = (pFile->pMethod == &proxyIoMethods); 7778 if( pArg==NULL || (const char *)pArg==0 ){ 7779 if( isProxyStyle ){ 7780 /* turn off proxy locking - not supported. If support is added for 7781 ** switching proxy locking mode off then it will need to fail if 7782 ** the journal mode is WAL mode. 7783 */ 7784 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? 
SQLITE_MISUSE?*/; 7785 }else{ 7786 /* turn off proxy locking - already off - NOOP */ 7787 rc = SQLITE_OK; 7788 } 7789 }else{ 7790 const char *proxyPath = (const char *)pArg; 7791 if( isProxyStyle ){ 7792 proxyLockingContext *pCtx = 7793 (proxyLockingContext*)pFile->lockingContext; 7794 if( !strcmp(pArg, ":auto:") 7795 || (pCtx->lockProxyPath && 7796 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) 7797 ){ 7798 rc = SQLITE_OK; 7799 }else{ 7800 rc = switchLockProxyPath(pFile, proxyPath); 7801 } 7802 }else{ 7803 /* turn on proxy file locking */ 7804 rc = proxyTransformUnixFile(pFile, proxyPath); 7805 } 7806 } 7807 return rc; 7808 } 7809 default: { 7810 assert( 0 ); /* The call assures that only valid opcodes are sent */ 7811 } 7812 } 7813 /*NOTREACHED*/ assert(0); 7814 return SQLITE_ERROR; 7815 } 7816 7817 /* 7818 ** Within this division (the proxying locking implementation) the procedures 7819 ** above this point are all utilities. The lock-related methods of the 7820 ** proxy-locking sqlite3_io_method object follow. 7821 */ 7822 7823 7824 /* 7825 ** This routine checks if there is a RESERVED lock held on the specified 7826 ** file by this or any other process. If such a lock is held, set *pResOut 7827 ** to a non-zero value otherwise *pResOut is set to zero. The return value 7828 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 
7829 */ 7830 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { 7831 unixFile *pFile = (unixFile*)id; 7832 int rc = proxyTakeConch(pFile); 7833 if( rc==SQLITE_OK ){ 7834 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7835 if( pCtx->conchHeld>0 ){ 7836 unixFile *proxy = pCtx->lockProxy; 7837 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); 7838 }else{ /* conchHeld < 0 is lockless */ 7839 pResOut=0; 7840 } 7841 } 7842 return rc; 7843 } 7844 7845 /* 7846 ** Lock the file with the lock specified by parameter eFileLock - one 7847 ** of the following: 7848 ** 7849 ** (1) SHARED_LOCK 7850 ** (2) RESERVED_LOCK 7851 ** (3) PENDING_LOCK 7852 ** (4) EXCLUSIVE_LOCK 7853 ** 7854 ** Sometimes when requesting one lock state, additional lock states 7855 ** are inserted in between. The locking might fail on one of the later 7856 ** transitions leaving the lock state different from what it started but 7857 ** still short of its goal. The following chart shows the allowed 7858 ** transitions and the inserted intermediate states: 7859 ** 7860 ** UNLOCKED -> SHARED 7861 ** SHARED -> RESERVED 7862 ** SHARED -> (PENDING) -> EXCLUSIVE 7863 ** RESERVED -> (PENDING) -> EXCLUSIVE 7864 ** PENDING -> EXCLUSIVE 7865 ** 7866 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 7867 ** routine to lower a locking level. 7868 */ 7869 static int proxyLock(sqlite3_file *id, int eFileLock) { 7870 unixFile *pFile = (unixFile*)id; 7871 int rc = proxyTakeConch(pFile); 7872 if( rc==SQLITE_OK ){ 7873 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7874 if( pCtx->conchHeld>0 ){ 7875 unixFile *proxy = pCtx->lockProxy; 7876 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); 7877 pFile->eFileLock = proxy->eFileLock; 7878 }else{ 7879 /* conchHeld < 0 is lockless */ 7880 } 7881 } 7882 return rc; 7883 } 7884 7885 7886 /* 7887 ** Lower the locking level on file descriptor pFile to eFileLock. 
eFileLock 7888 ** must be either NO_LOCK or SHARED_LOCK. 7889 ** 7890 ** If the locking level of the file descriptor is already at or below 7891 ** the requested locking level, this routine is a no-op. 7892 */ 7893 static int proxyUnlock(sqlite3_file *id, int eFileLock) { 7894 unixFile *pFile = (unixFile*)id; 7895 int rc = proxyTakeConch(pFile); 7896 if( rc==SQLITE_OK ){ 7897 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7898 if( pCtx->conchHeld>0 ){ 7899 unixFile *proxy = pCtx->lockProxy; 7900 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); 7901 pFile->eFileLock = proxy->eFileLock; 7902 }else{ 7903 /* conchHeld < 0 is lockless */ 7904 } 7905 } 7906 return rc; 7907 } 7908 7909 /* 7910 ** Close a file that uses proxy locks. 7911 */ 7912 static int proxyClose(sqlite3_file *id) { 7913 if( ALWAYS(id) ){ 7914 unixFile *pFile = (unixFile*)id; 7915 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7916 unixFile *lockProxy = pCtx->lockProxy; 7917 unixFile *conchFile = pCtx->conchFile; 7918 int rc = SQLITE_OK; 7919 7920 if( lockProxy ){ 7921 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); 7922 if( rc ) return rc; 7923 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); 7924 if( rc ) return rc; 7925 sqlite3_free(lockProxy); 7926 pCtx->lockProxy = 0; 7927 } 7928 if( conchFile ){ 7929 if( pCtx->conchHeld ){ 7930 rc = proxyReleaseConch(pFile); 7931 if( rc ) return rc; 7932 } 7933 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); 7934 if( rc ) return rc; 7935 sqlite3_free(conchFile); 7936 } 7937 sqlite3DbFree(0, pCtx->lockProxyPath); 7938 sqlite3_free(pCtx->conchFilePath); 7939 sqlite3DbFree(0, pCtx->dbPath); 7940 /* restore the original locking context and pMethod then close it */ 7941 pFile->lockingContext = pCtx->oldLockingContext; 7942 pFile->pMethod = pCtx->pOldMethod; 7943 sqlite3_free(pCtx); 7944 return pFile->pMethod->xClose(id); 7945 } 7946 return SQLITE_OK; 7947 } 



#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
/*
** The proxy locking style is intended for use with AFP filesystems.
** And since AFP is only supported on MacOSX, the proxy locking is also
** restricted to MacOSX.
**
**
******************* End of the proxy lock implementation **********************
******************************************************************************/

/*
** Initialize the operating system interface.
**
** This routine registers all VFS implementations for unix-like operating
** systems.  This routine, and the sqlite3_os_end() routine that follows,
** should be the only routines in this file that are visible from other
** files.
**
** This routine is called once during SQLite initialization and by a
** single thread.  The memory allocation and mutex subsystems have not
** necessarily been initialized when this routine is called, and so they
** should not be used.
**
** Always returns SQLITE_OK.
*/
int sqlite3_os_init(void){
  /*
  ** The following macro defines an initializer for an sqlite3_vfs object.
  ** The name of the VFS is NAME.  The pAppData is a pointer to a pointer
  ** to the "finder" function.  (pAppData is a pointer to a pointer because
  ** silly C90 rules prohibit a void* from being cast to a function pointer
  ** and so we have to go through the intermediate pointer to avoid problems
  ** when compiling with -pedantic-errors on GCC.)
  **
  ** The FINDER parameter to this macro is the name of the pointer to the
  ** finder-function.  The finder-function returns a pointer to the
  ** sqlite_io_methods object that implements the desired locking
  ** behaviors.  See the division above that contains the IOMETHODS
  ** macro for additional information on finder-functions.
  **
  ** Most finders simply return a pointer to a fixed sqlite3_io_methods
  ** object.  But the "autolockIoFinder" available on MacOSX does a little
  ** more than that; it looks at the filesystem type that hosts the
  ** database file and tries to choose a locking method appropriate for
  ** that filesystem type.
  */
  #define UNIXVFS(VFSNAME, FINDER) {                        \
    3,                    /* iVersion */                    \
    sizeof(unixFile),     /* szOsFile */                    \
    MAX_PATHNAME,         /* mxPathname */                  \
    0,                    /* pNext */                       \
    VFSNAME,              /* zName */                       \
    (void*)&FINDER,       /* pAppData */                    \
    unixOpen,             /* xOpen */                       \
    unixDelete,           /* xDelete */                     \
    unixAccess,           /* xAccess */                     \
    unixFullPathname,     /* xFullPathname */               \
    unixDlOpen,           /* xDlOpen */                     \
    unixDlError,          /* xDlError */                    \
    unixDlSym,            /* xDlSym */                      \
    unixDlClose,          /* xDlClose */                    \
    unixRandomness,       /* xRandomness */                 \
    unixSleep,            /* xSleep */                      \
    unixCurrentTime,      /* xCurrentTime */                \
    unixGetLastError,     /* xGetLastError */               \
    unixCurrentTimeInt64, /* xCurrentTimeInt64 */           \
    unixSetSystemCall,    /* xSetSystemCall */              \
    unixGetSystemCall,    /* xGetSystemCall */              \
    unixNextSystemCall,   /* xNextSystemCall */             \
  }

  /*
  ** All default VFSes for unix are contained in the following array.
  **
  ** Note that the sqlite3_vfs.pNext field of the VFS object is modified
  ** by the SQLite core when the VFS is registered.  So the following
  ** array cannot be const.
  **
  ** The first entry is always named "unix"; which finder backs it depends
  ** on the platform.  Unless SQLITE_DEFAULT_UNIX_VFS is defined, the
  ** registration loop below makes the first entry the default VFS.
  */
  static sqlite3_vfs aVfs[] = {
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
    UNIXVFS("unix",          autolockIoFinder ),
#elif OS_VXWORKS
    UNIXVFS("unix",          vxworksIoFinder ),
#else
    UNIXVFS("unix",          posixIoFinder ),
#endif
    UNIXVFS("unix-none",     nolockIoFinder ),
    UNIXVFS("unix-dotfile",  dotlockIoFinder ),
    UNIXVFS("unix-excl",     posixIoFinder ),
#if OS_VXWORKS
    UNIXVFS("unix-namedsem", semIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
    UNIXVFS("unix-posix",    posixIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
    UNIXVFS("unix-flock",    flockIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
    UNIXVFS("unix-afp",      afpIoFinder ),
    UNIXVFS("unix-nfs",      nfsIoFinder ),
    UNIXVFS("unix-proxy",    proxyIoFinder ),
#endif
  };
  unsigned int i;          /* Loop counter */

  /* Double-check that the aSyscall[] array has been constructed
  ** correctly.  See ticket [bb3a86e890c8e96ab] */
  assert( ArraySize(aSyscall)==29 );

  /* Register all VFSes defined in the aVfs[] array.  The second argument
  ** to sqlite3_vfs_register() is the make-default flag: it is set for the
  ** entry whose name equals SQLITE_DEFAULT_UNIX_VFS when that macro is
  ** defined, and for the first entry otherwise. */
  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
#ifdef SQLITE_DEFAULT_UNIX_VFS
    sqlite3_vfs_register(&aVfs[i],
           0==strcmp(aVfs[i].zName,SQLITE_DEFAULT_UNIX_VFS));
#else
    sqlite3_vfs_register(&aVfs[i], i==0);
#endif
  }
#ifdef SQLITE_OS_KV_OPTIONAL
  sqlite3KvvfsInit();
#endif
  unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1);

#ifndef SQLITE_OMIT_WAL
  /* Validate lock assumptions */
  assert( SQLITE_SHM_NLOCK==8 );  /* Number of available locks */
  assert( UNIX_SHM_BASE==120  );  /* Start of locking area */
  /* Locks:
  **    WRITE       UNIX_SHM_BASE      120
  **    CKPT        UNIX_SHM_BASE+1    121
  **    RECOVER     UNIX_SHM_BASE+2    122
  **    READ-0      UNIX_SHM_BASE+3    123
  **    READ-1      UNIX_SHM_BASE+4    124
  **    READ-2      UNIX_SHM_BASE+5    125
  **    READ-3      UNIX_SHM_BASE+6    126
  **    READ-4      UNIX_SHM_BASE+7    127
  **    DMS         UNIX_SHM_BASE+8    128
  */
  assert( UNIX_SHM_DMS==128   );  /* Byte offset of the deadman-switch */
#endif

  /* Initialize temp file dir array. */
  unixTempFileInit();

  return SQLITE_OK;
}

/*
** Shutdown the operating system interface.
**
** Some operating systems might need to do some cleanup in this routine,
** to release dynamically allocated objects.  But not on unix.
** Apart from clearing the unixBigLock pointer, this routine is a no-op
** for unix.
*/
int sqlite3_os_end(void){
  unixBigLock = 0;
  return SQLITE_OK;
}

#endif /* SQLITE_OS_UNIX */