os_unix.c 248 KB


  1. /*
  2. ** 2004 May 22
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This file contains the VFS implementation for unix-like operating systems
  14. ** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
  15. **
  16. ** There are actually several different VFS implementations in this file.
  17. ** The differences are in the way that file locking is done. The default
  18. ** implementation uses Posix Advisory Locks. Alternative implementations
  19. ** use flock(), dot-files, various proprietary locking schemas, or simply
  20. ** skip locking all together.
  21. **
  22. ** This source file is organized into divisions where the logic for various
  23. ** subfunctions is contained within the appropriate division. PLEASE
  24. ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed
  25. ** in the correct division and should be clearly labeled.
  26. **
  27. ** The layout of divisions is as follows:
  28. **
  29. ** * General-purpose declarations and utility functions.
  30. ** * Unique file ID logic used by VxWorks.
  31. ** * Various locking primitive implementations (all except proxy locking):
  32. ** + for Posix Advisory Locks
  33. ** + for no-op locks
  34. ** + for dot-file locks
  35. ** + for flock() locking
  36. ** + for named semaphore locks (VxWorks only)
  37. ** + for AFP filesystem locks (MacOSX only)
  38. ** * sqlite3_file methods not associated with locking.
  39. ** * Definitions of sqlite3_io_methods objects for all locking
  40. ** methods plus "finder" functions for each locking method.
  41. ** * sqlite3_vfs method implementations.
  42. ** * Locking primitives for the proxy uber-locking-method. (MacOSX only)
  43. ** * Definitions of sqlite3_vfs objects for all locking methods
  44. ** plus implementations of sqlite3_os_init() and sqlite3_os_end().
  45. */
  46. #include "sqliteInt.h"
  47. #if SQLITE_OS_UNIX /* This file is used on unix only */
  48. /*
  49. ** There are various methods for file locking used for concurrency
  50. ** control:
  51. **
  52. ** 1. POSIX locking (the default),
  53. ** 2. No locking,
  54. ** 3. Dot-file locking,
  55. ** 4. flock() locking,
  56. ** 5. AFP locking (OSX only),
  57. ** 6. Named POSIX semaphores (VXWorks only),
  58. ** 7. proxy locking. (OSX only)
  59. **
  60. ** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
  61. ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
  62. ** selection of the appropriate locking style based on the filesystem
  63. ** where the database is located.
  64. */
  65. #if !defined(SQLITE_ENABLE_LOCKING_STYLE)
  66. # if defined(__APPLE__)
  67. # define SQLITE_ENABLE_LOCKING_STYLE 1
  68. # else
  69. # define SQLITE_ENABLE_LOCKING_STYLE 0
  70. # endif
  71. #endif
  72. /*
  73. ** Define the OS_VXWORKS pre-processor macro to 1 if building on
  74. ** vxworks, or 0 otherwise.
  75. */
  76. #ifndef OS_VXWORKS
  77. # if defined(__RTP__) || defined(_WRS_KERNEL)
  78. # define OS_VXWORKS 1
  79. # else
  80. # define OS_VXWORKS 0
  81. # endif
  82. #endif
  83. /*
  84. ** These #defines should enable >2GB file support on Posix if the
  85. ** underlying operating system supports it. If the OS lacks
  86. ** large file support, these should be no-ops.
  87. **
  88. ** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
  89. ** on the compiler command line. This is necessary if you are compiling
  90. ** on a recent machine (ex: RedHat 7.2) but you want your code to work
  91. ** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
  92. ** without this option, LFS is enabled. But LFS does not exist in the kernel
  93. ** in RedHat 6.0, so the code won't work. Hence, for maximum binary
  94. ** portability you should omit LFS.
  95. **
  96. ** The previous paragraph was written in 2005. (This paragraph is written
  97. ** on 2008-11-28.) These days, all Linux kernels support large files, so
  98. ** you should probably leave LFS enabled. But some embedded platforms might
  99. ** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful.
  100. */
  101. #ifndef SQLITE_DISABLE_LFS
  102. # define _LARGE_FILE 1
  103. # ifndef _FILE_OFFSET_BITS
  104. # define _FILE_OFFSET_BITS 64
  105. # endif
  106. # define _LARGEFILE_SOURCE 1
  107. #endif
  108. /*
  109. ** standard include files.
  110. */
  111. #include <sys/types.h>
  112. #include <sys/stat.h>
  113. #include <fcntl.h>
  114. #include <unistd.h>
  115. #include <time.h>
  116. #include <sys/time.h>
  117. #include <errno.h>
  118. #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  119. #include <sys/mman.h>
  120. #endif
  121. #if SQLITE_ENABLE_LOCKING_STYLE
  122. # include <sys/ioctl.h>
  123. # if OS_VXWORKS
  124. # include <semaphore.h>
  125. # include <limits.h>
  126. # else
  127. # include <sys/file.h>
  128. # include <sys/param.h>
  129. # endif
  130. #endif /* SQLITE_ENABLE_LOCKING_STYLE */
  131. #if defined(__APPLE__) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
  132. # include <sys/mount.h>
  133. #endif
  134. #ifdef HAVE_UTIME
  135. # include <utime.h>
  136. #endif
  137. /*
  138. ** Allowed values of unixFile.fsFlags
  139. */
  140. #define SQLITE_FSFLAGS_IS_MSDOS 0x1
  141. /*
  142. ** If we are to be thread-safe, include the pthreads header and define
  143. ** the SQLITE_UNIX_THREADS macro.
  144. */
  145. #if SQLITE_THREADSAFE
  146. # include <pthread.h>
  147. # define SQLITE_UNIX_THREADS 1
  148. #endif
  149. /*
  150. ** Default permissions when creating a new file
  151. */
  152. #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
  153. # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
  154. #endif
  155. /*
  156. ** Default permissions when creating auto proxy dir
  157. */
  158. #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
  159. # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
  160. #endif
  161. /*
  162. ** Maximum supported path-length.
  163. */
  164. #define MAX_PATHNAME 512
  165. /*
  166. ** Only set the lastErrno if the error code is a real error and not
  167. ** a normal expected return code of SQLITE_BUSY or SQLITE_OK
  168. */
  169. #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY))
  170. /* Forward references */
  171. typedef struct unixShm unixShm; /* Connection shared memory */
  172. typedef struct unixShmNode unixShmNode; /* Shared memory instance */
  173. typedef struct unixInodeInfo unixInodeInfo; /* An i-node */
  174. typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */
  175. /*
  176. ** Sometimes, after a file handle is closed by SQLite, the file descriptor
  177. ** cannot be closed immediately. In these cases, instances of the following
  178. ** structure are used to store the file descriptor while waiting for an
  179. ** opportunity to either close or reuse it.
  180. */
  181. struct UnixUnusedFd {
  182. int fd; /* File descriptor to close */
  183. int flags; /* Flags this file descriptor was opened with */
  184. UnixUnusedFd *pNext; /* Next unused file descriptor on same file */
  185. };
  186. /*
  187. ** The unixFile structure is subclass of sqlite3_file specific to the unix
  188. ** VFS implementations.
  189. */
  190. typedef struct unixFile unixFile;
  191. struct unixFile {
  192. sqlite3_io_methods const *pMethod; /* Always the first entry */
  193. sqlite3_vfs *pVfs; /* The VFS that created this unixFile */
  194. unixInodeInfo *pInode; /* Info about locks on this inode */
  195. int h; /* The file descriptor */
  196. unsigned char eFileLock; /* The type of lock held on this fd */
  197. unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */
  198. int lastErrno; /* The unix errno from last I/O error */
  199. void *lockingContext; /* Locking style specific state */
  200. UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */
  201. const char *zPath; /* Name of the file */
  202. unixShm *pShm; /* Shared memory segment information */
  203. int szChunk; /* Configured by FCNTL_CHUNK_SIZE */
  204. #if SQLITE_MAX_MMAP_SIZE>0
  205. int nFetchOut; /* Number of outstanding xFetch refs */
  206. sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
  207. sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */
  208. sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */
  209. void *pMapRegion; /* Memory mapped region */
  210. #endif
  211. #ifdef __QNXNTO__
  212. int sectorSize; /* Device sector size */
  213. int deviceCharacteristics; /* Precomputed device characteristics */
  214. #endif
  215. #if SQLITE_ENABLE_LOCKING_STYLE
  216. int openFlags; /* The flags specified at open() */
  217. #endif
  218. #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
  219. unsigned fsFlags; /* cached details from statfs() */
  220. #endif
  221. #if OS_VXWORKS
  222. struct vxworksFileId *pId; /* Unique file ID */
  223. #endif
  224. #ifdef SQLITE_DEBUG
  225. /* The next group of variables are used to track whether or not the
  226. ** transaction counter in bytes 24-27 of database files are updated
  227. ** whenever any part of the database changes. An assertion fault will
  228. ** occur if a file is updated without also updating the transaction
  229. ** counter. This test is made to avoid new problems similar to the
  230. ** one described by ticket #3584.
  231. */
  232. unsigned char transCntrChng; /* True if the transaction counter changed */
  233. unsigned char dbUpdate; /* True if any part of database file changed */
  234. unsigned char inNormalWrite; /* True if in a normal write operation */
  235. #endif
  236. #ifdef SQLITE_TEST
  237. /* In test mode, increase the size of this structure a bit so that
  238. ** it is larger than the struct CrashFile defined in test6.c.
  239. */
  240. char aPadding[32];
  241. #endif
  242. };
  243. /*
  244. ** Allowed values for the unixFile.ctrlFlags bitmask:
  245. */
  246. #define UNIXFILE_EXCL 0x01 /* Connections from one process only */
  247. #define UNIXFILE_RDONLY 0x02 /* Connection is read only */
  248. #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
  249. #ifndef SQLITE_DISABLE_DIRSYNC
  250. # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */
  251. #else
  252. # define UNIXFILE_DIRSYNC 0x00
  253. #endif
  254. #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
  255. #define UNIXFILE_DELETE 0x20 /* Delete on close */
  256. #define UNIXFILE_URI 0x40 /* Filename might have query parameters */
  257. #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */
  258. #define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings have been issued */
  259. /*
  260. ** Include code that is common to all os_*.c files
  261. */
  262. #include "os_common.h"
  263. /*
  264. ** Define various macros that are missing from some systems.
  265. */
  266. #ifndef O_LARGEFILE
  267. # define O_LARGEFILE 0
  268. #endif
  269. #ifdef SQLITE_DISABLE_LFS
  270. # undef O_LARGEFILE
  271. # define O_LARGEFILE 0
  272. #endif
  273. #ifndef O_NOFOLLOW
  274. # define O_NOFOLLOW 0
  275. #endif
  276. #ifndef O_BINARY
  277. # define O_BINARY 0
  278. #endif
  279. /*
  280. ** The threadid macro resolves to the thread-id or to 0. Used for
  281. ** testing and debugging only.
  282. */
  283. #if SQLITE_THREADSAFE
  284. #define threadid pthread_self()
  285. #else
  286. #define threadid 0
  287. #endif
  288. /*
  289. ** HAVE_MREMAP defaults to true on Linux and false everywhere else.
  290. */
  291. #if !defined(HAVE_MREMAP)
  292. # if defined(__linux__) && defined(_GNU_SOURCE)
  293. # define HAVE_MREMAP 1
  294. # else
  295. # define HAVE_MREMAP 0
  296. # endif
  297. #endif
  298. /*
  299. ** Different Unix systems declare open() in different ways. Same use
  300. ** open(const char*,int,mode_t). Others use open(const char*,int,...).
  301. ** The difference is important when using a pointer to the function.
  302. **
  303. ** The safest way to deal with the problem is to always use this wrapper
  304. ** which always has the same well-defined interface.
  305. */
  306. static int posixOpen(const char *zFile, int flags, int mode){
  307. return open(zFile, flags, mode);
  308. }
  309. /*
  310. ** On some systems, calls to fchown() will trigger a message in a security
  311. ** log if they come from non-root processes. So avoid calling fchown() if
  312. ** we are not running as root.
  313. */
  314. static int posixFchown(int fd, uid_t uid, gid_t gid){
  315. return geteuid() ? 0 : fchown(fd,uid,gid);
  316. }
  317. /* Forward reference */
  318. static int openDirectory(const char*, int*);
  319. /*
  320. ** Many system calls are accessed through pointer-to-functions so that
  321. ** they may be overridden at runtime to facilitate fault injection during
  322. ** testing and sandboxing. The following array holds the names and pointers
  323. ** to all overrideable system calls.
  324. */
  325. static struct unix_syscall {
  326. const char *zName; /* Name of the system call */
  327. sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
  328. sqlite3_syscall_ptr pDefault; /* Default value */
  329. } aSyscall[] = {
  330. { "open", (sqlite3_syscall_ptr)posixOpen, 0 },
  331. #define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent)
  332. { "close", (sqlite3_syscall_ptr)close, 0 },
  333. #define osClose ((int(*)(int))aSyscall[1].pCurrent)
  334. { "access", (sqlite3_syscall_ptr)access, 0 },
  335. #define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent)
  336. { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 },
  337. #define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent)
  338. { "stat", (sqlite3_syscall_ptr)stat, 0 },
  339. #define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)
  340. /*
  341. ** The DJGPP compiler environment looks mostly like Unix, but it
  342. ** lacks the fcntl() system call. So redefine fcntl() to be something
  343. ** that always succeeds. This means that locking does not occur under
  344. ** DJGPP. But it is DOS - what did you expect?
  345. */
  346. #ifdef __DJGPP__
  347. { "fstat", 0, 0 },
  348. #define osFstat(a,b,c) 0
  349. #else
  350. { "fstat", (sqlite3_syscall_ptr)fstat, 0 },
  351. #define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
  352. #endif
  353. { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 },
  354. #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)
  355. { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 },
  356. #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent)
  357. { "read", (sqlite3_syscall_ptr)read, 0 },
  358. #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)
  359. #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
  360. { "pread", (sqlite3_syscall_ptr)pread, 0 },
  361. #else
  362. { "pread", (sqlite3_syscall_ptr)0, 0 },
  363. #endif
  364. #define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)
  365. #if defined(USE_PREAD64)
  366. { "pread64", (sqlite3_syscall_ptr)pread64, 0 },
  367. #else
  368. { "pread64", (sqlite3_syscall_ptr)0, 0 },
  369. #endif
  370. #define osPread64 ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent)
  371. { "write", (sqlite3_syscall_ptr)write, 0 },
  372. #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
  373. #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
  374. { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },
  375. #else
  376. { "pwrite", (sqlite3_syscall_ptr)0, 0 },
  377. #endif
  378. #define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\
  379. aSyscall[12].pCurrent)
  380. #if defined(USE_PREAD64)
  381. { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 },
  382. #else
  383. { "pwrite64", (sqlite3_syscall_ptr)0, 0 },
  384. #endif
  385. #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\
  386. aSyscall[13].pCurrent)
  387. { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },
  388. #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)
  389. #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
  390. { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 },
  391. #else
  392. { "fallocate", (sqlite3_syscall_ptr)0, 0 },
  393. #endif
  394. #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
  395. { "unlink", (sqlite3_syscall_ptr)unlink, 0 },
  396. #define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent)
  397. { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
  398. #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
  399. { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
  400. #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
  401. { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
  402. #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
  403. { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 },
  404. #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
  405. #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  406. { "mmap", (sqlite3_syscall_ptr)mmap, 0 },
  407. #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)
  408. { "munmap", (sqlite3_syscall_ptr)munmap, 0 },
  409. #define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)
  410. #if HAVE_MREMAP
  411. { "mremap", (sqlite3_syscall_ptr)mremap, 0 },
  412. #else
  413. { "mremap", (sqlite3_syscall_ptr)0, 0 },
  414. #endif
  415. #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
  416. #endif
  417. }; /* End of the overrideable system calls */
  418. /*
  419. ** This is the xSetSystemCall() method of sqlite3_vfs for all of the
  420. ** "unix" VFSes. Return SQLITE_OK opon successfully updating the
  421. ** system call pointer, or SQLITE_NOTFOUND if there is no configurable
  422. ** system call named zName.
  423. */
  424. static int unixSetSystemCall(
  425. sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */
  426. const char *zName, /* Name of system call to override */
  427. sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */
  428. ){
  429. unsigned int i;
  430. int rc = SQLITE_NOTFOUND;
  431. UNUSED_PARAMETER(pNotUsed);
  432. if( zName==0 ){
  433. /* If no zName is given, restore all system calls to their default
  434. ** settings and return NULL
  435. */
  436. rc = SQLITE_OK;
  437. for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
  438. if( aSyscall[i].pDefault ){
  439. aSyscall[i].pCurrent = aSyscall[i].pDefault;
  440. }
  441. }
  442. }else{
  443. /* If zName is specified, operate on only the one system call
  444. ** specified.
  445. */
  446. for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
  447. if( strcmp(zName, aSyscall[i].zName)==0 ){
  448. if( aSyscall[i].pDefault==0 ){
  449. aSyscall[i].pDefault = aSyscall[i].pCurrent;
  450. }
  451. rc = SQLITE_OK;
  452. if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
  453. aSyscall[i].pCurrent = pNewFunc;
  454. break;
  455. }
  456. }
  457. }
  458. return rc;
  459. }
  460. /*
  461. ** Return the value of a system call. Return NULL if zName is not a
  462. ** recognized system call name. NULL is also returned if the system call
  463. ** is currently undefined.
  464. */
  465. static sqlite3_syscall_ptr unixGetSystemCall(
  466. sqlite3_vfs *pNotUsed,
  467. const char *zName
  468. ){
  469. unsigned int i;
  470. UNUSED_PARAMETER(pNotUsed);
  471. for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
  472. if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;
  473. }
  474. return 0;
  475. }
  476. /*
  477. ** Return the name of the first system call after zName. If zName==NULL
  478. ** then return the name of the first system call. Return NULL if zName
  479. ** is the last system call or if zName is not the name of a valid
  480. ** system call.
  481. */
  482. static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){
  483. int i = -1;
  484. UNUSED_PARAMETER(p);
  485. if( zName ){
  486. for(i=0; i<ArraySize(aSyscall)-1; i++){
  487. if( strcmp(zName, aSyscall[i].zName)==0 ) break;
  488. }
  489. }
  490. for(i++; i<ArraySize(aSyscall); i++){
  491. if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;
  492. }
  493. return 0;
  494. }
  495. /*
  496. ** Do not accept any file descriptor less than this value, in order to avoid
  497. ** opening database file using file descriptors that are commonly used for
  498. ** standard input, output, and error.
  499. */
  500. #ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
  501. # define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
  502. #endif
  503. /*
  504. ** Invoke open(). Do so multiple times, until it either succeeds or
  505. ** fails for some reason other than EINTR.
  506. **
  507. ** If the file creation mode "m" is 0 then set it to the default for
  508. ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
  509. ** 0644) as modified by the system umask. If m is not 0, then
  510. ** make the file creation mode be exactly m ignoring the umask.
  511. **
  512. ** The m parameter will be non-zero only when creating -wal, -journal,
  513. ** and -shm files. We want those files to have *exactly* the same
  514. ** permissions as their original database, unadulterated by the umask.
  515. ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
  516. ** transaction crashes and leaves behind hot journals, then any
  517. ** process that is able to write to the database will also be able to
  518. ** recover the hot journals.
  519. */
  520. static int robust_open(const char *z, int f, mode_t m){
  521. int fd;
  522. mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
  523. while(1){
  524. #if defined(O_CLOEXEC)
  525. fd = osOpen(z,f|O_CLOEXEC,m2);
  526. #else
  527. fd = osOpen(z,f,m2);
  528. #endif
  529. if( fd<0 ){
  530. if( errno==EINTR ) continue;
  531. break;
  532. }
  533. if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
  534. osClose(fd);
  535. sqlite3_log(SQLITE_WARNING,
  536. "attempt to open \"%s\" as file descriptor %d", z, fd);
  537. fd = -1;
  538. if( osOpen("/dev/null", f, m)<0 ) break;
  539. }
  540. if( fd>=0 ){
  541. if( m!=0 ){
  542. struct stat statbuf;
  543. if( osFstat(fd, &statbuf)==0
  544. && statbuf.st_size==0
  545. && (statbuf.st_mode&0777)!=m
  546. ){
  547. osFchmod(fd, m);
  548. }
  549. }
  550. #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
  551. osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
  552. #endif
  553. }
  554. return fd;
  555. }
  556. /*
  557. ** Helper functions to obtain and relinquish the global mutex. The
  558. ** global mutex is used to protect the unixInodeInfo and
  559. ** vxworksFileId objects used by this file, all of which may be
  560. ** shared by multiple threads.
  561. **
  562. ** Function unixMutexHeld() is used to assert() that the global mutex
  563. ** is held when required. This function is only used as part of assert()
  564. ** statements. e.g.
  565. **
  566. ** unixEnterMutex()
  567. ** assert( unixMutexHeld() );
  568. ** unixEnterLeave()
  569. */
  570. static void unixEnterMutex(void){
  571. sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
  572. }
  573. static void unixLeaveMutex(void){
  574. sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
  575. }
  576. #ifdef SQLITE_DEBUG
  577. static int unixMutexHeld(void) {
  578. return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
  579. }
  580. #endif
  581. #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  582. /*
  583. ** Helper function for printing out trace information from debugging
  584. ** binaries. This returns the string represetation of the supplied
  585. ** integer lock-type.
  586. */
  587. static const char *azFileLock(int eFileLock){
  588. switch( eFileLock ){
  589. case NO_LOCK: return "NONE";
  590. case SHARED_LOCK: return "SHARED";
  591. case RESERVED_LOCK: return "RESERVED";
  592. case PENDING_LOCK: return "PENDING";
  593. case EXCLUSIVE_LOCK: return "EXCLUSIVE";
  594. }
  595. return "ERROR";
  596. }
  597. #endif
  598. #ifdef SQLITE_LOCK_TRACE
  599. /*
  600. ** Print out information about all locking operations.
  601. **
  602. ** This routine is used for troubleshooting locks on multithreaded
  603. ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
  604. ** command-line option on the compiler. This code is normally
  605. ** turned off.
  606. */
  607. static int lockTrace(int fd, int op, struct flock *p){
  608. char *zOpName, *zType;
  609. int s;
  610. int savedErrno;
  611. if( op==F_GETLK ){
  612. zOpName = "GETLK";
  613. }else if( op==F_SETLK ){
  614. zOpName = "SETLK";
  615. }else{
  616. s = osFcntl(fd, op, p);
  617. sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
  618. return s;
  619. }
  620. if( p->l_type==F_RDLCK ){
  621. zType = "RDLCK";
  622. }else if( p->l_type==F_WRLCK ){
  623. zType = "WRLCK";
  624. }else if( p->l_type==F_UNLCK ){
  625. zType = "UNLCK";
  626. }else{
  627. assert( 0 );
  628. }
  629. assert( p->l_whence==SEEK_SET );
  630. s = osFcntl(fd, op, p);
  631. savedErrno = errno;
  632. sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
  633. threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
  634. (int)p->l_pid, s);
  635. if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
  636. struct flock l2;
  637. l2 = *p;
  638. osFcntl(fd, F_GETLK, &l2);
  639. if( l2.l_type==F_RDLCK ){
  640. zType = "RDLCK";
  641. }else if( l2.l_type==F_WRLCK ){
  642. zType = "WRLCK";
  643. }else if( l2.l_type==F_UNLCK ){
  644. zType = "UNLCK";
  645. }else{
  646. assert( 0 );
  647. }
  648. sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
  649. zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  650. }
  651. errno = savedErrno;
  652. return s;
  653. }
  654. #undef osFcntl
  655. #define osFcntl lockTrace
  656. #endif /* SQLITE_LOCK_TRACE */
  657. /*
  658. ** Retry ftruncate() calls that fail due to EINTR
  659. */
  660. static int robust_ftruncate(int h, sqlite3_int64 sz){
  661. int rc;
  662. do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR );
  663. return rc;
  664. }
  665. /*
  666. ** This routine translates a standard POSIX errno code into something
  667. ** useful to the clients of the sqlite3 functions. Specifically, it is
  668. ** intended to translate a variety of "try again" errors into SQLITE_BUSY
  669. ** and a variety of "please close the file descriptor NOW" errors into
  670. ** SQLITE_IOERR
  671. **
  672. ** Errors during initialization of locks, or file system support for locks,
  673. ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
  674. */
  675. static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
  676. switch (posixError) {
  677. #if 0
  678. /* At one point this code was not commented out. In theory, this branch
  679. ** should never be hit, as this function should only be called after
  680. ** a locking-related function (i.e. fcntl()) has returned non-zero with
  681. ** the value of errno as the first argument. Since a system call has failed,
  682. ** errno should be non-zero.
  683. **
  684. ** Despite this, if errno really is zero, we still don't want to return
  685. ** SQLITE_OK. The system call failed, and *some* SQLite error should be
  686. ** propagated back to the caller. Commenting this branch out means errno==0
  687. ** will be handled by the "default:" case below.
  688. */
  689. case 0:
  690. return SQLITE_OK;
  691. #endif
  692. case EAGAIN:
  693. case ETIMEDOUT:
  694. case EBUSY:
  695. case EINTR:
  696. case ENOLCK:
  697. /* random NFS retry error, unless during file system support
  698. * introspection, in which it actually means what it says */
  699. return SQLITE_BUSY;
  700. case EACCES:
  701. /* EACCES is like EAGAIN during locking operations, but not any other time*/
  702. if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
  703. (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
  704. (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
  705. (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
  706. return SQLITE_BUSY;
  707. }
  708. /* else fall through */
  709. case EPERM:
  710. return SQLITE_PERM;
  711. /* EDEADLK is only possible if a call to fcntl(F_SETLKW) is made. And
  712. ** this module never makes such a call. And the code in SQLite itself
  713. ** asserts that SQLITE_IOERR_BLOCKED is never returned. For these reasons
  714. ** this case is also commented out. If the system does set errno to EDEADLK,
  715. ** the default SQLITE_IOERR_XXX code will be returned. */
  716. #if 0
  717. case EDEADLK:
  718. return SQLITE_IOERR_BLOCKED;
  719. #endif
  720. #if EOPNOTSUPP!=ENOTSUP
  721. case EOPNOTSUPP:
  722. /* something went terribly awry, unless during file system support
  723. * introspection, in which it actually means what it says */
  724. #endif
  725. #ifdef ENOTSUP
  726. case ENOTSUP:
  727. /* invalid fd, unless during file system support introspection, in which
  728. * it actually means what it says */
  729. #endif
  730. case EIO:
  731. case EBADF:
  732. case EINVAL:
  733. case ENOTCONN:
  734. case ENODEV:
  735. case ENXIO:
  736. case ENOENT:
  737. #ifdef ESTALE /* ESTALE is not defined on Interix systems */
  738. case ESTALE:
  739. #endif
  740. case ENOSYS:
  741. /* these should force the client to close the file and reconnect */
  742. default:
  743. return sqliteIOErr;
  744. }
  745. }
  746. /******************************************************************************
  747. ****************** Begin Unique File ID Utility Used By VxWorks ***************
  748. **
  749. ** On most versions of unix, we can get a unique ID for a file by concatenating
  750. ** the device number and the inode number. But this does not work on VxWorks.
  751. ** On VxWorks, a unique file id must be based on the canonical filename.
  752. **
  753. ** A pointer to an instance of the following structure can be used as a
  754. ** unique file ID in VxWorks. Each instance of this structure contains
  755. ** a copy of the canonical filename. There is also a reference count.
  756. ** The structure is reclaimed when the number of pointers to it drops to
  757. ** zero.
  758. **
  759. ** There are never very many files open at one time and lookups are not
  760. ** a performance-critical path, so it is sufficient to put these
  761. ** structures on a linked list.
  762. */
  763. struct vxworksFileId {
  764. struct vxworksFileId *pNext; /* Next in a list of them all */
  765. int nRef; /* Number of references to this one */
  766. int nName; /* Length of the zCanonicalName[] string */
  767. char *zCanonicalName; /* Canonical filename */
  768. };
  769. #if OS_VXWORKS
  770. /*
  771. ** All unique filenames are held on a linked list headed by this
  772. ** variable:
  773. */
  774. static struct vxworksFileId *vxworksFileList = 0;
  /*
  ** Simplify a filename into its canonical form by making the following
  ** changes:
  **
  **   * removing any trailing and duplicate /
  **   * convert /./ into just /
  **   * convert /A/../ where A is any simple name into just /
  **
  ** Changes are made in-place.  The original filename is in z[0..n-1].
  ** A NUL terminator is always written after the simplified name, so
  ** z[n] must be writable.  Return the number of characters in the
  ** simplified name (not counting the terminator).
  **
  ** A name that consists only of slashes ("/", "//", "///", ...)
  ** simplifies to "/".  The "." and ".." components are only recognized
  ** when they are followed by a slash that lies inside z[0..n-1]; a
  ** trailing "/." or "/.." is left unchanged.
  */
  static int vxworksSimplifyName(char *z, int n){
    int i, j;
    /* Drop trailing slashes but always keep one character so that the
    ** root directory survives. */
    while( n>1 && z[n-1]=='/' ){ n--; }
    for(i=j=0; i<n; i++){
      /* Only look ahead while z[i+1] is still part of the (trimmed) name.
      ** Without the i+1<n bound, an input such as "//" is trimmed to n==1
      ** and the look-ahead then sees the slash that was just trimmed,
      ** discarding the only remaining character and yielding "". */
      if( z[i]=='/' && i+1<n ){
        if( z[i+1]=='/' ) continue;            /* duplicate slash */
        if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
          i += 1;                              /* skip the "/." of "/./" */
          continue;
        }
        if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
          /* "/../": back the output up over the previous component and
          ** over the slash in front of it, if there is one. */
          while( j>0 && z[j-1]!='/' ){ j--; }
          if( j>0 ){ j--; }
          i += 2;
          continue;
        }
      }
      z[j++] = z[i];
    }
    z[j] = 0;
    return j;
  }
  810. /*
  811. ** Find a unique file ID for the given absolute pathname. Return
  812. ** a pointer to the vxworksFileId object. This pointer is the unique
  813. ** file ID.
  814. **
  815. ** The nRef field of the vxworksFileId object is incremented before
  816. ** the object is returned. A new vxworksFileId object is created
  817. ** and added to the global list if necessary.
  818. **
  819. ** If a memory allocation error occurs, return NULL.
  820. */
  821. static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
  822. struct vxworksFileId *pNew; /* search key and new file ID */
  823. struct vxworksFileId *pCandidate; /* For looping over existing file IDs */
  824. int n; /* Length of zAbsoluteName string */
  825. assert( zAbsoluteName[0]=='/' );
  826. n = (int)strlen(zAbsoluteName);
  827. pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
  828. if( pNew==0 ) return 0;
  829. pNew->zCanonicalName = (char*)&pNew[1];
  830. memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
  831. n = vxworksSimplifyName(pNew->zCanonicalName, n);
  832. /* Search for an existing entry that matching the canonical name.
  833. ** If found, increment the reference count and return a pointer to
  834. ** the existing file ID.
  835. */
  836. unixEnterMutex();
  837. for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
  838. if( pCandidate->nName==n
  839. && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
  840. ){
  841. sqlite3_free(pNew);
  842. pCandidate->nRef++;
  843. unixLeaveMutex();
  844. return pCandidate;
  845. }
  846. }
  847. /* No match was found. We will make a new file ID */
  848. pNew->nRef = 1;
  849. pNew->nName = n;
  850. pNew->pNext = vxworksFileList;
  851. vxworksFileList = pNew;
  852. unixLeaveMutex();
  853. return pNew;
  854. }
  855. /*
  856. ** Decrement the reference count on a vxworksFileId object. Free
  857. ** the object when the reference count reaches zero.
  858. */
  859. static void vxworksReleaseFileId(struct vxworksFileId *pId){
  860. unixEnterMutex();
  861. assert( pId->nRef>0 );
  862. pId->nRef--;
  863. if( pId->nRef==0 ){
  864. struct vxworksFileId **pp;
  865. for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
  866. assert( *pp==pId );
  867. *pp = pId->pNext;
  868. sqlite3_free(pId);
  869. }
  870. unixLeaveMutex();
  871. }
  872. #endif /* OS_VXWORKS */
  873. /*************** End of Unique File ID Utility Used By VxWorks ****************
  874. ******************************************************************************/
  875. /******************************************************************************
  876. *************************** Posix Advisory Locking ****************************
  877. **
  878. ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)
  879. ** section 6.5.2.2 lines 483 through 490 specify that when a process
  880. ** sets or clears a lock, that operation overrides any prior locks set
  881. ** by the same process. It does not explicitly say so, but this implies
  882. ** that it overrides locks set by the same process using a different
  883. ** file descriptor. Consider this test case:
  884. **
  885. ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
  886. ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
  887. **
  888. ** Suppose ./file1 and ./file2 are really the same file (because
  889. ** one is a hard or symbolic link to the other) then if you set
  890. ** an exclusive lock on fd1, then try to get an exclusive lock
  891. ** on fd2, it works. I would have expected the second lock to
  892. ** fail since there was already a lock on the file due to fd1.
  893. ** But not so. Since both locks came from the same process, the
  894. ** second overrides the first, even though they were on different
  895. ** file descriptors opened on different file names.
  896. **
  897. ** This means that we cannot use POSIX locks to synchronize file access
  898. ** among competing threads of the same process. POSIX locks will work fine
  899. ** to synchronize access for threads in separate processes, but not
  900. ** threads within the same process.
  901. **
  902. ** To work around the problem, SQLite has to manage file locks internally
  903. ** on its own. Whenever a new database is opened, we have to find the
  904. ** specific inode of the database file (the inode is determined by the
  905. ** st_dev and st_ino fields of the stat structure that fstat() fills in)
  906. ** and check for locks already existing on that inode. When locks are
  907. ** created or removed, we have to look at our own internal record of the
  908. ** locks to see if another thread has previously set a lock on that same
  909. ** inode.
  910. **
  911. ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
  912. ** For VxWorks, we have to use the alternative unique ID system based on
  913. ** canonical filename and implemented in the previous division.)
  914. **
  915. ** The sqlite3_file structure for POSIX is no longer just an integer file
  916. ** descriptor. It is now a structure that holds the integer file
  917. ** descriptor and a pointer to a structure that describes the internal
  918. ** locks on the corresponding inode. There is one locking structure
  919. ** per inode, so if the same inode is opened twice, both unixFile structures
  920. ** point to the same locking structure. The locking structure keeps
  921. ** a reference count (so we will know when to delete it) and a "cnt"
  922. ** field that tells us its internal lock status. cnt==0 means the
  923. ** file is unlocked. cnt==-1 means the file has an exclusive lock.
  924. ** cnt>0 means there are cnt shared locks on the file.
  925. **
  926. ** Any attempt to lock or unlock a file first checks the locking
  927. ** structure. The fcntl() system call is only invoked to set a
  928. ** POSIX lock if the internal lock structure transitions between
  929. ** a locked and an unlocked state.
  930. **
  931. ** But wait: there are yet more problems with POSIX advisory locks.
  932. **
  933. ** If you close a file descriptor that points to a file that has locks,
  934. ** all locks on that file that are owned by the current process are
  935. ** released. To work around this problem, each unixInodeInfo object
  936. ** maintains a count of the number of pending locks on that inode.
  937. ** When an attempt is made to close an unixFile, if there are
  938. ** other unixFile open on the same inode that are holding locks, the call
  939. ** to close() the file descriptor is deferred until all of the locks clear.
  940. ** The unixInodeInfo structure keeps a list of file descriptors that need to
  941. ** be closed and that list is walked (and cleared) when the last lock
  942. ** clears.
  943. **
  944. ** Yet another problem: LinuxThreads do not play well with posix locks.
  945. **
  946. ** Many older versions of linux use the LinuxThreads library which is
  947. ** not posix compliant. Under LinuxThreads, a lock created by thread
  948. ** A cannot be modified or overridden by a different thread B.
  949. ** Only thread A can modify the lock. Locking behavior is correct
  950. ** if the application uses the newer Native Posix Thread Library (NPTL)
  951. ** on linux - with NPTL a lock created by thread A can override locks
  952. ** in thread B. But there is no way to know at compile-time which
  953. ** threading library is being used. So there is no way to know at
  954. ** compile-time whether or not thread A can override locks on thread B.
  955. ** One has to do a run-time check to discover the behavior of the
  956. ** current process.
  957. **
  958. ** SQLite used to support LinuxThreads. But support for LinuxThreads
  959. ** was dropped beginning with version 3.7.0. SQLite will still work with
  960. ** LinuxThreads provided that (1) there is no more than one connection
  961. ** per database file in the same process and (2) database connections
  962. ** do not move across threads.
  963. */
  /*
  ** Lookup key for unixInodeInfo objects.  findInodeInfo() zeroes a key
  ** of this type, fills in its fields and compares it against existing
  ** entries with memcmp(), so two keys are equal only when every byte
  ** of the structure matches.
  */
  struct unixFileId {
    /* Device number */
    dev_t dev;
#if OS_VXWORKS
    /* Unique file ID for vxworks */
    struct vxworksFileId *pId;
#else
    /* Inode number */
    ino_t ino;
#endif
  };
  976. /*
  977. ** An instance of the following structure is allocated for each open
  978. ** inode. Or, on LinuxThreads, there is one of these structures for
  979. ** each inode opened by each thread.
  980. **
  981. ** A single inode can have multiple file descriptors, so each unixFile
  982. ** structure contains a pointer to an instance of this object and this
  983. ** object keeps a count of the number of unixFile pointing to it.
  984. */
  985. struct unixInodeInfo {
  986. struct unixFileId fileId; /* The lookup key */
  987. int nShared; /* Number of SHARED locks held */
  988. unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  989. unsigned char bProcessLock; /* An exclusive process lock is held */
  990. int nRef; /* Number of pointers to this structure */
  991. unixShmNode *pShmNode; /* Shared memory associated with this inode */
  992. int nLock; /* Number of outstanding file locks */
  993. UnixUnusedFd *pUnused; /* Unused file descriptors to close */
  994. unixInodeInfo *pNext; /* List of all unixInodeInfo objects */
  995. unixInodeInfo *pPrev; /* .... doubly linked */
  996. #if SQLITE_ENABLE_LOCKING_STYLE
  997. unsigned long long sharedByte; /* for AFP simulated shared lock */
  998. #endif
  999. #if OS_VXWORKS
  1000. sem_t *pSem; /* Named POSIX semaphore */
  1001. char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */
  1002. #endif
  1003. };
  1004. /*
  1005. ** A list of all unixInodeInfo objects.
  1006. */
  /* This is the head of a doubly linked list.  findInodeInfo() pushes new
  ** entries on the front and releaseInodeInfo() unlinks them; both assert
  ** unixMutexHeld() before touching the list. */
  1007. static unixInodeInfo *inodeList = 0;
  /*
  ** unixLogErrorAtLine() is only ever invoked through the unixLogError()
  ** macro, which supplies the source line of the call site (robust_close()
  ** is the one exception visible nearby: it forwards a caller's line).
  **
  ** Call it right after an OS function has failed and set errno.  It
  ** emits one sqlite3_log() record that contains the errno value and,
  ** when available, the matching human-readable text from strerror()
  ** or strerror_r().
  **
  **   errcode   SQLite error code that will be returned to SQLite
  **             (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN)
  **   zFunc     Name of the OS function that failed (e.g. "unlink", "open")
  **   zPath     Associated file-system path, or NULL if there is none
  **   iLine     Source line number where the error occurred
  **
  ** The return value is always errcode.
  */
#define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__)
  static int unixLogErrorAtLine(
    int errcode,
    const char *zFunc,
    const char *zPath,
    int iLine
  ){
    char *zMsg;                  /* Text obtained from strerror() or equivalent */
    int savedErrno = errno;      /* Captured first, before anything can change it */

#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
    /* Threadsafe build with strerror_r() available. */
    char aMsgBuf[80];
    memset(aMsgBuf, 0, sizeof(aMsgBuf));

    /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is
    ** defined, assume the GNU flavor of strerror_r(), which returns a
    ** pointer to the message.  That pointer may refer to aMsgBuf[] or to
    ** static storage elsewhere.  Otherwise assume the POSIX flavor, which
    ** always writes the message into aMsgBuf[].
    **
    ** Wrongly assuming POSIX usually just yields an empty message.  Wrongly
    ** assuming GNU could lead to a segfault.
    */
# if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
    zMsg = strerror_r(savedErrno, aMsgBuf, sizeof(aMsgBuf)-1);
# else
    strerror_r(savedErrno, aMsgBuf, sizeof(aMsgBuf)-1);
    zMsg = aMsgBuf;
# endif
#elif SQLITE_THREADSAFE
    /* Threadsafe build without strerror_r(): log no message text. */
    zMsg = "";
#else
    /* Non-threadsafe build: plain strerror() is good enough. */
    zMsg = strerror(savedErrno);
#endif

    sqlite3_log(errcode,
        "os_unix.c:%d: (%d) %s(%s) - %s",
        iLine, savedErrno, zFunc, (zPath ? zPath : ""), zMsg
    );
    return errcode;
  }
  1071. /*
  1072. ** Close a file descriptor.
  1073. **
  1074. ** We assume that close() almost always works, since it is only in a
  1075. ** very sick application or on a very sick platform that it might fail.
  1076. ** If it does fail, simply leak the file descriptor, but do log the
  1077. ** error.
  1078. **
  1079. ** Note that it is not safe to retry close() after EINTR since the
  1080. ** file descriptor might have already been reused by another thread.
  1081. ** So we don't even try to recover from an EINTR. Just log the error
  1082. ** and move on.
  1083. */
  1084. static void robust_close(unixFile *pFile, int h, int lineno){
  1085. if( osClose(h) ){
  1086. unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close",
  1087. pFile ? pFile->zPath : 0, lineno);
  1088. }
  1089. }
  1090. /*
  1091. ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list.
  1092. */
  1093. static void closePendingFds(unixFile *pFile){
  1094. unixInodeInfo *pInode = pFile->pInode;
  1095. UnixUnusedFd *p;
  1096. UnixUnusedFd *pNext;
  1097. for(p=pInode->pUnused; p; p=pNext){
  1098. pNext = p->pNext;
  1099. robust_close(pFile, p->fd, __LINE__);
  1100. sqlite3_free(p);
  1101. }
  1102. pInode->pUnused = 0;
  1103. }
  1104. /*
  1105. ** Release a unixInodeInfo structure previously allocated by findInodeInfo().
  1106. **
  1107. ** The mutex entered using the unixEnterMutex() function must be held
  1108. ** when this function is called.
  1109. */
  1110. static void releaseInodeInfo(unixFile *pFile){
  1111. unixInodeInfo *pInode = pFile->pInode;
  1112. assert( unixMutexHeld() );
  1113. if( ALWAYS(pInode) ){
  1114. pInode->nRef--;
  1115. if( pInode->nRef==0 ){
  1116. assert( pInode->pShmNode==0 );
  1117. closePendingFds(pFile);
  1118. if( pInode->pPrev ){
  1119. assert( pInode->pPrev->pNext==pInode );
  1120. pInode->pPrev->pNext = pInode->pNext;
  1121. }else{
  1122. assert( inodeList==pInode );
  1123. inodeList = pInode->pNext;
  1124. }
  1125. if( pInode->pNext ){
  1126. assert( pInode->pNext->pPrev==pInode );
  1127. pInode->pNext->pPrev = pInode->pPrev;
  1128. }
  1129. sqlite3_free(pInode);
  1130. }
  1131. }
  1132. }
  1133. /*
  1134. ** Given a file descriptor, locate the unixInodeInfo object that
  1135. ** describes that file descriptor. Create a new one if necessary. The
  1136. ** return value might be uninitialized if an error occurs.
  1137. **
  1138. ** The mutex entered using the unixEnterMutex() function must be held
  1139. ** when this function is called.
  1140. **
  1141. ** Return an appropriate error code.
  1142. */
  1143. static int findInodeInfo(
  1144. unixFile *pFile, /* Unix file with file desc used in the key */
  1145. unixInodeInfo **ppInode /* Return the unixInodeInfo object here */
  1146. ){
  1147. int rc; /* System call return code */
  1148. int fd; /* The file descriptor for pFile */
  1149. struct unixFileId fileId; /* Lookup key for the unixInodeInfo */
  1150. struct stat statbuf; /* Low-level file information */
  1151. unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */
  1152. assert( unixMutexHeld() );
  1153. /* Get low-level information about the file that we can used to
  1154. ** create a unique name for the file.
  1155. */
  1156. fd = pFile->h;
  1157. rc = osFstat(fd, &statbuf);
  1158. if( rc!=0 ){
  1159. pFile->lastErrno = errno;
  1160. #ifdef EOVERFLOW
  1161. if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
  1162. #endif
  1163. return SQLITE_IOERR;
  1164. }
  1165. #ifdef __APPLE__
  1166. /* On OS X on an msdos filesystem, the inode number is reported
  1167. ** incorrectly for zero-size files. See ticket #3260. To work
  1168. ** around this problem (we consider it a bug in OS X, not SQLite)
  1169. ** we always increase the file size to 1 by writing a single byte
  1170. ** prior to accessing the inode number. The one byte written is
  1171. ** an ASCII 'S' character which also happens to be the first byte
  1172. ** in the header of every SQLite database. In this way, if there
  1173. ** is a race condition such that another thread has already populated
  1174. ** the first page of the database, no damage is done.
  1175. */
  1176. if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
  1177. do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );
  1178. if( rc!=1 ){
  1179. pFile->lastErrno = errno;
  1180. return SQLITE_IOERR;
  1181. }
  1182. rc = osFstat(fd, &statbuf);
  1183. if( rc!=0 ){
  1184. pFile->lastErrno = errno;
  1185. return SQLITE_IOERR;
  1186. }
  1187. }
  1188. #endif
  1189. memset(&fileId, 0, sizeof(fileId));
  1190. fileId.dev = statbuf.st_dev;
  1191. #if OS_VXWORKS
  1192. fileId.pId = pFile->pId;
  1193. #else
  1194. fileId.ino = statbuf.st_ino;
  1195. #endif
  1196. pInode = inodeList;
  1197. while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){
  1198. pInode = pInode->pNext;
  1199. }
  1200. if( pInode==0 ){
  1201. pInode = sqlite3_malloc( sizeof(*pInode) );
  1202. if( pInode==0 ){
  1203. return SQLITE_NOMEM;
  1204. }
  1205. memset(pInode, 0, sizeof(*pInode));
  1206. memcpy(&pInode->fileId, &fileId, sizeof(fileId));
  1207. pInode->nRef = 1;
  1208. pInode->pNext = inodeList;
  1209. pInode->pPrev = 0;
  1210. if( inodeList ) inodeList->pPrev = pInode;
  1211. inodeList = pInode;
  1212. }else{
  1213. pInode->nRef++;
  1214. }
  1215. *ppInode = pInode;
  1216. return SQLITE_OK;
  1217. }
  1218. /*
  1219. ** Check a unixFile that is a database. Verify the following:
  1220. **
  1221. ** (1) There is exactly one hard link on the file
  1222. ** (2) The file is not a symbolic link
  1223. ** (3) The file has not been renamed or unlinked
  1224. **
  1225. ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
  1226. */
  1227. static void verifyDbFile(unixFile *pFile){
  1228. struct stat buf;
  1229. int rc;
  1230. if( pFile->ctrlFlags & UNIXFILE_WARNED ){
  1231. /* One or more of the following warnings have already been issued. Do not
  1232. ** repeat them so as not to clutter the error log */
  1233. return;
  1234. }
  1235. rc = osFstat(pFile->h, &buf);
  1236. if( rc!=0 ){
  1237. sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
  1238. pFile->ctrlFlags |= UNIXFILE_WARNED;
  1239. return;
  1240. }
  1241. if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
  1242. sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
  1243. pFile->ctrlFlags |= UNIXFILE_WARNED;
  1244. return;
  1245. }
  1246. if( buf.st_nlink>1 ){
  1247. sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
  1248. pFile->ctrlFlags |= UNIXFILE_WARNED;
  1249. return;
  1250. }
  1251. if( pFile->pInode!=0
  1252. && ((rc = osStat(pFile->zPath, &buf))!=0
  1253. || buf.st_ino!=pFile->pInode->fileId.ino)
  1254. ){
  1255. sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
  1256. pFile->ctrlFlags |= UNIXFILE_WARNED;
  1257. return;
  1258. }
  1259. }
  1260. /*
  1261. ** This routine checks if there is a RESERVED lock held on the specified
  1262. ** file by this or any other process. If such a lock is held, set *pResOut
  1263. ** to a non-zero value otherwise *pResOut is set to zero. The return value
  1264. ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
  1265. */
  1266. static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
  1267. int rc = SQLITE_OK;
  1268. int reserved = 0;
  1269. unixFile *pFile = (unixFile*)id;
  1270. SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  1271. assert( pFile );
  1272. unixEnterMutex(); /* Because pFile->pInode is shared across threads */
  1273. /* Check if a thread in this process holds such a lock */
  1274. if( pFile->pInode->eFileLock>SHARED_LOCK ){
  1275. reserved = 1;
  1276. }
  1277. /* Otherwise see if some other process holds it.
  1278. */
  1279. #ifndef __DJGPP__
  1280. if( !reserved && !pFile->pInode->bProcessLock ){
  1281. struct flock lock;
  1282. lock.l_whence = SEEK_SET;
  1283. lock.l_start = RESERVED_BYTE;
  1284. lock.l_len = 1;
  1285. lock.l_type = F_WRLCK;
  1286. if( osFcntl(pFile->h, F_GETLK, &lock) ){
  1287. rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
  1288. pFile->lastErrno = errno;
  1289. } else if( lock.l_type!=F_UNLCK ){
  1290. reserved = 1;
  1291. }
  1292. }
  1293. #endif
  1294. unixLeaveMutex();
  1295. OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved));
  1296. *pResOut = reserved;
  1297. return rc;
  1298. }
  1299. /*
  1300. ** Attempt to set a system-lock on the file pFile. The lock is
  1301. ** described by pLock.
  1302. **
  1303. ** If the pFile was opened read/write from unix-excl, then the only lock
  1304. ** ever obtained is an exclusive lock, and it is obtained exactly once
  1305. ** the first time any lock is attempted. All subsequent system locking
  1306. ** operations become no-ops. Locking operations still happen internally,
  1307. ** in order to coordinate access between separate database connections
  1308. ** within this process, but all of that is handled in memory and the
  1309. ** operating system does not participate.
  1310. **
  1311. ** This function is a pass-through to fcntl(F_SETLK) if pFile is using
  1312. ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl"
  1313. ** and is read-only.
  1314. **
  1315. ** Zero is returned if the call completes successfully, or -1 if a call
  1316. ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).
  1317. */
  1318. static int unixFileLock(unixFile *pFile, struct flock *pLock){
  1319. int rc;
  1320. unixInodeInfo *pInode = pFile->pInode;
  1321. assert( unixMutexHeld() );
  1322. assert( pInode!=0 );
  1323. if( ((pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock)
  1324. && ((pFile->ctrlFlags & UNIXFILE_RDONLY)==0)
  1325. ){
  1326. if( pInode->bProcessLock==0 ){
  1327. struct flock lock;
  1328. assert( pInode->nLock==0 );
  1329. lock.l_whence = SEEK_SET;
  1330. lock.l_start = SHARED_FIRST;
  1331. lock.l_len = SHARED_SIZE;
  1332. lock.l_type = F_WRLCK;
  1333. rc = osFcntl(pFile->h, F_SETLK, &lock);
  1334. if( rc<0 ) return rc;
  1335. pInode->bProcessLock = 1;
  1336. pInode->nLock++;
  1337. }else{
  1338. rc = 0;
  1339. }
  1340. }else{
  1341. rc = osFcntl(pFile->h, F_SETLK, pLock);
  1342. }
  1343. return rc;
  1344. }
  1345. /*
  1346. ** Lock the file with the lock specified by parameter eFileLock - one
  1347. ** of the following:
  1348. **
  1349. ** (1) SHARED_LOCK
  1350. ** (2) RESERVED_LOCK
  1351. ** (3) PENDING_LOCK
  1352. ** (4) EXCLUSIVE_LOCK
  1353. **
  1354. ** Sometimes when requesting one lock state, additional lock states
  1355. ** are inserted in between. The locking might fail on one of the later
  1356. ** transitions leaving the lock state different from what it started but
  1357. ** still short of its goal. The following chart shows the allowed
  1358. ** transitions and the inserted intermediate states:
  1359. **
  1360. ** UNLOCKED -> SHARED
  1361. ** SHARED -> RESERVED
  1362. ** SHARED -> (PENDING) -> EXCLUSIVE
  1363. ** RESERVED -> (PENDING) -> EXCLUSIVE
  1364. ** PENDING -> EXCLUSIVE
  1365. **
  1366. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  1367. ** routine to lower a locking level.
  1368. */
  /*
  ** Parameters:
  **   id         File handle; it is cast to unixFile* internally.
  **   eFileLock  Requested lock level.  The asserts below require that it
  **              is never PENDING_LOCK, that it is SHARED_LOCK when the
  **              handle currently holds no lock, and that a RESERVED_LOCK
  **              request is made from SHARED_LOCK.
  **
  ** Returns SQLITE_OK on success, including the case where this handle
  ** already holds an equal or stronger lock.  Returns SQLITE_BUSY when a
  ** lock taken through another handle in this process precludes the
  ** request.  When a system lock call fails, the result is whatever
  ** sqliteErrorFromPosixError() maps errno to, or SQLITE_IOERR_UNLOCK if
  ** releasing the temporary PENDING lock fails.  For results other than
  ** SQLITE_BUSY on those system-call paths, errno is saved in
  ** pFile->lastErrno.
  */
  1369. static int unixLock(sqlite3_file *id, int eFileLock){
  1370. /* The following describes the implementation of the various locks and
  1371. ** lock transitions in terms of the POSIX advisory shared and exclusive
  1372. ** lock primitives (called read-locks and write-locks below, to avoid
  1373. ** confusion with SQLite lock names). The algorithms are complicated
  1374. ** slightly in order to be compatible with windows systems simultaneously
  1375. ** accessing the same database file, in case that is ever required.
  1376. **
  1377. ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
  1378. ** byte', each single bytes at well known offsets, and the 'shared byte
  1379. ** range', a range of 510 bytes at a well known offset.
  1380. **
  1381. ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  1382. ** byte'. If this is successful, a random byte from the 'shared byte
  1383. ** range' is read-locked and the lock on the 'pending byte' released.
  1384. **
  1385. ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  1386. ** A RESERVED lock is implemented by grabbing a write-lock on the
  1387. ** 'reserved byte'.
  1388. **
  1389. ** A process may only obtain a PENDING lock after it has obtained a
  1390. ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  1391. ** on the 'pending byte'. This ensures that no new SHARED locks can be
  1392. ** obtained, but existing SHARED locks are allowed to persist. A process
  1393. ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  1394. ** This property is used by the algorithm for rolling back a journal file
  1395. ** after a crash.
  1396. **
  1397. ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  1398. ** implemented by obtaining a write-lock on the entire 'shared byte
  1399. ** range'. Since all other locks require a read-lock on one of the bytes
  1400. ** within this range, this ensures that no other locks are held on the
  1401. ** database.
  1402. **
  1403. ** The reason a single byte cannot be used instead of the 'shared byte
  1404. ** range' is that some versions of windows do not support read-locks. By
  1405. ** locking a random byte from a range, concurrent SHARED locks may exist
  1406. ** even if the locking primitive used is always a write-lock.
  1407. */
  /* NOTE(review): the paragraph above speaks of read-locking "a random
  ** byte" of the shared range, but the SHARED path in this function
  ** read-locks the whole range (l_start=SHARED_FIRST, l_len=SHARED_SIZE). */
  1408. int rc = SQLITE_OK;
  1409. unixFile *pFile = (unixFile*)id;
  1410. unixInodeInfo *pInode;
  1411. struct flock lock;
  1412. int tErrno = 0;
  1413. assert( pFile );
  /* NOTE(review): this trace reads pFile->pInode->eFileLock and nShared
  ** before unixEnterMutex() is called further down; the values are used
  ** only for the trace message. */
  1414. OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
  1415. azFileLock(eFileLock), azFileLock(pFile->eFileLock),
  1416. azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared , getpid()));
  1417. /* If there is already a lock of this type or more restrictive on the
  1418. ** unixFile, do nothing. Don't use the end_lock: exit path, as
  1419. ** unixEnterMutex() hasn't been called yet.
  1420. */
  1421. if( pFile->eFileLock>=eFileLock ){
  1422. OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,
  1423. azFileLock(eFileLock)));
  1424. return SQLITE_OK;
  1425. }
  1426. /* Make sure the locking sequence is correct.
  1427. ** (1) We never move from unlocked to anything higher than shared lock.
  1428. ** (2) SQLite never explicitly requests a pending lock.
  1429. ** (3) A shared lock is always held when a reserve lock is requested.
  1430. */
  1431. assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  1432. assert( eFileLock!=PENDING_LOCK );
  1433. assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
  1434. /* This mutex is needed because pFile->pInode is shared across threads
  1435. */
  1436. unixEnterMutex();
  1437. pInode = pFile->pInode;
  1438. /* If some thread using this PID has a lock via a different unixFile*
  1439. ** handle that precludes the requested lock, return BUSY.
  1440. */
  1441. if( (pFile->eFileLock!=pInode->eFileLock &&
  1442. (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  1443. ){
  1444. rc = SQLITE_BUSY;
  1445. goto end_lock;
  1446. }
  1447. /* If a SHARED lock is requested, and some thread using this PID already
  1448. ** has a SHARED or RESERVED lock, then increment reference counts and
  1449. ** return SQLITE_OK.
  1450. */
  /* No system call is made on this path: only the in-memory counters
  ** nShared and nLock change. */
  1451. if( eFileLock==SHARED_LOCK &&
  1452. (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
  1453. assert( eFileLock==SHARED_LOCK );
  1454. assert( pFile->eFileLock==0 );
  1455. assert( pInode->nShared>0 );
  1456. pFile->eFileLock = SHARED_LOCK;
  1457. pInode->nShared++;
  1458. pInode->nLock++;
  1459. goto end_lock;
  1460. }
  1461. /* A PENDING lock is needed before acquiring a SHARED lock and before
  1462. ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
  1463. ** be released.
  1464. */
  1465. lock.l_len = 1L;
  1466. lock.l_whence = SEEK_SET;
  1467. if( eFileLock==SHARED_LOCK
  1468. || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  1469. ){
  /* Read-lock the pending byte for a SHARED request, write-lock it for an
  ** EXCLUSIVE request. */
  1470. lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
  1471. lock.l_start = PENDING_BYTE;
  1472. if( unixFileLock(pFile, &lock) ){
  /* Save errno at once, before any later call can overwrite it. */
  1473. tErrno = errno;
  1474. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
  1475. if( rc!=SQLITE_BUSY ){
  1476. pFile->lastErrno = tErrno;
  1477. }
  1478. goto end_lock;
  1479. }
  1480. }
  1481. /* If control gets to this point, then actually go ahead and make
  1482. ** operating system calls for the specified lock.
  1483. */
  1484. if( eFileLock==SHARED_LOCK ){
  1485. assert( pInode->nShared==0 );
  1486. assert( pInode->eFileLock==0 );
  1487. assert( rc==SQLITE_OK );
  1488. /* Now get the read-lock */
  /* lock.l_type is still F_RDLCK from the pending-byte request above. */
  1489. lock.l_start = SHARED_FIRST;
  1490. lock.l_len = SHARED_SIZE;
  1491. if( unixFileLock(pFile, &lock) ){
  1492. tErrno = errno;
  1493. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
  1494. }
  1495. /* Drop the temporary PENDING lock */
  /* The unlock is attempted even when the read-lock above failed.  The
  ** rc==SQLITE_OK test keeps an earlier read-lock error from being
  ** replaced by an unlock error. */
  1496. lock.l_start = PENDING_BYTE;
  1497. lock.l_len = 1L;
  1498. lock.l_type = F_UNLCK;
  1499. if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
  1500. /* This could happen with a network mount */
  1501. tErrno = errno;
  1502. rc = SQLITE_IOERR_UNLOCK;
  1503. }
  1504. if( rc ){
  1505. if( rc!=SQLITE_BUSY ){
  1506. pFile->lastErrno = tErrno;
  1507. }
  1508. goto end_lock;
  1509. }else{
  /* First SHARED lock on this inode within the process. */
  1510. pFile->eFileLock = SHARED_LOCK;
  1511. pInode->nLock++;
  1512. pInode->nShared = 1;
  1513. }
  1514. }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
  1515. /* We are trying for an exclusive lock but another thread in this
  1516. ** same process is still holding a shared lock. */
  1517. rc = SQLITE_BUSY;
  1518. }else{
  1519. /* The request was for a RESERVED or EXCLUSIVE lock. It is
  1520. ** assumed that there is a SHARED or greater lock on the file
  1521. ** already.
  1522. */
  1523. assert( 0!=pFile->eFileLock );
  1524. lock.l_type = F_WRLCK;
  1525. assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
  1526. if( eFileLock==RESERVED_LOCK ){
  1527. lock.l_start = RESERVED_BYTE;
  1528. lock.l_len = 1L;
  1529. }else{
  1530. lock.l_start = SHARED_FIRST;
  1531. lock.l_len = SHARED_SIZE;
  1532. }
  1533. if( unixFileLock(pFile, &lock) ){
  1534. tErrno = errno;
  1535. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
  1536. if( rc!=SQLITE_BUSY ){
  1537. pFile->lastErrno = tErrno;
  1538. }
  1539. }
  1540. }
  1541. #ifdef SQLITE_DEBUG
  1542. /* Set up the transaction-counter change checking flags when
  1543. ** transitioning from a SHARED to a RESERVED lock. The change
  1544. ** from SHARED to RESERVED marks the beginning of a normal
  1545. ** write operation (not a hot journal rollback).
  1546. */
  1547. if( rc==SQLITE_OK
  1548. && pFile->eFileLock<=SHARED_LOCK
  1549. && eFileLock==RESERVED_LOCK
  1550. ){
  1551. pFile->transCntrChng = 0;
  1552. pFile->dbUpdate = 0;
  1553. pFile->inNormalWrite = 1;
  1554. }
  1555. #endif
  /* On success, record the new level on both this handle and the shared
  ** inode.  If an EXCLUSIVE request fails after reaching this point, the
  ** write-lock on the pending byte is in place (it was either taken above
  ** or the handle was already at PENDING), so the state is recorded as
  ** PENDING_LOCK.  Failures to obtain the pending byte itself jump
  ** straight to end_lock and never get here. */
  1556. if( rc==SQLITE_OK ){
  1557. pFile->eFileLock = eFileLock;
  1558. pInode->eFileLock = eFileLock;
  1559. }else if( eFileLock==EXCLUSIVE_LOCK ){
  1560. pFile->eFileLock = PENDING_LOCK;
  1561. pInode->eFileLock = PENDING_LOCK;
  1562. }
  /* Every exit taken after unixEnterMutex() passes through this label, so
  ** the mutex is released exactly once. */
  1563. end_lock:
  1564. unixLeaveMutex();
  1565. OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),
  1566. rc==SQLITE_OK ? "ok" : "failed"));
  1567. return rc;
  1568. }
  1569. /*
  1570. ** Add the file descriptor used by file handle pFile to the corresponding
  1571. ** pUnused list.
  1572. */
  1573. static void setPendingFd(unixFile *pFile){
  1574. unixInodeInfo *pInode = pFile->pInode;
  1575. UnixUnusedFd *p = pFile->pUnused;
  1576. p->pNext = pInode->pUnused;
  1577. pInode->pUnused = p;
  1578. pFile->h = -1;
  1579. pFile->pUnused = 0;
  1580. }
  1581. /*
  1582. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  1583. ** must be either NO_LOCK or SHARED_LOCK.
  1584. **
  1585. ** If the locking level of the file descriptor is already at or below
  1586. ** the requested locking level, this routine is a no-op.
  1587. **
  1588. ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
  1589. ** the byte range is divided into 2 parts and the first part is unlocked then
  1590. ** set to a read lock, then the other part is simply unlocked. This works
  1591. ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
  1592. ** remove the write lock on a region when a read lock is set.
  1593. */
  1594. static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
  1595. unixFile *pFile = (unixFile*)id;
  1596. unixInodeInfo *pInode;
  1597. struct flock lock;
  1598. int rc = SQLITE_OK;
  1599. assert( pFile );
  1600. OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
  1601. pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
  1602. getpid()));
  1603. assert( eFileLock<=SHARED_LOCK );
  1604. if( pFile->eFileLock<=eFileLock ){
  1605. return SQLITE_OK;
  1606. }
  1607. unixEnterMutex();
  1608. pInode = pFile->pInode;
  1609. assert( pInode->nShared!=0 );
  1610. if( pFile->eFileLock>SHARED_LOCK ){
  1611. assert( pInode->eFileLock==pFile->eFileLock );
  1612. #ifdef SQLITE_DEBUG
  1613. /* When reducing a lock such that other processes can start
  1614. ** reading the database file again, make sure that the
  1615. ** transaction counter was updated if any part of the database
  1616. ** file changed. If the transaction counter is not updated,
  1617. ** other connections to the same file might not realize that
  1618. ** the file has changed and hence might not know to flush their
  1619. ** cache. The use of a stale cache can lead to database corruption.
  1620. */
  1621. pFile->inNormalWrite = 0;
  1622. #endif
  1623. /* downgrading to a shared lock on NFS involves clearing the write lock
  1624. ** before establishing the readlock - to avoid a race condition we downgrade
  1625. ** the lock in 2 blocks, so that part of the range will be covered by a
  1626. ** write lock until the rest is covered by a read lock:
  1627. ** 1: [WWWWW]
  1628. ** 2: [....W]
  1629. ** 3: [RRRRW]
  1630. ** 4: [RRRR.]
  1631. */
  1632. if( eFileLock==SHARED_LOCK ){
  1633. #if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
  1634. (void)handleNFSUnlock;
  1635. assert( handleNFSUnlock==0 );
  1636. #endif
  1637. #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  1638. if( handleNFSUnlock ){
  1639. int tErrno; /* Error code from system call errors */
  1640. off_t divSize = SHARED_SIZE - 1;
  1641. lock.l_type = F_UNLCK;
  1642. lock.l_whence = SEEK_SET;
  1643. lock.l_start = SHARED_FIRST;
  1644. lock.l_len = divSize;
  1645. if( unixFileLock(pFile, &lock)==(-1) ){
  1646. tErrno = errno;
  1647. rc = SQLITE_IOERR_UNLOCK;
  1648. if( IS_LOCK_ERROR(rc) ){
  1649. pFile->lastErrno = tErrno;
  1650. }
  1651. goto end_unlock;
  1652. }
  1653. lock.l_type = F_RDLCK;
  1654. lock.l_whence = SEEK_SET;
  1655. lock.l_start = SHARED_FIRST;
  1656. lock.l_len = divSize;
  1657. if( unixFileLock(pFile, &lock)==(-1) ){
  1658. tErrno = errno;
  1659. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
  1660. if( IS_LOCK_ERROR(rc) ){
  1661. pFile->lastErrno = tErrno;
  1662. }
  1663. goto end_unlock;
  1664. }
  1665. lock.l_type = F_UNLCK;
  1666. lock.l_whence = SEEK_SET;
  1667. lock.l_start = SHARED_FIRST+divSize;
  1668. lock.l_len = SHARED_SIZE-divSize;
  1669. if( unixFileLock(pFile, &lock)==(-1) ){
  1670. tErrno = errno;
  1671. rc = SQLITE_IOERR_UNLOCK;
  1672. if( IS_LOCK_ERROR(rc) ){
  1673. pFile->lastErrno = tErrno;
  1674. }
  1675. goto end_unlock;
  1676. }
  1677. }else
  1678. #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
  1679. {
  1680. lock.l_type = F_RDLCK;
  1681. lock.l_whence = SEEK_SET;
  1682. lock.l_start = SHARED_FIRST;
  1683. lock.l_len = SHARED_SIZE;
  1684. if( unixFileLock(pFile, &lock) ){
  1685. /* In theory, the call to unixFileLock() cannot fail because another
  1686. ** process is holding an incompatible lock. If it does, this
  1687. ** indicates that the other process is not following the locking
  1688. ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
  1689. ** SQLITE_BUSY would confuse the upper layer (in practice it causes
  1690. ** an assert to fail). */
  1691. rc = SQLITE_IOERR_RDLOCK;
  1692. pFile->lastErrno = errno;
  1693. goto end_unlock;
  1694. }
  1695. }
  1696. }
  1697. lock.l_type = F_UNLCK;
  1698. lock.l_whence = SEEK_SET;
  1699. lock.l_start = PENDING_BYTE;
  1700. lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
  1701. if( unixFileLock(pFile, &lock)==0 ){
  1702. pInode->eFileLock = SHARED_LOCK;
  1703. }else{
  1704. rc = SQLITE_IOERR_UNLOCK;
  1705. pFile->lastErrno = errno;
  1706. goto end_unlock;
  1707. }
  1708. }
  1709. if( eFileLock==NO_LOCK ){
  1710. /* Decrement the shared lock counter. Release the lock using an
  1711. ** OS call only when all threads in this same process have released
  1712. ** the lock.
  1713. */
  1714. pInode->nShared--;
  1715. if( pInode->nShared==0 ){
  1716. lock.l_type = F_UNLCK;
  1717. lock.l_whence = SEEK_SET;
  1718. lock.l_start = lock.l_len = 0L;
  1719. if( unixFileLock(pFile, &lock)==0 ){
  1720. pInode->eFileLock = NO_LOCK;
  1721. }else{
  1722. rc = SQLITE_IOERR_UNLOCK;
  1723. pFile->lastErrno = errno;
  1724. pInode->eFileLock = NO_LOCK;
  1725. pFile->eFileLock = NO_LOCK;
  1726. }
  1727. }
  1728. /* Decrement the count of locks against this same file. When the
  1729. ** count reaches zero, close any other file descriptors whose close
  1730. ** was deferred because of outstanding locks.
  1731. */
  1732. pInode->nLock--;
  1733. assert( pInode->nLock>=0 );
  1734. if( pInode->nLock==0 ){
  1735. closePendingFds(pFile);
  1736. }
  1737. }
  1738. end_unlock:
  1739. unixLeaveMutex();
  1740. if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
  1741. return rc;
  1742. }
  1743. /*
  1744. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  1745. ** must be either NO_LOCK or SHARED_LOCK.
  1746. **
  1747. ** If the locking level of the file descriptor is already at or below
  1748. ** the requested locking level, this routine is a no-op.
  1749. */
  1750. static int unixUnlock(sqlite3_file *id, int eFileLock){
  1751. assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
  1752. return posixUnlock(id, eFileLock, 0);
  1753. }
  1754. #if SQLITE_MAX_MMAP_SIZE>0
  1755. static int unixMapfile(unixFile *pFd, i64 nByte);
  1756. static void unixUnmapfile(unixFile *pFd);
  1757. #endif
  1758. /*
  1759. ** This function performs the parts of the "close file" operation
  1760. ** common to all locking schemes. It closes the directory and file
  1761. ** handles, if they are valid, and sets all fields of the unixFile
  1762. ** structure to 0.
  1763. **
  1764. ** It is *not* necessary to hold the mutex when this routine is called,
  1765. ** even on VxWorks. A mutex will be acquired on VxWorks by the
  1766. ** vxworksReleaseFileId() routine.
  1767. */
  1768. static int closeUnixFile(sqlite3_file *id){
  1769. unixFile *pFile = (unixFile*)id;
  1770. #if SQLITE_MAX_MMAP_SIZE>0
  1771. unixUnmapfile(pFile);
  1772. #endif
  1773. if( pFile->h>=0 ){
  1774. robust_close(pFile, pFile->h, __LINE__);
  1775. pFile->h = -1;
  1776. }
  1777. #if OS_VXWORKS
  1778. if( pFile->pId ){
  1779. if( pFile->ctrlFlags & UNIXFILE_DELETE ){
  1780. osUnlink(pFile->pId->zCanonicalName);
  1781. }
  1782. vxworksReleaseFileId(pFile->pId);
  1783. pFile->pId = 0;
  1784. }
  1785. #endif
  1786. OSTRACE(("CLOSE %-3d\n", pFile->h));
  1787. OpenCounter(-1);
  1788. sqlite3_free(pFile->pUnused);
  1789. memset(pFile, 0, sizeof(unixFile));
  1790. return SQLITE_OK;
  1791. }
  1792. /*
  1793. ** Close a file.
  1794. */
  1795. static int unixClose(sqlite3_file *id){
  1796. int rc = SQLITE_OK;
  1797. unixFile *pFile = (unixFile *)id;
  1798. verifyDbFile(pFile);
  1799. unixUnlock(id, NO_LOCK);
  1800. unixEnterMutex();
  1801. /* unixFile.pInode is always valid here. Otherwise, a different close
  1802. ** routine (e.g. nolockClose()) would be called instead.
  1803. */
  1804. assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 );
  1805. if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){
  1806. /* If there are outstanding locks, do not actually close the file just
  1807. ** yet because that would clear those locks. Instead, add the file
  1808. ** descriptor to pInode->pUnused list. It will be automatically closed
  1809. ** when the last lock is cleared.
  1810. */
  1811. setPendingFd(pFile);
  1812. }
  1813. releaseInodeInfo(pFile);
  1814. rc = closeUnixFile(id);
  1815. unixLeaveMutex();
  1816. return rc;
  1817. }
  1818. /************** End of the posix advisory lock implementation *****************
  1819. ******************************************************************************/
  1820. /******************************************************************************
  1821. ****************************** No-op Locking **********************************
  1822. **
  1823. ** Of the various locking implementations available, this is by far the
  1824. ** simplest: locking is ignored. No attempt is made to lock the database
  1825. ** file for reading or writing.
  1826. **
  1827. ** This locking mode is appropriate for use on read-only databases
  1828. ** (ex: databases that are burned into CD-ROM, for example.) It can
  1829. ** also be used if the application employs some external mechanism to
  1830. ** prevent simultaneous access of the same database by two or more
  1831. ** database connections. But there is a serious risk of database
  1832. ** corruption if this locking mode is used in situations where multiple
  1833. ** database connections are accessing the same database file at the same
  1834. ** time and one or more of those connections are writing.
  1835. */
  1836. static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
  1837. UNUSED_PARAMETER(NotUsed);
  1838. *pResOut = 0;
  1839. return SQLITE_OK;
  1840. }
  1841. static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
  1842. UNUSED_PARAMETER2(NotUsed, NotUsed2);
  1843. return SQLITE_OK;
  1844. }
  1845. static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
  1846. UNUSED_PARAMETER2(NotUsed, NotUsed2);
  1847. return SQLITE_OK;
  1848. }
  1849. /*
  1850. ** Close the file.
  1851. */
  1852. static int nolockClose(sqlite3_file *id) {
  1853. return closeUnixFile(id);
  1854. }
  1855. /******************* End of the no-op lock implementation *********************
  1856. ******************************************************************************/
  1857. /******************************************************************************
  1858. ************************* Begin dot-file Locking ******************************
  1859. **
  1860. ** The dotfile locking implementation uses the existence of separate lock
  1861. ** files (really a directory) to control access to the database. This works
  1862. ** on just about every filesystem imaginable. But there are serious downsides:
  1863. **
  1864. ** (1) There is zero concurrency. A single reader blocks all other
  1865. ** connections from reading or writing the database.
  1866. **
  1867. ** (2) An application crash or power loss can leave stale lock files
  1868. ** sitting around that need to be cleared manually.
  1869. **
  1870. ** Nevertheless, a dotlock is an appropriate locking mode for use if no
  1871. ** other locking strategy is available.
  1872. **
  1873. ** Dotfile locking works by creating a subdirectory in the same directory as
  1874. ** the database and with the same name but with a ".lock" extension added.
  1875. ** The existence of a lock directory implies an EXCLUSIVE lock. All other
  1876. ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
  1877. */
/*
** The file suffix added to the database filename in order to create the
** lock directory.
*/
  1882. #define DOTLOCK_SUFFIX ".lock"
  1883. /*
  1884. ** This routine checks if there is a RESERVED lock held on the specified
  1885. ** file by this or any other process. If such a lock is held, set *pResOut
  1886. ** to a non-zero value otherwise *pResOut is set to zero. The return value
  1887. ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
  1888. **
  1889. ** In dotfile locking, either a lock exists or it does not. So in this
  1890. ** variation of CheckReservedLock(), *pResOut is set to true if any lock
  1891. ** is held on the file and false if the file is unlocked.
  1892. */
  1893. static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
  1894. int rc = SQLITE_OK;
  1895. int reserved = 0;
  1896. unixFile *pFile = (unixFile*)id;
  1897. SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  1898. assert( pFile );
  1899. /* Check if a thread in this process holds such a lock */
  1900. if( pFile->eFileLock>SHARED_LOCK ){
  1901. /* Either this connection or some other connection in the same process
  1902. ** holds a lock on the file. No need to check further. */
  1903. reserved = 1;
  1904. }else{
  1905. /* The lock is held if and only if the lockfile exists */
  1906. const char *zLockFile = (const char*)pFile->lockingContext;
  1907. reserved = osAccess(zLockFile, 0)==0;
  1908. }
  1909. OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved));
  1910. *pResOut = reserved;
  1911. return rc;
  1912. }
  1913. /*
  1914. ** Lock the file with the lock specified by parameter eFileLock - one
  1915. ** of the following:
  1916. **
  1917. ** (1) SHARED_LOCK
  1918. ** (2) RESERVED_LOCK
  1919. ** (3) PENDING_LOCK
  1920. ** (4) EXCLUSIVE_LOCK
  1921. **
  1922. ** Sometimes when requesting one lock state, additional lock states
  1923. ** are inserted in between. The locking might fail on one of the later
  1924. ** transitions leaving the lock state different from what it started but
  1925. ** still short of its goal. The following chart shows the allowed
  1926. ** transitions and the inserted intermediate states:
  1927. **
  1928. ** UNLOCKED -> SHARED
  1929. ** SHARED -> RESERVED
  1930. ** SHARED -> (PENDING) -> EXCLUSIVE
  1931. ** RESERVED -> (PENDING) -> EXCLUSIVE
  1932. ** PENDING -> EXCLUSIVE
  1933. **
  1934. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  1935. ** routine to lower a locking level.
  1936. **
  1937. ** With dotfile locking, we really only support state (4): EXCLUSIVE.
  1938. ** But we track the other locking levels internally.
  1939. */
  1940. static int dotlockLock(sqlite3_file *id, int eFileLock) {
  1941. unixFile *pFile = (unixFile*)id;
  1942. char *zLockFile = (char *)pFile->lockingContext;
  1943. int rc = SQLITE_OK;
  1944. /* If we have any lock, then the lock file already exists. All we have
  1945. ** to do is adjust our internal record of the lock level.
  1946. */
  1947. if( pFile->eFileLock > NO_LOCK ){
  1948. pFile->eFileLock = eFileLock;
  1949. /* Always update the timestamp on the old file */
  1950. #ifdef HAVE_UTIME
  1951. utime(zLockFile, NULL);
  1952. #else
  1953. utimes(zLockFile, NULL);
  1954. #endif
  1955. return SQLITE_OK;
  1956. }
  1957. /* grab an exclusive lock */
  1958. rc = osMkdir(zLockFile, 0777);
  1959. if( rc<0 ){
  1960. /* failed to open/create the lock directory */
  1961. int tErrno = errno;
  1962. if( EEXIST == tErrno ){
  1963. rc = SQLITE_BUSY;
  1964. } else {
  1965. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
  1966. if( IS_LOCK_ERROR(rc) ){
  1967. pFile->lastErrno = tErrno;
  1968. }
  1969. }
  1970. return rc;
  1971. }
  1972. /* got it, set the type and return ok */
  1973. pFile->eFileLock = eFileLock;
  1974. return rc;
  1975. }
  1976. /*
  1977. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  1978. ** must be either NO_LOCK or SHARED_LOCK.
  1979. **
  1980. ** If the locking level of the file descriptor is already at or below
  1981. ** the requested locking level, this routine is a no-op.
  1982. **
  1983. ** When the locking level reaches NO_LOCK, delete the lock file.
  1984. */
  1985. static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
  1986. unixFile *pFile = (unixFile*)id;
  1987. char *zLockFile = (char *)pFile->lockingContext;
  1988. int rc;
  1989. assert( pFile );
  1990. OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,
  1991. pFile->eFileLock, getpid()));
  1992. assert( eFileLock<=SHARED_LOCK );
  1993. /* no-op if possible */
  1994. if( pFile->eFileLock==eFileLock ){
  1995. return SQLITE_OK;
  1996. }
  1997. /* To downgrade to shared, simply update our internal notion of the
  1998. ** lock state. No need to mess with the file on disk.
  1999. */
  2000. if( eFileLock==SHARED_LOCK ){
  2001. pFile->eFileLock = SHARED_LOCK;
  2002. return SQLITE_OK;
  2003. }
  2004. /* To fully unlock the database, delete the lock file */
  2005. assert( eFileLock==NO_LOCK );
  2006. rc = osRmdir(zLockFile);
  2007. if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile);
  2008. if( rc<0 ){
  2009. int tErrno = errno;
  2010. rc = 0;
  2011. if( ENOENT != tErrno ){
  2012. rc = SQLITE_IOERR_UNLOCK;
  2013. }
  2014. if( IS_LOCK_ERROR(rc) ){
  2015. pFile->lastErrno = tErrno;
  2016. }
  2017. return rc;
  2018. }
  2019. pFile->eFileLock = NO_LOCK;
  2020. return SQLITE_OK;
  2021. }
  2022. /*
  2023. ** Close a file. Make sure the lock has been released before closing.
  2024. */
  2025. static int dotlockClose(sqlite3_file *id) {
  2026. int rc = SQLITE_OK;
  2027. if( id ){
  2028. unixFile *pFile = (unixFile*)id;
  2029. dotlockUnlock(id, NO_LOCK);
  2030. sqlite3_free(pFile->lockingContext);
  2031. rc = closeUnixFile(id);
  2032. }
  2033. return rc;
  2034. }
  2035. /****************** End of the dot-file lock implementation *******************
  2036. ******************************************************************************/
  2037. /******************************************************************************
  2038. ************************** Begin flock Locking ********************************
  2039. **
  2040. ** Use the flock() system call to do file locking.
  2041. **
  2042. ** flock() locking is like dot-file locking in that the various
  2043. ** fine-grain locking levels supported by SQLite are collapsed into
  2044. ** a single exclusive lock. In other words, SHARED, RESERVED, and
  2045. ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite
  2046. ** still works when you do this, but concurrency is reduced since
  2047. ** only a single process can be reading the database at a time.
  2048. **
  2049. ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if
  2050. ** compiling for VXWORKS.
  2051. */
  2052. #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
  2053. /*
  2054. ** Retry flock() calls that fail with EINTR
  2055. */
  2056. #ifdef EINTR
  2057. static int robust_flock(int fd, int op){
  2058. int rc;
  2059. do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR );
  2060. return rc;
  2061. }
  2062. #else
  2063. # define robust_flock(a,b) flock(a,b)
  2064. #endif
  2065. /*
  2066. ** This routine checks if there is a RESERVED lock held on the specified
  2067. ** file by this or any other process. If such a lock is held, set *pResOut
  2068. ** to a non-zero value otherwise *pResOut is set to zero. The return value
  2069. ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
  2070. */
  2071. static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
  2072. int rc = SQLITE_OK;
  2073. int reserved = 0;
  2074. unixFile *pFile = (unixFile*)id;
  2075. SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  2076. assert( pFile );
  2077. /* Check if a thread in this process holds such a lock */
  2078. if( pFile->eFileLock>SHARED_LOCK ){
  2079. reserved = 1;
  2080. }
  2081. /* Otherwise see if some other process holds it. */
  2082. if( !reserved ){
  2083. /* attempt to get the lock */
  2084. int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB);
  2085. if( !lrc ){
  2086. /* got the lock, unlock it */
  2087. lrc = robust_flock(pFile->h, LOCK_UN);
  2088. if ( lrc ) {
  2089. int tErrno = errno;
  2090. /* unlock failed with an error */
  2091. lrc = SQLITE_IOERR_UNLOCK;
  2092. if( IS_LOCK_ERROR(lrc) ){
  2093. pFile->lastErrno = tErrno;
  2094. rc = lrc;
  2095. }
  2096. }
  2097. } else {
  2098. int tErrno = errno;
  2099. reserved = 1;
  2100. /* someone else might have it reserved */
  2101. lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
  2102. if( IS_LOCK_ERROR(lrc) ){
  2103. pFile->lastErrno = tErrno;
  2104. rc = lrc;
  2105. }
  2106. }
  2107. }
  2108. OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved));
  2109. #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
  2110. if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
  2111. rc = SQLITE_OK;
  2112. reserved=1;
  2113. }
  2114. #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
  2115. *pResOut = reserved;
  2116. return rc;
  2117. }
  2118. /*
  2119. ** Lock the file with the lock specified by parameter eFileLock - one
  2120. ** of the following:
  2121. **
  2122. ** (1) SHARED_LOCK
  2123. ** (2) RESERVED_LOCK
  2124. ** (3) PENDING_LOCK
  2125. ** (4) EXCLUSIVE_LOCK
  2126. **
  2127. ** Sometimes when requesting one lock state, additional lock states
  2128. ** are inserted in between. The locking might fail on one of the later
  2129. ** transitions leaving the lock state different from what it started but
  2130. ** still short of its goal. The following chart shows the allowed
  2131. ** transitions and the inserted intermediate states:
  2132. **
  2133. ** UNLOCKED -> SHARED
  2134. ** SHARED -> RESERVED
  2135. ** SHARED -> (PENDING) -> EXCLUSIVE
  2136. ** RESERVED -> (PENDING) -> EXCLUSIVE
  2137. ** PENDING -> EXCLUSIVE
  2138. **
  2139. ** flock() only really support EXCLUSIVE locks. We track intermediate
  2140. ** lock states in the sqlite3_file structure, but all locks SHARED or
  2141. ** above are really EXCLUSIVE locks and exclude all other processes from
  2142. ** access the file.
  2143. **
  2144. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  2145. ** routine to lower a locking level.
  2146. */
  2147. static int flockLock(sqlite3_file *id, int eFileLock) {
  2148. int rc = SQLITE_OK;
  2149. unixFile *pFile = (unixFile*)id;
  2150. assert( pFile );
  2151. /* if we already have a lock, it is exclusive.
  2152. ** Just adjust level and punt on outta here. */
  2153. if (pFile->eFileLock > NO_LOCK) {
  2154. pFile->eFileLock = eFileLock;
  2155. return SQLITE_OK;
  2156. }
  2157. /* grab an exclusive lock */
  2158. if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) {
  2159. int tErrno = errno;
  2160. /* didn't get, must be busy */
  2161. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
  2162. if( IS_LOCK_ERROR(rc) ){
  2163. pFile->lastErrno = tErrno;
  2164. }
  2165. } else {
  2166. /* got it, set the type and return ok */
  2167. pFile->eFileLock = eFileLock;
  2168. }
  2169. OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock),
  2170. rc==SQLITE_OK ? "ok" : "failed"));
  2171. #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
  2172. if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
  2173. rc = SQLITE_BUSY;
  2174. }
  2175. #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
  2176. return rc;
  2177. }
  2178. /*
  2179. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  2180. ** must be either NO_LOCK or SHARED_LOCK.
  2181. **
  2182. ** If the locking level of the file descriptor is already at or below
  2183. ** the requested locking level, this routine is a no-op.
  2184. */
  2185. static int flockUnlock(sqlite3_file *id, int eFileLock) {
  2186. unixFile *pFile = (unixFile*)id;
  2187. assert( pFile );
  2188. OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock,
  2189. pFile->eFileLock, getpid()));
  2190. assert( eFileLock<=SHARED_LOCK );
  2191. /* no-op if possible */
  2192. if( pFile->eFileLock==eFileLock ){
  2193. return SQLITE_OK;
  2194. }
  2195. /* shared can just be set because we always have an exclusive */
  2196. if (eFileLock==SHARED_LOCK) {
  2197. pFile->eFileLock = eFileLock;
  2198. return SQLITE_OK;
  2199. }
  2200. /* no, really, unlock. */
  2201. if( robust_flock(pFile->h, LOCK_UN) ){
  2202. #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
  2203. return SQLITE_OK;
  2204. #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
  2205. return SQLITE_IOERR_UNLOCK;
  2206. }else{
  2207. pFile->eFileLock = NO_LOCK;
  2208. return SQLITE_OK;
  2209. }
  2210. }
  2211. /*
  2212. ** Close a file.
  2213. */
  2214. static int flockClose(sqlite3_file *id) {
  2215. int rc = SQLITE_OK;
  2216. if( id ){
  2217. flockUnlock(id, NO_LOCK);
  2218. rc = closeUnixFile(id);
  2219. }
  2220. return rc;
  2221. }
  2222. #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */
  2223. /******************* End of the flock lock implementation *********************
  2224. ******************************************************************************/
  2225. /******************************************************************************
  2226. ************************ Begin Named Semaphore Locking ************************
  2227. **
  2228. ** Named semaphore locking is only supported on VxWorks.
  2229. **
  2230. ** Semaphore locking is like dot-lock and flock in that it really only
  2231. ** supports EXCLUSIVE locking. Only a single process can read or write
  2232. ** the database file at a time. This reduces potential concurrency, but
  2233. ** makes the lock implementation much easier.
  2234. */
  2235. #if OS_VXWORKS
  2236. /*
  2237. ** This routine checks if there is a RESERVED lock held on the specified
  2238. ** file by this or any other process. If such a lock is held, set *pResOut
  2239. ** to a non-zero value otherwise *pResOut is set to zero. The return value
  2240. ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
  2241. */
  2242. static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
  2243. int rc = SQLITE_OK;
  2244. int reserved = 0;
  2245. unixFile *pFile = (unixFile*)id;
  2246. SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  2247. assert( pFile );
  2248. /* Check if a thread in this process holds such a lock */
  2249. if( pFile->eFileLock>SHARED_LOCK ){
  2250. reserved = 1;
  2251. }
  2252. /* Otherwise see if some other process holds it. */
  2253. if( !reserved ){
  2254. sem_t *pSem = pFile->pInode->pSem;
  2255. struct stat statBuf;
  2256. if( sem_trywait(pSem)==-1 ){
  2257. int tErrno = errno;
  2258. if( EAGAIN != tErrno ){
  2259. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
  2260. pFile->lastErrno = tErrno;
  2261. } else {
  2262. /* someone else has the lock when we are in NO_LOCK */
  2263. reserved = (pFile->eFileLock < SHARED_LOCK);
  2264. }
  2265. }else{
  2266. /* we could have it if we want it */
  2267. sem_post(pSem);
  2268. }
  2269. }
  2270. OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved));
  2271. *pResOut = reserved;
  2272. return rc;
  2273. }
  2274. /*
  2275. ** Lock the file with the lock specified by parameter eFileLock - one
  2276. ** of the following:
  2277. **
  2278. ** (1) SHARED_LOCK
  2279. ** (2) RESERVED_LOCK
  2280. ** (3) PENDING_LOCK
  2281. ** (4) EXCLUSIVE_LOCK
  2282. **
  2283. ** Sometimes when requesting one lock state, additional lock states
  2284. ** are inserted in between. The locking might fail on one of the later
  2285. ** transitions leaving the lock state different from what it started but
  2286. ** still short of its goal. The following chart shows the allowed
  2287. ** transitions and the inserted intermediate states:
  2288. **
  2289. ** UNLOCKED -> SHARED
  2290. ** SHARED -> RESERVED
  2291. ** SHARED -> (PENDING) -> EXCLUSIVE
  2292. ** RESERVED -> (PENDING) -> EXCLUSIVE
  2293. ** PENDING -> EXCLUSIVE
  2294. **
  2295. ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate
  2296. ** lock states in the sqlite3_file structure, but all locks SHARED or
  2297. ** above are really EXCLUSIVE locks and exclude all other processes from
  2298. ** access the file.
  2299. **
  2300. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  2301. ** routine to lower a locking level.
  2302. */
  2303. static int semLock(sqlite3_file *id, int eFileLock) {
  2304. unixFile *pFile = (unixFile*)id;
  2305. int fd;
  2306. sem_t *pSem = pFile->pInode->pSem;
  2307. int rc = SQLITE_OK;
  2308. /* if we already have a lock, it is exclusive.
  2309. ** Just adjust level and punt on outta here. */
  2310. if (pFile->eFileLock > NO_LOCK) {
  2311. pFile->eFileLock = eFileLock;
  2312. rc = SQLITE_OK;
  2313. goto sem_end_lock;
  2314. }
  2315. /* lock semaphore now but bail out when already locked. */
  2316. if( sem_trywait(pSem)==-1 ){
  2317. rc = SQLITE_BUSY;
  2318. goto sem_end_lock;
  2319. }
  2320. /* got it, set the type and return ok */
  2321. pFile->eFileLock = eFileLock;
  2322. sem_end_lock:
  2323. return rc;
  2324. }
  2325. /*
  2326. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  2327. ** must be either NO_LOCK or SHARED_LOCK.
  2328. **
  2329. ** If the locking level of the file descriptor is already at or below
  2330. ** the requested locking level, this routine is a no-op.
  2331. */
  2332. static int semUnlock(sqlite3_file *id, int eFileLock) {
  2333. unixFile *pFile = (unixFile*)id;
  2334. sem_t *pSem = pFile->pInode->pSem;
  2335. assert( pFile );
  2336. assert( pSem );
  2337. OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock,
  2338. pFile->eFileLock, getpid()));
  2339. assert( eFileLock<=SHARED_LOCK );
  2340. /* no-op if possible */
  2341. if( pFile->eFileLock==eFileLock ){
  2342. return SQLITE_OK;
  2343. }
  2344. /* shared can just be set because we always have an exclusive */
  2345. if (eFileLock==SHARED_LOCK) {
  2346. pFile->eFileLock = eFileLock;
  2347. return SQLITE_OK;
  2348. }
  2349. /* no, really unlock. */
  2350. if ( sem_post(pSem)==-1 ) {
  2351. int rc, tErrno = errno;
  2352. rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
  2353. if( IS_LOCK_ERROR(rc) ){
  2354. pFile->lastErrno = tErrno;
  2355. }
  2356. return rc;
  2357. }
  2358. pFile->eFileLock = NO_LOCK;
  2359. return SQLITE_OK;
  2360. }
  2361. /*
  2362. ** Close a file.
  2363. */
  2364. static int semClose(sqlite3_file *id) {
  2365. if( id ){
  2366. unixFile *pFile = (unixFile*)id;
  2367. semUnlock(id, NO_LOCK);
  2368. assert( pFile );
  2369. unixEnterMutex();
  2370. releaseInodeInfo(pFile);
  2371. unixLeaveMutex();
  2372. closeUnixFile(id);
  2373. }
  2374. return SQLITE_OK;
  2375. }
  2376. #endif /* OS_VXWORKS */
  2377. /*
  2378. ** Named semaphore locking is only available on VxWorks.
  2379. **
  2380. *************** End of the named semaphore lock implementation ****************
  2381. ******************************************************************************/
  2382. /******************************************************************************
  2383. *************************** Begin AFP Locking *********************************
  2384. **
  2385. ** AFP is the Apple Filing Protocol. AFP is a network filesystem found
  2386. ** on Apple Macintosh computers - both OS9 and OSX.
  2387. **
  2388. ** Third-party implementations of AFP are available. But this code here
  2389. ** only works on OSX.
  2390. */
  2391. #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  /*
  ** All state that is specific to the AFP locking style is kept in an
  ** afpLockingContext, reached through unixFile.lockingContext.
  */
  typedef struct afpLockingContext {
    int reserved;          /* Set by afpLock() once the RESERVED byte is locked,
                           ** cleared by afpUnlock() when it is released */
    const char *dbPath;    /* Name of the open file */
  } afpLockingContext;
  /*
  ** Parameter block that afpSetLock() fills in and hands to fsctl() together
  ** with the afpfsByteRangeLock2FSCTL command to lock or unlock a byte range.
  ** afpSetLock() sets every field except retRangeStart before the call.
  */
  struct ByteRangeLockPB2
  {
    unsigned long long offset;        /* offset to first byte to lock */
    unsigned long long length;        /* nbr of bytes to lock */
    unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
    unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
    unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
    int fd;                           /* file desc to assoc this lock with */
  };

  /* fsctl() command code used with struct ByteRangeLockPB2 */
  #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
  2410. /*
  2411. ** This is a utility for setting or clearing a bit-range lock on an
  2412. ** AFP filesystem.
  2413. **
  2414. ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
  2415. */
  2416. static int afpSetLock(
  2417. const char *path, /* Name of the file to be locked or unlocked */
  2418. unixFile *pFile, /* Open file descriptor on path */
  2419. unsigned long long offset, /* First byte to be locked */
  2420. unsigned long long length, /* Number of bytes to lock */
  2421. int setLockFlag /* True to set lock. False to clear lock */
  2422. ){
  2423. struct ByteRangeLockPB2 pb;
  2424. int err;
  2425. pb.unLockFlag = setLockFlag ? 0 : 1;
  2426. pb.startEndFlag = 0;
  2427. pb.offset = offset;
  2428. pb.length = length;
  2429. pb.fd = pFile->h;
  2430. OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",
  2431. (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
  2432. offset, length));
  2433. err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
  2434. if ( err==-1 ) {
  2435. int rc;
  2436. int tErrno = errno;
  2437. OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
  2438. path, tErrno, strerror(tErrno)));
  2439. #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
  2440. rc = SQLITE_BUSY;
  2441. #else
  2442. rc = sqliteErrorFromPosixError(tErrno,
  2443. setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
  2444. #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
  2445. if( IS_LOCK_ERROR(rc) ){
  2446. pFile->lastErrno = tErrno;
  2447. }
  2448. return rc;
  2449. } else {
  2450. return SQLITE_OK;
  2451. }
  2452. }
  2453. /*
  2454. ** This routine checks if there is a RESERVED lock held on the specified
  2455. ** file by this or any other process. If such a lock is held, set *pResOut
  2456. ** to a non-zero value otherwise *pResOut is set to zero. The return value
  2457. ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
  2458. */
  2459. static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
  2460. int rc = SQLITE_OK;
  2461. int reserved = 0;
  2462. unixFile *pFile = (unixFile*)id;
  2463. afpLockingContext *context;
  2464. SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  2465. assert( pFile );
  2466. context = (afpLockingContext *) pFile->lockingContext;
  2467. if( context->reserved ){
  2468. *pResOut = 1;
  2469. return SQLITE_OK;
  2470. }
  2471. unixEnterMutex(); /* Because pFile->pInode is shared across threads */
  2472. /* Check if a thread in this process holds such a lock */
  2473. if( pFile->pInode->eFileLock>SHARED_LOCK ){
  2474. reserved = 1;
  2475. }
  2476. /* Otherwise see if some other process holds it.
  2477. */
  2478. if( !reserved ){
  2479. /* lock the RESERVED byte */
  2480. int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
  2481. if( SQLITE_OK==lrc ){
  2482. /* if we succeeded in taking the reserved lock, unlock it to restore
  2483. ** the original state */
  2484. lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
  2485. } else {
  2486. /* if we failed to get the lock then someone else must have it */
  2487. reserved = 1;
  2488. }
  2489. if( IS_LOCK_ERROR(lrc) ){
  2490. rc=lrc;
  2491. }
  2492. }
  2493. unixLeaveMutex();
  2494. OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved));
  2495. *pResOut = reserved;
  2496. return rc;
  2497. }
  2498. /*
  2499. ** Lock the file with the lock specified by parameter eFileLock - one
  2500. ** of the following:
  2501. **
  2502. ** (1) SHARED_LOCK
  2503. ** (2) RESERVED_LOCK
  2504. ** (3) PENDING_LOCK
  2505. ** (4) EXCLUSIVE_LOCK
  2506. **
  2507. ** Sometimes when requesting one lock state, additional lock states
  2508. ** are inserted in between. The locking might fail on one of the later
  2509. ** transitions leaving the lock state different from what it started but
  2510. ** still short of its goal. The following chart shows the allowed
  2511. ** transitions and the inserted intermediate states:
  2512. **
  2513. ** UNLOCKED -> SHARED
  2514. ** SHARED -> RESERVED
  2515. ** SHARED -> (PENDING) -> EXCLUSIVE
  2516. ** RESERVED -> (PENDING) -> EXCLUSIVE
  2517. ** PENDING -> EXCLUSIVE
  2518. **
  2519. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  2520. ** routine to lower a locking level.
  2521. */
  2522. static int afpLock(sqlite3_file *id, int eFileLock){
  2523. int rc = SQLITE_OK;
  2524. unixFile *pFile = (unixFile*)id;
  2525. unixInodeInfo *pInode = pFile->pInode;
  2526. afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
  2527. assert( pFile );
  2528. OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h,
  2529. azFileLock(eFileLock), azFileLock(pFile->eFileLock),
  2530. azFileLock(pInode->eFileLock), pInode->nShared , getpid()));
  2531. /* If there is already a lock of this type or more restrictive on the
  2532. ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
  2533. ** unixEnterMutex() hasn't been called yet.
  2534. */
  2535. if( pFile->eFileLock>=eFileLock ){
  2536. OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h,
  2537. azFileLock(eFileLock)));
  2538. return SQLITE_OK;
  2539. }
  2540. /* Make sure the locking sequence is correct
  2541. ** (1) We never move from unlocked to anything higher than shared lock.
  2542. ** (2) SQLite never explicitly requests a pendig lock.
  2543. ** (3) A shared lock is always held when a reserve lock is requested.
  2544. */
  2545. assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  2546. assert( eFileLock!=PENDING_LOCK );
  2547. assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
  2548. /* This mutex is needed because pFile->pInode is shared across threads
  2549. */
  2550. unixEnterMutex();
  2551. pInode = pFile->pInode;
  2552. /* If some thread using this PID has a lock via a different unixFile*
  2553. ** handle that precludes the requested lock, return BUSY.
  2554. */
  2555. if( (pFile->eFileLock!=pInode->eFileLock &&
  2556. (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  2557. ){
  2558. rc = SQLITE_BUSY;
  2559. goto afp_end_lock;
  2560. }
  2561. /* If a SHARED lock is requested, and some thread using this PID already
  2562. ** has a SHARED or RESERVED lock, then increment reference counts and
  2563. ** return SQLITE_OK.
  2564. */
  2565. if( eFileLock==SHARED_LOCK &&
  2566. (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
  2567. assert( eFileLock==SHARED_LOCK );
  2568. assert( pFile->eFileLock==0 );
  2569. assert( pInode->nShared>0 );
  2570. pFile->eFileLock = SHARED_LOCK;
  2571. pInode->nShared++;
  2572. pInode->nLock++;
  2573. goto afp_end_lock;
  2574. }
  2575. /* A PENDING lock is needed before acquiring a SHARED lock and before
  2576. ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
  2577. ** be released.
  2578. */
  2579. if( eFileLock==SHARED_LOCK
  2580. || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  2581. ){
  2582. int failed;
  2583. failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
  2584. if (failed) {
  2585. rc = failed;
  2586. goto afp_end_lock;
  2587. }
  2588. }
  2589. /* If control gets to this point, then actually go ahead and make
  2590. ** operating system calls for the specified lock.
  2591. */
  2592. if( eFileLock==SHARED_LOCK ){
  2593. int lrc1, lrc2, lrc1Errno = 0;
  2594. long lk, mask;
  2595. assert( pInode->nShared==0 );
  2596. assert( pInode->eFileLock==0 );
  2597. mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff;
  2598. /* Now get the read-lock SHARED_LOCK */
  2599. /* note that the quality of the randomness doesn't matter that much */
  2600. lk = random();
  2601. pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1);
  2602. lrc1 = afpSetLock(context->dbPath, pFile,
  2603. SHARED_FIRST+pInode->sharedByte, 1, 1);
  2604. if( IS_LOCK_ERROR(lrc1) ){
  2605. lrc1Errno = pFile->lastErrno;
  2606. }
  2607. /* Drop the temporary PENDING lock */
  2608. lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
  2609. if( IS_LOCK_ERROR(lrc1) ) {
  2610. pFile->lastErrno = lrc1Errno;
  2611. rc = lrc1;
  2612. goto afp_end_lock;
  2613. } else if( IS_LOCK_ERROR(lrc2) ){
  2614. rc = lrc2;
  2615. goto afp_end_lock;
  2616. } else if( lrc1 != SQLITE_OK ) {
  2617. rc = lrc1;
  2618. } else {
  2619. pFile->eFileLock = SHARED_LOCK;
  2620. pInode->nLock++;
  2621. pInode->nShared = 1;
  2622. }
  2623. }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
  2624. /* We are trying for an exclusive lock but another thread in this
  2625. ** same process is still holding a shared lock. */
  2626. rc = SQLITE_BUSY;
  2627. }else{
  2628. /* The request was for a RESERVED or EXCLUSIVE lock. It is
  2629. ** assumed that there is a SHARED or greater lock on the file
  2630. ** already.
  2631. */
  2632. int failed = 0;
  2633. assert( 0!=pFile->eFileLock );
  2634. if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) {
  2635. /* Acquire a RESERVED lock */
  2636. failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
  2637. if( !failed ){
  2638. context->reserved = 1;
  2639. }
  2640. }
  2641. if (!failed && eFileLock == EXCLUSIVE_LOCK) {
  2642. /* Acquire an EXCLUSIVE lock */
  2643. /* Remove the shared lock before trying the range. we'll need to
  2644. ** reestablish the shared lock if we can't get the afpUnlock
  2645. */
  2646. if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST +
  2647. pInode->sharedByte, 1, 0)) ){
  2648. int failed2 = SQLITE_OK;
  2649. /* now attemmpt to get the exclusive lock range */
  2650. failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST,
  2651. SHARED_SIZE, 1);
  2652. if( failed && (failed2 = afpSetLock(context->dbPath, pFile,
  2653. SHARED_FIRST + pInode->sharedByte, 1, 1)) ){
  2654. /* Can't reestablish the shared lock. Sqlite can't deal, this is
  2655. ** a critical I/O error
  2656. */
  2657. rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 :
  2658. SQLITE_IOERR_LOCK;
  2659. goto afp_end_lock;
  2660. }
  2661. }else{
  2662. rc = failed;
  2663. }
  2664. }
  2665. if( failed ){
  2666. rc = failed;
  2667. }
  2668. }
  2669. if( rc==SQLITE_OK ){
  2670. pFile->eFileLock = eFileLock;
  2671. pInode->eFileLock = eFileLock;
  2672. }else if( eFileLock==EXCLUSIVE_LOCK ){
  2673. pFile->eFileLock = PENDING_LOCK;
  2674. pInode->eFileLock = PENDING_LOCK;
  2675. }
  2676. afp_end_lock:
  2677. unixLeaveMutex();
  2678. OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock),
  2679. rc==SQLITE_OK ? "ok" : "failed"));
  2680. return rc;
  2681. }
  /*
  ** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
  ** must be either NO_LOCK or SHARED_LOCK.
  **
  ** If the locking level of the file descriptor is already at or below
  ** the requested locking level, this routine is a no-op.
  **
  ** Returns SQLITE_OK on success.  Otherwise the result of the first
  ** afpSetLock() call that failed is returned and pFile->eFileLock is left
  ** as it was on entry.
  */
  static int afpUnlock(sqlite3_file *id, int eFileLock) {
    int rc = SQLITE_OK;
    unixFile *pFile = (unixFile*)id;
    unixInodeInfo *pInode;
    afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
    /* Set when the full shared range was unlocked (or that unlock failed)
    ** without re-acquiring the single shared byte; the NO_LOCK step below
    ** then does not try to release the shared byte. */
    int skipShared = 0;
  #ifdef SQLITE_TEST
    /* Only assigned by the simulated-I/O-error hooks in this routine */
    int h = pFile->h;
  #endif

    assert( pFile );
    OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock,
             pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
             getpid()));

    assert( eFileLock<=SHARED_LOCK );
    /* Already at or below the requested level: nothing to do.  The mutex
    ** has not been taken yet, so return directly. */
    if( pFile->eFileLock<=eFileLock ){
      return SQLITE_OK;
    }
    unixEnterMutex();
    pInode = pFile->pInode;
    assert( pInode->nShared!=0 );
    /* Step 1: give up everything above SHARED (exclusive range, PENDING
    ** byte, RESERVED byte), stopping at the first failure. */
    if( pFile->eFileLock>SHARED_LOCK ){
      assert( pInode->eFileLock==pFile->eFileLock );
      SimulateIOErrorBenign(1);
      SimulateIOError( h=(-1) )
      SimulateIOErrorBenign(0);

  #ifdef SQLITE_DEBUG
      /* When reducing a lock such that other processes can start
      ** reading the database file again, make sure that the
      ** transaction counter was updated if any part of the database
      ** file changed.  If the transaction counter is not updated,
      ** other connections to the same file might not realize that
      ** the file has changed and hence might not know to flush their
      ** cache.  The use of a stale cache can lead to database corruption.
      */
      assert( pFile->inNormalWrite==0
              || pFile->dbUpdate==0
              || pFile->transCntrChng==1 );
      pFile->inNormalWrite = 0;
  #endif

      if( pFile->eFileLock==EXCLUSIVE_LOCK ){
        /* Release the whole shared range that made up the exclusive lock */
        rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
        if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){
          /* only re-establish the shared lock if necessary */
          int sharedLockByte = SHARED_FIRST+pInode->sharedByte;
          rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1);
        } else {
          skipShared = 1;
        }
      }
      if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){
        rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
      }
      if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){
        rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
        if( !rc ){
          context->reserved = 0;
        }
      }
      if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){
        pInode->eFileLock = SHARED_LOCK;
      }
    }
    /* Step 2: when going all the way to NO_LOCK, drop this handle's share */
    if( rc==SQLITE_OK && eFileLock==NO_LOCK ){

      /* Decrement the shared lock counter.  Release the lock using an
      ** OS call only when all threads in this same process have released
      ** the lock.
      */
      unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte;
      pInode->nShared--;
      if( pInode->nShared==0 ){
        SimulateIOErrorBenign(1);
        SimulateIOError( h=(-1) )
        SimulateIOErrorBenign(0);
        if( !skipShared ){
          rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0);
        }
        if( !rc ){
          pInode->eFileLock = NO_LOCK;
          pFile->eFileLock = NO_LOCK;
        }
      }
      if( rc==SQLITE_OK ){
        pInode->nLock--;
        assert( pInode->nLock>=0 );
        /* No locks remain on the inode: deferred descriptors can be closed */
        if( pInode->nLock==0 ){
          closePendingFds(pFile);
        }
      }
    }

    unixLeaveMutex();
    /* Record the new level on the handle only if every step succeeded */
    if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
    return rc;
  }
  2782. /*
  2783. ** Close a file & cleanup AFP specific locking context
  2784. */
  2785. static int afpClose(sqlite3_file *id) {
  2786. int rc = SQLITE_OK;
  2787. if( id ){
  2788. unixFile *pFile = (unixFile*)id;
  2789. afpUnlock(id, NO_LOCK);
  2790. unixEnterMutex();
  2791. if( pFile->pInode && pFile->pInode->nLock ){
  2792. /* If there are outstanding locks, do not actually close the file just
  2793. ** yet because that would clear those locks. Instead, add the file
  2794. ** descriptor to pInode->aPending. It will be automatically closed when
  2795. ** the last lock is cleared.
  2796. */
  2797. setPendingFd(pFile);
  2798. }
  2799. releaseInodeInfo(pFile);
  2800. sqlite3_free(pFile->lockingContext);
  2801. rc = closeUnixFile(id);
  2802. unixLeaveMutex();
  2803. }
  2804. return rc;
  2805. }
  2806. #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
  2807. /*
  2808. ** The code above is the AFP lock implementation. The code is specific
  2809. ** to MacOSX and does not work on other unix platforms. No alternative
  2810. ** is available. If you don't compile for a mac, then the "unix-afp"
  2811. ** VFS is not available.
  2812. **
  2813. ********************* End of the AFP lock implementation **********************
  2814. ******************************************************************************/
  2815. /******************************************************************************
  2816. *************************** Begin NFS Locking ********************************/
  2817. #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  2818. /*
  2819. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  2820. ** must be either NO_LOCK or SHARED_LOCK.
  2821. **
  2822. ** If the locking level of the file descriptor is already at or below
  2823. ** the requested locking level, this routine is a no-op.
  2824. */
  2825. static int nfsUnlock(sqlite3_file *id, int eFileLock){
  2826. return posixUnlock(id, eFileLock, 1);
  2827. }
  2828. #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
  2829. /*
  2830. ** The code above is the NFS lock implementation. The code is specific
  2831. ** to MacOSX and does not work on other unix platforms. No alternative
  2832. ** is available.
  2833. **
  2834. ********************* End of the NFS lock implementation **********************
  2835. ******************************************************************************/
  2836. /******************************************************************************
  2837. **************** Non-locking sqlite3_file methods *****************************
  2838. **
  2839. ** The next division contains implementations for all methods of the
  2840. ** sqlite3_file object other than the locking methods. The locking
  2841. ** methods were defined in divisions above (one locking method per
  2842. ** division). Those methods that are common to all locking modes
  2843. ** are gathered together into this division.
  2844. */
  2845. /*
  2846. ** Seek to the offset passed as the second argument, then read cnt
  2847. ** bytes into pBuf. Return the number of bytes actually read.
  2848. **
  2849. ** NB: If you define USE_PREAD or USE_PREAD64, then it might also
  2850. ** be necessary to define _XOPEN_SOURCE to be 500. This varies from
  2851. ** one system to another. Since SQLite does not define USE_PREAD
  2852. ** any any form by default, we will not attempt to define _XOPEN_SOURCE.
  2853. ** See tickets #2741 and #2681.
  2854. **
  2855. ** To avoid stomping the errno value on a failed read the lastErrno value
  2856. ** is set before returning.
  2857. */
  2858. static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
  2859. int got;
  2860. int prior = 0;
  2861. #if (!defined(USE_PREAD) && !defined(USE_PREAD64))
  2862. i64 newOffset;
  2863. #endif
  2864. TIMER_START;
  2865. assert( cnt==(cnt&0x1ffff) );
  2866. assert( id->h>2 );
  2867. cnt &= 0x1ffff;
  2868. do{
  2869. #if defined(USE_PREAD)
  2870. got = osPread(id->h, pBuf, cnt, offset);
  2871. SimulateIOError( got = -1 );
  2872. #elif defined(USE_PREAD64)
  2873. got = osPread64(id->h, pBuf, cnt, offset);
  2874. SimulateIOError( got = -1 );
  2875. #else
  2876. newOffset = lseek(id->h, offset, SEEK_SET);
  2877. SimulateIOError( newOffset-- );
  2878. if( newOffset!=offset ){
  2879. if( newOffset == -1 ){
  2880. ((unixFile*)id)->lastErrno = errno;
  2881. }else{
  2882. ((unixFile*)id)->lastErrno = 0;
  2883. }
  2884. return -1;
  2885. }
  2886. got = osRead(id->h, pBuf, cnt);
  2887. #endif
  2888. if( got==cnt ) break;
  2889. if( got<0 ){
  2890. if( errno==EINTR ){ got = 1; continue; }
  2891. prior = 0;
  2892. ((unixFile*)id)->lastErrno = errno;
  2893. break;
  2894. }else if( got>0 ){
  2895. cnt -= got;
  2896. offset += got;
  2897. prior += got;
  2898. pBuf = (void*)(got + (char*)pBuf);
  2899. }
  2900. }while( got>0 );
  2901. TIMER_END;
  2902. OSTRACE(("READ %-3d %5d %7lld %llu\n",
  2903. id->h, got+prior, offset-prior, TIMER_ELAPSED));
  2904. return got+prior;
  2905. }
  2906. /*
  2907. ** Read data from a file into a buffer. Return SQLITE_OK if all
  2908. ** bytes were read successfully and SQLITE_IOERR if anything goes
  2909. ** wrong.
  2910. */
  2911. static int unixRead(
  2912. sqlite3_file *id,
  2913. void *pBuf,
  2914. int amt,
  2915. sqlite3_int64 offset
  2916. ){
  2917. unixFile *pFile = (unixFile *)id;
  2918. int got;
  2919. assert( id );
  2920. assert( offset>=0 );
  2921. assert( amt>0 );
  2922. /* If this is a database file (not a journal, master-journal or temp
  2923. ** file), the bytes in the locking range should never be read or written. */
  2924. #if 0
  2925. assert( pFile->pUnused==0
  2926. || offset>=PENDING_BYTE+512
  2927. || offset+amt<=PENDING_BYTE
  2928. );
  2929. #endif
  2930. #if SQLITE_MAX_MMAP_SIZE>0
  2931. /* Deal with as much of this read request as possible by transfering
  2932. ** data from the memory mapping using memcpy(). */
  2933. if( offset<pFile->mmapSize ){
  2934. if( offset+amt <= pFile->mmapSize ){
  2935. memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
  2936. return SQLITE_OK;
  2937. }else{
  2938. int nCopy = pFile->mmapSize - offset;
  2939. memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
  2940. pBuf = &((u8 *)pBuf)[nCopy];
  2941. amt -= nCopy;
  2942. offset += nCopy;
  2943. }
  2944. }
  2945. #endif
  2946. got = seekAndRead(pFile, offset, pBuf, amt);
  2947. if( got==amt ){
  2948. return SQLITE_OK;
  2949. }else if( got<0 ){
  2950. /* lastErrno set by seekAndRead */
  2951. return SQLITE_IOERR_READ;
  2952. }else{
  2953. pFile->lastErrno = 0; /* not a system error */
  2954. /* Unread parts of the buffer must be zero-filled */
  2955. memset(&((char*)pBuf)[got], 0, amt-got);
  2956. return SQLITE_IOERR_SHORT_READ;
  2957. }
  2958. }
  2959. /*
  2960. ** Attempt to seek the file-descriptor passed as the first argument to
  2961. ** absolute offset iOff, then attempt to write nBuf bytes of data from
  2962. ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise,
  2963. ** return the actual number of bytes written (which may be less than
  2964. ** nBuf).
  2965. */
  2966. static int seekAndWriteFd(
  2967. int fd, /* File descriptor to write to */
  2968. i64 iOff, /* File offset to begin writing at */
  2969. const void *pBuf, /* Copy data from this buffer to the file */
  2970. int nBuf, /* Size of buffer pBuf in bytes */
  2971. int *piErrno /* OUT: Error number if error occurs */
  2972. ){
  2973. int rc = 0; /* Value returned by system call */
  2974. assert( nBuf==(nBuf&0x1ffff) );
  2975. assert( fd>2 );
  2976. nBuf &= 0x1ffff;
  2977. TIMER_START;
  2978. #if defined(USE_PREAD)
  2979. do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
  2980. #elif defined(USE_PREAD64)
  2981. do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
  2982. #else
  2983. do{
  2984. i64 iSeek = lseek(fd, iOff, SEEK_SET);
  2985. SimulateIOError( iSeek-- );
  2986. if( iSeek!=iOff ){
  2987. if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0);
  2988. return -1;
  2989. }
  2990. rc = osWrite(fd, pBuf, nBuf);
  2991. }while( rc<0 && errno==EINTR );
  2992. #endif
  2993. TIMER_END;
  2994. OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));
  2995. if( rc<0 && piErrno ) *piErrno = errno;
  2996. return rc;
  2997. }
  2998. /*
  2999. ** Seek to the offset in id->offset then read cnt bytes into pBuf.
  3000. ** Return the number of bytes actually read. Update the offset.
  3001. **
  3002. ** To avoid stomping the errno value on a failed write the lastErrno value
  3003. ** is set before returning.
  3004. */
  3005. static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  3006. return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);
  3007. }
  3008. /*
  3009. ** Write data from a buffer into a file. Return SQLITE_OK on success
  3010. ** or some other error code on failure.
  3011. */
  3012. static int unixWrite(
  3013. sqlite3_file *id,
  3014. const void *pBuf,
  3015. int amt,
  3016. sqlite3_int64 offset
  3017. ){
  3018. unixFile *pFile = (unixFile*)id;
  3019. int wrote = 0;
  3020. assert( id );
  3021. assert( amt>0 );
  3022. /* If this is a database file (not a journal, master-journal or temp
  3023. ** file), the bytes in the locking range should never be read or written. */
  3024. #if 0
  3025. assert( pFile->pUnused==0
  3026. || offset>=PENDING_BYTE+512
  3027. || offset+amt<=PENDING_BYTE
  3028. );
  3029. #endif
  3030. #ifdef SQLITE_DEBUG
  3031. /* If we are doing a normal write to a database file (as opposed to
  3032. ** doing a hot-journal rollback or a write to some file other than a
  3033. ** normal database file) then record the fact that the database
  3034. ** has changed. If the transaction counter is modified, record that
  3035. ** fact too.
  3036. */
  3037. if( pFile->inNormalWrite ){
  3038. pFile->dbUpdate = 1; /* The database has been modified */
  3039. if( offset<=24 && offset+amt>=27 ){
  3040. int rc;
  3041. char oldCntr[4];
  3042. SimulateIOErrorBenign(1);
  3043. rc = seekAndRead(pFile, 24, oldCntr, 4);
  3044. SimulateIOErrorBenign(0);
  3045. if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
  3046. pFile->transCntrChng = 1; /* The transaction counter has changed */
  3047. }
  3048. }
  3049. }
  3050. #endif
  3051. #if SQLITE_MAX_MMAP_SIZE>0
  3052. /* Deal with as much of this write request as possible by transfering
  3053. ** data from the memory mapping using memcpy(). */
  3054. if( offset<pFile->mmapSize ){
  3055. if( offset+amt <= pFile->mmapSize ){
  3056. memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
  3057. return SQLITE_OK;
  3058. }else{
  3059. int nCopy = pFile->mmapSize - offset;
  3060. memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
  3061. pBuf = &((u8 *)pBuf)[nCopy];
  3062. amt -= nCopy;
  3063. offset += nCopy;
  3064. }
  3065. }
  3066. #endif
  3067. while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
  3068. amt -= wrote;
  3069. offset += wrote;
  3070. pBuf = &((char*)pBuf)[wrote];
  3071. }
  3072. SimulateIOError(( wrote=(-1), amt=1 ));
  3073. SimulateDiskfullError(( wrote=0, amt=1 ));
  3074. if( amt>0 ){
  3075. if( wrote<0 && pFile->lastErrno!=ENOSPC ){
  3076. /* lastErrno set by seekAndWrite */
  3077. return SQLITE_IOERR_WRITE;
  3078. }else{
  3079. pFile->lastErrno = 0; /* not a system error */
  3080. return SQLITE_FULL;
  3081. }
  3082. }
  3083. return SQLITE_OK;
  3084. }
  #if defined(SQLITE_TEST)
  /*
  ** Test-only counters: the number of syncs of any kind and the number of
  ** full syncs.  They exist so that tests can verify that syncs and
  ** fullsyncs are occurring at the right times.
  */
  int sqlite3_sync_count = 0;
  int sqlite3_fullsync_count = 0;
  #endif /* defined(SQLITE_TEST) */
  /*
  ** We do not trust systems to provide a working fdatasync().  Some do.
  ** Others do not.  To be safe, fdatasync is mapped onto the (slightly
  ** slower) fsync() unless the build already defines fdatasync as a macro.
  ** If you know that your system does support fdatasync() correctly,
  ** then simply compile with -Dfdatasync=fdatasync
  */
  #ifndef fdatasync
  # define fdatasync fsync
  #endif
  /*
  ** HAVE_FULLFSYNC is 1 when the F_FULLFSYNC macro is defined and 0 when
  ** it is not.  F_FULLFSYNC is currently only available on Mac OS X.
  ** But that could change.
  */
  #if defined(F_FULLFSYNC)
  # define HAVE_FULLFSYNC 1
  #else
  # define HAVE_FULLFSYNC 0
  #endif
  3112. /*
  3113. ** The fsync() system call does not work as advertised on many
  3114. ** unix systems. The following procedure is an attempt to make
  3115. ** it work better.
  3116. **
  3117. ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
  3118. ** for testing when we want to run through the test suite quickly.
  3119. ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
  3120. ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
  3121. ** or power failure will likely corrupt the database file.
  3122. **
  3123. ** SQLite sets the dataOnly flag if the size of the file is unchanged.
  3124. ** The idea behind dataOnly is that it should only write the file content
  3125. ** to disk, not the inode. We only set dataOnly if the file size is
  3126. ** unchanged since the file size is part of the inode. However,
  3127. ** Ted Ts'o tells us that fdatasync() will also write the inode if the
  3128. ** file size has changed. The only real difference between fdatasync()
  3129. ** and fsync(), Ted tells us, is that fdatasync() will not flush the
  3130. ** inode if the mtime or owner or other inode attributes have changed.
  3131. ** We only care about the file size, not the other file attributes, so
  3132. ** as far as SQLite is concerned, an fdatasync() is always adequate.
  3133. ** So, we always use fdatasync() if it is available, regardless of
  3134. ** the value of the dataOnly flag.
  3135. */
  3136. static int full_fsync(int fd, int fullSync, int dataOnly){
  3137. int rc;
  3138. /* The following "ifdef/elif/else/" block has the same structure as
  3139. ** the one below. It is replicated here solely to avoid cluttering
  3140. ** up the real code with the UNUSED_PARAMETER() macros.
  3141. */
  3142. #ifdef SQLITE_NO_SYNC
  3143. UNUSED_PARAMETER(fd);
  3144. UNUSED_PARAMETER(fullSync);
  3145. UNUSED_PARAMETER(dataOnly);
  3146. #elif HAVE_FULLFSYNC
  3147. UNUSED_PARAMETER(dataOnly);
  3148. #else
  3149. UNUSED_PARAMETER(fullSync);
  3150. UNUSED_PARAMETER(dataOnly);
  3151. #endif
  3152. /* Record the number of times that we do a normal fsync() and
  3153. ** FULLSYNC. This is used during testing to verify that this procedure
  3154. ** gets called with the correct arguments.
  3155. */
  3156. #ifdef SQLITE_TEST
  3157. if( fullSync ) sqlite3_fullsync_count++;
  3158. sqlite3_sync_count++;
  3159. #endif
  3160. /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
  3161. ** no-op
  3162. */
  3163. #ifdef SQLITE_NO_SYNC
  3164. rc = SQLITE_OK;
  3165. #elif HAVE_FULLFSYNC
  3166. if( fullSync ){
  3167. rc = osFcntl(fd, F_FULLFSYNC, 0);
  3168. }else{
  3169. rc = 1;
  3170. }
  3171. /* If the FULLFSYNC failed, fall back to attempting an fsync().
  3172. ** It shouldn't be possible for fullfsync to fail on the local
  3173. ** file system (on OSX), so failure indicates that FULLFSYNC
  3174. ** isn't supported for this file system. So, attempt an fsync
  3175. ** and (for now) ignore the overhead of a superfluous fcntl call.
  3176. ** It'd be better to detect fullfsync support once and avoid
  3177. ** the fcntl call every time sync is called.
  3178. */
  3179. if( rc ) rc = fsync(fd);
  3180. #elif defined(__APPLE__)
  3181. /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly
  3182. ** so currently we default to the macro that redefines fdatasync to fsync
  3183. */
  3184. rc = fsync(fd);
  3185. #else
  3186. rc = fdatasync(fd);
  3187. #if OS_VXWORKS
  3188. if( rc==-1 && errno==ENOTSUP ){
  3189. rc = fsync(fd);
  3190. }
  3191. #endif /* OS_VXWORKS */
  3192. #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
  3193. if( OS_VXWORKS && rc!= -1 ){
  3194. rc = 0;
  3195. }
  3196. return rc;
  3197. }
  3198. /*
  3199. ** Open a file descriptor to the directory containing file zFilename.
  3200. ** If successful, *pFd is set to the opened file descriptor and
  3201. ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
  3202. ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
  3203. ** value.
  3204. **
  3205. ** The directory file descriptor is used for only one thing - to
  3206. ** fsync() a directory to make sure file creation and deletion events
  3207. ** are flushed to disk. Such fsyncs are not needed on newer
  3208. ** journaling filesystems, but are required on older filesystems.
  3209. **
  3210. ** This routine can be overridden using the xSetSysCall interface.
  3211. ** The ability to override this routine was added in support of the
  3212. ** chromium sandbox. Opening a directory is a security risk (we are
  3213. ** told) so making it overrideable allows the chromium sandbox to
  3214. ** replace this routine with a harmless no-op. To make this routine
  3215. ** a no-op, replace it with a stub that returns SQLITE_OK but leaves
  3216. ** *pFd set to a negative number.
  3217. **
  3218. ** If SQLITE_OK is returned, the caller is responsible for closing
  3219. ** the file descriptor *pFd using close().
  3220. */
  3221. static int openDirectory(const char *zFilename, int *pFd){
  3222. int ii;
  3223. int fd = -1;
  3224. char zDirname[MAX_PATHNAME+1];
  3225. sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
  3226. for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);
  3227. if( ii>0 ){
  3228. zDirname[ii] = '\0';
  3229. fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
  3230. if( fd>=0 ){
  3231. OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
  3232. }
  3233. }
  3234. *pFd = fd;
  3235. return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
  3236. }
  3237. /*
  3238. ** Make sure all writes to a particular file are committed to disk.
  3239. **
  3240. ** If dataOnly==0 then both the file itself and its metadata (file
  3241. ** size, access time, etc) are synced. If dataOnly!=0 then only the
  3242. ** file data is synced.
  3243. **
  3244. ** Under Unix, also make sure that the directory entry for the file
  3245. ** has been created by fsync-ing the directory that contains the file.
  3246. ** If we do not do this and we encounter a power failure, the directory
  3247. ** entry for the journal might not exist after we reboot. The next
  3248. ** SQLite to access the file will not know that the journal exists (because
  3249. ** the directory entry for the journal was never created) and the transaction
  3250. ** will not roll back - possibly leading to database corruption.
  3251. */
  3252. static int unixSync(sqlite3_file *id, int flags){
  3253. int rc;
  3254. unixFile *pFile = (unixFile*)id;
  3255. int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
  3256. int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
  3257. /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
  3258. assert((flags&0x0F)==SQLITE_SYNC_NORMAL
  3259. || (flags&0x0F)==SQLITE_SYNC_FULL
  3260. );
  3261. /* Unix cannot, but some systems may return SQLITE_FULL from here. This
  3262. ** line is to test that doing so does not cause any problems.
  3263. */
  3264. SimulateDiskfullError( return SQLITE_FULL );
  3265. assert( pFile );
  3266. OSTRACE(("SYNC %-3d\n", pFile->h));
  3267. rc = full_fsync(pFile->h, isFullsync, isDataOnly);
  3268. SimulateIOError( rc=1 );
  3269. if( rc ){
  3270. pFile->lastErrno = errno;
  3271. return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
  3272. }
  3273. /* Also fsync the directory containing the file if the DIRSYNC flag
  3274. ** is set. This is a one-time occurrence. Many systems (examples: AIX)
  3275. ** are unable to fsync a directory, so ignore errors on the fsync.
  3276. */
  3277. if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
  3278. int dirfd;
  3279. OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
  3280. HAVE_FULLFSYNC, isFullsync));
  3281. rc = osOpenDirectory(pFile->zPath, &dirfd);
  3282. if( rc==SQLITE_OK && dirfd>=0 ){
  3283. full_fsync(dirfd, 0, 0);
  3284. robust_close(pFile, dirfd, __LINE__);
  3285. }else if( rc==SQLITE_CANTOPEN ){
  3286. rc = SQLITE_OK;
  3287. }
  3288. pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
  3289. }
  3290. return rc;
  3291. }
  3292. /*
  3293. ** Truncate an open file to a specified size
  3294. */
  3295. static int unixTruncate(sqlite3_file *id, i64 nByte){
  3296. unixFile *pFile = (unixFile *)id;
  3297. int rc;
  3298. assert( pFile );
  3299. SimulateIOError( return SQLITE_IOERR_TRUNCATE );
  3300. /* If the user has configured a chunk-size for this file, truncate the
  3301. ** file so that it consists of an integer number of chunks (i.e. the
  3302. ** actual file size after the operation may be larger than the requested
  3303. ** size).
  3304. */
  3305. if( pFile->szChunk>0 ){
  3306. nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
  3307. }
  3308. rc = robust_ftruncate(pFile->h, (off_t)nByte);
  3309. if( rc ){
  3310. pFile->lastErrno = errno;
  3311. return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
  3312. }else{
  3313. #ifdef SQLITE_DEBUG
  3314. /* If we are doing a normal write to a database file (as opposed to
  3315. ** doing a hot-journal rollback or a write to some file other than a
  3316. ** normal database file) and we truncate the file to zero length,
  3317. ** that effectively updates the change counter. This might happen
  3318. ** when restoring a database using the backup API from a zero-length
  3319. ** source.
  3320. */
  3321. if( pFile->inNormalWrite && nByte==0 ){
  3322. pFile->transCntrChng = 1;
  3323. }
  3324. #endif
  3325. #if SQLITE_MAX_MMAP_SIZE>0
  3326. /* If the file was just truncated to a size smaller than the currently
  3327. ** mapped region, reduce the effective mapping size as well. SQLite will
  3328. ** use read() and write() to access data beyond this point from now on.
  3329. */
  3330. if( nByte<pFile->mmapSize ){
  3331. pFile->mmapSize = nByte;
  3332. }
  3333. #endif
  3334. return SQLITE_OK;
  3335. }
  3336. }
  3337. /*
  3338. ** Determine the current size of a file in bytes
  3339. */
  3340. static int unixFileSize(sqlite3_file *id, i64 *pSize){
  3341. int rc;
  3342. struct stat buf;
  3343. assert( id );
  3344. rc = osFstat(((unixFile*)id)->h, &buf);
  3345. SimulateIOError( rc=1 );
  3346. if( rc!=0 ){
  3347. ((unixFile*)id)->lastErrno = errno;
  3348. return SQLITE_IOERR_FSTAT;
  3349. }
  3350. *pSize = buf.st_size;
  3351. /* When opening a zero-size database, the findInodeInfo() procedure
  3352. ** writes a single byte into that file in order to work around a bug
  3353. ** in the OS-X msdos filesystem. In order to avoid problems with upper
  3354. ** layers, we need to report this file size as zero even though it is
  3355. ** really 1. Ticket #3260.
  3356. */
  3357. if( *pSize==1 ) *pSize = 0;
  3358. return SQLITE_OK;
  3359. }
  3360. #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  3361. /*
  3362. ** Handler for proxy-locking file-control verbs. Defined below in the
  3363. ** proxying locking division.
  3364. */
  3365. static int proxyFileControl(sqlite3_file*,int,void*);
  3366. #endif
  3367. /*
  3368. ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT
  3369. ** file-control operation. Enlarge the database to nBytes in size
  3370. ** (rounded up to the next chunk-size). If the database is already
  3371. ** nBytes or larger, this routine is a no-op.
  3372. */
  3373. static int fcntlSizeHint(unixFile *pFile, i64 nByte){
  3374. if( pFile->szChunk>0 ){
  3375. i64 nSize; /* Required file size */
  3376. struct stat buf; /* Used to hold return values of fstat() */
  3377. if( osFstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT;
  3378. nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
  3379. if( nSize>(i64)buf.st_size ){
  3380. #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
  3381. /* The code below is handling the return value of osFallocate()
  3382. ** correctly. posix_fallocate() is defined to "returns zero on success,
  3383. ** or an error number on failure". See the manpage for details. */
  3384. int err;
  3385. do{
  3386. err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size);
  3387. }while( err==EINTR );
  3388. if( err ) return SQLITE_IOERR_WRITE;
  3389. #else
  3390. /* If the OS does not have posix_fallocate(), fake it. First use
  3391. ** ftruncate() to set the file size, then write a single byte to
  3392. ** the last byte in each block within the extended region. This
  3393. ** is the same technique used by glibc to implement posix_fallocate()
  3394. ** on systems that do not have a real fallocate() system call.
  3395. */
  3396. int nBlk = buf.st_blksize; /* File-system block size */
  3397. i64 iWrite; /* Next offset to write to */
  3398. if( robust_ftruncate(pFile->h, nSize) ){
  3399. pFile->lastErrno = errno;
  3400. return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
  3401. }
  3402. iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1;
  3403. while( iWrite<nSize ){
  3404. int nWrite = seekAndWrite(pFile, iWrite, "", 1);
  3405. if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
  3406. iWrite += nBlk;
  3407. }
  3408. #endif
  3409. }
  3410. }
  3411. #if SQLITE_MAX_MMAP_SIZE>0
  3412. if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
  3413. int rc;
  3414. if( pFile->szChunk<=0 ){
  3415. if( robust_ftruncate(pFile->h, nByte) ){
  3416. pFile->lastErrno = errno;
  3417. return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
  3418. }
  3419. }
  3420. rc = unixMapfile(pFile, nByte);
  3421. return rc;
  3422. }
  3423. #endif
  3424. return SQLITE_OK;
  3425. }
  3426. /*
  3427. ** If *pArg is initially negative then this is a query. Set *pArg to
  3428. ** 1 or 0 depending on whether or not the mask bit of pFile->ctrlFlags is set.
  3429. **
  3430. ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
  3431. */
  3432. static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){
  3433. if( *pArg<0 ){
  3434. *pArg = (pFile->ctrlFlags & mask)!=0;
  3435. }else if( (*pArg)==0 ){
  3436. pFile->ctrlFlags &= ~mask;
  3437. }else{
  3438. pFile->ctrlFlags |= mask;
  3439. }
  3440. }
  3441. /* Forward declaration */
  3442. static int unixGetTempname(int nBuf, char *zBuf);
  3443. /*
  3444. ** Information and control of an open file handle.
  3445. */
  3446. static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  3447. unixFile *pFile = (unixFile*)id;
  3448. switch( op ){
  3449. case SQLITE_FCNTL_LOCKSTATE: {
  3450. *(int*)pArg = pFile->eFileLock;
  3451. return SQLITE_OK;
  3452. }
  3453. case SQLITE_LAST_ERRNO: {
  3454. *(int*)pArg = pFile->lastErrno;
  3455. return SQLITE_OK;
  3456. }
  3457. case SQLITE_FCNTL_CHUNK_SIZE: {
  3458. pFile->szChunk = *(int *)pArg;
  3459. return SQLITE_OK;
  3460. }
  3461. case SQLITE_FCNTL_SIZE_HINT: {
  3462. int rc;
  3463. SimulateIOErrorBenign(1);
  3464. rc = fcntlSizeHint(pFile, *(i64 *)pArg);
  3465. SimulateIOErrorBenign(0);
  3466. return rc;
  3467. }
  3468. case SQLITE_FCNTL_PERSIST_WAL: {
  3469. unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
  3470. return SQLITE_OK;
  3471. }
  3472. case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
  3473. unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
  3474. return SQLITE_OK;
  3475. }
  3476. case SQLITE_FCNTL_VFSNAME: {
  3477. *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
  3478. return SQLITE_OK;
  3479. }
  3480. case SQLITE_FCNTL_TEMPFILENAME: {
  3481. char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname );
  3482. if( zTFile ){
  3483. unixGetTempname(pFile->pVfs->mxPathname, zTFile);
  3484. *(char**)pArg = zTFile;
  3485. }
  3486. return SQLITE_OK;
  3487. }
  3488. #if SQLITE_MAX_MMAP_SIZE>0
  3489. case SQLITE_FCNTL_MMAP_SIZE: {
  3490. i64 newLimit = *(i64*)pArg;
  3491. int rc = SQLITE_OK;
  3492. if( newLimit>sqlite3GlobalConfig.mxMmap ){
  3493. newLimit = sqlite3GlobalConfig.mxMmap;
  3494. }
  3495. *(i64*)pArg = pFile->mmapSizeMax;
  3496. if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
  3497. pFile->mmapSizeMax = newLimit;
  3498. if( pFile->mmapSize>0 ){
  3499. unixUnmapfile(pFile);
  3500. rc = unixMapfile(pFile, -1);
  3501. }
  3502. }
  3503. return rc;
  3504. }
  3505. #endif
  3506. #ifdef SQLITE_DEBUG
  3507. /* The pager calls this method to signal that it has done
  3508. ** a rollback and that the database is therefore unchanged and
  3509. ** it hence it is OK for the transaction change counter to be
  3510. ** unchanged.
  3511. */
  3512. case SQLITE_FCNTL_DB_UNCHANGED: {
  3513. ((unixFile*)id)->dbUpdate = 0;
  3514. return SQLITE_OK;
  3515. }
  3516. #endif
  3517. #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  3518. case SQLITE_SET_LOCKPROXYFILE:
  3519. case SQLITE_GET_LOCKPROXYFILE: {
  3520. return proxyFileControl(id,op,pArg);
  3521. }
  3522. #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
  3523. }
  3524. return SQLITE_NOTFOUND;
  3525. }
  3526. /*
  3527. ** Return the sector size in bytes of the underlying block device for
  3528. ** the specified file. This is almost always 512 bytes, but may be
  3529. ** larger for some devices.
  3530. **
  3531. ** SQLite code assumes this function cannot fail. It also assumes that
  3532. ** if two files are created in the same file-system directory (i.e.
  3533. ** a database and its journal file) that the sector size will be the
  3534. ** same for both.
  3535. */
  3536. #ifndef __QNXNTO__
  3537. static int unixSectorSize(sqlite3_file *NotUsed){
  3538. UNUSED_PARAMETER(NotUsed);
  3539. return SQLITE_DEFAULT_SECTOR_SIZE;
  3540. }
  3541. #endif
  3542. /*
  3543. ** The following version of unixSectorSize() is optimized for QNX.
  3544. */
  3545. #ifdef __QNXNTO__
  3546. #include <sys/dcmd_blk.h>
  3547. #include <sys/statvfs.h>
  3548. static int unixSectorSize(sqlite3_file *id){
  3549. unixFile *pFile = (unixFile*)id;
  3550. if( pFile->sectorSize == 0 ){
  3551. struct statvfs fsInfo;
  3552. /* Set defaults for non-supported filesystems */
  3553. pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  3554. pFile->deviceCharacteristics = 0;
  3555. if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
  3556. return pFile->sectorSize;
  3557. }
  3558. if( !strcmp(fsInfo.f_basetype, "tmp") ) {
  3559. pFile->sectorSize = fsInfo.f_bsize;
  3560. pFile->deviceCharacteristics =
  3561. SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */
  3562. SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
  3563. ** the write succeeds */
  3564. SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
  3565. ** so it is ordered */
  3566. 0;
  3567. }else if( strstr(fsInfo.f_basetype, "etfs") ){
  3568. pFile->sectorSize = fsInfo.f_bsize;
  3569. pFile->deviceCharacteristics =
  3570. /* etfs cluster size writes are atomic */
  3571. (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) |
  3572. SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
  3573. ** the write succeeds */
  3574. SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
  3575. ** so it is ordered */
  3576. 0;
  3577. }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){
  3578. pFile->sectorSize = fsInfo.f_bsize;
  3579. pFile->deviceCharacteristics =
  3580. SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */
  3581. SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
  3582. ** the write succeeds */
  3583. SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
  3584. ** so it is ordered */
  3585. 0;
  3586. }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){
  3587. pFile->sectorSize = fsInfo.f_bsize;
  3588. pFile->deviceCharacteristics =
  3589. /* full bitset of atomics from max sector size and smaller */
  3590. ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
  3591. SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
  3592. ** so it is ordered */
  3593. 0;
  3594. }else if( strstr(fsInfo.f_basetype, "dos") ){
  3595. pFile->sectorSize = fsInfo.f_bsize;
  3596. pFile->deviceCharacteristics =
  3597. /* full bitset of atomics from max sector size and smaller */
  3598. ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
  3599. SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
  3600. ** so it is ordered */
  3601. 0;
  3602. }else{
  3603. pFile->deviceCharacteristics =
  3604. SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */
  3605. SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
  3606. ** the write succeeds */
  3607. 0;
  3608. }
  3609. }
  3610. /* Last chance verification. If the sector size isn't a multiple of 512
  3611. ** then it isn't valid.*/
  3612. if( pFile->sectorSize % 512 != 0 ){
  3613. pFile->deviceCharacteristics = 0;
  3614. pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  3615. }
  3616. return pFile->sectorSize;
  3617. }
  3618. #endif /* __QNXNTO__ */
  3619. /*
  3620. ** Return the device characteristics for the file.
  3621. **
  3622. ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
  3623. ** However, that choice is controversial since technically the underlying
  3624. ** file system does not always provide powersafe overwrites. (In other
  3625. ** words, after a power-loss event, parts of the file that were never
  3626. ** written might end up being altered.) However, non-PSOW behavior is very,
  3627. ** very rare. And asserting PSOW makes a large reduction in the amount
  3628. ** of required I/O for journaling, since a lot of padding is eliminated.
  3629. ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
  3630. ** available to turn it off and URI query parameter available to turn it off.
  3631. */
  3632. static int unixDeviceCharacteristics(sqlite3_file *id){
  3633. unixFile *p = (unixFile*)id;
  3634. int rc = 0;
  3635. #ifdef __QNXNTO__
  3636. if( p->sectorSize==0 ) unixSectorSize(id);
  3637. rc = p->deviceCharacteristics;
  3638. #endif
  3639. if( p->ctrlFlags & UNIXFILE_PSOW ){
  3640. rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
  3641. }
  3642. return rc;
  3643. }
  3644. #ifndef SQLITE_OMIT_WAL
  3645. /*
  3646. ** Object used to represent a shared memory buffer.
  3647. **
  3648. ** When multiple threads all reference the same wal-index, each thread
  3649. ** has its own unixShm object, but they all point to a single instance
  3650. ** of this unixShmNode object. In other words, each wal-index is opened
  3651. ** only once per process.
  3652. **
  3653. ** Each unixShmNode object is connected to a single unixInodeInfo object.
  3654. ** We could coalesce this object into unixInodeInfo, but that would mean
  3655. ** every open file that does not use shared memory (in other words, most
  3656. ** open files) would have to carry around this extra information. So
  3657. ** the unixInodeInfo object contains a pointer to this unixShmNode object
  3658. ** and the unixShmNode object is created only when needed.
  3659. **
  3660. ** unixMutexHeld() must be true when creating or destroying
  3661. ** this object or while reading or writing the following fields:
  3662. **
  3663. ** nRef
  3664. **
  3665. ** The following fields are read-only after the object is created:
  3666. **
  3667. ** fid
  3668. ** zFilename
  3669. **
  3670. ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and
  3671. ** unixMutexHeld() is true when reading or writing any other field
  3672. ** in this structure.
  3673. */
  3674. struct unixShmNode {
  3675. unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */
  3676. sqlite3_mutex *mutex; /* Mutex to access this object */
  3677. char *zFilename; /* Name of the mmapped file */
  3678. int h; /* Open file descriptor */
  3679. int szRegion; /* Size of shared-memory regions */
  3680. u16 nRegion; /* Size of array apRegion */
  3681. u8 isReadonly; /* True if read-only */
  3682. char **apRegion; /* Array of mapped shared-memory regions */
  3683. int nRef; /* Number of unixShm objects pointing to this */
  3684. unixShm *pFirst; /* All unixShm objects pointing to this */
  3685. #ifdef SQLITE_DEBUG
  3686. u8 exclMask; /* Mask of exclusive locks held */
  3687. u8 sharedMask; /* Mask of shared locks held */
  3688. u8 nextShmId; /* Next available unixShm.id value */
  3689. #endif
  3690. };
  3691. /*
  3692. ** Structure used internally by this VFS to record the state of an
  3693. ** open shared memory connection.
  3694. **
  3695. ** The following fields are initialized when this object is created and
  3696. ** are read-only thereafter:
  3697. **
  3698. ** unixShm.pShmNode
  3699. ** unixShm.id
  3700. **
  3701. ** All other fields are read/write. The unixShm.pShmNode->mutex must be held
  3702. ** while accessing any read/write fields.
  3703. */
  3704. struct unixShm {
  3705. unixShmNode *pShmNode; /* The underlying unixShmNode object */
  3706. unixShm *pNext; /* Next unixShm with the same unixShmNode */
  3707. u8 hasMutex; /* True if holding the unixShmNode mutex */
  3708. u8 id; /* Id of this connection within its unixShmNode */
  3709. u16 sharedMask; /* Mask of shared locks held */
  3710. u16 exclMask; /* Mask of exclusive locks held */
  3711. };
  3712. /*
  3713. ** Constants used for locking
  3714. */
  3715. #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */
  3716. #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */
  3717. /*
  3718. ** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
  3719. **
  3720. ** Locks are requested with F_SETLK and so never block. SQLITE_BUSY is
  3721. ** returned if the requested lock cannot be obtained.
  3722. */
  3723. static int unixShmSystemLock(
  3724. unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
  3725. int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */
  3726. int ofst, /* First byte of the locking range */
  3727. int n /* Number of bytes to lock */
  3728. ){
  3729. struct flock f; /* The posix advisory locking structure */
  3730. int rc = SQLITE_OK; /* Result code form fcntl() */
  3731. /* Access to the unixShmNode object is serialized by the caller */
  3732. assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );
  3733. /* Shared locks never span more than one byte */
  3734. assert( n==1 || lockType!=F_RDLCK );
  3735. /* Locks are within range */
  3736. assert( n>=1 && n<SQLITE_SHM_NLOCK );
  3737. if( pShmNode->h>=0 ){
  3738. /* Initialize the locking parameters */
  3739. memset(&f, 0, sizeof(f));
  3740. f.l_type = lockType;
  3741. f.l_whence = SEEK_SET;
  3742. f.l_start = ofst;
  3743. f.l_len = n;
  3744. rc = osFcntl(pShmNode->h, F_SETLK, &f);
  3745. rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
  3746. }
  3747. /* Update the global lock state and do debug tracing */
  3748. #ifdef SQLITE_DEBUG
  3749. { u16 mask;
  3750. OSTRACE(("SHM-LOCK "));
  3751. mask = (1<<(ofst+n)) - (1<<ofst);
  3752. if( rc==SQLITE_OK ){
  3753. if( lockType==F_UNLCK ){
  3754. OSTRACE(("unlock %d ok", ofst));
  3755. pShmNode->exclMask &= ~mask;
  3756. pShmNode->sharedMask &= ~mask;
  3757. }else if( lockType==F_RDLCK ){
  3758. OSTRACE(("read-lock %d ok", ofst));
  3759. pShmNode->exclMask &= ~mask;
  3760. pShmNode->sharedMask |= mask;
  3761. }else{
  3762. assert( lockType==F_WRLCK );
  3763. OSTRACE(("write-lock %d ok", ofst));
  3764. pShmNode->exclMask |= mask;
  3765. pShmNode->sharedMask &= ~mask;
  3766. }
  3767. }else{
  3768. if( lockType==F_UNLCK ){
  3769. OSTRACE(("unlock %d failed", ofst));
  3770. }else if( lockType==F_RDLCK ){
  3771. OSTRACE(("read-lock failed"));
  3772. }else{
  3773. assert( lockType==F_WRLCK );
  3774. OSTRACE(("write-lock %d failed", ofst));
  3775. }
  3776. }
  3777. OSTRACE((" - afterwards %03x,%03x\n",
  3778. pShmNode->sharedMask, pShmNode->exclMask));
  3779. }
  3780. #endif
  3781. return rc;
  3782. }
  3783. /*
  3784. ** Purge the unixShmNode attached to pFd->pInode, if any, when its nRef==0.
  3785. **
  3786. ** This is not a VFS shared-memory method; it is a utility function called
  3787. ** by VFS shared-memory methods.
  3788. */
  3789. static void unixShmPurge(unixFile *pFd){
  3790. unixShmNode *p = pFd->pInode->pShmNode;
  3791. assert( unixMutexHeld() );
  3792. if( p && p->nRef==0 ){
  3793. int i;
  3794. assert( p->pInode==pFd->pInode );
  3795. sqlite3_mutex_free(p->mutex);
  3796. for(i=0; i<p->nRegion; i++){
  3797. if( p->h>=0 ){
  3798. osMunmap(p->apRegion[i], p->szRegion);
  3799. }else{
  3800. sqlite3_free(p->apRegion[i]);
  3801. }
  3802. }
  3803. sqlite3_free(p->apRegion);
  3804. if( p->h>=0 ){
  3805. robust_close(pFd, p->h, __LINE__);
  3806. p->h = -1;
  3807. }
  3808. p->pInode->pShmNode = 0;
  3809. sqlite3_free(p);
  3810. }
  3811. }
  3812. /*
  3813. ** Open a shared-memory area associated with open database file pDbFd.
  3814. ** This particular implementation uses mmapped files.
  3815. **
  3816. ** The file used to implement shared-memory is in the same directory
  3817. ** as the open database file and has the same name as the open database
  3818. ** file with the "-shm" suffix added. For example, if the database file
  3819. ** is "/home/user1/config.db" then the file that is created and mmapped
  3820. ** for shared memory will be called "/home/user1/config.db-shm".
  3821. **
  3822. ** Another approach is to use files in /dev/shm or /dev/tmp or in
  3823. ** some other tmpfs mount. But if a file in a different directory
  3824. ** from the database file is used, then differing access permissions
  3825. ** or a chroot() might cause two different processes on the same
  3826. ** database to end up using different files for shared memory -
  3827. ** meaning that their memory would not really be shared - resulting
  3828. ** in database corruption. Nevertheless, this tmpfs file usage
  3829. ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"
  3830. ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time
  3831. ** option results in an incompatible build of SQLite; if builds of SQLite
  3832. ** with differing SQLITE_SHM_DIRECTORY settings attempt to use the
  3833. ** same database file at the same time, database corruption will likely
  3834. ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
  3835. ** "unsupported" and may go away in a future SQLite release.
  3836. **
  3837. ** When opening a new shared-memory file, if no other instances of that
  3838. ** file are currently open, in this process or in other processes, then
  3839. ** the file must be truncated to zero length or have its header cleared.
  3840. **
  3841. ** If the original database file (pDbFd) is using the "unix-excl" VFS
  3842. ** that means that an exclusive lock is held on the database file and
  3843. ** that no other processes are able to read or write the database. In
  3844. ** that case, we do not really need shared memory. No shared memory
  3845. ** file is created. The shared memory will be simulated with heap memory.
  3846. */
  3847. static int unixOpenSharedMemory(unixFile *pDbFd){
  3848. struct unixShm *p = 0; /* The connection to be opened */
  3849. struct unixShmNode *pShmNode; /* The underlying mmapped file */
  3850. int rc; /* Result code */
  3851. unixInodeInfo *pInode; /* The inode of fd */
  3852. char *zShmFilename; /* Name of the file used for SHM */
  3853. int nShmFilename; /* Size of the SHM filename in bytes */
  3854. /* Allocate space for the new unixShm object. */
  3855. p = sqlite3_malloc( sizeof(*p) );
  3856. if( p==0 ) return SQLITE_NOMEM;
  3857. memset(p, 0, sizeof(*p));
  3858. assert( pDbFd->pShm==0 );
  3859. /* Check to see if a unixShmNode object already exists. Reuse an existing
  3860. ** one if present. Create a new one if necessary.
  3861. */
  3862. unixEnterMutex();
  3863. pInode = pDbFd->pInode;
  3864. pShmNode = pInode->pShmNode;
  3865. if( pShmNode==0 ){
  3866. struct stat sStat; /* fstat() info for database file */
  3867. /* Call fstat() to figure out the permissions on the database file. If
  3868. ** a new *-shm file is created, an attempt will be made to create it
  3869. ** with the same permissions.
  3870. */
  3871. if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){
  3872. rc = SQLITE_IOERR_FSTAT;
  3873. goto shm_open_err;
  3874. }
  3875. #ifdef SQLITE_SHM_DIRECTORY
  3876. nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
  3877. #else
  3878. nShmFilename = 6 + (int)strlen(pDbFd->zPath);
  3879. #endif
  3880. pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename );
  3881. if( pShmNode==0 ){
  3882. rc = SQLITE_NOMEM;
  3883. goto shm_open_err;
  3884. }
  3885. memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
  3886. zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];
  3887. #ifdef SQLITE_SHM_DIRECTORY
  3888. sqlite3_snprintf(nShmFilename, zShmFilename,
  3889. SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
  3890. (u32)sStat.st_ino, (u32)sStat.st_dev);
  3891. #else
  3892. sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", pDbFd->zPath);
  3893. sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);
  3894. #endif
  3895. pShmNode->h = -1;
  3896. pDbFd->pInode->pShmNode = pShmNode;
  3897. pShmNode->pInode = pDbFd->pInode;
  3898. pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
  3899. if( pShmNode->mutex==0 ){
  3900. rc = SQLITE_NOMEM;
  3901. goto shm_open_err;
  3902. }
  3903. if( pInode->bProcessLock==0 ){
  3904. int openFlags = O_RDWR | O_CREAT;
  3905. if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
  3906. openFlags = O_RDONLY;
  3907. pShmNode->isReadonly = 1;
  3908. }
  3909. pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
  3910. if( pShmNode->h<0 ){
  3911. rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
  3912. goto shm_open_err;
  3913. }
  3914. /* If this process is running as root, make sure that the SHM file
  3915. ** is owned by the same user that owns the original database. Otherwise,
  3916. ** the original owner will not be able to connect.
  3917. */
  3918. osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
  3919. /* Check to see if another process is holding the dead-man switch.
  3920. ** If not, truncate the file to zero length.
  3921. */
  3922. rc = SQLITE_OK;
  3923. if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
  3924. if( robust_ftruncate(pShmNode->h, 0) ){
  3925. rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
  3926. }
  3927. }
  3928. if( rc==SQLITE_OK ){
  3929. rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
  3930. }
  3931. if( rc ) goto shm_open_err;
  3932. }
  3933. }
  3934. /* Make the new connection a child of the unixShmNode */
  3935. p->pShmNode = pShmNode;
  3936. #ifdef SQLITE_DEBUG
  3937. p->id = pShmNode->nextShmId++;
  3938. #endif
  3939. pShmNode->nRef++;
  3940. pDbFd->pShm = p;
  3941. unixLeaveMutex();
  3942. /* The reference count on pShmNode has already been incremented under
  3943. ** the cover of the unixEnterMutex() mutex and the pointer from the
  3944. ** new (struct unixShm) object to the pShmNode has been set. All that is
  3945. ** left to do is to link the new object into the linked list starting
  3946. ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex
  3947. ** mutex.
  3948. */
  3949. sqlite3_mutex_enter(pShmNode->mutex);
  3950. p->pNext = pShmNode->pFirst;
  3951. pShmNode->pFirst = p;
  3952. sqlite3_mutex_leave(pShmNode->mutex);
  3953. return SQLITE_OK;
  3954. /* Jump here on any error */
  3955. shm_open_err:
  3956. unixShmPurge(pDbFd); /* This call frees pShmNode if required */
  3957. sqlite3_free(p);
  3958. unixLeaveMutex();
  3959. return rc;
  3960. }
  3961. /*
  3962. ** This function is called to obtain a pointer to region iRegion of the
  3963. ** shared-memory associated with the database file fd. Shared-memory regions
  3964. ** are numbered starting from zero. Each shared-memory region is szRegion
  3965. ** bytes in size.
  3966. **
  3967. ** If an error occurs, an error code is returned and *pp is set to NULL.
  3968. **
  3969. ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
  3970. ** region has not been allocated (by any client, including one running in a
  3971. ** separate process), then *pp is set to NULL and SQLITE_OK returned. If
  3972. ** bExtend is non-zero and the requested shared-memory region has not yet
  3973. ** been allocated, it is allocated by this function.
  3974. **
  3975. ** If the shared-memory region has already been allocated or is allocated by
  3976. ** this call as described above, then it is mapped into this processes
  3977. ** address space (if it is not already), *pp is set to point to the mapped
  3978. ** memory and SQLITE_OK returned.
  3979. */
  3980. static int unixShmMap(
  3981. sqlite3_file *fd, /* Handle open on database file */
  3982. int iRegion, /* Region to retrieve */
  3983. int szRegion, /* Size of regions */
  3984. int bExtend, /* True to extend file if necessary */
  3985. void volatile **pp /* OUT: Mapped memory */
  3986. ){
  3987. unixFile *pDbFd = (unixFile*)fd;
  3988. unixShm *p;
  3989. unixShmNode *pShmNode;
  3990. int rc = SQLITE_OK;
  3991. /* If the shared-memory file has not yet been opened, open it now. */
  3992. if( pDbFd->pShm==0 ){
  3993. rc = unixOpenSharedMemory(pDbFd);
  3994. if( rc!=SQLITE_OK ) return rc;
  3995. }
  3996. p = pDbFd->pShm;
  3997. pShmNode = p->pShmNode;
  3998. sqlite3_mutex_enter(pShmNode->mutex);
  3999. assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  4000. assert( pShmNode->pInode==pDbFd->pInode );
  4001. assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  4002. assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  4003. if( pShmNode->nRegion<=iRegion ){
  4004. char **apNew; /* New apRegion[] array */
  4005. int nByte = (iRegion+1)*szRegion; /* Minimum required file size */
  4006. struct stat sStat; /* Used by fstat() */
  4007. pShmNode->szRegion = szRegion;
  4008. if( pShmNode->h>=0 ){
  4009. /* The requested region is not mapped into this processes address space.
  4010. ** Check to see if it has been allocated (i.e. if the wal-index file is
  4011. ** large enough to contain the requested region).
  4012. */
  4013. if( osFstat(pShmNode->h, &sStat) ){
  4014. rc = SQLITE_IOERR_SHMSIZE;
  4015. goto shmpage_out;
  4016. }
  4017. if( sStat.st_size<nByte ){
  4018. /* The requested memory region does not exist. If bExtend is set to
  4019. ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
  4020. */
  4021. if( !bExtend ){
  4022. goto shmpage_out;
  4023. }
  4024. /* Alternatively, if bExtend is true, extend the file. Do this by
  4025. ** writing a single byte to the end of each (OS) page being
  4026. ** allocated or extended. Technically, we need only write to the
  4027. ** last page in order to extend the file. But writing to all new
  4028. ** pages forces the OS to allocate them immediately, which reduces
  4029. ** the chances of SIGBUS while accessing the mapped region later on.
  4030. */
  4031. else{
  4032. static const int pgsz = 4096;
  4033. int iPg;
  4034. /* Write to the last byte of each newly allocated or extended page */
  4035. assert( (nByte % pgsz)==0 );
  4036. for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
  4037. if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){
  4038. const char *zFile = pShmNode->zFilename;
  4039. rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
  4040. goto shmpage_out;
  4041. }
  4042. }
  4043. }
  4044. }
  4045. }
  4046. /* Map the requested memory region into this processes address space. */
  4047. apNew = (char **)sqlite3_realloc(
  4048. pShmNode->apRegion, (iRegion+1)*sizeof(char *)
  4049. );
  4050. if( !apNew ){
  4051. rc = SQLITE_IOERR_NOMEM;
  4052. goto shmpage_out;
  4053. }
  4054. pShmNode->apRegion = apNew;
  4055. while(pShmNode->nRegion<=iRegion){
  4056. void *pMem;
  4057. if( pShmNode->h>=0 ){
  4058. pMem = osMmap(0, szRegion,
  4059. pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
  4060. MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
  4061. );
  4062. if( pMem==MAP_FAILED ){
  4063. rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
  4064. goto shmpage_out;
  4065. }
  4066. }else{
  4067. pMem = sqlite3_malloc(szRegion);
  4068. if( pMem==0 ){
  4069. rc = SQLITE_NOMEM;
  4070. goto shmpage_out;
  4071. }
  4072. memset(pMem, 0, szRegion);
  4073. }
  4074. pShmNode->apRegion[pShmNode->nRegion] = pMem;
  4075. pShmNode->nRegion++;
  4076. }
  4077. }
  4078. shmpage_out:
  4079. if( pShmNode->nRegion>iRegion ){
  4080. *pp = pShmNode->apRegion[iRegion];
  4081. }else{
  4082. *pp = 0;
  4083. }
  4084. if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
  4085. sqlite3_mutex_leave(pShmNode->mutex);
  4086. return rc;
  4087. }
  4088. /*
  4089. ** Change the lock state for a shared-memory segment.
  4090. **
  4091. ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
  4092. ** different here than in posix. In xShmLock(), one can go from unlocked
  4093. ** to shared and back or from unlocked to exclusive and back. But one may
  4094. ** not go from shared to exclusive or from exclusive to shared.
  4095. */
  4096. static int unixShmLock(
  4097. sqlite3_file *fd, /* Database file holding the shared memory */
  4098. int ofst, /* First lock to acquire or release */
  4099. int n, /* Number of locks to acquire or release */
  4100. int flags /* What to do with the lock */
  4101. ){
  4102. unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */
  4103. unixShm *p = pDbFd->pShm; /* The shared memory being locked */
  4104. unixShm *pX; /* For looping over all siblings */
  4105. unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */
  4106. int rc = SQLITE_OK; /* Result code */
  4107. u16 mask; /* Mask of locks to take or release */
  4108. assert( pShmNode==pDbFd->pInode->pShmNode );
  4109. assert( pShmNode->pInode==pDbFd->pInode );
  4110. assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  4111. assert( n>=1 );
  4112. assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
  4113. || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
  4114. || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
  4115. || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  4116. assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
  4117. assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  4118. assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  4119. mask = (1<<(ofst+n)) - (1<<ofst);
  4120. assert( n>1 || mask==(1<<ofst) );
  4121. sqlite3_mutex_enter(pShmNode->mutex);
  4122. if( flags & SQLITE_SHM_UNLOCK ){
  4123. u16 allMask = 0; /* Mask of locks held by siblings */
  4124. /* See if any siblings hold this same lock */
  4125. for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
  4126. if( pX==p ) continue;
  4127. assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
  4128. allMask |= pX->sharedMask;
  4129. }
  4130. /* Unlock the system-level locks */
  4131. if( (mask & allMask)==0 ){
  4132. rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n);
  4133. }else{
  4134. rc = SQLITE_OK;
  4135. }
  4136. /* Undo the local locks */
  4137. if( rc==SQLITE_OK ){
  4138. p->exclMask &= ~mask;
  4139. p->sharedMask &= ~mask;
  4140. }
  4141. }else if( flags & SQLITE_SHM_SHARED ){
  4142. u16 allShared = 0; /* Union of locks held by connections other than "p" */
  4143. /* Find out which shared locks are already held by sibling connections.
  4144. ** If any sibling already holds an exclusive lock, go ahead and return
  4145. ** SQLITE_BUSY.
  4146. */
  4147. for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
  4148. if( (pX->exclMask & mask)!=0 ){
  4149. rc = SQLITE_BUSY;
  4150. break;
  4151. }
  4152. allShared |= pX->sharedMask;
  4153. }
  4154. /* Get shared locks at the system level, if necessary */
  4155. if( rc==SQLITE_OK ){
  4156. if( (allShared & mask)==0 ){
  4157. rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n);
  4158. }else{
  4159. rc = SQLITE_OK;
  4160. }
  4161. }
  4162. /* Get the local shared locks */
  4163. if( rc==SQLITE_OK ){
  4164. p->sharedMask |= mask;
  4165. }
  4166. }else{
  4167. /* Make sure no sibling connections hold locks that will block this
  4168. ** lock. If any do, return SQLITE_BUSY right away.
  4169. */
  4170. for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
  4171. if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){
  4172. rc = SQLITE_BUSY;
  4173. break;
  4174. }
  4175. }
  4176. /* Get the exclusive locks at the system level. Then if successful
  4177. ** also mark the local connection as being locked.
  4178. */
  4179. if( rc==SQLITE_OK ){
  4180. rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n);
  4181. if( rc==SQLITE_OK ){
  4182. assert( (p->sharedMask & mask)==0 );
  4183. p->exclMask |= mask;
  4184. }
  4185. }
  4186. }
  4187. sqlite3_mutex_leave(pShmNode->mutex);
  4188. OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",
  4189. p->id, getpid(), p->sharedMask, p->exclMask));
  4190. return rc;
  4191. }
  4192. /*
  4193. ** Implement a memory barrier or memory fence on shared memory.
  4194. **
  4195. ** All loads and stores begun before the barrier must complete before
  4196. ** any load or store begun after the barrier.
  4197. */
  4198. static void unixShmBarrier(
  4199. sqlite3_file *fd /* Database file holding the shared memory */
  4200. ){
  4201. UNUSED_PARAMETER(fd);
  4202. unixEnterMutex();
  4203. unixLeaveMutex();
  4204. }
  4205. /*
  4206. ** Close a connection to shared-memory. Delete the underlying
  4207. ** storage if deleteFlag is true.
  4208. **
  4209. ** If there is no shared memory associated with the connection then this
  4210. ** routine is a harmless no-op.
  4211. */
  4212. static int unixShmUnmap(
  4213. sqlite3_file *fd, /* The underlying database file */
  4214. int deleteFlag /* Delete shared-memory if true */
  4215. ){
  4216. unixShm *p; /* The connection to be closed */
  4217. unixShmNode *pShmNode; /* The underlying shared-memory file */
  4218. unixShm **pp; /* For looping over sibling connections */
  4219. unixFile *pDbFd; /* The underlying database file */
  4220. pDbFd = (unixFile*)fd;
  4221. p = pDbFd->pShm;
  4222. if( p==0 ) return SQLITE_OK;
  4223. pShmNode = p->pShmNode;
  4224. assert( pShmNode==pDbFd->pInode->pShmNode );
  4225. assert( pShmNode->pInode==pDbFd->pInode );
  4226. /* Remove connection p from the set of connections associated
  4227. ** with pShmNode */
  4228. sqlite3_mutex_enter(pShmNode->mutex);
  4229. for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
  4230. *pp = p->pNext;
  4231. /* Free the connection p */
  4232. sqlite3_free(p);
  4233. pDbFd->pShm = 0;
  4234. sqlite3_mutex_leave(pShmNode->mutex);
  4235. /* If pShmNode->nRef has reached 0, then close the underlying
  4236. ** shared-memory file, too */
  4237. unixEnterMutex();
  4238. assert( pShmNode->nRef>0 );
  4239. pShmNode->nRef--;
  4240. if( pShmNode->nRef==0 ){
  4241. if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename);
  4242. unixShmPurge(pDbFd);
  4243. }
  4244. unixLeaveMutex();
  4245. return SQLITE_OK;
  4246. }
  4247. #else
  4248. # define unixShmMap 0
  4249. # define unixShmLock 0
  4250. # define unixShmBarrier 0
  4251. # define unixShmUnmap 0
  4252. #endif /* #ifndef SQLITE_OMIT_WAL */
  4253. #if SQLITE_MAX_MMAP_SIZE>0
  4254. /*
  4255. ** If it is currently memory mapped, unmap file pFd.
  4256. */
  4257. static void unixUnmapfile(unixFile *pFd){
  4258. assert( pFd->nFetchOut==0 );
  4259. if( pFd->pMapRegion ){
  4260. osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
  4261. pFd->pMapRegion = 0;
  4262. pFd->mmapSize = 0;
  4263. pFd->mmapSizeActual = 0;
  4264. }
  4265. }
  /*
  ** Return the page size used when working out which part of a mapping can
  ** be kept: the fixed value 512 when HAVE_MREMAP is set, otherwise the
  ** system page size as reported by getpagesize() (if _BSD_SOURCE is
  ** defined) or sysconf(_SC_PAGESIZE).
  */
  static int unixGetPagesize(void){
    int szPage;
  #if HAVE_MREMAP
    szPage = 512;
  #elif defined(_BSD_SOURCE)
    szPage = getpagesize();
  #else
    szPage = (int)sysconf(_SC_PAGESIZE);
  #endif
    return szPage;
  }
  4278. /*
  4279. ** Attempt to set the size of the memory mapping maintained by file
  4280. ** descriptor pFd to nNew bytes. Any existing mapping is discarded.
  4281. **
  4282. ** If successful, this function sets the following variables:
  4283. **
  4284. ** unixFile.pMapRegion
  4285. ** unixFile.mmapSize
  4286. ** unixFile.mmapSizeActual
  4287. **
  4288. ** If unsuccessful, an error message is logged via sqlite3_log() and
  4289. ** the three variables above are zeroed. In this case SQLite should
  4290. ** continue accessing the database using the xRead() and xWrite()
  4291. ** methods.
  4292. */
  4293. static void unixRemapfile(
  4294. unixFile *pFd, /* File descriptor object */
  4295. i64 nNew /* Required mapping size */
  4296. ){
  4297. const char *zErr = "mmap";
  4298. int h = pFd->h; /* File descriptor open on db file */
  4299. u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */
  4300. i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */
  4301. u8 *pNew = 0; /* Location of new mapping */
  4302. int flags = PROT_READ; /* Flags to pass to mmap() */
  4303. assert( pFd->nFetchOut==0 );
  4304. assert( nNew>pFd->mmapSize );
  4305. assert( nNew<=pFd->mmapSizeMax );
  4306. assert( nNew>0 );
  4307. assert( pFd->mmapSizeActual>=pFd->mmapSize );
  4308. assert( MAP_FAILED!=0 );
  4309. if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
  4310. if( pOrig ){
  4311. const int szSyspage = unixGetPagesize();
  4312. i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
  4313. u8 *pReq = &pOrig[nReuse];
  4314. /* Unmap any pages of the existing mapping that cannot be reused. */
  4315. if( nReuse!=nOrig ){
  4316. osMunmap(pReq, nOrig-nReuse);
  4317. }
  4318. #if HAVE_MREMAP
  4319. pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
  4320. zErr = "mremap";
  4321. #else
  4322. pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
  4323. if( pNew!=MAP_FAILED ){
  4324. if( pNew!=pReq ){
  4325. osMunmap(pNew, nNew - nReuse);
  4326. pNew = 0;
  4327. }else{
  4328. pNew = pOrig;
  4329. }
  4330. }
  4331. #endif
  4332. /* The attempt to extend the existing mapping failed. Free it. */
  4333. if( pNew==MAP_FAILED || pNew==0 ){
  4334. osMunmap(pOrig, nReuse);
  4335. }
  4336. }
  4337. /* If pNew is still NULL, try to create an entirely new mapping. */
  4338. if( pNew==0 ){
  4339. pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
  4340. }
  4341. if( pNew==MAP_FAILED ){
  4342. pNew = 0;
  4343. nNew = 0;
  4344. unixLogError(SQLITE_OK, zErr, pFd->zPath);
  4345. /* If the mmap() above failed, assume that all subsequent mmap() calls
  4346. ** will probably fail too. Fall back to using xRead/xWrite exclusively
  4347. ** in this case. */
  4348. pFd->mmapSizeMax = 0;
  4349. }
  4350. pFd->pMapRegion = (void *)pNew;
  4351. pFd->mmapSize = pFd->mmapSizeActual = nNew;
  4352. }
  4353. /*
  4354. ** Memory map or remap the file opened by file-descriptor pFd (if the file
  4355. ** is already mapped, the existing mapping is replaced by the new). Or, if
  4356. ** there already exists a mapping for this file, and there are still
  4357. ** outstanding xFetch() references to it, this function is a no-op.
  4358. **
  4359. ** If parameter nByte is non-negative, then it is the requested size of
  4360. ** the mapping to create. Otherwise, if nByte is less than zero, then the
  4361. ** requested size is the size of the file on disk. The actual size of the
  4362. ** created mapping is either the requested size or the value configured
  4363. ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
  4364. **
  4365. ** SQLITE_OK is returned if no error occurs (even if the mapping is not
  4366. ** recreated as a result of outstanding references) or an SQLite error
  4367. ** code otherwise.
  4368. */
  4369. static int unixMapfile(unixFile *pFd, i64 nByte){
  4370. i64 nMap = nByte;
  4371. int rc;
  4372. assert( nMap>=0 || pFd->nFetchOut==0 );
  4373. if( pFd->nFetchOut>0 ) return SQLITE_OK;
  4374. if( nMap<0 ){
  4375. struct stat statbuf; /* Low-level file information */
  4376. rc = osFstat(pFd->h, &statbuf);
  4377. if( rc!=SQLITE_OK ){
  4378. return SQLITE_IOERR_FSTAT;
  4379. }
  4380. nMap = statbuf.st_size;
  4381. }
  4382. if( nMap>pFd->mmapSizeMax ){
  4383. nMap = pFd->mmapSizeMax;
  4384. }
  4385. if( nMap!=pFd->mmapSize ){
  4386. if( nMap>0 ){
  4387. unixRemapfile(pFd, nMap);
  4388. }else{
  4389. unixUnmapfile(pFd);
  4390. }
  4391. }
  4392. return SQLITE_OK;
  4393. }
  4394. #endif /* SQLITE_MAX_MMAP_SIZE>0 */
  4395. /*
  4396. ** If possible, return a pointer to a mapping of file fd starting at offset
  4397. ** iOff. The mapping must be valid for at least nAmt bytes.
  4398. **
  4399. ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
  4400. ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
  4401. ** Finally, if an error does occur, return an SQLite error code. The final
  4402. ** value of *pp is undefined in this case.
  4403. **
  4404. ** If this function does return a pointer, the caller must eventually
  4405. ** release the reference by calling unixUnfetch().
  4406. */
  4407. static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
  4408. #if SQLITE_MAX_MMAP_SIZE>0
  4409. unixFile *pFd = (unixFile *)fd; /* The underlying database file */
  4410. #endif
  4411. *pp = 0;
  4412. #if SQLITE_MAX_MMAP_SIZE>0
  4413. if( pFd->mmapSizeMax>0 ){
  4414. if( pFd->pMapRegion==0 ){
  4415. int rc = unixMapfile(pFd, -1);
  4416. if( rc!=SQLITE_OK ) return rc;
  4417. }
  4418. if( pFd->mmapSize >= iOff+nAmt ){
  4419. *pp = &((u8 *)pFd->pMapRegion)[iOff];
  4420. pFd->nFetchOut++;
  4421. }
  4422. }
  4423. #endif
  4424. return SQLITE_OK;
  4425. }
  4426. /*
  4427. ** If the third argument is non-NULL, then this function releases a
  4428. ** reference obtained by an earlier call to unixFetch(). The second
  4429. ** argument passed to this function must be the same as the corresponding
  4430. ** argument that was passed to the unixFetch() invocation.
  4431. **
  4432. ** Or, if the third argument is NULL, then this function is being called
  4433. ** to inform the VFS layer that, according to POSIX, any existing mapping
  4434. ** may now be invalid and should be unmapped.
  4435. */
  4436. static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
  4437. unixFile *pFd = (unixFile *)fd; /* The underlying database file */
  4438. UNUSED_PARAMETER(iOff);
  4439. #if SQLITE_MAX_MMAP_SIZE>0
  4440. /* If p==0 (unmap the entire file) then there must be no outstanding
  4441. ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
  4442. ** then there must be at least one outstanding. */
  4443. assert( (p==0)==(pFd->nFetchOut==0) );
  4444. /* If p!=0, it must match the iOff value. */
  4445. assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
  4446. if( p ){
  4447. pFd->nFetchOut--;
  4448. }else{
  4449. unixUnmapfile(pFd);
  4450. }
  4451. assert( pFd->nFetchOut>=0 );
  4452. #endif
  4453. return SQLITE_OK;
  4454. }
  4455. /*
  4456. ** Here ends the implementation of all sqlite3_file methods.
  4457. **
  4458. ********************** End sqlite3_file Methods *******************************
  4459. ******************************************************************************/
  4460. /*
  4461. ** This division contains definitions of sqlite3_io_methods objects that
  4462. ** implement various file locking strategies. It also contains definitions
  4463. ** of "finder" functions. A finder-function is used to locate the appropriate
  4464. ** sqlite3_io_methods object for a particular database file. The pAppData
  4465. ** field of the sqlite3_vfs VFS objects are initialized to be pointers to
  4466. ** the correct finder-function for that VFS.
  4467. **
  4468. ** Most finder functions return a pointer to a fixed sqlite3_io_methods
  4469. ** object. The only interesting finder-function is autolockIoFinder, which
  4470. ** looks at the filesystem type and tries to guess the best locking
  4471. ** strategy from that.
  4472. **
  4473. ** For finder-function F, two objects are created:
  4474. **
  4475. ** (1) The real finder-function named "FImpl()".
  4476. **
  4477. ** (2) A constant pointer to this function named just "F".
  4478. **
  4479. **
  4480. ** A pointer to the F pointer is used as the pAppData value for VFS
  4481. ** objects. We have to do this instead of letting pAppData point
  4482. ** directly at the finder-function since C90 rules prevent a void*
  4483. ** from being cast to a function pointer.
  4484. **
  4485. **
  4486. ** Each instance of this macro generates two objects:
  4487. **
  4488. ** * A constant sqlite3_io_methods object called METHOD that has locking
  4489. ** methods CLOSE, LOCK, UNLOCK, CKLOCK.
  4490. **
  4491. ** * An I/O method finder function called FINDER that returns a pointer
  4492. ** to the METHOD object in the previous bullet.
  4493. */
  /* IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK) expands to:
  **
  **   * a constant sqlite3_io_methods object named METHOD whose iVersion is
  **     VERSION, whose xClose/xLock/xUnlock/xCheckReservedLock slots are
  **     CLOSE/LOCK/UNLOCK/CKLOCK and whose other slots are the shared
  **     unixXxx routines listed below;
  **   * a function FINDER##Impl() that ignores both arguments and returns
  **     the address of METHOD;
  **   * a constant function pointer FINDER that points at FINDER##Impl().
  */
  #define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK)       \
  static const sqlite3_io_methods METHOD = {                                    \
     VERSION,                    /* iVersion */                                 \
     CLOSE,                      /* xClose */                                   \
     unixRead,                   /* xRead */                                    \
     unixWrite,                  /* xWrite */                                   \
     unixTruncate,               /* xTruncate */                                \
     unixSync,                   /* xSync */                                    \
     unixFileSize,               /* xFileSize */                                \
     LOCK,                       /* xLock */                                    \
     UNLOCK,                     /* xUnlock */                                  \
     CKLOCK,                     /* xCheckReservedLock */                       \
     unixFileControl,            /* xFileControl */                             \
     unixSectorSize,             /* xSectorSize */                              \
     unixDeviceCharacteristics,  /* xDeviceCapabilities */                      \
     unixShmMap,                 /* xShmMap */                                  \
     unixShmLock,                /* xShmLock */                                 \
     unixShmBarrier,             /* xShmBarrier */                              \
     unixShmUnmap,               /* xShmUnmap */                                \
     unixFetch,                  /* xFetch */                                   \
     unixUnfetch,                /* xUnfetch */                                 \
  };                                                                            \
  static const sqlite3_io_methods *FINDER##Impl(                                \
    const char *zNotUsed,                                                       \
    unixFile *pNotUsed                                                          \
  ){                                                                            \
    UNUSED_PARAMETER(zNotUsed); UNUSED_PARAMETER(pNotUsed);                     \
    return &METHOD;                                                             \
  }                                                                             \
  static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile*)       \
      = FINDER##Impl;
  4522. /*
  4523. ** Here are all of the sqlite3_io_methods objects for each of the
  4524. ** locking strategies. Functions that return pointers to these methods
  4525. ** are also created.
  4526. */
  4527. IOMETHODS(
  4528. posixIoFinder, /* Finder function name */
  4529. posixIoMethods, /* sqlite3_io_methods object name */
  4530. 3, /* shared memory and mmap are enabled */
  4531. unixClose, /* xClose method */
  4532. unixLock, /* xLock method */
  4533. unixUnlock, /* xUnlock method */
  4534. unixCheckReservedLock /* xCheckReservedLock method */
  4535. )
  4536. IOMETHODS(
  4537. nolockIoFinder, /* Finder function name */
  4538. nolockIoMethods, /* sqlite3_io_methods object name */
  4539. 1, /* shared memory is disabled */
  4540. nolockClose, /* xClose method */
  4541. nolockLock, /* xLock method */
  4542. nolockUnlock, /* xUnlock method */
  4543. nolockCheckReservedLock /* xCheckReservedLock method */
  4544. )
  4545. IOMETHODS(
  4546. dotlockIoFinder, /* Finder function name */
  4547. dotlockIoMethods, /* sqlite3_io_methods object name */
  4548. 1, /* shared memory is disabled */
  4549. dotlockClose, /* xClose method */
  4550. dotlockLock, /* xLock method */
  4551. dotlockUnlock, /* xUnlock method */
  4552. dotlockCheckReservedLock /* xCheckReservedLock method */
  4553. )
  #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
  /* flockIoMethods / flockIoFinder: built from the flock* routines. Only
  ** compiled when SQLITE_ENABLE_LOCKING_STYLE is set and the target is not
  ** VxWorks. */
  IOMETHODS(
    flockIoFinder,            /* Finder function name */
    flockIoMethods,           /* sqlite3_io_methods object name */
    1,                        /* shared memory is disabled */
    flockClose,               /* xClose method */
    flockLock,                /* xLock method */
    flockUnlock,              /* xUnlock method */
    flockCheckReservedLock    /* xCheckReservedLock method */
  )
  #endif
  #if OS_VXWORKS
  /* semIoMethods / semIoFinder: built from the sem* routines. VxWorks
  ** builds only. */
  IOMETHODS(
    semIoFinder,              /* Finder function name */
    semIoMethods,             /* sqlite3_io_methods object name */
    1,                        /* shared memory is disabled */
    semClose,                 /* xClose method */
    semLock,                  /* xLock method */
    semUnlock,                /* xUnlock method */
    semCheckReservedLock      /* xCheckReservedLock method */
  )
  #endif
  #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  /* afpIoMethods / afpIoFinder: built from the afp* routines. Apple builds
  ** with SQLITE_ENABLE_LOCKING_STYLE only. */
  IOMETHODS(
    afpIoFinder,              /* Finder function name */
    afpIoMethods,             /* sqlite3_io_methods object name */
    1,                        /* shared memory is disabled */
    afpClose,                 /* xClose method */
    afpLock,                  /* xLock method */
    afpUnlock,                /* xUnlock method */
    afpCheckReservedLock      /* xCheckReservedLock method */
  )
  #endif
  /*
  ** Proxy locking is a "super-method": it opens secondary file descriptors
  ** for the conch and lock files and applies proxy, dot-file, AFP, and
  ** flock() locking to those secondary files. Because of that, the division
  ** implementing proxy locking sits much further down in this file. Its
  ** sqlite3_io_methods object and finder function are nevertheless needed
  ** here, so the four locking routines are forward declared first.
  */
  #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  static int proxyClose(sqlite3_file*);
  static int proxyLock(sqlite3_file*, int);
  static int proxyUnlock(sqlite3_file*, int);
  static int proxyCheckReservedLock(sqlite3_file*, int*);

  IOMETHODS(
    proxyIoFinder,            /* Finder function name */
    proxyIoMethods,           /* sqlite3_io_methods object name */
    1,                        /* shared memory is disabled */
    proxyClose,               /* xClose method */
    proxyLock,                /* xLock method */
    proxyUnlock,              /* xUnlock method */
    proxyCheckReservedLock    /* xCheckReservedLock method */
  )
  #endif
  /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is
  ** set. nfsIoMethods therefore pairs the unixClose, unixLock and
  ** unixCheckReservedLock routines with a dedicated nfsUnlock routine. */
  #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  IOMETHODS(
    nfsIoFinder,              /* Finder function name */
    nfsIoMethods,             /* sqlite3_io_methods object name */
    1,                        /* shared memory is disabled */
    unixClose,                /* xClose method */
    unixLock,                 /* xLock method */
    nfsUnlock,                /* xUnlock method */
    unixCheckReservedLock     /* xCheckReservedLock method */
  )
  #endif
  #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  /*
  ** This "finder" function attempts to determine the best locking strategy
  ** for the database file "filePath". It then returns the sqlite3_io_methods
  ** object that implements that strategy.
  **
  ** Decision order:
  **
  **   1. filePath==NULL (a transient file)        -> nolockIoMethods
  **   2. statfs() works and the volume is mounted
  **      read-only                                -> nolockIoMethods
  **   3. statfs() works and the filesystem type
  **      name is listed in aMap[]                 -> the mapped methods
  **   4. otherwise probe with fcntl(F_GETLK):
  **        probe succeeds on an "nfs" filesystem  -> nfsIoMethods
  **        probe succeeds elsewhere               -> posixIoMethods
  **        probe fails                            -> dotlockIoMethods
  **
  ** The filesystem type name is consulted in step 4 only if statfs()
  ** actually succeeded. If statfs() failed, fsInfo was never filled in and
  ** reading fsInfo.f_fstypename would be an uninitialized read, so the
  ** filesystem is then treated as "not nfs".
  **
  ** This is for MacOSX only.
  */
  static const sqlite3_io_methods *autolockIoFinderImpl(
    const char *filePath,    /* name of the database file */
    unixFile *pNew           /* open file object for the database file */
  ){
    static const struct Mapping {
      const char *zFilesystem;              /* Filesystem type name */
      const sqlite3_io_methods *pMethods;   /* Appropriate locking method */
    } aMap[] = {
      { "hfs",    &posixIoMethods },
      { "ufs",    &posixIoMethods },
      { "afpfs",  &afpIoMethods },
      { "smbfs",  &afpIoMethods },
      { "webdav", &nolockIoMethods },
      { 0, 0 }
    };
    int i;
    int bHaveFsInfo = 0;     /* True once statfs() has filled in fsInfo */
    struct statfs fsInfo;
    struct flock lockInfo;

    if( !filePath ){
      /* If filePath==NULL that means we are dealing with a transient file
      ** that does not need to be locked. */
      return &nolockIoMethods;
    }
    if( statfs(filePath, &fsInfo) != -1 ){
      bHaveFsInfo = 1;
      if( fsInfo.f_flags & MNT_RDONLY ){
        return &nolockIoMethods;
      }
      for(i=0; aMap[i].zFilesystem; i++){
        if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
          return aMap[i].pMethods;
        }
      }
    }

    /* Default case. Handles, amongst others, "nfs".
    ** Test byte-range lock using fcntl(). If the call succeeds,
    ** assume that the file-system supports POSIX style locks.
    **
    ** The structure is zeroed first so that no uninitialized field is
    ** handed to the system call.
    */
    memset(&lockInfo, 0, sizeof(lockInfo));
    lockInfo.l_len = 1;
    lockInfo.l_start = 0;
    lockInfo.l_whence = SEEK_SET;
    lockInfo.l_type = F_RDLCK;
    if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ){
      if( bHaveFsInfo && strcmp(fsInfo.f_fstypename, "nfs")==0 ){
        return &nfsIoMethods;
      }
      return &posixIoMethods;
    }
    return &dotlockIoMethods;
  }
  static const sqlite3_io_methods
    *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;

  #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
  #if OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE
  /*
  ** Finder function for VxWorks: choose a locking strategy for the database
  ** file "filePath" and return the sqlite3_io_methods object implementing
  ** it.
  **
  **   * filePath==NULL (a transient file that needs no locking)
  **                                               -> nolockIoMethods
  **   * fcntl(F_GETLK) works on the open file     -> posixIoMethods
  **   * otherwise (named semaphore fallback)      -> semIoMethods
  **
  ** This is for VXWorks only.
  */
  static const sqlite3_io_methods *autolockIoFinderImpl(
    const char *filePath,    /* name of the database file */
    unixFile *pNew           /* the open file object */
  ){
    struct flock probe;      /* Lock description used for the F_GETLK test */

    if( !filePath ){
      return &nolockIoMethods;
    }

    /* Query a one-byte read lock at offset 0 to see whether fcntl() style
    ** locking is supported at all. */
    probe.l_type = F_RDLCK;
    probe.l_whence = SEEK_SET;
    probe.l_start = 0;
    probe.l_len = 1;
    return ( osFcntl(pNew->h, F_GETLK, &probe)!=-1 )
             ? &posixIoMethods
             : &semIoMethods;
  }
  static const sqlite3_io_methods
    *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;

  #endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */
/*
** An abstract type for a pointer to an IO method finder function.  A
** finder takes the name of the file and its open unixFile object and
** returns the sqlite3_io_methods object to use for that file.
** fillInUnixFile() reads a pointer to a finder_type out of
** sqlite3_vfs.pAppData.
*/
typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
  4723. /****************************************************************************
  4724. **************************** sqlite3_vfs methods ****************************
  4725. **
  4726. ** This division contains the implementation of methods on the
  4727. ** sqlite3_vfs object.
  4728. */
  4729. /*
  4730. ** Initialize the contents of the unixFile structure pointed to by pId.
  4731. */
  4732. static int fillInUnixFile(
  4733. sqlite3_vfs *pVfs, /* Pointer to vfs object */
  4734. int h, /* Open file descriptor of file being opened */
  4735. sqlite3_file *pId, /* Write to the unixFile structure here */
  4736. const char *zFilename, /* Name of the file being opened */
  4737. int ctrlFlags /* Zero or more UNIXFILE_* values */
  4738. ){
  4739. const sqlite3_io_methods *pLockingStyle;
  4740. unixFile *pNew = (unixFile *)pId;
  4741. int rc = SQLITE_OK;
  4742. assert( pNew->pInode==NULL );
  4743. /* Usually the path zFilename should not be a relative pathname. The
  4744. ** exception is when opening the proxy "conch" file in builds that
  4745. ** include the special Apple locking styles.
  4746. */
  4747. #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  4748. assert( zFilename==0 || zFilename[0]=='/'
  4749. || pVfs->pAppData==(void*)&autolockIoFinder );
  4750. #else
  4751. assert( zFilename==0 || zFilename[0]=='/' );
  4752. #endif
  4753. /* No locking occurs in temporary files */
  4754. assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
  4755. OSTRACE(("OPEN %-3d %s\n", h, zFilename));
  4756. pNew->h = h;
  4757. pNew->pVfs = pVfs;
  4758. pNew->zPath = zFilename;
  4759. pNew->ctrlFlags = (u8)ctrlFlags;
  4760. #if SQLITE_MAX_MMAP_SIZE>0
  4761. pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
  4762. #endif
  4763. if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
  4764. "psow", SQLITE_POWERSAFE_OVERWRITE) ){
  4765. pNew->ctrlFlags |= UNIXFILE_PSOW;
  4766. }
  4767. if( strcmp(pVfs->zName,"unix-excl")==0 ){
  4768. pNew->ctrlFlags |= UNIXFILE_EXCL;
  4769. }
  4770. #if OS_VXWORKS
  4771. pNew->pId = vxworksFindFileId(zFilename);
  4772. if( pNew->pId==0 ){
  4773. ctrlFlags |= UNIXFILE_NOLOCK;
  4774. rc = SQLITE_NOMEM;
  4775. }
  4776. #endif
  4777. if( ctrlFlags & UNIXFILE_NOLOCK ){
  4778. pLockingStyle = &nolockIoMethods;
  4779. }else{
  4780. pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
  4781. #if SQLITE_ENABLE_LOCKING_STYLE
  4782. /* Cache zFilename in the locking context (AFP and dotlock override) for
  4783. ** proxyLock activation is possible (remote proxy is based on db name)
  4784. ** zFilename remains valid until file is closed, to support */
  4785. pNew->lockingContext = (void*)zFilename;
  4786. #endif
  4787. }
  4788. if( pLockingStyle == &posixIoMethods
  4789. #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  4790. || pLockingStyle == &nfsIoMethods
  4791. #endif
  4792. ){
  4793. unixEnterMutex();
  4794. rc = findInodeInfo(pNew, &pNew->pInode);
  4795. if( rc!=SQLITE_OK ){
  4796. /* If an error occurred in findInodeInfo(), close the file descriptor
  4797. ** immediately, before releasing the mutex. findInodeInfo() may fail
  4798. ** in two scenarios:
  4799. **
  4800. ** (a) A call to fstat() failed.
  4801. ** (b) A malloc failed.
  4802. **
  4803. ** Scenario (b) may only occur if the process is holding no other
  4804. ** file descriptors open on the same file. If there were other file
  4805. ** descriptors on this file, then no malloc would be required by
  4806. ** findInodeInfo(). If this is the case, it is quite safe to close
  4807. ** handle h - as it is guaranteed that no posix locks will be released
  4808. ** by doing so.
  4809. **
  4810. ** If scenario (a) caused the error then things are not so safe. The
  4811. ** implicit assumption here is that if fstat() fails, things are in
  4812. ** such bad shape that dropping a lock or two doesn't matter much.
  4813. */
  4814. robust_close(pNew, h, __LINE__);
  4815. h = -1;
  4816. }
  4817. unixLeaveMutex();
  4818. }
  4819. #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  4820. else if( pLockingStyle == &afpIoMethods ){
  4821. /* AFP locking uses the file path so it needs to be included in
  4822. ** the afpLockingContext.
  4823. */
  4824. afpLockingContext *pCtx;
  4825. pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
  4826. if( pCtx==0 ){
  4827. rc = SQLITE_NOMEM;
  4828. }else{
  4829. /* NB: zFilename exists and remains valid until the file is closed
  4830. ** according to requirement F11141. So we do not need to make a
  4831. ** copy of the filename. */
  4832. pCtx->dbPath = zFilename;
  4833. pCtx->reserved = 0;
  4834. srandomdev();
  4835. unixEnterMutex();
  4836. rc = findInodeInfo(pNew, &pNew->pInode);
  4837. if( rc!=SQLITE_OK ){
  4838. sqlite3_free(pNew->lockingContext);
  4839. robust_close(pNew, h, __LINE__);
  4840. h = -1;
  4841. }
  4842. unixLeaveMutex();
  4843. }
  4844. }
  4845. #endif
  4846. else if( pLockingStyle == &dotlockIoMethods ){
  4847. /* Dotfile locking uses the file path so it needs to be included in
  4848. ** the dotlockLockingContext
  4849. */
  4850. char *zLockFile;
  4851. int nFilename;
  4852. assert( zFilename!=0 );
  4853. nFilename = (int)strlen(zFilename) + 6;
  4854. zLockFile = (char *)sqlite3_malloc(nFilename);
  4855. if( zLockFile==0 ){
  4856. rc = SQLITE_NOMEM;
  4857. }else{
  4858. sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);
  4859. }
  4860. pNew->lockingContext = zLockFile;
  4861. }
  4862. #if OS_VXWORKS
  4863. else if( pLockingStyle == &semIoMethods ){
  4864. /* Named semaphore locking uses the file path so it needs to be
  4865. ** included in the semLockingContext
  4866. */
  4867. unixEnterMutex();
  4868. rc = findInodeInfo(pNew, &pNew->pInode);
  4869. if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){
  4870. char *zSemName = pNew->pInode->aSemName;
  4871. int n;
  4872. sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem",
  4873. pNew->pId->zCanonicalName);
  4874. for( n=1; zSemName[n]; n++ )
  4875. if( zSemName[n]=='/' ) zSemName[n] = '_';
  4876. pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1);
  4877. if( pNew->pInode->pSem == SEM_FAILED ){
  4878. rc = SQLITE_NOMEM;
  4879. pNew->pInode->aSemName[0] = '\0';
  4880. }
  4881. }
  4882. unixLeaveMutex();
  4883. }
  4884. #endif
  4885. pNew->lastErrno = 0;
  4886. #if OS_VXWORKS
  4887. if( rc!=SQLITE_OK ){
  4888. if( h>=0 ) robust_close(pNew, h, __LINE__);
  4889. h = -1;
  4890. osUnlink(zFilename);
  4891. pNew->ctrlFlags |= UNIXFILE_DELETE;
  4892. }
  4893. #endif
  4894. if( rc!=SQLITE_OK ){
  4895. if( h>=0 ) robust_close(pNew, h, __LINE__);
  4896. }else{
  4897. pNew->pMethod = pLockingStyle;
  4898. OpenCounter(+1);
  4899. verifyDbFile(pNew);
  4900. }
  4901. return rc;
  4902. }
  4903. /*
  4904. ** Return the name of a directory in which to put temporary files.
  4905. ** If no suitable temporary file directory can be found, return NULL.
  4906. */
  4907. static const char *unixTempFileDir(void){
  4908. static const char *azDirs[] = {
  4909. 0,
  4910. 0,
  4911. 0,
  4912. "/var/tmp",
  4913. "/usr/tmp",
  4914. "/tmp",
  4915. 0 /* List terminator */
  4916. };
  4917. unsigned int i;
  4918. struct stat buf;
  4919. const char *zDir = 0;
  4920. azDirs[0] = sqlite3_temp_directory;
  4921. if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR");
  4922. if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR");
  4923. for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){
  4924. if( zDir==0 ) continue;
  4925. if( osStat(zDir, &buf) ) continue;
  4926. if( !S_ISDIR(buf.st_mode) ) continue;
  4927. if( osAccess(zDir, 07) ) continue;
  4928. break;
  4929. }
  4930. return zDir;
  4931. }
  4932. /*
  4933. ** Create a temporary file name in zBuf. zBuf must be allocated
  4934. ** by the calling process and must be big enough to hold at least
  4935. ** pVfs->mxPathname bytes.
  4936. */
  4937. static int unixGetTempname(int nBuf, char *zBuf){
  4938. static const unsigned char zChars[] =
  4939. "abcdefghijklmnopqrstuvwxyz"
  4940. "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  4941. "0123456789";
  4942. unsigned int i, j;
  4943. const char *zDir;
  4944. /* It's odd to simulate an io-error here, but really this is just
  4945. ** using the io-error infrastructure to test that SQLite handles this
  4946. ** function failing.
  4947. */
  4948. SimulateIOError( return SQLITE_IOERR );
  4949. zDir = unixTempFileDir();
  4950. if( zDir==0 ) zDir = ".";
  4951. /* Check that the output buffer is large enough for the temporary file
  4952. ** name. If it is not, return SQLITE_ERROR.
  4953. */
  4954. if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){
  4955. return SQLITE_ERROR;
  4956. }
  4957. do{
  4958. sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
  4959. j = (int)strlen(zBuf);
  4960. sqlite3_randomness(15, &zBuf[j]);
  4961. for(i=0; i<15; i++, j++){
  4962. zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
  4963. }
  4964. zBuf[j] = 0;
  4965. zBuf[j+1] = 0;
  4966. }while( osAccess(zBuf,0)==0 );
  4967. return SQLITE_OK;
  4968. }
  4969. #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  4970. /*
  4971. ** Routine to transform a unixFile into a proxy-locking unixFile.
  4972. ** Implementation in the proxy-lock division, but used by unixOpen()
  4973. ** if SQLITE_PREFER_PROXY_LOCKING is defined.
  4974. */
  4975. static int proxyTransformUnixFile(unixFile*, const char*);
  4976. #endif
  4977. /*
  4978. ** Search for an unused file descriptor that was opened on the database
  4979. ** file (not a journal or master-journal file) identified by pathname
  4980. ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
  4981. ** argument to this function.
  4982. **
  4983. ** Such a file descriptor may exist if a database connection was closed
  4984. ** but the associated file descriptor could not be closed because some
  4985. ** other file descriptor open on the same file is holding a file-lock.
  4986. ** Refer to comments in the unixClose() function and the lengthy comment
  4987. ** describing "Posix Advisory Locking" at the start of this file for
  4988. ** further details. Also, ticket #4018.
  4989. **
  4990. ** If a suitable file descriptor is found, then it is returned. If no
  4991. ** such file descriptor is located, -1 is returned.
  4992. */
  4993. static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
  4994. UnixUnusedFd *pUnused = 0;
  4995. /* Do not search for an unused file descriptor on vxworks. Not because
  4996. ** vxworks would not benefit from the change (it might, we're not sure),
  4997. ** but because no way to test it is currently available. It is better
  4998. ** not to risk breaking vxworks support for the sake of such an obscure
  4999. ** feature. */
  5000. #if !OS_VXWORKS
  5001. struct stat sStat; /* Results of stat() call */
  5002. /* A stat() call may fail for various reasons. If this happens, it is
  5003. ** almost certain that an open() call on the same path will also fail.
  5004. ** For this reason, if an error occurs in the stat() call here, it is
  5005. ** ignored and -1 is returned. The caller will try to open a new file
  5006. ** descriptor on the same path, fail, and return an error to SQLite.
  5007. **
  5008. ** Even if a subsequent open() call does succeed, the consequences of
  5009. ** not searching for a resusable file descriptor are not dire. */
  5010. if( 0==osStat(zPath, &sStat) ){
  5011. unixInodeInfo *pInode;
  5012. unixEnterMutex();
  5013. pInode = inodeList;
  5014. while( pInode && (pInode->fileId.dev!=sStat.st_dev
  5015. || pInode->fileId.ino!=sStat.st_ino) ){
  5016. pInode = pInode->pNext;
  5017. }
  5018. if( pInode ){
  5019. UnixUnusedFd **pp;
  5020. for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));
  5021. pUnused = *pp;
  5022. if( pUnused ){
  5023. *pp = pUnused->pNext;
  5024. }
  5025. }
  5026. unixLeaveMutex();
  5027. }
  5028. #endif /* if !OS_VXWORKS */
  5029. return pUnused;
  5030. }
  5031. /*
  5032. ** This function is called by unixOpen() to determine the unix permissions
  5033. ** to create new files with. If no error occurs, then SQLITE_OK is returned
  5034. ** and a value suitable for passing as the third argument to open(2) is
  5035. ** written to *pMode. If an IO error occurs, an SQLite error code is
  5036. ** returned and the value of *pMode is not modified.
  5037. **
  5038. ** In most cases cases, this routine sets *pMode to 0, which will become
  5039. ** an indication to robust_open() to create the file using
  5040. ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
  5041. ** But if the file being opened is a WAL or regular journal file, then
  5042. ** this function queries the file-system for the permissions on the
  5043. ** corresponding database file and sets *pMode to this value. Whenever
  5044. ** possible, WAL and journal files are created using the same permissions
  5045. ** as the associated database file.
  5046. **
  5047. ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
  5048. ** original filename is unavailable. But 8_3_NAMES is only used for
  5049. ** FAT filesystems and permissions do not matter there, so just use
  5050. ** the default permissions.
  5051. */
  5052. static int findCreateFileMode(
  5053. const char *zPath, /* Path of file (possibly) being created */
  5054. int flags, /* Flags passed as 4th argument to xOpen() */
  5055. mode_t *pMode, /* OUT: Permissions to open file with */
  5056. uid_t *pUid, /* OUT: uid to set on the file */
  5057. gid_t *pGid /* OUT: gid to set on the file */
  5058. ){
  5059. int rc = SQLITE_OK; /* Return Code */
  5060. *pMode = 0;
  5061. *pUid = 0;
  5062. *pGid = 0;
  5063. if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
  5064. char zDb[MAX_PATHNAME+1]; /* Database file path */
  5065. int nDb; /* Number of valid bytes in zDb */
  5066. struct stat sStat; /* Output of stat() on database file */
  5067. /* zPath is a path to a WAL or journal file. The following block derives
  5068. ** the path to the associated database file from zPath. This block handles
  5069. ** the following naming conventions:
  5070. **
  5071. ** "<path to db>-journal"
  5072. ** "<path to db>-wal"
  5073. ** "<path to db>-journalNN"
  5074. ** "<path to db>-walNN"
  5075. **
  5076. ** where NN is a decimal number. The NN naming schemes are
  5077. ** used by the test_multiplex.c module.
  5078. */
  5079. nDb = sqlite3Strlen30(zPath) - 1;
  5080. #ifdef SQLITE_ENABLE_8_3_NAMES
  5081. while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--;
  5082. if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK;
  5083. #else
  5084. while( zPath[nDb]!='-' ){
  5085. assert( nDb>0 );
  5086. assert( zPath[nDb]!='\n' );
  5087. nDb--;
  5088. }
  5089. #endif
  5090. memcpy(zDb, zPath, nDb);
  5091. zDb[nDb] = '\0';
  5092. if( 0==osStat(zDb, &sStat) ){
  5093. *pMode = sStat.st_mode & 0777;
  5094. *pUid = sStat.st_uid;
  5095. *pGid = sStat.st_gid;
  5096. }else{
  5097. rc = SQLITE_IOERR_FSTAT;
  5098. }
  5099. }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
  5100. *pMode = 0600;
  5101. }
  5102. return rc;
  5103. }
  5104. /*
  5105. ** Open the file zPath.
  5106. **
  5107. ** Previously, the SQLite OS layer used three functions in place of this
  5108. ** one:
  5109. **
  5110. ** sqlite3OsOpenReadWrite();
  5111. ** sqlite3OsOpenReadOnly();
  5112. ** sqlite3OsOpenExclusive();
  5113. **
  5114. ** These calls correspond to the following combinations of flags:
  5115. **
  5116. ** ReadWrite() -> (READWRITE | CREATE)
  5117. ** ReadOnly() -> (READONLY)
  5118. ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
  5119. **
  5120. ** The old OpenExclusive() accepted a boolean argument - "delFlag". If
  5121. ** true, the file was configured to be automatically deleted when the
  5122. ** file handle closed. To achieve the same effect using this new
  5123. ** interface, add the DELETEONCLOSE flag to those specified above for
  5124. ** OpenExclusive().
  5125. */
  5126. static int unixOpen(
  5127. sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */
  5128. const char *zPath, /* Pathname of file to be opened */
  5129. sqlite3_file *pFile, /* The file descriptor to be filled in */
  5130. int flags, /* Input flags to control the opening */
  5131. int *pOutFlags /* Output flags returned to SQLite core */
  5132. ){
  5133. unixFile *p = (unixFile *)pFile;
  5134. int fd = -1; /* File descriptor returned by open() */
  5135. int openFlags = 0; /* Flags to pass to open() */
  5136. int eType = flags&0xFFFFFF00; /* Type of file to open */
  5137. int noLock; /* True to omit locking primitives */
  5138. int rc = SQLITE_OK; /* Function Return Code */
  5139. int ctrlFlags = 0; /* UNIXFILE_* flags */
  5140. int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
  5141. int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
  5142. int isCreate = (flags & SQLITE_OPEN_CREATE);
  5143. int isReadonly = (flags & SQLITE_OPEN_READONLY);
  5144. int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
  5145. #if SQLITE_ENABLE_LOCKING_STYLE
  5146. int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);
  5147. #endif
  5148. #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
  5149. struct statfs fsInfo;
  5150. #endif
  5151. /* If creating a master or main-file journal, this function will open
  5152. ** a file-descriptor on the directory too. The first time unixSync()
  5153. ** is called the directory file descriptor will be fsync()ed and close()d.
  5154. */
  5155. int syncDir = (isCreate && (
  5156. eType==SQLITE_OPEN_MASTER_JOURNAL
  5157. || eType==SQLITE_OPEN_MAIN_JOURNAL
  5158. || eType==SQLITE_OPEN_WAL
  5159. ));
  5160. /* If argument zPath is a NULL pointer, this function is required to open
  5161. ** a temporary file. Use this buffer to store the file name in.
  5162. */
  5163. char zTmpname[MAX_PATHNAME+2];
  5164. const char *zName = zPath;
  5165. /* Check the following statements are true:
  5166. **
  5167. ** (a) Exactly one of the READWRITE and READONLY flags must be set, and
  5168. ** (b) if CREATE is set, then READWRITE must also be set, and
  5169. ** (c) if EXCLUSIVE is set, then CREATE must also be set.
  5170. ** (d) if DELETEONCLOSE is set, then CREATE must also be set.
  5171. */
  5172. assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
  5173. assert(isCreate==0 || isReadWrite);
  5174. assert(isExclusive==0 || isCreate);
  5175. assert(isDelete==0 || isCreate);
  5176. /* The main DB, main journal, WAL file and master journal are never
  5177. ** automatically deleted. Nor are they ever temporary files. */
  5178. assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
  5179. assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
  5180. assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );
  5181. assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL );
  5182. /* Assert that the upper layer has set one of the "file-type" flags. */
  5183. assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
  5184. || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
  5185. || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
  5186. || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
  5187. );
  5188. memset(p, 0, sizeof(unixFile));
  5189. if( eType==SQLITE_OPEN_MAIN_DB ){
  5190. UnixUnusedFd *pUnused;
  5191. pUnused = findReusableFd(zName, flags);
  5192. if( pUnused ){
  5193. fd = pUnused->fd;
  5194. }else{
  5195. pUnused = sqlite3_malloc(sizeof(*pUnused));
  5196. if( !pUnused ){
  5197. return SQLITE_NOMEM;
  5198. }
  5199. }
  5200. p->pUnused = pUnused;
  5201. /* Database filenames are double-zero terminated if they are not
  5202. ** URIs with parameters. Hence, they can always be passed into
  5203. ** sqlite3_uri_parameter(). */
  5204. assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 );
  5205. }else if( !zName ){
  5206. /* If zName is NULL, the upper layer is requesting a temp file. */
  5207. assert(isDelete && !syncDir);
  5208. rc = unixGetTempname(MAX_PATHNAME+2, zTmpname);
  5209. if( rc!=SQLITE_OK ){
  5210. return rc;
  5211. }
  5212. zName = zTmpname;
  5213. /* Generated temporary filenames are always double-zero terminated
  5214. ** for use by sqlite3_uri_parameter(). */
  5215. assert( zName[strlen(zName)+1]==0 );
  5216. }
  5217. /* Determine the value of the flags parameter passed to POSIX function
  5218. ** open(). These must be calculated even if open() is not called, as
  5219. ** they may be stored as part of the file handle and used by the
  5220. ** 'conch file' locking functions later on. */
  5221. if( isReadonly ) openFlags |= O_RDONLY;
  5222. if( isReadWrite ) openFlags |= O_RDWR;
  5223. if( isCreate ) openFlags |= O_CREAT;
  5224. if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
  5225. openFlags |= (O_LARGEFILE|O_BINARY);
  5226. if( fd<0 ){
  5227. mode_t openMode; /* Permissions to create file with */
  5228. uid_t uid; /* Userid for the file */
  5229. gid_t gid; /* Groupid for the file */
  5230. rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
  5231. if( rc!=SQLITE_OK ){
  5232. assert( !p->pUnused );
  5233. assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
  5234. return rc;
  5235. }
  5236. fd = robust_open(zName, openFlags, openMode);
  5237. OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags));
  5238. if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
  5239. /* Failed to open the file for read/write access. Try read-only. */
  5240. flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
  5241. openFlags &= ~(O_RDWR|O_CREAT);
  5242. flags |= SQLITE_OPEN_READONLY;
  5243. openFlags |= O_RDONLY;
  5244. isReadonly = 1;
  5245. fd = robust_open(zName, openFlags, openMode);
  5246. }
  5247. if( fd<0 ){
  5248. rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);
  5249. goto open_finished;
  5250. }
  5251. /* If this process is running as root and if creating a new rollback
  5252. ** journal or WAL file, set the ownership of the journal or WAL to be
  5253. ** the same as the original database.
  5254. */
  5255. if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
  5256. osFchown(fd, uid, gid);
  5257. }
  5258. }
  5259. assert( fd>=0 );
  5260. if( pOutFlags ){
  5261. *pOutFlags = flags;
  5262. }
  5263. if( p->pUnused ){
  5264. p->pUnused->fd = fd;
  5265. p->pUnused->flags = flags;
  5266. }
  5267. if( isDelete ){
  5268. #if OS_VXWORKS
  5269. zPath = zName;
  5270. #else
  5271. osUnlink(zName);
  5272. #endif
  5273. }
  5274. #if SQLITE_ENABLE_LOCKING_STYLE
  5275. else{
  5276. p->openFlags = openFlags;
  5277. }
  5278. #endif
  5279. noLock = eType!=SQLITE_OPEN_MAIN_DB;
  5280. #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
  5281. if( fstatfs(fd, &fsInfo) == -1 ){
  5282. ((unixFile*)pFile)->lastErrno = errno;
  5283. robust_close(p, fd, __LINE__);
  5284. return SQLITE_IOERR_ACCESS;
  5285. }
  5286. if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {
  5287. ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
  5288. }
  5289. #endif
  5290. /* Set up appropriate ctrlFlags */
  5291. if( isDelete ) ctrlFlags |= UNIXFILE_DELETE;
  5292. if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY;
  5293. if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK;
  5294. if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC;
  5295. if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI;
  5296. #if SQLITE_ENABLE_LOCKING_STYLE
  5297. #if SQLITE_PREFER_PROXY_LOCKING
  5298. isAutoProxy = 1;
  5299. #endif
  5300. if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){
  5301. char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
  5302. int useProxy = 0;
  5303. /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means
  5304. ** never use proxy, NULL means use proxy for non-local files only. */
  5305. if( envforce!=NULL ){
  5306. useProxy = atoi(envforce)>0;
  5307. }else{
  5308. if( statfs(zPath, &fsInfo) == -1 ){
  5309. /* In theory, the close(fd) call is sub-optimal. If the file opened
  5310. ** with fd is a database file, and there are other connections open
  5311. ** on that file that are currently holding advisory locks on it,
  5312. ** then the call to close() will cancel those locks. In practice,
  5313. ** we're assuming that statfs() doesn't fail very often. At least
  5314. ** not while other file descriptors opened by the same process on
  5315. ** the same file are working. */
  5316. p->lastErrno = errno;
  5317. robust_close(p, fd, __LINE__);
  5318. rc = SQLITE_IOERR_ACCESS;
  5319. goto open_finished;
  5320. }
  5321. useProxy = !(fsInfo.f_flags&MNT_LOCAL);
  5322. }
  5323. if( useProxy ){
  5324. rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
  5325. if( rc==SQLITE_OK ){
  5326. rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
  5327. if( rc!=SQLITE_OK ){
  5328. /* Use unixClose to clean up the resources added in fillInUnixFile
  5329. ** and clear all the structure's references. Specifically,
  5330. ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op
  5331. */
  5332. unixClose(pFile);
  5333. return rc;
  5334. }
  5335. }
  5336. goto open_finished;
  5337. }
  5338. }
  5339. #endif
  5340. rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
  5341. open_finished:
  5342. if( rc!=SQLITE_OK ){
  5343. sqlite3_free(p->pUnused);
  5344. }
  5345. return rc;
  5346. }
  5347. /*
  5348. ** Delete the file at zPath. If the dirSync argument is true, fsync()
  5349. ** the directory after deleting the file.
  5350. */
  5351. static int unixDelete(
  5352. sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */
  5353. const char *zPath, /* Name of file to be deleted */
  5354. int dirSync /* If true, fsync() directory after deleting file */
  5355. ){
  5356. int rc = SQLITE_OK;
  5357. UNUSED_PARAMETER(NotUsed);
  5358. SimulateIOError(return SQLITE_IOERR_DELETE);
  5359. if( osUnlink(zPath)==(-1) ){
  5360. if( errno==ENOENT ){
  5361. rc = SQLITE_IOERR_DELETE_NOENT;
  5362. }else{
  5363. rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
  5364. }
  5365. return rc;
  5366. }
  5367. #ifndef SQLITE_DISABLE_DIRSYNC
  5368. if( (dirSync & 1)!=0 ){
  5369. int fd;
  5370. rc = osOpenDirectory(zPath, &fd);
  5371. if( rc==SQLITE_OK ){
  5372. #if OS_VXWORKS
  5373. if( fsync(fd)==-1 )
  5374. #else
  5375. if( fsync(fd) )
  5376. #endif
  5377. {
  5378. rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);
  5379. }
  5380. robust_close(0, fd, __LINE__);
  5381. }else if( rc==SQLITE_CANTOPEN ){
  5382. rc = SQLITE_OK;
  5383. }
  5384. }
  5385. #endif
  5386. return rc;
  5387. }
  5388. /*
  5389. ** Test the existence of or access permissions of file zPath. The
  5390. ** test performed depends on the value of flags:
  5391. **
  5392. ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
  5393. ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
  5394. ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
  5395. **
  5396. ** Otherwise return 0.
  5397. */
  5398. static int unixAccess(
  5399. sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */
  5400. const char *zPath, /* Path of the file to examine */
  5401. int flags, /* What do we want to learn about the zPath file? */
  5402. int *pResOut /* Write result boolean here */
  5403. ){
  5404. int amode = 0;
  5405. UNUSED_PARAMETER(NotUsed);
  5406. SimulateIOError( return SQLITE_IOERR_ACCESS; );
  5407. switch( flags ){
  5408. case SQLITE_ACCESS_EXISTS:
  5409. amode = F_OK;
  5410. break;
  5411. case SQLITE_ACCESS_READWRITE:
  5412. amode = W_OK|R_OK;
  5413. break;
  5414. case SQLITE_ACCESS_READ:
  5415. amode = R_OK;
  5416. break;
  5417. default:
  5418. assert(!"Invalid flags argument");
  5419. }
  5420. *pResOut = (osAccess(zPath, amode)==0);
  5421. if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){
  5422. struct stat buf;
  5423. if( 0==osStat(zPath, &buf) && buf.st_size==0 ){
  5424. *pResOut = 0;
  5425. }
  5426. }
  5427. return SQLITE_OK;
  5428. }
  5429. /*
  5430. ** Turn a relative pathname into a full pathname. The relative path
  5431. ** is stored as a nul-terminated string in the buffer pointed to by
  5432. ** zPath.
  5433. **
  5434. ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
  5435. ** (in this case, MAX_PATHNAME bytes). The full-path is written to
  5436. ** this buffer before returning.
  5437. */
  5438. static int unixFullPathname(
  5439. sqlite3_vfs *pVfs, /* Pointer to vfs object */
  5440. const char *zPath, /* Possibly relative input path */
  5441. int nOut, /* Size of output buffer in bytes */
  5442. char *zOut /* Output buffer */
  5443. ){
  5444. /* It's odd to simulate an io-error here, but really this is just
  5445. ** using the io-error infrastructure to test that SQLite handles this
  5446. ** function failing. This function could fail if, for example, the
  5447. ** current working directory has been unlinked.
  5448. */
  5449. SimulateIOError( return SQLITE_ERROR );
  5450. assert( pVfs->mxPathname==MAX_PATHNAME );
  5451. UNUSED_PARAMETER(pVfs);
  5452. zOut[nOut-1] = '\0';
  5453. if( zPath[0]=='/' ){
  5454. sqlite3_snprintf(nOut, zOut, "%s", zPath);
  5455. }else{
  5456. int nCwd;
  5457. if( osGetcwd(zOut, nOut-1)==0 ){
  5458. return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath);
  5459. }
  5460. nCwd = (int)strlen(zOut);
  5461. sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
  5462. }
  5463. return SQLITE_OK;
  5464. }
  5465. #ifndef SQLITE_OMIT_LOAD_EXTENSION
  5466. /*
  5467. ** Interfaces for opening a shared library, finding entry points
  5468. ** within the shared library, and closing the shared library.
  5469. */
  5470. #include <dlfcn.h>
/*
** Open the shared library zFilename by calling dlopen() with
** RTLD_NOW | RTLD_GLOBAL and return whatever handle dlopen() returns.
** The VFS argument is not used.
*/
static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){
  UNUSED_PARAMETER(NotUsed);
  return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
}
  5475. /*
  5476. ** SQLite calls this function immediately after a call to unixDlSym() or
  5477. ** unixDlOpen() fails (returns a null pointer). If a more detailed error
  5478. ** message is available, it is written to zBufOut. If no error message
  5479. ** is available, zBufOut is left unmodified and SQLite uses a default
  5480. ** error message.
  5481. */
  5482. static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){
  5483. const char *zErr;
  5484. UNUSED_PARAMETER(NotUsed);
  5485. unixEnterMutex();
  5486. zErr = dlerror();
  5487. if( zErr ){
  5488. sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
  5489. }
  5490. unixLeaveMutex();
  5491. }
  5492. static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){
  5493. /*
  5494. ** GCC with -pedantic-errors says that C90 does not allow a void* to be
  5495. ** cast into a pointer to a function. And yet the library dlsym() routine
  5496. ** returns a void* which is really a pointer to a function. So how do we
  5497. ** use dlsym() with -pedantic-errors?
  5498. **
  5499. ** Variable x below is defined to be a pointer to a function taking
  5500. ** parameters void* and const char* and returning a pointer to a function.
  5501. ** We initialize x by assigning it a pointer to the dlsym() function.
  5502. ** (That assignment requires a cast.) Then we call the function that
  5503. ** x points to.
  5504. **
  5505. ** This work-around is unlikely to work correctly on any system where
  5506. ** you really cannot cast a function pointer into void*. But then, on the
  5507. ** other hand, dlsym() will not work on such a system either, so we have
  5508. ** not really lost anything.
  5509. */
  5510. void (*(*x)(void*,const char*))(void);
  5511. UNUSED_PARAMETER(NotUsed);
  5512. x = (void(*(*)(void*,const char*))(void))dlsym;
  5513. return (*x)(p, zSym);
  5514. }
  5515. static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){
  5516. UNUSED_PARAMETER(NotUsed);
  5517. dlclose(pHandle);
  5518. }
  5519. #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
  5520. #define unixDlOpen 0
  5521. #define unixDlError 0
  5522. #define unixDlSym 0
  5523. #define unixDlClose 0
  5524. #endif
  5525. /*
  5526. ** Write nBuf bytes of random data to the supplied buffer zBuf.
  5527. */
  5528. static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){
  5529. UNUSED_PARAMETER(NotUsed);
  5530. assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int)));
  5531. /* We have to initialize zBuf to prevent valgrind from reporting
  5532. ** errors. The reports issued by valgrind are incorrect - we would
  5533. ** prefer that the randomness be increased by making use of the
  5534. ** uninitialized space in zBuf - but valgrind errors tend to worry
  5535. ** some users. Rather than argue, it seems easier just to initialize
  5536. ** the whole array and silence valgrind, even if that means less randomness
  5537. ** in the random seed.
  5538. **
  5539. ** When testing, initializing zBuf[] to zero is all we do. That means
  5540. ** that we always use the same random number sequence. This makes the
  5541. ** tests repeatable.
  5542. */
  5543. memset(zBuf, 0, nBuf);
  5544. #if !defined(SQLITE_TEST)
  5545. {
  5546. int pid, fd, got;
  5547. fd = robust_open("/dev/urandom", O_RDONLY, 0);
  5548. if( fd<0 ){
  5549. time_t t;
  5550. time(&t);
  5551. memcpy(zBuf, &t, sizeof(t));
  5552. pid = getpid();
  5553. memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
  5554. assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf );
  5555. nBuf = sizeof(t) + sizeof(pid);
  5556. }else{
  5557. do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR );
  5558. robust_close(0, fd, __LINE__);
  5559. }
  5560. }
  5561. #endif
  5562. return nBuf;
  5563. }
  5564. /*
  5565. ** Sleep for a little while. Return the amount of time slept.
  5566. ** The argument is the number of microseconds we want to sleep.
  5567. ** The return value is the number of microseconds of sleep actually
  5568. ** requested from the underlying operating system, a number which
  5569. ** might be greater than or equal to the argument, but not less
  5570. ** than the argument.
  5571. */
  5572. static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){
  5573. #if OS_VXWORKS
  5574. struct timespec sp;
  5575. sp.tv_sec = microseconds / 1000000;
  5576. sp.tv_nsec = (microseconds % 1000000) * 1000;
  5577. nanosleep(&sp, NULL);
  5578. UNUSED_PARAMETER(NotUsed);
  5579. return microseconds;
  5580. #elif defined(HAVE_USLEEP) && HAVE_USLEEP
  5581. usleep(microseconds);
  5582. UNUSED_PARAMETER(NotUsed);
  5583. return microseconds;
  5584. #else
  5585. int seconds = (microseconds+999999)/1000000;
  5586. sleep(seconds);
  5587. UNUSED_PARAMETER(NotUsed);
  5588. return seconds*1000000;
  5589. #endif
  5590. }
  5591. /*
  5592. ** The following variable, if set to a non-zero value, is interpreted as
  5593. ** the number of seconds since 1970 and is used to set the result of
  5594. ** sqlite3OsCurrentTime() during testing.
  5595. */
  5596. #ifdef SQLITE_TEST
  5597. int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */
  5598. #endif
  5599. /*
  5600. ** Find the current time (in Universal Coordinated Time). Write into *piNow
  5601. ** the current time and date as a Julian Day number times 86_400_000. In
  5602. ** other words, write into *piNow the number of milliseconds since the Julian
  5603. ** epoch of noon in Greenwich on November 24, 4714 B.C according to the
  5604. ** proleptic Gregorian calendar.
  5605. **
  5606. ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date
  5607. ** cannot be found.
  5608. */
  5609. static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
  5610. static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
  5611. int rc = SQLITE_OK;
  5612. #if defined(NO_GETTOD)
  5613. time_t t;
  5614. time(&t);
  5615. *piNow = ((sqlite3_int64)t)*1000 + unixEpoch;
  5616. #elif OS_VXWORKS
  5617. struct timespec sNow;
  5618. clock_gettime(CLOCK_REALTIME, &sNow);
  5619. *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;
  5620. #else
  5621. struct timeval sNow;
  5622. if( gettimeofday(&sNow, 0)==0 ){
  5623. *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
  5624. }else{
  5625. rc = SQLITE_ERROR;
  5626. }
  5627. #endif
  5628. #ifdef SQLITE_TEST
  5629. if( sqlite3_current_time ){
  5630. *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch;
  5631. }
  5632. #endif
  5633. UNUSED_PARAMETER(NotUsed);
  5634. return rc;
  5635. }
  5636. /*
  5637. ** Find the current time (in Universal Coordinated Time). Write the
  5638. ** current time and date as a Julian Day number into *prNow and
  5639. ** return 0. Return 1 if the time and date cannot be found.
  5640. */
  5641. static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
  5642. sqlite3_int64 i = 0;
  5643. int rc;
  5644. UNUSED_PARAMETER(NotUsed);
  5645. rc = unixCurrentTimeInt64(0, &i);
  5646. *prNow = i/86400000.0;
  5647. return rc;
  5648. }
  5649. /*
  5650. ** We added the xGetLastError() method with the intention of providing
  5651. ** better low-level error messages when operating-system problems come up
  5652. ** during SQLite operation. But so far, none of that has been implemented
  5653. ** in the core. So this routine is never called. For now, it is merely
  5654. ** a place-holder.
  5655. */
  5656. static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
  5657. UNUSED_PARAMETER(NotUsed);
  5658. UNUSED_PARAMETER(NotUsed2);
  5659. UNUSED_PARAMETER(NotUsed3);
  5660. return 0;
  5661. }
  5662. /*
  5663. ************************ End of sqlite3_vfs methods ***************************
  5664. ******************************************************************************/
  5665. /******************************************************************************
  5666. ************************** Begin Proxy Locking ********************************
  5667. **
  5668. ** Proxy locking is a "uber-locking-method" in this sense: It uses the
  5669. ** other locking methods on secondary lock files. Proxy locking is a
  5670. ** meta-layer over top of the primitive locking implemented above. For
  5671. ** this reason, the division that implements proxy locking is deferred
  5672. ** until late in the file (here) after all of the other I/O methods have
  5673. ** been defined - so that the primitive locking methods are available
  5674. ** as services to help with the implementation of proxy locking.
  5675. **
  5676. ****
  5677. **
  5678. ** The default locking schemes in SQLite use byte-range locks on the
  5679. ** database file to coordinate safe, concurrent access by multiple readers
  5680. ** and writers [http://sqlite.org/lockingv3.html]. The five file locking
  5681. ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
  5682. ** as POSIX read & write locks over fixed set of locations (via fsctl),
  5683. ** on AFP and SMB only exclusive byte-range locks are available via fsctl
  5684. ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states.
  5685. ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected
  5686. ** address in the shared range is taken for a SHARED lock, the entire
  5687. ** shared range is taken for an EXCLUSIVE lock):
  5688. **
  5689. ** PENDING_BYTE 0x40000000
  5690. ** RESERVED_BYTE 0x40000001
  5691. ** SHARED_RANGE 0x40000002 -> 0x40000200
  5692. **
  5693. ** This works well on the local file system, but shows a nearly 100x
  5694. ** slowdown in read performance on AFP because the AFP client disables
  5695. ** the read cache when byte-range locks are present. Enabling the read
  5696. ** cache exposes a cache coherency problem that is present on all OS X
  5697. ** supported network file systems. NFS and AFP both observe the
  5698. ** close-to-open semantics for ensuring cache coherency
  5699. ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
  5700. ** address the requirements for concurrent database access by multiple
  5701. ** readers and writers
  5702. ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
  5703. **
  5704. ** To address the performance and cache coherency issues, proxy file locking
  5705. ** changes the way database access is controlled by limiting access to a
  5706. ** single host at a time and moving file locks off of the database file
  5707. ** and onto a proxy file on the local file system.
  5708. **
  5709. **
  5710. ** Using proxy locks
  5711. ** -----------------
  5712. **
  5713. ** C APIs
  5714. **
  5715. ** sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE,
  5716. ** <proxy_path> | ":auto:");
  5717. ** sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>);
  5718. **
  5719. **
  5720. ** SQL pragmas
  5721. **
  5722. ** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
  5723. ** PRAGMA [database.]lock_proxy_file
  5724. **
  5725. ** Specifying ":auto:" means that if there is a conch file with a matching
  5726. ** host ID in it, the proxy path in the conch file will be used, otherwise
  5727. ** a proxy path based on the user's temp dir
  5728. ** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
  5729. ** actual proxy file name is generated from the name and path of the
  5730. ** database file. For example:
  5731. **
  5732. ** For database path "/Users/me/foo.db"
  5733. ** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
  5734. **
  5735. ** Once a lock proxy is configured for a database connection, it can not
  5736. ** be removed, however it may be switched to a different proxy path via
  5737. ** the above APIs (assuming the conch file is not being held by another
  5738. ** connection or process).
  5739. **
  5740. **
  5741. ** How proxy locking works
  5742. ** -----------------------
  5743. **
  5744. ** Proxy file locking relies primarily on two new supporting files:
  5745. **
  5746. ** * conch file to limit access to the database file to a single host
  5747. ** at a time
  5748. **
  5749. ** * proxy file to act as a proxy for the advisory locks normally
  5750. ** taken on the database
  5751. **
  5752. ** The conch file - to use a proxy file, sqlite must first "hold the conch"
  5753. ** by taking an sqlite-style shared lock on the conch file, reading the
  5754. ** contents and comparing the host's unique host ID (see below) and lock
  5755. ** proxy path against the values stored in the conch. The conch file is
  5756. ** stored in the same directory as the database file and the file name
  5757. ** is patterned after the database file name as ".<databasename>-conch".
  5758. ** If the conch file does not exist, or its contents do not match the
  5759. ** host ID and/or proxy path, then the lock is escalated to an exclusive
  5760. ** lock and the conch file contents are updated with the host ID and proxy
  5761. ** path and the lock is downgraded to a shared lock again. If the conch
  5762. ** is held by another process (with a shared lock), the exclusive lock
  5763. ** will fail and SQLITE_BUSY is returned.
  5764. **
  5765. ** The proxy file - a single-byte file used for all advisory file locks
  5766. ** normally taken on the database file. This allows for safe sharing
  5767. ** of the database file for multiple readers and writers on the same
  5768. ** host (the conch ensures that they all use the same local lock file).
  5769. **
  5770. ** Requesting the lock proxy does not immediately take the conch, it is
  5771. ** only taken when the first request to lock database file is made.
  5772. ** This matches the semantics of the traditional locking behavior, where
  5773. ** opening a connection to a database file does not take a lock on it.
  5774. ** The shared lock and an open file descriptor are maintained until
  5775. ** the connection to the database is closed.
  5776. **
  5777. ** The proxy file and the lock file are never deleted so they only need
  5778. ** to be created the first time they are used.
  5779. **
  5780. ** Configuration options
  5781. ** ---------------------
  5782. **
  5783. ** SQLITE_PREFER_PROXY_LOCKING
  5784. **
  5785. ** Database files accessed on non-local file systems are
  5786. ** automatically configured for proxy locking, lock files are
  5787. ** named automatically using the same logic as
  5788. ** PRAGMA lock_proxy_file=":auto:"
  5789. **
  5790. ** SQLITE_PROXY_DEBUG
  5791. **
  5792. ** Enables the logging of error messages during host id file
  5793. ** retrieval and creation
  5794. **
  5795. ** LOCKPROXYDIR
  5796. **
  5797. ** Overrides the default directory used for lock proxy files that
  5798. ** are named automatically via the ":auto:" setting
  5799. **
  5800. ** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
  5801. **
  5802. ** Permissions to use when creating a directory for storing the
  5803. ** lock proxy files, only used when LOCKPROXYDIR is not set.
  5804. **
  5805. **
  5806. ** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
  5807. ** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
  5808. ** force proxy locking to be used for every database file opened, and 0
  5809. ** will force automatic proxy locking to be disabled for all database
  5810. ** files (explicitly calling the SQLITE_SET_LOCKPROXYFILE pragma or
  5811. ** sqlite3_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
  5812. */
  5813. /*
  5814. ** Proxy locking is only available on MacOSX
  5815. */
  5816. #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
  5817. /*
  5818. ** The proxyLockingContext has the path and file structures for the remote
  5819. ** and local proxy files in it
  5820. */
  5821. typedef struct proxyLockingContext proxyLockingContext;
  5822. struct proxyLockingContext {
  5823. unixFile *conchFile; /* Open conch file */
  5824. char *conchFilePath; /* Name of the conch file */
  5825. unixFile *lockProxy; /* Open proxy lock file */
  5826. char *lockProxyPath; /* Name of the proxy lock file */
  5827. char *dbPath; /* Name of the open file */
  5828. int conchHeld; /* 1 if the conch is held, -1 if lockless */
  5829. void *oldLockingContext; /* Original lockingcontext to restore on close */
  5830. sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */
  5831. };
  5832. /*
  5833. ** The proxy lock file path for the database at dbPath is written into lPath,
  5834. ** which must point to valid, writable memory large enough for a maxLen length
  5835. ** file path.
  5836. */
  5837. static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){
  5838. int len;
  5839. int dbLen;
  5840. int i;
  5841. #ifdef LOCKPROXYDIR
  5842. len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
  5843. #else
  5844. # ifdef _CS_DARWIN_USER_TEMP_DIR
  5845. {
  5846. if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){
  5847. OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n",
  5848. lPath, errno, getpid()));
  5849. return SQLITE_IOERR_LOCK;
  5850. }
  5851. len = strlcat(lPath, "sqliteplocks", maxLen);
  5852. }
  5853. # else
  5854. len = strlcpy(lPath, "/tmp/", maxLen);
  5855. # endif
  5856. #endif
  5857. if( lPath[len-1]!='/' ){
  5858. len = strlcat(lPath, "/", maxLen);
  5859. }
  5860. /* transform the db path to a unique cache name */
  5861. dbLen = (int)strlen(dbPath);
  5862. for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){
  5863. char c = dbPath[i];
  5864. lPath[i+len] = (c=='/')?'_':c;
  5865. }
  5866. lPath[i+len]='\0';
  5867. strlcat(lPath, ":auto:", maxLen);
  5868. OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, getpid()));
  5869. return SQLITE_OK;
  5870. }
  5871. /*
  5872. ** Creates the lock file and any missing directories in lockPath
  5873. */
  5874. static int proxyCreateLockPath(const char *lockPath){
  5875. int i, len;
  5876. char buf[MAXPATHLEN];
  5877. int start = 0;
  5878. assert(lockPath!=NULL);
  5879. /* try to create all the intermediate directories */
  5880. len = (int)strlen(lockPath);
  5881. buf[0] = lockPath[0];
  5882. for( i=1; i<len; i++ ){
  5883. if( lockPath[i] == '/' && (i - start > 0) ){
  5884. /* only mkdir if leaf dir != "." or "/" or ".." */
  5885. if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/')
  5886. || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){
  5887. buf[i]='\0';
  5888. if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
  5889. int err=errno;
  5890. if( err!=EEXIST ) {
  5891. OSTRACE(("CREATELOCKPATH FAILED creating %s, "
  5892. "'%s' proxy lock path=%s pid=%d\n",
  5893. buf, strerror(err), lockPath, getpid()));
  5894. return err;
  5895. }
  5896. }
  5897. }
  5898. start=i+1;
  5899. }
  5900. buf[i] = lockPath[i];
  5901. }
  5902. OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n", lockPath, getpid()));
  5903. return 0;
  5904. }
  5905. /*
  5906. ** Create a new VFS file descriptor (stored in memory obtained from
  5907. ** sqlite3_malloc) and open the file named "path" in the file descriptor.
  5908. **
  5909. ** The caller is responsible not only for closing the file descriptor
  5910. ** but also for freeing the memory associated with the file descriptor.
  5911. */
  5912. static int proxyCreateUnixFile(
  5913. const char *path, /* path for the new unixFile */
  5914. unixFile **ppFile, /* unixFile created and returned by ref */
  5915. int islockfile /* if non zero missing dirs will be created */
  5916. ) {
  5917. int fd = -1;
  5918. unixFile *pNew;
  5919. int rc = SQLITE_OK;
  5920. int openFlags = O_RDWR | O_CREAT;
  5921. sqlite3_vfs dummyVfs;
  5922. int terrno = 0;
  5923. UnixUnusedFd *pUnused = NULL;
  5924. /* 1. first try to open/create the file
  5925. ** 2. if that fails, and this is a lock file (not-conch), try creating
  5926. ** the parent directories and then try again.
  5927. ** 3. if that fails, try to open the file read-only
  5928. ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file
  5929. */
  5930. pUnused = findReusableFd(path, openFlags);
  5931. if( pUnused ){
  5932. fd = pUnused->fd;
  5933. }else{
  5934. pUnused = sqlite3_malloc(sizeof(*pUnused));
  5935. if( !pUnused ){
  5936. return SQLITE_NOMEM;
  5937. }
  5938. }
  5939. if( fd<0 ){
  5940. fd = robust_open(path, openFlags, 0);
  5941. terrno = errno;
  5942. if( fd<0 && errno==ENOENT && islockfile ){
  5943. if( proxyCreateLockPath(path) == SQLITE_OK ){
  5944. fd = robust_open(path, openFlags, 0);
  5945. }
  5946. }
  5947. }
  5948. if( fd<0 ){
  5949. openFlags = O_RDONLY;
  5950. fd = robust_open(path, openFlags, 0);
  5951. terrno = errno;
  5952. }
  5953. if( fd<0 ){
  5954. if( islockfile ){
  5955. return SQLITE_BUSY;
  5956. }
  5957. switch (terrno) {
  5958. case EACCES:
  5959. return SQLITE_PERM;
  5960. case EIO:
  5961. return SQLITE_IOERR_LOCK; /* even though it is the conch */
  5962. default:
  5963. return SQLITE_CANTOPEN_BKPT;
  5964. }
  5965. }
  5966. pNew = (unixFile *)sqlite3_malloc(sizeof(*pNew));
  5967. if( pNew==NULL ){
  5968. rc = SQLITE_NOMEM;
  5969. goto end_create_proxy;
  5970. }
  5971. memset(pNew, 0, sizeof(unixFile));
  5972. pNew->openFlags = openFlags;
  5973. memset(&dummyVfs, 0, sizeof(dummyVfs));
  5974. dummyVfs.pAppData = (void*)&autolockIoFinder;
  5975. dummyVfs.zName = "dummy";
  5976. pUnused->fd = fd;
  5977. pUnused->flags = openFlags;
  5978. pNew->pUnused = pUnused;
  5979. rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0);
  5980. if( rc==SQLITE_OK ){
  5981. *ppFile = pNew;
  5982. return SQLITE_OK;
  5983. }
  5984. end_create_proxy:
  5985. robust_close(pNew, fd, __LINE__);
  5986. sqlite3_free(pNew);
  5987. sqlite3_free(pUnused);
  5988. return rc;
  5989. }
  5990. #ifdef SQLITE_TEST
  5991. /* simulate multiple hosts by creating unique hostid file paths */
  5992. int sqlite3_hostid_num = 0;
  5993. #endif
  5994. #define PROXY_HOSTIDLEN 16 /* conch file host id length */
  5995. /* Not always defined in the headers as it ought to be */
  5996. extern int gethostuuid(uuid_t id, const struct timespec *wait);
  5997. /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN
  5998. ** bytes of writable memory.
  5999. */
  6000. static int proxyGetHostID(unsigned char *pHostID, int *pError){
  6001. assert(PROXY_HOSTIDLEN == sizeof(uuid_t));
  6002. memset(pHostID, 0, PROXY_HOSTIDLEN);
  6003. #if defined(__MAX_OS_X_VERSION_MIN_REQUIRED)\
  6004. && __MAC_OS_X_VERSION_MIN_REQUIRED<1050
  6005. {
  6006. static const struct timespec timeout = {1, 0}; /* 1 sec timeout */
  6007. if( gethostuuid(pHostID, &timeout) ){
  6008. int err = errno;
  6009. if( pError ){
  6010. *pError = err;
  6011. }
  6012. return SQLITE_IOERR;
  6013. }
  6014. }
  6015. #else
  6016. UNUSED_PARAMETER(pError);
  6017. #endif
  6018. #ifdef SQLITE_TEST
  6019. /* simulate multiple hosts by creating unique hostid file paths */
  6020. if( sqlite3_hostid_num != 0){
  6021. pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF));
  6022. }
  6023. #endif
  6024. return SQLITE_OK;
  6025. }
  6026. /* The conch file contains the header, host id and lock file path
  6027. */
  6028. #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */
  6029. #define PROXY_HEADERLEN 1 /* conch file header length */
  6030. #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN)
  6031. #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN)
  6032. /*
  6033. ** Takes an open conch file, copies the contents to a new path and then moves
  6034. ** it back. The newly created file's file descriptor is assigned to the
  6035. ** conch file structure and finally the original conch file descriptor is
  6036. ** closed. Returns zero if successful.
  6037. */
  6038. static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
  6039. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6040. unixFile *conchFile = pCtx->conchFile;
  6041. char tPath[MAXPATHLEN];
  6042. char buf[PROXY_MAXCONCHLEN];
  6043. char *cPath = pCtx->conchFilePath;
  6044. size_t readLen = 0;
  6045. size_t pathLen = 0;
  6046. char errmsg[64] = "";
  6047. int fd = -1;
  6048. int rc = -1;
  6049. UNUSED_PARAMETER(myHostID);
  6050. /* create a new path by replace the trailing '-conch' with '-break' */
  6051. pathLen = strlcpy(tPath, cPath, MAXPATHLEN);
  6052. if( pathLen>MAXPATHLEN || pathLen<6 ||
  6053. (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){
  6054. sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen);
  6055. goto end_breaklock;
  6056. }
  6057. /* read the conch content */
  6058. readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0);
  6059. if( readLen<PROXY_PATHINDEX ){
  6060. sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen);
  6061. goto end_breaklock;
  6062. }
  6063. /* write it out to the temporary break file */
  6064. fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0);
  6065. if( fd<0 ){
  6066. sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);
  6067. goto end_breaklock;
  6068. }
  6069. if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){
  6070. sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno);
  6071. goto end_breaklock;
  6072. }
  6073. if( rename(tPath, cPath) ){
  6074. sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno);
  6075. goto end_breaklock;
  6076. }
  6077. rc = 0;
  6078. fprintf(stderr, "broke stale lock on %s\n", cPath);
  6079. robust_close(pFile, conchFile->h, __LINE__);
  6080. conchFile->h = fd;
  6081. conchFile->openFlags = O_RDWR | O_CREAT;
  6082. end_breaklock:
  6083. if( rc ){
  6084. if( fd>=0 ){
  6085. osUnlink(tPath);
  6086. robust_close(pFile, fd, __LINE__);
  6087. }
  6088. fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
  6089. }
  6090. return rc;
  6091. }
  6092. /* Take the requested lock on the conch file and break a stale lock if the
  6093. ** host id matches.
  6094. */
  6095. static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
  6096. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6097. unixFile *conchFile = pCtx->conchFile;
  6098. int rc = SQLITE_OK;
  6099. int nTries = 0;
  6100. struct timespec conchModTime;
  6101. memset(&conchModTime, 0, sizeof(conchModTime));
  6102. do {
  6103. rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
  6104. nTries ++;
  6105. if( rc==SQLITE_BUSY ){
  6106. /* If the lock failed (busy):
  6107. * 1st try: get the mod time of the conch, wait 0.5s and try again.
  6108. * 2nd try: fail if the mod time changed or host id is different, wait
  6109. * 10 sec and try again
  6110. * 3rd try: break the lock unless the mod time has changed.
  6111. */
  6112. struct stat buf;
  6113. if( osFstat(conchFile->h, &buf) ){
  6114. pFile->lastErrno = errno;
  6115. return SQLITE_IOERR_LOCK;
  6116. }
  6117. if( nTries==1 ){
  6118. conchModTime = buf.st_mtimespec;
  6119. usleep(500000); /* wait 0.5 sec and try the lock again*/
  6120. continue;
  6121. }
  6122. assert( nTries>1 );
  6123. if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec ||
  6124. conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){
  6125. return SQLITE_BUSY;
  6126. }
  6127. if( nTries==2 ){
  6128. char tBuf[PROXY_MAXCONCHLEN];
  6129. int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0);
  6130. if( len<0 ){
  6131. pFile->lastErrno = errno;
  6132. return SQLITE_IOERR_LOCK;
  6133. }
  6134. if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){
  6135. /* don't break the lock if the host id doesn't match */
  6136. if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){
  6137. return SQLITE_BUSY;
  6138. }
  6139. }else{
  6140. /* don't break the lock on short read or a version mismatch */
  6141. return SQLITE_BUSY;
  6142. }
  6143. usleep(10000000); /* wait 10 sec and try the lock again */
  6144. continue;
  6145. }
  6146. assert( nTries==3 );
  6147. if( 0==proxyBreakConchLock(pFile, myHostID) ){
  6148. rc = SQLITE_OK;
  6149. if( lockType==EXCLUSIVE_LOCK ){
  6150. rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
  6151. }
  6152. if( !rc ){
  6153. rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
  6154. }
  6155. }
  6156. }
  6157. } while( rc==SQLITE_BUSY && nTries<3 );
  6158. return rc;
  6159. }
  6160. /* Takes the conch by taking a shared lock and reading the contents of the conch. If
  6161. ** lockPath is non-NULL, the host ID and lock file path must match. A NULL
  6162. ** lockPath means that the lockPath in the conch file will be used if the
  6163. ** host IDs match, or a new lock path will be generated automatically
  6164. ** and written to the conch file.
  6165. */
  6166. static int proxyTakeConch(unixFile *pFile){
  6167. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6168. if( pCtx->conchHeld!=0 ){
  6169. return SQLITE_OK;
  6170. }else{
  6171. unixFile *conchFile = pCtx->conchFile;
  6172. uuid_t myHostID;
  6173. int pError = 0;
  6174. char readBuf[PROXY_MAXCONCHLEN];
  6175. char lockPath[MAXPATHLEN];
  6176. char *tempLockPath = NULL;
  6177. int rc = SQLITE_OK;
  6178. int createConch = 0;
  6179. int hostIdMatch = 0;
  6180. int readLen = 0;
  6181. int tryOldLockPath = 0;
  6182. int forceNewLockPath = 0;
  6183. OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h,
  6184. (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), getpid()));
  6185. rc = proxyGetHostID(myHostID, &pError);
  6186. if( (rc&0xff)==SQLITE_IOERR ){
  6187. pFile->lastErrno = pError;
  6188. goto end_takeconch;
  6189. }
  6190. rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
  6191. if( rc!=SQLITE_OK ){
  6192. goto end_takeconch;
  6193. }
  6194. /* read the existing conch file */
  6195. readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
  6196. if( readLen<0 ){
  6197. /* I/O error: lastErrno set by seekAndRead */
  6198. pFile->lastErrno = conchFile->lastErrno;
  6199. rc = SQLITE_IOERR_READ;
  6200. goto end_takeconch;
  6201. }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) ||
  6202. readBuf[0]!=(char)PROXY_CONCHVERSION ){
  6203. /* a short read or version format mismatch means we need to create a new
  6204. ** conch file.
  6205. */
  6206. createConch = 1;
  6207. }
  6208. /* if the host id matches and the lock path already exists in the conch
  6209. ** we'll try to use the path there, if we can't open that path, we'll
  6210. ** retry with a new auto-generated path
  6211. */
  6212. do { /* in case we need to try again for an :auto: named lock file */
  6213. if( !createConch && !forceNewLockPath ){
  6214. hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,
  6215. PROXY_HOSTIDLEN);
  6216. /* if the conch has data compare the contents */
  6217. if( !pCtx->lockProxyPath ){
  6218. /* for auto-named local lock file, just check the host ID and we'll
  6219. ** use the local lock file path that's already in there
  6220. */
  6221. if( hostIdMatch ){
  6222. size_t pathLen = (readLen - PROXY_PATHINDEX);
  6223. if( pathLen>=MAXPATHLEN ){
  6224. pathLen=MAXPATHLEN-1;
  6225. }
  6226. memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
  6227. lockPath[pathLen] = 0;
  6228. tempLockPath = lockPath;
  6229. tryOldLockPath = 1;
  6230. /* create a copy of the lock path if the conch is taken */
  6231. goto end_takeconch;
  6232. }
  6233. }else if( hostIdMatch
  6234. && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
  6235. readLen-PROXY_PATHINDEX)
  6236. ){
  6237. /* conch host and lock path match */
  6238. goto end_takeconch;
  6239. }
  6240. }
  6241. /* if the conch isn't writable and doesn't match, we can't take it */
  6242. if( (conchFile->openFlags&O_RDWR) == 0 ){
  6243. rc = SQLITE_BUSY;
  6244. goto end_takeconch;
  6245. }
  6246. /* either the conch didn't match or we need to create a new one */
  6247. if( !pCtx->lockProxyPath ){
  6248. proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
  6249. tempLockPath = lockPath;
  6250. /* create a copy of the lock path _only_ if the conch is taken */
  6251. }
  6252. /* update conch with host and path (this will fail if other process
  6253. ** has a shared lock already), if the host id matches, use the big
  6254. ** stick.
  6255. */
  6256. futimes(conchFile->h, NULL);
  6257. if( hostIdMatch && !createConch ){
  6258. if( conchFile->pInode && conchFile->pInode->nShared>1 ){
  6259. /* We are trying for an exclusive lock but another thread in this
  6260. ** same process is still holding a shared lock. */
  6261. rc = SQLITE_BUSY;
  6262. } else {
  6263. rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
  6264. }
  6265. }else{
  6266. rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, EXCLUSIVE_LOCK);
  6267. }
  6268. if( rc==SQLITE_OK ){
  6269. char writeBuffer[PROXY_MAXCONCHLEN];
  6270. int writeSize = 0;
  6271. writeBuffer[0] = (char)PROXY_CONCHVERSION;
  6272. memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
  6273. if( pCtx->lockProxyPath!=NULL ){
  6274. strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, MAXPATHLEN);
  6275. }else{
  6276. strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
  6277. }
  6278. writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
  6279. robust_ftruncate(conchFile->h, writeSize);
  6280. rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
  6281. fsync(conchFile->h);
  6282. /* If we created a new conch file (not just updated the contents of a
  6283. ** valid conch file), try to match the permissions of the database
  6284. */
  6285. if( rc==SQLITE_OK && createConch ){
  6286. struct stat buf;
  6287. int err = osFstat(pFile->h, &buf);
  6288. if( err==0 ){
  6289. mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
  6290. S_IROTH|S_IWOTH);
  6291. /* try to match the database file R/W permissions, ignore failure */
  6292. #ifndef SQLITE_PROXY_DEBUG
  6293. osFchmod(conchFile->h, cmode);
  6294. #else
  6295. do{
  6296. rc = osFchmod(conchFile->h, cmode);
  6297. }while( rc==(-1) && errno==EINTR );
  6298. if( rc!=0 ){
  6299. int code = errno;
  6300. fprintf(stderr, "fchmod %o FAILED with %d %s\n",
  6301. cmode, code, strerror(code));
  6302. } else {
  6303. fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
  6304. }
  6305. }else{
  6306. int code = errno;
  6307. fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
  6308. err, code, strerror(code));
  6309. #endif
  6310. }
  6311. }
  6312. }
  6313. conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);
  6314. end_takeconch:
  6315. OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
  6316. if( rc==SQLITE_OK && pFile->openFlags ){
  6317. int fd;
  6318. if( pFile->h>=0 ){
  6319. robust_close(pFile, pFile->h, __LINE__);
  6320. }
  6321. pFile->h = -1;
  6322. fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
  6323. OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
  6324. if( fd>=0 ){
  6325. pFile->h = fd;
  6326. }else{
  6327. rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
  6328. during locking */
  6329. }
  6330. }
  6331. if( rc==SQLITE_OK && !pCtx->lockProxy ){
  6332. char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
  6333. rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
  6334. if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
  6335. /* we couldn't create the proxy lock file with the old lock file path
  6336. ** so try again via auto-naming
  6337. */
  6338. forceNewLockPath = 1;
  6339. tryOldLockPath = 0;
  6340. continue; /* go back to the do {} while start point, try again */
  6341. }
  6342. }
  6343. if( rc==SQLITE_OK ){
  6344. /* Need to make a copy of path if we extracted the value
  6345. ** from the conch file or the path was allocated on the stack
  6346. */
  6347. if( tempLockPath ){
  6348. pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
  6349. if( !pCtx->lockProxyPath ){
  6350. rc = SQLITE_NOMEM;
  6351. }
  6352. }
  6353. }
  6354. if( rc==SQLITE_OK ){
  6355. pCtx->conchHeld = 1;
  6356. if( pCtx->lockProxy->pMethod == &afpIoMethods ){
  6357. afpLockingContext *afpCtx;
  6358. afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
  6359. afpCtx->dbPath = pCtx->lockProxyPath;
  6360. }
  6361. } else {
  6362. conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
  6363. }
  6364. OSTRACE(("TAKECONCH %d %s\n", conchFile->h,
  6365. rc==SQLITE_OK?"ok":"failed"));
  6366. return rc;
  6367. } while (1); /* in case we need to retry the :auto: lock file -
  6368. ** we should never get here except via the 'continue' call. */
  6369. }
  6370. }
  6371. /*
  6372. ** If pFile holds a lock on a conch file, then release that lock.
  6373. */
  6374. static int proxyReleaseConch(unixFile *pFile){
  6375. int rc = SQLITE_OK; /* Subroutine return code */
  6376. proxyLockingContext *pCtx; /* The locking context for the proxy lock */
  6377. unixFile *conchFile; /* Name of the conch file */
  6378. pCtx = (proxyLockingContext *)pFile->lockingContext;
  6379. conchFile = pCtx->conchFile;
  6380. OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h,
  6381. (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
  6382. getpid()));
  6383. if( pCtx->conchHeld>0 ){
  6384. rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
  6385. }
  6386. pCtx->conchHeld = 0;
  6387. OSTRACE(("RELEASECONCH %d %s\n", conchFile->h,
  6388. (rc==SQLITE_OK ? "ok" : "failed")));
  6389. return rc;
  6390. }
  6391. /*
  6392. ** Given the name of a database file, compute the name of its conch file.
  6393. ** Store the conch filename in memory obtained from sqlite3_malloc().
  6394. ** Make *pConchPath point to the new name. Return SQLITE_OK on success
  6395. ** or SQLITE_NOMEM if unable to obtain memory.
  6396. **
  6397. ** The caller is responsible for ensuring that the allocated memory
  6398. ** space is eventually freed.
  6399. **
  6400. ** *pConchPath is set to NULL if a memory allocation error occurs.
  6401. */
  6402. static int proxyCreateConchPathname(char *dbPath, char **pConchPath){
  6403. int i; /* Loop counter */
  6404. int len = (int)strlen(dbPath); /* Length of database filename - dbPath */
  6405. char *conchPath; /* buffer in which to construct conch name */
  6406. /* Allocate space for the conch filename and initialize the name to
  6407. ** the name of the original database file. */
  6408. *pConchPath = conchPath = (char *)sqlite3_malloc(len + 8);
  6409. if( conchPath==0 ){
  6410. return SQLITE_NOMEM;
  6411. }
  6412. memcpy(conchPath, dbPath, len+1);
  6413. /* now insert a "." before the last / character */
  6414. for( i=(len-1); i>=0; i-- ){
  6415. if( conchPath[i]=='/' ){
  6416. i++;
  6417. break;
  6418. }
  6419. }
  6420. conchPath[i]='.';
  6421. while ( i<len ){
  6422. conchPath[i+1]=dbPath[i];
  6423. i++;
  6424. }
  6425. /* append the "-conch" suffix to the file */
  6426. memcpy(&conchPath[i+1], "-conch", 7);
  6427. assert( (int)strlen(conchPath) == len+7 );
  6428. return SQLITE_OK;
  6429. }
  6430. /* Takes a fully configured proxy locking-style unix file and switches
  6431. ** the local lock file path
  6432. */
  6433. static int switchLockProxyPath(unixFile *pFile, const char *path) {
  6434. proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
  6435. char *oldPath = pCtx->lockProxyPath;
  6436. int rc = SQLITE_OK;
  6437. if( pFile->eFileLock!=NO_LOCK ){
  6438. return SQLITE_BUSY;
  6439. }
  6440. /* nothing to do if the path is NULL, :auto: or matches the existing path */
  6441. if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
  6442. (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
  6443. return SQLITE_OK;
  6444. }else{
  6445. unixFile *lockProxy = pCtx->lockProxy;
  6446. pCtx->lockProxy=NULL;
  6447. pCtx->conchHeld = 0;
  6448. if( lockProxy!=NULL ){
  6449. rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
  6450. if( rc ) return rc;
  6451. sqlite3_free(lockProxy);
  6452. }
  6453. sqlite3_free(oldPath);
  6454. pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
  6455. }
  6456. return rc;
  6457. }
  6458. /*
  6459. ** pFile is a file that has been opened by a prior xOpen call. dbPath
  6460. ** is a string buffer at least MAXPATHLEN+1 characters in size.
  6461. **
  6462. ** This routine find the filename associated with pFile and writes it
  6463. ** int dbPath.
  6464. */
  6465. static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){
  6466. #if defined(__APPLE__)
  6467. if( pFile->pMethod == &afpIoMethods ){
  6468. /* afp style keeps a reference to the db path in the filePath field
  6469. ** of the struct */
  6470. assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
  6471. strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN);
  6472. } else
  6473. #endif
  6474. if( pFile->pMethod == &dotlockIoMethods ){
  6475. /* dot lock style uses the locking context to store the dot lock
  6476. ** file path */
  6477. int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
  6478. memcpy(dbPath, (char *)pFile->lockingContext, len + 1);
  6479. }else{
  6480. /* all other styles use the locking context to store the db file path */
  6481. assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
  6482. strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN);
  6483. }
  6484. return SQLITE_OK;
  6485. }
  6486. /*
  6487. ** Takes an already filled in unix file and alters it so all file locking
  6488. ** will be performed on the local proxy lock file. The following fields
  6489. ** are preserved in the locking context so that they can be restored and
  6490. ** the unix structure properly cleaned up at close time:
  6491. ** ->lockingContext
  6492. ** ->pMethod
  6493. */
  6494. static int proxyTransformUnixFile(unixFile *pFile, const char *path) {
  6495. proxyLockingContext *pCtx;
  6496. char dbPath[MAXPATHLEN+1]; /* Name of the database file */
  6497. char *lockPath=NULL;
  6498. int rc = SQLITE_OK;
  6499. if( pFile->eFileLock!=NO_LOCK ){
  6500. return SQLITE_BUSY;
  6501. }
  6502. proxyGetDbPathForUnixFile(pFile, dbPath);
  6503. if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
  6504. lockPath=NULL;
  6505. }else{
  6506. lockPath=(char *)path;
  6507. }
  6508. OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h,
  6509. (lockPath ? lockPath : ":auto:"), getpid()));
  6510. pCtx = sqlite3_malloc( sizeof(*pCtx) );
  6511. if( pCtx==0 ){
  6512. return SQLITE_NOMEM;
  6513. }
  6514. memset(pCtx, 0, sizeof(*pCtx));
  6515. rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath);
  6516. if( rc==SQLITE_OK ){
  6517. rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0);
  6518. if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){
  6519. /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and
  6520. ** (c) the file system is read-only, then enable no-locking access.
  6521. ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts
  6522. ** that openFlags will have only one of O_RDONLY or O_RDWR.
  6523. */
  6524. struct statfs fsInfo;
  6525. struct stat conchInfo;
  6526. int goLockless = 0;
  6527. if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) {
  6528. int err = errno;
  6529. if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){
  6530. goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY;
  6531. }
  6532. }
  6533. if( goLockless ){
  6534. pCtx->conchHeld = -1; /* read only FS/ lockless */
  6535. rc = SQLITE_OK;
  6536. }
  6537. }
  6538. }
  6539. if( rc==SQLITE_OK && lockPath ){
  6540. pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
  6541. }
  6542. if( rc==SQLITE_OK ){
  6543. pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
  6544. if( pCtx->dbPath==NULL ){
  6545. rc = SQLITE_NOMEM;
  6546. }
  6547. }
  6548. if( rc==SQLITE_OK ){
  6549. /* all memory is allocated, proxys are created and assigned,
  6550. ** switch the locking context and pMethod then return.
  6551. */
  6552. pCtx->oldLockingContext = pFile->lockingContext;
  6553. pFile->lockingContext = pCtx;
  6554. pCtx->pOldMethod = pFile->pMethod;
  6555. pFile->pMethod = &proxyIoMethods;
  6556. }else{
  6557. if( pCtx->conchFile ){
  6558. pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
  6559. sqlite3_free(pCtx->conchFile);
  6560. }
  6561. sqlite3DbFree(0, pCtx->lockProxyPath);
  6562. sqlite3_free(pCtx->conchFilePath);
  6563. sqlite3_free(pCtx);
  6564. }
  6565. OSTRACE(("TRANSPROXY %d %s\n", pFile->h,
  6566. (rc==SQLITE_OK ? "ok" : "failed")));
  6567. return rc;
  6568. }
  6569. /*
  6570. ** This routine handles sqlite3_file_control() calls that are specific
  6571. ** to proxy locking.
  6572. */
  6573. static int proxyFileControl(sqlite3_file *id, int op, void *pArg){
  6574. switch( op ){
  6575. case SQLITE_GET_LOCKPROXYFILE: {
  6576. unixFile *pFile = (unixFile*)id;
  6577. if( pFile->pMethod == &proxyIoMethods ){
  6578. proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
  6579. proxyTakeConch(pFile);
  6580. if( pCtx->lockProxyPath ){
  6581. *(const char **)pArg = pCtx->lockProxyPath;
  6582. }else{
  6583. *(const char **)pArg = ":auto: (not held)";
  6584. }
  6585. } else {
  6586. *(const char **)pArg = NULL;
  6587. }
  6588. return SQLITE_OK;
  6589. }
  6590. case SQLITE_SET_LOCKPROXYFILE: {
  6591. unixFile *pFile = (unixFile*)id;
  6592. int rc = SQLITE_OK;
  6593. int isProxyStyle = (pFile->pMethod == &proxyIoMethods);
  6594. if( pArg==NULL || (const char *)pArg==0 ){
  6595. if( isProxyStyle ){
  6596. /* turn off proxy locking - not supported */
  6597. rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/;
  6598. }else{
  6599. /* turn off proxy locking - already off - NOOP */
  6600. rc = SQLITE_OK;
  6601. }
  6602. }else{
  6603. const char *proxyPath = (const char *)pArg;
  6604. if( isProxyStyle ){
  6605. proxyLockingContext *pCtx =
  6606. (proxyLockingContext*)pFile->lockingContext;
  6607. if( !strcmp(pArg, ":auto:")
  6608. || (pCtx->lockProxyPath &&
  6609. !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN))
  6610. ){
  6611. rc = SQLITE_OK;
  6612. }else{
  6613. rc = switchLockProxyPath(pFile, proxyPath);
  6614. }
  6615. }else{
  6616. /* turn on proxy file locking */
  6617. rc = proxyTransformUnixFile(pFile, proxyPath);
  6618. }
  6619. }
  6620. return rc;
  6621. }
  6622. default: {
  6623. assert( 0 ); /* The call assures that only valid opcodes are sent */
  6624. }
  6625. }
  6626. /*NOTREACHED*/
  6627. return SQLITE_ERROR;
  6628. }
  /*
  ** Within this division (the proxy locking implementation) the procedures
  ** above this point are all utilities.  The lock-related methods of the
  ** proxy-locking sqlite3_io_methods object follow.
  */
  6634. /*
  6635. ** This routine checks if there is a RESERVED lock held on the specified
  6636. ** file by this or any other process. If such a lock is held, set *pResOut
  6637. ** to a non-zero value otherwise *pResOut is set to zero. The return value
  6638. ** is set to SQLITE_OK unless an I/O error occurs during lock checking.
  6639. */
  6640. static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
  6641. unixFile *pFile = (unixFile*)id;
  6642. int rc = proxyTakeConch(pFile);
  6643. if( rc==SQLITE_OK ){
  6644. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6645. if( pCtx->conchHeld>0 ){
  6646. unixFile *proxy = pCtx->lockProxy;
  6647. return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
  6648. }else{ /* conchHeld < 0 is lockless */
  6649. pResOut=0;
  6650. }
  6651. }
  6652. return rc;
  6653. }
  6654. /*
  6655. ** Lock the file with the lock specified by parameter eFileLock - one
  6656. ** of the following:
  6657. **
  6658. ** (1) SHARED_LOCK
  6659. ** (2) RESERVED_LOCK
  6660. ** (3) PENDING_LOCK
  6661. ** (4) EXCLUSIVE_LOCK
  6662. **
  6663. ** Sometimes when requesting one lock state, additional lock states
  6664. ** are inserted in between. The locking might fail on one of the later
  6665. ** transitions leaving the lock state different from what it started but
  6666. ** still short of its goal. The following chart shows the allowed
  6667. ** transitions and the inserted intermediate states:
  6668. **
  6669. ** UNLOCKED -> SHARED
  6670. ** SHARED -> RESERVED
  6671. ** SHARED -> (PENDING) -> EXCLUSIVE
  6672. ** RESERVED -> (PENDING) -> EXCLUSIVE
  6673. ** PENDING -> EXCLUSIVE
  6674. **
  6675. ** This routine will only increase a lock. Use the sqlite3OsUnlock()
  6676. ** routine to lower a locking level.
  6677. */
  6678. static int proxyLock(sqlite3_file *id, int eFileLock) {
  6679. unixFile *pFile = (unixFile*)id;
  6680. int rc = proxyTakeConch(pFile);
  6681. if( rc==SQLITE_OK ){
  6682. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6683. if( pCtx->conchHeld>0 ){
  6684. unixFile *proxy = pCtx->lockProxy;
  6685. rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock);
  6686. pFile->eFileLock = proxy->eFileLock;
  6687. }else{
  6688. /* conchHeld < 0 is lockless */
  6689. }
  6690. }
  6691. return rc;
  6692. }
  6693. /*
  6694. ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
  6695. ** must be either NO_LOCK or SHARED_LOCK.
  6696. **
  6697. ** If the locking level of the file descriptor is already at or below
  6698. ** the requested locking level, this routine is a no-op.
  6699. */
  6700. static int proxyUnlock(sqlite3_file *id, int eFileLock) {
  6701. unixFile *pFile = (unixFile*)id;
  6702. int rc = proxyTakeConch(pFile);
  6703. if( rc==SQLITE_OK ){
  6704. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6705. if( pCtx->conchHeld>0 ){
  6706. unixFile *proxy = pCtx->lockProxy;
  6707. rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock);
  6708. pFile->eFileLock = proxy->eFileLock;
  6709. }else{
  6710. /* conchHeld < 0 is lockless */
  6711. }
  6712. }
  6713. return rc;
  6714. }
  6715. /*
  6716. ** Close a file that uses proxy locks.
  6717. */
  6718. static int proxyClose(sqlite3_file *id) {
  6719. if( id ){
  6720. unixFile *pFile = (unixFile*)id;
  6721. proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
  6722. unixFile *lockProxy = pCtx->lockProxy;
  6723. unixFile *conchFile = pCtx->conchFile;
  6724. int rc = SQLITE_OK;
  6725. if( lockProxy ){
  6726. rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
  6727. if( rc ) return rc;
  6728. rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
  6729. if( rc ) return rc;
  6730. sqlite3_free(lockProxy);
  6731. pCtx->lockProxy = 0;
  6732. }
  6733. if( conchFile ){
  6734. if( pCtx->conchHeld ){
  6735. rc = proxyReleaseConch(pFile);
  6736. if( rc ) return rc;
  6737. }
  6738. rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
  6739. if( rc ) return rc;
  6740. sqlite3_free(conchFile);
  6741. }
  6742. sqlite3DbFree(0, pCtx->lockProxyPath);
  6743. sqlite3_free(pCtx->conchFilePath);
  6744. sqlite3DbFree(0, pCtx->dbPath);
  6745. /* restore the original locking context and pMethod then close it */
  6746. pFile->lockingContext = pCtx->oldLockingContext;
  6747. pFile->pMethod = pCtx->pOldMethod;
  6748. sqlite3_free(pCtx);
  6749. return pFile->pMethod->xClose(id);
  6750. }
  6751. return SQLITE_OK;
  6752. }
  6753. #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
  6754. /*
  6755. ** The proxy locking style is intended for use with AFP filesystems.
  6756. ** And since AFP is only supported on MacOSX, the proxy locking is also
  6757. ** restricted to MacOSX.
  6758. **
  6759. **
  6760. ******************* End of the proxy lock implementation **********************
  6761. ******************************************************************************/
  6762. /*
  6763. ** Initialize the operating system interface.
  6764. **
  6765. ** This routine registers all VFS implementations for unix-like operating
  6766. ** systems. This routine, and the sqlite3_os_end() routine that follows,
  6767. ** should be the only routines in this file that are visible from other
  6768. ** files.
  6769. **
  6770. ** This routine is called once during SQLite initialization and by a
  6771. ** single thread. The memory allocation and mutex subsystems have not
  6772. ** necessarily been initialized when this routine is called, and so they
  6773. ** should not be used.
  6774. */
  6775. int sqlite3_os_init(void){
  6776. /*
  6777. ** The following macro defines an initializer for an sqlite3_vfs object.
  6778. ** The name of the VFS is NAME. The pAppData is a pointer to a pointer
  6779. ** to the "finder" function. (pAppData is a pointer to a pointer because
  6780. ** silly C90 rules prohibit a void* from being cast to a function pointer
  6781. ** and so we have to go through the intermediate pointer to avoid problems
  6782. ** when compiling with -pedantic-errors on GCC.)
  6783. **
  6784. ** The FINDER parameter to this macro is the name of the pointer to the
  6785. ** finder-function. The finder-function returns a pointer to the
  6786. ** sqlite_io_methods object that implements the desired locking
  6787. ** behaviors. See the division above that contains the IOMETHODS
  6788. ** macro for addition information on finder-functions.
  6789. **
  6790. ** Most finders simply return a pointer to a fixed sqlite3_io_methods
  6791. ** object. But the "autolockIoFinder" available on MacOSX does a little
  6792. ** more than that; it looks at the filesystem type that hosts the
  6793. ** database file and tries to choose an locking method appropriate for
  6794. ** that filesystem time.
  6795. */
  6796. #define UNIXVFS(VFSNAME, FINDER) { \
  6797. 3, /* iVersion */ \
  6798. sizeof(unixFile), /* szOsFile */ \
  6799. MAX_PATHNAME, /* mxPathname */ \
  6800. 0, /* pNext */ \
  6801. VFSNAME, /* zName */ \
  6802. (void*)&FINDER, /* pAppData */ \
  6803. unixOpen, /* xOpen */ \
  6804. unixDelete, /* xDelete */ \
  6805. unixAccess, /* xAccess */ \
  6806. unixFullPathname, /* xFullPathname */ \
  6807. unixDlOpen, /* xDlOpen */ \
  6808. unixDlError, /* xDlError */ \
  6809. unixDlSym, /* xDlSym */ \
  6810. unixDlClose, /* xDlClose */ \
  6811. unixRandomness, /* xRandomness */ \
  6812. unixSleep, /* xSleep */ \
  6813. unixCurrentTime, /* xCurrentTime */ \
  6814. unixGetLastError, /* xGetLastError */ \
  6815. unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \
  6816. unixSetSystemCall, /* xSetSystemCall */ \
  6817. unixGetSystemCall, /* xGetSystemCall */ \
  6818. unixNextSystemCall, /* xNextSystemCall */ \
  6819. }
  6820. /*
  6821. ** All default VFSes for unix are contained in the following array.
  6822. **
  6823. ** Note that the sqlite3_vfs.pNext field of the VFS object is modified
  6824. ** by the SQLite core when the VFS is registered. So the following
  6825. ** array cannot be const.
  6826. */
  6827. static sqlite3_vfs aVfs[] = {
  6828. #if SQLITE_ENABLE_LOCKING_STYLE && (OS_VXWORKS || defined(__APPLE__))
  6829. UNIXVFS("unix", autolockIoFinder ),
  6830. #else
  6831. UNIXVFS("unix", posixIoFinder ),
  6832. #endif
  6833. UNIXVFS("unix-none", nolockIoFinder ),
  6834. UNIXVFS("unix-dotfile", dotlockIoFinder ),
  6835. UNIXVFS("unix-excl", posixIoFinder ),
  6836. #if OS_VXWORKS
  6837. UNIXVFS("unix-namedsem", semIoFinder ),
  6838. #endif
  6839. #if SQLITE_ENABLE_LOCKING_STYLE
  6840. UNIXVFS("unix-posix", posixIoFinder ),
  6841. #if !OS_VXWORKS
  6842. UNIXVFS("unix-flock", flockIoFinder ),
  6843. #endif
  6844. #endif
  6845. #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  6846. UNIXVFS("unix-afp", afpIoFinder ),
  6847. UNIXVFS("unix-nfs", nfsIoFinder ),
  6848. UNIXVFS("unix-proxy", proxyIoFinder ),
  6849. #endif
  6850. };
  6851. unsigned int i; /* Loop counter */
  6852. /* Double-check that the aSyscall[] array has been constructed
  6853. ** correctly. See ticket [bb3a86e890c8e96ab] */
  6854. assert( ArraySize(aSyscall)==24 );
  6855. /* Register all VFSes defined in the aVfs[] array */
  6856. for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
  6857. sqlite3_vfs_register(&aVfs[i], i==0);
  6858. }
  6859. return SQLITE_OK;
  6860. }
  6861. /*
  6862. ** Shutdown the operating system interface.
  6863. **
  6864. ** Some operating systems might need to do some cleanup in this routine,
  6865. ** to release dynamically allocated objects. But not on unix.
  6866. ** This routine is a no-op for unix.
  6867. */
  6868. int sqlite3_os_end(void){
  6869. return SQLITE_OK;
  6870. }
  6871. #endif /* SQLITE_OS_UNIX */