env_posix.cc 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include <dirent.h>
  5. #include <fcntl.h>
  6. #include <pthread.h>
  7. #include <sys/mman.h>
  8. #include <sys/resource.h>
  9. #include <sys/stat.h>
  10. #include <sys/time.h>
  11. #include <sys/types.h>
  12. #include <unistd.h>
  13. #include <atomic>
  14. #include <cerrno>
  15. #include <cstddef>
  16. #include <cstdint>
  17. #include <cstdio>
  18. #include <cstdlib>
  19. #include <cstring>
  20. #include <limits>
  21. #include <queue>
  22. #include <set>
  23. #include <string>
  24. #include <thread>
  25. #include <type_traits>
  26. #include <utility>
  27. #include "leveldb/env.h"
  28. #include "leveldb/slice.h"
  29. #include "leveldb/status.h"
  30. #include "port/port.h"
  31. #include "port/thread_annotations.h"
  32. #include "util/env_posix_test_helper.h"
  33. #include "util/posix_logger.h"
  34. namespace leveldb {
  35. namespace {
  36. // Set by EnvPosixTestHelper::SetReadOnlyMMapLimit() and MaxOpenFiles().
  37. int g_open_read_only_file_limit = -1;
  38. // Up to 1000 mmap regions for 64-bit binaries; none for 32-bit.
  39. constexpr const int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 1000 : 0;
  40. // Can be set using EnvPosixTestHelper::SetReadOnlyMMapLimit.
  41. int g_mmap_limit = kDefaultMmapLimit;
  42. constexpr const size_t kWritableFileBufferSize = 65536;
  43. Status PosixError(const std::string& context, int error_number) {
  44. if (error_number == ENOENT) {
  45. return Status::NotFound(context, std::strerror(error_number));
  46. } else {
  47. return Status::IOError(context, std::strerror(error_number));
  48. }
  49. }
  50. // Helper class to limit resource usage to avoid exhaustion.
  51. // Currently used to limit read-only file descriptors and mmap file usage
  52. // so that we do not run out of file descriptors or virtual memory, or run into
  53. // kernel performance problems for very large databases.
  54. class Limiter {
  55. public:
  56. // Limit maximum number of resources to |max_acquires|.
  57. Limiter(int max_acquires) : acquires_allowed_(max_acquires) {}
  58. Limiter(const Limiter&) = delete;
  59. Limiter operator=(const Limiter&) = delete;
  60. // If another resource is available, acquire it and return true.
  61. // Else return false.
  62. bool Acquire() {
  63. int old_acquires_allowed =
  64. acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);
  65. if (old_acquires_allowed > 0) return true;
  66. acquires_allowed_.fetch_add(1, std::memory_order_relaxed);
  67. return false;
  68. }
  69. // Release a resource acquired by a previous call to Acquire() that returned
  70. // true.
  71. void Release() { acquires_allowed_.fetch_add(1, std::memory_order_relaxed); }
  72. private:
  73. // The number of available resources.
  74. //
  75. // This is a counter and is not tied to the invariants of any other class, so
  76. // it can be operated on safely using std::memory_order_relaxed.
  77. std::atomic<int> acquires_allowed_;
  78. };
  79. // Implements sequential read access in a file using read().
  80. //
  81. // Instances of this class are thread-friendly but not thread-safe, as required
  82. // by the SequentialFile API.
  83. class PosixSequentialFile final : public SequentialFile {
  84. public:
  85. PosixSequentialFile(std::string filename, int fd)
  86. : fd_(fd), filename_(filename) {}
  87. ~PosixSequentialFile() override { close(fd_); }
  88. Status Read(size_t n, Slice* result, char* scratch) override {
  89. Status status;
  90. while (true) {
  91. ::ssize_t read_size = ::read(fd_, scratch, n);
  92. if (read_size < 0) { // Read error.
  93. if (errno == EINTR) {
  94. continue; // Retry
  95. }
  96. status = PosixError(filename_, errno);
  97. break;
  98. }
  99. *result = Slice(scratch, read_size);
  100. break;
  101. }
  102. return status;
  103. }
  104. Status Skip(uint64_t n) override {
  105. if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
  106. return PosixError(filename_, errno);
  107. }
  108. return Status::OK();
  109. }
  110. private:
  111. const int fd_;
  112. const std::string filename_;
  113. };
  114. // Implements random read access in a file using pread().
  115. //
  116. // Instances of this class are thread-safe, as required by the RandomAccessFile
  117. // API. Instances are immutable and Read() only calls thread-safe library
  118. // functions.
  119. class PosixRandomAccessFile final : public RandomAccessFile {
  120. public:
  121. // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
  122. // instance, and will be used to determine if .
  123. PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
  124. : has_permanent_fd_(fd_limiter->Acquire()),
  125. fd_(has_permanent_fd_ ? fd : -1),
  126. fd_limiter_(fd_limiter),
  127. filename_(std::move(filename)) {
  128. if (!has_permanent_fd_) {
  129. assert(fd_ == -1);
  130. ::close(fd); // The file will be opened on every read.
  131. }
  132. }
  133. ~PosixRandomAccessFile() override {
  134. if (has_permanent_fd_) {
  135. assert(fd_ != -1);
  136. ::close(fd_);
  137. fd_limiter_->Release();
  138. }
  139. }
  140. Status Read(uint64_t offset, size_t n, Slice* result,
  141. char* scratch) const override {
  142. int fd = fd_;
  143. if (!has_permanent_fd_) {
  144. fd = ::open(filename_.c_str(), O_RDONLY);
  145. if (fd < 0) {
  146. return PosixError(filename_, errno);
  147. }
  148. }
  149. assert(fd != -1);
  150. Status status;
  151. ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
  152. *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
  153. if (read_size < 0) {
  154. // An error: return a non-ok status.
  155. status = PosixError(filename_, errno);
  156. }
  157. if (!has_permanent_fd_) {
  158. // Close the temporary file descriptor opened earlier.
  159. assert(fd != fd_);
  160. ::close(fd);
  161. }
  162. return status;
  163. }
  164. private:
  165. const bool has_permanent_fd_; // If false, the file is opened on every read.
  166. const int fd_; // -1 if has_permanent_fd_ is false.
  167. Limiter* const fd_limiter_;
  168. const std::string filename_;
  169. };
  170. // Implements random read access in a file using mmap().
  171. //
  172. // Instances of this class are thread-safe, as required by the RandomAccessFile
  173. // API. Instances are immutable and Read() only calls thread-safe library
  174. // functions.
  175. class PosixMmapReadableFile final : public RandomAccessFile {
  176. public:
  177. // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
  178. // must be the result of a successful call to mmap(). This instances takes
  179. // over the ownership of the region.
  180. //
  181. // |mmap_limiter| must outlive this instance. The caller must have already
  182. // aquired the right to use one mmap region, which will be released when this
  183. // instance is destroyed.
  184. PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
  185. Limiter* mmap_limiter)
  186. : mmap_base_(mmap_base),
  187. length_(length),
  188. mmap_limiter_(mmap_limiter),
  189. filename_(std::move(filename)) {}
  190. ~PosixMmapReadableFile() override {
  191. ::munmap(static_cast<void*>(mmap_base_), length_);
  192. mmap_limiter_->Release();
  193. }
  194. Status Read(uint64_t offset, size_t n, Slice* result,
  195. char* scratch) const override {
  196. if (offset + n > length_) {
  197. *result = Slice();
  198. return PosixError(filename_, EINVAL);
  199. }
  200. *result = Slice(mmap_base_ + offset, n);
  201. return Status::OK();
  202. }
  203. private:
  204. char* const mmap_base_;
  205. const size_t length_;
  206. Limiter* const mmap_limiter_;
  207. const std::string filename_;
  208. };
  209. class PosixWritableFile final : public WritableFile {
  210. public:
  211. PosixWritableFile(std::string filename, int fd)
  212. : pos_(0),
  213. fd_(fd),
  214. is_manifest_(IsManifest(filename)),
  215. filename_(std::move(filename)),
  216. dirname_(Dirname(filename_)) {}
  217. ~PosixWritableFile() override {
  218. if (fd_ >= 0) {
  219. // Ignoring any potential errors
  220. Close();
  221. }
  222. }
  223. Status Append(const Slice& data) override {
  224. size_t write_size = data.size();
  225. const char* write_data = data.data();
  226. // Fit as much as possible into buffer.
  227. size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
  228. std::memcpy(buf_ + pos_, write_data, copy_size);
  229. write_data += copy_size;
  230. write_size -= copy_size;
  231. pos_ += copy_size;
  232. if (write_size == 0) {
  233. return Status::OK();
  234. }
  235. // Can't fit in buffer, so need to do at least one write.
  236. Status status = FlushBuffer();
  237. if (!status.ok()) {
  238. return status;
  239. }
  240. // Small writes go to buffer, large writes are written directly.
  241. if (write_size < kWritableFileBufferSize) {
  242. std::memcpy(buf_, write_data, write_size);
  243. pos_ = write_size;
  244. return Status::OK();
  245. }
  246. return WriteUnbuffered(write_data, write_size);
  247. }
  248. Status Close() override {
  249. Status status = FlushBuffer();
  250. const int close_result = ::close(fd_);
  251. if (close_result < 0 && status.ok()) {
  252. status = PosixError(filename_, errno);
  253. }
  254. fd_ = -1;
  255. return status;
  256. }
  257. Status Flush() override { return FlushBuffer(); }
  258. Status Sync() override {
  259. // Ensure new files referred to by the manifest are in the filesystem.
  260. //
  261. // This needs to happen before the manifest file is flushed to disk, to
  262. // avoid crashing in a state where the manifest refers to files that are not
  263. // yet on disk.
  264. Status status = SyncDirIfManifest();
  265. if (!status.ok()) {
  266. return status;
  267. }
  268. status = FlushBuffer();
  269. if (!status.ok()) {
  270. return status;
  271. }
  272. return SyncFd(fd_, filename_);
  273. }
  274. private:
  275. Status FlushBuffer() {
  276. Status status = WriteUnbuffered(buf_, pos_);
  277. pos_ = 0;
  278. return status;
  279. }
  280. Status WriteUnbuffered(const char* data, size_t size) {
  281. while (size > 0) {
  282. ssize_t write_result = ::write(fd_, data, size);
  283. if (write_result < 0) {
  284. if (errno == EINTR) {
  285. continue; // Retry
  286. }
  287. return PosixError(filename_, errno);
  288. }
  289. data += write_result;
  290. size -= write_result;
  291. }
  292. return Status::OK();
  293. }
  294. Status SyncDirIfManifest() {
  295. Status status;
  296. if (!is_manifest_) {
  297. return status;
  298. }
  299. int fd = ::open(dirname_.c_str(), O_RDONLY);
  300. if (fd < 0) {
  301. status = PosixError(dirname_, errno);
  302. } else {
  303. status = SyncFd(fd, dirname_);
  304. ::close(fd);
  305. }
  306. return status;
  307. }
  308. // Ensures that all the caches associated with the given file descriptor's
  309. // data are flushed all the way to durable media, and can withstand power
  310. // failures.
  311. //
  312. // The path argument is only used to populate the description string in the
  313. // returned Status if an error occurs.
  314. static Status SyncFd(int fd, const std::string& fd_path) {
  315. #if HAVE_FULLFSYNC
  316. // On macOS and iOS, fsync() doesn't guarantee durability past power
  317. // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
  318. // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
  319. // fsync().
  320. if (::fcntl(fd, F_FULLFSYNC) == 0) {
  321. return Status::OK();
  322. }
  323. #endif // HAVE_FULLFSYNC
  324. #if HAVE_FDATASYNC
  325. bool sync_success = ::fdatasync(fd) == 0;
  326. #else
  327. bool sync_success = ::fsync(fd) == 0;
  328. #endif // HAVE_FDATASYNC
  329. if (sync_success) {
  330. return Status::OK();
  331. }
  332. return PosixError(fd_path, errno);
  333. }
  334. // Returns the directory name in a path pointing to a file.
  335. //
  336. // Returns "." if the path does not contain any directory separator.
  337. static std::string Dirname(const std::string& filename) {
  338. std::string::size_type separator_pos = filename.rfind('/');
  339. if (separator_pos == std::string::npos) {
  340. return std::string(".");
  341. }
  342. // The filename component should not contain a path separator. If it does,
  343. // the splitting was done incorrectly.
  344. assert(filename.find('/', separator_pos + 1) == std::string::npos);
  345. return filename.substr(0, separator_pos);
  346. }
  347. // Extracts the file name from a path pointing to a file.
  348. //
  349. // The returned Slice points to |filename|'s data buffer, so it is only valid
  350. // while |filename| is alive and unchanged.
  351. static Slice Basename(const std::string& filename) {
  352. std::string::size_type separator_pos = filename.rfind('/');
  353. if (separator_pos == std::string::npos) {
  354. return Slice(filename);
  355. }
  356. // The filename component should not contain a path separator. If it does,
  357. // the splitting was done incorrectly.
  358. assert(filename.find('/', separator_pos + 1) == std::string::npos);
  359. return Slice(filename.data() + separator_pos + 1,
  360. filename.length() - separator_pos - 1);
  361. }
  362. // True if the given file is a manifest file.
  363. static bool IsManifest(const std::string& filename) {
  364. return Basename(filename).starts_with("MANIFEST");
  365. }
  366. // buf_[0, pos_ - 1] contains data to be written to fd_.
  367. char buf_[kWritableFileBufferSize];
  368. size_t pos_;
  369. int fd_;
  370. const bool is_manifest_; // True if the file's name starts with MANIFEST.
  371. const std::string filename_;
  372. const std::string dirname_; // The directory of filename_.
  373. };
  374. int LockOrUnlock(int fd, bool lock) {
  375. errno = 0;
  376. struct ::flock file_lock_info;
  377. std::memset(&file_lock_info, 0, sizeof(file_lock_info));
  378. file_lock_info.l_type = (lock ? F_WRLCK : F_UNLCK);
  379. file_lock_info.l_whence = SEEK_SET;
  380. file_lock_info.l_start = 0;
  381. file_lock_info.l_len = 0; // Lock/unlock entire file.
  382. return ::fcntl(fd, F_SETLK, &file_lock_info);
  383. }
  384. // Instances are thread-safe because they are immutable.
  385. class PosixFileLock : public FileLock {
  386. public:
  387. PosixFileLock(int fd, std::string filename)
  388. : fd_(fd), filename_(std::move(filename)) {}
  389. int fd() const { return fd_; }
  390. const std::string& filename() const { return filename_; }
  391. private:
  392. const int fd_;
  393. const std::string filename_;
  394. };
  395. // Tracks the files locked by PosixEnv::LockFile().
  396. //
  397. // We maintain a separate set instead of relying on fcntrl(F_SETLK) because
  398. // fcntl(F_SETLK) does not provide any protection against multiple uses from the
  399. // same process.
  400. //
  401. // Instances are thread-safe because all member data is guarded by a mutex.
  402. class PosixLockTable {
  403. public:
  404. bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
  405. mu_.Lock();
  406. bool succeeded = locked_files_.insert(fname).second;
  407. mu_.Unlock();
  408. return succeeded;
  409. }
  410. void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
  411. mu_.Lock();
  412. locked_files_.erase(fname);
  413. mu_.Unlock();
  414. }
  415. private:
  416. port::Mutex mu_;
  417. std::set<std::string> locked_files_ GUARDED_BY(mu_);
  418. };
  419. class PosixEnv : public Env {
  420. public:
  421. PosixEnv();
  422. ~PosixEnv() override {
  423. static char msg[] = "PosixEnv singleton destroyed. Unsupported behavior!\n";
  424. std::fwrite(msg, 1, sizeof(msg), stderr);
  425. std::abort();
  426. }
  427. Status NewSequentialFile(const std::string& filename,
  428. SequentialFile** result) override {
  429. int fd = ::open(filename.c_str(), O_RDONLY);
  430. if (fd < 0) {
  431. *result = nullptr;
  432. return PosixError(filename, errno);
  433. }
  434. *result = new PosixSequentialFile(filename, fd);
  435. return Status::OK();
  436. }
  437. Status NewRandomAccessFile(const std::string& filename,
  438. RandomAccessFile** result) override {
  439. *result = nullptr;
  440. int fd = ::open(filename.c_str(), O_RDONLY);
  441. if (fd < 0) {
  442. return PosixError(filename, errno);
  443. }
  444. if (!mmap_limiter_.Acquire()) {
  445. *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
  446. return Status::OK();
  447. }
  448. uint64_t file_size;
  449. Status status = GetFileSize(filename, &file_size);
  450. if (status.ok()) {
  451. void* mmap_base =
  452. ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
  453. if (mmap_base != MAP_FAILED) {
  454. *result = new PosixMmapReadableFile(filename,
  455. reinterpret_cast<char*>(mmap_base),
  456. file_size, &mmap_limiter_);
  457. } else {
  458. status = PosixError(filename, errno);
  459. }
  460. }
  461. ::close(fd);
  462. if (!status.ok()) {
  463. mmap_limiter_.Release();
  464. }
  465. return status;
  466. }
  467. Status NewWritableFile(const std::string& filename,
  468. WritableFile** result) override {
  469. int fd = ::open(filename.c_str(), O_TRUNC | O_WRONLY | O_CREAT, 0644);
  470. if (fd < 0) {
  471. *result = nullptr;
  472. return PosixError(filename, errno);
  473. }
  474. *result = new PosixWritableFile(filename, fd);
  475. return Status::OK();
  476. }
  477. Status NewAppendableFile(const std::string& filename,
  478. WritableFile** result) override {
  479. int fd = ::open(filename.c_str(), O_APPEND | O_WRONLY | O_CREAT, 0644);
  480. if (fd < 0) {
  481. *result = nullptr;
  482. return PosixError(filename, errno);
  483. }
  484. *result = new PosixWritableFile(filename, fd);
  485. return Status::OK();
  486. }
  487. bool FileExists(const std::string& filename) override {
  488. return ::access(filename.c_str(), F_OK) == 0;
  489. }
  490. Status GetChildren(const std::string& directory_path,
  491. std::vector<std::string>* result) override {
  492. result->clear();
  493. ::DIR* dir = ::opendir(directory_path.c_str());
  494. if (dir == nullptr) {
  495. return PosixError(directory_path, errno);
  496. }
  497. struct ::dirent* entry;
  498. while ((entry = ::readdir(dir)) != nullptr) {
  499. result->emplace_back(entry->d_name);
  500. }
  501. ::closedir(dir);
  502. return Status::OK();
  503. }
  504. Status DeleteFile(const std::string& filename) override {
  505. if (::unlink(filename.c_str()) != 0) {
  506. return PosixError(filename, errno);
  507. }
  508. return Status::OK();
  509. }
  510. Status CreateDir(const std::string& dirname) override {
  511. if (::mkdir(dirname.c_str(), 0755) != 0) {
  512. return PosixError(dirname, errno);
  513. }
  514. return Status::OK();
  515. }
  516. Status DeleteDir(const std::string& dirname) override {
  517. if (::rmdir(dirname.c_str()) != 0) {
  518. return PosixError(dirname, errno);
  519. }
  520. return Status::OK();
  521. }
  522. Status GetFileSize(const std::string& filename, uint64_t* size) override {
  523. struct ::stat file_stat;
  524. if (::stat(filename.c_str(), &file_stat) != 0) {
  525. *size = 0;
  526. return PosixError(filename, errno);
  527. }
  528. *size = file_stat.st_size;
  529. return Status::OK();
  530. }
  531. Status RenameFile(const std::string& from, const std::string& to) override {
  532. if (std::rename(from.c_str(), to.c_str()) != 0) {
  533. return PosixError(from, errno);
  534. }
  535. return Status::OK();
  536. }
  537. Status LockFile(const std::string& filename, FileLock** lock) override {
  538. *lock = nullptr;
  539. int fd = ::open(filename.c_str(), O_RDWR | O_CREAT, 0644);
  540. if (fd < 0) {
  541. return PosixError(filename, errno);
  542. }
  543. if (!locks_.Insert(filename)) {
  544. ::close(fd);
  545. return Status::IOError("lock " + filename, "already held by process");
  546. }
  547. if (LockOrUnlock(fd, true) == -1) {
  548. int lock_errno = errno;
  549. ::close(fd);
  550. locks_.Remove(filename);
  551. return PosixError("lock " + filename, lock_errno);
  552. }
  553. *lock = new PosixFileLock(fd, filename);
  554. return Status::OK();
  555. }
  556. Status UnlockFile(FileLock* lock) override {
  557. PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
  558. if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
  559. return PosixError("unlock " + posix_file_lock->filename(), errno);
  560. }
  561. locks_.Remove(posix_file_lock->filename());
  562. ::close(posix_file_lock->fd());
  563. delete posix_file_lock;
  564. return Status::OK();
  565. }
  566. void Schedule(void (*background_work_function)(void* background_work_arg),
  567. void* background_work_arg) override;
  568. void StartThread(void (*thread_main)(void* thread_main_arg),
  569. void* thread_main_arg) override;
  570. Status GetTestDirectory(std::string* result) override {
  571. const char* env = std::getenv("TEST_TMPDIR");
  572. if (env && env[0] != '\0') {
  573. *result = env;
  574. } else {
  575. char buf[100];
  576. std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
  577. static_cast<int>(::geteuid()));
  578. *result = buf;
  579. }
  580. // The CreateDir status is ignored because the directory may already exist.
  581. CreateDir(*result);
  582. return Status::OK();
  583. }
  584. Status NewLogger(const std::string& filename, Logger** result) override {
  585. std::FILE* fp = std::fopen(filename.c_str(), "w");
  586. if (fp == nullptr) {
  587. *result = nullptr;
  588. return PosixError(filename, errno);
  589. } else {
  590. *result = new PosixLogger(fp);
  591. return Status::OK();
  592. }
  593. }
  594. uint64_t NowMicros() override {
  595. static constexpr uint64_t kUsecondsPerSecond = 1000000;
  596. struct ::timeval tv;
  597. ::gettimeofday(&tv, nullptr);
  598. return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
  599. }
  600. void SleepForMicroseconds(int micros) override { ::usleep(micros); }
  601. private:
  602. void BackgroundThreadMain();
  603. static void BackgroundThreadEntryPoint(PosixEnv* env) {
  604. env->BackgroundThreadMain();
  605. }
  606. // Stores the work item data in a Schedule() call.
  607. //
  608. // Instances are constructed on the thread calling Schedule() and used on the
  609. // background thread.
  610. //
  611. // This structure is thread-safe beacuse it is immutable.
  612. struct BackgroundWorkItem {
  613. explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
  614. : function(function), arg(arg) {}
  615. void (*const function)(void*);
  616. void* const arg;
  617. };
  618. port::Mutex background_work_mutex_;
  619. port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
  620. bool started_background_thread_ GUARDED_BY(background_work_mutex_);
  621. std::queue<BackgroundWorkItem> background_work_queue_
  622. GUARDED_BY(background_work_mutex_);
  623. PosixLockTable locks_; // Thread-safe.
  624. Limiter mmap_limiter_; // Thread-safe.
  625. Limiter fd_limiter_; // Thread-safe.
  626. };
  627. // Return the maximum number of concurrent mmaps.
  628. int MaxMmaps() { return g_mmap_limit; }
  629. // Return the maximum number of read-only files to keep open.
  630. int MaxOpenFiles() {
  631. if (g_open_read_only_file_limit >= 0) {
  632. return g_open_read_only_file_limit;
  633. }
  634. struct ::rlimit rlim;
  635. if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
  636. // getrlimit failed, fallback to hard-coded default.
  637. g_open_read_only_file_limit = 50;
  638. } else if (rlim.rlim_cur == RLIM_INFINITY) {
  639. g_open_read_only_file_limit = std::numeric_limits<int>::max();
  640. } else {
  641. // Allow use of 20% of available file descriptors for read-only files.
  642. g_open_read_only_file_limit = rlim.rlim_cur / 5;
  643. }
  644. return g_open_read_only_file_limit;
  645. }
  646. } // namespace
  647. PosixEnv::PosixEnv()
  648. : background_work_cv_(&background_work_mutex_),
  649. started_background_thread_(false),
  650. mmap_limiter_(MaxMmaps()),
  651. fd_limiter_(MaxOpenFiles()) {}
  652. void PosixEnv::Schedule(
  653. void (*background_work_function)(void* background_work_arg),
  654. void* background_work_arg) {
  655. background_work_mutex_.Lock();
  656. // Start the background thread, if we haven't done so already.
  657. if (!started_background_thread_) {
  658. started_background_thread_ = true;
  659. std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
  660. background_thread.detach();
  661. }
  662. // If the queue is empty, the background thread may be waiting for work.
  663. if (background_work_queue_.empty()) {
  664. background_work_cv_.Signal();
  665. }
  666. background_work_queue_.emplace(background_work_function, background_work_arg);
  667. background_work_mutex_.Unlock();
  668. }
  669. void PosixEnv::BackgroundThreadMain() {
  670. while (true) {
  671. background_work_mutex_.Lock();
  672. // Wait until there is work to be done.
  673. while (background_work_queue_.empty()) {
  674. background_work_cv_.Wait();
  675. }
  676. assert(!background_work_queue_.empty());
  677. auto background_work_function = background_work_queue_.front().function;
  678. void* background_work_arg = background_work_queue_.front().arg;
  679. background_work_queue_.pop();
  680. background_work_mutex_.Unlock();
  681. background_work_function(background_work_arg);
  682. }
  683. }
  684. namespace {
  685. // Wraps an Env instance whose destructor is never created.
  686. //
  687. // Intended usage:
  688. // using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
  689. // void ConfigurePosixEnv(int param) {
  690. // PlatformSingletonEnv::AssertEnvNotInitialized();
  691. // // set global configuration flags.
  692. // }
  693. // Env* Env::Default() {
  694. // static PlatformSingletonEnv default_env;
  695. // return default_env.env();
  696. // }
  697. template <typename EnvType>
  698. class SingletonEnv {
  699. public:
  700. SingletonEnv() {
  701. #if !defined(NDEBUG)
  702. env_initialized_.store(true, std::memory_order::memory_order_relaxed);
  703. #endif // !defined(NDEBUG)
  704. static_assert(sizeof(env_storage_) >= sizeof(EnvType),
  705. "env_storage_ will not fit the Env");
  706. static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType),
  707. "env_storage_ does not meet the Env's alignment needs");
  708. new (&env_storage_) EnvType();
  709. }
  710. ~SingletonEnv() = default;
  711. SingletonEnv(const SingletonEnv&) = delete;
  712. SingletonEnv& operator=(const SingletonEnv&) = delete;
  713. Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
  714. static void AssertEnvNotInitialized() {
  715. #if !defined(NDEBUG)
  716. assert(!env_initialized_.load(std::memory_order::memory_order_relaxed));
  717. #endif // !defined(NDEBUG)
  718. }
  719. private:
  720. typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type
  721. env_storage_;
  722. #if !defined(NDEBUG)
  723. static std::atomic<bool> env_initialized_;
  724. #endif // !defined(NDEBUG)
  725. };
  726. #if !defined(NDEBUG)
  727. template <typename EnvType>
  728. std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
  729. #endif // !defined(NDEBUG)
  730. using PosixDefaultEnv = SingletonEnv<PosixEnv>;
  731. } // namespace
  732. void PosixEnv::StartThread(void (*thread_main)(void* thread_main_arg),
  733. void* thread_main_arg) {
  734. std::thread new_thread(thread_main, thread_main_arg);
  735. new_thread.detach();
  736. }
  737. void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
  738. PosixDefaultEnv::AssertEnvNotInitialized();
  739. g_open_read_only_file_limit = limit;
  740. }
  741. void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
  742. PosixDefaultEnv::AssertEnvNotInitialized();
  743. g_mmap_limit = limit;
  744. }
  745. Env* Env::Default() {
  746. static PosixDefaultEnv env_container;
  747. return env_container.env();
  748. }
  749. } // namespace leveldb