From d336b7ab85dd2b4f049f2f6fe176ffdd2621215e Mon Sep 17 00:00:00 2001 From: fanquake Date: Thu, 16 Jan 2025 11:09:56 +0000 Subject: [PATCH] Squashed 'src/leveldb/' changes from 688561cba8..04b5790928 04b5790928 Merge bitcoin-core/leveldb-subtree#46: Fix invalid pointer arithmetic in Hash (#1222) 59669817c5 Merge bitcoin-core/leveldb-subtree#40: cherry-pick: Remove leveldb::port::kLittleEndian. 73013d1a37 Merge bitcoin-core/leveldb-subtree#45: [jumbo] Add begin()/end() to Slice. a8844b23ab Fix invalid pointer arithmetic in Hash (#1222) be4dfc94b3 [jumbo] Add begin()/end() to Slice. 2e3c0131d3 Remove leveldb::port::kLittleEndian. git-subtree-dir: src/leveldb git-subtree-split: 04b57909285c7335c1908d53bcde9b90fe0439be --- CMakeLists.txt | 3 --- include/leveldb/slice.h | 3 +++ port/port_config.h.in | 6 ----- port/port_example.h | 4 ---- port/port_stdcxx.h | 2 -- util/coding.h | 50 ++++------------------------------------- util/hash.cc | 2 +- 7 files changed, 8 insertions(+), 62 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cb46256c29..cfd4faa3252 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,9 +28,6 @@ option(LEVELDB_BUILD_TESTS "Build LevelDB's unit tests" ON) option(LEVELDB_BUILD_BENCHMARKS "Build LevelDB's benchmarks" ON) option(LEVELDB_INSTALL "Install LevelDB's header and library" ON) -include(TestBigEndian) -test_big_endian(LEVELDB_IS_BIG_ENDIAN) - include(CheckIncludeFile) check_include_file("unistd.h" HAVE_UNISTD_H) diff --git a/include/leveldb/slice.h b/include/leveldb/slice.h index 2df417dc313..44de9038c89 100644 --- a/include/leveldb/slice.h +++ b/include/leveldb/slice.h @@ -52,6 +52,9 @@ class LEVELDB_EXPORT Slice { // Return true iff the length of the referenced data is zero bool empty() const { return size_ == 0; } + const char* begin() const { return data(); } + const char* end() const { return data() + size(); } + // Return the ith byte in the referenced data. 
// REQUIRES: n < size() char operator[](size_t n) const { diff --git a/port/port_config.h.in b/port/port_config.h.in index 21273153a3f..272671d39f3 100644 --- a/port/port_config.h.in +++ b/port/port_config.h.in @@ -30,10 +30,4 @@ #cmakedefine01 HAVE_SNAPPY #endif // !defined(HAVE_SNAPPY) -// Define to 1 if your processor stores words with the most significant byte -// first (like Motorola and SPARC, unlike Intel and VAX). -#if !defined(LEVELDB_IS_BIG_ENDIAN) -#cmakedefine01 LEVELDB_IS_BIG_ENDIAN -#endif // !defined(LEVELDB_IS_BIG_ENDIAN) - #endif // STORAGE_LEVELDB_PORT_PORT_CONFIG_H_ \ No newline at end of file diff --git a/port/port_example.h b/port/port_example.h index 1a8fca24b36..a665910d95a 100644 --- a/port/port_example.h +++ b/port/port_example.h @@ -18,10 +18,6 @@ namespace port { // TODO(jorlow): Many of these belong more in the environment class rather than // here. We should try moving them and see if it affects perf. -// The following boolean constant must be true on a little-endian machine -// and false otherwise. -static const bool kLittleEndian = true /* or some other expression */; - // ------------------ Threading ------------------- // A Mutex represents an exclusive lock. diff --git a/port/port_stdcxx.h b/port/port_stdcxx.h index e9cb0e53afd..2bda48db42d 100644 --- a/port/port_stdcxx.h +++ b/port/port_stdcxx.h @@ -41,8 +41,6 @@ namespace leveldb { namespace port { -static const bool kLittleEndian = !LEVELDB_IS_BIG_ENDIAN; - class CondVar; // Thinly wraps std::mutex. 
diff --git a/util/coding.h b/util/coding.h index 1983ae71730..f0bb57b8e41 100644 --- a/util/coding.h +++ b/util/coding.h @@ -48,29 +48,13 @@ int VarintLength(uint64_t v); char* EncodeVarint32(char* dst, uint32_t value); char* EncodeVarint64(char* dst, uint64_t value); -// TODO(costan): Remove port::kLittleEndian and the fast paths based on -// std::memcpy when clang learns to optimize the generic code, as -// described in https://bugs.llvm.org/show_bug.cgi?id=41761 -// -// The platform-independent code in DecodeFixed{32,64}() gets optimized to mov -// on x86 and ldr on ARM64, by both clang and gcc. However, only gcc optimizes -// the platform-independent code in EncodeFixed{32,64}() to mov / str. - // Lower-level versions of Put... that write directly into a character buffer // REQUIRES: dst has enough space for the value being written inline void EncodeFixed32(char* dst, uint32_t value) { uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst); - if (port::kLittleEndian) { - // Fast path for little-endian CPUs. All major compilers optimize this to a - // single mov (x86_64) / str (ARM) instruction. - std::memcpy(buffer, &value, sizeof(uint32_t)); - return; - } - - // Platform-independent code. - // Currently, only gcc optimizes this to a single mov / str instruction. + // Recent clang and gcc optimize this to a single mov / str instruction. buffer[0] = static_cast<uint8_t>(value); buffer[1] = static_cast<uint8_t>(value >> 8); buffer[2] = static_cast<uint8_t>(value >> 16); @@ -80,15 +64,7 @@ inline void EncodeFixed32(char* dst, uint32_t value) { inline void EncodeFixed64(char* dst, uint64_t value) { uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst); - if (port::kLittleEndian) { - // Fast path for little-endian CPUs. All major compilers optimize this to a - // single mov (x86_64) / str (ARM) instruction. - std::memcpy(buffer, &value, sizeof(uint64_t)); - return; - } - - // Platform-independent code. - // Currently, only gcc optimizes this to a single mov / str instruction. 
+ // Recent clang and gcc optimize this to a single mov / str instruction. buffer[0] = static_cast<uint8_t>(value); buffer[1] = static_cast<uint8_t>(value >> 8); buffer[2] = static_cast<uint8_t>(value >> 16); @@ -105,16 +81,7 @@ inline void EncodeFixed64(char* dst, uint64_t value) { inline uint32_t DecodeFixed32(const char* ptr) { const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr); - if (port::kLittleEndian) { - // Fast path for little-endian CPUs. All major compilers optimize this to a - // single mov (x86_64) / ldr (ARM) instruction. - uint32_t result; - std::memcpy(&result, buffer, sizeof(uint32_t)); - return result; - } - - // Platform-independent code. - // Clang and gcc optimize this to a single mov / ldr instruction. + // Recent clang and gcc optimize this to a single mov / ldr instruction. return (static_cast<uint32_t>(buffer[0])) | (static_cast<uint32_t>(buffer[1]) << 8) | (static_cast<uint32_t>(buffer[2]) << 16) | @@ -124,16 +91,7 @@ inline uint32_t DecodeFixed32(const char* ptr) { inline uint64_t DecodeFixed64(const char* ptr) { const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr); - if (port::kLittleEndian) { - // Fast path for little-endian CPUs. All major compilers optimize this to a - // single mov (x86_64) / ldr (ARM) instruction. - uint64_t result; - std::memcpy(&result, buffer, sizeof(uint64_t)); - return result; - } - - // Platform-independent code. - // Clang and gcc optimize this to a single mov / ldr instruction. + // Recent clang and gcc optimize this to a single mov / ldr instruction. return (static_cast<uint64_t>(buffer[0])) | (static_cast<uint64_t>(buffer[1]) << 8) | (static_cast<uint64_t>(buffer[2]) << 16) | diff --git a/util/hash.cc b/util/hash.cc index dd47c110ee9..5432b6180dd 100644 --- a/util/hash.cc +++ b/util/hash.cc @@ -27,7 +27,7 @@ uint32_t Hash(const char* data, size_t n, uint32_t seed) { uint32_t h = seed ^ (n * m); // Pick up four bytes at a time - while (data + 4 <= limit) { + while (limit - data >= 4) { uint32_t w = DecodeFixed32(data); data += 4; h += w;