From 6e97b04d197be7aa86229c47e397d8a9f82fbde4 Mon Sep 17 00:00:00 2001 From: Fabian Jahr Date: Fri, 30 Aug 2024 21:43:43 +0200 Subject: [PATCH] refactor: Use span of bytes for asmap This prevents holding the asmap data in memory twice. Also modernizes the logging in util/asmap.cpp (LogPrintf -> LogInfo). --- src/bench/addrman.cpp | 2 +- src/init.cpp | 17 ++-- src/netgroup.cpp | 21 ++--- src/netgroup.h | 33 ++++++-- src/test/addrman_tests.cpp | 25 +----- src/test/fuzz/asmap.cpp | 29 +++---- src/test/fuzz/asmap_direct.cpp | 16 ++-- src/test/fuzz/p2p_handshake.cpp | 2 +- src/test/fuzz/util.h | 5 -- src/test/fuzz/util/net.h | 8 +- src/test/netbase_tests.cpp | 2 +- src/test/util/setup_common.cpp | 2 +- src/util/asmap.cpp | 138 +++++++++++++++++-------------- src/util/asmap.h | 14 +++- test/functional/feature_asmap.py | 2 +- 15 files changed, 162 insertions(+), 154 deletions(-) diff --git a/src/bench/addrman.cpp b/src/bench/addrman.cpp index ceef6c29ab3..1e93b9c3951 100644 --- a/src/bench/addrman.cpp +++ b/src/bench/addrman.cpp @@ -24,7 +24,7 @@ static constexpr size_t NUM_SOURCES = 64; static constexpr size_t NUM_ADDRESSES_PER_SOURCE = 256; -static NetGroupManager EMPTY_NETGROUPMAN{std::vector()}; +static NetGroupManager EMPTY_NETGROUPMAN{NetGroupManager::NoAsmap()}; static constexpr uint32_t ADDRMAN_CONSISTENCY_CHECK_RATIO{0}; static std::vector g_sources; diff --git a/src/init.cpp b/src/init.cpp index 3cfd301fbab..683edb9c453 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -1431,9 +1431,10 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info) ApplyArgsManOptions(args, peerman_opts); { - - // Read asmap file if configured - std::vector asmap; + // Read asmap file or embedded data if configured and initialize + // Netgroupman with or without it + assert(!node.netgroupman); + uint256 asmap_version; if (args.IsArgSet("-asmap") && !args.IsArgNegated("-asmap")) { fs::path asmap_path = args.GetPathArg("-asmap", DEFAULT_ASMAP_FILENAME); if 
(!asmap_path.is_absolute()) { @@ -1443,21 +1444,19 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info) InitError(strprintf(_("Could not find asmap file %s"), fs::quoted(fs::PathToString(asmap_path)))); return false; } - asmap = DecodeAsmap(asmap_path); + std::vector asmap{DecodeAsmap(asmap_path)}; if (asmap.size() == 0) { InitError(strprintf(_("Could not parse asmap file %s"), fs::quoted(fs::PathToString(asmap_path)))); return false; } - const uint256 asmap_version = (HashWriter{} << asmap).GetHash(); + node.netgroupman = std::make_unique(NetGroupManager::WithLoadedAsmap(asmap)); + asmap_version = AsmapChecksum(asmap); LogPrintf("Using asmap version %s for IP bucketing\n", asmap_version.ToString()); } else { + node.netgroupman = std::make_unique(NetGroupManager::NoAsmap()); LogPrintf("Using /16 prefix for IP bucketing\n"); } - // Initialize netgroup manager - assert(!node.netgroupman); - node.netgroupman = std::make_unique(std::move(asmap)); - // Initialize addrman assert(!node.addrman); uiInterface.InitMessage(_("Loading P2P addresses…")); diff --git a/src/netgroup.cpp b/src/netgroup.cpp index 0ae229b3f35..5016fe31bbc 100644 --- a/src/netgroup.cpp +++ b/src/netgroup.cpp @@ -6,13 +6,13 @@ #include #include +#include #include uint256 NetGroupManager::GetAsmapChecksum() const { if (!m_asmap.size()) return {}; - - return (HashWriter{} << m_asmap).GetHash(); + return AsmapChecksum(m_asmap); } std::vector NetGroupManager::GetGroup(const CNetAddr& address) const @@ -81,30 +81,25 @@ std::vector NetGroupManager::GetGroup(const CNetAddr& address) co uint32_t NetGroupManager::GetMappedAS(const CNetAddr& address) const { uint32_t net_class = address.GetNetClass(); - if (m_asmap.size() == 0 || (net_class != NET_IPV4 && net_class != NET_IPV6)) { + if (m_asmap.empty() || (net_class != NET_IPV4 && net_class != NET_IPV6)) { return 0; // Indicates not found, safe because AS0 is reserved per RFC7607. 
} - std::vector ip_bits(128); + std::vector ip_bits(16); if (address.HasLinkedIPv4()) { // For lookup, treat as if it was just an IPv4 address (IPV4_IN_IPV6_PREFIX + IPv4 bits) for (int8_t byte_i = 0; byte_i < 12; ++byte_i) { - for (uint8_t bit_i = 0; bit_i < 8; ++bit_i) { - ip_bits[byte_i * 8 + bit_i] = (IPV4_IN_IPV6_PREFIX[byte_i] >> (7 - bit_i)) & 1; - } + ip_bits[byte_i] = static_cast(IPV4_IN_IPV6_PREFIX[byte_i]); } uint32_t ipv4 = address.GetLinkedIPv4(); - for (int i = 0; i < 32; ++i) { - ip_bits[96 + i] = (ipv4 >> (31 - i)) & 1; + for (int i = 0; i < 4; ++i) { + ip_bits[12 + i] = static_cast((ipv4 >> (24 - i * 8)) & 0xFF); } } else { // Use all 128 bits of the IPv6 address otherwise assert(address.IsIPv6()); auto addr_bytes = address.GetAddrBytes(); for (int8_t byte_i = 0; byte_i < 16; ++byte_i) { - uint8_t cur_byte = addr_bytes[byte_i]; - for (uint8_t bit_i = 0; bit_i < 8; ++bit_i) { - ip_bits[byte_i * 8 + bit_i] = (cur_byte >> (7 - bit_i)) & 1; - } + ip_bits[byte_i] = static_cast(addr_bytes[byte_i]); } } uint32_t mapped_as = Interpret(m_asmap, ip_bits); diff --git a/src/netgroup.h b/src/netgroup.h index 5aa6ef77425..6671b53ea1d 100644 --- a/src/netgroup.h +++ b/src/netgroup.h @@ -15,9 +15,18 @@ */ class NetGroupManager { public: - explicit NetGroupManager(std::vector asmap) - : m_asmap{std::move(asmap)} - {} + static NetGroupManager WithEmbeddedAsmap(std::span asmap) { + return NetGroupManager(asmap, {}); + } + + static NetGroupManager WithLoadedAsmap(std::vector loaded_asmap) { + std::span asmap_span(loaded_asmap); + return NetGroupManager(asmap_span, std::move(loaded_asmap)); + } + + static NetGroupManager NoAsmap() { + return NetGroupManager({}, {}); + } /** Get a checksum identifying the asmap being used. */ uint256 GetAsmapChecksum() const; @@ -52,7 +61,10 @@ public: bool UsingASMap() const; private: - /** Compressed IP->ASN mapping, loaded from a file when a node starts. + /** Compressed IP->ASN mapping. 
+ * + * Data may be loaded from a file when a node starts or embedded in the + * binary. * * This mapping is then used for bucketing nodes in Addrman and for * ensuring we connect to a diverse set of peers in Connman. The map is @@ -69,8 +81,17 @@ private: * re-bucketed. * * This is initialized in the constructor, const, and therefore is - * thread-safe. */ - const std::vector m_asmap; + * thread-safe. m_asmap can either point to m_loaded_asmap which holds + * data loaded from an external file at runtime or it can point to embedded + * asmap data. + */ + const std::span m_asmap; + std::vector m_loaded_asmap; + + explicit NetGroupManager(std::span asmap, std::vector loaded_asmap) + : m_asmap(asmap.empty() ? std::span() : asmap), + m_loaded_asmap(std::move(loaded_asmap)) + {} }; #endif // BITCOIN_NETGROUP_H diff --git a/src/test/addrman_tests.cpp b/src/test/addrman_tests.cpp index ddb1d5b43f3..7869d8ce85c 100644 --- a/src/test/addrman_tests.cpp +++ b/src/test/addrman_tests.cpp @@ -24,7 +24,7 @@ using namespace std::literals; using node::NodeContext; using util::ToString; -static NetGroupManager EMPTY_NETGROUPMAN{std::vector()}; +static NetGroupManager EMPTY_NETGROUPMAN{NetGroupManager::NoAsmap()}; static const bool DETERMINISTIC{true}; static int32_t GetCheckRatio(const NodeContext& node_ctx) @@ -46,20 +46,6 @@ static CService ResolveService(const std::string& ip, uint16_t port = 0) return serv.value_or(CService{}); } - -static std::vector FromBytes(std::span source) -{ - int vector_size(source.size() * 8); - std::vector result(vector_size); - for (int byte_i = 0; byte_i < vector_size / 8; ++byte_i) { - uint8_t cur_byte{std::to_integer(source[byte_i])}; - for (int bit_i = 0; bit_i < 8; ++bit_i) { - result[byte_i * 8 + bit_i] = (cur_byte >> bit_i) & 1; - } - } - return result; -} - BOOST_FIXTURE_TEST_SUITE(addrman_tests, BasicTestingSetup) BOOST_AUTO_TEST_CASE(addrman_simple) @@ -592,8 +578,7 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket_legacy) // 101.8.0.0/16 AS8 
BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket) { - std::vector asmap = FromBytes(test::data::asmap); - NetGroupManager ngm_asmap{asmap}; + NetGroupManager ngm_asmap{NetGroupManager::WithEmbeddedAsmap(test::data::asmap)}; CAddress addr1 = CAddress(ResolveService("250.1.1.1", 8333), NODE_NONE); CAddress addr2 = CAddress(ResolveService("250.1.1.1", 9999), NODE_NONE); @@ -646,8 +631,7 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket) BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket) { - std::vector asmap = FromBytes(test::data::asmap); - NetGroupManager ngm_asmap{asmap}; + NetGroupManager ngm_asmap{NetGroupManager::WithEmbeddedAsmap(test::data::asmap)}; CAddress addr1 = CAddress(ResolveService("250.1.2.1", 8333), NODE_NONE); CAddress addr2 = CAddress(ResolveService("250.1.2.1", 9999), NODE_NONE); @@ -724,8 +708,7 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket) BOOST_AUTO_TEST_CASE(addrman_serialization) { - std::vector asmap1 = FromBytes(test::data::asmap); - NetGroupManager netgroupman{asmap1}; + NetGroupManager netgroupman{NetGroupManager::WithEmbeddedAsmap(test::data::asmap)}; const auto ratio = GetCheckRatio(m_node); auto addrman_asmap1 = std::make_unique(netgroupman, DETERMINISTIC, ratio); diff --git a/src/test/fuzz/asmap.cpp b/src/test/fuzz/asmap.cpp index 1720f8e0ab0..8a2a8090280 100644 --- a/src/test/fuzz/asmap.cpp +++ b/src/test/fuzz/asmap.cpp @@ -6,28 +6,18 @@ #include #include #include +#include #include #include +using namespace util::hex_literals; + //! asmap code that consumes nothing -static const std::vector IPV6_PREFIX_ASMAP = {}; +static const std::vector IPV6_PREFIX_ASMAP = {}; //! 
asmap code that consumes the 96 prefix bits of ::ffff:0/96 (IPv4-in-IPv6 map) -static const std::vector IPV4_PREFIX_ASMAP = { - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00 - true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, // Match 0xFF - true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true // Match 0xFF -}; +static const auto IPV4_PREFIX_ASMAP = "fb03ec0fb03fc0fe00fb03ec0fb03fc0fe00fb03ec0fb0fffffeff"_hex_v; FUZZ_TARGET(asmap) { @@ -37,13 +27,14 @@ FUZZ_TARGET(asmap) bool ipv6 = buffer[0] & 128; const size_t addr_size = ipv6 ? 
ADDR_IPV6_SIZE : ADDR_IPV4_SIZE; if (buffer.size() < size_t(1 + asmap_size + addr_size)) return; - std::vector asmap = ipv6 ? IPV6_PREFIX_ASMAP : IPV4_PREFIX_ASMAP; - asmap.reserve(asmap.size() + 8 * asmap_size); + std::vector asmap_vec = ipv6 ? IPV6_PREFIX_ASMAP : IPV4_PREFIX_ASMAP; for (int i = 0; i < asmap_size; ++i) { + uint8_t byte = buffer[1 + i]; for (int j = 0; j < 8; ++j) { - asmap.push_back((buffer[1 + i] >> j) & 1); + asmap_vec.push_back(static_cast((byte >> j) & 1)); } } + std::span asmap(asmap_vec); if (!SanityCheckASMap(asmap, 128)) return; const uint8_t* addr_data = buffer.data() + 1 + asmap_size; @@ -57,6 +48,6 @@ FUZZ_TARGET(asmap) memcpy(&ipv4, addr_data, addr_size); net_addr.SetIP(CNetAddr{ipv4}); } - NetGroupManager netgroupman{asmap}; + NetGroupManager netgroupman{NetGroupManager::WithEmbeddedAsmap(asmap)}; (void)netgroupman.GetMappedAS(net_addr); } diff --git a/src/test/fuzz/asmap_direct.cpp b/src/test/fuzz/asmap_direct.cpp index 8a355fd3f60..5e7d0029c21 100644 --- a/src/test/fuzz/asmap_direct.cpp +++ b/src/test/fuzz/asmap_direct.cpp @@ -31,19 +31,21 @@ FUZZ_TARGET(asmap_direct) if (buffer.size() - sep_pos - 1 > 128) return; // At most 128 bits in IP address // Checks on asmap - std::vector asmap(buffer.begin(), buffer.begin() + sep_pos); - if (SanityCheckASMap(asmap, buffer.size() - 1 - sep_pos)) { + std::vector asmap(reinterpret_cast(buffer.data()), + reinterpret_cast(buffer.data() + sep_pos)); + if (SanityCheckASMap(std::span(asmap), buffer.size() - 1 - sep_pos)) { // Verify that for valid asmaps, no prefix (except up to 7 zero padding bits) is valid. 
- std::vector asmap_prefix = asmap; - while (!asmap_prefix.empty() && asmap_prefix.size() + 7 > asmap.size() && asmap_prefix.back() == false) { + std::vector asmap_prefix = asmap; + while (!asmap_prefix.empty() && asmap_prefix.size() + 7 > asmap.size() && asmap_prefix.back() == std::byte{0}) { asmap_prefix.pop_back(); } while (!asmap_prefix.empty()) { asmap_prefix.pop_back(); - assert(!SanityCheckASMap(asmap_prefix, buffer.size() - 1 - sep_pos)); + assert(!SanityCheckASMap(std::span(asmap_prefix), buffer.size() - 1 - sep_pos)); } // No address input should trigger assertions in interpreter - std::vector addr(buffer.begin() + sep_pos + 1, buffer.end()); - (void)Interpret(asmap, addr); + std::vector addr(reinterpret_cast(buffer.data() + sep_pos + 1), + reinterpret_cast(buffer.data() + buffer.size())); + (void)Interpret(std::span(asmap), std::span(addr)); } } diff --git a/src/test/fuzz/p2p_handshake.cpp b/src/test/fuzz/p2p_handshake.cpp index d608efd87ac..d02b686e4d7 100644 --- a/src/test/fuzz/p2p_handshake.cpp +++ b/src/test/fuzz/p2p_handshake.cpp @@ -48,7 +48,7 @@ FUZZ_TARGET(p2p_handshake, .init = ::initialize) chainman.ResetIbd(); node::Warnings warnings{}; - NetGroupManager netgroupman{{}}; + NetGroupManager netgroupman{NetGroupManager::NoAsmap()}; AddrMan addrman{netgroupman, /*deterministic=*/true, 0}; auto peerman = PeerManager::make(connman, addrman, /*banman=*/nullptr, chainman, diff --git a/src/test/fuzz/util.h b/src/test/fuzz/util.h index 38be59fb64b..5abfda9bef4 100644 --- a/src/test/fuzz/util.h +++ b/src/test/fuzz/util.h @@ -65,11 +65,6 @@ template return ret; } -[[nodiscard]] inline std::vector ConsumeRandomLengthBitVector(FuzzedDataProvider& fuzzed_data_provider, const std::optional& max_length = std::nullopt) noexcept -{ - return BytesToBits(ConsumeRandomLengthByteVector(fuzzed_data_provider, max_length)); -} - [[nodiscard]] inline DataStream ConsumeDataStream(FuzzedDataProvider& fuzzed_data_provider, const std::optional& max_length = std::nullopt) 
noexcept { return DataStream{ConsumeRandomLengthByteVector(fuzzed_data_provider, max_length)}; diff --git a/src/test/fuzz/util/net.h b/src/test/fuzz/util/net.h index 698001a7f15..8c58f3dd0da 100644 --- a/src/test/fuzz/util/net.h +++ b/src/test/fuzz/util/net.h @@ -210,9 +210,11 @@ public: [[nodiscard]] inline NetGroupManager ConsumeNetGroupManager(FuzzedDataProvider& fuzzed_data_provider) noexcept { - std::vector asmap = ConsumeRandomLengthBitVector(fuzzed_data_provider); - if (!SanityCheckASMap(asmap, 128)) asmap.clear(); - return NetGroupManager(asmap); + std::vector asmap{ConsumeRandomLengthByteVector(fuzzed_data_provider)}; + if (!SanityCheckASMap(std::span(asmap), 128)) { + return NetGroupManager::NoAsmap(); + } + return NetGroupManager::WithLoadedAsmap(asmap); } inline CSubNet ConsumeSubNet(FuzzedDataProvider& fuzzed_data_provider) noexcept diff --git a/src/test/netbase_tests.cpp b/src/test/netbase_tests.cpp index 3422cb80233..bf715e41276 100644 --- a/src/test/netbase_tests.cpp +++ b/src/test/netbase_tests.cpp @@ -323,7 +323,7 @@ BOOST_AUTO_TEST_CASE(subnet_test) BOOST_AUTO_TEST_CASE(netbase_getgroup) { - NetGroupManager netgroupman{std::vector()}; // use /16 + NetGroupManager netgroupman{NetGroupManager::NoAsmap()}; // use /16 BOOST_CHECK(netgroupman.GetGroup(ResolveIP("127.0.0.1")) == std::vector({0})); // Local -> !Routable() BOOST_CHECK(netgroupman.GetGroup(ResolveIP("257.0.0.1")) == std::vector({0})); // !Valid -> !Routable() BOOST_CHECK(netgroupman.GetGroup(ResolveIP("10.0.0.1")) == std::vector({0})); // RFC1918 -> !Routable() diff --git a/src/test/util/setup_common.cpp b/src/test/util/setup_common.cpp index bf26997c076..1eeb43e37e1 100644 --- a/src/test/util/setup_common.cpp +++ b/src/test/util/setup_common.cpp @@ -317,7 +317,7 @@ TestingSetup::TestingSetup( if (!opts.setup_net) return; - m_node.netgroupman = std::make_unique(/*asmap=*/std::vector()); + m_node.netgroupman = std::make_unique(NetGroupManager::NoAsmap()); m_node.addrman = 
std::make_unique(*m_node.netgroupman, /*deterministic=*/false, m_node.args->GetIntArg("-checkaddrman", 0)); diff --git a/src/util/asmap.cpp b/src/util/asmap.cpp index 04b0673c49b..c5b7705c58f 100644 --- a/src/util/asmap.cpp +++ b/src/util/asmap.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include @@ -21,15 +23,14 @@ namespace { constexpr uint32_t INVALID = 0xFFFFFFFF; -uint32_t DecodeBits(std::vector::const_iterator& bitpos, const std::vector::const_iterator& endpos, uint8_t minval, const std::vector &bit_sizes) +uint32_t DecodeBits(size_t& bitpos, const std::span& data, uint8_t minval, const std::vector& bit_sizes) { uint32_t val = minval; bool bit; - for (std::vector::const_iterator bit_sizes_it = bit_sizes.begin(); - bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) { + for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) { if (bit_sizes_it + 1 != bit_sizes.end()) { - if (bitpos == endpos) break; - bit = *bitpos; + if (bitpos >= data.size() * 8) break; + bit = (std::to_integer(data[bitpos / 8]) >> (bitpos % 8)) & 1; bitpos++; } else { bit = 0; @@ -38,8 +39,8 @@ uint32_t DecodeBits(std::vector::const_iterator& bitpos, const std::vector val += (1 << *bit_sizes_it); } else { for (int b = 0; b < *bit_sizes_it; b++) { - if (bitpos == endpos) return INVALID; // Reached EOF in mantissa - bit = *bitpos; + if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa + bit = (std::to_integer(data[bitpos / 8]) >> (bitpos % 8)) & 1; bitpos++; val += bit << (*bit_sizes_it - 1 - b); } @@ -58,69 +59,68 @@ enum class Instruction : uint32_t }; const std::vector TYPE_BIT_SIZES{0, 0, 1}; -Instruction DecodeType(std::vector::const_iterator& bitpos, const std::vector::const_iterator& endpos) +Instruction DecodeType(size_t& bitpos, const std::span& data) { - return Instruction(DecodeBits(bitpos, endpos, 0, TYPE_BIT_SIZES)); + return 
Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES)); } const std::vector ASN_BIT_SIZES{15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; -uint32_t DecodeASN(std::vector::const_iterator& bitpos, const std::vector::const_iterator& endpos) +uint32_t DecodeASN(size_t& bitpos, const std::span& data) { - return DecodeBits(bitpos, endpos, 1, ASN_BIT_SIZES); + return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES); } const std::vector MATCH_BIT_SIZES{1, 2, 3, 4, 5, 6, 7, 8}; -uint32_t DecodeMatch(std::vector::const_iterator& bitpos, const std::vector::const_iterator& endpos) +uint32_t DecodeMatch(size_t& bitpos, const std::span& data) { - return DecodeBits(bitpos, endpos, 2, MATCH_BIT_SIZES); + return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES); } const std::vector JUMP_BIT_SIZES{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; -uint32_t DecodeJump(std::vector::const_iterator& bitpos, const std::vector::const_iterator& endpos) +uint32_t DecodeJump(size_t& bitpos, const std::span& data) { - return DecodeBits(bitpos, endpos, 17, JUMP_BIT_SIZES); + return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES); } } -uint32_t Interpret(const std::vector &asmap, const std::vector &ip) +uint32_t Interpret(const std::span& asmap, const std::span& ip) { - std::vector::const_iterator pos = asmap.begin(); - const std::vector::const_iterator endpos = asmap.end(); - uint8_t bits = ip.size(); + size_t pos{0}; + uint8_t bits = ip.size() * 8; uint32_t default_asn = 0; uint32_t jump, match, matchlen; Instruction opcode; - while (pos != endpos) { - opcode = DecodeType(pos, endpos); + while (pos < asmap.size() * 8) { + opcode = DecodeType(pos, asmap); if (opcode == Instruction::RETURN) { - default_asn = DecodeASN(pos, endpos); + default_asn = DecodeASN(pos, asmap); if (default_asn == INVALID) break; // ASN straddles EOF return default_asn; } else if (opcode == Instruction::JUMP) { - jump = DecodeJump(pos, endpos); + jump = DecodeJump(pos, asmap); if (jump == 
INVALID) break; // Jump offset straddles EOF if (bits == 0) break; // No input bits left - if (int64_t{jump} >= int64_t{endpos - pos}) break; // Jumping past EOF - if (ip[ip.size() - bits]) { + if (int64_t{jump} >= static_cast(asmap.size() * 8 - pos)) break; // Jumping past EOF + if ((std::to_integer(ip[(ip.size() * 8 - bits) / 8]) >> (7 - ((ip.size() * 8 - bits) % 8))) & 1) { pos += jump; } bits--; } else if (opcode == Instruction::MATCH) { - match = DecodeMatch(pos, endpos); + match = DecodeMatch(pos, asmap); if (match == INVALID) break; // Match bits straddle EOF matchlen = std::bit_width(match) - 1; if (bits < matchlen) break; // Not enough input bits for (uint32_t bit = 0; bit < matchlen; bit++) { - if ((ip[ip.size() - bits]) != ((match >> (matchlen - 1 - bit)) & 1)) { + if (((std::to_integer(ip[(ip.size() * 8 - bits) / 8]) >> (7 - ((ip.size() * 8 - bits) % 8))) & 1) != ((match >> (matchlen - 1 - bit)) & 1)) { return default_asn; } bits--; } } else if (opcode == Instruction::DEFAULT) { - default_asn = DecodeASN(pos, endpos); + default_asn = DecodeASN(pos, asmap); if (default_asn == INVALID) break; // ASN straddles EOF } else { break; // Instruction straddles EOF @@ -130,50 +130,48 @@ uint32_t Interpret(const std::vector &asmap, const std::vector &ip) return 0; // 0 is not a valid ASN } -bool SanityCheckASMap(const std::vector& asmap, int bits) +bool SanityCheckASMap(const std::span& asmap, int bits) { - const std::vector::const_iterator begin = asmap.begin(), endpos = asmap.end(); - std::vector::const_iterator pos = begin; + size_t pos{0}; + size_t endpos{asmap.size() * 8}; std::vector> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left) jumps.reserve(bits); Instruction prevopcode = Instruction::JUMP; bool had_incomplete_match = false; while (pos != endpos) { - uint32_t offset = pos - begin; - if (!jumps.empty() && offset >= jumps.back().first) return false; // There was a jump into the middle of the previous instruction 
- Instruction opcode = DecodeType(pos, endpos); + if (!jumps.empty() && pos >= jumps.back().first) return false; // There was a jump into the middle of the previous instruction + Instruction opcode = DecodeType(pos, asmap); if (opcode == Instruction::RETURN) { if (prevopcode == Instruction::DEFAULT) return false; // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN) - uint32_t asn = DecodeASN(pos, endpos); + uint32_t asn = DecodeASN(pos, asmap); if (asn == INVALID) return false; // ASN straddles EOF if (jumps.empty()) { // Nothing to execute anymore if (endpos - pos > 7) return false; // Excessive padding while (pos != endpos) { - if (*pos) return false; // Nonzero padding bit + if ((std::to_integer(asmap[pos / 8]) >> (pos % 8)) & 1) return false; // Nonzero padding bit ++pos; } return true; // Sanely reached EOF } else { // Continue by pretending we jumped to the next instruction - offset = pos - begin; - if (offset != jumps.back().first) return false; // Unreachable code + if (pos != jumps.back().first) return false; // Unreachable code bits = jumps.back().second; // Restore the number of bits we would have had left after this jump jumps.pop_back(); prevopcode = Instruction::JUMP; } } else if (opcode == Instruction::JUMP) { - uint32_t jump = DecodeJump(pos, endpos); + uint32_t jump = DecodeJump(pos, asmap); if (jump == INVALID) return false; // Jump offset straddles EOF - if (int64_t{jump} > int64_t{endpos - pos}) return false; // Jump out of range + if (int64_t{jump} > static_cast(endpos - pos)) return false; // Jump out of range if (bits == 0) return false; // Consuming bits past the end of the input --bits; - uint32_t jump_offset = pos - begin + jump; + uint32_t jump_offset = pos + jump; if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps jumps.emplace_back(jump_offset, bits); prevopcode = Instruction::JUMP; } else if (opcode == Instruction::MATCH) { - uint32_t match = 
DecodeMatch(pos, endpos); + uint32_t match = DecodeMatch(pos, asmap); if (match == INVALID) return false; // Match bits straddle EOF int matchlen = std::bit_width(match) - 1; if (prevopcode != Instruction::MATCH) had_incomplete_match = false; @@ -184,7 +182,7 @@ bool SanityCheckASMap(const std::vector& asmap, int bits) prevopcode = Instruction::MATCH; } else if (opcode == Instruction::DEFAULT) { if (prevopcode == Instruction::DEFAULT) return false; // There should not be two successive DEFAULTs (they could be combined into one) - uint32_t asn = DecodeASN(pos, endpos); + uint32_t asn = DecodeASN(pos, asmap); if (asn == INVALID) return false; // ASN straddles EOF prevopcode = Instruction::DEFAULT; } else { @@ -194,30 +192,46 @@ bool SanityCheckASMap(const std::vector& asmap, int bits) return false; // Reached EOF without RETURN instruction } -std::vector DecodeAsmap(fs::path path) +std::span CheckAsmap(const std::span& data) +{ + if (data.empty()) { + return {}; + } + if (!SanityCheckASMap(data, 128)) { + LogInfo("Sanity check of asmap data failed\n"); + return {}; + } + return data; +} + +std::vector DecodeAsmap(fs::path path) { - std::vector bits; FILE *filestr = fsbridge::fopen(path, "rb"); AutoFile file{filestr}; if (file.IsNull()) { - LogPrintf("Failed to open asmap file from disk\n"); - return bits; - } - file.seek(0, SEEK_END); - int length = file.tell(); - LogPrintf("Opened asmap file %s (%d bytes) from disk\n", fs::quoted(fs::PathToString(path)), length); - file.seek(0, SEEK_SET); - uint8_t cur_byte; - for (int i = 0; i < length; ++i) { - file >> cur_byte; - for (int bit = 0; bit < 8; ++bit) { - bits.push_back((cur_byte >> bit) & 1); - } - } - if (!SanityCheckASMap(bits, 128)) { - LogPrintf("Sanity check of asmap file %s failed\n", fs::quoted(fs::PathToString(path))); + LogInfo("Failed to open asmap file from disk\n"); return {}; } - return bits; + + file.seek(0, SEEK_END); + int length = file.tell(); + LogInfo("Opened asmap file %s (%d bytes) from disk\n", 
fs::quoted(fs::PathToString(path)), length); + file.seek(0, SEEK_SET); + + std::vector buffer(length); + file.read(buffer); + + if (!SanityCheckASMap(buffer, 128)) { + LogInfo("Sanity check of asmap data failed\n"); + return {}; + } + + return buffer; } +uint256 AsmapChecksum(const std::span& data) +{ + HashWriter asmap_hasher; + asmap_hasher << data; + return asmap_hasher.GetHash(); +} diff --git a/src/util/asmap.h b/src/util/asmap.h index 08a88f1b3c5..45506951410 100644 --- a/src/util/asmap.h +++ b/src/util/asmap.h @@ -5,16 +5,22 @@ #ifndef BITCOIN_UTIL_ASMAP_H #define BITCOIN_UTIL_ASMAP_H +#include #include #include +#include #include -uint32_t Interpret(const std::vector &asmap, const std::vector &ip); +uint32_t Interpret(const std::span& asmap, const std::span& ip); -bool SanityCheckASMap(const std::vector& asmap, int bits); +bool SanityCheckASMap(const std::span& asmap, int bits); -/** Read asmap from provided binary file */ -std::vector DecodeAsmap(fs::path path); +/** Read and check asmap from provided binary file */ +std::vector DecodeAsmap(fs::path path); +/** Check asmap from embedded data */ +std::span CheckAsmap(const std::span& data); +/** Calculate asmap checksum */ +uint256 AsmapChecksum(const std::span& data); #endif // BITCOIN_UTIL_ASMAP_H diff --git a/test/functional/feature_asmap.py b/test/functional/feature_asmap.py index 7f0103ece3b..c2128c58872 100755 --- a/test/functional/feature_asmap.py +++ b/test/functional/feature_asmap.py @@ -31,7 +31,7 @@ from test_framework.util import assert_equal DEFAULT_ASMAP_FILENAME = 'ip_asn.map' # defined in src/init.cpp ASMAP = 'src/test/data/asmap.raw' # path to unit test skeleton asmap -VERSION = 'fec61fa21a9f46f3b17bdcd660d7f4cd90b966aad3aec593c99b35f0aca15853' +VERSION = 'bafc9da308f45179443bd1d22325400ac9104f741522d003e3fac86700f68895' def expected_messages(filename): return [f'Opened asmap file "{filename}" (59 bytes) from disk',