mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-03-13 03:09:37 +01:00
This replaces the current benchmarking framework with nanobench [1], an MIT licensed single-header benchmarking library, of which I am the author. This has in my opinion several advantages, especially on Linux: * fast: Running all benchmarks takes ~6 seconds instead of 4m13s on an Intel i7-8700 CPU @ 3.20GHz. * accurate: I ran e.g. the benchmark for SipHash_32b 10 times and calculated standard deviation / mean = coefficient of variation: * 0.57% CV for old benchmarking framework * 0.20% CV for nanobench So the benchmark results with nanobench seem to vary less than with the old framework. * It automatically determines runtime based on clock precision, no need to specify number of evaluations. * measure instructions, cycles, branches, instructions per cycle, branch misses (only Linux, when performance counters are available) * output in markdown table format. * Warn about unstable environment (frequency scaling, turbo, ...) * For better profiling, it is possible to set the environment variable NANOBENCH_ENDLESS to force endless running of a particular benchmark without the need to recompile. This makes it possible to e.g. run "perf top" and look at hotspots. 
Here is an example copy & pasted from the terminal output: | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 2.52 | 396,529,415.94 | 0.6% | 25.42 | 8.02 | 3.169 | 0.06 | 0.0% | 0.03 | `bench/crypto_hash.cpp RIPEMD160` | 1.87 | 535,161,444.83 | 0.3% | 21.36 | 5.95 | 3.589 | 0.06 | 0.0% | 0.02 | `bench/crypto_hash.cpp SHA1` | 3.22 | 310,344,174.79 | 1.1% | 36.80 | 10.22 | 3.601 | 0.09 | 0.0% | 0.04 | `bench/crypto_hash.cpp SHA256` | 2.01 | 496,375,796.23 | 0.0% | 18.72 | 6.43 | 2.911 | 0.01 | 1.0% | 0.00 | `bench/crypto_hash.cpp SHA256D64_1024` | 7.23 | 138,263,519.35 | 0.1% | 82.66 | 23.11 | 3.577 | 1.63 | 0.1% | 0.00 | `bench/crypto_hash.cpp SHA256_32b` | 3.04 | 328,780,166.40 | 0.3% | 35.82 | 9.69 | 3.696 | 0.03 | 0.0% | 0.03 | `bench/crypto_hash.cpp SHA512` [1] https://github.com/martinus/nanobench * Adds support for asymptotes This adds support to calculate asymptotic complexity of a benchmark. This is similar to #17375, but currently only one asymptote is supported, and I have added support in the benchmark `ComplexMemPool` as an example. Usage is e.g. like this: ``` ./bench_bitcoin -filter=ComplexMemPool -asymptote=25,50,100,200,400,600,800 ``` This runs the benchmark `ComplexMemPool` several times but with different complexityN settings. The benchmark can extract that number and use it accordingly. Here, it's used for `childTxs`. 
The output is this: | complexityN | ns/op | op/s | err% | ins/op | cyc/op | IPC | total | benchmark |------------:|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|----------:|:---------- | 25 | 1,064,241.00 | 939.64 | 1.4% | 3,960,279.00 | 2,829,708.00 | 1.400 | 0.01 | `ComplexMemPool` | 50 | 1,579,530.00 | 633.10 | 1.0% | 6,231,810.00 | 4,412,674.00 | 1.412 | 0.02 | `ComplexMemPool` | 100 | 4,022,774.00 | 248.58 | 0.6% | 16,544,406.00 | 11,889,535.00 | 1.392 | 0.04 | `ComplexMemPool` | 200 | 15,390,986.00 | 64.97 | 0.2% | 63,904,254.00 | 47,731,705.00 | 1.339 | 0.17 | `ComplexMemPool` | 400 | 69,394,711.00 | 14.41 | 0.1% | 272,602,461.00 | 219,014,691.00 | 1.245 | 0.76 | `ComplexMemPool` | 600 | 168,977,165.00 | 5.92 | 0.1% | 639,108,082.00 | 535,316,887.00 | 1.194 | 1.86 | `ComplexMemPool` | 800 | 310,109,077.00 | 3.22 | 0.1% |1,149,134,246.00 | 984,620,812.00 | 1.167 | 3.41 | `ComplexMemPool` | coefficient | err% | complexity |--------------:|-------:|------------ | 4.78486e-07 | 4.5% | O(n^2) | 6.38557e-10 | 21.7% | O(n^3) | 3.42338e-05 | 38.0% | O(n log n) | 0.000313914 | 46.9% | O(n) | 0.0129823 | 114.4% | O(log n) | 0.0815055 | 133.8% | O(1) The best fitting curve is O(n^2), so the algorithm seems to scale quadratically with `childTxs` in the range 25 to 800.
140 lines
3.5 KiB
C++
140 lines
3.5 KiB
C++
// Copyright (c) 2020-2020 The Bitcoin Core developers
|
|
// Distributed under the MIT software license, see the accompanying
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
#include <addrman.h>
|
|
#include <bench/bench.h>
|
|
#include <random.h>
|
|
#include <util/time.h>
|
|
|
|
#include <vector>
|
|
|
|
/* A "source" is a source address from which we have received a bunch of other addresses. */
|
|
|
|
static constexpr size_t NUM_SOURCES = 64;
|
|
static constexpr size_t NUM_ADDRESSES_PER_SOURCE = 256;
|
|
|
|
static std::vector<CAddress> g_sources;
|
|
static std::vector<std::vector<CAddress>> g_addresses;
|
|
|
|
static void CreateAddresses()
|
|
{
|
|
if (g_sources.size() > 0) { // already created
|
|
return;
|
|
}
|
|
|
|
FastRandomContext rng(uint256(std::vector<unsigned char>(32, 123)));
|
|
|
|
auto randAddr = [&rng]() {
|
|
in6_addr addr;
|
|
memcpy(&addr, rng.randbytes(sizeof(addr)).data(), sizeof(addr));
|
|
|
|
uint16_t port;
|
|
memcpy(&port, rng.randbytes(sizeof(port)).data(), sizeof(port));
|
|
if (port == 0) {
|
|
port = 1;
|
|
}
|
|
|
|
CAddress ret(CService(addr, port), NODE_NETWORK);
|
|
|
|
ret.nTime = GetAdjustedTime();
|
|
|
|
return ret;
|
|
};
|
|
|
|
for (size_t source_i = 0; source_i < NUM_SOURCES; ++source_i) {
|
|
g_sources.emplace_back(randAddr());
|
|
g_addresses.emplace_back();
|
|
for (size_t addr_i = 0; addr_i < NUM_ADDRESSES_PER_SOURCE; ++addr_i) {
|
|
g_addresses[source_i].emplace_back(randAddr());
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Hand every address batch in g_addresses to addrman, each together with the
 * matching entry of g_sources as its source.
 *
 * Indexes both globals up to NUM_SOURCES, so CreateAddresses() must have been
 * called beforehand.
 */
static void AddAddressesToAddrMan(CAddrMan& addrman)
{
    for (size_t idx = 0; idx != NUM_SOURCES; ++idx) {
        const std::vector<CAddress>& batch = g_addresses[idx];
        const CAddress& origin = g_sources[idx];
        addrman.Add(batch, origin);
    }
}
|
|
|
|
/**
 * Convenience wrapper: make sure the global address data set exists, then
 * add all of it to addrman.
 */
static void FillAddrMan(CAddrMan& addrman)
{
    CreateAddresses();              // no-op if the data set already exists
    AddAddressesToAddrMan(addrman); // one Add() call per source
}
|
|
|
|
/* Benchmarks */
|
|
|
|
/**
 * Benchmark: add the complete address data set to a CAddrMan and clear it
 * again, so that each measured iteration starts from a cleared address manager.
 */
static void AddrManAdd(benchmark::Bench& bench)
{
    // Build the data set up front so that its creation is not measured.
    CreateAddresses();

    CAddrMan addrman;

    const auto add_then_reset = [&addrman] {
        AddAddressesToAddrMan(addrman);
        addrman.Clear();
    };
    bench.run(add_then_reset);
}
|
|
|
|
/**
 * Benchmark: call Select() on a CAddrMan that was filled with the complete
 * address data set.
 */
static void AddrManSelect(benchmark::Bench& bench)
{
    CAddrMan addrman;
    FillAddrMan(addrman);

    const auto select_once = [&addrman] {
        // Keep the call outside of assert() so that it is measured regardless
        // of whether assertions are compiled in.
        const auto& picked = addrman.Select();
        assert(picked.GetPort() > 0);
    };
    bench.run(select_once);
}
|
|
|
|
/**
 * Benchmark: call GetAddr() on a CAddrMan that was filled with the complete
 * address data set.
 */
static void AddrManGetAddr(benchmark::Bench& bench)
{
    CAddrMan addrman;
    FillAddrMan(addrman);

    const auto fetch_once = [&addrman] {
        // Keep the call outside of assert() so that it is measured regardless
        // of whether assertions are compiled in.
        const auto& fetched = addrman.GetAddr();
        assert(!fetched.empty());
    };
    bench.run(fetch_once);
}
|
|
|
|
/**
 * Benchmark: mark every 32nd address of every source as good.
 *
 * CAddrMan::Good() modifies the object it is called on, which would affect
 * the timing of later calls on the same object. To do the same amount of work
 * in every iteration, a separate, freshly filled CAddrMan is prepared for each
 * iteration and every one of them is used exactly once.
 */
static void AddrManGood(benchmark::Bench& bench)
{
    // Pin the iteration count so that it is known how many CAddrMan objects
    // have to be prepared.
    bench.epochs(5);
    bench.epochIterations(1);

    const auto pool_size = bench.epochs() * bench.epochIterations();
    std::vector<CAddrMan> pool(pool_size);
    for (auto& fresh : pool) {
        FillAddrMan(fresh);
    }

    auto mark_every_32nd_good = [](CAddrMan& target) {
        for (size_t src = 0; src < NUM_SOURCES; ++src) {
            for (size_t n = 0; n < NUM_ADDRESSES_PER_SOURCE; ++n) {
                if (n % 32 != 0) {
                    continue;
                }
                target.Good(g_addresses[src][n]);
            }
        }
    };

    // Index of the next unused CAddrMan. at() throws if more iterations are
    // run than objects were prepared.
    uint64_t next_unused = 0;
    const auto one_iteration = [&] {
        mark_every_32nd_good(pool.at(next_unused));
        ++next_unused;
    };
    bench.run(one_iteration);
}
|
|
|
|
// Register the address manager benchmarks defined in this file.
BENCHMARK(AddrManAdd);
BENCHMARK(AddrManSelect);
BENCHMARK(AddrManGetAddr);
BENCHMARK(AddrManGood);
|