From 22a5d3dd2ab793336e911d3aceb8cacd278e0f48 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 30 Jan 2018 18:11:16 -0500 Subject: [PATCH 1/7] remove a redundant semicolon --- src/test/test_dos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/test_dos.c b/src/test/test_dos.c index 9496b0735c..6db98b9ed3 100644 --- a/src/test/test_dos.c +++ b/src/test/test_dos.c @@ -78,7 +78,7 @@ static int mock_channel_get_addr_if_possible(channel_t *chan, tor_addr_t *addr_out) { (void)chan; - tt_int_op(AF_INET,OP_EQ, tor_addr_parse(addr_out, "18.0.0.1"));; + tt_int_op(AF_INET,OP_EQ, tor_addr_parse(addr_out, "18.0.0.1")); return 1; done: From 46bd2aed915f17d520f9ff237262d1510fe25e12 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 7 Feb 2018 09:49:35 -0500 Subject: [PATCH 2/7] Add an address-set backend using a bloom filter. We're going to need this to make our anti-DoS code (see 24902) more robust. --- src/common/address.c | 22 +++++++ src/common/address.h | 2 + src/common/address_set.c | 120 +++++++++++++++++++++++++++++++++++++++ src/common/address_set.h | 33 +++++++++++ src/common/include.am | 2 + 5 files changed, 179 insertions(+) create mode 100644 src/common/address_set.c create mode 100644 src/common/address_set.h diff --git a/src/common/address.c b/src/common/address.c index 773e688554..1bd52d24b6 100644 --- a/src/common/address.c +++ b/src/common/address.c @@ -1200,6 +1200,28 @@ tor_addr_hash(const tor_addr_t *addr) } } +/** As tor_addr_hash, but use a particular siphash key. */ +uint64_t +tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr) +{ + /* This is duplicate code with tor_addr_hash, since this function needs to + * be backportable all the way to 0.2.9. */ + + switch (tor_addr_family(addr)) { + case AF_INET: + return siphash24(&addr->addr.in_addr.s_addr, 4, key); + case AF_UNSPEC: + return 0x4e4d5342; + case AF_INET6: + return siphash24(&addr->addr.in6_addr.s6_addr, 16, key); + default: + /* LCOV_EXCL_START */ + tor_fragile_assert(); + return 0; + /* LCOV_EXCL_END */ + } +} + /** Return a newly allocated string with a representation of addr. */ char * tor_addr_to_str_dup(const tor_addr_t *addr) diff --git a/src/common/address.h b/src/common/address.h index 51db42c315..d57abd0d9e 100644 --- a/src/common/address.h +++ b/src/common/address.h @@ -228,6 +228,8 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2, #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT)) uint64_t tor_addr_hash(const tor_addr_t *addr); +struct sipkey; +uint64_t tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr); int tor_addr_is_v4(const tor_addr_t *addr); int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening, const char *filename, int lineno); diff --git a/src/common/address_set.c b/src/common/address_set.c new file mode 100644 index 0000000000..df7022174c --- /dev/null +++ b/src/common/address_set.c @@ -0,0 +1,120 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file address_set.c + * \brief Implementation for a set of addresses. + * + * This module was first written on a semi-emergency basis to improve the + * robustness of the anti-DoS module. As such, it's written in a pretty + * conservative way, and should be susceptible to improvement later on. + **/ + +#include "orconfig.h" +#include "address_set.h" +#include "address.h" +#include "compat.h" +#include "container.h" +#include "crypto.h" +#include "util.h" +#include "siphash.h" + +/** How many 64-bit siphash values to extract per address */ +#define N_HASHES 2 +/** How many bloom-filter bits we set per address. This is twice the N_HASHES + * value, since we split the siphash outcome two 32-bit values. */ +#define N_BITS_PER_ITEM (N_HASHES * 2) + +/* XXXX This code is largely duplicated with digestset_t. We should merge + * them together into a common bloom-filter implementation. I'm keeping + * them separate for now, though, since this module needs to be backported + * all the way to 0.2.9. + * + * The main difference between digestset_t and this code is that we use + * independent siphashes rather than messing around with bit-shifts. The + * approach here is probably more sound, and we should prefer it if&when we + * unify the implementations. + **/ + +struct address_set_t { + /** siphash keys to make N_HASHES independent hashes for each address. */ + struct sipkey key[N_HASHES]; + int mask; /**< One less than the number of bits in ba; always one less + * than a power of two. */ + bitarray_t *ba; /**< A bit array to implement the Bloom filter. */ +}; + +/** + * Allocate and return an address_set, suitable for holding up to + * max_address_guess distinct values. + */ +address_set_t * +address_set_new(int max_addresses_guess) +{ + /* See digestset_new() for rationale on this equation. */ + int n_bits = 1u << (tor_log2(max_addresses_guess)+5); + + address_set_t *set = tor_malloc_zero(sizeof(address_set_t)); + set->mask = n_bits - 1; + set->ba = bitarray_init_zero(n_bits); + crypto_rand((char*) set->key, sizeof(set->key)); + + return set; +} + +/** + * Release all storage associated with set + */ +void +address_set_free(address_set_t *set) +{ + if (! set) + return; + + bitarray_free(set->ba); + tor_free(set); +} + +/** Yield the bit index corresponding to 'val' for set. */ +#define BIT(set, val) ((val) & (set)->mask) + +/** + * Add addr to set. + * + * All future queries for addr in set will return true. Removing + * items is not possible. + */ +void +address_set_add(address_set_t *set, const struct tor_addr_t *addr) +{ + int i; + for (i = 0; i < N_HASHES; ++i) { + uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + bitarray_set(set->ba, BIT(set, high_bits)); + bitarray_set(set->ba, BIT(set, low_bits)); + } +} + +/** + * Return true if addr if a member of set. (And probably, + * return false if addr is not a member of set.) + */ +int +address_set_probably_contains(address_set_t *set, + const struct tor_addr_t *addr) +{ + int i, matches = 0; + for (i = 0; i < N_HASHES; ++i) { + uint64_t h = tor_addr_keyed_hash(&set->key[i], addr); + uint32_t high_bits = (uint32_t)(h >> 32); + uint32_t low_bits = (uint32_t)(h); + // Note that !! is necessary here, since bitarray_is_set does not + // necessarily return 1 on true. + matches += !! bitarray_is_set(set->ba, BIT(set, high_bits)); + matches += !! bitarray_is_set(set->ba, BIT(set, low_bits)); + } + return matches == N_BITS_PER_ITEM; +} + diff --git a/src/common/address_set.h b/src/common/address_set.h new file mode 100644 index 0000000000..568528c89e --- /dev/null +++ b/src/common/address_set.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file addressset.h + * \brief Types to handle sets of addresses. + * + * This module was first written on a semi-emergency basis to improve the + * robustness of the anti-DoS module. As such, it's written in a pretty + * conservative way, and should be susceptible to improvement later on. + **/ + +#ifndef TOR_ADDRESS_SET_H +#define TOR_ADDRESS_SET_H + +#include "orconfig.h" + +/** + * An address_set_t represents a set of tor_addr_t values. The implementation + * is probabilistic: false negatives cannot occur but false positives are + * possible. + */ +typedef struct address_set_t address_set_t; +struct tor_addr_t; + +address_set_t *address_set_new(int max_addresses_guess); +void address_set_free(address_set_t *set); +void address_set_add(address_set_t *set, const struct tor_addr_t *addr); +int address_set_probably_contains(address_set_t *set, + const struct tor_addr_t *addr); + +#endif + diff --git a/src/common/include.am b/src/common/include.am index 40c463c9d9..cb307e9d5f 100644 --- a/src/common/include.am +++ b/src/common/include.am @@ -80,6 +80,7 @@ src_common_libor_ctime_testing_a_CFLAGS = @CFLAGS_CONSTTIME@ $(TEST_CFLAGS) LIBOR_A_SRC = \ src/common/address.c \ + src/common/address_set.c \ src/common/backtrace.c \ src/common/compat.c \ src/common/compat_threads.c \ @@ -135,6 +136,7 @@ src_common_libor_event_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) COMMONHEADERS = \ src/common/address.h \ + src/common/address_set.h \ src/common/backtrace.h \ src/common/aes.h \ src/common/ciphers.inc \ From 0640da42696a666382dd569839e98312d720a22a Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 8 Feb 2018 12:13:56 -0500 Subject: [PATCH 3/7] Function to add an ipv4 address to an address_set This is a convenience function, so callers don't need to wrap the IPv4 address. --- src/common/address_set.c | 9 +++++++++ src/common/address_set.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/src/common/address_set.c b/src/common/address_set.c index df7022174c..6fa942b0dc 100644 --- a/src/common/address_set.c +++ b/src/common/address_set.c @@ -97,6 +97,15 @@ address_set_add(address_set_t *set, const struct tor_addr_t *addr) } } +/** As address_set_add(), but take an ipv4 address in host order. */ +void +address_set_add_ipv4h(address_set_t *set, uint32_t addr) +{ + tor_addr_t a; + tor_addr_from_ipv4h(&a, addr); + address_set_add(set, &a); +} + /** * Return true if addr if a member of set. (And probably, * return false if addr is not a member of set.) diff --git a/src/common/address_set.h b/src/common/address_set.h index 568528c89e..aedf17fc66 100644 --- a/src/common/address_set.h +++ b/src/common/address_set.h @@ -14,6 +14,7 @@ #define TOR_ADDRESS_SET_H #include "orconfig.h" +#include "torint.h" /** * An address_set_t represents a set of tor_addr_t values. The implementation @@ -26,6 +27,7 @@ struct tor_addr_t; address_set_t *address_set_new(int max_addresses_guess); void address_set_free(address_set_t *set); void address_set_add(address_set_t *set, const struct tor_addr_t *addr); +void address_set_add_ipv4h(address_set_t *set, uint32_t addr); int address_set_probably_contains(address_set_t *set, const struct tor_addr_t *addr); From 6892d3292121d02900ac9968e832353ecacca4ad Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 8 Feb 2018 12:14:42 -0500 Subject: [PATCH 4/7] Add an address_set to the nodelist. This set is rebuilt whenever a consensus arrives. In between consensuses, it is add-only. --- src/or/nodelist.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++ src/or/nodelist.h | 1 + 2 files changed, 67 insertions(+) diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 0e9a651818..c2080db12c 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -14,6 +14,7 @@ #include "or.h" #include "address.h" +#include "address_set.h" #include "config.h" #include "control.h" #include "dirserv.h" @@ -52,6 +53,7 @@ static void count_usable_descriptors(int *num_present, static void update_router_have_minimum_dir_info(void); static double get_frac_paths_needed_for_circs(const or_options_t *options, const networkstatus_t *ns); +static void node_add_to_address_set(const node_t *node); /** A nodelist_t holds a node_t object for every router we're "willing to use * for something". Specifically, it should hold a node_t for every node that @@ -63,6 +65,8 @@ typedef struct nodelist_t { /* Hash table to map from node ID digest to node. */ HT_HEAD(nodelist_map, node_t) nodes_by_id; + /* Set of addresses that belong to nodes we believe in. */ + address_set_t *node_addrs; } nodelist_t; static inline unsigned int @@ -150,6 +154,50 @@ node_addrs_changed(node_t *node) node->country = -1; } +/** Add all address information about node to the current address + * set (if there is one). + */ +static void +node_add_to_address_set(const node_t *node) +{ + if (!the_nodelist || !the_nodelist->node_addrs) + return; + + /* These various address sources can be redundant, but it's likely faster + * to add them all than to compare them all for equality. */ + + if (node->rs) { + if (node->rs->addr) + address_set_add_ipv4h(the_nodelist->node_addrs, node->rs->addr); + if (!tor_addr_is_null(&node->rs->ipv6_addr)) + address_set_add(the_nodelist->node_addrs, &node->rs->ipv6_addr); + } + if (node->ri) { + if (node->ri->addr) + address_set_add_ipv4h(the_nodelist->node_addrs, node->ri->addr); + if (!tor_addr_is_null(&node->ri->ipv6_addr)) + address_set_add(the_nodelist->node_addrs, &node->ri->ipv6_addr); + } + if (node->md) { + if (!tor_addr_is_null(&node->md->ipv6_addr)) + address_set_add(the_nodelist->node_addrs, &node->md->ipv6_addr); + } +} + +/** Return true if addr is the address of some node in the nodelist. + * If not, probably return false. */ +int +nodelist_probably_contains_address(const tor_addr_t *addr) +{ + if (BUG(!addr)) + return 0; + + if (!the_nodelist || !the_nodelist->node_addrs) + return 0; + + return address_set_probably_contains(the_nodelist->node_addrs, addr); +} + /** Add ri to an appropriate node in the nodelist. If we replace an * old routerinfo, and ri_old_out is not NULL, set *ri_old_out * to the previous routerinfo. @@ -188,6 +236,8 @@ nodelist_set_routerinfo(routerinfo_t *ri, routerinfo_t **ri_old_out) dirserv_set_node_flags_from_authoritative_status(node, status); } + node_add_to_address_set(node); + return node; } @@ -219,6 +269,9 @@ nodelist_add_microdesc(microdesc_t *md) node->md = md; md->held_by_nodes++; } + + node_add_to_address_set(node); + return node; } @@ -240,6 +293,11 @@ nodelist_set_consensus(networkstatus_t *ns) SMARTLIST_FOREACH(the_nodelist->nodes, node_t *, node, node->rs = NULL); + /* Conservatively estimate that every node will have 2 addresses. */ + const int estimated_addresses = smartlist_len(ns->routerstatus_list) * 2; + address_set_free(the_nodelist->node_addrs); + the_nodelist->node_addrs = address_set_new(estimated_addresses); + SMARTLIST_FOREACH_BEGIN(ns->routerstatus_list, routerstatus_t *, rs) { node_t *node = node_get_or_create(rs->identity_digest); node->rs = rs; @@ -278,6 +336,11 @@ nodelist_set_consensus(networkstatus_t *ns) nodelist_purge(); + /* Now add all the nodes we have to the address set. */ + SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, node) { + node_add_to_address_set(node); + } SMARTLIST_FOREACH_END(node); + if (! authdir) { SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, node) { /* We have no routerstatus for this router. Clear flags so we can skip @@ -430,6 +493,9 @@ nodelist_free_all(void) smartlist_free(the_nodelist->nodes); + address_set_free(the_nodelist->node_addrs); + the_nodelist->node_addrs = NULL; + tor_free(the_nodelist); } diff --git a/src/or/nodelist.h b/src/or/nodelist.h index 71a91e107f..355057f398 100644 --- a/src/or/nodelist.h +++ b/src/or/nodelist.h @@ -22,6 +22,7 @@ const node_t *node_get_by_hex_id(const char *identity_digest); node_t *nodelist_set_routerinfo(routerinfo_t *ri, routerinfo_t **ri_old_out); node_t *nodelist_add_microdesc(microdesc_t *md); void nodelist_set_consensus(networkstatus_t *ns); +int nodelist_probably_contains_address(const tor_addr_t *addr); void nodelist_remove_microdesc(const char *identity_digest, microdesc_t *md); void nodelist_remove_routerinfo(routerinfo_t *ri); From a445327b80478c72093d8f1b0e205a279318f651 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Thu, 8 Feb 2018 14:35:22 -0500 Subject: [PATCH 5/7] test: Add unit tests for addressset.c This also adds one that tests the integration with the nodelist. Signed-off-by: David Goulet --- src/or/nodelist.c | 14 ++- src/or/nodelist.h | 2 + src/test/include.am | 1 + src/test/test.c | 1 + src/test/test.h | 1 + src/test/test_address_set.c | 174 ++++++++++++++++++++++++++++++++++++ 6 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 src/test/test_address_set.c diff --git a/src/or/nodelist.c b/src/or/nodelist.c index c2080db12c..5a02648c5c 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -275,6 +275,17 @@ nodelist_add_microdesc(microdesc_t *md) return node; } +/* Default value. */ +#define ESTIMATED_ADDRESS_PER_NODE 2 + +/* Return the estimated number of address per node_t. This is used for the + * size of the bloom filter in the nodelist (node_addrs). */ +MOCK_IMPL(int, +get_estimated_address_per_node, (void)) +{ + return ESTIMATED_ADDRESS_PER_NODE; +} + /** Tell the nodelist that the current usable consensus is ns. * This makes the nodelist change all of the routerstatus entries for * the nodes, drop nodes that no longer have enough info to get used, @@ -294,7 +305,8 @@ nodelist_set_consensus(networkstatus_t *ns) node->rs = NULL); /* Conservatively estimate that every node will have 2 addresses. */ - const int estimated_addresses = smartlist_len(ns->routerstatus_list) * 2; + const int estimated_addresses = smartlist_len(ns->routerstatus_list) * + get_estimated_address_per_node(); address_set_free(the_nodelist->node_addrs); the_nodelist->node_addrs = address_set_new(estimated_addresses); diff --git a/src/or/nodelist.h b/src/or/nodelist.h index 355057f398..098f1d1555 100644 --- a/src/or/nodelist.h +++ b/src/or/nodelist.h @@ -125,5 +125,7 @@ void router_dir_info_changed(void); const char *get_dir_info_status_string(void); int count_loading_descriptors_progress(void); +MOCK_DECL(int, get_estimated_address_per_node, (void)); + #endif diff --git a/src/test/include.am b/src/test/include.am index 8ecfaf10c6..cf29d4cb2b 100644 --- a/src/test/include.am +++ b/src/test/include.am @@ -72,6 +72,7 @@ src_test_test_SOURCES = \ src/test/test_accounting.c \ src/test/test_addr.c \ src/test/test_address.c \ + src/test/test_address_set.c \ src/test/test_buffers.c \ src/test/test_cell_formats.c \ src/test/test_cell_queue.c \ diff --git a/src/test/test.c b/src/test/test.c index 9a41b976b8..9b0775ce45 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -1182,6 +1182,7 @@ struct testgroup_t testgroups[] = { { "accounting/", accounting_tests }, { "addr/", addr_tests }, { "address/", address_tests }, + { "address_set/", address_set_tests }, { "buffer/", buffer_tests }, { "cellfmt/", cell_format_tests }, { "cellqueue/", cell_queue_tests }, diff --git a/src/test/test.h b/src/test/test.h index 25336ac83e..22b207207f 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -175,6 +175,7 @@ extern const struct testcase_setup_t ed25519_test_setup; extern struct testcase_t accounting_tests[]; extern struct testcase_t addr_tests[]; extern struct testcase_t address_tests[]; +extern struct testcase_t address_set_tests[]; extern struct testcase_t buffer_tests[]; extern struct testcase_t cell_format_tests[]; extern struct testcase_t cell_queue_tests[]; diff --git a/src/test/test_address_set.c b/src/test/test_address_set.c new file mode 100644 index 0000000000..df022f539a --- /dev/null +++ b/src/test/test_address_set.c @@ -0,0 +1,174 @@ +/* Copyright (c) 2017, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include "or.h" +#include "address_set.h" +#include "microdesc.h" +#include "networkstatus.h" +#include "nodelist.h" +#include "routerlist.h" +#include "torcert.h" + +#include "test.h" + +static networkstatus_t *dummy_ns = NULL; +static networkstatus_t * +mock_networkstatus_get_latest_consensus(void) +{ + return dummy_ns; +} + +static networkstatus_t * +mock_networkstatus_get_latest_consensus_by_flavor(consensus_flavor_t f) +{ + tor_assert(f == FLAV_MICRODESC); + return dummy_ns; +} + +/* Number of address a single node_t can have. Default to the production + * value. This is to control the size of the bloom filter. */ +static int addr_per_node = 2; +static int +mock_get_estimated_address_per_node(void) +{ + return addr_per_node; +} + +static void +test_contains(void *arg) +{ + int ret; + address_set_t *set = NULL; + + (void) arg; + + /* Setup an IPv4 and IPv6 addresses. */ + tor_addr_t addr_v6; + tor_addr_parse(&addr_v6, "1:2:3:4::"); + tor_addr_t addr_v4; + tor_addr_parse(&addr_v4, "42.42.42.42"); + uint32_t ipv4h = tor_addr_to_ipv4h(&addr_v4); + + /* Make it very big so the chance of failing the contain test will be + * extremely rare. */ + set = address_set_new(1024); + tt_assert(set); + + /* Add and lookup IPv6. */ + address_set_add(set, &addr_v6); + ret = address_set_probably_contains(set, &addr_v6); + tt_int_op(ret, OP_EQ, 1); + + /* Add and lookup IPv4. */ + address_set_add_ipv4h(set, ipv4h); + ret = address_set_probably_contains(set, &addr_v4); + tt_int_op(ret, OP_EQ, 1); + + /* Try a lookup of rubbish. */ + tor_addr_t dummy_addr; + memset(&dummy_addr, 'A', sizeof(dummy_addr)); + dummy_addr.family = AF_INET; + ret = address_set_probably_contains(set, &dummy_addr); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET6; + ret = address_set_probably_contains(set, &dummy_addr); + tt_int_op(ret, OP_EQ, 0); + + done: + address_set_free(set); +} + +static void +test_nodelist(void *arg) +{ + int ret; + routerstatus_t *rs = NULL; microdesc_t *md = NULL; routerinfo_t *ri = NULL; + + (void) arg; + + MOCK(networkstatus_get_latest_consensus, + mock_networkstatus_get_latest_consensus); + MOCK(networkstatus_get_latest_consensus_by_flavor, + mock_networkstatus_get_latest_consensus_by_flavor); + MOCK(get_estimated_address_per_node, + mock_get_estimated_address_per_node); + + dummy_ns = tor_malloc_zero(sizeof(*dummy_ns)); + dummy_ns->flavor = FLAV_MICRODESC; + dummy_ns->routerstatus_list = smartlist_new(); + + tor_addr_t addr_v4, addr_v6, dummy_addr; + tor_addr_parse(&addr_v4, "42.42.42.42"); + uint32_t ipv4h = tor_addr_to_ipv4h(&addr_v4); + tor_addr_parse(&addr_v6, "1:2:3:4::"); + memset(&dummy_addr, 'A', sizeof(dummy_addr)); + + /* This will make the nodelist bloom filter very large + * (the_nodelist->node_addrs) so we will fail the contain test rarely. */ + addr_per_node = 1024; + + /* No node no nothing. The lookups should be empty. */ + nodelist_set_consensus(dummy_ns); + + /* The address set should be empty. */ + ret = nodelist_probably_contains_address(&addr_v4); + tt_int_op(ret, OP_EQ, 0); + ret = nodelist_probably_contains_address(&addr_v6); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET6; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + + md = tor_malloc_zero(sizeof(*md)); + ri = tor_malloc_zero(sizeof(*ri)); + rs = tor_malloc_zero(sizeof(*rs)); + crypto_rand(rs->identity_digest, sizeof(rs->identity_digest)); + crypto_rand(md->digest, sizeof(md->digest)); + memcpy(rs->descriptor_digest, md->digest, DIGEST256_LEN); + + /* Setup the rs, ri and md addresses. */ + rs->addr = ipv4h; + tor_addr_parse(&rs->ipv6_addr, "1:2:3:4::"); + ri->addr = ipv4h; + tor_addr_parse(&ri->ipv6_addr, "1:2:3:4::"); + tor_addr_parse(&md->ipv6_addr, "1:2:3:4::"); + + /* Add the rs to the consensus becoming a node_t. */ + smartlist_add(dummy_ns->routerstatus_list, rs); + nodelist_set_consensus(dummy_ns); + + /* At this point, the address set should be initialized in the nodelist and + * we should be able to lookup. */ + ret = nodelist_probably_contains_address(&addr_v4); + tt_int_op(ret, OP_EQ, 1); + ret = nodelist_probably_contains_address(&addr_v6); + tt_int_op(ret, OP_EQ, 1); + /* Lookup unknown address. */ + dummy_addr.family = AF_INET; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + dummy_addr.family = AF_INET6; + ret = nodelist_probably_contains_address(&dummy_addr); + tt_int_op(ret, OP_EQ, 0); + + done: + routerstatus_free(rs); routerinfo_free(ri); microdesc_free(md); + smartlist_clear(dummy_ns->routerstatus_list); + networkstatus_vote_free(dummy_ns); + UNMOCK(networkstatus_get_latest_consensus); + UNMOCK(networkstatus_get_latest_consensus_by_flavor); + UNMOCK(get_estimated_address_per_node); +} + +struct testcase_t address_set_tests[] = { + { "contains", test_contains, TT_FORK, + NULL, NULL }, + { "nodelist", test_nodelist, TT_FORK, + NULL, NULL }, + + END_OF_TESTCASES +}; + From 666582a679cdfb2d69620db6aadf55a57d430e23 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Fri, 9 Feb 2018 11:11:41 -0500 Subject: [PATCH 6/7] dos: Exclude known relays from client connection count This is to avoid positively identifying Exit relays if tor client connection comes from them that is reentering the network. One thing to note is that this is done only in the DoS subsystem but we'll still add it to the geoip cache as a "client" seen. This is done that way so to avoid as much as possible changing the current behavior of the geoip client cache since this is being backported. Closes #25193 Signed-off-by: David Goulet --- src/or/dos.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/or/dos.c b/src/or/dos.c index 88f1351a3f..9e8a7a9abe 100644 --- a/src/or/dos.c +++ b/src/or/dos.c @@ -14,6 +14,7 @@ #include "geoip.h" #include "main.h" #include "networkstatus.h" +#include "nodelist.h" #include "router.h" #include "dos.h" @@ -664,6 +665,14 @@ dos_new_client_conn(or_connection_t *or_conn) goto end; } + /* We ignore any known address meaning an address of a known relay. The + * reason to do so is because network reentry is possible where a client + * connection comes from an Exit node. Even when we'll fix reentry, this is + * a robust defense to keep in place. */ + if (nodelist_probably_contains_address(&or_conn->real_addr)) { + goto end; + } + /* We are only interested in client connection from the geoip cache. */ entry = geoip_lookup_client(&or_conn->real_addr, NULL, GEOIP_CLIENT_CONNECT); From 1a4fc9cddf27595db6f5da981a557f768fa32f66 Mon Sep 17 00:00:00 2001 From: David Goulet Date: Fri, 9 Feb 2018 11:31:01 -0500 Subject: [PATCH 7/7] test: DoS test to make sure we exclude known relays Part of #25193 Signed-off-by: David Goulet --- src/test/test_dos.c | 103 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/test/test_dos.c b/src/test/test_dos.c index 6db98b9ed3..cb9d9e559c 100644 --- a/src/test/test_dos.c +++ b/src/test/test_dos.c @@ -10,9 +10,36 @@ #include "circuitlist.h" #include "geoip.h" #include "channel.h" +#include "microdesc.h" +#include "networkstatus.h" +#include "nodelist.h" +#include "routerlist.h" #include "test.h" #include "log_test_helpers.h" +static networkstatus_t *dummy_ns = NULL; +static networkstatus_t * +mock_networkstatus_get_latest_consensus(void) +{ + return dummy_ns; +} + +static networkstatus_t * +mock_networkstatus_get_latest_consensus_by_flavor(consensus_flavor_t f) +{ + tor_assert(f == FLAV_MICRODESC); + return dummy_ns; +} + +/* Number of address a single node_t can have. Default to the production + * value. This is to control the size of the bloom filter. */ +static int addr_per_node = 2; +static int +mock_get_estimated_address_per_node(void) +{ + return addr_per_node; +} + static unsigned int mock_enable_dos_protection(const networkstatus_t *ns) { @@ -385,10 +412,86 @@ test_dos_bucket_refill(void *arg) dos_free_all(); } +/* Test if we avoid counting a known relay. */ +static void +test_known_relay(void *arg) +{ + clientmap_entry_t *entry = NULL; + routerstatus_t *rs = NULL; microdesc_t *md = NULL; routerinfo_t *ri = NULL; + + (void) arg; + + MOCK(networkstatus_get_latest_consensus, + mock_networkstatus_get_latest_consensus); + MOCK(networkstatus_get_latest_consensus_by_flavor, + mock_networkstatus_get_latest_consensus_by_flavor); + MOCK(get_estimated_address_per_node, + mock_get_estimated_address_per_node); + MOCK(get_param_cc_enabled, mock_enable_dos_protection); + + dos_init(); + + dummy_ns = tor_malloc_zero(sizeof(*dummy_ns)); + dummy_ns->flavor = FLAV_MICRODESC; + dummy_ns->routerstatus_list = smartlist_new(); + + /* Setup an OR conn so we can pass it to the DoS subsystem. */ + or_connection_t or_conn; + tor_addr_parse(&or_conn.real_addr, "42.42.42.42"); + + rs = tor_malloc_zero(sizeof(*rs)); + rs->addr = tor_addr_to_ipv4h(&or_conn.real_addr); + crypto_rand(rs->identity_digest, sizeof(rs->identity_digest)); + smartlist_add(dummy_ns->routerstatus_list, rs); + + /* This will make the nodelist bloom filter very large + * (the_nodelist->node_addrs) so we will fail the contain test rarely. */ + addr_per_node = 1024; + nodelist_set_consensus(dummy_ns); + + /* We have now a node in our list so we'll make sure we don't count it as a + * client connection. */ + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &or_conn.real_addr, NULL, 0); + /* Suppose we have 5 connections in rapid succession, the counter should + * always be 0 because we should ignore this. */ + dos_new_client_conn(&or_conn); + dos_new_client_conn(&or_conn); + dos_new_client_conn(&or_conn); + dos_new_client_conn(&or_conn); + dos_new_client_conn(&or_conn); + entry = geoip_lookup_client(&or_conn.real_addr, NULL, GEOIP_CLIENT_CONNECT); + tt_assert(entry); + /* We should have a count of 0. */ + tt_uint_op(entry->dos_stats.concurrent_count, OP_EQ, 0); + + /* To make sure that his is working properly, make a unknown client + * connection and see if we do get it. */ + tor_addr_parse(&or_conn.real_addr, "42.42.42.43"); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, &or_conn.real_addr, NULL, 0); + dos_new_client_conn(&or_conn); + dos_new_client_conn(&or_conn); + entry = geoip_lookup_client(&or_conn.real_addr, NULL, GEOIP_CLIENT_CONNECT); + tt_assert(entry); + /* We should have a count of 2. */ + tt_uint_op(entry->dos_stats.concurrent_count, OP_EQ, 2); + + done: + routerstatus_free(rs); routerinfo_free(ri); microdesc_free(md); + smartlist_clear(dummy_ns->routerstatus_list); + networkstatus_vote_free(dummy_ns); + dos_free_all(); + UNMOCK(networkstatus_get_latest_consensus); + UNMOCK(networkstatus_get_latest_consensus_by_flavor); + UNMOCK(get_estimated_address_per_node); + UNMOCK(get_param_cc_enabled); +} + struct testcase_t dos_tests[] = { { "conn_creation", test_dos_conn_creation, TT_FORK, NULL, NULL }, { "circuit_creation", test_dos_circuit_creation, TT_FORK, NULL, NULL }, { "bucket_refill", test_dos_bucket_refill, TT_FORK, NULL, NULL }, + { "known_relay" , test_known_relay, TT_FORK, + NULL, NULL }, END_OF_TESTCASES };