From 80357abb11023ff285c251ee75b864e0a27d5809 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 22 Nov 2010 11:36:22 -0500 Subject: [PATCH 1/8] Remove an incorrect comment in connection_or_check_valid_tls_handshake --- src/or/connection_or.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 415a48dca3..6279c93ab1 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -1098,9 +1098,6 @@ connection_or_check_valid_tls_handshake(or_connection_t *conn, as_advertised = 0; } if (authdir_mode_tests_reachability(options)) { - /* We initiated this connection to address:port. Drop all routers - * with the same address:port and a different key. - */ dirserv_orconn_tls_done(conn->_base.address, conn->_base.port, digest_rcvd_out, as_advertised); } From bea0a31c1c859a563f065d8b868570560abc5135 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 22 Nov 2010 12:30:33 -0500 Subject: [PATCH 2/8] Treat routers whose IPs have changed as having been down for MTBF/routerinfo calculation purposes. --- changes/bug1035 | 7 +++++++ src/or/dirserv.c | 16 +++++++++++----- src/or/rephist.c | 38 +++++++++++++++++++++++++++++++++++++- src/or/rephist.h | 3 ++- 4 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 changes/bug1035 diff --git a/changes/bug1035 b/changes/bug1035 new file mode 100644 index 0000000000..4b62b46cd0 --- /dev/null +++ b/changes/bug1035 @@ -0,0 +1,7 @@ + o Minor features (authorities) + - Take altered router IPs into account when determining router stability. + Previously, if a router changed its IP, the authorities would not + treat it as having any downtime for the purposes of stability + calculation, whereas clients would experience downtime since the + IP could take a while to propagate to them. Resolves issue 1035. + diff --git a/src/or/dirserv.c b/src/or/dirserv.c index 42d7d561ce..52e59cd9d1 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -3108,19 +3108,25 @@ dirserv_orconn_tls_done(const char *address, tor_assert(address); tor_assert(digest_rcvd); - SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, { + /* XXX023 Doing a loop like this is stupid. We should just look up the + * router by digest_rcvd, and see if address, orport, and as_advertised + * match up. -NM */ + SMARTLIST_FOREACH_BEGIN(rl->routers, routerinfo_t *, ri) { if (!strcasecmp(address, ri->address) && or_port == ri->or_port && as_advertised && !memcmp(ri->cache_info.identity_digest, digest_rcvd, DIGEST_LEN)) { /* correct digest. mark this router reachable! */ if (!bridge_auth || ri->purpose == ROUTER_PURPOSE_BRIDGE) { - log_info(LD_DIRSERV, "Found router %s to be reachable. Yay.", - ri->nickname); - rep_hist_note_router_reachable(digest_rcvd, now); + tor_addr_t addr, *addrp=NULL; + log_info(LD_DIRSERV, "Found router %s to be reachable at %s. Yay.", + ri->nickname, address); + if (tor_addr_from_str(&addr, ri->address) != -1) + addrp = &addr; + rep_hist_note_router_reachable(digest_rcvd, addrp, now); ri->last_reachable = now; } } - }); + } SMARTLIST_FOREACH_END(ri); /* FFFF Maybe we should reinstate the code that dumps routers with the same * addr/port but with nonmatching keys, but instead of dumping, we should * skip testing. */ diff --git a/src/or/rephist.c b/src/or/rephist.c index 22b3ec5217..a0c9c9f39d 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -14,6 +14,7 @@ #include "circuitlist.h" #include "circuituse.h" #include "config.h" +#include "networkstatus.h" #include "rephist.h" #include "router.h" #include "routerlist.h" @@ -73,6 +74,10 @@ typedef struct or_history_t { /** If nonzero, we have been unable to connect since this time. */ time_t down_since; + /** The address at which we most recently connected to this OR + * sucessfully. */ + tor_addr_t last_reached_addr; + /* === For MTBF tracking: */ /** Weighted sum total of all times that this router has been online. */ @@ -119,6 +124,7 @@ get_or_history(const char* id) rephist_total_num++; hist->link_history_map = digestmap_new(); hist->since = hist->changed = time(NULL); + tor_addr_make_unspec(&hist->last_reached_addr); digestmap_set(history_map, id, hist); } return hist; @@ -289,14 +295,19 @@ rep_hist_note_connection_died(const char* id, time_t when) /** We have just decided that this router with identity digest id is * reachable, meaning we will give it a "Running" flag for the next while. */ void -rep_hist_note_router_reachable(const char *id, time_t when) +rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, + time_t when) { or_history_t *hist = get_or_history(id); int was_in_run = 1; char tbuf[ISO_TIME_LEN+1]; + int addr_changed; tor_assert(hist); + addr_changed = at_addr && + tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0; + if (!started_tracking_stability) started_tracking_stability = time(NULL); if (!hist->start_of_run) { @@ -315,6 +326,29 @@ rep_hist_note_router_reachable(const char *id, time_t when) down_length = when - hist->start_of_downtime; hist->total_weighted_time += down_length; hist->start_of_downtime = 0; + } else if (addr_changed) { + /* If we're reachable, but the address changed, treat this as some + * downtime. */ +#define MIN_DOWNTIME_FOR_ADDR_CHANGE 600 + int penalty = get_options()->TestingTorNetwork ? 240 : 3600; + networkstatus_t *ns; + tor_assert(at_addr); + + if ((ns = networkstatus_get_latest_consensus())) { + int fresh_interval = ns->fresh_until - ns->valid_after; + int live_interval = ns->valid_until - ns->valid_after; + /* on average, a descriptor addr change takes .5 intervals to make it + * into a consensus, and half a liveness period to make it to + * clients. */ + penalty = (fresh_interval + live_interval) / 2; + } + format_local_iso_time(tbuf, hist->start_of_run); + log_info(LD_HIST,"Router %s still seems Running, but its address appears " + "to have changed since the last time it was reachable. I'm " + "going to treat it as having been down for %d seconds", + hex_str(id, DIGEST_LEN), penalty); + rep_hist_note_router_unreachable(id, when-penalty); + rep_hist_note_router_reachable(id, NULL, when); } else { format_local_iso_time(tbuf, hist->start_of_run); if (was_in_run) @@ -324,6 +358,8 @@ rep_hist_note_router_reachable(const char *id, time_t when) log_info(LD_HIST,"Router %s is now Running; it was previously untracked", hex_str(id, DIGEST_LEN)); } + if (at_addr) + tor_addr_copy(&hist->last_reached_addr, at_addr); } /** We have just decided that this router is unreachable, meaning diff --git a/src/or/rephist.h b/src/or/rephist.h index 8f5a34dacf..1ddbac4dad 100644 --- a/src/or/rephist.h +++ b/src/or/rephist.h @@ -33,7 +33,8 @@ void rep_hist_update_state(or_state_t *state); int rep_hist_load_state(or_state_t *state, char **err); void rep_history_clean(time_t before); -void rep_hist_note_router_reachable(const char *id, time_t when); +void rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, + time_t when); void rep_hist_note_router_unreachable(const char *id, time_t when); int rep_hist_record_mtbf_data(time_t now, int missing_means_down); int rep_hist_load_mtbf_data(time_t now); From 5a9903b9e09697ac131c841310dd82eebcca02e0 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 22 Nov 2010 12:39:22 -0500 Subject: [PATCH 3/8] Handle negative run lengths in wfu/mtbf calculations --- changes/bug1035 | 5 +++++ src/or/rephist.c | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/changes/bug1035 b/changes/bug1035 index 4b62b46cd0..041e3b3bb6 100644 --- a/changes/bug1035 +++ b/changes/bug1035 @@ -4,4 +4,9 @@ treat it as having any downtime for the purposes of stability calculation, whereas clients would experience downtime since the IP could take a while to propagate to them. Resolves issue 1035. + o Minor bugfixes (authorities) + - Try to be more robust to hops back in time when calculating + router stability. Previously, if a run of uptime or downtime + appeared to be negative, the calculation could give incorrect + results. Bugfix on 0.2.0.6-alpha. diff --git a/src/or/rephist.c b/src/or/rephist.c index a0c9c9f39d..e59fcb56a9 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -380,12 +380,20 @@ rep_hist_note_router_unreachable(const char *id, time_t when) long run_length = when - hist->start_of_run; format_local_iso_time(tbuf, hist->start_of_run); - hist->weighted_run_length += run_length; hist->total_run_weights += 1.0; hist->start_of_run = 0; - hist->weighted_uptime += run_length; - hist->total_weighted_time += run_length; + if (run_length < 0) { + unsigned long penalty = -run_length; +#define SUBTRACT_CLAMPED(var, penalty) \ + do { (var) = (var) < (penalty) ? 0 : (var) - (penalty); } while (0) + SUBTRACT_CLAMPED(hist->weighted_run_length, penalty); + SUBTRACT_CLAMPED(hist->weighted_uptime, penalty); + } else { + hist->weighted_run_length += run_length; + hist->weighted_uptime += run_length; + hist->total_weighted_time += run_length; + } was_running = 1; log_info(LD_HIST, "Router %s is now non-Running: it had previously been " "Running since %s. Its total weighted uptime is %lu/%lu.", @@ -458,7 +466,7 @@ rep_hist_downrate_old_runs(time_t now) static double get_stability(or_history_t *hist, time_t when) { - unsigned long total = hist->weighted_run_length; + long total = hist->weighted_run_length; double total_weights = hist->total_run_weights; if (hist->start_of_run) { @@ -494,8 +502,8 @@ get_total_weighted_time(or_history_t *hist, time_t when) static double get_weighted_fractional_uptime(or_history_t *hist, time_t when) { - unsigned long total = hist->total_weighted_time; - unsigned long up = hist->weighted_uptime; + long total = hist->total_weighted_time; + long up = hist->weighted_uptime; if (hist->start_of_run) { long run_length = (when - hist->start_of_run); From 59a3d536d80d49da956d5e4e0c59a97164e1f9c4 Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Mon, 27 Dec 2010 11:37:16 +0100 Subject: [PATCH 4/8] Fix compile without warnings on OS X 10.6 --- src/or/rephist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/or/rephist.c b/src/or/rephist.c index e59fcb56a9..1bbfe310f3 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -335,12 +335,12 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, tor_assert(at_addr); if ((ns = networkstatus_get_latest_consensus())) { - int fresh_interval = ns->fresh_until - ns->valid_after; - int live_interval = ns->valid_until - ns->valid_after; + int fresh_interval = (int)(ns->fresh_until - ns->valid_after); + int live_interval = (int)(ns->valid_until - ns->valid_after); /* on average, a descriptor addr change takes .5 intervals to make it * into a consensus, and half a liveness period to make it to * clients. */ - penalty = (fresh_interval + live_interval) / 2; + penalty = (int)(fresh_interval + live_interval) / 2; } format_local_iso_time(tbuf, hist->start_of_run); log_info(LD_HIST,"Router %s still seems Running, but its address appears " From a68e2043aba4d33e39a8cb47be5dc3082f27ad07 Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Mon, 7 Feb 2011 16:15:02 +0100 Subject: [PATCH 5/8] Fix spelling and an unused #define both noticed by rransom --- src/or/rephist.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/or/rephist.c b/src/or/rephist.c index 1bbfe310f3..28699901fa 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -75,7 +75,7 @@ typedef struct or_history_t { time_t down_since; /** The address at which we most recently connected to this OR - * sucessfully. */ + * successfully. */ tor_addr_t last_reached_addr; /* === For MTBF tracking: */ @@ -329,7 +329,6 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, } else if (addr_changed) { /* If we're reachable, but the address changed, treat this as some * downtime. */ -#define MIN_DOWNTIME_FOR_ADDR_CHANGE 600 int penalty = get_options()->TestingTorNetwork ? 240 : 3600; networkstatus_t *ns; tor_assert(at_addr); From 9b64227ffd38e9406c5c88ace137a0eae010771d Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Mon, 7 Feb 2011 16:31:20 +0100 Subject: [PATCH 6/8] Routers count as down when they change ORPort, too rransom noticed that a change of ORPort is just as bad as a change of IP address from a client's perspective, because both mean that the relay is not available to them while the new information hasn't propagated. Change the bug1035 fix accordingly. Also make sure we don't log a bridge's IP address (which might happen when we are the bridge authority). --- changes/bug1035 | 11 ++++++----- src/or/dirserv.c | 8 +++++--- src/or/rephist.c | 23 +++++++++++++++-------- src/or/rephist.h | 2 +- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/changes/bug1035 b/changes/bug1035 index 041e3b3bb6..3d86330e63 100644 --- a/changes/bug1035 +++ b/changes/bug1035 @@ -1,9 +1,10 @@ o Minor features (authorities) - - Take altered router IPs into account when determining router stability. - Previously, if a router changed its IP, the authorities would not - treat it as having any downtime for the purposes of stability - calculation, whereas clients would experience downtime since the - IP could take a while to propagate to them. Resolves issue 1035. + - Take altered router IP addresses and ORPorts into account when + determining router stability. Previously, if a router changed + its IP or ORPort, the authorities would not treat it as having + any downtime for the purposes of stability calculation, whereas + clients would experience downtime since the change could take a + while to propagate to them. Resolves issue 1035. o Minor bugfixes (authorities) - Try to be more robust to hops back in time when calculating router stability. Previously, if a run of uptime or downtime diff --git a/src/or/dirserv.c b/src/or/dirserv.c index 52e59cd9d1..f426881440 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -3118,11 +3118,13 @@ dirserv_orconn_tls_done(const char *address, /* correct digest. mark this router reachable! */ if (!bridge_auth || ri->purpose == ROUTER_PURPOSE_BRIDGE) { tor_addr_t addr, *addrp=NULL; - log_info(LD_DIRSERV, "Found router %s to be reachable at %s. Yay.", - ri->nickname, address); + log_info(LD_DIRSERV, "Found router %s to be reachable at %s:%d. Yay.", + ri->nickname, address, ri->or_port ); if (tor_addr_from_str(&addr, ri->address) != -1) addrp = &addr; - rep_hist_note_router_reachable(digest_rcvd, addrp, now); + else + log_warn(LD_BUG, "Couldn't parse IP address \"%s\"", ri->address); + rep_hist_note_router_reachable(digest_rcvd, addrp, or_port, now); ri->last_reachable = now; } } diff --git a/src/or/rephist.c b/src/or/rephist.c index 28699901fa..7c570e26a6 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -78,6 +78,9 @@ typedef struct or_history_t { * successfully. */ tor_addr_t last_reached_addr; + /** The port at which we most recently connected to this OR successfully */ + uint16_t last_reached_port; + /* === For MTBF tracking: */ /** Weighted sum total of all times that this router has been online. */ @@ -296,17 +299,18 @@ rep_hist_note_connection_died(const char* id, time_t when) * reachable, meaning we will give it a "Running" flag for the next while. */ void rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, - time_t when) + const uint16_t at_port, time_t when) { or_history_t *hist = get_or_history(id); int was_in_run = 1; char tbuf[ISO_TIME_LEN+1]; - int addr_changed; + int addr_changed, port_changed; tor_assert(hist); addr_changed = at_addr && tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0; + port_changed = at_port && at_port != hist->last_reached_port; if (!started_tracking_stability) started_tracking_stability = time(NULL); @@ -326,7 +330,7 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, down_length = when - hist->start_of_downtime; hist->total_weighted_time += down_length; hist->start_of_downtime = 0; - } else if (addr_changed) { + } else if (addr_changed || port_changed) { /* If we're reachable, but the address changed, treat this as some * downtime. */ int penalty = get_options()->TestingTorNetwork ? 240 : 3600; @@ -342,12 +346,13 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, penalty = (int)(fresh_interval + live_interval) / 2; } format_local_iso_time(tbuf, hist->start_of_run); - log_info(LD_HIST,"Router %s still seems Running, but its address appears " - "to have changed since the last time it was reachable. I'm " - "going to treat it as having been down for %d seconds", - hex_str(id, DIGEST_LEN), penalty); + if (!authdir_mode_bridge(get_options())) + log_info(LD_HIST,"Router %s still seems Running, but its address appears " + "to have changed since the last time it was reachable. I'm " + "going to treat it as having been down for %d seconds", + hex_str(id, DIGEST_LEN), penalty); rep_hist_note_router_unreachable(id, when-penalty); - rep_hist_note_router_reachable(id, NULL, when); + rep_hist_note_router_reachable(id, NULL, 0, when); } else { format_local_iso_time(tbuf, hist->start_of_run); if (was_in_run) @@ -359,6 +364,8 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, } if (at_addr) tor_addr_copy(&hist->last_reached_addr, at_addr); + if (at_port) + hist->last_reached_port = at_port; } /** We have just decided that this router is unreachable, meaning diff --git a/src/or/rephist.h b/src/or/rephist.h index 1ddbac4dad..610c1a0704 100644 --- a/src/or/rephist.h +++ b/src/or/rephist.h @@ -34,7 +34,7 @@ int rep_hist_load_state(or_state_t *state, char **err); void rep_history_clean(time_t before); void rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, - time_t when); + uint16_t at_port, time_t when); void rep_hist_note_router_unreachable(const char *id, time_t when); int rep_hist_record_mtbf_data(time_t now, int missing_means_down); int rep_hist_load_mtbf_data(time_t now); From 5a4f7fa1e48923730376c0a42121e4c3022eef3b Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Sat, 26 Feb 2011 09:42:44 +0100 Subject: [PATCH 7/8] clarify an assert also log about running changes, even on a bridge authority. --- src/or/rephist.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/or/rephist.c b/src/or/rephist.c index 7c570e26a6..207eb88935 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -307,6 +307,7 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, int addr_changed, port_changed; tor_assert(hist); + tor_assert((!at_addr && !at_port) || (at_addr && at_port)); addr_changed = at_addr && tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0; @@ -335,7 +336,6 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, * downtime. */ int penalty = get_options()->TestingTorNetwork ? 240 : 3600; networkstatus_t *ns; - tor_assert(at_addr); if ((ns = networkstatus_get_latest_consensus())) { int fresh_interval = (int)(ns->fresh_until - ns->valid_after); @@ -346,11 +346,10 @@ rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, penalty = (int)(fresh_interval + live_interval) / 2; } format_local_iso_time(tbuf, hist->start_of_run); - if (!authdir_mode_bridge(get_options())) - log_info(LD_HIST,"Router %s still seems Running, but its address appears " - "to have changed since the last time it was reachable. I'm " - "going to treat it as having been down for %d seconds", - hex_str(id, DIGEST_LEN), penalty); + log_info(LD_HIST,"Router %s still seems Running, but its address appears " + "to have changed since the last time it was reachable. I'm " + "going to treat it as having been down for %d seconds", + hex_str(id, DIGEST_LEN), penalty); rep_hist_note_router_unreachable(id, when-penalty); rep_hist_note_router_reachable(id, NULL, 0, when); } else { From 9c72324ae85c3f2cc23fee7d383128fa239b36d0 Mon Sep 17 00:00:00 2001 From: Roger Dingledine Date: Tue, 8 Mar 2011 15:31:04 -0500 Subject: [PATCH 8/8] update spec locations --- doc/HACKING | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/HACKING b/doc/HACKING index bdb86c06c6..f42628b36d 100644 --- a/doc/HACKING +++ b/doc/HACKING @@ -5,17 +5,17 @@ Getting started --------------- For full information on how Tor is supposed to work, look at the files in -doc/spec/ . +https://gitweb.torproject.org/torspec.git/tree For an explanation of how to change Tor's design to work differently, look at -doc/spec/proposals/001-process.txt . +https://gitweb.torproject.org/torspec.git/blob_plain/HEAD:/proposals/001-process.txt For the latest version of the code, get a copy of git, and git clone git://git.torproject.org/git/tor . -We talk about Tor on the or-talk mailing list. Design proposals and -discussion belong on the or-dev mailing list. We hang around on +We talk about Tor on the tor-talk mailing list. Design proposals and +discussion belong on the tor-dev mailing list. We hang around on irc.oftc.net, with general discussion happening on #tor and development happening on #tor-dev. @@ -65,8 +65,8 @@ If at all possible, try to create this file in the same commit where you are making the change. Please give it a distinctive name that no other branch will use for the lifetime of your change. -When Roger goes to make a release, he will concatenate all the entries -in changes to make a draft changelog, and clear the directory. He'll +When we go to make a release, we will concatenate all the entries +in changes to make a draft changelog, and clear the directory. We'll then edit the draft changelog into a nice readable format. What needs a changes file?:: @@ -405,4 +405,3 @@ function should mention that it does that something in the documentation. If you rely on a function doing something beyond what is in its documentation, then you should watch out, or it might do something else later. -