handle ugly edge case in retrying entrynodes

Specifically, a circ attempt that we'd launched while the network was
down could timeout after we've marked our entrynodes up, marking them
back down again. The fix is to annotate as bad the OR conns that were
around before we did the retry, so if a circuit that's attached to them
times out we don't do anything about it.
This commit is contained in:
Roger Dingledine 2010-09-28 22:32:38 -04:00
parent 7de1caa33f
commit 9997676802
5 changed files with 30 additions and 10 deletions

View File

@ -4604,7 +4604,16 @@ entries_retry_helper(or_options_t *options, int act)
any_known = 1;
if (ri->is_running)
any_running = 1; /* some entry is both known and running */
else if (act) { /* mark it for retry */
else if (act) {
/* Mark-for-close all TLS connections to this node, since
* otherwise there could be one that started 30 seconds
* ago, and in 30 seconds it will time out, causing us to mark
* the node down and undermine the retry attempt. We mark even
* the established conns, since if the network just came back
* we'll want to attach circuits to fresh conns. */
connection_or_set_bad_connections(ri->cache_info.identity_digest, 1);
/* mark it for retry */
router_set_status(ri->cache_info.identity_digest, 1);
e->can_retry = 1;
e->bad_since = 0;

View File

@ -955,8 +955,15 @@ circuit_build_failed(origin_circuit_t *circ)
* to blame, blame it. Also, avoid this relay for a while, and
* fail any one-hop directory fetches destined for it. */
const char *n_conn_id = circ->cpath->extend_info->identity_digest;
int already_marked = 0;
if (circ->_base.n_conn) {
or_connection_t *n_conn = circ->_base.n_conn;
if (n_conn->is_bad_for_new_circs) {
/* no need to blow away circuits/streams/etc. Also, don't mark this
* router as newly down, since maybe this was just an old circuit
* attempt that's finally timing out now. */
already_marked = 1;
}
log_info(LD_OR,
"Our circuit failed to get a response from the first hop "
"(%s:%d). I'm going to try to rotate to a better connection.",
@ -966,7 +973,7 @@ circuit_build_failed(origin_circuit_t *circ)
log_info(LD_OR,
"Our circuit died before the first hop with no connection");
}
if (n_conn_id) {
if (n_conn_id && !already_marked) {
entry_guard_register_connect_status(n_conn_id, 0, 1, time(NULL));
/* if there are any one-hop streams waiting on this circuit, fail
* them now so they can retry elsewhere. */

View File

@ -610,7 +610,7 @@ connection_or_get_for_extend(const char *digest,
* appropriate. Helper for connection_or_set_bad_connections().
*/
static void
connection_or_group_set_badness(or_connection_t *head)
connection_or_group_set_badness(or_connection_t *head, int force)
{
or_connection_t *or_conn = NULL, *best = NULL;
int n_old = 0, n_inprogress = 0, n_canonical = 0, n_other = 0;
@ -622,7 +622,8 @@ connection_or_group_set_badness(or_connection_t *head)
if (or_conn->_base.marked_for_close ||
or_conn->is_bad_for_new_circs)
continue;
if (or_conn->_base.timestamp_created + TIME_BEFORE_OR_CONN_IS_TOO_OLD
if (force ||
or_conn->_base.timestamp_created + TIME_BEFORE_OR_CONN_IS_TOO_OLD
< now) {
log_info(LD_OR,
"Marking OR conn to %s:%d as too old for new circuits "
@ -718,8 +719,10 @@ connection_or_group_set_badness(or_connection_t *head)
}
}
/** Go through all the OR connections, and set the is_bad_for_new_circs
/** Go through all the OR connections (or if <b>digest</b> is non-NULL, just
* the OR connections with that digest), and set the is_bad_for_new_circs
* flag on:
* - all connections if <b>force</b> is true.
* - all connections that are too old.
* - all open non-canonical connections for which a canonical connection
* exists to the same router.
@ -732,13 +735,14 @@ connection_or_group_set_badness(or_connection_t *head)
* better than another.
*/
void
connection_or_set_bad_connections(void)
connection_or_set_bad_connections(const char *digest, int force)
{
if (!orconn_identity_map)
return;
DIGESTMAP_FOREACH(orconn_identity_map, identity, or_connection_t *, conn) {
connection_or_group_set_badness(conn);
if (!digest || !memcmp(digest, conn->identity_digest, DIGEST_LEN))
connection_or_group_set_badness(conn, force);
} DIGESTMAP_FOREACH_END;
}

View File

@ -18,7 +18,7 @@ or_connection_t *connection_or_get_for_extend(const char *digest,
const tor_addr_t *target_addr,
const char **msg_out,
int *launch_out);
void connection_or_set_bad_connections(void);
void connection_or_set_bad_connections(const char *digest, int force);
int connection_or_reached_eof(or_connection_t *conn);
int connection_or_process_inbuf(or_connection_t *conn);

View File

@ -1173,7 +1173,7 @@ run_scheduled_events(time_t now)
circuit_expire_old_circuits_serverside(now);
/** 5. We do housekeeping for each connection... */
connection_or_set_bad_connections();
connection_or_set_bad_connections(NULL, 0);
for (i=0;i<smartlist_len(connection_array);i++) {
run_connection_housekeeping(i, now);
}