From b5c8a8ae53c141c14651485794999910d0168be8 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 27 Sep 2007 20:46:30 +0000 Subject: [PATCH] r15422@catbus: nickm | 2007-09-27 16:42:35 -0400 Use descriptor annotations to record the source, download time, and purpose of every descriptor we add to the store. The remaining to-do item is to stop setting do_not_cache on bridges. svn:r11680 --- ChangeLog | 3 +++ doc/TODO | 16 ++++++++++------ doc/tor.1.in | 6 +++++- src/or/control.c | 14 ++++++++++---- src/or/directory.c | 35 ++++++++++++++++++++++++++++++++--- src/or/dirserv.c | 26 +++++++++++++++++++++++--- src/or/or.h | 8 +++++++- src/or/router.c | 29 +++++++++++++++++++++++++++++ src/or/routerlist.c | 33 +++++++++++++++++++-------------- src/or/routerparse.c | 28 +++++++++++++++------------- 10 files changed, 153 insertions(+), 45 deletions(-) diff --git a/ChangeLog b/ChangeLog index bc1d80a70b..d1ea30eda0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,9 @@ Changes in version 0.2.0.8-alpha - 2007-??-?? cached-routers. Initialize cached-descriptors from cached-routers if the old format is around. The new format allows us to store annotations along with descriptors. + - Use annotations to record the time we received each descriptor. + - Use annotations to record the source for each descriptor. + - Use annotations to record the purpose of each descriptor. o Minor bugfixes (controller): - When sending a status event to the controller telling it that an diff --git a/doc/TODO b/doc/TODO index 798d65f89a..d165f9f72f 100644 --- a/doc/TODO +++ b/doc/TODO @@ -132,7 +132,7 @@ R - drop 'authority' queries if they're to our own identity key; accept o be more robust to bridges being marked as down and leaving us stranded without any known "running" bridges. N . Cache for bridge descriptors - . 
Annotated router store + o Annotated router store o Accept annotations before routers o Preserve and ignore unexpected annotations o Mechanism to add annotations when we first add a descriptor @@ -141,11 +141,15 @@ N . Cache for bridge descriptors o Name the router store something different: cached-descriptors? o But load from cached-routers if no cached-descriptors is found. - - Document this. - - Use annotations to denote router purpose - - Learn purpose from annotations - - Set annotations based on purpose - - Preserve routers with unrecognized purpose. + o Document this. + o Add a few example annotations to make sure this works: source + and downloaded/uploaded-at seem like a good start + - Drop this later as needed. + o Use annotations to denote router purpose + o Learn purpose from annotations + o Set annotations based on purpose + o Preserve routers with unrecognized purpose. +R - Stop setting the do-not-cache flag based on purpose. - Bridges operators (rudimentary version) - Ability to act as dir cache without a dir port. o Bridges publish to bridge authorities diff --git a/doc/tor.1.in b/doc/tor.1.in index f00b7e5244..ead667ea3a 100644 --- a/doc/tor.1.in +++ b/doc/tor.1.in @@ -1169,8 +1169,12 @@ The tor process stores keys and other data here. The most recently downloaded network status document for each authority. Each file holds one such document; the filenames are the hexadecimal identity key fingerprints of the directory authorities. .LP .TP +.B \fIDataDirectory\fB/cached-descriptors\fR and \fBcached-descriptors.new\fR +These files hold downloaded router statuses. Some routers may appear more than once; if so, the most recently published descriptor is used. Lines beginning with @-signs are annotations that contain more information about a given router. The ".new" file is an append-only journal; when it gets too large, all entries are merged into a new cached-descriptors file. 
+.LP +.TP .B \fIDataDirectory\fB/cached-routers\fR and \fBcached-routers.new\fR -These files hold downloaded router statuses. Some routers may appear more than once; if so, the most recently published descriptor is used. The ".new" file is an append-only journal; when it gets too large, all entries are merged into a new cached-routers file. +Obsolete versions of cached-descriptors and cached-descriptors.new. When Tor can't find the newer files, it looks here instead. .LP .TP .B \fIDataDirectory\fP/state diff --git a/src/or/control.c b/src/or/control.c index 0faaa502a1..aa4d6af775 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -1884,12 +1884,18 @@ get_purpose(char **string, int for_circuits, uint8_t *purpose) if (!strcmpstart(*string, "purpose=")) *string += strlen("purpose="); + if (!for_circuits) { + int r = router_purpose_from_string(*string); + if (r == ROUTER_PURPOSE_UNKNOWN) + return -1; + *purpose = r; + return 0; + } + if (!strcmp(*string, "general")) - *purpose = for_circuits ? CIRCUIT_PURPOSE_C_GENERAL : - ROUTER_PURPOSE_GENERAL; + *purpose = CIRCUIT_PURPOSE_C_GENERAL; else if (!strcmp(*string, "controller")) - *purpose = for_circuits ? CIRCUIT_PURPOSE_CONTROLLER : - ROUTER_PURPOSE_CONTROLLER; + *purpose = CIRCUIT_PURPOSE_CONTROLLER; else { /* not a recognized purpose */ return -1; } diff --git a/src/or/directory.c b/src/or/directory.c index 290abec347..cedaeb9f38 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -1066,6 +1066,31 @@ body_is_plausible(const char *body, size_t len, int purpose) } } +/** DOCDOC */ +static void +load_downloaded_routers(const char *body, smartlist_t *which, + int descriptor_digests, + int router_purpose, + const char *source) +{ + char buf[256]; + char time_buf[ISO_TIME_LEN+1]; + int general = router_purpose == ROUTER_PURPOSE_GENERAL; + format_iso_time(time_buf, time(NULL)); + + if (tor_snprintf(buf, sizeof(buf), + "@downloaded-at %s\n" + "@source %s\n" + "%s%s%s", time_buf, escaped(source), + !general ? 
"@purpose " : "", + !general ? router_purpose_to_string(router_purpose) : "", + !general ? "\n" : "")<0) + return; + + router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, + descriptor_digests, buf); +} + /** We are a client, and we've finished reading the server's * response. Parse and it and act appropriately. * @@ -1460,8 +1485,11 @@ connection_dir_client_reached_eof(dir_connection_t *conn) router_load_extrainfo_from_string(body, NULL, SAVED_NOWHERE, which, descriptor_digests); } else { - router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, - descriptor_digests, conn->router_purpose); + //router_load_routers_from_string(body, NULL, SAVED_NOWHERE, which, + // descriptor_digests, conn->router_purpose); + load_downloaded_routers(body, which, descriptor_digests, + conn->router_purpose, + conn->_base.address); directory_info_has_arrived(now, 0); } } @@ -2414,7 +2442,8 @@ directory_handle_command_post(dir_connection_t *conn, const char *headers, const char *msg = NULL; uint8_t purpose = authdir_mode_bridge(options) ? ROUTER_PURPOSE_BRIDGE : ROUTER_PURPOSE_GENERAL; - int r = dirserv_add_multiple_descriptors(body, purpose, &msg); + int r = dirserv_add_multiple_descriptors(body, purpose, + conn->_base.address, &msg); tor_assert(msg); if (r > 0) dirserv_get_directory(); /* rebuild and write to disk */ diff --git a/src/or/dirserv.c b/src/or/dirserv.c index 0c31eef01c..9ad888d2a1 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -524,6 +524,7 @@ authdir_wants_to_reject_router(routerinfo_t *ri, const char **msg, * returns the most severe error that occurred for any one of them. */ int dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, + const char *source, const char **msg) { int r=100; /* higher than any actual return value. 
*/ @@ -532,12 +533,28 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, smartlist_t *list; const char *s; int n_parsed = 0; + time_t now = time(NULL); + char annotation_buf[256]; + char time_buf[ISO_TIME_LEN+1]; + int general = purpose == ROUTER_PURPOSE_GENERAL; tor_assert(msg); + format_iso_time(time_buf, now); + if (tor_snprintf(annotation_buf, sizeof(annotation_buf), + "@uploaded-at %s\n" + "@source %s\n" + "%s%s%s", time_buf, escaped(source), + !general ? "@purpose " : "", + !general ? router_purpose_to_string(purpose) : "", + !general ? "\n" : "")<0) { + *msg = "Couldn't format annotations"; + return -1; + } + s = desc; list = smartlist_create(); if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 0, 0, - NULL)) { + annotation_buf)) { SMARTLIST_FOREACH(list, routerinfo_t *, ri, { msg_out = NULL; @@ -548,8 +565,8 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, * router_load_single_router()? Lastly, does extrainfo_t want * a purpose field too, or can we just piggyback off the one * in routerinfo_t? */ - ri->purpose = purpose; - if (purpose != ROUTER_PURPOSE_GENERAL) + tor_assert(ri->purpose == purpose); + if (purpose != ROUTER_PURPOSE_GENERAL) /*XXXXX020 wrong. */ ri->cache_info.do_not_cache = 1; r_tmp = dirserv_add_descriptor(ri, &msg_out); @@ -603,6 +620,9 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, * 1 if well-formed and accepted but origin should hear *msg; * 0 if well-formed but redundant with one we already have; * -1 if it looks vaguely like a router descriptor but rejected; + * + * This function is only called when fresh descriptors are posted, not when + * we re-load the cache. 
*/ int dirserv_add_descriptor(routerinfo_t *ri, const char **msg) diff --git a/src/or/or.h b/src/or/or.h index c0853f9032..361569f36e 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1167,6 +1167,8 @@ typedef struct { #define ROUTER_PURPOSE_CONTROLLER 1 /** Tor should use this router only for bridge positions in circuits. */ #define ROUTER_PURPOSE_BRIDGE 2 +/** DOCDOC */ +#define ROUTER_PURPOSE_UNKNOWN 255 uint8_t purpose; /** What positions in a circuit is this router good for? */ @@ -2779,6 +2781,7 @@ int dirserv_load_fingerprint_file(void); void dirserv_free_fingerprint_list(void); const char *dirserv_get_nickname_by_digest(const char *digest); int dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose, + const char *source, const char **msg); int dirserv_add_descriptor(routerinfo_t *ri, const char **msg); int getinfo_helper_dirserv_unregistered(control_connection_t *conn, @@ -3357,6 +3360,9 @@ void router_reset_warnings(void); void router_reset_reachability(void); void router_free_all(void); +const char *router_purpose_to_string(uint8_t p); +uint8_t router_purpose_from_string(const char *s); + #ifdef ROUTER_PRIVATE /* Used only by router.c and test.c */ void get_platform_str(char *platform, size_t len); @@ -3480,7 +3486,7 @@ void router_load_routers_from_string(const char *s, const char *eos, saved_location_t saved_location, smartlist_t *requested_fingerprints, int descriptor_digests, - uint8_t purpose); + const char *prepend_annotations); void router_load_extrainfo_from_string(const char *s, const char *eos, saved_location_t saved_location, smartlist_t *requested_fps, diff --git a/src/or/router.c b/src/or/router.c index f3aa25299e..b73d420fa0 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1773,6 +1773,35 @@ router_reset_warnings(void) } } +/** DOCDOC */ +const char * +router_purpose_to_string(uint8_t p) +{ + switch (p) + { + case ROUTER_PURPOSE_GENERAL: return "general"; + case ROUTER_PURPOSE_BRIDGE: return "bridge"; + case 
ROUTER_PURPOSE_CONTROLLER: return "controller"; + default: + tor_assert(0); + } + return NULL; +} + +/** DOCDOC */ +uint8_t +router_purpose_from_string(const char *s) +{ + if (!strcmp(s, "general")) + return ROUTER_PURPOSE_GENERAL; + else if (!strcmp(s, "bridge")) + return ROUTER_PURPOSE_BRIDGE; + else if (!strcmp(s, "controller")) + return ROUTER_PURPOSE_CONTROLLER; + else + return ROUTER_PURPOSE_UNKNOWN; +} + /** Release all static resources held in router.c */ void router_free_all(void) diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 54d08ff502..1fe04813e1 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -675,7 +675,7 @@ router_rebuild_store(int force, desc_store_t *store) tor_free(sd->signed_descriptor_body); // sets it to null sd->saved_offset = offset; } - offset += sd->signed_descriptor_len; + offset += sd->signed_descriptor_len + sd->annotations_len; signed_descriptor_get_body(sd); /* reconstruct and assert */ }); @@ -743,8 +743,7 @@ router_reload_router_list_impl(desc_store_t *store) else router_load_routers_from_string(store->mmap->data, store->mmap->data+store->mmap->size, - SAVED_IN_CACHE, NULL, 0, - ROUTER_PURPOSE_GENERAL); + SAVED_IN_CACHE, NULL, 0, NULL); } tor_snprintf(fname, fname_len, "%s"PATH_SEPARATOR"%s.new", @@ -762,7 +761,7 @@ router_reload_router_list_impl(desc_store_t *store) NULL, 0); else router_load_routers_from_string(contents, NULL, SAVED_IN_JOURNAL, - NULL, 0, ROUTER_PURPOSE_GENERAL); + NULL, 0, NULL); store->journal_len = (size_t) st.st_size; tor_free(contents); } @@ -2010,11 +2009,13 @@ signed_descriptor_get_body_impl(signed_descriptor_t *desc, (with_annotations ? 
0 : desc->annotations_len); tor_assert(r); - if (memcmp("router ", r, 7) && memcmp("extra-info ", r, 11)) { - log_err(LD_DIR, "descriptor at %p begins with unexpected string %s", - desc, tor_strndup(r, 64)); + if (!with_annotations) { + if (memcmp("router ", r, 7) && memcmp("extra-info ", r, 11)) { + log_err(LD_DIR, "descriptor at %p begins with unexpected string %s", + desc, tor_strndup(r, 64)); + } + tor_assert(!memcmp("router ", r, 7) || !memcmp("extra-info ", r, 11)); } - tor_assert(!memcmp("router ", r, 7) || !memcmp("extra-info ", r, 11)); return r; } @@ -3116,15 +3117,20 @@ router_load_single_router(const char *s, uint8_t purpose, const char **msg) routerinfo_t *ri; int r; smartlist_t *lst; + char annotation_buf[256]; tor_assert(msg); *msg = NULL; - if (!(ri = router_parse_entry_from_string(s, NULL, 1, 0, NULL))) { + tor_snprintf(annotation_buf, sizeof(annotation_buf), + "@source controller\n" + "@purpose %s\n", router_purpose_to_string(purpose)); + + if (!(ri = router_parse_entry_from_string(s, NULL, 1, 0, annotation_buf))) { log_warn(LD_DIR, "Error parsing router descriptor; dropping."); *msg = "Couldn't parse router descriptor."; return -1; } - ri->purpose = purpose; + tor_assert(ri->purpose == purpose); if (ri->purpose != ROUTER_PURPOSE_GENERAL) ri->cache_info.do_not_cache = 1; if (router_is_me(ri)) { @@ -3171,7 +3177,7 @@ router_load_routers_from_string(const char *s, const char *eos, saved_location_t saved_location, smartlist_t *requested_fingerprints, int descriptor_digests, - uint8_t purpose) + const char *prepend_annotations) { smartlist_t *routers = smartlist_create(), *changed = smartlist_create(); char fp[HEX_DIGEST_LEN+1]; @@ -3180,7 +3186,7 @@ router_load_routers_from_string(const char *s, const char *eos, int allow_annotations = (saved_location != SAVED_NOWHERE); router_parse_list_from_string(&s, eos, routers, saved_location, 0, - allow_annotations, NULL); + allow_annotations, prepend_annotations); 
routers_update_status_from_networkstatus(routers, !from_cache); @@ -3208,8 +3214,7 @@ router_load_routers_from_string(const char *s, const char *eos, } } - ri->purpose = purpose; - if (purpose != ROUTER_PURPOSE_GENERAL) + if (ri->purpose != ROUTER_PURPOSE_GENERAL) /* XXXX020 wrong. */ ri->cache_info.do_not_cache = 1; if (router_add_to_routerlist(ri, &msg, from_cache, !from_cache) >= 0) { diff --git a/src/or/routerparse.c b/src/or/routerparse.c index 761e6176db..adf6ca3145 100644 --- a/src/or/routerparse.c +++ b/src/or/routerparse.c @@ -1015,6 +1015,7 @@ router_parse_entry_from_string(const char *s, const char *end, directory_token_t *tok; struct in_addr in; const char *start_of_annotations, *cp; + size_t prepend_len = prepend_annotations ? strlen(prepend_annotations) : 0; tor_assert(!allow_annotations || !prepend_annotations); @@ -1068,17 +1069,12 @@ router_parse_entry_from_string(const char *s, const char *end, goto err; } - tok = smartlist_get(tokens,0); - if (tok->tp != K_ROUTER) { - log_warn(LD_DIR,"Entry does not start with \"router\""); - goto err; - } + tok = find_first_by_keyword(tokens, K_ROUTER); tor_assert(tok->n_args >= 5); router = tor_malloc_zero(sizeof(routerinfo_t)); router->routerlist_index = -1; - router->cache_info.annotations_len = s-start_of_annotations + - (prepend_annotations ? 
strlen(prepend_annotations) : 0) ; + router->cache_info.annotations_len = s-start_of_annotations + prepend_len; router->cache_info.signed_descriptor_len = end-s; if (cache_copy) { size_t len = router->cache_info.signed_descriptor_len + @@ -1086,11 +1082,12 @@ router_parse_entry_from_string(const char *s, const char *end, char *cp = router->cache_info.signed_descriptor_body = tor_malloc(len+1); if (prepend_annotations) { - strlcpy(cp, prepend_annotations, len+1); - cp += strlen(prepend_annotations); + memcpy(cp, prepend_annotations, prepend_len); + cp += prepend_len; } - memcpy(cp, s, end-s); - cp[len] = '\0'; + memcpy(cp, start_of_annotations, end-start_of_annotations); + router->cache_info.signed_descriptor_body[len] = '\0'; + tor_assert(strlen(router->cache_info.signed_descriptor_body) == len); } memcpy(router->cache_info.signed_descriptor_digest, digest, DIGEST_LEN); @@ -1116,8 +1113,6 @@ router_parse_entry_from_string(const char *s, const char *end, router->bandwidthrate = tor_parse_long(tok->args[0],10,0,INT_MAX,NULL,NULL); - /* Set purpose XXXX020 NM NM*/ - if (!router->bandwidthrate) { log_warn(LD_DIR, "bandwidthrate %s unreadable or 0. Failing.", escaped(tok->args[0])); @@ -1129,6 +1124,13 @@ router_parse_entry_from_string(const char *s, const char *end, tor_parse_long(tok->args[2],10,0,INT_MAX,NULL,NULL); /* XXXX020 we don't error-check these values? -RD */ + if ((tok = find_first_by_keyword(tokens, A_PURPOSE))) { + tor_assert(tok->n_args); + router->purpose = router_purpose_from_string(tok->args[0]); + } else { + router->purpose = ROUTER_PURPOSE_GENERAL; + } + if ((tok = find_first_by_keyword(tokens, K_UPTIME))) { tor_assert(tok->n_args >= 1); router->uptime = tor_parse_long(tok->args[0],10,0,LONG_MAX,NULL,NULL);