From e3113502ad60d25f936d13693ac1934b1b3dff8b Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 17 Oct 2007 19:23:56 +0000 Subject: [PATCH] r15882@catbus: nickm | 2007-10-17 15:23:05 -0400 oprofile was telling me that a fair bit of our time in openssl was spent in base64_decode, so replace base64_decode with an all-at-once fairly optimized implementation. For decoding keys and digests, it seems 3-3.5x faster than calling out to openssl. (Yes, I wrote it from scratch.) svn:r12002 --- ChangeLog | 6 +++ src/common/crypto.c | 120 ++++++++++++++++++++++++++++++++++++++++++-- src/or/test.c | 15 ++++-- 3 files changed, 135 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index c61f022124..954b6f0ba3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -49,6 +49,12 @@ Changes in version 0.2.0.9-alpha - 2007-10-?? - Make base32_decode() accept upper-case letters. Bugfix on 0.2.0.7-alpha. + o Minor bugfixes (performance): + - Base64 decoding was actually showing up on our profile when parsing + the initial descriptor file; switch to an in-process all-at-once + implementation that's about 3.5x times faster than calling out to + OpenSSL. + o Code simplifications and refactoring: - Remove support for the old bw_accounting file: we've been storing bandwidth accounting information in the state file since 0.1.2.5-alpha. diff --git a/src/common/crypto.c b/src/common/crypto.c index b215298238..f603b064a0 100644 --- a/src/common/crypto.c +++ b/src/common/crypto.c @@ -1768,6 +1768,8 @@ smartlist_shuffle(smartlist_t *sl) int base64_encode(char *dest, size_t destlen, const char *src, size_t srclen) { + /* XXXX we might want to rewrite this along the lines of base64_decode, if + * it ever shows up in the profile. */ EVP_ENCODE_CTX ctx; int len, ret; @@ -1787,18 +1789,48 @@ base64_encode(char *dest, size_t destlen, const char *src, size_t srclen) return ret; } +#define X 255 +#define SP 64 +#define PAD 65 +/** Internal table mapping byte values to what they represent in base64. + * Numbers 0..63 are 6-bit integers. SPs are spaces, and should be + * skipped. Xs are invalid and must not appear in base64. PAD indicates + * end-of-string. */ +static const uint8_t base64_decode_table[256] = { + X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */ + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X, + X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X, + X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, +}; + /** Base-64 decode srclen bytes of data from src. Write * the result into dest, if it will fit within destlen * bytes. Return the number of bytes written on success; -1 if * destlen is too short, or other failure. * - * NOTE: destlen should be a little longer than the amount of data it - * will contain, since we check for sufficient space conservatively. - * Here, "a little" is around 64-ish bytes. + * NOTE 1: destlen is checked conservatively, as though srclen contained no + * spaces or padding. + * + * NOTE 2: This implementation does not check for the correct number of + * padding "=" characters at the end of the string, and does not check + * for internal padding characters. */ int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen) { +#ifdef USE_OPENSSL_BASE64 EVP_ENCODE_CTX ctx; int len, ret; /* 64 bytes of input -> *up to* 48 bytes of output. @@ -1815,7 +1847,80 @@ base64_decode(char *dest, size_t destlen, const char *src, size_t srclen) EVP_DecodeFinal(&ctx, (unsigned char*)dest, &ret); ret += len; return ret; +#else + #define ACC32 + const char *eos = src+srclen; + uint32_t n=0; + int n_idx=0; + char *dest_orig = dest; + + /* Max number of bits == srclen*6. + * Number of bytes required to hold all bits == (srclen*6)/8. + * Yes, we want to round down: anything that hangs over the end of a + * byte is padding. */ + if (destlen < (srclen*3)/4) + return -1; + if (destlen > SIZE_T_CEILING) + return -1; + + /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the + * value we're decoding. Accumulate bits in n, and whenever we have + * 24 bits, batch them into 3 bytes and flush those bytes to dest. + */ + for ( ; src < eos; ++src) { + unsigned char c = (unsigned char) *src; + uint8_t v = base64_decode_table[c]; + switch (v) { + case X: + /* This character isn't allowed in base64. */ + return -1; + case SP: + /* This character is whitespace, and has no effect. */ + continue; + case PAD: + /* We've hit an = character: the data is over. */ + goto end_of_loop; + default: + /* We have an actual 6-bit value. Append it to the bits in n. */ + n = (n<<6) | v; + if ((++n_idx) == 4) { + /* We've accumulated 24 bits in n. Flush them. */ + *dest++ = (n>>16); + *dest++ = (n>>8) & 0xff; + *dest++ = (n) & 0xff; + n_idx = 0; + n = 0; + } + } + } + end_of_loop: + /* If we have leftover bits, we need to cope. */ + switch (n_idx) { + case 0: + default: + /* No leftover bits. We win. */ + break; + case 1: + /* 6 leftover bits. That's invalid; we can't form a byte out of that. */ + return -1; + case 2: + /* 12 leftover bits: The last 4 are padding and the first 8 are data. */ + *dest++ = n >> 4; + break; + case 3: + /* 18 leftover bits: The last 2 are padding and the first 16 are data. */ + *dest++ = n >> 10; + *dest++ = n >> 2; + } + + tor_assert((dest-dest_orig) <= (ssize_t)destlen); + + return dest-dest_orig; +#endif } +#undef X +#undef SP +#undef NIL /** Base-64 encode DIGEST_LINE bytes from digest, remove the trailing = * and newline characters, and store the nul-terminated result in the first @@ -1836,6 +1941,7 @@ digest_to_base64(char *d64, const char *digest) int digest_from_base64(char *digest, const char *d64) { +#ifdef USE_OPENSSL_BASE64 char buf_in[BASE64_DIGEST_LEN+3]; char buf[256]; if (strlen(d64) != BASE64_DIGEST_LEN) @@ -1846,6 +1952,12 @@ digest_from_base64(char *digest, const char *d64) return -1; memcpy(digest, buf, DIGEST_LEN); return 0; +#else + if (base64_decode(digest, DIGEST_LEN, d64, strlen(d64)) == DIGEST_LEN) + return 0; + else + return -1; +#endif } /** Implements base32 encoding as in rfc3548. Limitation: Requires @@ -1878,6 +1990,8 @@ base32_encode(char *dest, size_t destlen, const char *src, size_t srclen) int base32_decode(char *dest, size_t destlen, const char *src, size_t srclen) { + /* XXXX we might want to rewrite this along the lines of base64_decode, if + * it ever shows up in the profile. */ unsigned int nbits, i, j, bit; char *tmp; nbits = srclen * 5; diff --git a/src/or/test.c b/src/or/test.c index 4b74d41180..f7da07bc03 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -363,7 +363,7 @@ test_crypto(void) crypto_cipher_env_t *env1, *env2; crypto_pk_env_t *pk1, *pk2; char *data1, *data2, *data3, *cp; - int i, j, p, len; + int i, j, p, len, idx; size_t size; data1 = tor_malloc(1024); @@ -528,13 +528,21 @@ test_crypto(void) crypto_free_pk_env(pk2); /* Base64 tests */ + memset(data1, 6, 1024); + for (idx = 0; idx < 10; ++idx) { + i = base64_encode(data2, 1024, data1, idx); + j = base64_decode(data3, 1024, data2, i); + test_eq(j,idx); + test_memeq(data3, data1, idx); + } + strlcpy(data1, "Test string that contains 35 chars.", 1024); strlcat(data1, " 2nd string that contains 35 chars.", 1024); i = base64_encode(data2, 1024, data1, 71); j = base64_decode(data3, 1024, data2, i); - test_streq(data3, data1); test_eq(j, 71); + test_streq(data3, data1); test_assert(data2[i] == '\0'); crypto_rand(data1, DIGEST_LEN); @@ -543,7 +551,7 @@ test_crypto(void) test_eq(BASE64_DIGEST_LEN, strlen(data2)); test_eq(100, data2[BASE64_DIGEST_LEN+2]); memset(data3, 99, 1024); - digest_from_base64(data3, data2); + test_eq(digest_from_base64(data3, data2), 0); test_memeq(data1, data3, DIGEST_LEN); test_eq(99, data3[DIGEST_LEN+1]); @@ -2639,6 +2647,7 @@ test_v3_networkstatus(void) v3_text = format_networkstatus_vote(sign_skey_3, vote); test_assert(v3_text); + v3 = networkstatus_parse_vote_from_string(v3_text, NULL, 1); test_assert(v3);