Merge branch 'karsten_bug13192_026_03_teor'

This commit is contained in:
Nick Mathewson 2015-11-12 11:40:58 -05:00
commit d20a3d07e3
5 changed files with 260 additions and 28 deletions

View file

@ -0,0 +1,8 @@
o Code simplifications and unit tests:
- Handle edge cases in the laplace functions: avoid division by zero,
avoid taking the log of zero, and silence clang type conversion
warnings using round and trunc. Add unit tests for edge cases with
maximal values.
- Consistently check for overflow in round_*_to_next_multiple_of
functions, and add unit tests with additional and maximal values.

View file

@ -488,42 +488,58 @@ round_to_power_of_2(uint64_t u64)
}
/** Return the lowest x such that x is at least <b>number</b>, and x modulo
 * <b>divisor</b> == 0. If no such x can be expressed as an unsigned, return
 * UINT_MAX.
 *
 * <b>divisor</b> must be greater than 0; this is asserted. */
unsigned
round_to_next_multiple_of(unsigned number, unsigned divisor)
{
  unsigned remainder;
  unsigned padding;

  tor_assert(divisor > 0);

  /* Already a multiple: nothing to add, so no overflow is possible. */
  remainder = number % divisor;
  if (remainder == 0)
    return number;

  /* padding is in [1, divisor-1]. Compare it against the headroom that is
   * left above number instead of computing number + divisor - 1, which can
   * wrap around even though the rounded result itself is representable. */
  padding = divisor - remainder;
  if (padding > UINT_MAX - number)
    return UINT_MAX;

  return number + padding;
}
/** Return the lowest x such that x is at least <b>number</b>, and x modulo
 * <b>divisor</b> == 0. If no such x can be expressed as a uint32_t, return
 * UINT32_MAX.
 *
 * <b>divisor</b> must be greater than 0; this is asserted. */
uint32_t
round_uint32_to_next_multiple_of(uint32_t number, uint32_t divisor)
{
  uint32_t remainder;
  uint32_t padding;

  tor_assert(divisor > 0);

  /* Already a multiple: nothing to add, so no overflow is possible. */
  remainder = number % divisor;
  if (remainder == 0)
    return number;

  /* padding is in [1, divisor-1]. Compare it against the headroom that is
   * left above number instead of computing number + divisor - 1, which can
   * wrap around even though the rounded result itself is representable. */
  padding = divisor - remainder;
  if (padding > UINT32_MAX - number)
    return UINT32_MAX;

  return number + padding;
}
/** Return the lowest x such that x is at least <b>number</b>, and x modulo
 * <b>divisor</b> == 0. If no such x can be expressed as a uint64_t, return
 * UINT64_MAX.
 *
 * <b>divisor</b> must be greater than 0; this is asserted. */
uint64_t
round_uint64_to_next_multiple_of(uint64_t number, uint64_t divisor)
{
  uint64_t remainder;
  uint64_t padding;

  tor_assert(divisor > 0);

  /* Already a multiple: nothing to add, so no overflow is possible. */
  remainder = number % divisor;
  if (remainder == 0)
    return number;

  /* padding is in [1, divisor-1]. Compare it against the headroom that is
   * left above number instead of computing number + divisor - 1, which can
   * wrap around even though the rounded result itself is representable. */
  padding = divisor - remainder;
  if (padding > UINT64_MAX - number)
    return UINT64_MAX;

  return number + padding;
}
/** Return the lowest x in [INT64_MIN, INT64_MAX] such that x is at least
* <b>number</b>, and x modulo <b>divisor</b> == 0. */
* <b>number</b>, and x modulo <b>divisor</b> == 0. If no such x can be
* expressed as an int64_t, return INT64_MAX */
int64_t
round_int64_to_next_multiple_of(int64_t number, int64_t divisor)
{
tor_assert(divisor > 0);
if (number >= 0 && INT64_MAX - divisor + 1 >= number)
if (INT64_MAX - divisor + 1 < number)
return INT64_MAX;
if (number >= 0)
number += divisor - 1;
number -= number % divisor;
return number;
@ -537,33 +553,44 @@ int64_t
sample_laplace_distribution(double mu, double b, double p)
{
double result;
tor_assert(p >= 0.0 && p < 1.0);
/* This is the "inverse cumulative distribution function" from:
* http://en.wikipedia.org/wiki/Laplace_distribution */
result = mu - b * (p > 0.5 ? 1.0 : -1.0)
* tor_mathlog(1.0 - 2.0 * fabs(p - 0.5));
if (result >= INT64_MAX)
return INT64_MAX;
else if (result <= INT64_MIN)
if (p <= 0.0) {
/* Avoid taking log(0.0) == -INFINITY, as some processors or compiler
* options can cause the program to trap. */
return INT64_MIN;
else
return (int64_t) result;
}
result = mu - b * (p > 0.5 ? 1.0 : -1.0)
* tor_mathlog(1.0 - 2.0 * fabs(p - 0.5));
return clamp_double_to_int64(result);
}
/** Add random noise between INT64_MIN and INT64_MAX coming from a
* Laplace distribution with mu = 0 and b = <b>delta_f</b>/<b>epsilon</b>
* to <b>signal</b> based on the provided <b>random</b> value in
* [0.0, 1.0[. */
/** Add random noise between INT64_MIN and INT64_MAX coming from a Laplace
* distribution with mu = 0 and b = <b>delta_f</b>/<b>epsilon</b> to
* <b>signal</b> based on the provided <b>random</b> value in [0.0, 1.0[.
* The epsilon value must be between ]0.0, 1.0]. delta_f must be greater
* than 0. */
int64_t
add_laplace_noise(int64_t signal, double random, double delta_f,
double epsilon)
{
int64_t noise = sample_laplace_distribution(
0.0, /* just add noise, no further signal */
delta_f / epsilon, random);
int64_t noise;
/* epsilon MUST be between ]0.0, 1.0] */
tor_assert(epsilon > 0.0 && epsilon <= 1.0);
/* delta_f MUST be greater than 0. */
tor_assert(delta_f > 0.0);
/* Just add noise, no further signal */
noise = sample_laplace_distribution(0.0,
delta_f / epsilon,
random);
/* Clip (signal + noise) to [INT64_MIN, INT64_MAX] */
if (noise > 0 && INT64_MAX - noise < signal)
return INT64_MAX;
else if (noise < 0 && INT64_MIN - noise > signal)
@ -5385,3 +5412,36 @@ tor_weak_random_range(tor_weak_rng_t *rng, int32_t top)
return result;
}
/** Convert <b>number</b> to an int64_t, truncating toward zero.
 *
 * Return 0 if <b>number</b> is NaN. Return INT64_MIN or INT64_MAX if
 * <b>number</b> lies outside the range of an int64_t, including the
 * infinities. */
int64_t
clamp_double_to_int64(double number)
{
  int exponent = 0;

  /* NaN has no ordering, so it cannot go through the range logic below. */
  if (isnan(number))
    return 0;

  /* Infinities saturate according to their sign. */
  if (!isfinite(number))
    return signbit(number) ? INT64_MIN : INT64_MAX;

  /* Find the exponent such that
   *     number == x * 2^exponent
   * for some x with abs(x) in [0.5, 1.0). This implies that the magnitude
   * of number is strictly less than 2^exponent. */
  frexp(number, &exponent);

  /* A magnitude of 2^63 or more does not fit. The one representable value
   * this rejects is INT64_MIN itself (-2^63), and the negative branch
   * returns exactly that, so the result is still correct. */
  if (exponent > 63)
    return signbit(number) ? INT64_MIN : INT64_MAX;

  /* The magnitude is strictly below 2^63, so the truncated value is
   * guaranteed to be representable. */
  return (int64_t) number;
}

View file

@ -185,6 +185,7 @@ int64_t sample_laplace_distribution(double mu, double b, double p);
int64_t add_laplace_noise(int64_t signal, double random, double delta_f,
double epsilon);
int n_bits_set_u8(uint8_t v);
int64_t clamp_double_to_int64(double number);
/* Compute the CEIL of <b>a</b> divided by <b>b</b>, for nonnegative <b>a</b>
* and positive <b>b</b>. Works on integer types only. Not defined if a+b can

View file

@ -3026,21 +3026,21 @@ rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey)
/* The number of cells that are supposed to be hidden from the adversary
* by adding noise from the Laplace distribution. This value, divided by
* EPSILON, is Laplace parameter b. */
* EPSILON, is Laplace parameter b. It must be greater than 0. */
#define REND_CELLS_DELTA_F 2048
/* Security parameter for obfuscating number of cells with a value between
* 0 and 1. Smaller values obfuscate observations more, but at the same
* ]0.0, 1.0]. Smaller values obfuscate observations more, but at the same
* time make statistics less usable. */
#define REND_CELLS_EPSILON 0.3
/* The number of cells that are supposed to be hidden from the adversary
* by rounding up to the next multiple of this number. */
#define REND_CELLS_BIN_SIZE 1024
/* The number of service identities that are supposed to be hidden from
* the adversary by adding noise from the Laplace distribution. This
* value, divided by EPSILON, is Laplace parameter b. */
/* The number of service identities that are supposed to be hidden from the
* adversary by adding noise from the Laplace distribution. This value,
* divided by EPSILON, is Laplace parameter b. It must be greater than 0. */
#define ONIONS_SEEN_DELTA_F 8
/* Security parameter for obfuscating number of service identities with a
* value between 0 and 1. Smaller values obfuscate observations more, but
* value in the interval ]0.0, 1.0]. Smaller values obfuscate observations more, but
* at the same time make statistics less usable. */
#define ONIONS_SEEN_EPSILON 0.3
/* The number of service identities that are supposed to be hidden from

View file

@ -19,6 +19,7 @@
#endif
#include <math.h>
#include <ctype.h>
#include <float.h>
/* XXXX this is a minimal wrapper to make the unit tests compile with the
* changed tor_timegm interface. */
@ -4097,6 +4098,9 @@ test_util_round_to_next_multiple_of(void *arg)
tt_u64_op(round_uint64_to_next_multiple_of(99,7), ==, 105);
tt_u64_op(round_uint64_to_next_multiple_of(99,9), ==, 99);
tt_u64_op(round_uint64_to_next_multiple_of(UINT64_MAX,2), ==,
UINT64_MAX);
tt_i64_op(round_int64_to_next_multiple_of(0,1), ==, 0);
tt_i64_op(round_int64_to_next_multiple_of(0,7), ==, 0);
@ -4110,7 +4114,27 @@ test_util_round_to_next_multiple_of(void *arg)
tt_i64_op(round_int64_to_next_multiple_of(INT64_MIN,2), ==, INT64_MIN);
tt_i64_op(round_int64_to_next_multiple_of(INT64_MAX,2), ==,
INT64_MAX-INT64_MAX%2);
INT64_MAX);
tt_int_op(round_uint32_to_next_multiple_of(0,1), ==, 0);
tt_int_op(round_uint32_to_next_multiple_of(0,7), ==, 0);
tt_int_op(round_uint32_to_next_multiple_of(99,1), ==, 99);
tt_int_op(round_uint32_to_next_multiple_of(99,7), ==, 105);
tt_int_op(round_uint32_to_next_multiple_of(99,9), ==, 99);
tt_int_op(round_uint32_to_next_multiple_of(UINT32_MAX,2), ==,
UINT32_MAX);
tt_uint_op(round_to_next_multiple_of(0,1), ==, 0);
tt_uint_op(round_to_next_multiple_of(0,7), ==, 0);
tt_uint_op(round_to_next_multiple_of(99,1), ==, 99);
tt_uint_op(round_to_next_multiple_of(99,7), ==, 105);
tt_uint_op(round_to_next_multiple_of(99,9), ==, 99);
tt_uint_op(round_to_next_multiple_of(UINT_MAX,2), ==,
UINT_MAX);
done:
;
}
@ -4143,6 +4167,7 @@ test_util_laplace(void *arg)
*/
tt_i64_op(INT64_MIN + 20, ==,
add_laplace_noise(20, 0.0, delta_f, epsilon));
tt_i64_op(-60, ==, add_laplace_noise(20, 0.1, delta_f, epsilon));
tt_i64_op(-14, ==, add_laplace_noise(20, 0.25, delta_f, epsilon));
tt_i64_op(20, ==, add_laplace_noise(20, 0.5, delta_f, epsilon));
@ -4150,6 +4175,143 @@ test_util_laplace(void *arg)
tt_i64_op(100, ==, add_laplace_noise(20, 0.9, delta_f, epsilon));
tt_i64_op(215, ==, add_laplace_noise(20, 0.99, delta_f, epsilon));
/* Test extreme values of signal with maximally negative values of noise
* 1.0000000000000002 is the smallest number > 1
* 0.0000000000000002 is the double epsilon (error when calculating near 1)
* this is approximately 1/(2^52)
* per https://en.wikipedia.org/wiki/Double_precision
* (let's not descend into the world of subnormals)
* >>> laplace.ppf([0, 0.0000000000000002], loc = 0, scale = 1)
* array([ -inf, -35.45506713])
*/
const double noscale_df = 1.0, noscale_eps = 1.0;
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(0, 0.0, noscale_df, noscale_eps));
/* is it clipped to INT64_MIN? */
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(-1, 0.0, noscale_df, noscale_eps));
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(INT64_MIN, 0.0,
noscale_df, noscale_eps));
/* ... even when scaled? */
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(0, 0.0, delta_f, epsilon));
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(0, 0.0,
DBL_MAX, 1));
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(INT64_MIN, 0.0,
DBL_MAX, 1));
/* does it play nice with INT64_MAX? */
tt_i64_op((INT64_MIN + INT64_MAX), ==,
add_laplace_noise(INT64_MAX, 0.0,
noscale_df, noscale_eps));
/* do near-zero fractional values work? */
const double min_dbl_error = 0.0000000000000002;
tt_i64_op(-35, ==,
add_laplace_noise(0, min_dbl_error,
noscale_df, noscale_eps));
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(INT64_MIN, min_dbl_error,
noscale_df, noscale_eps));
tt_i64_op((-35 + INT64_MAX), ==,
add_laplace_noise(INT64_MAX, min_dbl_error,
noscale_df, noscale_eps));
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(0, min_dbl_error,
DBL_MAX, 1));
tt_i64_op((INT64_MAX + INT64_MIN), ==,
add_laplace_noise(INT64_MAX, min_dbl_error,
DBL_MAX, 1));
tt_i64_op(INT64_MIN, ==,
add_laplace_noise(INT64_MIN, min_dbl_error,
DBL_MAX, 1));
/* does it play nice with INT64_MAX? */
tt_i64_op((INT64_MAX - 35), ==,
add_laplace_noise(INT64_MAX, min_dbl_error,
noscale_df, noscale_eps));
/* Test extreme values of signal with maximally positive values of noise
* 1.0000000000000002 is the smallest number > 1
* 0.9999999999999998 is the greatest number < 1 by calculation
* per https://en.wikipedia.org/wiki/Double_precision
* >>> laplace.ppf([1.0, 0.9999999999999998], loc = 0, scale = 1)
* array([inf, 35.35050621])
* but the function rejects p == 1.0, so we just use max_dbl_lt_one
*/
const double max_dbl_lt_one = 0.9999999999999998;
/* do near-one fractional values work? */
tt_i64_op(35, ==,
add_laplace_noise(0, max_dbl_lt_one, noscale_df, noscale_eps));
/* is it clipped to INT64_MAX? */
tt_i64_op(INT64_MAX, ==,
add_laplace_noise(INT64_MAX - 35, max_dbl_lt_one,
noscale_df, noscale_eps));
tt_i64_op(INT64_MAX, ==,
add_laplace_noise(INT64_MAX - 34, max_dbl_lt_one,
noscale_df, noscale_eps));
tt_i64_op(INT64_MAX, ==,
add_laplace_noise(INT64_MAX, max_dbl_lt_one,
noscale_df, noscale_eps));
/* ... even when scaled? */
tt_i64_op(INT64_MAX, ==,
add_laplace_noise(INT64_MAX, max_dbl_lt_one,
delta_f, epsilon));
tt_i64_op((INT64_MIN + INT64_MAX), ==,
add_laplace_noise(INT64_MIN, max_dbl_lt_one,
DBL_MAX, 1));
tt_i64_op(INT64_MAX, ==,
add_laplace_noise(INT64_MAX, max_dbl_lt_one,
DBL_MAX, 1));
/* does it play nice with INT64_MIN? */
tt_i64_op((INT64_MIN + 35), ==,
add_laplace_noise(INT64_MIN, max_dbl_lt_one,
noscale_df, noscale_eps));
done:
;
}
/** Unit test for clamp_double_to_int64(): walk the double range from
 * -INFINITY to +INFINITY and check saturation, truncation of fractional
 * values, and the NaN special case.
 *
 * NOTE(review): tt_i64_op and the `done` label presumably follow the usual
 * test-macro convention of jumping to `done` on the first failed
 * comparison; their definitions are not visible here -- confirm. */
static void
test_util_clamp_double_to_int64(void *arg)
{
(void)arg;
/* Negative infinity and finite values below -2^63 are expected to
 * saturate at INT64_MIN. */
tt_i64_op(INT64_MIN, ==, clamp_double_to_int64(-INFINITY));
tt_i64_op(INT64_MIN, ==,
clamp_double_to_int64(-1.0 * pow(2.0, 64.0) - 1.0));
tt_i64_op(INT64_MIN, ==,
clamp_double_to_int64(-1.0 * pow(2.0, 63.0) - 1.0));
/* Around -2^53 the conversion is expected to be exact. The expected value
 * is built by shifting an unsigned all-ones pattern, which avoids
 * left-shifting a negative signed integer. */
tt_i64_op(((uint64_t) -1) << 53, ==,
clamp_double_to_int64(-1.0 * pow(2.0, 53.0)));
tt_i64_op((((uint64_t) -1) << 53) + 1, ==,
clamp_double_to_int64(-1.0 * pow(2.0, 53.0) + 1.0));
/* Small magnitudes: fractional values are expected to truncate to 0. */
tt_i64_op(-1, ==, clamp_double_to_int64(-1.0));
tt_i64_op(0, ==, clamp_double_to_int64(-0.9));
tt_i64_op(0, ==, clamp_double_to_int64(-0.1));
tt_i64_op(0, ==, clamp_double_to_int64(0.0));
/* NaN is expected to map to 0. */
tt_i64_op(0, ==, clamp_double_to_int64(NAN));
tt_i64_op(0, ==, clamp_double_to_int64(0.1));
tt_i64_op(0, ==, clamp_double_to_int64(0.9));
tt_i64_op(1, ==, clamp_double_to_int64(1.0));
/* Around +2^53 the conversion is expected to be exact. */
tt_i64_op((((int64_t) 1) << 53) - 1, ==,
clamp_double_to_int64(pow(2.0, 53.0) - 1.0));
tt_i64_op(((int64_t) 1) << 53, ==,
clamp_double_to_int64(pow(2.0, 53.0)));
/* 2^63 and above, including positive infinity, are expected to saturate
 * at INT64_MAX. */
tt_i64_op(INT64_MAX, ==,
clamp_double_to_int64(pow(2.0, 63.0)));
tt_i64_op(INT64_MAX, ==,
clamp_double_to_int64(pow(2.0, 64.0)));
tt_i64_op(INT64_MAX, ==, clamp_double_to_int64(INFINITY));
done:
;
}
@ -4441,6 +4603,7 @@ struct testcase_t util_tests[] = {
UTIL_TEST(di_map, 0),
UTIL_TEST(round_to_next_multiple_of, 0),
UTIL_TEST(laplace, 0),
UTIL_TEST(clamp_double_to_int64, 0),
UTIL_TEST(find_str_at_start_of_line, 0),
UTIL_TEST(string_is_C_identifier, 0),
UTIL_TEST(asprintf, 0),