Merge branch 'karsten_bug13192_026_03_teor'

2025-02-25 07:07:52 +01:00 · 2015-11-12 11:40:58 -05:00 · 2015-11-12 11:40:58 -05:00 · d20a3d07e3
commit d20a3d07e3
parent 0a3eed5f20 1f7ba115a4
5 changed files with 260 additions and 28 deletions
--- a/changes/laplace-edge-cases
+++ b/changes/laplace-edge-cases
@ -0,0 +1,8 @@
+  o Code simplifications and unit tests:
+    - Handle edge cases in the laplace functions: avoid division by zero,
+      avoid taking the log of zero, and silence clang type conversion
+      warnings using round and trunc.  Add unit tests for edge cases with
+      maximal values.
+    - Consistently check for overflow in round_*_to_next_multiple_of
+      functions, and add unit tests with additional and maximal values.
+
--- a/src/common/util.c
+++ b/src/common/util.c
@ -488,42 +488,58 @@ round_to_power_of_2(uint64_t u64)
 }

 /** Return the lowest x such that x is at least <b>number</b>, and x modulo
- * <b>divisor</b> == 0. */
+ * <b>divisor</b> == 0.  If no such x can be expressed as an unsigned, return
+ * UINT_MAX */
 unsigned
 round_to_next_multiple_of(unsigned number, unsigned divisor)
 {
+  tor_assert(divisor > 0);
+  if (UINT_MAX - divisor + 1 < number)
+    return UINT_MAX;
  number += divisor - 1;
  number -= number % divisor;
  return number;
 }

 /** Return the lowest x such that x is at least <b>number</b>, and x modulo
- * <b>divisor</b> == 0. */
+ * <b>divisor</b> == 0. If no such x can be expressed as a uint32_t, return
+ * UINT32_MAX */
 uint32_t
 round_uint32_to_next_multiple_of(uint32_t number, uint32_t divisor)
 {
+  tor_assert(divisor > 0);
+  if (UINT32_MAX - divisor + 1 < number)
+    return UINT32_MAX;
+
  number += divisor - 1;
  number -= number % divisor;
  return number;
 }

 /** Return the lowest x such that x is at least <b>number</b>, and x modulo
- * <b>divisor</b> == 0. */
+ * <b>divisor</b> == 0. If no such x can be expressed as a uint64_t, return
+ * UINT64_MAX */
 uint64_t
 round_uint64_to_next_multiple_of(uint64_t number, uint64_t divisor)
 {
+  tor_assert(divisor > 0);
+  if (UINT64_MAX - divisor + 1 < number)
+    return UINT64_MAX;
  number += divisor - 1;
  number -= number % divisor;
  return number;
 }

 /** Return the lowest x in [INT64_MIN, INT64_MAX] such that x is at least
- * <b>number</b>, and x modulo <b>divisor</b> == 0. */
+ * <b>number</b>, and x modulo <b>divisor</b> == 0. If no such x can be
+ * expressed as an int64_t, return INT64_MAX */
 int64_t
 round_int64_to_next_multiple_of(int64_t number, int64_t divisor)
 {
  tor_assert(divisor > 0);
-  if (number >= 0 && INT64_MAX - divisor + 1 >= number)
+  if (INT64_MAX - divisor + 1 < number)
+    return INT64_MAX;
+  if (number >= 0)
    number += divisor - 1;
  number -= number % divisor;
  return number;
@ -537,33 +553,44 @@ int64_t
 sample_laplace_distribution(double mu, double b, double p)
 {
  double result;
-
  tor_assert(p >= 0.0 && p < 1.0);
+
  /* This is the "inverse cumulative distribution function" from:
   * http://en.wikipedia.org/wiki/Laplace_distribution */
+  if (p <= 0.0) {
+    /* Avoid taking log(0.0) == -INFINITY, as some processors or compiler
+     * options can cause the program to trap. */
+    return INT64_MIN;
+  }
+
  result = mu - b * (p > 0.5 ? 1.0 : -1.0)
                  * tor_mathlog(1.0 - 2.0 * fabs(p - 0.5));

-  if (result >= INT64_MAX)
-    return INT64_MAX;
-  else if (result <= INT64_MIN)
-    return INT64_MIN;
-  else
-    return (int64_t) result;
+  return clamp_double_to_int64(result);
 }

-/** Add random noise between INT64_MIN and INT64_MAX coming from a
- * Laplace distribution with mu = 0 and b = <b>delta_f</b>/<b>epsilon</b>
- * to <b>signal</b> based on the provided <b>random</b> value in
- * [0.0, 1.0[. */
+/** Add random noise between INT64_MIN and INT64_MAX coming from a Laplace
+ * distribution with mu = 0 and b = <b>delta_f</b>/<b>epsilon</b> to
+ * <b>signal</b> based on the provided <b>random</b> value in [0.0, 1.0[.
+ * The epsilon value must be in ]0.0, 1.0]. delta_f must be greater
+ * than 0. */
 int64_t
 add_laplace_noise(int64_t signal, double random, double delta_f,
                  double epsilon)
 {
-  int64_t noise = sample_laplace_distribution(
-               0.0, /* just add noise, no further signal */
-               delta_f / epsilon, random);
+  int64_t noise;

+  /* epsilon MUST be in ]0.0, 1.0] */
+  tor_assert(epsilon > 0.0 && epsilon <= 1.0);
+  /* delta_f MUST be greater than 0. */
+  tor_assert(delta_f > 0.0);
+
+  /* Just add noise, no further signal */
+  noise = sample_laplace_distribution(0.0,
+                                      delta_f / epsilon,
+                                      random);
+
+  /* Clip (signal + noise) to [INT64_MIN, INT64_MAX] */
  if (noise > 0 && INT64_MAX - noise < signal)
    return INT64_MAX;
  else if (noise < 0 && INT64_MIN - noise > signal)
@ -5385,3 +5412,36 @@ tor_weak_random_range(tor_weak_rng_t *rng, int32_t top)
  return result;
 }

+/** Cast a given double value to an int64_t. Return 0 if number is NaN.
+ * Returns either INT64_MIN or INT64_MAX if number is outside of the int64_t
+ * range. */
+int64_t clamp_double_to_int64(double number)
+{
+  int exp;
+
+  /* NaN is a special case that can't be used with the logic below. */
+  if (isnan(number)) {
+    return 0;
+  }
+
+  /* Time to validate whether number can overflow an int64_t value. Fun with
+   * float! Find that exponent exp such that
+   *    number == x * 2^exp
+   * for some x with abs(x) in [0.5, 1.0). Note that this implies that the
+   * magnitude of number is strictly less than 2^exp.
+   *
+   * If number is infinite, the call to frexp is legal but the contents of
+   * exp are unspecified. */
+  frexp(number, &exp);
+
+  /* If the magnitude of number is strictly less than 2^63, the truncated
+   * version of number is guaranteed to be representable. The only
+   * representable integer for which this is not the case is INT64_MIN, but
+   * it is covered by the logic below. */
+  if (isfinite(number) && exp <= 63) {
+    return number;
+  }
+
+  /* Handle infinities and finite numbers with magnitude >= 2^63. */
+  return signbit(number) ? INT64_MIN : INT64_MAX;
+}
--- a/src/common/util.h
+++ b/src/common/util.h
@ -185,6 +185,7 @@ int64_t sample_laplace_distribution(double mu, double b, double p);
 int64_t add_laplace_noise(int64_t signal, double random, double delta_f,
                          double epsilon);
 int n_bits_set_u8(uint8_t v);
+int64_t clamp_double_to_int64(double number);

 /* Compute the CEIL of <b>a</b> divided by <b>b</b>, for nonnegative <b>a</b>
 * and positive <b>b</b>.  Works on integer types only. Not defined if a+b can
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@ -3026,21 +3026,21 @@ rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey)

 /* The number of cells that are supposed to be hidden from the adversary
 * by adding noise from the Laplace distribution.  This value, divided by
- * EPSILON, is Laplace parameter b. */
+ * EPSILON, is Laplace parameter b. It must be greater than 0. */
 #define REND_CELLS_DELTA_F 2048
 /* Security parameter for obfuscating number of cells with a value between
- * 0 and 1.  Smaller values obfuscate observations more, but at the same
+ * ]0.0, 1.0]. Smaller values obfuscate observations more, but at the same
 * time make statistics less usable. */
 #define REND_CELLS_EPSILON 0.3
 /* The number of cells that are supposed to be hidden from the adversary
 * by rounding up to the next multiple of this number. */
 #define REND_CELLS_BIN_SIZE 1024
-/* The number of service identities that are supposed to be hidden from
- * the adversary by adding noise from the Laplace distribution.  This
- * value, divided by EPSILON, is Laplace parameter b. */
+/* The number of service identities that are supposed to be hidden from the
+ * adversary by adding noise from the Laplace distribution. This value,
+ * divided by EPSILON, is Laplace parameter b. It must be greater than 0. */
 #define ONIONS_SEEN_DELTA_F 8
 /* Security parameter for obfuscating number of service identities with a
- * value between 0 and 1.  Smaller values obfuscate observations more, but
+ * value between ]0.0, 1.0]. Smaller values obfuscate observations more, but
 * at the same time make statistics less usable. */
 #define ONIONS_SEEN_EPSILON 0.3
 /* The number of service identities that are supposed to be hidden from
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@ -19,6 +19,7 @@
 #endif
 #include <math.h>
 #include <ctype.h>
+#include <float.h>

 /* XXXX this is a minimal wrapper to make the unit tests compile with the
 * changed tor_timegm interface. */
@ -4097,6 +4098,9 @@ test_util_round_to_next_multiple_of(void *arg)
  tt_u64_op(round_uint64_to_next_multiple_of(99,7), ==, 105);
  tt_u64_op(round_uint64_to_next_multiple_of(99,9), ==, 99);

+  tt_u64_op(round_uint64_to_next_multiple_of(UINT64_MAX,2), ==,
+            UINT64_MAX);
+
  tt_i64_op(round_int64_to_next_multiple_of(0,1), ==, 0);
  tt_i64_op(round_int64_to_next_multiple_of(0,7), ==, 0);

@ -4110,7 +4114,27 @@ test_util_round_to_next_multiple_of(void *arg)

  tt_i64_op(round_int64_to_next_multiple_of(INT64_MIN,2), ==, INT64_MIN);
  tt_i64_op(round_int64_to_next_multiple_of(INT64_MAX,2), ==,
-                                            INT64_MAX-INT64_MAX%2);
+                                            INT64_MAX);
+
+  tt_int_op(round_uint32_to_next_multiple_of(0,1), ==, 0);
+  tt_int_op(round_uint32_to_next_multiple_of(0,7), ==, 0);
+
+  tt_int_op(round_uint32_to_next_multiple_of(99,1), ==, 99);
+  tt_int_op(round_uint32_to_next_multiple_of(99,7), ==, 105);
+  tt_int_op(round_uint32_to_next_multiple_of(99,9), ==, 99);
+
+  tt_int_op(round_uint32_to_next_multiple_of(UINT32_MAX,2), ==,
+            UINT32_MAX);
+
+  tt_uint_op(round_to_next_multiple_of(0,1), ==, 0);
+  tt_uint_op(round_to_next_multiple_of(0,7), ==, 0);
+
+  tt_uint_op(round_to_next_multiple_of(99,1), ==, 99);
+  tt_uint_op(round_to_next_multiple_of(99,7), ==, 105);
+  tt_uint_op(round_to_next_multiple_of(99,9), ==, 99);
+
+  tt_uint_op(round_to_next_multiple_of(UINT_MAX,2), ==,
+            UINT_MAX);
 done:
  ;
 }
@ -4143,6 +4167,7 @@ test_util_laplace(void *arg)
   */
  tt_i64_op(INT64_MIN + 20, ==,
            add_laplace_noise(20, 0.0, delta_f, epsilon));
+
  tt_i64_op(-60, ==, add_laplace_noise(20, 0.1, delta_f, epsilon));
  tt_i64_op(-14, ==, add_laplace_noise(20, 0.25, delta_f, epsilon));
  tt_i64_op(20, ==, add_laplace_noise(20, 0.5, delta_f, epsilon));
@ -4150,6 +4175,143 @@ test_util_laplace(void *arg)
  tt_i64_op(100, ==, add_laplace_noise(20, 0.9, delta_f, epsilon));
  tt_i64_op(215, ==, add_laplace_noise(20, 0.99, delta_f, epsilon));

+  /* Test extreme values of signal with maximally negative values of noise
+   * 1.0000000000000002 is the smallest number > 1
+   * 0.0000000000000002 is the double epsilon (error when calculating near 1)
+   * this is approximately 1/(2^52)
+   * per https://en.wikipedia.org/wiki/Double_precision
+   * (let's not descend into the world of subnormals)
+   * >>> laplace.ppf([0, 0.0000000000000002], loc = 0, scale = 1)
+   * array([        -inf, -35.45506713])
+   */
+  const double noscale_df = 1.0, noscale_eps = 1.0;
+
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(0, 0.0, noscale_df, noscale_eps));
+
+  /* is it clipped to INT64_MIN? */
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(-1, 0.0, noscale_df, noscale_eps));
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(INT64_MIN, 0.0,
+                              noscale_df, noscale_eps));
+  /* ... even when scaled? */
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(0, 0.0, delta_f, epsilon));
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(0, 0.0,
+                              DBL_MAX, 1));
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(INT64_MIN, 0.0,
+                              DBL_MAX, 1));
+
+  /* does it play nice with INT64_MAX? */
+  tt_i64_op((INT64_MIN + INT64_MAX), ==,
+            add_laplace_noise(INT64_MAX, 0.0,
+                              noscale_df, noscale_eps));
+
+  /* do near-zero fractional values work? */
+  const double min_dbl_error = 0.0000000000000002;
+
+  tt_i64_op(-35, ==,
+            add_laplace_noise(0, min_dbl_error,
+                              noscale_df, noscale_eps));
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(INT64_MIN, min_dbl_error,
+                              noscale_df, noscale_eps));
+  tt_i64_op((-35 + INT64_MAX), ==,
+            add_laplace_noise(INT64_MAX, min_dbl_error,
+                              noscale_df, noscale_eps));
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(0, min_dbl_error,
+                              DBL_MAX, 1));
+  tt_i64_op((INT64_MAX + INT64_MIN), ==,
+            add_laplace_noise(INT64_MAX, min_dbl_error,
+                              DBL_MAX, 1));
+  tt_i64_op(INT64_MIN, ==,
+            add_laplace_noise(INT64_MIN, min_dbl_error,
+                              DBL_MAX, 1));
+
+  /* does it play nice with INT64_MAX? */
+  tt_i64_op((INT64_MAX - 35), ==,
+            add_laplace_noise(INT64_MAX, min_dbl_error,
+                              noscale_df, noscale_eps));
+
+  /* Test extreme values of signal with maximally positive values of noise
+   * 1.0000000000000002 is the smallest number > 1
+   * 0.9999999999999998 is the greatest number < 1 by calculation
+   * per https://en.wikipedia.org/wiki/Double_precision
+   * >>> laplace.ppf([1.0, 0.9999999999999998], loc = 0, scale = 1)
+   * array([inf,  35.35050621])
+   * but the function rejects p == 1.0, so we just use max_dbl_lt_one
+   */
+  const double max_dbl_lt_one = 0.9999999999999998;
+
+  /* do near-one fractional values work? */
+  tt_i64_op(35, ==,
+            add_laplace_noise(0, max_dbl_lt_one, noscale_df, noscale_eps));
+
+  /* is it clipped to INT64_MAX? */
+  tt_i64_op(INT64_MAX, ==,
+            add_laplace_noise(INT64_MAX - 35, max_dbl_lt_one,
+                              noscale_df, noscale_eps));
+  tt_i64_op(INT64_MAX, ==,
+            add_laplace_noise(INT64_MAX - 34, max_dbl_lt_one,
+                              noscale_df, noscale_eps));
+  tt_i64_op(INT64_MAX, ==,
+            add_laplace_noise(INT64_MAX, max_dbl_lt_one,
+                              noscale_df, noscale_eps));
+  /* ... even when scaled? */
+  tt_i64_op(INT64_MAX, ==,
+            add_laplace_noise(INT64_MAX, max_dbl_lt_one,
+                              delta_f, epsilon));
+  tt_i64_op((INT64_MIN + INT64_MAX), ==,
+            add_laplace_noise(INT64_MIN, max_dbl_lt_one,
+                              DBL_MAX, 1));
+  tt_i64_op(INT64_MAX, ==,
+            add_laplace_noise(INT64_MAX, max_dbl_lt_one,
+                              DBL_MAX, 1));
+  /* does it play nice with INT64_MIN? */
+  tt_i64_op((INT64_MIN + 35), ==,
+            add_laplace_noise(INT64_MIN, max_dbl_lt_one,
+                              noscale_df, noscale_eps));
+
+ done:
+  ;
+}
+
+static void
+test_util_clamp_double_to_int64(void *arg)
+{
+  (void)arg;
+
+  tt_i64_op(INT64_MIN, ==, clamp_double_to_int64(-INFINITY));
+  tt_i64_op(INT64_MIN, ==,
+            clamp_double_to_int64(-1.0 * pow(2.0, 64.0) - 1.0));
+  tt_i64_op(INT64_MIN, ==,
+            clamp_double_to_int64(-1.0 * pow(2.0, 63.0) - 1.0));
+  tt_i64_op(((uint64_t) -1) << 53, ==,
+            clamp_double_to_int64(-1.0 * pow(2.0, 53.0)));
+  tt_i64_op((((uint64_t) -1) << 53) + 1, ==,
+            clamp_double_to_int64(-1.0 * pow(2.0, 53.0) + 1.0));
+  tt_i64_op(-1, ==, clamp_double_to_int64(-1.0));
+  tt_i64_op(0, ==, clamp_double_to_int64(-0.9));
+  tt_i64_op(0, ==, clamp_double_to_int64(-0.1));
+  tt_i64_op(0, ==, clamp_double_to_int64(0.0));
+  tt_i64_op(0, ==, clamp_double_to_int64(NAN));
+  tt_i64_op(0, ==, clamp_double_to_int64(0.1));
+  tt_i64_op(0, ==, clamp_double_to_int64(0.9));
+  tt_i64_op(1, ==, clamp_double_to_int64(1.0));
+  tt_i64_op((((int64_t) 1) << 53) - 1, ==,
+            clamp_double_to_int64(pow(2.0, 53.0) - 1.0));
+  tt_i64_op(((int64_t) 1) << 53, ==,
+            clamp_double_to_int64(pow(2.0, 53.0)));
+  tt_i64_op(INT64_MAX, ==,
+            clamp_double_to_int64(pow(2.0, 63.0)));
+  tt_i64_op(INT64_MAX, ==,
+            clamp_double_to_int64(pow(2.0, 64.0)));
+  tt_i64_op(INT64_MAX, ==, clamp_double_to_int64(INFINITY));
+
 done:
  ;
 }
@ -4441,6 +4603,7 @@ struct testcase_t util_tests[] = {
  UTIL_TEST(di_map, 0),
  UTIL_TEST(round_to_next_multiple_of, 0),
  UTIL_TEST(laplace, 0),
+  UTIL_TEST(clamp_double_to_int64, 0),
  UTIL_TEST(find_str_at_start_of_line, 0),
  UTIL_TEST(string_is_C_identifier, 0),
  UTIL_TEST(asprintf, 0),