Implement endomorphism optimization for secp256k1_ecmult_const

2024-11-20 10:38:42 +01:00 · 2015-05-22 12:09:36 -05:00 · 2015-05-22 12:09:36 -05:00 · 92e53fc4c8
commit 92e53fc4c8
parent ed35d43a0c
3 changed files with 144 additions and 14 deletions
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@ -241,7 +241,7 @@ void bench_wnaf_const(void* arg) {
    bench_inv_t *data = (bench_inv_t*)arg;

    for (i = 0; i < 20000; i++) {
-        secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A);
+        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A);
        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
    }
 }
--- a/src/ecmult_const_impl.h
+++ b/src/ecmult_const_impl.h
@ -12,7 +12,11 @@
 #include "ecmult_const.h"
 #include "ecmult_impl.h"

+#ifdef USE_ENDOMORPHISM
+    #define WNAF_BITS 128
+#else
    #define WNAF_BITS 256
+#endif
 #define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))

 /* This is like `ECMULT_TABLE_GET_GE` but is constant time */
@ -49,17 +53,47 @@
 *
 *  Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335
 */
-static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w) {
-    secp256k1_scalar_t s = *a;
-    /* Negate to force oddness */
-    int is_even = secp256k1_scalar_is_even(&s);
-    int global_sign = secp256k1_scalar_cond_negate(&s, is_even);
-
+static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar_t s, int w) {
+    int global_sign = 1;
+    int skew = 0;
    int word = 0;
    /* 1 2 3 */
-    int u_last = secp256k1_scalar_shr_int(&s, w);
+    int u_last;
    int u;
+
+#ifdef USE_ENDOMORPHISM
+    /* If we are using the endomorphism, we cannot handle even numbers by negating
+     * them, since we are working with 128-bit numbers whose negations would be 256
+     * bits, eliminating the performance advantage. Instead we use a technique from
+     * Section 4.2 of the Okeya/Takagi paper, which is to add either 1 (for even)
+     * or 2 (for odd) to the number we are encoding, then compensating after the
+     * multiplication. */
+    /* Negative 128-bit numbers will be negated, since otherwise they are 256-bit */
+    int flip = secp256k1_scalar_is_high(&s);
+    /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */
+    int bit = flip ^ (s.d[0] & 1);
+    /* We check for negative one, since adding 2 to it will cause an overflow */
+    secp256k1_scalar_t neg_s;
+    int not_neg_one;
+    secp256k1_scalar_negate(&neg_s, &s);
+    not_neg_one = !secp256k1_scalar_is_one(&neg_s);
+    secp256k1_scalar_cadd_bit(&s, bit, not_neg_one);
+    /* If we had negative one, flip == 1, (s.d[0] & 1) == 0, bit == 1, so caller
+     * expects that we added two to it and flipped it. In fact for -1 these
+     * operations are identical. We only flipped, but since skewing is required
+     * (in the sense that the skew must be 1 or 2, never zero) and flipping is
+     * not, we need to change our flags to claim that we only skewed. */
+    global_sign = secp256k1_scalar_cond_negate(&s, flip);
+    global_sign *= not_neg_one * 2 - 1;
+    skew = 1 << bit;
+#else
+    /* Otherwise, we just negate to force oddness */
+    int is_even = secp256k1_scalar_is_even(&s);
+    global_sign = secp256k1_scalar_cond_negate(&s, is_even);
+#endif
+
    /* 4 */
+    u_last = secp256k1_scalar_shr_int(&s, w);
    while (word * w < WNAF_BITS) {
        int sign;
        int even;
@ -81,6 +115,7 @@ static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w)

    VERIFY_CHECK(secp256k1_scalar_is_zero(&s));
    VERIFY_CHECK(word == WNAF_SIZE(w));
+    return skew;
 }


@ -89,17 +124,37 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
    secp256k1_ge_t tmpa;
    secp256k1_fe_t Z;

+#ifdef USE_ENDOMORPHISM
+    secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
+    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
+    int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)];
+    int skew_1;
+    int skew_lam;
+    secp256k1_scalar_t q_1, q_lam;
+#else
    int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)];
+#endif

    int i;
-    int is_zero = secp256k1_scalar_is_zero(scalar);
    secp256k1_scalar_t sc = *scalar;
+
+    /* build wnaf representation for q. */
+#ifdef USE_ENDOMORPHISM
+    /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
+    secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
+    /* no need for zero correction when using endomorphism since even
+     * numbers have one added to them anyway */
+    skew_1   = secp256k1_wnaf_const(wnaf_1,   q_1,   WINDOW_A - 1);
+    skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1);
+#else
+    int is_zero = secp256k1_scalar_is_zero(scalar);
    /* the wNAF ladder cannot handle zero, so bump this to one .. we will
     * correct the result after the fact */
    sc.d[0] += is_zero;
+    VERIFY_CHECK(!secp256k1_scalar_is_zero(&sc));

-    /* build wnaf representation for q. */
-    secp256k1_wnaf_const(wnaf, &sc, WINDOW_A - 1);
+    secp256k1_wnaf_const(wnaf, sc, WINDOW_A - 1);
+#endif

    /* Calculate odd multiples of a.
     * All multiples are brought to the same Z 'denominator', which is stored
@ -109,14 +164,31 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
     */
    secp256k1_gej_set_ge(r, a);
    secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r);
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    }
+#endif

    /* first loop iteration (separated out so we can directly set r, rather
     * than having it start at infinity, get doubled several times, then have
     * its new value added to it) */
+#ifdef USE_ENDOMORPHISM
+    i = wnaf_1[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
+    secp256k1_gej_set_ge(r, &tmpa);
+
+    i = wnaf_lam[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
+    secp256k1_gej_add_ge(r, r, &tmpa);
+#else
    i = wnaf[WNAF_SIZE(WINDOW_A - 1)];
    VERIFY_CHECK(i != 0);
    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
    secp256k1_gej_set_ge(r, &tmpa);
+#endif
    /* remaining loop iterations */
    for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) {
        int n;
@ -124,16 +196,59 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
        for (j = 0; j < WINDOW_A - 1; ++j) {
            secp256k1_gej_double_nonzero(r, r, NULL);
        }
+#ifdef USE_ENDOMORPHISM
+        n = wnaf_1[i];
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
+        VERIFY_CHECK(n != 0);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+
+        n = wnaf_lam[i];
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
+        VERIFY_CHECK(n != 0);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+#else
        n = wnaf[i];
        VERIFY_CHECK(n != 0);
        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
        secp256k1_gej_add_ge(r, r, &tmpa);
+#endif
    }

    secp256k1_fe_mul(&r->z, &r->z, &Z);

+#ifdef USE_ENDOMORPHISM
+    {
+        /* Correct for wNAF skew */
+        secp256k1_ge_t correction = *a;
+        secp256k1_ge_storage_t correction_1_stor;
+        secp256k1_ge_storage_t correction_lam_stor;
+        secp256k1_ge_storage_t a2_stor;
+        secp256k1_gej_t tmpj;
+        secp256k1_gej_set_ge(&tmpj, &correction);
+        secp256k1_gej_double_var(&tmpj, &tmpj, NULL);
+        secp256k1_ge_set_gej(&correction, &tmpj);
+        secp256k1_ge_to_storage(&correction_1_stor, a);
+        secp256k1_ge_to_storage(&correction_lam_stor, a);
+        secp256k1_ge_to_storage(&a2_stor, &correction);
+
+        /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
+        secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
+        secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+
+        /* Apply the correction */
+        secp256k1_ge_from_storage(&correction, &correction_1_stor);
+        secp256k1_ge_neg(&correction, &correction);
+        secp256k1_gej_add_ge(r, r, &correction);
+
+        secp256k1_ge_from_storage(&correction, &correction_lam_stor);
+        secp256k1_ge_neg(&correction, &correction);
+        secp256k1_ge_mul_lambda(&correction, &correction);
+        secp256k1_gej_add_ge(r, r, &correction);
+    }
+#else
    /* correct for zero */
    r->infinity |= is_zero;
+#endif
 }

 #endif
--- a/src/tests.c
+++ b/src/tests.c
@ -1550,10 +1550,21 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
    secp256k1_scalar_t x, shift;
    int wnaf[256] = {0};
    int i;
+#ifdef USE_ENDOMORPHISM
+    int skew;
+#endif
+    secp256k1_scalar_t num = *number;

    secp256k1_scalar_set_int(&x, 0);
    secp256k1_scalar_set_int(&shift, 1 << w);
-    secp256k1_wnaf_const(wnaf, number, w);
+    /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < 16; ++i)
+        secp256k1_scalar_shr_int(&num, 8);
+    skew = secp256k1_wnaf_const(wnaf, num, w);
+#else
+    secp256k1_wnaf_const(wnaf, num, w);
+#endif

    for (i = WNAF_SIZE(w); i >= 0; --i) {
        secp256k1_scalar_t t;
@ -1572,7 +1583,11 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
        }
        secp256k1_scalar_add(&x, &x, &t);
    }
-    CHECK(secp256k1_scalar_eq(&x, number));
+#ifdef USE_ENDOMORPHISM
+    /* Skew num because when encoding 128-bit numbers as odd we use an offset */
+    secp256k1_scalar_cadd_bit(&num, skew == 2, 1);
+#endif
+    CHECK(secp256k1_scalar_eq(&x, &num));
 }

 void run_wnaf(void) {