mirror of
https://github.com/bitcoin/bitcoin.git
synced 2024-11-20 02:25:40 +01:00
Merge pull request #142
f22d73e
Explicitly access %0..%2 as 64-bit so we use the right registers for x32 ABI (Luke Dashjr)e66d4d6
Avoid the stack in assembly and use explicit registers (Pieter Wuille)
This commit is contained in:
commit
3907277dcf
@ -21,19 +21,16 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
|
||||
* r15:rcx = d
|
||||
* r10-r14 = a0-a4
|
||||
* rbx = b
|
||||
* %2 = r
|
||||
* %0 = a / t?
|
||||
* rbp = R (0x1000003d10)
|
||||
* rdi = r
|
||||
* rsi = a / t?
|
||||
*/
|
||||
uint64_t tmp1, tmp2, tmp3;
|
||||
__asm__ __volatile__(
|
||||
"pushq %%rbp\n"
|
||||
|
||||
"movq 0(%0),%%r10\n"
|
||||
"movq 8(%0),%%r11\n"
|
||||
"movq 16(%0),%%r12\n"
|
||||
"movq 24(%0),%%r13\n"
|
||||
"movq 32(%0),%%r14\n"
|
||||
"movq $0x1000003d10,%%rbp\n"
|
||||
"movq 0(%%rsi),%%r10\n"
|
||||
"movq 8(%%rsi),%%r11\n"
|
||||
"movq 16(%%rsi),%%r12\n"
|
||||
"movq 24(%%rsi),%%r13\n"
|
||||
"movq 32(%%rsi),%%r14\n"
|
||||
|
||||
/* d += a3 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
@ -63,16 +60,17 @@ __asm__ __volatile__(
|
||||
/* d += (c & M) * R */
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* t3 (stack) = d & M */
|
||||
"movq %%rcx,%0\n"
|
||||
/* t3 (tmp1) = d & M */
|
||||
"movq %%rcx,%%rsi\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%0\n"
|
||||
"pushq %0\n"
|
||||
"andq %%rdx,%%rsi\n"
|
||||
"movq %%rsi,%q1\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
@ -103,23 +101,25 @@ __asm__ __volatile__(
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += c * R */
|
||||
"movq %%r8,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* t4 = d & M (%0) */
|
||||
"movq %%rcx,%0\n"
|
||||
/* t4 = d & M (%%rsi) */
|
||||
"movq %%rcx,%%rsi\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%0\n"
|
||||
"andq %%rdx,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
/* tx = t4 >> 48 (%%rbp, overwrites R) */
|
||||
"movq %0,%%rbp\n"
|
||||
"shrq $48,%%rbp\n"
|
||||
/* t4 &= (M >> 4) (stack) */
|
||||
/* tx = t4 >> 48 (tmp3) */
|
||||
"movq %%rsi,%%rax\n"
|
||||
"shrq $48,%%rax\n"
|
||||
"movq %%rax,%q3\n"
|
||||
/* t4 &= (M >> 4) (tmp2) */
|
||||
"movq $0xffffffffffff,%%rax\n"
|
||||
"andq %%rax,%0\n"
|
||||
"pushq %0\n"
|
||||
"andq %%rax,%%rsi\n"
|
||||
"movq %%rsi,%q2\n"
|
||||
/* c = a0 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
@ -145,26 +145,27 @@ __asm__ __volatile__(
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* u0 = d & M (%0) */
|
||||
"movq %%rcx,%0\n"
|
||||
/* u0 = d & M (%%rsi) */
|
||||
"movq %%rcx,%%rsi\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%0\n"
|
||||
"andq %%rdx,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
/* u0 = (u0 << 4) | tx (%0) */
|
||||
"shlq $4,%0\n"
|
||||
"orq %%rbp,%0\n"
|
||||
/* u0 = (u0 << 4) | tx (%%rsi) */
|
||||
"shlq $4,%%rsi\n"
|
||||
"movq %q3,%%rax\n"
|
||||
"orq %%rax,%%rsi\n"
|
||||
/* c += u0 * (R >> 4) */
|
||||
"movq $0x1000003d1,%%rax\n"
|
||||
"mulq %0\n"
|
||||
"mulq %%rsi\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[0] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,0(%2)\n"
|
||||
"movq %%rax,0(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
@ -193,13 +194,12 @@ __asm__ __volatile__(
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* restore rdp = R */
|
||||
"movq $0x1000003d10,%%rbp\n"
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rcx,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 */
|
||||
@ -209,7 +209,7 @@ __asm__ __volatile__(
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,8(%2)\n"
|
||||
"movq %%rax,8(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
@ -228,9 +228,9 @@ __asm__ __volatile__(
|
||||
"mulq %%r10\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* fetch t3 (%%r10, overwrites a0),t4 (%0) */
|
||||
"popq %0\n"
|
||||
"popq %%r10\n"
|
||||
/* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */
|
||||
"movq %q2,%%rsi\n"
|
||||
"movq %q1,%%r10\n"
|
||||
/* d += a4 * b3 */
|
||||
"movq 24(%%rbx),%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
@ -245,7 +245,8 @@ __asm__ __volatile__(
|
||||
"movq %%rcx,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 (%%rcx only) */
|
||||
@ -254,7 +255,7 @@ __asm__ __volatile__(
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,16(%2)\n"
|
||||
"movq %%rax,16(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
@ -262,23 +263,22 @@ __asm__ __volatile__(
|
||||
"addq %%r10,%%r8\n"
|
||||
/* c += d * R */
|
||||
"movq %%rcx,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[3] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,24(%2)\n"
|
||||
"movq %%rax,24(%%rdi)\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* c += t4 (%%r8 only) */
|
||||
"addq %0,%%r8\n"
|
||||
"addq %%rsi,%%r8\n"
|
||||
/* r[4] = c */
|
||||
"movq %%r8,32(%2)\n"
|
||||
|
||||
"popq %%rbp\n"
|
||||
: "+S"(a)
|
||||
"movq %%r8,32(%%rdi)\n"
|
||||
: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
|
||||
: "b"(b), "D"(r)
|
||||
: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
|
||||
);
|
||||
@ -291,19 +291,16 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
|
||||
* rcx:rbx = d
|
||||
* r10-r14 = a0-a4
|
||||
* r15 = M (0xfffffffffffff)
|
||||
* %1 = r
|
||||
* %0 = a / t?
|
||||
* rbp = R (0x1000003d10)
|
||||
* rdi = r
|
||||
* rsi = a / t?
|
||||
*/
|
||||
uint64_t tmp1, tmp2, tmp3;
|
||||
__asm__ __volatile__(
|
||||
"pushq %%rbp\n"
|
||||
|
||||
"movq 0(%0),%%r10\n"
|
||||
"movq 8(%0),%%r11\n"
|
||||
"movq 16(%0),%%r12\n"
|
||||
"movq 24(%0),%%r13\n"
|
||||
"movq 32(%0),%%r14\n"
|
||||
"movq $0x1000003d10,%%rbp\n"
|
||||
"movq 0(%%rsi),%%r10\n"
|
||||
"movq 8(%%rsi),%%r11\n"
|
||||
"movq 16(%%rsi),%%r12\n"
|
||||
"movq 24(%%rsi),%%r13\n"
|
||||
"movq 32(%%rsi),%%r14\n"
|
||||
"movq $0xfffffffffffff,%%r15\n"
|
||||
|
||||
/* d = (a0*2) * a3 */
|
||||
@ -323,15 +320,16 @@ __asm__ __volatile__(
|
||||
"movq %%rdx,%%r9\n"
|
||||
/* d += (c & M) * R */
|
||||
"andq %%r15,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* t3 (stack) = d & M */
|
||||
"movq %%rbx,%0\n"
|
||||
"andq %%r15,%0\n"
|
||||
"pushq %0\n"
|
||||
/* t3 (tmp1) = d & M */
|
||||
"movq %%rbx,%%rsi\n"
|
||||
"andq %%r15,%%rsi\n"
|
||||
"movq %%rsi,%q1\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
@ -354,22 +352,24 @@ __asm__ __volatile__(
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* d += c * R */
|
||||
"movq %%r8,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* t4 = d & M (%0) */
|
||||
"movq %%rbx,%0\n"
|
||||
"andq %%r15,%0\n"
|
||||
/* t4 = d & M (%%rsi) */
|
||||
"movq %%rbx,%%rsi\n"
|
||||
"andq %%r15,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
/* tx = t4 >> 48 (%%rbp, overwrites constant) */
|
||||
"movq %0,%%rbp\n"
|
||||
"shrq $48,%%rbp\n"
|
||||
/* t4 &= (M >> 4) (stack) */
|
||||
/* tx = t4 >> 48 (tmp3) */
|
||||
"movq %%rsi,%%rax\n"
|
||||
"shrq $48,%%rax\n"
|
||||
"movq %%rax,%q3\n"
|
||||
/* t4 &= (M >> 4) (tmp2) */
|
||||
"movq $0xffffffffffff,%%rax\n"
|
||||
"andq %%rax,%0\n"
|
||||
"pushq %0\n"
|
||||
"andq %%rax,%%rsi\n"
|
||||
"movq %%rsi,%q2\n"
|
||||
/* c = a0 * a0 */
|
||||
"movq %%r10,%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
@ -385,24 +385,25 @@ __asm__ __volatile__(
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* u0 = d & M (%0) */
|
||||
"movq %%rbx,%0\n"
|
||||
"andq %%r15,%0\n"
|
||||
/* u0 = d & M (%%rsi) */
|
||||
"movq %%rbx,%%rsi\n"
|
||||
"andq %%r15,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
/* u0 = (u0 << 4) | tx (%0) */
|
||||
"shlq $4,%0\n"
|
||||
"orq %%rbp,%0\n"
|
||||
/* u0 = (u0 << 4) | tx (%%rsi) */
|
||||
"shlq $4,%%rsi\n"
|
||||
"movq %q3,%%rax\n"
|
||||
"orq %%rax,%%rsi\n"
|
||||
/* c += u0 * (R >> 4) */
|
||||
"movq $0x1000003d1,%%rax\n"
|
||||
"mulq %0\n"
|
||||
"mulq %%rsi\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[0] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,0(%1)\n"
|
||||
"movq %%rax,0(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
@ -423,12 +424,11 @@ __asm__ __volatile__(
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* load R in %%rbp */
|
||||
"movq $0x1000003d10,%%rbp\n"
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rbx,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 */
|
||||
@ -437,7 +437,7 @@ __asm__ __volatile__(
|
||||
/* r[1] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,8(%1)\n"
|
||||
"movq %%rax,8(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
@ -446,9 +446,9 @@ __asm__ __volatile__(
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* fetch t3 (%%r10, overwrites a0),t4 (%0) */
|
||||
"popq %0\n"
|
||||
"popq %%r10\n"
|
||||
/* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */
|
||||
"movq %q2,%%rsi\n"
|
||||
"movq %q1,%%r10\n"
|
||||
/* c += a1 * a1 */
|
||||
"movq %%r11,%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
@ -462,7 +462,8 @@ __asm__ __volatile__(
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rbx,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 (%%rbx only) */
|
||||
@ -470,7 +471,7 @@ __asm__ __volatile__(
|
||||
/* r[2] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,16(%1)\n"
|
||||
"movq %%rax,16(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
@ -478,22 +479,21 @@ __asm__ __volatile__(
|
||||
"addq %%r10,%%r8\n"
|
||||
/* c += d * R */
|
||||
"movq %%rbx,%%rax\n"
|
||||
"mulq %%rbp\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[3] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,24(%1)\n"
|
||||
"movq %%rax,24(%%rdi)\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* c += t4 (%%r8 only) */
|
||||
"addq %0,%%r8\n"
|
||||
"addq %%rsi,%%r8\n"
|
||||
/* r[4] = c */
|
||||
"movq %%r8,32(%1)\n"
|
||||
|
||||
"popq %%rbp\n"
|
||||
: "+S"(a)
|
||||
"movq %%r8,32(%%rdi)\n"
|
||||
: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
|
||||
: "D"(r)
|
||||
: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
|
||||
);
|
||||
|
Loading…
Reference in New Issue
Block a user