mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-03-01 09:40:19 +01:00
ccan: update.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
ec399e13a4
commit
81e73926f3
38 changed files with 6158 additions and 12 deletions
|
@ -1,3 +1,3 @@
|
|||
CCAN imported from http://ccodearchive.net.
|
||||
|
||||
CCAN version: init-1956-ged95d86
|
||||
CCAN version: init-2039-g396f2fc
|
||||
|
|
|
@ -58,7 +58,7 @@ int main(int argc, char *argv[])
|
|||
return 1;
|
||||
|
||||
if (strcmp(argv[1], "depends") == 0) {
|
||||
printf("ccan/typesafe_cb\n");
|
||||
printf("ccan/order\n");
|
||||
return 0;
|
||||
}
|
||||
if (strcmp(argv[1], "testdepends") == 0) {
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#ifndef CCAN_ASORT_H
|
||||
#define CCAN_ASORT_H
|
||||
#include "config.h"
|
||||
#include <ccan/typesafe_cb/typesafe_cb.h>
|
||||
#include <ccan/order/order.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
|
@ -20,19 +20,13 @@
|
|||
*/
|
||||
#define asort(base, num, cmp, ctx) \
|
||||
_asort((base), (num), sizeof(*(base)), \
|
||||
typesafe_cb_cast(int (*)(const void *, const void *, void *), \
|
||||
int (*)(const __typeof__(*(base)) *, \
|
||||
const __typeof__(*(base)) *, \
|
||||
__typeof__(ctx)), \
|
||||
(cmp)), \
|
||||
(ctx))
|
||||
total_order_cast((cmp), *(base), (ctx)), (ctx))
|
||||
|
||||
#if HAVE_QSORT_R_PRIVATE_LAST
|
||||
#define _asort(b, n, s, cmp, ctx) qsort_r(b, n, s, cmp, ctx)
|
||||
#else
|
||||
void _asort(void *base, size_t nmemb, size_t size,
|
||||
int(*compar)(const void *, const void *, void *),
|
||||
void *ctx);
|
||||
_total_order_cb compar, void *ctx);
|
||||
#endif
|
||||
|
||||
#endif /* CCAN_ASORT_H */
|
||||
|
|
20
ccan/ccan/crypto/sha256/benchmarks/Makefile
Normal file
20
ccan/ccan/crypto/sha256/benchmarks/Makefile
Normal file
|
@ -0,0 +1,20 @@
|
|||
# Builds double-sha-bench, which compares the CCAN SHA-256 implementation
# against Intel's assembler variants.  Requires yasm and an x86-64 target.

# Top of the CCAN tree (the directory that contains ccan/).
CCANDIR := ../../../../
# CCAN_USE_ORIGINAL=1 is passed through to the CCAN sources.
CFLAGS := -Wall -I$(CCANDIR) -O3 -flto -DCCAN_USE_ORIGINAL=1
LDFLAGS := -O3 -flto

INTEL_OBJS := sha256_avx1.o sha256_avx2_rorx2.o sha256_avx2_rorx8.o sha256_sse4.o

# Default goal.  double-sha-bench.c #includes sha256.c directly, so
# ccan-crypto-sha256.o is deliberately left out of the link.
double-sha-bench: double-sha-bench.o ccan-time.o $(INTEL_OBJS) #ccan-crypto-sha256.o

# Assemble each Intel implementation from its .asm source.
$(INTEL_OBJS): %.o : %.asm
	yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o $@ $<

# clean is a command, not a file: keep it working even if a file named
# "clean" exists, and remove the benchmark binary as well as the objects.
# $(RM) already expands to "rm -f".
.PHONY: clean
clean:
	$(RM) *.o double-sha-bench

ccan-crypto-sha256.o: $(CCANDIR)/ccan/crypto/sha256/sha256.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-time.o: $(CCANDIR)/ccan/time/time.c
	$(CC) $(CFLAGS) -c -o $@ $<
|
122
ccan/ccan/crypto/sha256/benchmarks/double-sha-bench.c
Normal file
122
ccan/ccan/crypto/sha256/benchmarks/double-sha-bench.c
Normal file
|
@ -0,0 +1,122 @@
|
|||
/* Bitcoin does a lot of SHA of SHA. Benchmark that. */
|
||||
#include <ccan/crypto/sha256/sha256.c>
|
||||
#include <ccan/time/time.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void sha256_avx(void *input_data, uint32_t digest[8], uint64_t num_blks);
|
||||
void sha256_rorx(void *input_data, uint32_t digest[8], uint64_t num_blks);
|
||||
void sha256_rorx_x8ms(void *input_data, uint32_t digest[8], uint64_t num_blks);
|
||||
void sha256_sse4(void *input_data, uint32_t digest[8], uint64_t num_blks);
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct timeabs start;
|
||||
struct timerel diff;
|
||||
size_t i, n;
|
||||
union {
|
||||
struct sha256 h;
|
||||
uint32_t u32[16];
|
||||
uint8_t u8[64];
|
||||
} block;
|
||||
|
||||
n = atoi(argv[1] ? argv[1] : "1000000");
|
||||
memset(&block, 0, sizeof(block));
|
||||
sha256(&block.h, &n, sizeof(n));
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < n; i++) {
|
||||
sha256(&block.h, &block.h, sizeof(block.h));
|
||||
}
|
||||
diff = time_divide(time_between(time_now(), start), n);
|
||||
printf("Normal gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n",
|
||||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
|
||||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
|
||||
(unsigned long long)time_to_nsec(diff));
|
||||
|
||||
/* Now, don't re-initalize every time; use Transform */
|
||||
memset(&block, 0, sizeof(block));
|
||||
sha256(&block.h, &n, sizeof(n));
|
||||
block.u8[sizeof(block.h)] = 0x80;
|
||||
// Size is 256 bits
|
||||
block.u8[sizeof(block)-2] = 1;
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < n; i++) {
|
||||
struct sha256_ctx ctx = SHA256_INIT;
|
||||
size_t j;
|
||||
Transform(ctx.s, block.u32);
|
||||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++)
|
||||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]);
|
||||
}
|
||||
diff = time_divide(time_between(time_now(), start), n);
|
||||
printf("Transform gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n",
|
||||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
|
||||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
|
||||
(unsigned long long)time_to_nsec(diff));
|
||||
|
||||
/* Now, assembler variants */
|
||||
sha256(&block.h, &n, sizeof(n));
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < n; i++) {
|
||||
struct sha256_ctx ctx = SHA256_INIT;
|
||||
size_t j;
|
||||
sha256_rorx(block.u32, ctx.s, 1);
|
||||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++)
|
||||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]);
|
||||
}
|
||||
diff = time_divide(time_between(time_now(), start), n);
|
||||
printf("Asm rorx for %02x%02x%02x%02x%02x%02x... is %llu nsec\n",
|
||||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
|
||||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
|
||||
(unsigned long long)time_to_nsec(diff));
|
||||
|
||||
sha256(&block.h, &n, sizeof(n));
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < n; i++) {
|
||||
struct sha256_ctx ctx = SHA256_INIT;
|
||||
size_t j;
|
||||
sha256_sse4(block.u32, ctx.s, 1);
|
||||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++)
|
||||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]);
|
||||
}
|
||||
diff = time_divide(time_between(time_now(), start), n);
|
||||
printf("Asm SSE4 for %02x%02x%02x%02x%02x%02x... is %llu nsec\n",
|
||||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
|
||||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
|
||||
(unsigned long long)time_to_nsec(diff));
|
||||
|
||||
sha256(&block.h, &n, sizeof(n));
|
||||
start = time_now();
|
||||
for (i = 0; i < n; i++) {
|
||||
struct sha256_ctx ctx = SHA256_INIT;
|
||||
size_t j;
|
||||
sha256_rorx_x8ms(block.u32, ctx.s, 1);
|
||||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++)
|
||||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]);
|
||||
}
|
||||
diff = time_divide(time_between(time_now(), start), n);
|
||||
printf("Asm RORx-x8ms for %02x%02x%02x%02x%02x%02x... is %llu nsec\n",
|
||||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
|
||||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
|
||||
(unsigned long long)time_to_nsec(diff));
|
||||
|
||||
sha256(&block.h, &n, sizeof(n));
|
||||
start = time_now();
|
||||
for (i = 0; i < n; i++) {
|
||||
struct sha256_ctx ctx = SHA256_INIT;
|
||||
size_t j;
|
||||
sha256_avx(block.u32, ctx.s, 1);
|
||||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++)
|
||||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]);
|
||||
}
|
||||
diff = time_divide(time_between(time_now(), start), n);
|
||||
printf("Asm AVX for %02x%02x%02x%02x%02x%02x... is %llu nsec\n",
|
||||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2],
|
||||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5],
|
||||
(unsigned long long)time_to_nsec(diff));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
32
ccan/ccan/crypto/sha256/benchmarks/open_software_license.txt
Normal file
32
ccan/ccan/crypto/sha256/benchmarks/open_software_license.txt
Normal file
|
@ -0,0 +1,32 @@
|
|||
Copyright (c) 2012, Intel Corporation
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
* Neither the name of the Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
586
ccan/ccan/crypto/sha256/benchmarks/sha256_avx1.asm
Normal file
586
ccan/ccan/crypto/sha256/benchmarks/sha256_avx1.asm
Normal file
|
@ -0,0 +1,586 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright (c) 2012, Intel Corporation
|
||||
;
|
||||
; All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions are
|
||||
; met:
|
||||
;
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
;
|
||||
; * Neither the name of the Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
|
||||
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;
|
||||
; Example YASM command lines:
|
||||
; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_avx1.obj -g cv8 sha256_avx1.asm
|
||||
; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_avx1.o sha256_avx1.asm
|
||||
;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;
|
||||
; This code is described in an Intel White-Paper:
|
||||
; "Fast SHA-256 Implementations on Intel Architecture Processors"
|
||||
;
|
||||
; To find it, surf to http://www.intel.com/p/en_US/embedded
|
||||
; and search for that title.
|
||||
; The paper is expected to be released roughly at the end of April, 2012
|
||||
;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; This code schedules 1 block at a time, with 4 lanes per block
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define VMOVDQ vmovdqu ;; assume buffers not aligned
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros
|
||||
|
||||
; addm [mem], reg
|
||||
; Add reg to mem using reg-mem add and store
|
||||
%macro addm 2
|
||||
add %2, %1
|
||||
mov %1, %2
|
||||
%endm
|
||||
|
||||
; MY_ROR reg, imm
; Rotate reg right by imm bits. shld of a register with itself by
; (32 - imm) is a left rotate by (32 - imm), which equals a right rotate
; by imm for the 32-bit registers this file passes in.
%macro MY_ROR 2
|
||||
shld %1,%1,(32-(%2))
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
|
||||
; Load xmm with mem and byte swap each dword
|
||||
%macro COPY_XMM_AND_BSWAP 3
|
||||
VMOVDQ %1, %2
|
||||
vpshufb %1, %1, %3
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define X0 xmm4
|
||||
%define X1 xmm5
|
||||
%define X2 xmm6
|
||||
%define X3 xmm7
|
||||
|
||||
%define XTMP0 xmm0
|
||||
%define XTMP1 xmm1
|
||||
%define XTMP2 xmm2
|
||||
%define XTMP3 xmm3
|
||||
%define XTMP4 xmm8
|
||||
%define XFER xmm9
|
||||
%define XTMP5 xmm11
|
||||
|
||||
%define SHUF_00BA xmm10 ; shuffle xBxA -> 00BA
|
||||
%define SHUF_DC00 xmm12 ; shuffle xDxC -> DC00
|
||||
%define BYTE_FLIP_MASK xmm13
|
||||
|
||||
%ifdef LINUX
|
||||
%define NUM_BLKS rdx ; 3rd arg
|
||||
%define CTX rsi ; 2nd arg
|
||||
%define INP rdi ; 1st arg
|
||||
|
||||
%define SRND rdi ; clobbers INP
|
||||
%define c ecx
|
||||
%define d r8d
|
||||
%define e edx
|
||||
%else
|
||||
%define NUM_BLKS r8 ; 3rd arg
|
||||
%define CTX rdx ; 2nd arg
|
||||
%define INP rcx ; 1st arg
|
||||
|
||||
%define SRND rcx ; clobbers INP
|
||||
%define c edi
|
||||
%define d esi
|
||||
%define e r8d
|
||||
|
||||
%endif
|
||||
%define TBL rbp
|
||||
%define a eax
|
||||
%define b ebx
|
||||
|
||||
%define f r9d
|
||||
%define g r10d
|
||||
%define h r11d
|
||||
|
||||
%define y0 r13d
|
||||
%define y1 r14d
|
||||
%define y2 r15d
|
||||
|
||||
|
||||
_INP_END_SIZE equ 8
|
||||
_INP_SIZE equ 8
|
||||
_XFER_SIZE equ 8
|
||||
%ifdef LINUX
|
||||
_XMM_SAVE_SIZE equ 0
|
||||
%else
|
||||
_XMM_SAVE_SIZE equ 8*16
|
||||
%endif
|
||||
; STACK_SIZE plus pushes must be an odd multiple of 8
|
||||
_ALIGN_SIZE equ 8
|
||||
|
||||
_INP_END equ 0
|
||||
_INP equ _INP_END + _INP_END_SIZE
|
||||
_XFER equ _INP + _INP_SIZE
|
||||
_XMM_SAVE equ _XFER + _XFER_SIZE + _ALIGN_SIZE
|
||||
STACK_SIZE equ _XMM_SAVE + _XMM_SAVE_SIZE
|
||||
|
||||
; rotate_Xs
|
||||
; Rotate values of symbols X0...X3
|
||||
%macro rotate_Xs 0
|
||||
%xdefine X_ X0
|
||||
%xdefine X0 X1
|
||||
%xdefine X1 X2
|
||||
%xdefine X2 X3
|
||||
%xdefine X3 X_
|
||||
%endm
|
||||
|
||||
; ROTATE_ARGS
|
||||
; Rotate values of symbols a...h
|
||||
%macro ROTATE_ARGS 0
|
||||
%xdefine TMP_ h
|
||||
%xdefine h g
|
||||
%xdefine g f
|
||||
%xdefine f e
|
||||
%xdefine e d
|
||||
%xdefine d c
|
||||
%xdefine c b
|
||||
%xdefine b a
|
||||
%xdefine a TMP_
|
||||
%endm
|
||||
|
||||
%macro FOUR_ROUNDS_AND_SCHED 0
|
||||
;; compute s0 four at a time and s1 two at a time
|
||||
;; compute W[-16] + W[-7] 4 at a time
|
||||
;vmovdqa XTMP0, X3
|
||||
mov y0, e ; y0 = e
|
||||
MY_ROR y0, (25-11) ; y0 = e >> (25-11)
|
||||
mov y1, a ; y1 = a
|
||||
vpalignr XTMP0, X3, X2, 4 ; XTMP0 = W[-7]
|
||||
MY_ROR y1, (22-13) ; y1 = a >> (22-13)
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
mov y2, f ; y2 = f
|
||||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
;vmovdqa XTMP1, X1
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13)
|
||||
xor y2, g ; y2 = f^g
|
||||
vpaddd XTMP0, XTMP0, X0 ; XTMP0 = W[-7] + W[-16]
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
;; compute s0
|
||||
vpalignr XTMP1, X1, X0, 4 ; XTMP1 = W[-15]
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
|
||||
|
||||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
add y2, [rsp + _XFER + 0*4] ; y2 = k + w + S1 + CH
|
||||
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
|
||||
vpsrld XTMP2, XTMP1, 7
|
||||
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
|
||||
vpslld XTMP3, XTMP1, (32-7)
|
||||
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
|
||||
vpor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] MY_ROR 7
|
||||
|
||||
or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
mov y0, e ; y0 = e
|
||||
mov y1, a ; y1 = a
|
||||
|
||||
|
||||
MY_ROR y0, (25-11) ; y0 = e >> (25-11)
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
mov y2, f ; y2 = f
|
||||
MY_ROR y1, (22-13) ; y1 = a >> (22-13)
|
||||
|
||||
vpsrld XTMP2, XTMP1,18
|
||||
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13)
|
||||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
xor y2, g ; y2 = f^g
|
||||
|
||||
vpsrld XTMP4, XTMP1, 3 ; XTMP4 = W[-15] >> 3
|
||||
|
||||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
|
||||
vpslld XTMP1, XTMP1, (32-18)
|
||||
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
|
||||
vpxor XTMP3, XTMP3, XTMP1
|
||||
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
add y2, [rsp + _XFER + 1*4] ; y2 = k + w + S1 + CH
|
||||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
|
||||
vpxor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18
|
||||
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
|
||||
vpxor XTMP1, XTMP3, XTMP4 ; XTMP1 = s0
|
||||
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
;; compute low s1
|
||||
vpshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA}
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
vpaddd XTMP0, XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0
|
||||
or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
;vmovdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {BBAA}
|
||||
|
||||
mov y0, e ; y0 = e
|
||||
mov y1, a ; y1 = a
|
||||
MY_ROR y0, (25-11) ; y0 = e >> (25-11)
|
||||
|
||||
;vmovdqa XTMP4, XTMP2 ; XTMP4 = W[-2] {BBAA}
|
||||
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
MY_ROR y1, (22-13) ; y1 = a >> (22-13)
|
||||
mov y2, f ; y2 = f
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13)
|
||||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
|
||||
vpsrld XTMP4, XTMP2, 10 ; XTMP4 = W[-2] >> 10 {BBAA}
|
||||
|
||||
xor y2, g ; y2 = f^g
|
||||
|
||||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] MY_ROR 19 {xBxA}
|
||||
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
|
||||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] MY_ROR 17 {xBxA}
|
||||
|
||||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
vpxor XTMP2, XTMP2, XTMP3
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, [rsp + _XFER + 2*4] ; y2 = k + w + S1 + CH
|
||||
vpxor XTMP4, XTMP4, XTMP2 ; XTMP4 = s1 {xBxA}
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
vpshufb XTMP4, XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA}
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
vpaddd XTMP0, XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]}
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
;; compute high s1
|
||||
vpshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC}
|
||||
or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
;vmovdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {DDCC}
|
||||
mov y0, e ; y0 = e
|
||||
MY_ROR y0, (25-11) ; y0 = e >> (25-11)
|
||||
mov y1, a ; y1 = a
|
||||
;vmovdqa XTMP5, XTMP2 ; XTMP5 = W[-2] {DDCC}
|
||||
MY_ROR y1, (22-13) ; y1 = a >> (22-13)
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
mov y2, f ; y2 = f
|
||||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
|
||||
vpsrld XTMP5, XTMP2, 10 ; XTMP5 = W[-2] >> 10 {DDCC}
|
||||
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13)
|
||||
xor y2, g ; y2 = f^g
|
||||
|
||||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] MY_ROR 19 {xDxC}
|
||||
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
|
||||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] MY_ROR 17 {xDxC}
|
||||
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
|
||||
vpxor XTMP2, XTMP2, XTMP3
|
||||
|
||||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
add y2, [rsp + _XFER + 3*4] ; y2 = k + w + S1 + CH
|
||||
vpxor XTMP5, XTMP5, XTMP2 ; XTMP5 = s1 {xDxC}
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
vpshufb XTMP5, XTMP5, SHUF_DC00 ; XTMP5 = s1 {DC00}
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
vpaddd X0, XTMP5, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]}
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
rotate_Xs
|
||||
%endm
|
||||
|
||||
;; input is [rsp + _XFER + %1 * 4]
|
||||
%macro DO_ROUND 1
|
||||
mov y0, e ; y0 = e
|
||||
MY_ROR y0, (25-11) ; y0 = e >> (25-11)
|
||||
mov y1, a ; y1 = a
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
MY_ROR y1, (22-13) ; y1 = a >> (22-13)
|
||||
mov y2, f ; y2 = f
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13)
|
||||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
xor y2, g ; y2 = f^g
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, [rsp + _XFER + %1 * 4] ; y2 = k + w + S1 + CH
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
or y0, y2 ; y0 = MAJ = (a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
ROTATE_ARGS
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; void sha256_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
|
||||
;; arg 1 : pointer to input data
|
||||
;; arg 2 : pointer to digest
|
||||
;; arg 3 : Num blocks
|
||||
section .text
|
||||
global sha256_avx
|
||||
align 32
|
||||
sha256_avx:
|
||||
push rbx
|
||||
%ifndef LINUX
|
||||
push rsi
|
||||
push rdi
|
||||
%endif
|
||||
push rbp
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
sub rsp,STACK_SIZE
|
||||
%ifndef LINUX
|
||||
vmovdqa [rsp + _XMM_SAVE + 0*16],xmm6
|
||||
vmovdqa [rsp + _XMM_SAVE + 1*16],xmm7
|
||||
vmovdqa [rsp + _XMM_SAVE + 2*16],xmm8
|
||||
vmovdqa [rsp + _XMM_SAVE + 3*16],xmm9
|
||||
vmovdqa [rsp + _XMM_SAVE + 4*16],xmm10
|
||||
vmovdqa [rsp + _XMM_SAVE + 5*16],xmm11
|
||||
vmovdqa [rsp + _XMM_SAVE + 6*16],xmm12
|
||||
vmovdqa [rsp + _XMM_SAVE + 7*16],xmm13
|
||||
%endif
|
||||
|
||||
shl NUM_BLKS, 6 ; convert to bytes
|
||||
jz done_hash
|
||||
add NUM_BLKS, INP ; pointer to end of data
|
||||
mov [rsp + _INP_END], NUM_BLKS
|
||||
|
||||
;; load initial digest
|
||||
mov a,[4*0 + CTX]
|
||||
mov b,[4*1 + CTX]
|
||||
mov c,[4*2 + CTX]
|
||||
mov d,[4*3 + CTX]
|
||||
mov e,[4*4 + CTX]
|
||||
mov f,[4*5 + CTX]
|
||||
mov g,[4*6 + CTX]
|
||||
mov h,[4*7 + CTX]
|
||||
|
||||
vmovdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
|
||||
vmovdqa SHUF_00BA, [_SHUF_00BA wrt rip]
|
||||
vmovdqa SHUF_DC00, [_SHUF_DC00 wrt rip]
|
||||
|
||||
loop0:
|
||||
lea TBL,[K256 wrt rip]
|
||||
|
||||
;; byte swap first 16 dwords
|
||||
COPY_XMM_AND_BSWAP X0, [INP + 0*16], BYTE_FLIP_MASK
|
||||
COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK
|
||||
COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK
|
||||
COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK
|
||||
|
||||
mov [rsp + _INP], INP
|
||||
|
||||
;; schedule 48 input dwords, by doing 3 rounds of 16 each
|
||||
mov SRND, 3
|
||||
align 16
|
||||
loop1:
|
||||
vpaddd XFER, X0, [TBL + 0*16]
|
||||
vmovdqa [rsp + _XFER], XFER
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
vpaddd XFER, X0, [TBL + 1*16]
|
||||
vmovdqa [rsp + _XFER], XFER
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
vpaddd XFER, X0, [TBL + 2*16]
|
||||
vmovdqa [rsp + _XFER], XFER
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
vpaddd XFER, X0, [TBL + 3*16]
|
||||
vmovdqa [rsp + _XFER], XFER
|
||||
add TBL, 4*16
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
sub SRND, 1
|
||||
jne loop1
|
||||
|
||||
mov SRND, 2
|
||||
loop2:
|
||||
vpaddd XFER, X0, [TBL + 0*16]
|
||||
vmovdqa [rsp + _XFER], XFER
|
||||
DO_ROUND 0
|
||||
DO_ROUND 1
|
||||
DO_ROUND 2
|
||||
DO_ROUND 3
|
||||
|
||||
vpaddd XFER, X1, [TBL + 1*16]
|
||||
vmovdqa [rsp + _XFER], XFER
|
||||
add TBL, 2*16
|
||||
DO_ROUND 0
|
||||
DO_ROUND 1
|
||||
DO_ROUND 2
|
||||
DO_ROUND 3
|
||||
|
||||
vmovdqa X0, X2
|
||||
vmovdqa X1, X3
|
||||
|
||||
sub SRND, 1
|
||||
jne loop2
|
||||
|
||||
|
||||
addm [4*0 + CTX],a
|
||||
addm [4*1 + CTX],b
|
||||
addm [4*2 + CTX],c
|
||||
addm [4*3 + CTX],d
|
||||
addm [4*4 + CTX],e
|
||||
addm [4*5 + CTX],f
|
||||
addm [4*6 + CTX],g
|
||||
addm [4*7 + CTX],h
|
||||
|
||||
mov INP, [rsp + _INP]
|
||||
add INP, 64
|
||||
cmp INP, [rsp + _INP_END]
|
||||
jne loop0
|
||||
|
||||
done_hash:
|
||||
%ifndef LINUX
|
||||
vmovdqa xmm6,[rsp + _XMM_SAVE + 0*16]
|
||||
vmovdqa xmm7,[rsp + _XMM_SAVE + 1*16]
|
||||
vmovdqa xmm8,[rsp + _XMM_SAVE + 2*16]
|
||||
vmovdqa xmm9,[rsp + _XMM_SAVE + 3*16]
|
||||
vmovdqa xmm10,[rsp + _XMM_SAVE + 4*16]
|
||||
vmovdqa xmm11,[rsp + _XMM_SAVE + 5*16]
|
||||
vmovdqa xmm12,[rsp + _XMM_SAVE + 6*16]
|
||||
vmovdqa xmm13,[rsp + _XMM_SAVE + 7*16]
|
||||
%endif
|
||||
|
||||
|
||||
add rsp, STACK_SIZE
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop rbp
|
||||
%ifndef LINUX
|
||||
pop rdi
|
||||
pop rsi
|
||||
%endif
|
||||
pop rbx
|
||||
|
||||
ret
|
||||
|
||||
|
||||
section .data
|
||||
align 64
|
||||
K256:
|
||||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
PSHUFFLE_BYTE_FLIP_MASK: ddq 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
; shuffle xBxA -> 00BA
|
||||
_SHUF_00BA: ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100
|
||||
|
||||
; shuffle xDxC -> DC00
|
||||
_SHUF_DC00: ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF
|
826
ccan/ccan/crypto/sha256/benchmarks/sha256_avx2_rorx2.asm
Normal file
826
ccan/ccan/crypto/sha256/benchmarks/sha256_avx2_rorx2.asm
Normal file
|
@ -0,0 +1,826 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright (c) 2012, Intel Corporation
|
||||
;
|
||||
; All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions are
|
||||
; met:
|
||||
;
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
;
|
||||
; * Neither the name of the Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
|
||||
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;
|
||||
; Example YASM command lines:
|
||||
; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_avx2_rorx2.obj -g cv8 sha256_avx2_rorx2.asm
|
||||
; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_avx2_rorx2.o sha256_avx2_rorx2.asm
|
||||
;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;
|
||||
; This code is described in an Intel White-Paper:
|
||||
; "Fast SHA-256 Implementations on Intel Architecture Processors"
|
||||
;
|
||||
; To find it, surf to http://www.intel.com/p/en_US/embedded
|
||||
; and search for that title.
|
||||
; The paper is expected to be released roughly at the end of April, 2012
|
||||
;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; This code schedules 2 blocks at a time, with 4 lanes per block
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define VMOVDQ vmovdqu ;; assume buffers not aligned
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros
|
||||
|
||||
; addm [mem], reg
|
||||
; Add reg to mem using reg-mem add and store
; %1 = memory operand, %2 = register operand.
; On exit both the memory location and the register hold the sum,
; i.e. the register passed as %2 is overwritten.
|
||||
%macro addm 2
|
||||
add %2, %1
|
||||
mov %1, %2
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define X0 ymm4
|
||||
%define X1 ymm5
|
||||
%define X2 ymm6
|
||||
%define X3 ymm7
|
||||
|
||||
; XMM versions of above
|
||||
%define XWORD0 xmm4
|
||||
%define XWORD1 xmm5
|
||||
%define XWORD2 xmm6
|
||||
%define XWORD3 xmm7
|
||||
|
||||
%define XTMP0 ymm0
|
||||
%define XTMP1 ymm1
|
||||
%define XTMP2 ymm2
|
||||
%define XTMP3 ymm3
|
||||
%define XTMP4 ymm8
|
||||
%define XFER ymm9
|
||||
%define XTMP5 ymm11
|
||||
|
||||
%define SHUF_00BA ymm10 ; shuffle xBxA -> 00BA
|
||||
%define SHUF_DC00 ymm12 ; shuffle xDxC -> DC00
|
||||
%define BYTE_FLIP_MASK ymm13
|
||||
|
||||
%define X_BYTE_FLIP_MASK xmm13 ; XMM version of BYTE_FLIP_MASK
|
||||
|
||||
%ifdef LINUX
|
||||
%define NUM_BLKS rdx ; 3rd arg
|
||||
%define CTX rsi ; 2nd arg
|
||||
%define INP rdi ; 1st arg
|
||||
%define c ecx
|
||||
%define d r8d
|
||||
%define e edx ; clobbers NUM_BLKS
|
||||
%define y3 edi ; clobbers INP
|
||||
%else
|
||||
%define NUM_BLKS r8 ; 3rd arg
|
||||
%define CTX rdx ; 2nd arg
|
||||
%define INP rcx ; 1st arg
|
||||
%define c edi
|
||||
%define d esi
|
||||
%define e r8d ; clobbers NUM_BLKS
|
||||
%define y3 ecx ; clobbers INP
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
%define TBL rbp
|
||||
%define SRND CTX ; SRND is same register as CTX
|
||||
|
||||
%define a eax
|
||||
%define b ebx
|
||||
%define f r9d
|
||||
%define g r10d
|
||||
%define h r11d
|
||||
%define old_h r11d
|
||||
|
||||
%define T1 r12d
|
||||
%define y0 r13d
|
||||
%define y1 r14d
|
||||
%define y2 r15d
|
||||
|
||||
|
||||
; Stack frame layout. The *_SIZE symbols are slot sizes in bytes; the
; symbols without the suffix are byte offsets from rsp, valid after the
; prologue has subtracted STACK_SIZE and rounded rsp down to a multiple
; of 32.
;   _XFER     : K + W values for all 64 rounds of both blocks
;   _XMM_SAVE : save area for xmm6-xmm13 (empty when LINUX is defined)
;   _INP_END  : pointer to the last input block
;   _INP      : current input pointer
;   _CTX      : pointer to the digest
;   _RSP      : rsp value from before the frame was allocated
_XFER_SIZE equ 2*64*4 ; 2 blocks, 64 rounds, 4 bytes/round
|
||||
%ifdef LINUX
|
||||
_XMM_SAVE_SIZE equ 0
|
||||
%else
|
||||
_XMM_SAVE_SIZE equ 8*16
|
||||
%endif
|
||||
_INP_END_SIZE equ 8
|
||||
_INP_SIZE equ 8
|
||||
_CTX_SIZE equ 8
|
||||
_RSP_SIZE equ 8
|
||||
|
||||
_XFER equ 0
|
||||
_XMM_SAVE equ _XFER + _XFER_SIZE
|
||||
_INP_END equ _XMM_SAVE + _XMM_SAVE_SIZE
|
||||
_INP equ _INP_END + _INP_END_SIZE
|
||||
_CTX equ _INP + _INP_SIZE
|
||||
_RSP equ _CTX + _CTX_SIZE
|
||||
STACK_SIZE equ _RSP + _RSP_SIZE
|
||||
|
||||
; rotate_Xs
|
||||
; Rotate values of symbols X0...X3
; Assemble-time renaming only: the macro expands to no instructions.
; Afterwards X0 names the register that was X1, X1 the old X2,
; X2 the old X3, and X3 the old X0. X_ is a scratch symbol.
|
||||
%macro rotate_Xs 0
|
||||
%xdefine X_ X0
|
||||
%xdefine X0 X1
|
||||
%xdefine X1 X2
|
||||
%xdefine X2 X3
|
||||
%xdefine X3 X_
|
||||
%endm
|
||||
|
||||
; ROTATE_ARGS
|
||||
; Rotate values of symbols a...h
; Assemble-time renaming only: the macro expands to no instructions.
; Afterwards h names the register that was g, g the old f, ... b the
; old a, and a the old h. TMP_ is a scratch symbol.
; old_h is set to the register that was h before the rotation (the same
; register that a names afterwards); DO_4ROUNDS uses it to apply the
; previous round's final additions to h one round late.
|
||||
%macro ROTATE_ARGS 0
|
||||
%xdefine old_h h
|
||||
%xdefine TMP_ h
|
||||
%xdefine h g
|
||||
%xdefine g f
|
||||
%xdefine f e
|
||||
%xdefine e d
|
||||
%xdefine d c
|
||||
%xdefine c b
|
||||
%xdefine b a
|
||||
%xdefine a TMP_
|
||||
%endm
|
||||
|
||||
%macro FOUR_ROUNDS_AND_SCHED 1
|
||||
%define %%XFER %1
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
|
||||
add h, dword[%%XFER+0*4] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
vpalignr XTMP0, X3, X2, 4 ; XTMP0 = W[-7]
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
vpaddd XTMP0, XTMP0, X0 ; XTMP0 = W[-7] + W[-16]
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
vpalignr XTMP1, X1, X0, 4 ; XTMP1 = W[-15]
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
vpsrld XTMP2, XTMP1, 7
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
vpslld XTMP3, XTMP1, (32-7)
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
vpor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] ror 7
|
||||
|
||||
vpsrld XTMP2, XTMP1,18
|
||||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
add h, dword[%%XFER+1*4] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
|
||||
|
||||
vpsrld XTMP4, XTMP1, 3 ; XTMP4 = W[-15] >> 3
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
|
||||
vpslld XTMP1, XTMP1, (32-18)
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
|
||||
vpxor XTMP3, XTMP3, XTMP1
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
|
||||
vpxor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] ror 7 ^ W[-15] ror 18
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
|
||||
vpxor XTMP1, XTMP3, XTMP4 ; XTMP1 = s0
|
||||
vpshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA}
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
|
||||
vpaddd XTMP0, XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
vpsrld XTMP4, XTMP2, 10 ; XTMP4 = W[-2] >> 10 {BBAA}
|
||||
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
add h, [%%XFER+2*4] ; h = k + w + h ; --
|
||||
|
||||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] ror 19 {xBxA}
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
mov y2, f ; y2 = f ; CH
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xBxA}
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
vpxor XTMP2, XTMP2, XTMP3
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
vpxor XTMP4, XTMP4, XTMP2 ; XTMP4 = s1 {xBxA}
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
|
||||
vpshufb XTMP4, XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA}
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
vpaddd XTMP0, XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]}
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
vpshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC}
|
||||
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
|
||||
add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
add h, dword[%%XFER+3*4] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
|
||||
|
||||
vpsrld XTMP5, XTMP2, 10 ; XTMP5 = W[-2] >> 10 {DDCC}
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
|
||||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] ror 19 {xDxC}
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
|
||||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xDxC}
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
|
||||
vpxor XTMP2, XTMP2, XTMP3
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
|
||||
vpxor XTMP5, XTMP5, XTMP2 ; XTMP5 = s1 {xDxC}
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
vpshufb XTMP5, XTMP5, SHUF_DC00 ; XTMP5 = s1 {DC00}
|
||||
|
||||
vpaddd X0, XTMP5, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]}
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
ROTATE_ARGS
|
||||
rotate_Xs
|
||||
%endm
|
||||
|
||||
%macro DO_4ROUNDS 1
|
||||
%define %%XFER %1
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
add h, dword[%%XFER + 4*0] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
|
||||
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
|
||||
|
||||
;add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
|
||||
;add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
add old_h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
add old_h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
add h, dword[%%XFER + 4*1] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
|
||||
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
|
||||
|
||||
;add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
|
||||
;add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
add old_h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
add old_h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
add h, dword[%%XFER + 4*2] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
|
||||
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
|
||||
|
||||
;add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
|
||||
;add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
add old_h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
mov y2, f ; y2 = f ; CH
|
||||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A
|
||||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B
|
||||
xor y2, g ; y2 = f^g ; CH
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1
|
||||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1
|
||||
and y2, e ; y2 = (f^g)&e ; CH
|
||||
add old_h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1
|
||||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH
|
||||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A
|
||||
mov y3, a ; y3 = a ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0
|
||||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0
|
||||
add h, dword[%%XFER + 4*3] ; h = k + w + h ; --
|
||||
or y3, c ; y3 = a|c ; MAJA
|
||||
|
||||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0
|
||||
mov T1, a ; T1 = a ; MAJB
|
||||
and y3, b ; y3 = (a|c)&b ; MAJA
|
||||
and T1, c ; T1 = a&c ; MAJB
|
||||
add y2, y0 ; y2 = S1 + CH ; --
|
||||
|
||||
|
||||
add d, h ; d = k + w + h + d ; --
|
||||
or y3, T1 ; y3 = MAJ = (a|c)&b)|(a&c) ; MAJ
|
||||
add h, y1 ; h = k + w + h + S0 ; --
|
||||
|
||||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; --
|
||||
|
||||
|
||||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; --
|
||||
|
||||
add h, y3 ; h = t1 + S0 + MAJ ; --
|
||||
|
||||
ROTATE_ARGS
|
||||
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; void sha256_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
|
||||
;; arg 1 : pointer to input data
|
||||
;; arg 2 : pointer to digest
|
||||
;; arg 3 : Num blocks (count of 64-byte input blocks; 0 returns without touching the digest)
|
||||
section .text
|
||||
global sha256_rorx
|
||||
align 32
|
||||
sha256_rorx:
|
||||
push rbx
|
||||
%ifndef LINUX
|
||||
push rsi
|
||||
push rdi
|
||||
%endif
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
mov rax, rsp
|
||||
sub rsp,STACK_SIZE
|
||||
and rsp, -32
|
||||
mov [rsp + _RSP], rax
|
||||
|
||||
%ifndef LINUX
|
||||
vmovdqa [rsp + _XMM_SAVE + 0*16],xmm6
|
||||
vmovdqa [rsp + _XMM_SAVE + 1*16],xmm7
|
||||
vmovdqa [rsp + _XMM_SAVE + 2*16],xmm8
|
||||
vmovdqa [rsp + _XMM_SAVE + 3*16],xmm9
|
||||
vmovdqa [rsp + _XMM_SAVE + 4*16],xmm10
|
||||
vmovdqa [rsp + _XMM_SAVE + 5*16],xmm11
|
||||
vmovdqa [rsp + _XMM_SAVE + 6*16],xmm12
|
||||
vmovdqa [rsp + _XMM_SAVE + 7*16],xmm13
|
||||
%endif
|
||||
|
||||
shl NUM_BLKS, 6 ; convert to bytes
|
||||
jz done_hash
|
||||
lea NUM_BLKS, [NUM_BLKS + INP - 64] ; pointer to last block
|
||||
mov [rsp + _INP_END], NUM_BLKS
|
||||
|
||||
cmp INP, NUM_BLKS
|
||||
je only_one_block
|
||||
|
||||
;; load initial digest
|
||||
mov a,[4*0 + CTX]
|
||||
mov b,[4*1 + CTX]
|
||||
mov c,[4*2 + CTX]
|
||||
mov d,[4*3 + CTX]
|
||||
mov e,[4*4 + CTX]
|
||||
mov f,[4*5 + CTX]
|
||||
mov g,[4*6 + CTX]
|
||||
mov h,[4*7 + CTX]
|
||||
|
||||
vmovdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
|
||||
vmovdqa SHUF_00BA, [_SHUF_00BA wrt rip]
|
||||
vmovdqa SHUF_DC00, [_SHUF_DC00 wrt rip]
|
||||
|
||||
mov [rsp + _CTX], CTX
|
||||
|
||||
loop0:
|
||||
lea TBL,[K256 wrt rip]
|
||||
|
||||
;; Load first 16 dwords from two blocks
|
||||
VMOVDQ XTMP0, [INP + 0*32]
|
||||
VMOVDQ XTMP1, [INP + 1*32]
|
||||
VMOVDQ XTMP2, [INP + 2*32]
|
||||
VMOVDQ XTMP3, [INP + 3*32]
|
||||
|
||||
;; byte swap data
|
||||
vpshufb XTMP0, XTMP0, BYTE_FLIP_MASK
|
||||
vpshufb XTMP1, XTMP1, BYTE_FLIP_MASK
|
||||
vpshufb XTMP2, XTMP2, BYTE_FLIP_MASK
|
||||
vpshufb XTMP3, XTMP3, BYTE_FLIP_MASK
|
||||
|
||||
;; transpose data into high/low halves
|
||||
vperm2i128 X0, XTMP0, XTMP2, 0x20
|
||||
vperm2i128 X1, XTMP0, XTMP2, 0x31
|
||||
vperm2i128 X2, XTMP1, XTMP3, 0x20
|
||||
vperm2i128 X3, XTMP1, XTMP3, 0x31
|
||||
|
||||
last_block_enter:
|
||||
add INP, 64
|
||||
mov [rsp + _INP], INP
|
||||
|
||||
;; schedule 48 input dwords, by doing 3 iterations of 16 rounds each
|
||||
xor SRND, SRND
|
||||
|
||||
align 16
|
||||
loop1:
|
||||
vpaddd XFER, X0, [TBL + SRND + 0*32]
|
||||
vmovdqa [rsp + _XFER + SRND + 0*32], XFER
|
||||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 0*32
|
||||
|
||||
vpaddd XFER, X0, [TBL + SRND + 1*32]
|
||||
vmovdqa [rsp + _XFER + SRND + 1*32], XFER
|
||||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 1*32
|
||||
|
||||
vpaddd XFER, X0, [TBL + SRND + 2*32]
|
||||
vmovdqa [rsp + _XFER + SRND + 2*32], XFER
|
||||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 2*32
|
||||
|
||||
vpaddd XFER, X0, [TBL + SRND + 3*32]
|
||||
vmovdqa [rsp + _XFER + SRND + 3*32], XFER
|
||||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 3*32
|
||||
|
||||
add SRND, 4*32
|
||||
cmp SRND, 3 * 4*32
|
||||
jb loop1
|
||||
|
||||
loop2:
|
||||
;; Do last 16 rounds with no scheduling
|
||||
vpaddd XFER, X0, [TBL + SRND + 0*32]
|
||||
vmovdqa [rsp + _XFER + SRND + 0*32], XFER
|
||||
DO_4ROUNDS rsp + _XFER + SRND + 0*32
|
||||
vpaddd XFER, X1, [TBL + SRND + 1*32]
|
||||
vmovdqa [rsp + _XFER + SRND + 1*32], XFER
|
||||
DO_4ROUNDS rsp + _XFER + SRND + 1*32
|
||||
add SRND, 2*32
|
||||
|
||||
vmovdqa X0, X2
|
||||
vmovdqa X1, X3
|
||||
|
||||
cmp SRND, 4 * 4*32
|
||||
jb loop2
|
||||
|
||||
mov CTX, [rsp + _CTX]
|
||||
mov INP, [rsp + _INP]
|
||||
|
||||
addm [4*0 + CTX],a
|
||||
addm [4*1 + CTX],b
|
||||
addm [4*2 + CTX],c
|
||||
addm [4*3 + CTX],d
|
||||
addm [4*4 + CTX],e
|
||||
addm [4*5 + CTX],f
|
||||
addm [4*6 + CTX],g
|
||||
addm [4*7 + CTX],h
|
||||
|
||||
cmp INP, [rsp + _INP_END]
|
||||
ja done_hash
|
||||
|
||||
;;;; Do second block using previously scheduled results
|
||||
xor SRND, SRND
|
||||
align 16
|
||||
loop3:
|
||||
DO_4ROUNDS rsp + _XFER + SRND + 0*32 + 16
|
||||
DO_4ROUNDS rsp + _XFER + SRND + 1*32 + 16
|
||||
add SRND, 2*32
|
||||
cmp SRND, 4 * 4*32
|
||||
jb loop3
|
||||
|
||||
mov CTX, [rsp + _CTX]
|
||||
mov INP, [rsp + _INP]
|
||||
add INP, 64
|
||||
|
||||
addm [4*0 + CTX],a
|
||||
addm [4*1 + CTX],b
|
||||
addm [4*2 + CTX],c
|
||||
addm [4*3 + CTX],d
|
||||
addm [4*4 + CTX],e
|
||||
addm [4*5 + CTX],f
|
||||
addm [4*6 + CTX],g
|
||||
addm [4*7 + CTX],h
|
||||
|
||||
cmp INP, [rsp + _INP_END]
|
||||
jb loop0
|
||||
ja done_hash
|
||||
|
||||
do_last_block:
|
||||
;;;; do last block
|
||||
lea TBL,[K256 wrt rip]
|
||||
|
||||
VMOVDQ XWORD0, [INP + 0*16]
|
||||
VMOVDQ XWORD1, [INP + 1*16]
|
||||
VMOVDQ XWORD2, [INP + 2*16]
|
||||
VMOVDQ XWORD3, [INP + 3*16]
|
||||
|
||||
vpshufb XWORD0, XWORD0, X_BYTE_FLIP_MASK
|
||||
vpshufb XWORD1, XWORD1, X_BYTE_FLIP_MASK
|
||||
vpshufb XWORD2, XWORD2, X_BYTE_FLIP_MASK
|
||||
vpshufb XWORD3, XWORD3, X_BYTE_FLIP_MASK
|
||||
|
||||
jmp last_block_enter
|
||||
|
||||
only_one_block:
|
||||
|
||||
;; load initial digest
|
||||
mov a,[4*0 + CTX]
|
||||
mov b,[4*1 + CTX]
|
||||
mov c,[4*2 + CTX]
|
||||
mov d,[4*3 + CTX]
|
||||
mov e,[4*4 + CTX]
|
||||
mov f,[4*5 + CTX]
|
||||
mov g,[4*6 + CTX]
|
||||
mov h,[4*7 + CTX]
|
||||
|
||||
vmovdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
|
||||
vmovdqa SHUF_00BA, [_SHUF_00BA wrt rip]
|
||||
vmovdqa SHUF_DC00, [_SHUF_DC00 wrt rip]
|
||||
|
||||
mov [rsp + _CTX], CTX
|
||||
jmp do_last_block
|
||||
|
||||
done_hash:
|
||||
%ifndef LINUX
|
||||
vmovdqa xmm6,[rsp + _XMM_SAVE + 0*16]
|
||||
vmovdqa xmm7,[rsp + _XMM_SAVE + 1*16]
|
||||
vmovdqa xmm8,[rsp + _XMM_SAVE + 2*16]
|
||||
vmovdqa xmm9,[rsp + _XMM_SAVE + 3*16]
|
||||
vmovdqa xmm10,[rsp + _XMM_SAVE + 4*16]
|
||||
vmovdqa xmm11,[rsp + _XMM_SAVE + 5*16]
|
||||
vmovdqa xmm12,[rsp + _XMM_SAVE + 6*16]
|
||||
vmovdqa xmm13,[rsp + _XMM_SAVE + 7*16]
|
||||
%endif
|
||||
|
||||
mov rsp, [rsp + _RSP]
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
%ifndef LINUX
|
||||
pop rdi
|
||||
pop rsi
|
||||
%endif
|
||||
pop rbx
|
||||
|
||||
ret
|
||||
|
||||
section .data
|
||||
align 64
|
||||
; SHA-256 round constants, four per row, with every row emitted twice.
; The round loops add a 32-byte slice of this table ([TBL + SRND + n*32])
; to a ymm register holding message words of two blocks, one block per
; 128-bit half, so both halves need the same four constants.
K256:
|
||||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
ddq 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
; shuffle xBxA -> 00BA
|
||||
_SHUF_00BA:
|
||||
ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
|
||||
|
||||
; shuffle xDxC -> DC00
|
||||
_SHUF_DC00:
|
||||
ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
|
1507
ccan/ccan/crypto/sha256/benchmarks/sha256_avx2_rorx8.asm
Normal file
1507
ccan/ccan/crypto/sha256/benchmarks/sha256_avx2_rorx8.asm
Normal file
File diff suppressed because it is too large
Load diff
544
ccan/ccan/crypto/sha256/benchmarks/sha256_sse4.asm
Normal file
544
ccan/ccan/crypto/sha256/benchmarks/sha256_sse4.asm
Normal file
|
@ -0,0 +1,544 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright (c) 2012, Intel Corporation
|
||||
;
|
||||
; All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions are
|
||||
; met:
|
||||
;
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
;
|
||||
; * Neither the name of the Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
|
||||
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;
|
||||
; Example YASM command lines:
|
||||
; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_sse4.obj -g cv8 sha256_sse4.asm
|
||||
; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_sse4.o sha256_sse4.asm
|
||||
;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;
|
||||
; This code is described in an Intel White-Paper:
|
||||
; "Fast SHA-256 Implementations on Intel Architecture Processors"
|
||||
;
|
||||
; To find it, surf to http://www.intel.com/p/en_US/embedded
|
||||
; and search for that title.
|
||||
; The paper is expected to be released roughly at the end of April, 2012
|
||||
;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; This code schedules 1 block at a time, with 4 lanes per block
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define MOVDQ movdqu ;; assume buffers not aligned
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros
|
||||
|
||||
; addm [mem], reg
|
||||
; Add reg to mem using reg-mem add and store
|
||||
; %1 = memory operand, %2 = register. On exit both hold the sum (%1 + %2).
%macro addm 2
|
||||
add %2, %1 ; reg += [mem]
|
||||
mov %1, %2 ; [mem] = reg (store the updated sum back)
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
|
||||
; Load xmm with mem and byte swap each dword
|
||||
; %1 = destination xmm, %2 = source memory, %3 = xmm holding the pshufb mask.
%macro COPY_XMM_AND_BSWAP 3
|
||||
MOVDQ %1, %2 ; MOVDQ is defined above as movdqu, so %2 need not be aligned
|
||||
pshufb %1, %3 ; reorder the bytes of %1 as selected by mask %3
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define X0 xmm4
|
||||
%define X1 xmm5
|
||||
%define X2 xmm6
|
||||
%define X3 xmm7
|
||||
|
||||
%define XTMP0 xmm0
|
||||
%define XTMP1 xmm1
|
||||
%define XTMP2 xmm2
|
||||
%define XTMP3 xmm3
|
||||
%define XTMP4 xmm8
|
||||
%define XFER xmm9
|
||||
|
||||
%define SHUF_00BA xmm10 ; shuffle xBxA -> 00BA
|
||||
%define SHUF_DC00 xmm11 ; shuffle xDxC -> DC00
|
||||
%define BYTE_FLIP_MASK xmm12
|
||||
|
||||
%ifdef LINUX
|
||||
%define NUM_BLKS rdx ; 3rd arg
|
||||
%define CTX rsi ; 2nd arg
|
||||
%define INP rdi ; 1st arg
|
||||
|
||||
%define SRND rdi ; clobbers INP
|
||||
%define c ecx
|
||||
%define d r8d
|
||||
%define e edx
|
||||
%else
|
||||
%define NUM_BLKS r8 ; 3rd arg
|
||||
%define CTX rdx ; 2nd arg
|
||||
%define INP rcx ; 1st arg
|
||||
|
||||
%define SRND rcx ; clobbers INP
|
||||
%define c edi
|
||||
%define d esi
|
||||
%define e r8d
|
||||
|
||||
%endif
|
||||
%define TBL rbp
|
||||
%define a eax
|
||||
%define b ebx
|
||||
|
||||
%define f r9d
|
||||
%define g r10d
|
||||
%define h r11d
|
||||
|
||||
%define y0 r13d
|
||||
%define y1 r14d
|
||||
%define y2 r15d
|
||||
|
||||
|
||||
|
||||
_INP_END_SIZE equ 8
|
||||
_INP_SIZE equ 8
|
||||
_XFER_SIZE equ 8
|
||||
%ifdef LINUX
|
||||
_XMM_SAVE_SIZE equ 0
|
||||
%else
|
||||
_XMM_SAVE_SIZE equ 7*16
|
||||
%endif
|
||||
; STACK_SIZE plus pushes must be an odd multiple of 8
|
||||
_ALIGN_SIZE equ 8
|
||||
|
||||
_INP_END equ 0
|
||||
_INP equ _INP_END + _INP_END_SIZE
|
||||
_XFER equ _INP + _INP_SIZE
|
||||
_XMM_SAVE equ _XFER + _XFER_SIZE + _ALIGN_SIZE
|
||||
STACK_SIZE equ _XMM_SAVE + _XMM_SAVE_SIZE
|
||||
|
||||
; rotate_Xs
|
||||
; Rotate values of symbols X0...X3
|
||||
; Assemble-time renaming only: no instructions are emitted.
; After expansion the name X0 refers to the old X1, X1 to the old X2,
; X2 to the old X3, and X3 to the old X0.
%macro rotate_Xs 0
|
||||
%xdefine X_ X0 ; remember the old X0
|
||||
%xdefine X0 X1
|
||||
%xdefine X1 X2
|
||||
%xdefine X2 X3
|
||||
%xdefine X3 X_ ; old X0 becomes the new X3
|
||||
%endm
|
||||
|
||||
; ROTATE_ARGS
|
||||
; Rotate values of symbols a...h
|
||||
; Assemble-time renaming only: no instructions are emitted.
; Each working-variable name takes over the register of its predecessor
; (b <- a, c <- b, ..., h <- g) and the name a takes over the old h register.
%macro ROTATE_ARGS 0
|
||||
%xdefine TMP_ h ; remember the old h
|
||||
%xdefine h g
|
||||
%xdefine g f
|
||||
%xdefine f e
|
||||
%xdefine e d
|
||||
%xdefine d c
|
||||
%xdefine c b
|
||||
%xdefine b a
|
||||
%xdefine a TMP_ ; old h becomes the new a
|
||||
%endm
|
||||
|
||||
; Performs four rounds on the scalar state a..h, reading k+w from
; [rsp + _XFER + 0..3*4], interleaved with the SIMD computation of the next
; four message-schedule words, which are left in X0 before the X names rotate.
; NOTE: in the scalar comments below ">>" stands for the 32-bit rotate
; performed by the "ror" instruction, not a logical shift.
%macro FOUR_ROUNDS_AND_SCHED 0
|
||||
;; compute s0 four at a time and s1 two at a time
|
||||
;; compute W[-16] + W[-7] 4 at a time
|
||||
movdqa XTMP0, X3
|
||||
mov y0, e ; y0 = e
|
||||
ror y0, (25-11) ; y0 = e >> (25-11)
|
||||
mov y1, a ; y1 = a
|
||||
palignr XTMP0, X2, 4 ; XTMP0 = W[-7]
|
||||
ror y1, (22-13) ; y1 = a >> (22-13)
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
mov y2, f ; y2 = f
|
||||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
movdqa XTMP1, X1
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13))
|
||||
xor y2, g ; y2 = f^g
|
||||
paddd XTMP0, X0 ; XTMP0 = W[-7] + W[-16]
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
;; compute s0
|
||||
palignr XTMP1, X0, 4 ; XTMP1 = W[-15]
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
movdqa XTMP2, XTMP1 ; XTMP2 = W[-15]
|
||||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
add y2, [rsp + _XFER + 0*4] ; y2 = k + w + S1 + CH
|
||||
movdqa XTMP3, XTMP1 ; XTMP3 = W[-15]
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
pslld XTMP1, (32-7)
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
psrld XTMP2, 7
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
por XTMP1, XTMP2 ; XTMP1 = W[-15] ror 7
|
||||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
movdqa XTMP2, XTMP3 ; XTMP2 = W[-15]
|
||||
mov y0, e ; y0 = e
|
||||
mov y1, a ; y1 = a
|
||||
movdqa XTMP4, XTMP3 ; XTMP4 = W[-15]
|
||||
ror y0, (25-11) ; y0 = e >> (25-11)
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
mov y2, f ; y2 = f
|
||||
ror y1, (22-13) ; y1 = a >> (22-13)
|
||||
pslld XTMP3, (32-18)
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13))
|
||||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
xor y2, g ; y2 = f^g
|
||||
psrld XTMP2, 18
|
||||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
pxor XTMP1, XTMP3
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
psrld XTMP4, 3 ; XTMP4 = W[-15] >> 3
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
add y2, [rsp + _XFER + 1*4] ; y2 = k + w + S1 + CH
|
||||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
pxor XTMP1, XTMP2 ; XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
pxor XTMP1, XTMP4 ; XTMP1 = s0
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
;; compute low s1
|
||||
pshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA}
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
paddd XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0
|
||||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
movdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {BBAA}
|
||||
mov y0, e ; y0 = e
|
||||
mov y1, a ; y1 = a
|
||||
ror y0, (25-11) ; y0 = e >> (25-11)
|
||||
movdqa XTMP4, XTMP2 ; XTMP4 = W[-2] {BBAA}
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
ror y1, (22-13) ; y1 = a >> (22-13)
|
||||
mov y2, f ; y2 = f
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13))
|
||||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
psrlq XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xBxA}
|
||||
xor y2, g ; y2 = f^g
|
||||
psrlq XTMP3, 19 ; XTMP3 = W[-2] ror 19 {xBxA}
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
psrld XTMP4, 10 ; XTMP4 = W[-2] >> 10 {BBAA}
|
||||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
pxor XTMP2, XTMP3
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, [rsp + _XFER + 2*4] ; y2 = k + w + S1 + CH
|
||||
pxor XTMP4, XTMP2 ; XTMP4 = s1 {xBxA}
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
pshufb XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA}
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
paddd XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]}
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
;; compute high s1
|
||||
pshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC}
|
||||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
movdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {DDCC}
|
||||
mov y0, e ; y0 = e
|
||||
ror y0, (25-11) ; y0 = e >> (25-11)
|
||||
mov y1, a ; y1 = a
|
||||
movdqa X0, XTMP2 ; X0 = W[-2] {DDCC}
|
||||
ror y1, (22-13) ; y1 = a >> (22-13)
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
mov y2, f ; y2 = f
|
||||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
psrlq XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xDxC}
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13))
|
||||
xor y2, g ; y2 = f^g
|
||||
psrlq XTMP3, 19 ; XTMP3 = W[-2] ror 19 {xDxC}
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
psrld X0, 10 ; X0 = W[-2] >> 10 {DDCC}
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
pxor XTMP2, XTMP3
|
||||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
add y2, [rsp + _XFER + 3*4] ; y2 = k + w + S1 + CH
|
||||
pxor X0, XTMP2 ; X0 = s1 {xDxC}
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
pshufb X0, SHUF_DC00 ; X0 = s1 {DC00}
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
paddd X0, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]}
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
ROTATE_ARGS
|
||||
rotate_Xs
|
||||
%endm
|
||||
|
||||
;; input is [rsp + _XFER + %1 * 4]
|
||||
; One round on the scalar state a..h with no message scheduling.
; %1 selects which dword of the 16-byte XFER stack slot supplies k + w.
; NOTE: in the comments below ">>" stands for the 32-bit rotate performed
; by the "ror" instruction, not a logical shift.
%macro DO_ROUND 1
|
||||
mov y0, e ; y0 = e
|
||||
ror y0, (25-11) ; y0 = e >> (25-11)
|
||||
mov y1, a ; y1 = a
|
||||
xor y0, e ; y0 = e ^ (e >> (25-11))
|
||||
ror y1, (22-13) ; y1 = a >> (22-13)
|
||||
mov y2, f ; y2 = f
|
||||
xor y1, a ; y1 = a ^ (a >> (22-13))
|
||||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
|
||||
xor y2, g ; y2 = f^g
|
||||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
|
||||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
|
||||
and y2, e ; y2 = (f^g)&e
|
||||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
|
||||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
|
||||
xor y2, g ; y2 = CH = ((f^g)&e)^g
|
||||
add y2, y0 ; y2 = S1 + CH
|
||||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
|
||||
add y2, [rsp + _XFER + %1 * 4] ; y2 = k + w + S1 + CH
|
||||
mov y0, a ; y0 = a
|
||||
add h, y2 ; h = h + S1 + CH + k + w
|
||||
mov y2, a ; y2 = a
|
||||
or y0, c ; y0 = a|c
|
||||
add d, h ; d = d + h + S1 + CH + k + w
|
||||
and y2, c ; y2 = a&c
|
||||
and y0, b ; y0 = (a|c)&b
|
||||
add h, y1 ; h = h + S1 + CH + k + w + S0
|
||||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c)
|
||||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ
|
||||
ROTATE_ARGS
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; void sha256_sse4(void *input_data, UINT32 digest[8], UINT64 num_blks)
|
||||
;; arg 1 : pointer to input data
|
||||
;; arg 2 : pointer to digest
|
||||
;; arg 3 : Num blocks
|
||||
section .text
|
||||
global sha256_sse4
|
||||
align 32
|
||||
; Entry point. Processes NUM_BLKS 64-byte blocks starting at INP and folds
; each one into the eight 32-bit digest words stored at CTX.
; Returns immediately (digest untouched) when NUM_BLKS is zero.
sha256_sse4:
|
||||
push rbx ; save the general registers this routine overwrites
|
||||
%ifndef LINUX
|
||||
push rsi
|
||||
push rdi
|
||||
%endif
|
||||
push rbp
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
sub rsp,STACK_SIZE ; room for _INP_END, _INP, _XFER (and the xmm save area)
|
||||
%ifndef LINUX
|
||||
movdqa [rsp + _XMM_SAVE + 0*16],xmm6 ; non-LINUX build: preserve xmm6-xmm12
|
||||
movdqa [rsp + _XMM_SAVE + 1*16],xmm7
|
||||
movdqa [rsp + _XMM_SAVE + 2*16],xmm8
|
||||
movdqa [rsp + _XMM_SAVE + 3*16],xmm9
|
||||
movdqa [rsp + _XMM_SAVE + 4*16],xmm10
|
||||
movdqa [rsp + _XMM_SAVE + 5*16],xmm11
|
||||
movdqa [rsp + _XMM_SAVE + 6*16],xmm12
|
||||
%endif
|
||||
|
||||
shl NUM_BLKS, 6 ; convert to bytes
|
||||
jz done_hash ; zero blocks: nothing to hash
|
||||
add NUM_BLKS, INP ; pointer to end of data
|
||||
mov [rsp + _INP_END], NUM_BLKS ; spill it: NUM_BLKS' register is reused for e
|
||||
|
||||
;; load initial digest
|
||||
mov a,[4*0 + CTX]
|
||||
mov b,[4*1 + CTX]
|
||||
mov c,[4*2 + CTX]
|
||||
mov d,[4*3 + CTX]
|
||||
mov e,[4*4 + CTX]
|
||||
mov f,[4*5 + CTX]
|
||||
mov g,[4*6 + CTX]
|
||||
mov h,[4*7 + CTX]
|
||||
|
||||
movdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
|
||||
movdqa SHUF_00BA, [_SHUF_00BA wrt rip]
|
||||
movdqa SHUF_DC00, [_SHUF_DC00 wrt rip]
|
||||
|
||||
loop0: ; one iteration per 64-byte input block
|
||||
lea TBL,[K256 wrt rip]
|
||||
|
||||
;; byte swap first 16 dwords
|
||||
COPY_XMM_AND_BSWAP X0, [INP + 0*16], BYTE_FLIP_MASK
|
||||
COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK
|
||||
COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK
|
||||
COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK
|
||||
|
||||
mov [rsp + _INP], INP ; spill INP: SRND shares its register
|
||||
|
||||
;; schedule 48 input dwords, by doing 3 rounds of 16 each
|
||||
mov SRND, 3
|
||||
align 16
|
||||
loop1:
|
||||
movdqa XFER, [TBL + 0*16]
|
||||
paddd XFER, X0
|
||||
movdqa [rsp + _XFER], XFER
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
movdqa XFER, [TBL + 1*16]
|
||||
paddd XFER, X0
|
||||
movdqa [rsp + _XFER], XFER
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
movdqa XFER, [TBL + 2*16]
|
||||
paddd XFER, X0
|
||||
movdqa [rsp + _XFER], XFER
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
movdqa XFER, [TBL + 3*16]
|
||||
paddd XFER, X0
|
||||
movdqa [rsp + _XFER], XFER
|
||||
add TBL, 4*16
|
||||
FOUR_ROUNDS_AND_SCHED
|
||||
|
||||
sub SRND, 1
|
||||
jne loop1
|
||||
|
||||
mov SRND, 2 ; last 16 rounds: 2 passes of 8, no more scheduling
|
||||
loop2:
|
||||
paddd X0, [TBL + 0*16]
|
||||
movdqa [rsp + _XFER], X0
|
||||
DO_ROUND 0
|
||||
DO_ROUND 1
|
||||
DO_ROUND 2
|
||||
DO_ROUND 3
|
||||
paddd X1, [TBL + 1*16]
|
||||
movdqa [rsp + _XFER], X1
|
||||
add TBL, 2*16
|
||||
DO_ROUND 0
|
||||
DO_ROUND 1
|
||||
DO_ROUND 2
|
||||
DO_ROUND 3
|
||||
|
||||
movdqa X0, X2 ; second pass consumes the remaining two vectors
|
||||
movdqa X1, X3
|
||||
|
||||
sub SRND, 1
|
||||
jne loop2
|
||||
|
||||
addm [4*0 + CTX],a ; fold this block's result into the stored digest
|
||||
addm [4*1 + CTX],b
|
||||
addm [4*2 + CTX],c
|
||||
addm [4*3 + CTX],d
|
||||
addm [4*4 + CTX],e
|
||||
addm [4*5 + CTX],f
|
||||
addm [4*6 + CTX],g
|
||||
addm [4*7 + CTX],h
|
||||
|
||||
mov INP, [rsp + _INP]
|
||||
add INP, 64 ; advance to the next 64-byte block
|
||||
cmp INP, [rsp + _INP_END]
|
||||
jne loop0
|
||||
|
||||
done_hash:
|
||||
%ifndef LINUX
|
||||
movdqa xmm6,[rsp + _XMM_SAVE + 0*16]
|
||||
movdqa xmm7,[rsp + _XMM_SAVE + 1*16]
|
||||
movdqa xmm8,[rsp + _XMM_SAVE + 2*16]
|
||||
movdqa xmm9,[rsp + _XMM_SAVE + 3*16]
|
||||
movdqa xmm10,[rsp + _XMM_SAVE + 4*16]
|
||||
movdqa xmm11,[rsp + _XMM_SAVE + 5*16]
|
||||
movdqa xmm12,[rsp + _XMM_SAVE + 6*16]
|
||||
%endif
|
||||
|
||||
add rsp, STACK_SIZE
|
||||
|
||||
pop r15 ; restore in reverse order of the pushes above
|
||||
pop r14
|
||||
pop r13
|
||||
pop rbp
|
||||
%ifndef LINUX
|
||||
pop rdi
|
||||
pop rsi
|
||||
%endif
|
||||
pop rbx
|
||||
|
||||
ret
|
||||
|
||||
|
||||
section .data
|
||||
align 64
|
||||
K256:
|
||||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
PSHUFFLE_BYTE_FLIP_MASK: ddq 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
; shuffle xBxA -> 00BA
|
||||
_SHUF_00BA: ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100
|
||||
|
||||
; shuffle xDxC -> DC00
|
||||
_SHUF_DC00: ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF
|
|
@ -36,7 +36,7 @@ void sha256_init(struct sha256_ctx *ctx)
|
|||
SHA256_Init(&ctx->c);
|
||||
}
|
||||
|
||||
void sha256_update_bytes(struct sha256_ctx *ctx, const void *p, size_t size)
|
||||
void sha256_update(struct sha256_ctx *ctx, const void *p, size_t size)
|
||||
{
|
||||
check_sha256(ctx);
|
||||
SHA256_Update(&ctx->c, p, size);
|
||||
|
|
1
ccan/ccan/htable/LICENSE
Symbolic link
1
ccan/ccan/htable/LICENSE
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../licenses/LGPL-2.1
|
116
ccan/ccan/htable/_info
Normal file
116
ccan/ccan/htable/_info
Normal file
|
@ -0,0 +1,116 @@
|
|||
#include "config.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/**
|
||||
* htable - hash table routines
|
||||
*
|
||||
* A hash table is an efficient structure for looking up keys. This version
|
||||
* grows with usage and allows efficient deletion.
|
||||
*
|
||||
* Example:
|
||||
* #include <ccan/htable/htable.h>
|
||||
* #include <ccan/hash/hash.h>
|
||||
* #include <stdio.h>
|
||||
* #include <err.h>
|
||||
* #include <string.h>
|
||||
*
|
||||
* struct name_to_digit {
|
||||
* const char *name;
|
||||
* unsigned int val;
|
||||
* };
|
||||
*
|
||||
* static struct name_to_digit map[] = {
|
||||
* { "zero", 0},
|
||||
* { "one", 1 },
|
||||
* { "two", 2 },
|
||||
* { "three", 3 },
|
||||
* { "four", 4 },
|
||||
* { "five", 5 },
|
||||
* { "six", 6 },
|
||||
* { "seven", 7 },
|
||||
* { "eight", 8 },
|
||||
* { "nine", 9 }
|
||||
* };
|
||||
*
|
||||
* // Wrapper for rehash function pointer.
|
||||
* static size_t rehash(const void *e, void *unused)
|
||||
* {
|
||||
* return hash_string(((struct name_to_digit *)e)->name);
|
||||
* }
|
||||
*
|
||||
* // Comparison function.
|
||||
* static bool streq(const void *e, void *string)
|
||||
* {
|
||||
* return strcmp(((struct name_to_digit *)e)->name, string) == 0;
|
||||
* }
|
||||
*
|
||||
* // We let them add their own aliases, eg. --alias=v=5
|
||||
* static void add_alias(struct htable *ht, const char *alias)
|
||||
* {
|
||||
* char *eq;
|
||||
* struct name_to_digit *n;
|
||||
*
|
||||
* n = malloc(sizeof(*n));
|
||||
* n->name = strdup(alias);
|
||||
*
|
||||
* eq = strchr(n->name, '=');
|
||||
* if (!eq || ((n->val = atoi(eq+1)) == 0 && strcmp(eq+1, "0") != 0))
|
||||
* errx(1, "Usage: --alias=<name>=<value>");
|
||||
* *eq = '\0';
|
||||
* htable_add(ht, hash_string(n->name), n);
|
||||
* }
|
||||
*
|
||||
* int main(int argc, char *argv[])
|
||||
* {
|
||||
* struct htable ht;
|
||||
* unsigned int i;
|
||||
* unsigned long val;
|
||||
*
|
||||
* if (argc < 2)
|
||||
* errx(1, "Usage: %s [--alias=<name>=<val>]... <str>...",
|
||||
* argv[0]);
|
||||
*
|
||||
* // Create and populate hash table.
|
||||
* htable_init(&ht, rehash, NULL);
|
||||
* for (i = 0; i < sizeof(map)/sizeof(map[0]); i++)
|
||||
* htable_add(&ht, hash_string(map[i].name), &map[i]);
|
||||
*
|
||||
* // Add any aliases to the hash table.
|
||||
* for (i = 1; i < argc; i++) {
|
||||
* if (!strncmp(argv[i], "--alias=", strlen("--alias=")))
|
||||
* add_alias(&ht, argv[i] + strlen("--alias="));
|
||||
* else
|
||||
* break;
|
||||
* }
|
||||
*
|
||||
* // Find the other args in the hash table.
|
||||
* for (val = 0; i < argc; i++) {
|
||||
* struct name_to_digit *n;
|
||||
* n = htable_get(&ht, hash_string(argv[i]),
|
||||
* streq, argv[i]);
|
||||
* if (!n)
|
||||
* errx(1, "Invalid digit name %s", argv[i]);
|
||||
* // Append it to the value we are building up.
|
||||
* val *= 10;
|
||||
* val += n->val;
|
||||
* }
|
||||
* printf("%lu\n", val);
|
||||
* return 0;
|
||||
* }
|
||||
*
|
||||
* License: LGPL (v2.1 or any later version)
|
||||
* Author: Rusty Russell <rusty@rustcorp.com.au>
|
||||
*/
|
||||
/* ccanlint metadata query: prints this module's dependencies when invoked
 * with the single argument "depends"; any other invocation fails with 1. */
int main(int argc, char *argv[])
{
	const bool wants_depends = (argc == 2)
		&& strcmp(argv[1], "depends") == 0;

	if (!wants_depends)
		return 1;

	printf("ccan/compiler\n");
	return 0;
}
|
296
ccan/ccan/htable/htable.c
Normal file
296
ccan/ccan/htable/htable.c
Normal file
|
@ -0,0 +1,296 @@
|
|||
/* Licensed under LGPLv2+ - see LICENSE file for details */
|
||||
#include <ccan/htable/htable.h>
|
||||
#include <ccan/compiler/compiler.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* We use 0x1 as deleted marker. */
|
||||
#define HTABLE_DELETED (0x1)
|
||||
|
||||
/* We clear out the bits which are always the same, and put metadata there. */
|
||||
static inline uintptr_t get_extra_ptr_bits(const struct htable *ht,
|
||||
uintptr_t e)
|
||||
{
|
||||
return e & ht->common_mask;
|
||||
}
|
||||
|
||||
static inline void *get_raw_ptr(const struct htable *ht, uintptr_t e)
|
||||
{
|
||||
return (void *)((e & ~ht->common_mask) | ht->common_bits);
|
||||
}
|
||||
|
||||
static inline uintptr_t make_hval(const struct htable *ht,
|
||||
const void *p, uintptr_t bits)
|
||||
{
|
||||
return ((uintptr_t)p & ~ht->common_mask) | bits;
|
||||
}
|
||||
|
||||
static inline bool entry_is_valid(uintptr_t e)
|
||||
{
|
||||
return e > HTABLE_DELETED;
|
||||
}
|
||||
|
||||
static inline uintptr_t get_hash_ptr_bits(const struct htable *ht,
|
||||
size_t hash)
|
||||
{
|
||||
/* Shuffling the extra bits (as specified in mask) down the
|
||||
* end is quite expensive. But the lower bits are redundant, so
|
||||
* we fold the value first. */
|
||||
return (hash ^ (hash >> ht->bits))
|
||||
& ht->common_mask & ~ht->perfect_bit;
|
||||
}
|
||||
|
||||
void htable_init(struct htable *ht,
|
||||
size_t (*rehash)(const void *elem, void *priv), void *priv)
|
||||
{
|
||||
struct htable empty = HTABLE_INITIALIZER(empty, NULL, NULL);
|
||||
*ht = empty;
|
||||
ht->rehash = rehash;
|
||||
ht->priv = priv;
|
||||
ht->table = &ht->perfect_bit;
|
||||
}
|
||||
|
||||
bool htable_init_sized(struct htable *ht,
|
||||
size_t (*rehash)(const void *, void *),
|
||||
void *priv, size_t expect)
|
||||
{
|
||||
htable_init(ht, rehash, priv);
|
||||
|
||||
/* Don't go insane with sizing. */
|
||||
for (ht->bits = 1; ((size_t)3 << ht->bits) / 4 < expect; ht->bits++) {
|
||||
if (ht->bits == 30)
|
||||
break;
|
||||
}
|
||||
|
||||
ht->table = calloc(1 << ht->bits, sizeof(size_t));
|
||||
if (!ht->table) {
|
||||
ht->table = &ht->perfect_bit;
|
||||
return false;
|
||||
}
|
||||
ht->max = ((size_t)3 << ht->bits) / 4;
|
||||
ht->max_with_deleted = ((size_t)9 << ht->bits) / 10;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void htable_clear(struct htable *ht)
|
||||
{
|
||||
if (ht->table != &ht->perfect_bit)
|
||||
free((void *)ht->table);
|
||||
htable_init(ht, ht->rehash, ht->priv);
|
||||
}
|
||||
|
||||
static size_t hash_bucket(const struct htable *ht, size_t h)
|
||||
{
|
||||
return h & ((1 << ht->bits)-1);
|
||||
}
|
||||
|
||||
static void *htable_val(const struct htable *ht,
|
||||
struct htable_iter *i, size_t hash, uintptr_t perfect)
|
||||
{
|
||||
uintptr_t h2 = get_hash_ptr_bits(ht, hash) | perfect;
|
||||
|
||||
while (ht->table[i->off]) {
|
||||
if (ht->table[i->off] != HTABLE_DELETED) {
|
||||
if (get_extra_ptr_bits(ht, ht->table[i->off]) == h2)
|
||||
return get_raw_ptr(ht, ht->table[i->off]);
|
||||
}
|
||||
i->off = (i->off + 1) & ((1 << ht->bits)-1);
|
||||
h2 &= ~perfect;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *htable_firstval(const struct htable *ht,
|
||||
struct htable_iter *i, size_t hash)
|
||||
{
|
||||
i->off = hash_bucket(ht, hash);
|
||||
return htable_val(ht, i, hash, ht->perfect_bit);
|
||||
}
|
||||
|
||||
void *htable_nextval(const struct htable *ht,
|
||||
struct htable_iter *i, size_t hash)
|
||||
{
|
||||
i->off = (i->off + 1) & ((1 << ht->bits)-1);
|
||||
return htable_val(ht, i, hash, 0);
|
||||
}
|
||||
|
||||
void *htable_first(const struct htable *ht, struct htable_iter *i)
|
||||
{
|
||||
for (i->off = 0; i->off < (size_t)1 << ht->bits; i->off++) {
|
||||
if (entry_is_valid(ht->table[i->off]))
|
||||
return get_raw_ptr(ht, ht->table[i->off]);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *htable_next(const struct htable *ht, struct htable_iter *i)
|
||||
{
|
||||
for (i->off++; i->off < (size_t)1 << ht->bits; i->off++) {
|
||||
if (entry_is_valid(ht->table[i->off]))
|
||||
return get_raw_ptr(ht, ht->table[i->off]);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* This does not expand the hash table, that's up to caller. */
|
||||
static void ht_add(struct htable *ht, const void *new, size_t h)
|
||||
{
|
||||
size_t i;
|
||||
uintptr_t perfect = ht->perfect_bit;
|
||||
|
||||
i = hash_bucket(ht, h);
|
||||
|
||||
while (entry_is_valid(ht->table[i])) {
|
||||
perfect = 0;
|
||||
i = (i + 1) & ((1 << ht->bits)-1);
|
||||
}
|
||||
ht->table[i] = make_hval(ht, new, get_hash_ptr_bits(ht, h)|perfect);
|
||||
}
|
||||
|
||||
static COLD bool double_table(struct htable *ht)
|
||||
{
|
||||
unsigned int i;
|
||||
size_t oldnum = (size_t)1 << ht->bits;
|
||||
uintptr_t *oldtable, e;
|
||||
|
||||
oldtable = ht->table;
|
||||
ht->table = calloc(1 << (ht->bits+1), sizeof(size_t));
|
||||
if (!ht->table) {
|
||||
ht->table = oldtable;
|
||||
return false;
|
||||
}
|
||||
ht->bits++;
|
||||
ht->max = ((size_t)3 << ht->bits) / 4;
|
||||
ht->max_with_deleted = ((size_t)9 << ht->bits) / 10;
|
||||
|
||||
/* If we lost our "perfect bit", get it back now. */
|
||||
if (!ht->perfect_bit && ht->common_mask) {
|
||||
for (i = 0; i < sizeof(ht->common_mask) * CHAR_BIT; i++) {
|
||||
if (ht->common_mask & ((size_t)1 << i)) {
|
||||
ht->perfect_bit = (size_t)1 << i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (oldtable != &ht->perfect_bit) {
|
||||
for (i = 0; i < oldnum; i++) {
|
||||
if (entry_is_valid(e = oldtable[i])) {
|
||||
void *p = get_raw_ptr(ht, e);
|
||||
ht_add(ht, p, ht->rehash(p, ht->priv));
|
||||
}
|
||||
}
|
||||
free(oldtable);
|
||||
}
|
||||
ht->deleted = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static COLD void rehash_table(struct htable *ht)
|
||||
{
|
||||
size_t start, i;
|
||||
uintptr_t e;
|
||||
|
||||
/* Beware wrap cases: we need to start from first empty bucket. */
|
||||
for (start = 0; ht->table[start]; start++);
|
||||
|
||||
for (i = 0; i < (size_t)1 << ht->bits; i++) {
|
||||
size_t h = (i + start) & ((1 << ht->bits)-1);
|
||||
e = ht->table[h];
|
||||
if (!e)
|
||||
continue;
|
||||
if (e == HTABLE_DELETED)
|
||||
ht->table[h] = 0;
|
||||
else if (!(e & ht->perfect_bit)) {
|
||||
void *p = get_raw_ptr(ht, e);
|
||||
ht->table[h] = 0;
|
||||
ht_add(ht, p, ht->rehash(p, ht->priv));
|
||||
}
|
||||
}
|
||||
ht->deleted = 0;
|
||||
}
|
||||
|
||||
/* We stole some bits, now we need to put them back... */
|
||||
static COLD void update_common(struct htable *ht, const void *p)
|
||||
{
|
||||
unsigned int i;
|
||||
uintptr_t maskdiff, bitsdiff;
|
||||
|
||||
if (ht->elems == 0) {
|
||||
/* Always reveal one bit of the pointer in the bucket,
|
||||
* so it's not zero or HTABLE_DELETED (1), even if
|
||||
* hash happens to be 0. Assumes (void *)1 is not a
|
||||
* valid pointer. */
|
||||
for (i = sizeof(uintptr_t)*CHAR_BIT - 1; i > 0; i--) {
|
||||
if ((uintptr_t)p & ((uintptr_t)1 << i))
|
||||
break;
|
||||
}
|
||||
|
||||
ht->common_mask = ~((uintptr_t)1 << i);
|
||||
ht->common_bits = ((uintptr_t)p & ht->common_mask);
|
||||
ht->perfect_bit = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Find bits which are unequal to old common set. */
|
||||
maskdiff = ht->common_bits ^ ((uintptr_t)p & ht->common_mask);
|
||||
|
||||
/* These are the bits which go there in existing entries. */
|
||||
bitsdiff = ht->common_bits & maskdiff;
|
||||
|
||||
for (i = 0; i < (size_t)1 << ht->bits; i++) {
|
||||
if (!entry_is_valid(ht->table[i]))
|
||||
continue;
|
||||
/* Clear the bits no longer in the mask, set them as
|
||||
* expected. */
|
||||
ht->table[i] &= ~maskdiff;
|
||||
ht->table[i] |= bitsdiff;
|
||||
}
|
||||
|
||||
/* Take away those bits from our mask, bits and perfect bit. */
|
||||
ht->common_mask &= ~maskdiff;
|
||||
ht->common_bits &= ~maskdiff;
|
||||
ht->perfect_bit &= ~maskdiff;
|
||||
}
|
||||
|
||||
bool htable_add(struct htable *ht, size_t hash, const void *p)
|
||||
{
|
||||
if (ht->elems+1 > ht->max && !double_table(ht))
|
||||
return false;
|
||||
if (ht->elems+1 + ht->deleted > ht->max_with_deleted)
|
||||
rehash_table(ht);
|
||||
assert(p);
|
||||
if (((uintptr_t)p & ht->common_mask) != ht->common_bits)
|
||||
update_common(ht, p);
|
||||
|
||||
ht_add(ht, p, hash);
|
||||
ht->elems++;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool htable_del(struct htable *ht, size_t h, const void *p)
|
||||
{
|
||||
struct htable_iter i;
|
||||
void *c;
|
||||
|
||||
for (c = htable_firstval(ht,&i,h); c; c = htable_nextval(ht,&i,h)) {
|
||||
if (c == p) {
|
||||
htable_delval(ht, &i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void htable_delval(struct htable *ht, struct htable_iter *i)
|
||||
{
|
||||
assert(i->off < (size_t)1 << ht->bits);
|
||||
assert(entry_is_valid(ht->table[i->off]));
|
||||
|
||||
ht->elems--;
|
||||
ht->table[i->off] = HTABLE_DELETED;
|
||||
ht->deleted++;
|
||||
}
|
191
ccan/ccan/htable/htable.h
Normal file
191
ccan/ccan/htable/htable.h
Normal file
|
@ -0,0 +1,191 @@
|
|||
/* Licensed under LGPLv2+ - see LICENSE file for details */
|
||||
#ifndef CCAN_HTABLE_H
|
||||
#define CCAN_HTABLE_H
|
||||
#include "config.h"
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* struct htable - private definition of a htable.
|
||||
*
|
||||
* It's exposed here so you can put it in your structures and so we can
|
||||
* supply inline functions.
|
||||
*/
|
||||
struct htable {
	/* Callback returning the hash of @elem; @priv is handed through. */
	size_t (*rehash)(const void *elem, void *priv);
	void *priv;
	/* The table has 1 << bits slots. */
	unsigned int bits;
	/* Live entries currently stored. */
	size_t elems;
	/* Slots currently holding a delete marker. */
	size_t deleted;
	/* htable_add grows the table once elems + 1 would exceed this. */
	size_t max;
	/* htable_add rehashes once elems + 1 + deleted would exceed this. */
	size_t max_with_deleted;
	/* These are the bits which are the same in all pointers. */
	uintptr_t common_mask;
	uintptr_t common_bits;
	uintptr_t perfect_bit;
	uintptr_t *table;
};
|
||||
|
||||
/**
|
||||
* HTABLE_INITIALIZER - static initialization for a hash table.
|
||||
* @name: name of this htable.
|
||||
* @rehash: hash function to use for rehashing.
|
||||
* @priv: private argument to @rehash function.
|
||||
*
|
||||
* This is useful for setting up static and global hash tables.
|
||||
*
|
||||
* Example:
|
||||
* // For simplicity's sake, say hash value is contents of elem.
|
||||
* static size_t rehash(const void *elem, void *unused)
|
||||
* {
|
||||
* return *(size_t *)elem;
|
||||
* }
|
||||
* static struct htable ht = HTABLE_INITIALIZER(ht, rehash, NULL);
|
||||
*/
|
||||
#define HTABLE_INITIALIZER(name, rehash, priv)				\
	{								\
		rehash,			/* rehash */			\
		priv,			/* priv */			\
		0,			/* bits */			\
		0, 0,			/* elems, deleted */		\
		0, 0,			/* max, max_with_deleted */	\
		-1, 0,			/* common_mask, common_bits */	\
		0,			/* perfect_bit */		\
		&name.perfect_bit	/* table */			\
	}
|
||||
|
||||
/**
|
||||
* htable_init - initialize an empty hash table.
|
||||
* @ht: the hash table to initialize
|
||||
* @rehash: hash function to use for rehashing.
|
||||
* @priv: private argument to @rehash function.
|
||||
*/
|
||||
void htable_init(struct htable *ht,
|
||||
size_t (*rehash)(const void *elem, void *priv), void *priv);
|
||||
|
||||
/**
|
||||
* htable_init_sized - initialize an empty hash table of given size.
|
||||
* @ht: the hash table to initialize
|
||||
* @rehash: hash function to use for rehashing.
|
||||
* @priv: private argument to @rehash function.
|
||||
* @size: the number of elements.
|
||||
*
|
||||
* If this returns false, @ht is still usable, but may need to do reallocation
|
||||
* upon an add. If this returns true, it will not need to reallocate within
|
||||
* @size htable_adds.
|
||||
*/
|
||||
bool htable_init_sized(struct htable *ht,
|
||||
size_t (*rehash)(const void *elem, void *priv),
|
||||
void *priv, size_t size);
|
||||
|
||||
/**
|
||||
* htable_clear - empty a hash table.
|
||||
* @ht: the hash table to clear
|
||||
*
|
||||
* This doesn't do anything to any pointers left in it.
|
||||
*/
|
||||
void htable_clear(struct htable *ht);
|
||||
|
||||
/**
|
||||
* htable_rehash - use a hashtree's rehash function
|
||||
* @elem: the argument to rehash()
|
||||
*
|
||||
*/
|
||||
size_t htable_rehash(const void *elem);
|
||||
|
||||
/**
|
||||
* htable_add - add a pointer into a hash table.
|
||||
* @ht: the htable
|
||||
* @hash: the hash value of the object
|
||||
* @p: the non-NULL pointer
|
||||
*
|
||||
* Also note that this can only fail due to allocation failure. Otherwise, it
|
||||
* returns true.
|
||||
*/
|
||||
bool htable_add(struct htable *ht, size_t hash, const void *p);
|
||||
|
||||
/**
|
||||
* htable_del - remove a pointer from a hash table
|
||||
* @ht: the htable
|
||||
* @hash: the hash value of the object
|
||||
* @p: the pointer
|
||||
*
|
||||
* Returns true if the pointer was found (and deleted).
|
||||
*/
|
||||
bool htable_del(struct htable *ht, size_t hash, const void *p);
|
||||
|
||||
/**
|
||||
* struct htable_iter - iterator for htable_first or htable_firstval etc.
|
||||
*
|
||||
* This refers to a location inside the hashtable.
|
||||
*/
|
||||
struct htable_iter {
|
||||
size_t off;
|
||||
};
|
||||
|
||||
/**
|
||||
* htable_firstval - find a candidate for a given hash value
|
||||
* @htable: the hashtable
|
||||
* @i: the struct htable_iter to initialize
|
||||
* @hash: the hash value
|
||||
*
|
||||
* You'll need to check the value is what you want; returns NULL if none.
|
||||
* See Also:
|
||||
* htable_delval()
|
||||
*/
|
||||
void *htable_firstval(const struct htable *htable,
|
||||
struct htable_iter *i, size_t hash);
|
||||
|
||||
/**
|
||||
* htable_nextval - find another candidate for a given hash value
|
||||
* @htable: the hashtable
|
||||
* @i: the struct htable_iter initialized by htable_firstval
|
||||
* @hash: the hash value
|
||||
*
|
||||
* You'll need to check the value is what you want; returns NULL if no more.
|
||||
*/
|
||||
void *htable_nextval(const struct htable *htable,
|
||||
struct htable_iter *i, size_t hash);
|
||||
|
||||
/**
|
||||
* htable_get - find an entry in the hash table
|
||||
* @ht: the hashtable
|
||||
* @h: the hash value of the entry
|
||||
* @cmp: the comparison function
|
||||
* @ptr: the pointer to hand to the comparison function.
|
||||
*
|
||||
* Convenient inline wrapper for htable_firstval/htable_nextval loop.
|
||||
*/
|
||||
static inline void *htable_get(const struct htable *ht,
|
||||
size_t h,
|
||||
bool (*cmp)(const void *candidate, void *ptr),
|
||||
const void *ptr)
|
||||
{
|
||||
struct htable_iter i;
|
||||
void *c;
|
||||
|
||||
for (c = htable_firstval(ht,&i,h); c; c = htable_nextval(ht,&i,h)) {
|
||||
if (cmp(c, (void *)ptr))
|
||||
return c;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* htable_first - find an entry in the hash table
|
||||
* @htable: the hashtable
|
||||
* @i: the struct htable_iter to initialize
|
||||
*
|
||||
* Get an entry in the hashtable; NULL if empty.
|
||||
*/
|
||||
void *htable_first(const struct htable *htable, struct htable_iter *i);
|
||||
|
||||
/**
|
||||
* htable_next - find another entry in the hash table
|
||||
* @htable: the hashtable
|
||||
* @i: the struct htable_iter to use
|
||||
*
|
||||
* Get another entry in the hashtable; NULL if all done.
|
||||
* This is usually used after htable_first or prior non-NULL htable_next.
|
||||
*/
|
||||
void *htable_next(const struct htable *htable, struct htable_iter *i);
|
||||
|
||||
/**
|
||||
* htable_delval - remove an iterated pointer from a hash table
|
||||
* @ht: the htable
|
||||
* @i: the htable_iter
|
||||
*
|
||||
* Usually used to delete a hash entry after it has been found with
|
||||
* htable_firstval etc.
|
||||
*/
|
||||
void htable_delval(struct htable *ht, struct htable_iter *i);
|
||||
|
||||
#endif /* CCAN_HTABLE_H */
|
108
ccan/ccan/htable/htable_type.h
Normal file
108
ccan/ccan/htable/htable_type.h
Normal file
|
@ -0,0 +1,108 @@
|
|||
/* Licensed under LGPLv2+ - see LICENSE file for details */
|
||||
#ifndef CCAN_HTABLE_TYPE_H
|
||||
#define CCAN_HTABLE_TYPE_H
|
||||
#include <ccan/htable/htable.h>
|
||||
#include "config.h"
|
||||
|
||||
/**
|
||||
* HTABLE_DEFINE_TYPE - create a set of htable ops for a type
|
||||
* @type: a type whose pointers will be values in the hash.
|
||||
* @keyof: a function/macro to extract a key: <keytype> @keyof(const type *elem)
|
||||
* @hashfn: a hash function for a @key: size_t @hashfn(const <keytype> *)
|
||||
* @eqfn: an equality function for keys: bool @eqfn(const type *, const <keytype> *)
|
||||
* @name: a prefix for all the functions to define (of form <name>_*)
|
||||
*
|
||||
* NULL values may not be placed into the hash table.
|
||||
*
|
||||
* This defines the hashtable type and an iterator type:
|
||||
* struct <name>;
|
||||
* struct <name>_iter;
|
||||
*
|
||||
* It also defines initialization and freeing functions:
|
||||
* void <name>_init(struct <name> *);
|
||||
* void <name>_init_sized(struct <name> *, size_t);
|
||||
* void <name>_clear(struct <name> *);
|
||||
*
|
||||
* Add function only fails if we run out of memory:
|
||||
* bool <name>_add(struct <name> *ht, const <type> *e);
|
||||
*
|
||||
* Delete and delete-by key return true if it was in the set:
|
||||
* bool <name>_del(struct <name> *ht, const <type> *e);
|
||||
* bool <name>_delkey(struct <name> *ht, const <keytype> *k);
|
||||
*
|
||||
* Find function returns the matching element, or NULL:
|
||||
* type *<name>_get(const struct <name> *ht, const <keytype> *k);
|
||||
*
|
||||
* Iteration over hashtable is also supported:
|
||||
* type *<name>_first(const struct <name> *ht, struct <name>_iter *i);
|
||||
* type *<name>_next(const struct <name> *ht, struct <name>_iter *i);
|
||||
*
|
||||
* It's currently safe to iterate over a changing hashtable, but you might
|
||||
* miss an element. Iteration isn't very efficient, either.
|
||||
*
|
||||
* You can use HTABLE_INITIALIZER like so:
|
||||
* struct <name> ht = { HTABLE_INITIALIZER(ht.raw, <name>_hash, NULL) };
|
||||
*/
|
||||
/*
 * HTABLE_KTYPE - the type @keyof returns; plain void * when the compiler
 * has no typeof.
 */
#if HAVE_TYPEOF
#define HTABLE_KTYPE(keyof)	typeof(keyof(NULL))
#else
#define HTABLE_KTYPE(keyof)	void *
#endif

#define HTABLE_DEFINE_TYPE(type, keyof, hashfn, eqfn, name)		\
	struct name { struct htable raw; };				\
	struct name##_iter { struct htable_iter i; };			\
	/* Rehash callback: hash the key extracted from the element. */	\
	static inline size_t name##_hash(const void *elem, void *priv)	\
	{								\
		return hashfn(keyof((const type *)elem));		\
	}								\
	static inline void name##_init(struct name *ht)			\
	{								\
		htable_init(&ht->raw, name##_hash, NULL);		\
	}								\
	static inline void name##_init_sized(struct name *ht, size_t s)	\
	{								\
		htable_init_sized(&ht->raw, name##_hash, NULL, s);	\
	}								\
	static inline void name##_clear(struct name *ht)		\
	{								\
		htable_clear(&ht->raw);					\
	}								\
	static inline bool name##_add(struct name *ht, const type *elem) \
	{								\
		return htable_add(&ht->raw, hashfn(keyof(elem)), elem);	\
	}								\
	static inline bool name##_del(struct name *ht, const type *elem) \
	{								\
		return htable_del(&ht->raw, hashfn(keyof(elem)), elem);	\
	}								\
	static inline type *name##_get(const struct name *ht,		\
				       const HTABLE_KTYPE(keyof) k)	\
	{								\
		struct htable_iter i;					\
		size_t h = hashfn(k);					\
		void *c;						\
									\
		/* Typecheck for eqfn */				\
		(void)sizeof(eqfn((const type *)NULL,			\
				  keyof((const type *)NULL)));		\
		/* Call eqfn with its own signature: going through a */	\
		/* cast to bool (*)(const void *, void *) would call */	\
		/* it via an incompatible function pointer type. */	\
		for (c = htable_firstval(&ht->raw, &i, h);		\
		     c;							\
		     c = htable_nextval(&ht->raw, &i, h)) {		\
			if (eqfn((const type *)c, k))			\
				return (type *)c;			\
		}							\
		return NULL;						\
	}								\
	static inline bool name##_delkey(struct name *ht,		\
					 const HTABLE_KTYPE(keyof) k)	\
	{								\
		type *elem = name##_get(ht, k);				\
		if (elem)						\
			return name##_del(ht, elem);			\
		return false;						\
	}								\
	static inline type *name##_first(const struct name *ht,	\
					 struct name##_iter *iter)	\
	{								\
		return htable_first(&ht->raw, &iter->i);		\
	}								\
	static inline type *name##_next(const struct name *ht,		\
					struct name##_iter *iter)	\
	{								\
		return htable_next(&ht->raw, &iter->i);			\
	}
|
||||
#endif /* CCAN_HTABLE_TYPE_H */
|
36
ccan/ccan/htable/test/run-size.c
Normal file
36
ccan/ccan/htable/test/run-size.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
#include <ccan/htable/htable.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/tap/tap.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM_VALS 512
|
||||
|
||||
/* The hash is half the stored number, so consecutive pairs of values
   collide. */
static size_t hash(const void *elem, void *unused)
{
	const uint64_t *num = elem;

	return (size_t)(*num / 2);
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct htable ht;
|
||||
uint64_t val[NUM_VALS];
|
||||
unsigned int i;
|
||||
|
||||
plan_tests((NUM_VALS) * 2);
|
||||
for (i = 0; i < NUM_VALS; i++)
|
||||
val[i] = i;
|
||||
|
||||
htable_init(&ht, hash, NULL);
|
||||
for (i = 0; i < NUM_VALS; i++) {
|
||||
ok1(ht.max >= i);
|
||||
ok1(ht.max <= i * 2);
|
||||
htable_add(&ht, hash(&val[i], NULL), &val[i]);
|
||||
}
|
||||
htable_clear(&ht);
|
||||
|
||||
return exit_status();
|
||||
}
|
175
ccan/ccan/htable/test/run-type.c
Normal file
175
ccan/ccan/htable/test/run-type.c
Normal file
|
@ -0,0 +1,175 @@
|
|||
#include <ccan/htable/htable_type.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/tap/tap.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM_BITS 7
|
||||
#define NUM_VALS (1 << NUM_BITS)
|
||||
|
||||
struct obj {
|
||||
/* Makes sure we don't try to treat an obj as a key or vice versa */
|
||||
unsigned char unused;
|
||||
/* The key objkey() hands out and cmp() compares against. */
|
||||
unsigned int key;
|
||||
};
|
||||
|
||||
static const unsigned int *objkey(const struct obj *obj)
|
||||
{
|
||||
return &obj->key;
|
||||
}
|
||||
|
||||
/* We use the number divided by two as the hash (for lots of
|
||||
collisions), plus set all the higher bits so we can detect if they
|
||||
don't get masked out. */
|
||||
static size_t objhash(const unsigned int *key)
|
||||
{
|
||||
size_t h = *key / 2;
|
||||
h |= -1UL << NUM_BITS;
|
||||
return h;
|
||||
}
|
||||
|
||||
static bool cmp(const struct obj *obj, const unsigned int *key)
|
||||
{
|
||||
return obj->key == *key;
|
||||
}
|
||||
|
||||
HTABLE_DEFINE_TYPE(struct obj, objkey, objhash, cmp, htable_obj);
|
||||
|
||||
static void add_vals(struct htable_obj *ht,
|
||||
struct obj val[], unsigned int num)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (htable_obj_get(ht, &i)) {
|
||||
fail("%u already in hash", i);
|
||||
return;
|
||||
}
|
||||
htable_obj_add(ht, &val[i]);
|
||||
if (htable_obj_get(ht, &i) != &val[i]) {
|
||||
fail("%u not added to hash", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pass("Added %u numbers to hash", i);
|
||||
}
|
||||
|
||||
static void find_vals(const struct htable_obj *ht,
|
||||
const struct obj val[], unsigned int num)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (htable_obj_get(ht, &i) != &val[i]) {
|
||||
fail("%u not found in hash", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pass("Found %u numbers in hash", i);
|
||||
}
|
||||
|
||||
static void del_vals(struct htable_obj *ht,
|
||||
const struct obj val[], unsigned int num)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (!htable_obj_delkey(ht, &val[i].key)) {
|
||||
fail("%u not deleted from hash", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pass("Deleted %u numbers in hash", i);
|
||||
}
|
||||
|
||||
static void del_vals_bykey(struct htable_obj *ht,
|
||||
const struct obj val[], unsigned int num)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (!htable_obj_delkey(ht, &i)) {
|
||||
fail("%u not deleted by key from hash", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pass("Deleted %u numbers by key from hash", i);
|
||||
}
|
||||
|
||||
static bool check_mask(struct htable *ht, const struct obj val[], unsigned num)
|
||||
{
|
||||
uint64_t i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (((uintptr_t)&val[i] & ht->common_mask) != ht->common_bits)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
unsigned int i;
|
||||
struct htable_obj ht;
|
||||
struct obj val[NUM_VALS];
|
||||
unsigned int dne;
|
||||
void *p;
|
||||
struct htable_obj_iter iter;
|
||||
|
||||
plan_tests(20);
|
||||
for (i = 0; i < NUM_VALS; i++)
|
||||
val[i].key = i;
|
||||
dne = i;
|
||||
|
||||
htable_obj_init(&ht);
|
||||
ok1(ht.raw.max == 0);
|
||||
ok1(ht.raw.bits == 0);
|
||||
|
||||
/* We cannot find an entry which doesn't exist. */
|
||||
ok1(!htable_obj_get(&ht, &dne));
|
||||
|
||||
/* Fill it, it should increase in size. */
|
||||
add_vals(&ht, val, NUM_VALS);
|
||||
ok1(ht.raw.bits == NUM_BITS + 1);
|
||||
ok1(ht.raw.max < (1 << ht.raw.bits));
|
||||
|
||||
/* Mask should be set. */
|
||||
ok1(ht.raw.common_mask != 0);
|
||||
ok1(ht.raw.common_mask != -1);
|
||||
ok1(check_mask(&ht.raw, val, NUM_VALS));
|
||||
|
||||
/* Find all. */
|
||||
find_vals(&ht, val, NUM_VALS);
|
||||
ok1(!htable_obj_get(&ht, &dne));
|
||||
|
||||
/* Walk once, should get them all. */
|
||||
i = 0;
|
||||
for (p = htable_obj_first(&ht,&iter); p; p = htable_obj_next(&ht, &iter))
|
||||
i++;
|
||||
ok1(i == NUM_VALS);
|
||||
|
||||
/* Delete all. */
|
||||
del_vals(&ht, val, NUM_VALS);
|
||||
ok1(!htable_obj_get(&ht, &val[0].key));
|
||||
|
||||
/* Worst case, a "pointer" which doesn't have any matching bits. */
|
||||
htable_add(&ht.raw, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]);
|
||||
htable_obj_add(&ht, &val[NUM_VALS-1]);
|
||||
ok1(ht.raw.common_mask == 0);
|
||||
ok1(ht.raw.common_bits == 0);
|
||||
/* Delete the bogus one before we trip over it. */
|
||||
htable_del(&ht.raw, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]);
|
||||
|
||||
/* Add the rest. */
|
||||
add_vals(&ht, val, NUM_VALS-1);
|
||||
|
||||
/* Check we can find them all. */
|
||||
find_vals(&ht, val, NUM_VALS);
|
||||
ok1(!htable_obj_get(&ht, &dne));
|
||||
|
||||
/* Delete them all by key. */
|
||||
del_vals_bykey(&ht, val, NUM_VALS);
|
||||
htable_obj_clear(&ht);
|
||||
|
||||
return exit_status();
|
||||
}
|
61
ccan/ccan/htable/test/run-zero-hash-first-entry.c
Normal file
61
ccan/ccan/htable/test/run-zero-hash-first-entry.c
Normal file
|
@ -0,0 +1,61 @@
|
|||
#include <ccan/htable/htable.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/tap/tap.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Element type for this test; hash() uses key directly as the hash value. */
struct data {
|
||||
size_t key;
|
||||
};
|
||||
|
||||
/* Hash is simply key itself. */
|
||||
static size_t hash(const void *e, void *unused)
|
||||
{
|
||||
struct data *d = (struct data *)e;
|
||||
|
||||
return d->key;
|
||||
}
|
||||
|
||||
static bool eq(const void *e, void *k)
|
||||
{
|
||||
struct data *d = (struct data *)e;
|
||||
size_t *key = (size_t *)k;
|
||||
|
||||
return (d->key == *key);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
struct htable table;
|
||||
struct data *d0, *d1;
|
||||
|
||||
plan_tests(6);
|
||||
|
||||
d1 = malloc(sizeof(struct data));
|
||||
d1->key = 1;
|
||||
d0 = malloc(sizeof(struct data));
|
||||
d0->key = 0;
|
||||
|
||||
htable_init(&table, hash, NULL);
|
||||
|
||||
htable_add(&table, d0->key, d0);
|
||||
htable_add(&table, d1->key, d1);
|
||||
|
||||
ok1(table.elems == 2);
|
||||
ok1(htable_get(&table, 1, eq, &d1->key) == d1);
|
||||
ok1(htable_get(&table, 0, eq, &d0->key) == d0);
|
||||
htable_clear(&table);
|
||||
|
||||
/* Now add in reverse order, should still be OK. */
|
||||
htable_add(&table, d1->key, d1);
|
||||
htable_add(&table, d0->key, d0);
|
||||
|
||||
ok1(table.elems == 2);
|
||||
ok1(htable_get(&table, 1, eq, &d1->key) == d1);
|
||||
ok1(htable_get(&table, 0, eq, &d0->key) == d0);
|
||||
htable_clear(&table);
|
||||
|
||||
free(d0);
|
||||
free(d1);
|
||||
return exit_status();
|
||||
}
|
||||
|
207
ccan/ccan/htable/test/run.c
Normal file
207
ccan/ccan/htable/test/run.c
Normal file
|
@ -0,0 +1,207 @@
|
|||
#include <ccan/htable/htable.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/tap/tap.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM_BITS 7
|
||||
#define NUM_VALS (1 << NUM_BITS)
|
||||
|
||||
/* We use the number divided by two as the hash (for lots of
|
||||
collisions), plus set all the higher bits so we can detect if they
|
||||
don't get masked out. */
|
||||
static size_t hash(const void *elem, void *unused)
|
||||
{
|
||||
size_t h = *(uint64_t *)elem / 2;
|
||||
h |= -1UL << NUM_BITS;
|
||||
return h;
|
||||
}
|
||||
|
||||
/* Comparison callback: both arguments point at uint64_t values. */
static bool objcmp(const void *htelem, void *cmpdata)
{
	const uint64_t *stored = htelem;
	const uint64_t *wanted = cmpdata;

	return *stored == *wanted;
}
|
||||
|
||||
static void add_vals(struct htable *ht,
|
||||
const uint64_t val[],
|
||||
unsigned int off, unsigned int num)
|
||||
{
|
||||
uint64_t i;
|
||||
|
||||
for (i = off; i < off+num; i++) {
|
||||
if (htable_get(ht, hash(&i, NULL), objcmp, &i)) {
|
||||
fail("%llu already in hash", (long long)i);
|
||||
return;
|
||||
}
|
||||
htable_add(ht, hash(&val[i], NULL), &val[i]);
|
||||
if (htable_get(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
|
||||
fail("%llu not added to hash", (long long)i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pass("Added %llu numbers to hash", (long long)i);
|
||||
}
|
||||
|
||||
#if 0
/* Disabled: add every val[0..num-1] that is not already present. */
static void refill_vals(struct htable *ht,
			const uint64_t val[], unsigned int num)
{
	uint64_t i = 0;

	while (i < num) {
		if (!htable_get(ht, hash(&i, NULL), objcmp, &i))
			htable_add(ht, hash(&val[i], NULL), &val[i]);
		i++;
	}
}
#endif
|
||||
|
||||
static void find_vals(struct htable *ht,
|
||||
const uint64_t val[], unsigned int num)
|
||||
{
|
||||
uint64_t i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (htable_get(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
|
||||
fail("%llu not found in hash", (long long)i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pass("Found %llu numbers in hash", (long long)i);
|
||||
}
|
||||
|
||||
/*
 * Delete val[0..num-1] by pointer; the first element that cannot be
 * deleted is reported with fail().
 */
static void del_vals(struct htable *ht,
		     const uint64_t val[], unsigned int num)
{
	uint64_t i;

	for (i = 0; i < num; i++) {
		if (!htable_del(ht, hash(&val[i], NULL), &val[i])) {
			/* %llu takes an unsigned long long argument. */
			fail("%llu not deleted from hash",
			     (unsigned long long)i);
			return;
		}
	}
	pass("Deleted %llu numbers in hash", (unsigned long long)i);
}
|
||||
|
||||
static bool check_mask(struct htable *ht, uint64_t val[], unsigned num)
|
||||
{
|
||||
uint64_t i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (((uintptr_t)&val[i] & ht->common_mask) != ht->common_bits)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
unsigned int i, weight;
|
||||
uintptr_t perfect_bit;
|
||||
struct htable ht;
|
||||
uint64_t val[NUM_VALS];
|
||||
uint64_t dne;
|
||||
void *p;
|
||||
struct htable_iter iter;
|
||||
|
||||
plan_tests(35);
|
||||
for (i = 0; i < NUM_VALS; i++)
|
||||
val[i] = i;
|
||||
dne = i;
|
||||
|
||||
htable_init(&ht, hash, NULL);
|
||||
ok1(ht.max == 0);
|
||||
ok1(ht.bits == 0);
|
||||
|
||||
/* We cannot find an entry which doesn't exist. */
|
||||
ok1(!htable_get(&ht, hash(&dne, NULL), objcmp, &dne));
|
||||
|
||||
/* This should increase it once. */
|
||||
add_vals(&ht, val, 0, 1);
|
||||
ok1(ht.bits == 1);
|
||||
ok1(ht.max == 1);
|
||||
weight = 0;
|
||||
for (i = 0; i < sizeof(ht.common_mask) * CHAR_BIT; i++) {
|
||||
if (ht.common_mask & ((uintptr_t)1 << i)) {
|
||||
weight++;
|
||||
}
|
||||
}
|
||||
/* Only one bit should be clear. */
|
||||
ok1(weight == i-1);
|
||||
|
||||
/* Mask should be set. */
|
||||
ok1(check_mask(&ht, val, 1));
|
||||
|
||||
/* This should increase it again. */
|
||||
add_vals(&ht, val, 1, 1);
|
||||
ok1(ht.bits == 2);
|
||||
ok1(ht.max == 3);
|
||||
|
||||
/* Mask should be set. */
|
||||
ok1(ht.common_mask != 0);
|
||||
ok1(ht.common_mask != -1);
|
||||
ok1(check_mask(&ht, val, 2));
|
||||
|
||||
/* Now do the rest. */
|
||||
add_vals(&ht, val, 2, NUM_VALS - 2);
|
||||
|
||||
/* Find all. */
|
||||
find_vals(&ht, val, NUM_VALS);
|
||||
ok1(!htable_get(&ht, hash(&dne, NULL), objcmp, &dne));
|
||||
|
||||
/* Walk once, should get them all. */
|
||||
i = 0;
|
||||
for (p = htable_first(&ht,&iter); p; p = htable_next(&ht, &iter))
|
||||
i++;
|
||||
ok1(i == NUM_VALS);
|
||||
|
||||
/* Delete all. */
|
||||
del_vals(&ht, val, NUM_VALS);
|
||||
ok1(!htable_get(&ht, hash(&val[0], NULL), objcmp, &val[0]));
|
||||
|
||||
/* Worst case, a "pointer" which doesn't have any matching bits. */
|
||||
htable_add(&ht, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]);
|
||||
htable_add(&ht, hash(&val[NUM_VALS-1], NULL), &val[NUM_VALS-1]);
|
||||
ok1(ht.common_mask == 0);
|
||||
ok1(ht.common_bits == 0);
|
||||
/* Get rid of bogus pointer before we trip over it! */
|
||||
htable_del(&ht, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]);
|
||||
|
||||
/* Add the rest. */
|
||||
add_vals(&ht, val, 0, NUM_VALS-1);
|
||||
|
||||
/* Check we can find them all. */
|
||||
find_vals(&ht, val, NUM_VALS);
|
||||
ok1(!htable_get(&ht, hash(&dne, NULL), objcmp, &dne));
|
||||
|
||||
/* Corner cases: wipe out the perfect bit using bogus pointer. */
|
||||
htable_clear(&ht);
|
||||
htable_add(&ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1]));
|
||||
ok1(ht.perfect_bit);
|
||||
perfect_bit = ht.perfect_bit;
|
||||
htable_add(&ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1]
|
||||
| perfect_bit));
|
||||
ok1(ht.perfect_bit == 0);
|
||||
htable_del(&ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1] | perfect_bit));
|
||||
|
||||
/* Enlarging should restore it... */
|
||||
add_vals(&ht, val, 0, NUM_VALS-1);
|
||||
|
||||
ok1(ht.perfect_bit != 0);
|
||||
htable_clear(&ht);
|
||||
|
||||
ok1(htable_init_sized(&ht, hash, NULL, 1024));
|
||||
ok1(ht.max >= 1024);
|
||||
htable_clear(&ht);
|
||||
|
||||
ok1(htable_init_sized(&ht, hash, NULL, 1023));
|
||||
ok1(ht.max >= 1023);
|
||||
htable_clear(&ht);
|
||||
|
||||
ok1(htable_init_sized(&ht, hash, NULL, 1025));
|
||||
ok1(ht.max >= 1025);
|
||||
htable_clear(&ht);
|
||||
|
||||
return exit_status();
|
||||
}
|
40
ccan/ccan/htable/tools/Makefile
Normal file
40
ccan/ccan/htable/tools/Makefile
Normal file
|
@ -0,0 +1,40 @@
|
|||
CCANDIR=../../..
|
||||
CFLAGS=-Wall -Werror -O3 -I$(CCANDIR)
|
||||
#CFLAGS=-Wall -Werror -g -I$(CCANDIR)
|
||||
|
||||
CCAN_OBJS:=ccan-tal.o ccan-tal-str.o ccan-tal-grab_file.o ccan-take.o ccan-time.o ccan-str.o ccan-noerr.o ccan-list.o
|
||||
|
||||
all: speed stringspeed hsearchspeed
|
||||
|
||||
speed: speed.o hash.o $(CCAN_OBJS)
|
||||
|
||||
speed.o: speed.c ../htable.h ../htable.c
|
||||
|
||||
hash.o: ../../hash/hash.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
|
||||
stringspeed: stringspeed.o hash.o $(CCAN_OBJS)
|
||||
|
||||
stringspeed.o: stringspeed.c ../htable.h ../htable.c
|
||||
|
||||
hsearchspeed: hsearchspeed.o $(CCAN_OBJS)
|
||||
|
||||
clean:
|
||||
rm -f stringspeed speed hsearchspeed *.o
|
||||
|
||||
ccan-tal.o: $(CCANDIR)/ccan/tal/tal.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-tal-str.o: $(CCANDIR)/ccan/tal/str/str.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-take.o: $(CCANDIR)/ccan/take/take.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-tal-grab_file.o: $(CCANDIR)/ccan/tal/grab_file/grab_file.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-time.o: $(CCANDIR)/ccan/time/time.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-list.o: $(CCANDIR)/ccan/list/list.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-str.o: $(CCANDIR)/ccan/str/str.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
||||
ccan-noerr.o: $(CCANDIR)/ccan/noerr/noerr.c
|
||||
$(CC) $(CFLAGS) -c -o $@ $<
|
95
ccan/ccan/htable/tools/hsearchspeed.c
Normal file
95
ccan/ccan/htable/tools/hsearchspeed.c
Normal file
|
@ -0,0 +1,95 @@
|
|||
/* Simple speed tests for a hash of strings using hsearch */
|
||||
#include <ccan/htable/htable_type.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/tal/str/str.h>
|
||||
#include <ccan/tal/grab_file/grab_file.h>
|
||||
#include <ccan/tal/tal.h>
|
||||
#include <ccan/hash/hash.h>
|
||||
#include <ccan/time/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/time.h>
|
||||
#include <search.h>
|
||||
|
||||
/* Nanoseconds per operation */
|
||||
static size_t normalize(const struct timeabs *start,
|
||||
const struct timeabs *stop,
|
||||
unsigned int num)
|
||||
{
|
||||
return time_to_nsec(time_divide(time_between(*stop, *start), num));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
size_t i, j, num;
|
||||
struct timeabs start, stop;
|
||||
char **w;
|
||||
ENTRY *words, *misswords;
|
||||
|
||||
w = tal_strsplit(NULL, grab_file(NULL,
|
||||
argv[1] ? argv[1] : "/usr/share/dict/words"), "\n", STR_NO_EMPTY);
|
||||
num = tal_count(w) - 1;
|
||||
printf("%zu words\n", num);
|
||||
|
||||
hcreate(num+num/3);
|
||||
|
||||
words = tal_arr(w, ENTRY, num);
|
||||
for (i = 0; i < num; i++) {
|
||||
words[i].key = w[i];
|
||||
words[i].data = words[i].key;
|
||||
}
|
||||
|
||||
/* Append and prepend last char for miss testing. */
|
||||
misswords = tal_arr(w, ENTRY, num);
|
||||
for (i = 0; i < num; i++) {
|
||||
char lastc;
|
||||
if (strlen(w[i]))
|
||||
lastc = w[i][strlen(w[i])-1];
|
||||
else
|
||||
lastc = 'z';
|
||||
misswords[i].key = tal_fmt(misswords, "%c%s%c%c",
|
||||
lastc, w[i], lastc, lastc);
|
||||
}
|
||||
|
||||
printf("#01: Initial insert: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
hsearch(words[i], ENTER);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#02: Initial lookup (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
if (hsearch(words[i], FIND)->data != words[i].data)
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#03: Initial lookup (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
if (hsearch(misswords[i], FIND))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Lookups in order are very cache-friendly for judy; try random */
|
||||
printf("#04: Initial lookup (random): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
|
||||
if (hsearch(words[i], FIND)->data != words[i].data)
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
return 0;
|
||||
}
|
370
ccan/ccan/htable/tools/speed.c
Normal file
370
ccan/ccan/htable/tools/speed.c
Normal file
|
@ -0,0 +1,370 @@
|
|||
/* Simple speed tests for hashtables. */
|
||||
#include <ccan/htable/htable_type.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/hash/hash.h>
|
||||
#include <ccan/time/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static size_t hashcount;
|
||||
/* Element stored in the benchmark table; main() points self at the object. */
struct object {
|
||||
/* The key. */
|
||||
unsigned int key;
|
||||
|
||||
/* Some contents. Doubles as consistency check. */
|
||||
struct object *self;
|
||||
};
|
||||
|
||||
static const unsigned int *objkey(const struct object *obj)
|
||||
{
|
||||
return &obj->key;
|
||||
}
|
||||
|
||||
static size_t hash_obj(const unsigned int *key)
|
||||
{
|
||||
hashcount++;
|
||||
return hashl(key, 1, 0);
|
||||
}
|
||||
|
||||
static bool cmp(const struct object *object, const unsigned int *key)
|
||||
{
|
||||
return object->key == *key;
|
||||
}
|
||||
|
||||
HTABLE_DEFINE_TYPE(struct object, objkey, hash_obj, cmp, htable_obj);
|
||||
|
||||
/*
 * Number of bits set in @val.
 *
 * Uses the compiler builtin when HAVE_BUILTIN_POPCOUNTL is set.  The
 * fallback widens @val to 64 bits and sums adjacent bit fields whose width
 * doubles at every step, so the shift must double too (1, 2, 4, 8, 16, 32);
 * shifting by 1 at every step gives wrong counts.
 */
static unsigned int popcount(unsigned long val)
{
#if HAVE_BUILTIN_POPCOUNTL
	return __builtin_popcountl(val);
#else
	uint64_t v = val;

	v = (v & 0x5555555555555555ULL)
		+ ((v >> 1) & 0x5555555555555555ULL);
	v = (v & 0x3333333333333333ULL)
		+ ((v >> 2) & 0x3333333333333333ULL);
	v = (v & 0x0F0F0F0F0F0F0F0FULL)
		+ ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL);
	v = (v & 0x00FF00FF00FF00FFULL)
		+ ((v >> 8) & 0x00FF00FF00FF00FFULL);
	v = (v & 0x0000FFFF0000FFFFULL)
		+ ((v >> 16) & 0x0000FFFF0000FFFFULL);
	v = (v & 0x00000000FFFFFFFFULL)
		+ ((v >> 32) & 0x00000000FFFFFFFFULL);
	return (unsigned int)v;
#endif
}
|
||||
|
||||
static size_t perfect(const struct htable *ht)
|
||||
{
|
||||
size_t i, placed_perfect = 0;
|
||||
|
||||
for (i = 0; i < ((size_t)1 << ht->bits); i++) {
|
||||
if (!entry_is_valid(ht->table[i]))
|
||||
continue;
|
||||
if (hash_bucket(ht, ht->rehash(get_raw_ptr(ht, ht->table[i]),
|
||||
ht->priv)) == i) {
|
||||
assert((ht->table[i] & ht->perfect_bit)
|
||||
== ht->perfect_bit);
|
||||
placed_perfect++;
|
||||
}
|
||||
}
|
||||
return placed_perfect;
|
||||
}
|
||||
|
||||
static size_t count_deleted(const struct htable *ht)
|
||||
{
|
||||
size_t i, delete_markers = 0;
|
||||
|
||||
for (i = 0; i < ((size_t)1 << ht->bits); i++) {
|
||||
if (ht->table[i] == HTABLE_DELETED)
|
||||
delete_markers++;
|
||||
}
|
||||
return delete_markers;
|
||||
}
|
||||
|
||||
/* Nanoseconds per operation */
|
||||
static size_t normalize(const struct timeabs *start,
|
||||
const struct timeabs *stop,
|
||||
unsigned int num)
|
||||
{
|
||||
return time_to_nsec(time_divide(time_between(*stop, *start), num));
|
||||
}
|
||||
|
||||
static size_t worst_run(struct htable *ht, size_t *deleted)
|
||||
{
|
||||
size_t longest = 0, len = 0, this_del = 0, i;
|
||||
|
||||
*deleted = 0;
|
||||
/* This doesn't take into account end-wrap, but gives an idea. */
|
||||
for (i = 0; i < ((size_t)1 << ht->bits); i++) {
|
||||
if (ht->table[i]) {
|
||||
len++;
|
||||
if (ht->table[i] == HTABLE_DELETED)
|
||||
this_del++;
|
||||
} else {
|
||||
if (len > longest) {
|
||||
longest = len;
|
||||
*deleted = this_del;
|
||||
}
|
||||
len = 0;
|
||||
this_del = 0;
|
||||
}
|
||||
}
|
||||
return longest;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct object *objs;
|
||||
unsigned int i, j;
|
||||
size_t num, deleted;
|
||||
struct timeabs start, stop;
|
||||
struct htable_obj ht;
|
||||
bool make_dumb = false;
|
||||
|
||||
if (argv[1] && strcmp(argv[1], "--dumb") == 0) {
|
||||
argv++;
|
||||
make_dumb = true;
|
||||
}
|
||||
num = argv[1] ? atoi(argv[1]) : 1000000;
|
||||
objs = calloc(num, sizeof(objs[0]));
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
objs[i].key = i;
|
||||
objs[i].self = &objs[i];
|
||||
}
|
||||
|
||||
htable_obj_init(&ht);
|
||||
|
||||
printf("Initial insert: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
htable_obj_add(&ht, objs[i].self);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
printf("Details: hash size %u, mask bits %u, perfect %.0f%%\n",
|
||||
1U << ht.raw.bits, popcount(ht.raw.common_mask),
|
||||
perfect(&ht.raw) * 100.0 / ht.raw.elems);
|
||||
|
||||
if (make_dumb) {
|
||||
/* Screw with mask, to hobble us. */
|
||||
update_common(&ht.raw, (void *)~ht.raw.common_bits);
|
||||
printf("Details: DUMB MODE: mask bits %u\n",
|
||||
popcount(ht.raw.common_mask));
|
||||
}
|
||||
|
||||
printf("Initial lookup (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
if (htable_obj_get(&ht, &i)->self != objs[i].self)
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Initial lookup (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
unsigned int n = i + num;
|
||||
if (htable_obj_get(&ht, &n))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Lookups in order are very cache-friendly for judy; try random */
|
||||
printf("Initial lookup (random): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
|
||||
if (htable_obj_get(&ht, &j)->self != &objs[j])
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
hashcount = 0;
|
||||
printf("Initial delete all: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
if (!htable_obj_del(&ht, objs[i].self))
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
printf("Details: rehashes %zu\n", hashcount);
|
||||
|
||||
printf("Initial re-inserting: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
htable_obj_add(&ht, objs[i].self);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
hashcount = 0;
|
||||
printf("Deleting first half: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
if (!htable_obj_del(&ht, objs[i].self))
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Details: rehashes %zu, delete markers %zu\n",
|
||||
hashcount, count_deleted(&ht.raw));
|
||||
|
||||
printf("Adding (a different) half: ");
|
||||
fflush(stdout);
|
||||
|
||||
for (i = 0; i < num; i+=2)
|
||||
objs[i].key = num+i;
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
htable_obj_add(&ht, objs[i].self);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Details: delete markers %zu, perfect %.0f%%\n",
|
||||
count_deleted(&ht.raw), perfect(&ht.raw) * 100.0 / ht.raw.elems);
|
||||
|
||||
printf("Lookup after half-change (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 1; i < num; i+=2)
|
||||
if (htable_obj_get(&ht, &i)->self != objs[i].self)
|
||||
abort();
|
||||
for (i = 0; i < num; i+=2) {
|
||||
unsigned int n = i + num;
|
||||
if (htable_obj_get(&ht, &n)->self != objs[i].self)
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Lookup after half-change (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
unsigned int n = i + num * 2;
|
||||
if (htable_obj_get(&ht, &n))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Hashtables with delete markers can fill with markers over time.
|
||||
* so do some changes to see how it operates in long-term. */
|
||||
for (i = 0; i < 5; i++) {
|
||||
if (i == 0) {
|
||||
/* We don't measure this: jmap is different. */
|
||||
printf("Details: initial churn\n");
|
||||
} else {
|
||||
printf("Churning %s time: ",
|
||||
i == 1 ? "second"
|
||||
: i == 2 ? "third"
|
||||
: i == 3 ? "fourth"
|
||||
: "fifth");
|
||||
fflush(stdout);
|
||||
}
|
||||
start = time_now();
|
||||
for (j = 0; j < num; j++) {
|
||||
if (!htable_obj_del(&ht, &objs[j]))
|
||||
abort();
|
||||
objs[j].key = num*i+j;
|
||||
if (!htable_obj_add(&ht, &objs[j]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
if (i != 0)
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
}
|
||||
|
||||
/* Spread out the keys more to try to make it harder. */
|
||||
printf("Details: reinserting with spread\n");
|
||||
for (i = 0; i < num; i++) {
|
||||
if (!htable_obj_del(&ht, objs[i].self))
|
||||
abort();
|
||||
objs[i].key = num * 5 + i * 9;
|
||||
if (!htable_obj_add(&ht, objs[i].self))
|
||||
abort();
|
||||
}
|
||||
printf("Details: delete markers %zu, perfect %.0f%%\n",
|
||||
count_deleted(&ht.raw), perfect(&ht.raw) * 100.0 / ht.raw.elems);
|
||||
i = worst_run(&ht.raw, &deleted);
|
||||
printf("Details: worst run %u (%zu deleted)\n", i, deleted);
|
||||
|
||||
printf("Lookup after churn & spread (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
unsigned int n = num * 5 + i * 9;
|
||||
if (htable_obj_get(&ht, &n)->self != objs[i].self)
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Lookup after churn & spread (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
unsigned int n = num * (5 + 9) + i * 9;
|
||||
if (htable_obj_get(&ht, &n))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Lookup after churn & spread (random): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) {
|
||||
unsigned int n = num * 5 + j * 9;
|
||||
if (htable_obj_get(&ht, &n)->self != &objs[j])
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
hashcount = 0;
|
||||
printf("Deleting half after churn & spread: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
if (!htable_obj_del(&ht, objs[i].self))
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Adding (a different) half after churn & spread: ");
|
||||
fflush(stdout);
|
||||
|
||||
for (i = 0; i < num; i+=2)
|
||||
objs[i].key = num*6+i*9;
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
htable_obj_add(&ht, objs[i].self);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Details: delete markers %zu, perfect %.0f%%\n",
|
||||
count_deleted(&ht.raw), perfect(&ht.raw) * 100.0 / ht.raw.elems);
|
||||
|
||||
return 0;
|
||||
}
|
240
ccan/ccan/htable/tools/stringspeed.c
Normal file
240
ccan/ccan/htable/tools/stringspeed.c
Normal file
|
@ -0,0 +1,240 @@
|
|||
/* Simple speed tests for a hash of strings. */
|
||||
#include <ccan/htable/htable_type.h>
|
||||
#include <ccan/htable/htable.c>
|
||||
#include <ccan/tal/str/str.h>
|
||||
#include <ccan/tal/grab_file/grab_file.h>
|
||||
#include <ccan/tal/tal.h>
|
||||
#include <ccan/hash/hash.h>
|
||||
#include <ccan/time/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
static size_t hashcount;
|
||||
|
||||
/* Key-extraction callback for the string table: the stored string is its
 * own key, so hand it straight back. */
static const char *strkey(const char *str)
{
	return str;
}
|
||||
|
||||
static size_t hash_str(const char *key)
|
||||
{
|
||||
hashcount++;
|
||||
return hash(key, strlen(key), 0);
|
||||
}
|
||||
|
||||
/* Equality callback for the string table: true when the stored string and
 * the lookup key have identical contents. */
static bool cmp(const char *obj, const char *key)
{
	return !strcmp(obj, key);
}
|
||||
|
||||
/* Instantiate the typed string table used below (struct htable_str and the
 * htable_str_init/add/get/del helpers) from the callbacks defined above. */
HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, htable_str);
|
||||
|
||||
/* Nanoseconds per operation */
|
||||
static size_t normalize(const struct timeabs *start,
|
||||
const struct timeabs *stop,
|
||||
unsigned int num)
|
||||
{
|
||||
return time_to_nsec(time_divide(time_between(*stop, *start), num));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
size_t i, j, num;
|
||||
struct timeabs start, stop;
|
||||
struct htable_str ht;
|
||||
char **words, **misswords;
|
||||
|
||||
words = tal_strsplit(NULL, grab_file(NULL,
|
||||
argv[1] ? argv[1] : "/usr/share/dict/words"), "\n",
|
||||
STR_NO_EMPTY);
|
||||
htable_str_init(&ht);
|
||||
num = tal_count(words) - 1;
|
||||
/* Note that on my system, num is just > 98304, where we double! */
|
||||
printf("%zu words\n", num);
|
||||
|
||||
/* Append and prepend last char for miss testing. */
|
||||
misswords = tal_arr(words, char *, num);
|
||||
for (i = 0; i < num; i++) {
|
||||
char lastc;
|
||||
if (strlen(words[i]))
|
||||
lastc = words[i][strlen(words[i])-1];
|
||||
else
|
||||
lastc = 'z';
|
||||
misswords[i] = tal_fmt(misswords, "%c%s%c%c",
|
||||
lastc, words[i], lastc, lastc);
|
||||
}
|
||||
|
||||
printf("#01: Initial insert: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
htable_str_add(&ht, words[i]);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("Bytes allocated: %zu\n",
|
||||
sizeof(ht.raw.table[0]) << ht.raw.bits);
|
||||
|
||||
printf("#02: Initial lookup (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
if (htable_str_get(&ht, words[i]) != words[i])
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#03: Initial lookup (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
if (htable_str_get(&ht, misswords[i]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Lookups in order are very cache-friendly for judy; try random */
|
||||
printf("#04: Initial lookup (random): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
|
||||
if (htable_str_get(&ht, words[j]) != words[j])
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
hashcount = 0;
|
||||
printf("#05: Initial delete all: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
if (!htable_str_del(&ht, words[i]))
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#06: Initial re-inserting: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
htable_str_add(&ht, words[i]);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
hashcount = 0;
|
||||
printf("#07: Deleting first half: ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
if (!htable_str_del(&ht, words[i]))
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#08: Adding (a different) half: ");
|
||||
fflush(stdout);
|
||||
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
htable_str_add(&ht, misswords[i]);
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#09: Lookup after half-change (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 1; i < num; i+=2)
|
||||
if (htable_str_get(&ht, words[i]) != words[i])
|
||||
abort();
|
||||
for (i = 0; i < num; i+=2) {
|
||||
if (htable_str_get(&ht, misswords[i]) != misswords[i])
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#10: Lookup after half-change (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i+=2)
|
||||
if (htable_str_get(&ht, words[i]))
|
||||
abort();
|
||||
for (i = 1; i < num; i+=2) {
|
||||
if (htable_str_get(&ht, misswords[i]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Hashtables with delete markers can fill with markers over time.
|
||||
* so do some changes to see how it operates in long-term. */
|
||||
printf("#11: Churn 1: ");
|
||||
start = time_now();
|
||||
for (j = 0; j < num; j+=2) {
|
||||
if (!htable_str_del(&ht, misswords[j]))
|
||||
abort();
|
||||
if (!htable_str_add(&ht, words[j]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#12: Churn 2: ");
|
||||
start = time_now();
|
||||
for (j = 1; j < num; j+=2) {
|
||||
if (!htable_str_del(&ht, words[j]))
|
||||
abort();
|
||||
if (!htable_str_add(&ht, misswords[j]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#13: Churn 3: ");
|
||||
start = time_now();
|
||||
for (j = 1; j < num; j+=2) {
|
||||
if (!htable_str_del(&ht, misswords[j]))
|
||||
abort();
|
||||
if (!htable_str_add(&ht, words[j]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Now it's back to normal... */
|
||||
printf("#14: Post-Churn lookup (match): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++)
|
||||
if (htable_str_get(&ht, words[i]) != words[i])
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
printf("#15: Post-Churn lookup (miss): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0; i < num; i++) {
|
||||
if (htable_str_get(&ht, misswords[i]))
|
||||
abort();
|
||||
}
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
/* Lookups in order are very cache-friendly for judy; try random */
|
||||
printf("#16: Post-Churn lookup (random): ");
|
||||
fflush(stdout);
|
||||
start = time_now();
|
||||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
|
||||
if (htable_str_get(&ht, words[j]) != words[j])
|
||||
abort();
|
||||
stop = time_now();
|
||||
printf(" %zu ns\n", normalize(&start, &stop, num));
|
||||
|
||||
return 0;
|
||||
}
|
1
ccan/ccan/order/LICENSE
Symbolic link
1
ccan/ccan/order/LICENSE
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../licenses/CC0
|
33
ccan/ccan/order/_info
Normal file
33
ccan/ccan/order/_info
Normal file
|
@ -0,0 +1,33 @@
|
|||
#include "config.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* order - Simple, common value comparison functions
|
||||
*
|
||||
* This implements a number of commonly useful comparison functions in
|
||||
* a form which can be used with qsort() and bsearch() in the standard
|
||||
* library, or asort() and asearch() in ccan amongst other places.
|
||||
*
|
||||
* License: CC0
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
int main(int argc, char *argv[])
{
	const char *query;

	/* Expect exactly one argument */
	if (argc != 2)
		return 1;
	query = argv[1];

	/* Modules this one needs in order to build. */
	if (!strcmp(query, "depends")) {
		printf("ccan/typesafe_cb\n"
		       "ccan/ptrint\n");
		return 0;
	}

	/* Extra modules needed only by the tests. */
	if (!strcmp(query, "testdepends")) {
		printf("ccan/array_size\n"
		       "ccan/asort\n");
		return 0;
	}

	/* Unknown query. */
	return 1;
}
|
70
ccan/ccan/order/order.c
Normal file
70
ccan/ccan/order/order.c
Normal file
|
@ -0,0 +1,70 @@
|
|||
/* CC0 license (public domain) - see LICENSE file for details */
|
||||
|
||||
#include <ccan/order/order.h>
|
||||
|
||||
/*
 * SCALAR_ORDER - define the comparison functions for one scalar type
 *
 * For a name N and type T this defines:
 *   _order_N()               compares the T found at byte offset
 *                            ptr2int(ctx) inside each item
 *   order_N()                compares two T values; ctx is ignored
 *   _order_N_reverse(), order_N_reverse()
 *                            the same with the result negated
 *   order_N_noctx(), order_N_reverse_noctx()
 *                            two-argument forms comparing at offset 0
 *
 * Results are -1, 0 or 1.  The assert fires if the two values are neither
 * less than, greater than, nor equal to one another.
 */
#define SCALAR_ORDER(_oname, _type)					\
	int _order_##_oname(const void *a,				\
			    const void *b,				\
			    void *ctx)					\
	{								\
		const ptrdiff_t off = ptr2int(ctx);			\
		const _type *lhs = (const _type *)((const char *)a + off); \
		const _type *rhs = (const _type *)((const char *)b + off); \
									\
		if (*lhs < *rhs)					\
			return -1;					\
		if (*lhs > *rhs)					\
			return 1;					\
		assert(*lhs == *rhs);					\
		return 0;						\
	}								\
	int order_##_oname(const _type *a,				\
			   const _type *b,				\
			   void *ctx)					\
	{								\
		return _order_##_oname(a, b, int2ptr(0));		\
	}								\
	int _order_##_oname##_reverse(const void *a,			\
				      const void *b,			\
				      void *ctx)			\
	{								\
		return -_order_##_oname(a, b, ctx);			\
	}								\
	int order_##_oname##_reverse(const _type *a,			\
				     const _type *b,			\
				     void *ctx)				\
	{								\
		return _order_##_oname##_reverse(a, b, int2ptr(0));	\
	}								\
	int order_##_oname##_noctx(const void *a,			\
				   const void *b)			\
	{								\
		return _order_##_oname(a, b, int2ptr(0));		\
	}								\
	int order_##_oname##_reverse_noctx(const void *a,		\
					   const void *b)		\
	{								\
		return _order_##_oname##_reverse(a, b, int2ptr(0));	\
	}
|
||||
|
||||
SCALAR_ORDER(s8, int8_t)
|
||||
SCALAR_ORDER(s16, int16_t)
|
||||
SCALAR_ORDER(s32, int32_t)
|
||||
SCALAR_ORDER(s64, int64_t)
|
||||
|
||||
SCALAR_ORDER(u8, uint8_t)
|
||||
SCALAR_ORDER(u16, uint16_t)
|
||||
SCALAR_ORDER(u32, uint32_t)
|
||||
SCALAR_ORDER(u64, uint64_t)
|
||||
|
||||
SCALAR_ORDER(int, int)
|
||||
SCALAR_ORDER(uint, unsigned int)
|
||||
SCALAR_ORDER(long, long)
|
||||
SCALAR_ORDER(ulong, unsigned long)
|
||||
SCALAR_ORDER(size, size_t)
|
||||
SCALAR_ORDER(ptrdiff, ptrdiff_t)
|
||||
|
||||
SCALAR_ORDER(float, float)
|
||||
SCALAR_ORDER(double, double)
|
73
ccan/ccan/order/order.h
Normal file
73
ccan/ccan/order/order.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
/* CC0 license (public domain) - see LICENSE file for details */
#ifndef CCAN_ORDER_H
#define CCAN_ORDER_H

/* size_t, ptrdiff_t and offsetof are all used below. */
#include <stddef.h>
#include <stdint.h>
#include <assert.h>

#include <ccan/typesafe_cb/typesafe_cb.h>
#include <ccan/ptrint/ptrint.h>

/* Type-erased comparison callback that also takes a context pointer. */
typedef int (*_total_order_cb)(const void *, const void *, void *);
/* Context-free comparison callback, as accepted by qsort(). */
typedef int (*total_order_noctx_cb)(const void *, const void *);

/*
 * total_order_cb - declare a typed comparison-callback pointer
 * @_name: name of the pointer being declared (may be empty to form a type)
 * @_item: type (or expression of the type) of the items being compared
 * @_ctx: type (or expression of the type) of the context argument
 */
#define total_order_cb(_name, _item, _ctx)		\
	int (*_name)(const __typeof__(_item) *,		\
		     const __typeof__(_item) *,		\
		     __typeof__(_ctx))

/*
 * total_order_cast - convert a typed comparison callback to _total_order_cb
 * @cmp: the comparison function
 * @item: item type the function is expected to compare
 * @ctx: context type the function is expected to take
 *
 * The conversion goes through typesafe_cb_cast() against the signature
 * built by total_order_cb().
 */
#define total_order_cast(cmp, item, ctx)				\
	typesafe_cb_cast(_total_order_cb, total_order_cb(, item, ctx),	\
			 (cmp))

/* Type-erased pairing of a comparison callback with its context. */
struct _total_order {
	_total_order_cb cb;
	void *ctx;
};

/*
 * total_order - declare a typed (callback, context) pair
 * @_name: name of the variable being declared
 * @_item: type of the items being compared
 * @_ctx: type of the context value stored alongside the callback
 */
#define total_order(_name, _item, _ctx)			\
	struct {					\
		total_order_cb(cb, _item, _ctx);	\
		_ctx ctx;				\
	} _name

/* Prototypes for the three entry points order.c defines per scalar name. */
#define _DECL_ONAME(_oname, _itype)					\
	extern int _order_##_oname(const void *, const void *, void *); \
	extern int order_##_oname(const _itype *, const _itype *, void *); \
	extern int order_##_oname##_noctx(const void *, const void *);

/* As _DECL_ONAME, for both the forward and the _reverse ordering. */
#define _DECL_ONAME_BIDIR(_oname, _itype)				\
	_DECL_ONAME(_oname, _itype)					\
	_DECL_ONAME(_oname##_reverse, _itype)

_DECL_ONAME_BIDIR(s8, int8_t)
_DECL_ONAME_BIDIR(s16, int16_t)
_DECL_ONAME_BIDIR(s32, int32_t)
_DECL_ONAME_BIDIR(s64, int64_t)

_DECL_ONAME_BIDIR(u8, uint8_t)
_DECL_ONAME_BIDIR(u16, uint16_t)
_DECL_ONAME_BIDIR(u32, uint32_t)
_DECL_ONAME_BIDIR(u64, uint64_t)

_DECL_ONAME_BIDIR(int, int)
_DECL_ONAME_BIDIR(uint, unsigned int)
_DECL_ONAME_BIDIR(long, long)
_DECL_ONAME_BIDIR(ulong, unsigned long)
_DECL_ONAME_BIDIR(size, size_t)
_DECL_ONAME_BIDIR(ptrdiff, ptrdiff_t)

_DECL_ONAME_BIDIR(float, float)
_DECL_ONAME_BIDIR(double, double)

#undef _DECL_ONAME
#undef _DECL_ONAME_BIDIR

/*
 * total_order_by_field - declare an order comparing structs by one field
 * @_name: name of the total_order variable to declare and initialize
 * @_oname: scalar order name (s8, uint, double, ..., or NAME_reverse)
 * @_itype: the structure type being compared
 * @_field: the scalar member of @_itype to compare on
 *
 * The context stored in the pair is the field's offset encoded with
 * int2ptr(); _order_<oname>() decodes it with ptr2int().
 */
#define total_order_by_field(_name, _oname, _itype, _field)		\
	total_order(_name, _itype, ptrint_t *) = {			\
		(total_order_cb(, _itype,				\
				ptrint_t *))(_order_##_oname),		\
		int2ptr(offsetof(_itype, _field)),			\
	}

#endif /* CCAN_ORDER_H */
|
138
ccan/ccan/order/test/api.c
Normal file
138
ccan/ccan/order/test/api.c
Normal file
|
@ -0,0 +1,138 @@
|
|||
#include "config.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <ccan/array_size/array_size.h>
|
||||
|
||||
#include <ccan/order/order.h>
|
||||
#include <ccan/tap/tap.h>
|
||||
|
||||
#include <ccan/asort/asort.h>
|
||||
|
||||
/*
 * The values given (already in ascending order) are copied reversed, sorted
 * with qsort() and order_<oname>_noctx and compared with the input, then
 * sorted with the _reverse_noctx variant and compared with the reversed
 * copy.  Runs two tests.
 */
#define QSORT_SCALAR(t, oname, ...)					\
	{								\
		t expect[] = { __VA_ARGS__ };				\
		const int num = ARRAY_SIZE(expect);			\
		t reversed[num], work[num];				\
		int idx;						\
									\
		/* Initialize reversed[] in reverse order */		\
		for (idx = 0; idx < num; idx++)				\
			reversed[idx] = expect[num-idx-1];		\
									\
		memcpy(work, reversed, sizeof(reversed));		\
		qsort(work, num, sizeof(t), order_##oname##_noctx);	\
		ok(memcmp(work, expect, sizeof(expect)) == 0,		\
		   "qsort order_%s_noctx", #oname);			\
									\
		qsort(work, num, sizeof(t), order_##oname##_reverse_noctx); \
		ok(memcmp(work, reversed, sizeof(reversed)) == 0,	\
		   "qsort order_%s_reverse_noctx", #oname);		\
	}
|
||||
|
||||
/*
 * Same check as QSORT_SCALAR, but through asort() with the typed
 * order_<oname> / order_<oname>_reverse callbacks and a NULL context.
 * Runs two tests.
 */
#define ASORT_SCALAR(t, oname, ...)					\
	{								\
		t expect[] = { __VA_ARGS__ };				\
		const int num = ARRAY_SIZE(expect);			\
		t reversed[num], work[num];				\
		int idx;						\
									\
		/* Initialize reversed[] in reverse order */		\
		for (idx = 0; idx < num; idx++)				\
			reversed[idx] = expect[num-idx-1];		\
									\
		memcpy(work, reversed, sizeof(reversed));		\
		asort(work, num, order_##oname, NULL);			\
		ok(memcmp(work, expect, sizeof(expect)) == 0,		\
		   "asort order_%s", #oname);				\
									\
		asort(work, num, order_##oname##_reverse, NULL);	\
		ok(memcmp(work, reversed, sizeof(reversed)) == 0,	\
		   "asort order_%s_reverse", #oname);			\
	}
|
||||
|
||||
/*
 * Wrap each value in a structure with padding members on either side, then
 * sort the structures with asort() using orders built by
 * total_order_by_field() on the 'val' member, forward and reverse.
 * Runs two tests.
 */
#define ASORT_STRUCT_BY_SCALAR(t, oname, ...)				\
	{								\
		t vals[] = { __VA_ARGS__ };				\
		struct tstruct {					\
			char dummy0[5];					\
			t val;						\
			long dummy1;					\
		};							\
		const int num = ARRAY_SIZE(vals);			\
		struct tstruct expect[num], reversed[num], work[num];	\
		int idx;						\
		total_order_by_field(order, oname, struct tstruct, val); \
		total_order_by_field(rorder, oname##_reverse,		\
				     struct tstruct, val);		\
									\
		/* Set up dummy structures; zeroed first so that */	\
		/* memcmp() sees identical padding bytes */		\
		memset(expect, 0, sizeof(expect));			\
		for (idx = 0; idx < num; idx++) {			\
			expect[idx].dummy1 = idx;			\
			strcpy(expect[idx].dummy0, "abc");		\
			expect[idx].val = vals[idx];			\
		}							\
									\
		/* Initialize reversed[] in reverse order */		\
		for (idx = 0; idx < num; idx++)				\
			reversed[idx] = expect[num-idx-1];		\
									\
		memcpy(work, reversed, sizeof(reversed));		\
		asort(work, num, order.cb, order.ctx);			\
		ok(memcmp(work, expect, sizeof(expect)) == 0,		\
		   "asort by field %s", #oname);			\
									\
		asort(work, num, rorder.cb, rorder.ctx);		\
		ok(memcmp(work, reversed, sizeof(reversed)) == 0,	\
		   "asort by field %s_reverse", #oname);		\
	}
|
||||
|
||||
/* Run all three sort checks above (six tests in total) for one type. */
#define TEST_SCALAR(t, oname, ...)					\
	{								\
		QSORT_SCALAR(t, oname, __VA_ARGS__);			\
		ASORT_SCALAR(t, oname, __VA_ARGS__);			\
		ASORT_STRUCT_BY_SCALAR(t, oname, __VA_ARGS__);		\
	}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
/* This is how many tests you plan to run */
|
||||
plan_tests(84);
|
||||
|
||||
TEST_SCALAR(int8_t, s8, -128, -4, 0, 1, 2, 88, 126, 127);
|
||||
TEST_SCALAR(int16_t, s16, -32768, -4, 0, 1, 2, 88, 126, 32767);
|
||||
TEST_SCALAR(int32_t, s32, -2000000000, -4, 0, 1, 2, 88, 126,
|
||||
2000000000);
|
||||
TEST_SCALAR(int64_t, s64, -999999999999999999LL, -2000000000, -4, 0,
|
||||
1, 2, 88, 126, 2000000000, 999999999999999999LL);
|
||||
|
||||
TEST_SCALAR(uint8_t, u8, 0, 1, 2, 88, 126, 127, -10, -1);
|
||||
TEST_SCALAR(uint16_t, u16, 0, 1, 2, 88, 126, 32767, -10, -1);
|
||||
TEST_SCALAR(uint32_t, u32, 0, 1, 2, 88, 126, 2000000000, -10, -1);
|
||||
TEST_SCALAR(uint64_t, u64, 0, 1, 2, 88, 126, 2000000000,
|
||||
999999999999999999LL, -10, -1);
|
||||
|
||||
TEST_SCALAR(int, int, INT_MIN, -10, -1, 0, 1, 10, INT_MAX);
|
||||
TEST_SCALAR(unsigned, uint, 0, 1, 10, INT_MAX, (unsigned)INT_MAX+1,
|
||||
-10, -1);
|
||||
|
||||
TEST_SCALAR(long, long, LONG_MIN, INT_MIN, -10, -1, 0, 1, 10, INT_MAX,
|
||||
LONG_MAX);
|
||||
TEST_SCALAR(unsigned long, ulong, 0, 1, 10, INT_MAX,
|
||||
(unsigned long)INT_MAX+1, LONG_MAX,
|
||||
(unsigned long)LONG_MAX+1, -10, -1);
|
||||
|
||||
TEST_SCALAR(float, float, -INFINITY, -FLT_MAX, -1.0, 0.0, FLT_MIN,
|
||||
0.1, M_E, M_PI, 5.79, FLT_MAX, INFINITY);
|
||||
TEST_SCALAR(double, double, -INFINITY, -DBL_MAX, -FLT_MAX, -1.0, 0.0,
|
||||
DBL_MIN, FLT_MIN, 0.1, M_E, M_PI, 5.79, FLT_MAX, DBL_MAX,
|
||||
INFINITY);
|
||||
|
||||
/* This exits depending on whether all tests passed */
|
||||
return exit_status();
|
||||
}
|
24
ccan/ccan/order/test/compile_fail_1.c
Normal file
24
ccan/ccan/order/test/compile_fail_1.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <ccan/order/order.h>
|
||||
|
||||
#include "fancy_cmp.h"
|
||||
|
||||
#ifdef FAIL
|
||||
typedef int item_t;
|
||||
#else
|
||||
typedef struct item item_t;
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
total_order_cb(cb0, struct item, struct cmp_info *) = fancy_cmp;
|
||||
_total_order_cb cb1 = total_order_cast(fancy_cmp,
|
||||
item_t, struct cmp_info *);
|
||||
|
||||
printf("%p %p\n", cb0, cb1);
|
||||
|
||||
exit(0);
|
||||
}
|
25
ccan/ccan/order/test/compile_fail_2.c
Normal file
25
ccan/ccan/order/test/compile_fail_2.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <ccan/order/order.h>
|
||||
|
||||
#include "fancy_cmp.h"
|
||||
|
||||
#ifdef FAIL
|
||||
typedef int ctx_t;
|
||||
#else
|
||||
typedef struct cmp_info ctx_t;
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
total_order_cb(cb0, struct item, struct cmp_info *) = fancy_cmp;
|
||||
_total_order_cb cb1 = total_order_cast(fancy_cmp, struct item,
|
||||
ctx_t *);
|
||||
|
||||
printf("%p %p\n", cb0, cb1);
|
||||
|
||||
exit(0);
|
||||
|
||||
}
|
19
ccan/ccan/order/test/compile_ok.c
Normal file
19
ccan/ccan/order/test/compile_ok.c
Normal file
|
@ -0,0 +1,19 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <ccan/order/order.h>
|
||||
|
||||
#include "fancy_cmp.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
total_order_cb(cb0, struct item, struct cmp_info *) = fancy_cmp;
|
||||
_total_order_cb cb1 = total_order_cast(fancy_cmp,
|
||||
struct item, struct cmp_info *);
|
||||
total_order_noctx_cb cb_noctx = fancy_cmp_noctx;
|
||||
|
||||
printf("%p %p %p\n", cb0, cb1, cb_noctx);
|
||||
|
||||
exit(0);
|
||||
}
|
47
ccan/ccan/order/test/fancy_cmp.h
Normal file
47
ccan/ccan/order/test/fancy_cmp.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
#ifndef _FANCY_CMP_H
|
||||
#define _FANCY_CMP_H
|
||||
|
||||
/* Context for fancy_cmp() */
struct cmp_info {
	unsigned xcode;		/* XORed into each item's value before comparing */
	int offset;		/* characters of each str to skip when comparing */
};

/* The item type ordered by fancy_cmp() */
struct item {
	unsigned value;
	char *str;
};

/*
 * Order two items: first by (value ^ ctx->xcode) as an unsigned number
 * (returning -1 or 1), and if those are equal by strcmp() on the strings
 * starting ctx->offset characters in.
 */
static inline int fancy_cmp(const struct item *a, const struct item *b,
			    struct cmp_info *ctx)
{
	const unsigned key_a = a->value ^ ctx->xcode;
	const unsigned key_b = b->value ^ ctx->xcode;

	if (key_a != key_b)
		return key_a < key_b ? -1 : 1;

	return strcmp(a->str + ctx->offset, b->str + ctx->offset);
}
|
||||
|
||||
static inline int fancy_cmp_noctx(const void *av, const void *bv)
|
||||
{
|
||||
const struct item *a = (const struct item *)av;
|
||||
const struct item *b = (const struct item *)bv;
|
||||
struct cmp_info ctx_default = {
|
||||
.xcode = 0x1234,
|
||||
.offset = 3,
|
||||
};
|
||||
total_order(default_order, struct item, struct cmp_info *) = {
|
||||
fancy_cmp, &ctx_default,
|
||||
};
|
||||
|
||||
return default_order.cb(a, b, default_order.ctx);
|
||||
}
|
||||
|
||||
#endif /* _FANCY_CMP_H */
|
1
ccan/ccan/ptrint/LICENSE
Symbolic link
1
ccan/ccan/ptrint/LICENSE
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../licenses/CC0
|
59
ccan/ccan/ptrint/_info
Normal file
59
ccan/ccan/ptrint/_info
Normal file
|
@ -0,0 +1,59 @@
|
|||
#include "config.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* ptrint - Encoding integers in pointer values
|
||||
*
|
||||
* Library (standard or ccan) functions which take user supplied
|
||||
* callbacks usually have the callback supplied with a void * context
|
||||
* pointer. For simple cases, it's sometimes sufficient to pass a
|
||||
* simple integer cast into a void *, rather than having to allocate a
|
||||
* context structure. This module provides some helper macros to do
|
||||
* this relatively safely and portably.
|
||||
*
|
||||
* The key characteristics of these functions are:
|
||||
* ptr2int(int2ptr(val)) == val
|
||||
* and
|
||||
* !int2ptr(val) == !val
|
||||
* (i.e. the transformation preserves truth value).
|
||||
*
|
||||
* Example:
|
||||
* #include <ccan/ptrint/ptrint.h>
|
||||
*
|
||||
* static void callback(void *opaque)
|
||||
* {
|
||||
* int val = ptr2int(opaque);
|
||||
* printf("Value is %d\n", val);
|
||||
* }
|
||||
*
|
||||
* void (*cb)(void *opaque) = callback;
|
||||
*
|
||||
* int main(int argc, char *argv[])
|
||||
* {
|
||||
* int val = 17;
|
||||
*
|
||||
* (*cb)(int2ptr(val));
|
||||
* exit(0);
|
||||
* }
|
||||
*
|
||||
* License: CC0 (Public domain)
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
/*
 * Module metadata query: prints the module's dependencies for the
 * "depends" and "testdepends" queries and returns 0; any other
 * invocation returns 1.
 */
int main(int argc, char *argv[])
{
	const char *query;

	/* Expect exactly one argument */
	if (argc != 2)
		return 1;

	query = argv[1];

	if (!strcmp(query, "depends")) {
		printf("ccan/build_assert\n");
		return 0;
	}

	if (!strcmp(query, "testdepends")) {
		printf("ccan/array_size\n");
		return 0;
	}

	/* Unrecognised query */
	return 1;
}
|
34
ccan/ccan/ptrint/ptrint.h
Normal file
34
ccan/ccan/ptrint/ptrint.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* CC0 (Public domain) - see LICENSE file for details */
|
||||
#ifndef CCAN_PTRINT_H
|
||||
#define CCAN_PTRINT_H
|
||||
|
||||
#include "config.h"

#include <stddef.h>
#include <stdint.h>

#include <ccan/build_assert/build_assert.h>
|
||||
|
||||
/*
|
||||
* This is a deliberately incomplete type, because it should never be
|
||||
* dereferenced - instead it marks pointer values which are actually
|
||||
* encoding integers
|
||||
*/
|
||||
typedef struct ptrint ptrint_t;
|
||||
|
||||
/**
 * ptr2int - recover the integer encoded in a ptrint_t pointer
 * @p: pointer value encoding an integer (as produced by int2ptr())
 *
 * Returns the integer value of @p as a ptrdiff_t.
 */
static inline ptrdiff_t ptr2int(const ptrint_t *p)
{
	/*
	 * ptrdiff_t is the right size by definition, but to avoid
	 * surprises we want a warning if the user can't fit at least
	 * a regular int in there
	 */
	BUILD_ASSERT(sizeof(int) <= sizeof(ptrdiff_t));

	/*
	 * Convert through intptr_t instead of computing "p - NULL":
	 * pointer subtraction is only defined for two pointers into the
	 * same array object, so subtracting a null pointer is undefined
	 * behaviour, whereas the pointer-to-integer conversion is merely
	 * implementation-defined.
	 */
	return (ptrdiff_t)(intptr_t)p;
}
|
||||
|
||||
static inline ptrint_t *int2ptr(ptrdiff_t i)
|
||||
{
|
||||
return (ptrint_t *)((char *)NULL + i);
|
||||
}
|
||||
|
||||
#endif /* CCAN_PTRINT_H */
|
29
ccan/ccan/ptrint/test/run.c
Normal file
29
ccan/ccan/ptrint/test/run.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
#include <limits.h>
|
||||
|
||||
#include <ccan/array_size/array_size.h>
|
||||
|
||||
#include <ccan/ptrint/ptrint.h>
|
||||
#include <ccan/tap/tap.h>
|
||||
|
||||
/*
 * Integer values round-tripped through int2ptr()/ptr2int() by the
 * test: both int extremes, zero, and a few small values.
 */
static ptrdiff_t testvals[] = {
	-INT_MAX,
	-1,
	0,
	1,
	2,
	17,
	INT_MAX,
};
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* This is how many tests you plan to run */
|
||||
plan_tests(2 * ARRAY_SIZE(testvals));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(testvals); i++) {
|
||||
ptrdiff_t val = testvals[i];
|
||||
void *ptr = int2ptr(val);
|
||||
|
||||
ok1(ptr2int(ptr) == val);
|
||||
ok1(!val == !ptr);
|
||||
}
|
||||
|
||||
/* This exits depending on whether all tests passed */
|
||||
return exit_status();
|
||||
}
|
26
ccan/ccan/tal/benchmark/Makefile
Normal file
26
ccan/ccan/tal/benchmark/Makefile
Normal file
|
@ -0,0 +1,26 @@
|
|||
# Build flags for the tal benchmarks; alternative (non-LTO / debug)
# settings are kept below, commented out.
CFLAGS=-O3 -Wall -flto -I../../..
#CFLAGS=-O3 -Wall -I../../..
#CFLAGS=-g -Wall -I../../..
LDFLAGS=-O3 -flto
LDLIBS=-lrt

# "all" and "clean" name actions, not files: declare them phony so a
# stray file with either name cannot stop the target from running.
.PHONY: all clean

all: speed samba-allocs

speed: speed.o tal.o talloc.o time.o list.o take.o str.o
samba-allocs: samba-allocs.o tal.o talloc.o time.o list.o take.o

# Objects built from sources that live outside this directory.
tal.o: ../tal.c
	$(CC) $(CFLAGS) -c -o $@ $<
str.o: ../str/str.c
	$(CC) $(CFLAGS) -c -o $@ $<
talloc.o: ../../talloc/talloc.c
	$(CC) $(CFLAGS) -c -o $@ $<
time.o: ../../time/time.c
	$(CC) $(CFLAGS) -c -o $@ $<
list.o: ../../list/list.c
	$(CC) $(CFLAGS) -c -o $@ $<
take.o: ../../take/take.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	rm -f speed samba-allocs *.o
|
Loading…
Add table
Reference in a new issue