mirror of https://github.com/wolfSSL/wolfssl.git
RISC-V ChaCha20: assembly implementations
ChaCha20: scalar and vector implementations. Vector implementations process 6, 4, 2, or 1 blocks at a time. Scalar implementations use roriw and pack; vector implementations use VROR_VI and roriw. RISC-V SHA-256: avoid using s0 if it can be helped.
pull/7818/head
parent: 1b8254d668
commit: ebb49b6e68
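For orientation only (not part of the commit), a minimal C sketch of the ChaCha20 quarter round per RFC 7539. Each 32-bit left-rotate below is the operation that the scalar roriw path and the vector VROR_VI path described in the commit message replace with a single instruction:

    #include <stdint.h>

    #define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

    /* ChaCha20 quarter round: add, xor, rotate by 16, 12, 8, 7. */
    static void quarter_round(uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d)
    {
        *a += *b; *d ^= *a; *d = ROTL32(*d, 16);
        *c += *d; *b ^= *c; *b = ROTL32(*b, 12);
        *a += *b; *d ^= *a; *d = ROTL32(*d, 8);
        *c += *d; *b ^= *c; *b = ROTL32(*b, 7);
    }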
configure.ac (10 changed lines)
@@ -3077,10 +3077,14 @@ do
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_CARRYLESS"
             ;;
         zkn|zkned)
-            # AES encrypt/decrypt
+            # AES encrypt/decrypt, SHA-2
             ENABLED_RISCV_ASM=yes
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_SCALAR_CRYPTO_ASM"
             ;;
+        zv)
+            ENABLED_RISCV_ASM=yes
+            AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR"
+            ;;
         zvkg)
             # VGMUL, VGHSH
             ENABLED_RISCV_ASM=yes
@@ -3097,12 +3101,12 @@ do
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION"
             ;;
         zvkned)
-            # Vector AES
+            # Vector AES, SHA-2
             ENABLED_RISCV_ASM=yes
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR_CRYPTO_ASM"
             ;;
         *)
-            AC_MSG_ERROR([Invalid RISC-V option [yes,zbkb,zbb,zbc,zbkc,zkn,zkned,zvkg,zvbc,zvbb,zvkb,zvkned]: $ENABLED_RISCV_ASM.])
+            AC_MSG_ERROR([Invalid RISC-V option [yes,zbkb,zbb,zbc,zbkc,zkn,zkned,zv,zvkg,zvbc,zvbb,zvkb,zvkned]: $ENABLED_RISCV_ASM.])
             break
             ;;
     esac
@@ -971,17 +971,21 @@ if BUILD_CHACHA
 if BUILD_ARMASM_NEON
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-chacha.c
 else
+if BUILD_RISCV_ASM
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-chacha.c
+else
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha.c
+endif !BUILD_RISCV_ASM
 if !BUILD_X86_ASM
 if BUILD_INTELASM
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha_asm.S
-endif
-endif
-endif
+endif BUILD_INTELASM
+endif !BUILD_X86_ASM
+endif !BUILD_ARMASM_NEON
 if BUILD_POLY1305
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha20_poly1305.c
-endif
-endif
+endif BUILD_POLY1305
+endif BUILD_CHACHA
 
 if !BUILD_INLINE
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/misc.c
@@ -38,6 +38,9 @@ Public domain.
 #if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
     /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
 
+#elif defined(WOLFSSL_RISCV_ASM)
+    /* implementation located in wolfcrypt/src/port/riscv/riscv-64-chacha.c */
+
 #else
 #if defined(HAVE_CHACHA)
 
@@ -75,18 +75,6 @@ static WC_INLINE void memcpy16(byte* out, const byte* in)
 #endif
 
 
-/* vd = vs2 << uimm */
-#define VSLL_VI(vd, vs2, uimm) \
-    ASM_WORD((0b100101 << 26) | (0b1 << 25) | \
-             (0b011 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (uimm << 15) | (vs2 << 20))
-/* vd = vs2 >> uimm */
-#define VSRL_VI(vd, vs2, uimm) \
-    ASM_WORD((0b101000 << 26) | (0b1 << 25) | \
-             (0b011 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (uimm << 15) | (vs2 << 20))
-
-
 /* Vector register set if equal: vd[i] = vs1[i] == vs2[i] ? 1 : 0 */
 #define VMSEQ_VV(vd, vs1, vs2) \
     ASM_WORD((0b011000 << 26) | (0b1 << 25) | \
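The shift macros removed here reappear further down in this change set. As background (not part of the commit), a rough C model of why a dedicated vector rotate matters: without the Zvbb rotate, each per-lane 32-bit rotate has to be built from a left shift, a right shift and an OR, while vror.vi does the same in one instruction.

    #include <stdint.h>

    /* Rotate each 32-bit lane left by n using only shifts and an OR,
     * i.e. the VSLL_VI / VSRL_VI route (three vector ops per rotate). */
    static void rotl_lanes_by_shifts(uint32_t* vd, const uint32_t* vs2,
                                     unsigned n, int vl)
    {
        int i;
        for (i = 0; i < vl; i++) {
            uint32_t hi = vs2[i] << (n & 31);          /* VSLL_VI */
            uint32_t lo = vs2[i] >> ((32 - n) & 31);   /* VSRL_VI */
            vd[i] = hi | lo;                           /* OR of the two halves */
        }
    }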
File diff suppressed because it is too large.
@@ -846,41 +846,41 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
 #elif defined(WOLFSSL_RISCV_BASE_BIT_MANIPULATION)
         "ld t1, 0(%[digest])\n\t"
         "ld t3, 8(%[digest])\n\t"
-        "ld s1, 16(%[digest])\n\t"
-        "ld s3, 24(%[digest])\n\t"
+        "ld a5, 16(%[digest])\n\t"
+        "ld a7, 24(%[digest])\n\t"
         REV8(REG_T1, REG_T1)
         REV8(REG_T3, REG_T3)
-        REV8(REG_S1, REG_S1)
-        REV8(REG_S3, REG_S3)
+        REV8(REG_A5, REG_A5)
+        REV8(REG_A7, REG_A7)
         "srli t0, t1, 32\n\t"
         "srli t2, t3, 32\n\t"
-        "srli s0, s1, 32\n\t"
-        "srli s2, s3, 32\n\t"
+        "srli a4, a5, 32\n\t"
+        "srli a6, a7, 32\n\t"
         "sw t0, 0(%[hash])\n\t"
         "sw t1, 4(%[hash])\n\t"
         "sw t2, 8(%[hash])\n\t"
         "sw t3, 12(%[hash])\n\t"
-        "sw s0, 16(%[hash])\n\t"
-        "sw s1, 20(%[hash])\n\t"
-        "sw s2, 24(%[hash])\n\t"
-        "sw s3, 28(%[hash])\n\t"
+        "sw a4, 16(%[hash])\n\t"
+        "sw a5, 20(%[hash])\n\t"
+        "sw a6, 24(%[hash])\n\t"
+        "sw a7, 28(%[hash])\n\t"
 #else
         LOAD_WORD_REV(t0, 0, %[digest], t2, t3, t4)
         LOAD_WORD_REV(t1, 4, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s0, 8, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s1, 12, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a4, 8, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a5, 12, %[digest], t2, t3, t4)
         "sw t0, 0(%[hash])\n\t"
         "sw t1, 4(%[hash])\n\t"
-        "sw s0, 8(%[hash])\n\t"
-        "sw s1, 12(%[hash])\n\t"
+        "sw a4, 8(%[hash])\n\t"
+        "sw a5, 12(%[hash])\n\t"
         LOAD_WORD_REV(t0, 16, %[digest], t2, t3, t4)
         LOAD_WORD_REV(t1, 20, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s0, 24, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s1, 28, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a4, 24, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a5, 28, %[digest], t2, t3, t4)
         "sw t0, 16(%[hash])\n\t"
         "sw t1, 20(%[hash])\n\t"
-        "sw s0, 24(%[hash])\n\t"
-        "sw s1, 28(%[hash])\n\t"
+        "sw a4, 24(%[hash])\n\t"
+        "sw a5, 28(%[hash])\n\t"
 #endif
         :
         : [digest] "r" (sha256->digest), [hash] "r" (hash)
@@ -889,7 +889,7 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
         , [rev_idx] "r" (rev_idx)
 #endif
         : "cc", "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
-          "s0", "s1", "s2", "s3"
+          "a4", "a5", "a6", "a7"
     );
 }
 
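Two notes on the hunks above, outside the diff itself. The register swap from s0..s3 to a4..a7 moves the code off callee-saved registers (s0 also serves as the frame pointer), matching the commit message's aim of avoiding s0. The REV8-plus-srli pattern turns two little-endian 32-bit digest words into big-endian output with one 64-bit byte swap; a hedged C model, assuming a little-endian host, with __builtin_bswap64 standing in for REV8:

    #include <stdint.h>
    #include <string.h>

    static void store_two_words_be(uint8_t* hash, const uint32_t* digest)
    {
        uint64_t pair;
        uint32_t hi, lo;

        memcpy(&pair, digest, sizeof(pair)); /* ld  tX, 0(digest)             */
        pair = __builtin_bswap64(pair);      /* REV8: reverse all eight bytes */
        hi = (uint32_t)(pair >> 32);         /* srli: big-endian word 0       */
        lo = (uint32_t)pair;                 /*       big-endian word 1       */
        memcpy(hash + 0, &hi, 4);            /* sw hi, 0(hash)                */
        memcpy(hash + 4, &lo, 4);            /* sw lo, 4(hash)                */
    }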
@@ -7789,10 +7789,10 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t chacha_test(void)
             return WC_TEST_RET_ENC_EC(ret);
 
         if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE))
-            return WC_TEST_RET_ENC_NC;
+            return WC_TEST_RET_ENC_I(i);
 
         if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE))
-            return WC_TEST_RET_ENC_NC;
+            return WC_TEST_RET_ENC_I(i);
     }
 
 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
@@ -82,7 +82,8 @@ typedef struct ChaCha {
     byte extra[12];
 #endif
     word32 left; /* number of bytes leftover */
-#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM)
+#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM) || \
+    defined(WOLFSSL_RISCV_ASM)
     word32 over[CHACHA_CHUNK_WORDS];
 #endif
 } ChaCha;
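The new WOLFSSL_RISCV_ASM case gives the RISC-V path the same over/left bookkeeping the Intel and ARM speedups use: keystream generated past the end of the input is parked in over and consumed on the next call. A minimal sketch of that consumption, assuming this general scheme; the helper name and the exact offset handling are illustrative, not the implementation:

    /* Consume up to ctx->left bytes of keystream saved in ctx->over. */
    static word32 use_leftover(ChaCha* ctx, byte* out, const byte* in, word32 len)
    {
        const byte* over = (const byte*)ctx->over;
        word32 off  = CHACHA_CHUNK_BYTES - ctx->left;    /* first unused byte */
        word32 have = (ctx->left < len) ? ctx->left : len;
        word32 i;

        for (i = 0; i < have; i++)
            out[i] = (byte)(in[i] ^ over[off + i]);
        ctx->left -= have;
        return have;
    }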
@@ -137,6 +137,12 @@
              (0b0010011 << 0) | \
              (rs << 15) | (rd << 7))
 
+#define RORIW(rd, rs, imm) \
+    ASM_WORD((0b0110000 << 25) | (0b101 << 12) | \
+             (0b0011011 << 0) | \
+             (imm << 20) | (rs << 15) | (rd << 7))
+
+
 /* rd = rs1[0..31] | rs2[0..31]. */
 #define PACK(rd, rs1, rs2) \
     ASM_WORD((0b0000100 << 25) | (0b100 << 12) | 0b0110011 | \
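RORIW above emits the Zbb/Zbkb roriw encoding as a literal instruction word; PACK, already present below it, concatenates two 32-bit halves. A hedged C model of the semantics these two instructions provide, paraphrased from the bit-manipulation extensions; the function names are illustrative:

    #include <stdint.h>

    /* roriw rd, rs, imm: rotate the low 32 bits of rs right by imm,
     * then sign-extend the 32-bit result (RV64 *W behaviour). */
    static int64_t roriw_model(int64_t rs, unsigned imm)
    {
        uint32_t x = (uint32_t)rs;
        uint32_t r = (x >> (imm & 31)) | (x << ((32 - imm) & 31));
        return (int64_t)(int32_t)r;
    }

    /* pack rd, rs1, rs2: low 32 bits of rs1 in the low half,
     * low 32 bits of rs2 in the high half. */
    static uint64_t pack_model(uint64_t rs1, uint64_t rs2)
    {
        return (rs1 & 0xffffffffULL) | (rs2 << 32);
    }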
@@ -184,16 +190,36 @@
 /* Move from vector register to vector register. */
 #define VMV_V_V(vd, vs1) \
     ASM_WORD((0b1010111 << 0) | (0b000 << 12) | (0b1 << 25) | \
-             (0b010111 << 26) | (vd << 7) | (vs1 << 15))
+             (0b010111 << 26) | ((vd) << 7) | ((vs1) << 15))
 /* Splat register to each component of the vector register. */
 #define VMV_V_X(vd, rs1) \
     ASM_WORD((0b1010111 << 0) | (0b100 << 12) | (0b1 << 25) | \
-             (0b010111 << 26) | (vd << 7) | (rs1 << 15))
+             (0b010111 << 26) | ((vd) << 7) | ((rs1) << 15))
+/* Splat immediate to each component of the vector register. */
+#define VMV_V_I(vd, imm) \
+    ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
+             (0b010111 << 26) | ((vd) << 7) | ((imm) << 15))
 /* Move n vector registers to vector registers. */
 #define VMVR_V(vd, vs2, n) \
     ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
-             (0b100111 << 26) | (vd << 7) | ((n-1) << 15) | \
-             (vs2 << 20))
+             (0b100111 << 26) | ((vd) << 7) | ((n-1) << 15) | \
+             ((vs2) << 20))
 
 
+/*
+ * Logic
+ */
+
+/* vd = vs2 << uimm */
+#define VSLL_VI(vd, vs2, uimm) \
+    ASM_WORD((0b100101 << 26) | (0b1 << 25) | \
+             (0b011 << 12) | (0b1010111 << 0) | \
+             (vd << 7) | (uimm << 15) | (vs2 << 20))
+/* vd = vs2 >> uimm */
+#define VSRL_VI(vd, vs2, uimm) \
+    ASM_WORD((0b101000 << 26) | (0b1 << 25) | \
+             (0b011 << 12) | (0b1010111 << 0) | \
+             (vd << 7) | (uimm << 15) | (vs2 << 20))
+
+
 /*
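The only functional change to the pre-existing macros above is wrapping each parameter in parentheses before it meets the surrounding shifts and ORs. A small illustration of the hazard being closed, with hypothetical values that are not from the commit:

    #define SHIFT_BAD(vd)   (vd << 7)
    #define SHIFT_GOOD(vd)  ((vd) << 7)

    /* With an expression argument such as (1 | 2):
     * SHIFT_BAD(1 | 2)  expands to (1 | 2 << 7)   == 1 | 256 == 257
     * SHIFT_GOOD(1 | 2) expands to ((1 | 2) << 7) == 3 << 7  == 384
     */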
@@ -235,13 +261,13 @@
 #define VMV_X_S(rd, vs2) \
     ASM_WORD((0b010000 << 26) | (0b1 << 25) | \
              (0b010 << 12) | (0b1010111 << 0) | \
-             (rd << 7) | (vs2 << 20))
+             ((rd) << 7) | ((vs2) << 20))
 
 /* vd[0] = x[rs1] */
 #define VMV_S_X(vd, rs1) \
     ASM_WORD((0b010000 << 26) | (0b1 << 25) | \
              (0b110 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (rs1 << 15))
+             ((vd) << 7) | ((rs1) << 15))
 
 /* vd[shift..max] = vs2[0..max-shift]
  * Sliding up doesn't change bottom part of destination.
@@ -249,7 +275,7 @@
 #define VSLIDEUP_VI(vd, vs2, shift) \
     ASM_WORD((0b001110 << 26) | (0b1 << 25) | \
              (0b011 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (shift << 15) | (vs2 << 20))
+             ((vd) << 7) | ((shift) << 15) | ((vs2) << 20))
 
 /* vd[0..max-shift] = vs2[shift..max]
  * Sliding down changes the top part of destination.
@@ -257,13 +283,18 @@
 #define VSLIDEDOWN_VI(vd, vs2, shift) \
     ASM_WORD((0b001111 << 26) | (0b1 << 25) | \
              (0b011 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (shift << 15) | (vs2 << 20))
+             ((vd) << 7) | ((shift) << 15) | ((vs2) << 20))
 
 /* vd[i] = vs1[vs2[i]] */
 #define VRGATHER_VV(vd, vs1, vs2) \
     ASM_WORD((0b001100 << 26) | (0b1 << 25) | \
              (0b000 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+             ((vd) << 7) | ((vs1) << 15) | ((vs2) << 20))
 
+#define VID_V(vd) \
+    ASM_WORD((0b010100 << 26) | (0b1 << 25) | (0b00000 << 20) | \
+             (0b10001 << 15) | (0b010 << 12) | \
+             (0b1010111 << 0) | ((vd) << 7))
+
 
 /*
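VID_V is added above without a comment: it writes each element's index into the destination, and VRGATHER_VV then indexes one vector with another, which together is a common way to build table-driven shuffles in RVV. A rough C model of the two operations, assuming 32-bit elements, for illustration only:

    #include <stdint.h>

    /* vid.v vd: write the element index into every element. */
    static void vid_v_model(uint32_t* vd, int vl)
    {
        int i;
        for (i = 0; i < vl; i++)
            vd[i] = (uint32_t)i;
    }

    /* vrgather.vv vd, vs1, vs2: vd[i] = vs1[vs2[i]]. */
    static void vrgather_vv_model(uint32_t* vd, const uint32_t* vs1,
                                  const uint32_t* vs2, int vl)
    {
        int i;
        for (i = 0; i < vl; i++)
            vd[i] = vs1[vs2[i]];
    }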
@@ -281,15 +312,22 @@
     defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM)
 
 /*
- * Bit Manipulation
+ * Vector Bit Manipulation
  */
 
 /* Reverse order of bytes in words of vector register. */
 #define VREV8(vd, vs2) \
     ASM_WORD((0b010010 << 26) | (0b1 << 25) | (0b01001 << 15) | \
              (0b010 << 12) | (0b1010111 << 0) | \
              (vs2 << 20) | (vd << 7))
 
+/* Rotate right words of vector register by immediate. */
+#define VROR_VI(vd, imm, vs2) \
+    ASM_WORD((0b01010 << 27) | (0b1 << 25) | (0b011 << 12) | \
+             (0b1010111 << 0) | ((imm >> 5) << 26) | \
+             (vs2 << 20) | ((imm & 0x1f) << 15) | (vd << 7))
+
+
 #endif /* WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION ||
         * WOLFSSL_RISCV_VECTOR_CRYPTO_ASM */
 
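VROR_VI rotates every element of a vector register right by an immediate, which is exactly the shape of work in the ChaCha20 quarter round: its left-rotates by 16, 12, 8 and 7 become right-rotates by 16, 20, 24 and 25. A hedged C model with 32-bit elements (SEW=32 assumed):

    #include <stdint.h>

    /* vror.vi vd, vs2, imm: rotate each 32-bit lane right by imm. */
    static void vror_vi_model(uint32_t* vd, const uint32_t* vs2,
                              unsigned imm, int vl)
    {
        int i;
        for (i = 0; i < vl; i++)
            vd[i] = (vs2[i] >> (imm & 31)) | (vs2[i] << ((32 - imm) & 31));
    }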