RISC-V ChaCha20: assembly implementations

ChaCha20:
  scalar and vector implementations
  vector implementations doing 6, 4, 2, 1 block at a time.
  scalar implementations using roriw and pack
  vector implementations using VROR_VI and roriw.

RISC-V SHA-256: avoid using s0 if it can be helped.
pull/7818/head
Sean Parkinson 2024-08-01 16:57:42 +10:00
parent 1b8254d668
commit ebb49b6e68
9 changed files with 2470 additions and 53 deletions

View File

@ -3077,10 +3077,14 @@ do
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_CARRYLESS"
;;
zkn|zkned)
# AES encrypt/decrypt
# AES encrypt/decrypt, SHA-2
ENABLED_RISCV_ASM=yes
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_SCALAR_CRYPTO_ASM"
;;
zv)
ENABLED_RISCV_ASM=yes
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR"
;;
zvkg)
# VGMUL, VGHSH
ENABLED_RISCV_ASM=yes
@ -3097,12 +3101,12 @@ do
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION"
;;
zvkned)
# Vector AES
# Vector AES, SHA-2
ENABLED_RISCV_ASM=yes
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR_CRYPTO_ASM"
;;
*)
AC_MSG_ERROR([Invalid RISC-V option [yes,zbkb,zbb,zbc,zbkc,zkn,zkned,zvkg,zvbc,zvbb,zvkb,zvkned]: $ENABLED_RISCV_ASM.])
AC_MSG_ERROR([Invalid RISC-V option [yes,zbkb,zbb,zbc,zbkc,zkn,zkned,zv,zvkg,zvbc,zvbb,zvkb,zvkned]: $ENABLED_RISCV_ASM.])
break
;;
esac

View File

@ -971,17 +971,21 @@ if BUILD_CHACHA
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-chacha.c
else
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-chacha.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha.c
endif !BUILD_RISCV_ASM
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha_asm.S
endif
endif
endif
endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM_NEON
if BUILD_POLY1305
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha20_poly1305.c
endif
endif
endif BUILD_POLY1305
endif BUILD_CHACHA
if !BUILD_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/misc.c

View File

@ -38,6 +38,9 @@ Public domain.
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
/* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
#elif defined(WOLFSSL_RISCV_ASM)
/* implementation located in wolfcrypt/src/port/riscv/riscv-64-chacha.c */
#else
#if defined(HAVE_CHACHA)

View File

@ -75,18 +75,6 @@ static WC_INLINE void memcpy16(byte* out, const byte* in)
#endif
/* vd = vs2 << uimm */
#define VSLL_VI(vd, vs2, uimm) \
ASM_WORD((0b100101 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (uimm << 15) | (vs2 << 20))
/* vd = vs2 >> uimm */
#define VSRL_VI(vd, vs2, uimm) \
ASM_WORD((0b101000 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (uimm << 15) | (vs2 << 20))
/* Vector register set if equal: vd[i] = vs1[i] == vs2[i] ? 1 : 0 */
#define VMSEQ_VV(vd, vs1, vs2) \
ASM_WORD((0b011000 << 26) | (0b1 << 25) | \

File diff suppressed because it is too large Load Diff

View File

@ -846,41 +846,41 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
#elif defined(WOLFSSL_RISCV_BASE_BIT_MANIPULATION)
"ld t1, 0(%[digest])\n\t"
"ld t3, 8(%[digest])\n\t"
"ld s1, 16(%[digest])\n\t"
"ld s3, 24(%[digest])\n\t"
"ld a5, 16(%[digest])\n\t"
"ld a7, 24(%[digest])\n\t"
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
REV8(REG_S1, REG_S1)
REV8(REG_S3, REG_S3)
REV8(REG_A5, REG_A5)
REV8(REG_A7, REG_A7)
"srli t0, t1, 32\n\t"
"srli t2, t3, 32\n\t"
"srli s0, s1, 32\n\t"
"srli s2, s3, 32\n\t"
"srli a4, a5, 32\n\t"
"srli a6, a7, 32\n\t"
"sw t0, 0(%[hash])\n\t"
"sw t1, 4(%[hash])\n\t"
"sw t2, 8(%[hash])\n\t"
"sw t3, 12(%[hash])\n\t"
"sw s0, 16(%[hash])\n\t"
"sw s1, 20(%[hash])\n\t"
"sw s2, 24(%[hash])\n\t"
"sw s3, 28(%[hash])\n\t"
"sw a4, 16(%[hash])\n\t"
"sw a5, 20(%[hash])\n\t"
"sw a6, 24(%[hash])\n\t"
"sw a7, 28(%[hash])\n\t"
#else
LOAD_WORD_REV(t0, 0, %[digest], t2, t3, t4)
LOAD_WORD_REV(t1, 4, %[digest], t2, t3, t4)
LOAD_WORD_REV(s0, 8, %[digest], t2, t3, t4)
LOAD_WORD_REV(s1, 12, %[digest], t2, t3, t4)
LOAD_WORD_REV(a4, 8, %[digest], t2, t3, t4)
LOAD_WORD_REV(a5, 12, %[digest], t2, t3, t4)
"sw t0, 0(%[hash])\n\t"
"sw t1, 4(%[hash])\n\t"
"sw s0, 8(%[hash])\n\t"
"sw s1, 12(%[hash])\n\t"
"sw a4, 8(%[hash])\n\t"
"sw a5, 12(%[hash])\n\t"
LOAD_WORD_REV(t0, 16, %[digest], t2, t3, t4)
LOAD_WORD_REV(t1, 20, %[digest], t2, t3, t4)
LOAD_WORD_REV(s0, 24, %[digest], t2, t3, t4)
LOAD_WORD_REV(s1, 28, %[digest], t2, t3, t4)
LOAD_WORD_REV(a4, 24, %[digest], t2, t3, t4)
LOAD_WORD_REV(a5, 28, %[digest], t2, t3, t4)
"sw t0, 16(%[hash])\n\t"
"sw t1, 20(%[hash])\n\t"
"sw s0, 24(%[hash])\n\t"
"sw s1, 28(%[hash])\n\t"
"sw a4, 24(%[hash])\n\t"
"sw a5, 28(%[hash])\n\t"
#endif
:
: [digest] "r" (sha256->digest), [hash] "r" (hash)
@ -889,7 +889,7 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
, [rev_idx] "r" (rev_idx)
#endif
: "cc", "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
"s0", "s1", "s2", "s3"
"a4", "a5", "a6", "a7"
);
}

View File

@ -7789,10 +7789,10 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t chacha_test(void)
return WC_TEST_RET_ENC_EC(ret);
if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE))
return WC_TEST_RET_ENC_NC;
return WC_TEST_RET_ENC_I(i);
if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE))
return WC_TEST_RET_ENC_NC;
return WC_TEST_RET_ENC_I(i);
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)

View File

@ -82,7 +82,8 @@ typedef struct ChaCha {
byte extra[12];
#endif
word32 left; /* number of bytes leftover */
#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM)
#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM) || \
defined(WOLFSSL_RISCV_ASM)
word32 over[CHACHA_CHUNK_WORDS];
#endif
} ChaCha;

View File

@ -137,6 +137,12 @@
(0b0010011 << 0) | \
(rs << 15) | (rd << 7))
#define RORIW(rd, rs, imm) \
ASM_WORD((0b0110000 << 25) | (0b101 << 12) | \
(0b0011011 << 0) | \
(imm << 20) | (rs << 15) | (rd << 7))
/* rd = rs1[0..31] | rs2[0..31]. */
#define PACK(rd, rs1, rs2) \
ASM_WORD((0b0000100 << 25) | (0b100 << 12) | 0b0110011 | \
@ -184,16 +190,36 @@
/* Move from vector register to vector register. */
#define VMV_V_V(vd, vs1) \
ASM_WORD((0b1010111 << 0) | (0b000 << 12) | (0b1 << 25) | \
(0b010111 << 26) | (vd << 7) | (vs1 << 15))
(0b010111 << 26) | ((vd) << 7) | ((vs1) << 15))
/* Splat register to each component of the vector register. */
#define VMV_V_X(vd, rs1) \
ASM_WORD((0b1010111 << 0) | (0b100 << 12) | (0b1 << 25) | \
(0b010111 << 26) | (vd << 7) | (rs1 << 15))
(0b010111 << 26) | ((vd) << 7) | ((rs1) << 15))
/* Splat immediate to each component of the vector register. */
#define VMV_V_I(vd, imm) \
ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
(0b010111 << 26) | ((vd) << 7) | ((imm) << 15))
/* Move n vector registers to vector registers. */
#define VMVR_V(vd, vs2, n) \
ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
(0b100111 << 26) | (vd << 7) | ((n-1) << 15) | \
(vs2 << 20))
(0b100111 << 26) | ((vd) << 7) | ((n-1) << 15) | \
((vs2) << 20))
/*
* Logic
*/
/* vd = vs2 << uimm */
#define VSLL_VI(vd, vs2, uimm) \
ASM_WORD((0b100101 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (uimm << 15) | (vs2 << 20))
/* vd = vs2 >> uimm */
#define VSRL_VI(vd, vs2, uimm) \
ASM_WORD((0b101000 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (uimm << 15) | (vs2 << 20))
/*
@ -235,13 +261,13 @@
#define VMV_X_S(rd, vs2) \
ASM_WORD((0b010000 << 26) | (0b1 << 25) | \
(0b010 << 12) | (0b1010111 << 0) | \
(rd << 7) | (vs2 << 20))
((rd) << 7) | ((vs2) << 20))
/* vd[0] = x[rs1] */
#define VMV_S_X(vd, rs1) \
ASM_WORD((0b010000 << 26) | (0b1 << 25) | \
(0b110 << 12) | (0b1010111 << 0) | \
(vd << 7) | (rs1 << 15))
((vd) << 7) | ((rs1) << 15))
/* vd[shift..max] = vs2[0..max-shift]
* Sliding up doesn't change bottom part of destination.
@ -249,7 +275,7 @@
#define VSLIDEUP_VI(vd, vs2, shift) \
ASM_WORD((0b001110 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (shift << 15) | (vs2 << 20))
((vd) << 7) | ((shift) << 15) | ((vs2) << 20))
/* vd[0..max-shift] = vs2[shift..max]
* Sliding down change top part of destination.
@ -257,13 +283,18 @@
#define VSLIDEDOWN_VI(vd, vs2, shift) \
ASM_WORD((0b001111 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (shift << 15) | (vs2 << 20))
((vd) << 7) | ((shift) << 15) | ((vs2) << 20))
/* vd[i] = vs1[vs2[i]] */
#define VRGATHER_VV(vd, vs1, vs2) \
ASM_WORD((0b001100 << 26) | (0b1 << 25) | \
(0b000 << 12) | (0b1010111 << 0) | \
(vd << 7) | (vs1 << 15) | (vs2 << 20))
((vd) << 7) | ((vs1) << 15) | ((vs2) << 20))
#define VID_V(vd) \
ASM_WORD((0b010100 << 26) | (0b1 << 25) | (0b00000 << 20) | \
(0b10001 << 15) | (0b010 << 12) | \
(0b1010111 << 0) | ((vd) << 7))
/*
@ -281,15 +312,22 @@
defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM)
/*
* Bit Manipulation
* Vector Bit Manipulation
*/
/* Reverse order of bytes in words of vector register. */
#define VREV8(vd, vs2) \
ASM_WORD((0b010010 << 26) | (0b1 << 25) | (0b01001<< 15) | \
(0b010 << 12) | (0b1010111 << 0) | \
(0b010 << 12) | (0b1010111 << 0) | \
(vs2 << 20) | (vd << 7))
/* Rotate right each word of vector register by immediate bit count. */
#define VROR_VI(vd, imm, vs2) \
ASM_WORD((0b01010 << 27) | (0b1 << 25) | (0b011 << 12) | \
(0b1010111 << 0) | ((imm >> 5) << 26) | \
(vs2 << 20) | ((imm & 0x1f) << 15) | (vd << 7))
#endif /* WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION ||
* WOLFSSL_RISCV_VECTOR_CRYPTO_ASM */