diff --git a/src/include.am b/src/include.am index ee33cd956..0900bbec3 100644 --- a/src/include.am +++ b/src/include.am @@ -1199,7 +1199,13 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm_ else src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm.S endif !BUILD_ARMASM_INLINE -endif BUILD_ARM_THUMB +else +if BUILD_ARMASM_INLINE +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c +else +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-kyber-asm.S +endif !BUILD_ARMASM_INLINE +endif !BUILD_ARM_THUMB endif BUILD_ARMASM if !BUILD_X86_ASM if BUILD_INTELASM diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/wolfcrypt/src/port/arm/armv8-32-curve25519.S index bf8daeec0..669c3c023 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -617,7 +617,7 @@ fe_cmov_table: #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r3, r2, #24 - asr r3, r2, #31 + asr r3, r3, #31 #else sbfx r3, r2, #7, #1 #endif @@ -1045,7 +1045,7 @@ fe_cmov_table: #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r3, r2, #24 - asr r3, r2, #31 + asr r3, r3, #31 #else sbfx r3, r2, #7, #1 #endif @@ -1474,7 +1474,7 @@ fe_cmov_table: #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r3, r2, #24 - asr r3, r2, #31 + asr r3, r3, #31 #else sbfx r3, r2, #7, #1 #endif @@ -1903,7 +1903,7 @@ fe_cmov_table: #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r3, r2, #24 - asr r3, r2, #31 + asr r3, r3, #31 #else sbfx r3, r2, #7, #1 #endif @@ -2346,7 +2346,7 @@ fe_cmov_table: #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsl r3, r2, #24 - asr r3, r2, #31 + asr r3, r3, #31 #else sbfx r3, r2, #7, #1 #endif @@ -3418,7 +3418,11 @@ fe_mul121666: #else mov r10, #0xdb42 #endif - movt r10, #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, 
#0x10000 +#else + movt r10, #0x1 +#endif #endif umull r2, r12, r10, r2 umull r3, lr, r10, r3 @@ -3485,7 +3489,11 @@ fe_mul121666: #else mov lr, #0xdb42 #endif - movt lr, #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr lr, lr, #0x10000 +#else + movt lr, #0x1 +#endif #endif umull r2, r10, lr, r2 sub r12, lr, #1 @@ -5479,8 +5487,13 @@ sc_reduce: #else mov r10, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 +#else movt r10, #0xa30a #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xa7 lsl r11, r11, #8 @@ -5497,7 +5510,12 @@ sc_reduce: #else mov r11, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 +#else movt r11, #0xa7ed +#endif #endif mov r1, #0 umlal r2, r1, r10, lr @@ -5521,8 +5539,13 @@ sc_reduce: #else mov r10, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 +#else movt r10, #0x5d08 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xeb lsl r11, r11, #8 @@ -5539,7 +5562,12 @@ sc_reduce: #else mov r11, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 +#else movt r11, #0xeb21 +#endif #endif adds r4, r4, r1 mov r1, #0 @@ -5575,7 +5603,12 @@ sc_reduce: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0!, {r10, r11} @@ -5638,7 +5671,12 @@ sc_reduce: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -5695,7 +5733,12 @@ sc_reduce: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -5752,7 +5795,12 @@ sc_reduce: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -5836,8 +5884,13 @@ sc_reduce: #else mov r2, #0xba7d #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 +#else movt r2, #0x4b9e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r3, #0xcb lsl r3, r3, #8 @@ -5854,8 +5907,13 @@ sc_reduce: #else mov r3, #0x4c63 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 +#else movt r3, #0xcb02 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r4, #0xd4 lsl r4, r4, #8 @@ -5872,8 +5930,13 @@ sc_reduce: #else mov r4, #0xf39a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 +#else movt r4, #0xd45e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r5, #2 lsl r5, r5, #8 @@ -5890,8 +5953,13 @@ sc_reduce: #else mov r5, #0xdf3b #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 +#else movt r5, #0x29b #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r9, #0x20000 lsl r9, r9, #8 @@ -5962,7 +6030,12 @@ sc_reduce: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0, {r6, r7, r8, r9} @@ -5998,7 +6071,12 @@ sc_reduce: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif 
#endif mov r10, #0 ldm r0, {r6, r7, r8, r9} @@ -6034,7 +6112,12 @@ sc_reduce: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov r11, #0 ldm r0, {r6, r7, r8, r9} @@ -6070,7 +6153,12 @@ sc_reduce: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov r12, #0 ldm r0, {r6, r7, r8, r9} @@ -6124,8 +6212,13 @@ sc_reduce: #else mov r10, #0xd3ed #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 +#else movt r10, #0x5cf5 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0x58 lsl r11, r11, #8 @@ -6142,8 +6235,13 @@ sc_reduce: #else mov r11, #0x631a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 +#else movt r11, #0x5812 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r12, #0xa2 lsl r12, r12, #8 @@ -6160,8 +6258,13 @@ sc_reduce: #else mov r12, #0x9cd6 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 +#else movt r12, #0xa2f7 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov lr, #20 lsl lr, lr, #8 @@ -6178,7 +6281,12 @@ sc_reduce: #else mov lr, #0xf9de #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 +#else movt lr, #0x14de +#endif #endif and r10, r10, r1 and r11, r11, r1 @@ -6256,8 +6364,13 @@ sc_reduce: #else mov r10, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 +#else movt r10, #0xa30a #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xa7 lsl r11, r11, #8 @@ -6274,7 
+6387,12 @@ sc_reduce: #else mov r11, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 +#else movt r11, #0xa7ed +#endif #endif mov r1, #0 umlal r2, r1, r10, lr @@ -6295,8 +6413,13 @@ sc_reduce: #else mov r10, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 +#else movt r10, #0x5d08 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xeb lsl r11, r11, #8 @@ -6313,7 +6436,12 @@ sc_reduce: #else mov r11, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 +#else movt r11, #0xeb21 +#endif #endif umaal r4, r1, r10, lr umaal r5, r1, r11, lr @@ -6343,7 +6471,12 @@ sc_reduce: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0!, {r10, r11} @@ -6385,7 +6518,12 @@ sc_reduce: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -6421,7 +6559,12 @@ sc_reduce: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -6457,7 +6600,12 @@ sc_reduce: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -6520,8 +6668,13 @@ sc_reduce: #else mov r2, #0xba7d #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 +#else movt r2, #0x4b9e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) mov r3, #0xcb lsl r3, r3, #8 @@ -6538,8 +6691,13 @@ sc_reduce: #else mov r3, #0x4c63 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 +#else movt r3, #0xcb02 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r4, #0xd4 lsl r4, r4, #8 @@ -6556,8 +6714,13 @@ sc_reduce: #else mov r4, #0xf39a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 +#else movt r4, #0xd45e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r5, #2 lsl r5, r5, #8 @@ -6574,8 +6737,13 @@ sc_reduce: #else mov r5, #0xdf3b #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 +#else movt r5, #0x29b #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r9, #0x20000 lsl r9, r9, #8 @@ -6646,7 +6814,12 @@ sc_reduce: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0, {r6, r7, r8, r9} @@ -6673,7 +6846,12 @@ sc_reduce: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif #endif mov r10, #0 ldm r0, {r6, r7, r8, r9} @@ -6700,7 +6878,12 @@ sc_reduce: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov r11, #0 ldm r0, {r6, r7, r8, r9} @@ -6727,7 +6910,12 @@ sc_reduce: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov r12, #0 ldm r0, {r6, r7, r8, r9} @@ -6772,8 +6960,13 @@ sc_reduce: #else mov r10, #0xd3ed #endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 +#else movt r10, #0x5cf5 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0x58 lsl r11, r11, #8 @@ -6790,8 +6983,13 @@ sc_reduce: #else mov r11, #0x631a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 +#else movt r11, #0x5812 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r12, #0xa2 lsl r12, r12, #8 @@ -6808,8 +7006,13 @@ sc_reduce: #else mov r12, #0x9cd6 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 +#else movt r12, #0xa2f7 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov lr, #20 lsl lr, lr, #8 @@ -6826,7 +7029,12 @@ sc_reduce: #else mov lr, #0xf9de #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 +#else movt lr, #0x14de +#endif #endif and r10, r10, r1 and r11, r11, r1 @@ -7256,8 +7464,13 @@ sc_muladd: #else mov r10, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 +#else movt r10, #0xa30a #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xa7 lsl r11, r11, #8 @@ -7274,7 +7487,12 @@ sc_muladd: #else mov r11, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 +#else movt r11, #0xa7ed +#endif #endif mov r1, #0 umlal r2, r1, r10, lr @@ -7298,8 +7516,13 @@ sc_muladd: #else mov r10, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 +#else movt r10, #0x5d08 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xeb lsl r11, r11, #8 @@ -7316,7 +7539,12 @@ sc_muladd: #else mov r11, #0x621 #endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 +#else movt r11, #0xeb21 +#endif #endif adds r4, r4, r1 mov r1, #0 @@ -7352,7 +7580,12 @@ sc_muladd: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0!, {r10, r11} @@ -7415,7 +7648,12 @@ sc_muladd: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -7472,7 +7710,12 @@ sc_muladd: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -7529,7 +7772,12 @@ sc_muladd: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -7613,8 +7861,13 @@ sc_muladd: #else mov r2, #0xba7d #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 +#else movt r2, #0x4b9e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r3, #0xcb lsl r3, r3, #8 @@ -7631,8 +7884,13 @@ sc_muladd: #else mov r3, #0x4c63 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 +#else movt r3, #0xcb02 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r4, #0xd4 lsl r4, r4, #8 @@ -7649,8 +7907,13 @@ sc_muladd: #else mov r4, #0xf39a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 +#else movt r4, #0xd45e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r5, #2 lsl 
r5, r5, #8 @@ -7667,8 +7930,13 @@ sc_muladd: #else mov r5, #0xdf3b #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 +#else movt r5, #0x29b #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r9, #0x20000 lsl r9, r9, #8 @@ -7739,7 +8007,12 @@ sc_muladd: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0, {r6, r7, r8, r9} @@ -7775,7 +8048,12 @@ sc_muladd: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif #endif mov r10, #0 ldm r0, {r6, r7, r8, r9} @@ -7811,7 +8089,12 @@ sc_muladd: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov r11, #0 ldm r0, {r6, r7, r8, r9} @@ -7847,7 +8130,12 @@ sc_muladd: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov r12, #0 ldm r0, {r6, r7, r8, r9} @@ -7901,8 +8189,13 @@ sc_muladd: #else mov r10, #0xd3ed #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 +#else movt r10, #0x5cf5 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0x58 lsl r11, r11, #8 @@ -7919,8 +8212,13 @@ sc_muladd: #else mov r11, #0x631a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 +#else movt r11, #0x5812 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r12, #0xa2 lsl r12, r12, #8 @@ -7937,8 +8235,13 @@ sc_muladd: #else mov r12, #0x9cd6 #endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 +#else movt r12, #0xa2f7 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov lr, #20 lsl lr, lr, #8 @@ -7955,7 +8258,12 @@ sc_muladd: #else mov lr, #0xf9de #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 +#else movt lr, #0x14de +#endif #endif and r10, r10, r1 and r11, r11, r1 @@ -8163,8 +8471,13 @@ sc_muladd: #else mov r10, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 +#else movt r10, #0xa30a #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xa7 lsl r11, r11, #8 @@ -8181,7 +8494,12 @@ sc_muladd: #else mov r11, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 +#else movt r11, #0xa7ed +#endif #endif mov r1, #0 umlal r2, r1, r10, lr @@ -8202,8 +8520,13 @@ sc_muladd: #else mov r10, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 +#else movt r10, #0x5d08 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xeb lsl r11, r11, #8 @@ -8220,7 +8543,12 @@ sc_muladd: #else mov r11, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 +#else movt r11, #0xeb21 +#endif #endif umaal r4, r1, r10, lr umaal r5, r1, r11, lr @@ -8250,7 +8578,12 @@ sc_muladd: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0!, {r10, r11} @@ -8292,7 +8625,12 @@ sc_muladd: #else mov r1, #0x9ce5 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif 
#endif mov lr, #0 ldm r12, {r10, r11} @@ -8328,7 +8666,12 @@ sc_muladd: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -8364,7 +8707,12 @@ sc_muladd: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov lr, #0 ldm r12, {r10, r11} @@ -8427,8 +8775,13 @@ sc_muladd: #else mov r2, #0xba7d #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 +#else movt r2, #0x4b9e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r3, #0xcb lsl r3, r3, #8 @@ -8445,8 +8798,13 @@ sc_muladd: #else mov r3, #0x4c63 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 +#else movt r3, #0xcb02 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r4, #0xd4 lsl r4, r4, #8 @@ -8463,8 +8821,13 @@ sc_muladd: #else mov r4, #0xf39a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 +#else movt r4, #0xd45e #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r5, #2 lsl r5, r5, #8 @@ -8481,8 +8844,13 @@ sc_muladd: #else mov r5, #0xdf3b #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 +#else movt r5, #0x29b #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r9, #0x20000 lsl r9, r9, #8 @@ -8553,7 +8921,12 @@ sc_muladd: #else mov r1, #0x2c13 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 +#else movt r1, #0xa30a +#endif #endif mov lr, #0 ldm r0, {r6, r7, r8, r9} @@ -8580,7 +8953,12 @@ sc_muladd: #else mov r1, #0x9ce5 #endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 +#else movt r1, #0xa7ed +#endif #endif mov r10, #0 ldm r0, {r6, r7, r8, r9} @@ -8607,7 +8985,12 @@ sc_muladd: #else mov r1, #0x6329 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 +#else movt r1, #0x5d08 +#endif #endif mov r11, #0 ldm r0, {r6, r7, r8, r9} @@ -8634,7 +9017,12 @@ sc_muladd: #else mov r1, #0x621 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 +#else movt r1, #0xeb21 +#endif #endif mov r12, #0 ldm r0, {r6, r7, r8, r9} @@ -8679,8 +9067,13 @@ sc_muladd: #else mov r10, #0xd3ed #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 +#else movt r10, #0x5cf5 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0x58 lsl r11, r11, #8 @@ -8697,8 +9090,13 @@ sc_muladd: #else mov r11, #0x631a #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 +#else movt r11, #0x5812 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r12, #0xa2 lsl r12, r12, #8 @@ -8715,8 +9113,13 @@ sc_muladd: #else mov r12, #0x9cd6 #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 +#else movt r12, #0xa2f7 #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov lr, #20 lsl lr, lr, #8 @@ -8733,7 +9136,12 @@ sc_muladd: #else mov lr, #0xf9de #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 +#else movt lr, #0x14de +#endif #endif and r10, r10, r1 and r11, r11, r1 diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index d00916ec6..cd778c1d5 100644 --- 
a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -697,7 +697,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r3, %[b], #24\n\t" - "asr r3, %[b], #31\n\t" + "asr r3, r3, #31\n\t" #else "sbfx r3, %[b], #7, #1\n\t" #endif @@ -1125,7 +1125,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r3, %[b], #24\n\t" - "asr r3, %[b], #31\n\t" + "asr r3, r3, #31\n\t" #else "sbfx r3, %[b], #7, #1\n\t" #endif @@ -1554,7 +1554,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r3, %[b], #24\n\t" - "asr r3, %[b], #31\n\t" + "asr r3, r3, #31\n\t" #else "sbfx r3, %[b], #7, #1\n\t" #endif @@ -1983,7 +1983,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r3, %[b], #24\n\t" - "asr r3, %[b], #31\n\t" + "asr r3, r3, #31\n\t" #else "sbfx r3, %[b], #7, #1\n\t" #endif @@ -2432,7 +2432,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r3, %[b], #24\n\t" - "asr r3, %[b], #31\n\t" + "asr r3, r3, #31\n\t" #else "sbfx r3, %[b], #7, #1\n\t" #endif @@ -3528,7 +3528,11 @@ void fe_mul121666(fe r_p, fe a_p) #else "mov r10, #0xdb42\n\t" #endif - "movt r10, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x10000\n\t" +#else + "movt r10, #0x1\n\t" +#endif #endif "umull r2, r12, r10, r2\n\t" "umull r3, lr, r10, r3\n\t" @@ -3600,7 +3604,11 @@ void fe_mul121666(fe r_p, fe a_p) #else "mov lr, #0xdb42\n\t" #endif - "movt lr, #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr lr, lr, #0x10000\n\t" +#else + "movt lr, #0x1\n\t" +#endif #endif "umull r2, r10, lr, r2\n\t" "sub r12, lr, #1\n\t" @@ -5704,8 
+5712,13 @@ void sc_reduce(byte* s_p) #else "mov r10, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" +#else "movt r10, #0xa30a\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xa7\n\t" "lsl r11, r11, #8\n\t" @@ -5722,7 +5735,12 @@ void sc_reduce(byte* s_p) #else "mov r11, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" +#else "movt r11, #0xa7ed\n\t" +#endif #endif "mov r1, #0\n\t" "umlal r2, r1, r10, lr\n\t" @@ -5746,8 +5764,13 @@ void sc_reduce(byte* s_p) #else "mov r10, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" +#else "movt r10, #0x5d08\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xeb\n\t" "lsl r11, r11, #8\n\t" @@ -5764,7 +5787,12 @@ void sc_reduce(byte* s_p) #else "mov r11, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" +#else "movt r11, #0xeb21\n\t" +#endif #endif "adds r4, r4, r1\n\t" "mov r1, #0\n\t" @@ -5800,7 +5828,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa3000000\n\t" + "orr r1, r1, #0xa0000\n\t" +#else "movt r1, #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s]!, {r10, r11}\n\t" @@ -5863,7 +5896,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa7000000\n\t" + "orr r1, r1, #0xed0000\n\t" +#else "movt r1, #0xa7ed\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -5920,7 +5958,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0x5d000000\n\t" + "orr r1, r1, #0x80000\n\t" +#else "movt r1, #0x5d08\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -5977,7 +6020,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xeb000000\n\t" + "orr r1, r1, #0x210000\n\t" +#else "movt r1, #0xeb21\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -6061,8 +6109,13 @@ void sc_reduce(byte* s_p) #else "mov r2, #0xba7d\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r2, r2, #0x4b000000\n\t" + "orr r2, r2, #0x9e0000\n\t" +#else "movt r2, #0x4b9e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r3, #0xcb\n\t" "lsl r3, r3, #8\n\t" @@ -6079,8 +6132,13 @@ void sc_reduce(byte* s_p) #else "mov r3, #0x4c63\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r3, r3, #0xcb000000\n\t" + "orr r3, r3, #0x20000\n\t" +#else "movt r3, #0xcb02\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r4, #0xd4\n\t" "lsl r4, r4, #8\n\t" @@ -6097,8 +6155,13 @@ void sc_reduce(byte* s_p) #else "mov r4, #0xf39a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" +#else "movt r4, #0xd45e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r5, #2\n\t" "lsl r5, r5, #8\n\t" @@ -6115,8 +6178,13 @@ void sc_reduce(byte* s_p) #else "mov r5, #0xdf3b\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" +#else "movt r5, #0x29b\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r9, #0x20000\n\t" "lsl r9, r9, #8\n\t" @@ -6187,7 +6255,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, 
#0xa3000000\n\t" + "orr r1, r1, #0xa0000\n\t" +#else "movt r1, #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6223,7 +6296,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa7000000\n\t" + "orr r1, r1, #0xed0000\n\t" +#else "movt r1, #0xa7ed\n\t" +#endif #endif "mov r10, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6259,7 +6337,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0x5d000000\n\t" + "orr r1, r1, #0x80000\n\t" +#else "movt r1, #0x5d08\n\t" +#endif #endif "mov r11, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6295,7 +6378,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xeb000000\n\t" + "orr r1, r1, #0x210000\n\t" +#else "movt r1, #0xeb21\n\t" +#endif #endif "mov r12, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6349,8 +6437,13 @@ void sc_reduce(byte* s_p) #else "mov r10, #0xd3ed\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" +#else "movt r10, #0x5cf5\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0x58\n\t" "lsl r11, r11, #8\n\t" @@ -6367,8 +6460,13 @@ void sc_reduce(byte* s_p) #else "mov r11, #0x631a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, #0x120000\n\t" +#else "movt r11, #0x5812\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r12, #0xa2\n\t" "lsl r12, r12, #8\n\t" @@ -6385,8 +6483,13 @@ void sc_reduce(byte* s_p) #else "mov r12, #0x9cd6\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" +#else "movt r12, #0xa2f7\n\t" #endif +#endif 
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov lr, #20\n\t" "lsl lr, lr, #8\n\t" @@ -6403,7 +6506,12 @@ void sc_reduce(byte* s_p) #else "mov lr, #0xf9de\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" +#else "movt lr, #0x14de\n\t" +#endif #endif "and r10, r10, r1\n\t" "and r11, r11, r1\n\t" @@ -6485,8 +6593,13 @@ void sc_reduce(byte* s_p) #else "mov r10, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" +#else "movt r10, #0xa30a\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xa7\n\t" "lsl r11, r11, #8\n\t" @@ -6503,7 +6616,12 @@ void sc_reduce(byte* s_p) #else "mov r11, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" +#else "movt r11, #0xa7ed\n\t" +#endif #endif "mov r1, #0\n\t" "umlal r2, r1, r10, lr\n\t" @@ -6524,8 +6642,13 @@ void sc_reduce(byte* s_p) #else "mov r10, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" +#else "movt r10, #0x5d08\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xeb\n\t" "lsl r11, r11, #8\n\t" @@ -6542,7 +6665,12 @@ void sc_reduce(byte* s_p) #else "mov r11, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" +#else "movt r11, #0xeb21\n\t" +#endif #endif "umaal r4, r1, r10, lr\n\t" "umaal r5, r1, r11, lr\n\t" @@ -6572,7 +6700,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa3000000\n\t" + "orr r1, r1, #0xa0000\n\t" +#else "movt r1, #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s]!, {r10, r11}\n\t" @@ 
-6614,7 +6747,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa7000000\n\t" + "orr r1, r1, #0xed0000\n\t" +#else "movt r1, #0xa7ed\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -6650,7 +6788,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0x5d000000\n\t" + "orr r1, r1, #0x80000\n\t" +#else "movt r1, #0x5d08\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -6686,7 +6829,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xeb000000\n\t" + "orr r1, r1, #0x210000\n\t" +#else "movt r1, #0xeb21\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -6749,8 +6897,13 @@ void sc_reduce(byte* s_p) #else "mov r2, #0xba7d\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r2, r2, #0x4b000000\n\t" + "orr r2, r2, #0x9e0000\n\t" +#else "movt r2, #0x4b9e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r3, #0xcb\n\t" "lsl r3, r3, #8\n\t" @@ -6767,8 +6920,13 @@ void sc_reduce(byte* s_p) #else "mov r3, #0x4c63\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r3, r3, #0xcb000000\n\t" + "orr r3, r3, #0x20000\n\t" +#else "movt r3, #0xcb02\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r4, #0xd4\n\t" "lsl r4, r4, #8\n\t" @@ -6785,8 +6943,13 @@ void sc_reduce(byte* s_p) #else "mov r4, #0xf39a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" +#else "movt r4, #0xd45e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r5, #2\n\t" "lsl r5, r5, #8\n\t" @@ -6803,8 +6966,13 @@ void sc_reduce(byte* s_p) #else "mov r5, #0xdf3b\n\t" #endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" +#else "movt r5, #0x29b\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r9, #0x20000\n\t" "lsl r9, r9, #8\n\t" @@ -6875,7 +7043,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa3000000\n\t" + "orr r1, r1, #0xa0000\n\t" +#else "movt r1, #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6902,7 +7075,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xa7000000\n\t" + "orr r1, r1, #0xed0000\n\t" +#else "movt r1, #0xa7ed\n\t" +#endif #endif "mov r10, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6929,7 +7107,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0x5d000000\n\t" + "orr r1, r1, #0x80000\n\t" +#else "movt r1, #0x5d08\n\t" +#endif #endif "mov r11, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -6956,7 +7139,12 @@ void sc_reduce(byte* s_p) #else "mov r1, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r1, r1, #0xeb000000\n\t" + "orr r1, r1, #0x210000\n\t" +#else "movt r1, #0xeb21\n\t" +#endif #endif "mov r12, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -7001,8 +7189,13 @@ void sc_reduce(byte* s_p) #else "mov r10, #0xd3ed\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" +#else "movt r10, #0x5cf5\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0x58\n\t" "lsl r11, r11, #8\n\t" @@ -7019,8 +7212,13 @@ void sc_reduce(byte* s_p) #else "mov r11, #0x631a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, 
#0x120000\n\t" +#else "movt r11, #0x5812\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r12, #0xa2\n\t" "lsl r12, r12, #8\n\t" @@ -7037,8 +7235,13 @@ void sc_reduce(byte* s_p) #else "mov r12, #0x9cd6\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" +#else "movt r12, #0xa2f7\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov lr, #20\n\t" "lsl lr, lr, #8\n\t" @@ -7055,7 +7258,12 @@ void sc_reduce(byte* s_p) #else "mov lr, #0xf9de\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" +#else "movt lr, #0x14de\n\t" +#endif #endif "and r10, r10, r1\n\t" "and r11, r11, r1\n\t" @@ -7492,8 +7700,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r10, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" +#else "movt r10, #0xa30a\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xa7\n\t" "lsl r11, r11, #8\n\t" @@ -7510,7 +7723,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r11, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" +#else "movt r11, #0xa7ed\n\t" +#endif #endif "mov %[a], #0\n\t" "umlal %[b], %[a], r10, lr\n\t" @@ -7534,8 +7752,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r10, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" +#else "movt r10, #0x5d08\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xeb\n\t" "lsl r11, r11, #8\n\t" @@ -7552,7 +7775,12 @@ void 
sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r11, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" +#else "movt r11, #0xeb21\n\t" +#endif #endif "adds r4, r4, %[a]\n\t" "mov %[a], #0\n\t" @@ -7588,7 +7816,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" +#else "movt %[a], #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s]!, {r10, r11}\n\t" @@ -7651,7 +7884,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" +#else "movt %[a], #0xa7ed\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -7708,7 +7946,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" +#else "movt %[a], #0x5d08\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -7765,7 +8008,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" +#else "movt %[a], #0xeb21\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -7849,8 +8097,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[b], #0xba7d\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[b], %[b], #0x4b000000\n\t" + "orr %[b], %[b], #0x9e0000\n\t" 
+#else "movt %[b], #0x4b9e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov %[c], #0xcb\n\t" "lsl %[c], %[c], #8\n\t" @@ -7867,8 +8120,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[c], #0x4c63\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[c], %[c], #0xcb000000\n\t" + "orr %[c], %[c], #0x20000\n\t" +#else "movt %[c], #0xcb02\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r4, #0xd4\n\t" "lsl r4, r4, #8\n\t" @@ -7885,8 +8143,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r4, #0xf39a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" +#else "movt r4, #0xd45e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r5, #2\n\t" "lsl r5, r5, #8\n\t" @@ -7903,8 +8166,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r5, #0xdf3b\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" +#else "movt r5, #0x29b\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r9, #0x20000\n\t" "lsl r9, r9, #8\n\t" @@ -7975,7 +8243,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" +#else "movt %[a], #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8011,7 +8284,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" +#else "movt %[a], 
#0xa7ed\n\t" +#endif #endif "mov r10, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8047,7 +8325,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" +#else "movt %[a], #0x5d08\n\t" +#endif #endif "mov r11, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8083,7 +8366,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" +#else "movt %[a], #0xeb21\n\t" +#endif #endif "mov r12, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8137,8 +8425,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r10, #0xd3ed\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" +#else "movt r10, #0x5cf5\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0x58\n\t" "lsl r11, r11, #8\n\t" @@ -8155,8 +8448,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r11, #0x631a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, #0x120000\n\t" +#else "movt r11, #0x5812\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r12, #0xa2\n\t" "lsl r12, r12, #8\n\t" @@ -8173,8 +8471,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r12, #0x9cd6\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" +#else "movt r12, #0xa2f7\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov lr, #20\n\t" "lsl lr, 
lr, #8\n\t" @@ -8191,7 +8494,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov lr, #0xf9de\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" +#else "movt lr, #0x14de\n\t" +#endif #endif "and r10, r10, %[a]\n\t" "and r11, r11, %[a]\n\t" @@ -8406,8 +8714,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r10, #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" +#else "movt r10, #0xa30a\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xa7\n\t" "lsl r11, r11, #8\n\t" @@ -8424,7 +8737,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r11, #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" +#else "movt r11, #0xa7ed\n\t" +#endif #endif "mov %[a], #0\n\t" "umlal %[b], %[a], r10, lr\n\t" @@ -8445,8 +8763,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r10, #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" +#else "movt r10, #0x5d08\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xeb\n\t" "lsl r11, r11, #8\n\t" @@ -8463,7 +8786,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r11, #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" +#else "movt r11, #0xeb21\n\t" +#endif #endif "umaal r4, %[a], r10, lr\n\t" "umaal r5, %[a], r11, lr\n\t" @@ -8493,7 +8821,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov 
%[a], #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" +#else "movt %[a], #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s]!, {r10, r11}\n\t" @@ -8535,7 +8868,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" +#else "movt %[a], #0xa7ed\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -8571,7 +8909,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" +#else "movt %[a], #0x5d08\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -8607,7 +8950,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" +#else "movt %[a], #0xeb21\n\t" +#endif #endif "mov lr, #0\n\t" "ldm r12, {r10, r11}\n\t" @@ -8670,8 +9018,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[b], #0xba7d\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[b], %[b], #0x4b000000\n\t" + "orr %[b], %[b], #0x9e0000\n\t" +#else "movt %[b], #0x4b9e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov %[c], #0xcb\n\t" "lsl %[c], %[c], #8\n\t" @@ -8688,8 +9041,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[c], #0x4c63\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[c], %[c], #0xcb000000\n\t" + "orr %[c], %[c], #0x20000\n\t" +#else "movt %[c], 
#0xcb02\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r4, #0xd4\n\t" "lsl r4, r4, #8\n\t" @@ -8706,8 +9064,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r4, #0xf39a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" +#else "movt r4, #0xd45e\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r5, #2\n\t" "lsl r5, r5, #8\n\t" @@ -8724,8 +9087,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r5, #0xdf3b\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" +#else "movt r5, #0x29b\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r9, #0x20000\n\t" "lsl r9, r9, #8\n\t" @@ -8796,7 +9164,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x2c13\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" +#else "movt %[a], #0xa30a\n\t" +#endif #endif "mov lr, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8823,7 +9196,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x9ce5\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" +#else "movt %[a], #0xa7ed\n\t" +#endif #endif "mov r10, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8850,7 +9228,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x6329\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" +#else "movt %[a], #0x5d08\n\t" +#endif #endif "mov r11, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" 
@@ -8877,7 +9260,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov %[a], #0x621\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" +#else "movt %[a], #0xeb21\n\t" +#endif #endif "mov r12, #0\n\t" "ldm %[s], {r6, r7, r8, r9}\n\t" @@ -8922,8 +9310,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r10, #0xd3ed\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" +#else "movt r10, #0x5cf5\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0x58\n\t" "lsl r11, r11, #8\n\t" @@ -8940,8 +9333,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r11, #0x631a\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, #0x120000\n\t" +#else "movt r11, #0x5812\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r12, #0xa2\n\t" "lsl r12, r12, #8\n\t" @@ -8958,8 +9356,13 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov r12, #0x9cd6\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" +#else "movt r12, #0xa2f7\n\t" #endif +#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov lr, #20\n\t" "lsl lr, lr, #8\n\t" @@ -8976,7 +9379,12 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #else "mov lr, #0xf9de\n\t" #endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" +#else "movt lr, #0x14de\n\t" +#endif #endif "and r10, r10, %[a]\n\t" "and r11, r11, %[a]\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S 
b/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S new file mode 100644 index 000000000..e24888a27 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S @@ -0,0 +1,9442 @@ +/* armv8-32-kyber-asm + * + * Copyright (C) 2006-2024 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./kyber/kyber.rb arm32 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) +#ifndef WOLFSSL_ARMASM_INLINE +#ifdef WOLFSSL_WC_KYBER + .text + .type L_kyber_arm32_ntt_zetas, %object + .size L_kyber_arm32_ntt_zetas, 256 + .align 4 +L_kyber_arm32_ntt_zetas: + .short 0x8ed + .short 0xa0b + .short 0xb9a + .short 0x714 + .short 0x5d5 + .short 0x58e + .short 0x11f + .short 0xca + .short 0xc56 + .short 0x26e + .short 0x629 + .short 0xb6 + .short 0x3c2 + .short 0x84f + .short 0x73f + .short 0x5bc + .short 0x23d + .short 0x7d4 + .short 0x108 + .short 0x17f + .short 0x9c4 + .short 0x5b2 + .short 0x6bf + .short 0xc7f + .short 0xa58 + .short 0x3f9 + .short 0x2dc + .short 0x260 + .short 0x6fb + .short 0x19b + .short 0xc34 + .short 0x6de + .short
0x4c7 + .short 0x28c + .short 0xad9 + .short 0x3f7 + .short 0x7f4 + .short 0x5d3 + .short 0xbe7 + .short 0x6f9 + .short 0x204 + .short 0xcf9 + .short 0xbc1 + .short 0xa67 + .short 0x6af + .short 0x877 + .short 0x7e + .short 0x5bd + .short 0x9ac + .short 0xca7 + .short 0xbf2 + .short 0x33e + .short 0x6b + .short 0x774 + .short 0xc0a + .short 0x94a + .short 0xb73 + .short 0x3c1 + .short 0x71d + .short 0xa2c + .short 0x1c0 + .short 0x8d8 + .short 0x2a5 + .short 0x806 + .short 0x8b2 + .short 0x1ae + .short 0x22b + .short 0x34b + .short 0x81e + .short 0x367 + .short 0x60e + .short 0x69 + .short 0x1a6 + .short 0x24b + .short 0xb1 + .short 0xc16 + .short 0xbde + .short 0xb35 + .short 0x626 + .short 0x675 + .short 0xc0b + .short 0x30a + .short 0x487 + .short 0xc6e + .short 0x9f8 + .short 0x5cb + .short 0xaa7 + .short 0x45f + .short 0x6cb + .short 0x284 + .short 0x999 + .short 0x15d + .short 0x1a2 + .short 0x149 + .short 0xc65 + .short 0xcb6 + .short 0x331 + .short 0x449 + .short 0x25b + .short 0x262 + .short 0x52a + .short 0x7fc + .short 0x748 + .short 0x180 + .short 0x842 + .short 0xc79 + .short 0x4c2 + .short 0x7ca + .short 0x997 + .short 0xdc + .short 0x85e + .short 0x686 + .short 0x860 + .short 0x707 + .short 0x803 + .short 0x31a + .short 0x71b + .short 0x9ab + .short 0x99b + .short 0x1de + .short 0xc95 + .short 0xbcd + .short 0x3e4 + .short 0x3df + .short 0x3be + .short 0x74d + .short 0x5f2 + .short 0x65c + .text + .align 4 + .globl kyber_arm32_ntt + .type kyber_arm32_ntt, %function +kyber_arm32_ntt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #8 + adr r1, L_kyber_arm32_ntt_zetas +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xc000000 + orr r10, r10, #0xff0000 +#else + movt r10, #0xcff +#endif +#endif /* WOLFSSL_ARM_ARCH &&
WOLFSSL_ARM_ARCH >= 6 */ + mov r2, #16 +L_kyber_arm32_ntt_loop_123: + str r2, [sp] + ldrh r11, [r1, #2] + ldr r2, [r0] + ldr r3, [r0, #64] + ldr r4, [r0, #128] + ldr r5, [r0, #192] + ldr r6, [r0, #256] + ldr r7, [r0, #320] + ldr r8, [r0, #384] + ldr r9, [r0, #448] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r6 + smulbt r6, r11, r6 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r6 + smlabb lr, r10, lr, r6 + pkhtb r12, lr, r12, ASR #16 + ssub16 r6, r2, r12 + sadd16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r6, r6, #16 + mul r12, lr, r12 + mul r6, lr, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r6, #16 + asr lr, lr, #16 +#else + sbfx lr, r6, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r6 + sub r6, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, 
r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, lr, lsl #16 + ror r6, r6, #16 +#else + bfi r6, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r7 + smulbt r7, r11, r7 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb lr, r10, lr, r7 + pkhtb r12, lr, r12, ASR #16 + ssub16 r7, r3, r12 + sadd16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r7, r7, #16 + mul r12, lr, r12 + mul r7, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r7 + sub r7, r3, lr + add r3, r3, lr + sub lr, r3, r12, lsr #16 + add r12, r3, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, lr, lsl #16 + ror r7, r7, #16 +#else + bfi r7, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r8 + smulbt r8, r11, r8 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb lr, r10, lr, r8 + pkhtb r12, lr, r12, ASR #16 + ssub16 r8, r4, r12 + sadd16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r8, r8, #16 + mul r12, lr, r12 + mul r8, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16
+#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r8 + sub r8, r4, lr + add r4, r4, lr + sub lr, r4, r12, lsr #16 + add r12, r4, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, lr, lsl #16 + ror r8, r8, #16 +#else + bfi r8, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r9 + smulbt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r5, r12 + sadd16 r5, r5, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r9, r9, #16 + mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) &&
(WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r5, lr + add r5, r5, lr + sub lr, r5, r12, lsr #16 + add r12, r5, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r4 + smulbt r4, r11, r4 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb lr, r10, lr, r4 + pkhtb r12, lr, r12, ASR #16 + ssub16 r4, r2, r12 + sadd16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r4, r4, #16 + mul r12, lr, r12 + mul r4, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 
+#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r4 + sub r4, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, lr, lsl #16 + ror r4, r4, #16 +#else + bfi r4, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r5 + smulbt r5, r11, r5 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb lr, r10, lr, r5 + pkhtb r12, lr, r12, ASR #16 + ssub16 r5, r3, r12 + sadd16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r5, r5, #16 + mul r12, lr, r12 + mul r5, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + 
lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r5 + sub r5, r3, lr + add r3, r3, lr + sub lr, r3, r12, lsr #16 + add r12, r3, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, lr, lsl #16 + ror r5, r5, #16 +#else + bfi r5, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r8 + smultt r8, r11, r8 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb lr, r10, lr, r8 + pkhtb r12, lr, r12, ASR #16 + ssub16 r8, r6, r12 + sadd16 r6, r6, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r8, r8, #16 + mul r12, lr, r12 + mul r8, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r8 + sub r8, r6, lr + add r6, r6, lr + sub lr, r6, r12, lsr #16 + add r12, r6, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, lr, lsl #16 + ror r8, r8, #16 +#else + bfi r8, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r9 + smultt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r7, r12 + sadd16 r7, r7, r12 +#else +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r9, r9, #16 + mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r7, lr + add r7, r7, lr + sub lr, r7, r12, lsr #16 + add r12, r7, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #8] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, 
r11, r3 + smulbt r3, r11, r3 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r3 + smlabb lr, r10, lr, r3 + pkhtb r12, lr, r12, ASR #16 + ssub16 r3, r2, r12 + sadd16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r3, #16 + asr r12, r12, #16 +#else + sbfx r12, r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r3, r3, #16 + mul r12, lr, r12 + mul r3, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r3, #16 + asr lr, lr, #16 +#else + sbfx lr, r3, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r3 + sub r3, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, lr, lsl #16 + ror r3, r3, #16 +#else + bfi r3, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr 
r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r5 + smultt r5, r11, r5 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb lr, r10, lr, r5 + pkhtb r12, lr, r12, ASR #16 + ssub16 r5, r4, r12 + sadd16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r5, r5, #16 + mul r12, lr, r12 + mul r5, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r5 + sub r5, r4, lr + add r4, r4, lr + sub lr, r4, r12, lsr #16 + add r12, r4, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, lr, 
lsl #16 + ror r5, r5, #16 +#else + bfi r5, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #12] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r7 + smulbt r7, r11, r7 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb lr, r10, lr, r7 + pkhtb r12, lr, r12, ASR #16 + ssub16 r7, r6, r12 + sadd16 r6, r6, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r7, r7, #16 + mul r12, lr, r12 + mul r7, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r7 
+ sub r7, r6, lr + add r6, r6, lr + sub lr, r6, r12, lsr #16 + add r12, r6, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, lr, lsl #16 + ror r7, r7, #16 +#else + bfi r7, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r9 + smultt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r8, r12 + sadd16 r8, r8, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r9, r9, #16 + mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 
+#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r8, lr + add r8, r8, lr + sub lr, r8, r12, lsr #16 + add r12, r8, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + str r2, [r0] + str r3, [r0, #64] + str r4, [r0, #128] + str r5, [r0, #192] + str r6, [r0, #256] + str r7, [r0, #320] + str r8, [r0, #384] + str r9, [r0, #448] + ldr r2, [sp] + subs r2, r2, #1 + add r0, r0, #4 + bne L_kyber_arm32_ntt_loop_123 + sub r0, r0, #0x40 + mov r3, #0 +L_kyber_arm32_ntt_loop_4_j: + str r3, [sp, #4] + add r11, r1, r3, lsr #4 + mov r2, #4 + ldr r11, [r11, #16] +L_kyber_arm32_ntt_loop_4_i: + str r2, [sp] + ldr r2, [r0] + ldr r3, [r0, #16] + ldr r4, [r0, #32] + ldr r5, [r0, #48] + ldr r6, [r0, #64] + ldr r7, [r0, #80] + ldr r8, [r0, #96] + ldr r9, [r0, #112] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r4 + smulbt r4, r11, r4 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb lr, r10, lr, r4 + pkhtb r12, lr, r12, ASR #16 + ssub16 r4, r2, r12 + sadd16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r4, r4, #16 + mul r12, lr, r12 + mul r4, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, 
r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r4 + sub r4, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, lr, lsl #16 + ror r4, r4, #16 +#else + bfi r4, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r5 + smulbt r5, r11, r5 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb lr, r10, lr, r5 + pkhtb r12, lr, r12, ASR #16 + ssub16 r5, r3, r12 + sadd16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, 
r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r5, r5, #16 + mul r12, lr, r12 + mul r5, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r5 + sub r5, r3, lr + add r3, r3, lr + sub lr, r3, r12, lsr #16 + add r12, r3, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, lr, lsl #16 + ror r5, r5, #16 +#else + bfi r5, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r8 + smultt r8, r11, r8 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb lr, r10, lr, r8 + pkhtb r12, lr, r12, ASR #16 + ssub16 r8, r6, r12 + sadd16 r6, r6, r12 +#else 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r8, r8, #16 + mul r12, lr, r12 + mul r8, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r8 + sub r8, r6, lr + add r6, r6, lr + sub lr, r6, r12, lsr #16 + add r12, r6, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, lr, lsl #16 + ror r8, r8, #16 +#else + bfi r8, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r9 + smultt 
r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r7, r12 + sadd16 r7, r7, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r9, r9, #16 + mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r7, lr + add r7, r7, lr + sub lr, r7, r12, lsr #16 + add r12, r7, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, 
#16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + str r2, [r0] + str r3, [r0, #16] + str r4, [r0, #32] + str r5, [r0, #48] + str r6, [r0, #64] + str r7, [r0, #80] + str r8, [r0, #96] + str r9, [r0, #112] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp] + ldr r3, [sp, #4] +#else + ldrd r2, r3, [sp] +#endif + subs r2, r2, #1 + add r0, r0, #4 + bne L_kyber_arm32_ntt_loop_4_i + add r3, r3, #0x40 + rsbs r12, r3, #0x100 + add r0, r0, #0x70 + bne L_kyber_arm32_ntt_loop_4_j + sub r0, r0, #0x200 + mov r3, #0 +L_kyber_arm32_ntt_loop_567: + add r11, r1, r3, lsr #3 + str r3, [sp, #4] + ldrh r11, [r11, #32] + ldr r2, [r0] + ldr r3, [r0, #4] + ldr r4, [r0, #8] + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r8, [r0, #24] + ldr r9, [r0, #28] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r6 + smulbt r6, r11, r6 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r6 + smlabb lr, r10, lr, r6 + pkhtb r12, lr, r12, ASR #16 + ssub16 r6, r2, r12 + sadd16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r6, r6, #16 + mul r12, lr, r12 + mul r6, lr, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, 
#0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r6, #16 + asr lr, lr, #16 +#else + sbfx lr, r6, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r6 + sub r6, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, lr, lsl #16 + ror r6, r6, #16 +#else + bfi r6, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r7 + smulbt r7, r11, r7 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb lr, r10, lr, r7 + pkhtb r12, lr, r12, ASR #16 + ssub16 r7, r3, r12 + sadd16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r7, r7, #16 + mul r12, lr, r12 + mul r7, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r7 + sub r7, r3, lr + add r3, r3, lr + sub lr, r3, r12, lsr #16 + add r12, r3, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, lr, lsl #16 + ror r7, r7, #16 +#else + bfi r7, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r8 + smulbt r8, r11, r8 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb lr, r10, lr, r8 + pkhtb r12, lr, r12, ASR #16 + ssub16 r8, r4, r12 + sadd16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r8, r8, #16 + mul r12, lr, r12 + mul r8, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r8 + sub r8, r4, lr + add r4, r4, lr + sub lr, r4, r12, lsr #16 + add r12, r4, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, lr, lsl #16 + ror r8, r8, #16 +#else + bfi r8, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r9 + smulbt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r5, r12 + sadd16 r5, r5, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r9, r9, #16 
+ mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r5, lr + add r5, r5, lr + sub lr, r5, r12, lsr #16 + add r12, r5, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [sp, #4] + add r11, r1, r11, lsr #2 + ldr r11, [r11, #64] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r4 + smulbt r4, r11, r4 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb lr, r10, lr, r4 + pkhtb r12, lr, r12, ASR #16 + ssub16 r4, r2, r12 + sadd16 r2, r2, r12 +#else +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r4, r4, #16 + mul r12, lr, r12 + mul r4, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r4 + sub r4, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, lr, lsl #16 + ror r4, r4, #16 +#else + bfi r4, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, 
r5 + smulbt r5, r11, r5 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb lr, r10, lr, r5 + pkhtb r12, lr, r12, ASR #16 + ssub16 r5, r3, r12 + sadd16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r5, r5, #16 + mul r12, lr, r12 + mul r5, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r5 + sub r5, r3, lr + add r3, r3, lr + sub lr, r3, r12, lsr #16 + add r12, r3, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, lr, lsl #16 + ror r5, r5, #16 +#else + bfi r5, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, 
r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r8 + smultt r8, r11, r8 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb lr, r10, lr, r8 + pkhtb r12, lr, r12, ASR #16 + ssub16 r8, r6, r12 + sadd16 r6, r6, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r8, r8, #16 + mul r12, lr, r12 + mul r8, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r8 + sub r8, r6, lr + add r6, r6, lr + sub lr, r6, r12, lsr #16 + add r12, r6, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, lr, lsl #16 
+ ror r8, r8, #16 +#else + bfi r8, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r9 + smultt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r7, r12 + sadd16 r7, r7, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r9, r9, #16 + mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r7, lr + add r7, r7, lr + sub lr, 
r7, r12, lsr #16 + add r12, r7, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [sp, #4] + add r11, r1, r11, lsr #1 + ldr r11, [r11, #128] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r3 + smulbt r3, r11, r3 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r3 + smlabb lr, r10, lr, r3 + pkhtb r12, lr, r12, ASR #16 + ssub16 r3, r2, r12 + sadd16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r3, #16 + asr r12, r12, #16 +#else + sbfx r12, r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r3, r3, #16 + mul r12, lr, r12 + mul r3, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r3, #16 + asr lr, lr, #16 +#else + sbfx lr, r3, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, 
#0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r3 + sub r3, r2, lr + add r2, r2, lr + sub lr, r2, r12, lsr #16 + add r12, r2, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, lr, lsl #16 + ror r3, r3, #16 +#else + bfi r3, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r5 + smultt r5, r11, r5 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb lr, r10, lr, r5 + pkhtb r12, lr, r12, ASR #16 + ssub16 r5, r4, r12 + sadd16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r5, r5, #16 + mul r12, lr, r12 + mul r5, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r5 + sub r5, r4, lr + add r4, r4, lr + sub lr, r4, r12, lsr #16 + add r12, r4, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, lr, lsl #16 + ror r5, r5, #16 +#else + bfi r5, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [sp, #4] + add r11, r1, r11, lsr #1 + ldr r11, [r11, #132] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r7 + smulbt r7, r11, r7 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb lr, r10, lr, r7 + pkhtb r12, lr, r12, ASR #16 + ssub16 r7, r6, r12 + sadd16 r6, r6, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r7, r7, #16 + mul r12, lr, r12 + mul r7, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r7 + sub r7, r6, lr + add r6, r6, lr + sub lr, r6, r12, lsr #16 + add r12, r6, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, lr, lsl #16 + ror r7, r7, #16 +#else + bfi r7, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultb r12, r11, r9 + smultt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb lr, r10, lr, r9 + pkhtb r12, lr, r12, ASR #16 + ssub16 r9, r8, r12 + sadd16 r8, r8, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r9, r9, #16 + mul r12, lr, r12 + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla lr, r10, lr, r9 + sub r9, r8, lr + add r8, r8, lr + sub lr, r8, r12, lsr #16 + add r12, r8, r12, lsr #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, lr, lsl #16 + ror r9, r9, #16 +#else + bfi r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r11, #0xaf + lsl r11, r11, #8 + add r11, r11, #0xc0 +#else + mov r11, #0xafc0 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x130000 +#else + movt r11, #0x13 +#endif +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r11, #0x4e + lsl r11, r11, #8 + add r11, r11, #0xbf +#else + mov r11, #0x4ebf +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, 
r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r2 + smulwt lr, r11, r2 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r2, #16 + asr r12, r12, #16 +#else + sbfx r12, r2, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r2, #16 +#else + sbfx lr, r2, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r2, lr, lsl #16 + sub r2, r2, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff0000 + bic r2, r2, #0xff000000 + orr r2, r2, lr, lsl #16 +#else + bfi r2, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r3 + smulwt lr, r11, r3 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r3, #16 + asr r12, r12, #16 +#else + sbfx r12, r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r3, #16 +#else + sbfx lr, r3, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r3, lr, lsl #16 + sub r3, r3, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff0000 + bic r3, r3, #0xff000000 + orr r3, r3, lr, lsl #16 +#else + bfi r3, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r4 + smulwt lr, r11, r4 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL 
#16 + ssub16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r4, #16 +#else + sbfx lr, r4, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r4, lr, lsl #16 + sub r4, r4, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff0000 + bic r4, r4, #0xff000000 + orr r4, r4, lr, lsl #16 +#else + bfi r4, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r5 + smulwt lr, r11, r5 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r5, r5, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r5, #16 +#else + sbfx lr, r5, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r5, lr, lsl #16 + sub r5, r5, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff0000 + bic r5, r5, #0xff000000 + orr r5, r5, lr, lsl #16 +#else + bfi r5, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r6 + smulwt lr, r11, r6 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r6, r6, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r6, #16 +#else + sbfx lr, r6, #16, #16 +#endif + mul 
r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r6, lr, lsl #16 + sub r6, r6, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff0000 + bic r6, r6, #0xff000000 + orr r6, r6, lr, lsl #16 +#else + bfi r6, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r7 + smulwt lr, r11, r7 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r7, r7, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r7, #16 +#else + sbfx lr, r7, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r7, lr, lsl #16 + sub r7, r7, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff0000 + bic r7, r7, #0xff000000 + orr r7, r7, lr, lsl #16 +#else + bfi r7, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r8 + smulwt lr, r11, r8 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r8, r8, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r8, #16 +#else + sbfx lr, r8, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r8, lr, lsl #16 + sub r8, r8, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff0000 + bic r8, r8, #0xff000000 + orr r8, 
r8, lr, lsl #16 +#else + bfi r8, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r9 + smulwt lr, r11, r9 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r9, r9, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r9, #16 +#else + sbfx lr, r9, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r9, lr, lsl #16 + sub r9, r9, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff0000 + bic r9, r9, #0xff000000 + orr r9, r9, lr, lsl #16 +#else + bfi r9, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xc000000 + orr r10, r10, #0xff0000 +#else + movt r10, #0xcff +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r9, [r0, #28] + ldr r3, [sp, #4] + add r3, r3, #16 + rsbs r12, r3, #0x100 + add r0, r0, #32 + bne L_kyber_arm32_ntt_loop_567 + add sp, sp, #8 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size kyber_arm32_ntt,.-kyber_arm32_ntt + .text + .type L_kyber_arm32_invntt_zetas_inv, %object + .size L_kyber_arm32_invntt_zetas_inv, 256 + .align 4 +L_kyber_arm32_invntt_zetas_inv: /* Constant table of 128 16-bit values (256 bytes, matching .size), each below 0xd01; kyber_arm32_invntt takes its base address with adr. NOTE(review): presumably the inverse-NTT zeta (twiddle) constants for q = 0xd01 - confirm against the generator; entry order is assumed to be load-bearing. */ + .short 0x6a5 + .short 0x70f + .short 0x5b4 + .short 0x943 + .short 0x922 + .short 0x91d + .short 0x134 + .short 0x6c
+ .short 0xb23 + .short 0x366 + .short 0x356 + .short 0x5e6 + .short 0x9e7 + .short 0x4fe + .short 0x5fa + .short 0x4a1 + .short 0x67b + .short 0x4a3 + .short 0xc25 + .short 0x36a + .short 0x537 + .short 0x83f + .short 0x88 + .short 0x4bf + .short 0xb81 + .short 0x5b9 + .short 0x505 + .short 0x7d7 + .short 0xa9f + .short 0xaa6 + .short 0x8b8 + .short 0x9d0 + .short 0x4b + .short 0x9c + .short 0xbb8 + .short 0xb5f + .short 0xba4 + .short 0x368 + .short 0xa7d + .short 0x636 + .short 0x8a2 + .short 0x25a + .short 0x736 + .short 0x309 + .short 0x93 + .short 0x87a + .short 0x9f7 + .short 0xf6 + .short 0x68c + .short 0x6db + .short 0x1cc + .short 0x123 + .short 0xeb + .short 0xc50 + .short 0xab6 + .short 0xb5b + .short 0xc98 + .short 0x6f3 + .short 0x99a + .short 0x4e3 + .short 0x9b6 + .short 0xad6 + .short 0xb53 + .short 0x44f + .short 0x4fb + .short 0xa5c + .short 0x429 + .short 0xb41 + .short 0x2d5 + .short 0x5e4 + .short 0x940 + .short 0x18e + .short 0x3b7 + .short 0xf7 + .short 0x58d + .short 0xc96 + .short 0x9c3 + .short 0x10f + .short 0x5a + .short 0x355 + .short 0x744 + .short 0xc83 + .short 0x48a + .short 0x652 + .short 0x29a + .short 0x140 + .short 0x8 + .short 0xafd + .short 0x608 + .short 0x11a + .short 0x72e + .short 0x50d + .short 0x90a + .short 0x228 + .short 0xa75 + .short 0x83a + .short 0x623 + .short 0xcd + .short 0xb66 + .short 0x606 + .short 0xaa1 + .short 0xa25 + .short 0x908 + .short 0x2a9 + .short 0x82 + .short 0x642 + .short 0x74f + .short 0x33d + .short 0xb82 + .short 0xbf9 + .short 0x52d + .short 0xac4 + .short 0x745 + .short 0x5c2 + .short 0x4b2 + .short 0x93f + .short 0xc4b + .short 0x6d8 + .short 0xa93 + .short 0xab + .short 0xc37 + .short 0xbe2 + .short 0x773 + .short 0x72c + .short 0x5ed + .short 0x167 + .short 0x2f6 + .short 0x5a1 + .text + .align 4 + .globl kyber_arm32_invntt + .type kyber_arm32_invntt, %function +kyber_arm32_invntt: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #8 + adr r1, L_kyber_arm32_invntt_zetas_inv
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r10, r10, #0xc000000 + orr r10, r10, #0xff0000 +#else + movt r10, #0xcff +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + mov r3, #0 +L_kyber_arm32_invntt_loop_765: + add r11, r1, r3, lsr #1 + str r3, [sp, #4] + ldr r2, [r0] + ldr r3, [r0, #4] + ldr r4, [r0, #8] + ldr r5, [r0, #12] + ldr r6, [r0, #16] + ldr r7, [r0, #20] + ldr r8, [r0, #24] + ldr r9, [r0, #28] + ldr r11, [r11] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r3 + sadd16 r2, r2, r3 + smulbt r3, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r3 + smlabb r3, r10, lr, r3 + pkhtb r3, r3, r12, ASR #16 +#else + sub lr, r2, r3 + add r10, r2, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r3 + add r2, r2, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r3, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul 
r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r3, #16 + asr lr, lr, #16 +#else + sbfx lr, r3, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r3, r10, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r4, r5 + sadd16 r4, r4, r5 + smultt r5, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb r5, r10, lr, r5 + pkhtb r5, r5, r12, ASR #16 +#else + sub lr, r4, r5 + add r10, r4, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, 
#0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif + sub r12, r4, r5 + add r4, r4, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r10, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r5, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, 
r12, #16 + mla r5, r10, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [sp, #4] + add r11, r1, r11, lsr #1 + ldr r11, [r11, #4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r6, r7 + sadd16 r6, r6, r7 + smulbt r7, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb r7, r10, lr, r7 + pkhtb r7, r7, r12, ASR #16 +#else + sub lr, r6, r7 + add r10, r6, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif + sub r12, r6, r7 + add r6, r6, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r10, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r7, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r7, r10, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r8, r9 + sadd16 r8, r8, r9 + smultt r9, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r8, r9 + add r10, r8, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif + sub r12, r8, r9 + add r8, r8, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r10, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [sp, #4] + 
add r11, r1, r11, lsr #2 + ldr r11, [r11, #128] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r4 + sadd16 r2, r2, r4 + smulbt r4, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb r4, r10, lr, r4 + pkhtb r4, r4, r12, ASR #16 +#else + sub lr, r2, r4 + add r10, r2, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r4 + add r2, r2, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r4, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r4, r10, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r3, r5 + sadd16 r3, r3, r5 + smulbt r5, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb r5, r10, lr, r5 + pkhtb r5, r5, r12, ASR #16 +#else + sub lr, r3, r5 + add r10, r3, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif + sub r12, r3, r5 + add r3, r3, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r10, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, 
r11, #0, #16 +#endif + asr r10, r12, #16 + mul r5, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r5, r10, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r6, r8 + sadd16 r6, r6, r8 + smultt r8, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb r8, r10, lr, r8 + pkhtb r8, r8, r12, ASR #16 +#else + sub lr, r6, r8 + add r10, r6, r8 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif + sub r12, r6, r8 + add r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r10, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r8, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 
+ add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r8, r10, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r7, r9 + sadd16 r7, r7, r9 + smultt r9, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r7, r9 + add r10, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif + sub r12, r7, r9 + add r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r10, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [sp, #4] + add r11, r1, r11, lsr #3 + ldr r11, [r11, #192] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r6 + sadd16 r2, r2, r6 + smulbt r6, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r6 + smlabb r6, r10, lr, r6 + pkhtb r6, r6, r12, ASR #16 +#else + sub lr, r2, r6 + add r10, r2, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r6 + add r2, r2, r6 +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r6, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r6, #16 + asr lr, lr, #16 +#else + sbfx lr, r6, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r6, r10, lr, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + 
bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r3, r7 + sadd16 r3, r3, r7 + smulbt r7, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb r7, r10, lr, r7 + pkhtb r7, r7, r12, ASR #16 +#else + sub lr, r3, r7 + add r10, r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif + sub r12, r3, r7 + add r3, r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r10, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r7, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r7, r10, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r4, r8 + sadd16 r4, r4, r8 + smulbt r8, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb r8, r10, lr, r8 + pkhtb r8, r8, r12, ASR #16 +#else + sub lr, r4, r8 + add r10, r4, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif + sub r12, r4, r8 + add r4, r4, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r10, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r10, #0, 
#16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r8, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r8, r10, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r5, r9 + sadd16 r5, r5, r9 + smulbt r9, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, 
r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r5, r9 + add r10, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif + sub r12, r5, r9 + add r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r10, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 
+#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r11, #0xaf + lsl r11, r11, #8 + add r11, r11, #0xc0 +#else + mov r11, #0xafc0 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x130000 +#else + movt r11, #0x13 +#endif +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r11, #0x4e + lsl r11, r11, #8 + add r11, r11, #0xbf +#else + mov r11, #0x4ebf +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r2 + smulwt lr, r11, r2 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r2, #16 + asr r12, r12, #16 +#else + sbfx r12, r2, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r2, #16 +#else + sbfx lr, r2, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r2, lr, lsl #16 + sub r2, r2, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff0000 + bic r2, r2, #0xff000000 + orr r2, r2, lr, lsl #16 +#else + bfi r2, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && 
WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r3 + smulwt lr, r11, r3 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r3, #16 + asr r12, r12, #16 +#else + sbfx r12, r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r3, #16 +#else + sbfx lr, r3, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r3, lr, lsl #16 + sub r3, r3, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff0000 + bic r3, r3, #0xff000000 + orr r3, r3, lr, lsl #16 +#else + bfi r3, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r4 + smulwt lr, r11, r4 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r4, #16 +#else + sbfx lr, r4, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r4, lr, lsl #16 + sub r4, r4, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff0000 + bic r4, r4, #0xff000000 + orr r4, r4, lr, lsl #16 +#else + bfi r4, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r5 + smulwt lr, r11, r5 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r5, r5, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r5, #16 +#else + sbfx lr, r5, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r5, lr, lsl #16 + sub r5, r5, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff0000 + bic r5, r5, #0xff000000 + orr r5, r5, lr, lsl #16 +#else + bfi r5, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r9, [r0, #28] + ldr r3, [sp, #4] + add r3, r3, #16 + rsbs r12, r3, #0x100 + add r0, r0, #32 + bne L_kyber_arm32_invntt_loop_765 + sub r0, r0, #0x200 + mov r3, #0 +L_kyber_arm32_invntt_loop_4_j: + str r3, [sp, #4] + add r11, r1, r3, lsr #4 + mov r2, #4 + ldr r11, [r11, #224] +L_kyber_arm32_invntt_loop_4_i: + str r2, [sp] + ldr r2, [r0] + ldr r3, [r0, #16] + ldr r4, [r0, #32] + ldr r5, [r0, #48] + ldr r6, [r0, #64] + ldr r7, [r0, #80] + ldr r8, [r0, #96] + ldr r9, [r0, #112] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r4 + sadd16 r2, r2, r4 + smulbt r4, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb r4, r10, lr, r4 + pkhtb r4, r4, r12, ASR #16 +#else + sub lr, r2, r4 + add r10, r2, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r4 + add r2, r2, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + 
bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r4, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r4, r10, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH 
&& WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r3, r5 + sadd16 r3, r3, r5 + smulbt r5, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb r5, r10, lr, r5 + pkhtb r5, r5, r12, ASR #16 +#else + sub lr, r3, r5 + add r10, r3, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif + sub r12, r3, r5 + add r3, r3, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r10, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r5, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r5, r10, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r6, r8 + sadd16 r6, r6, r8 + smultt r8, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb r8, r10, lr, r8 + pkhtb r8, r8, r12, ASR #16 +#else + sub lr, r6, r8 + add r10, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif + sub r12, r6, r8 + add r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r10, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif 
+ asr r10, r12, #16 + mul r8, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r8, r10, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r7, r9 + sadd16 r7, r7, r9 + smultt r9, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r7, r9 + add r10, r7, r9 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif + sub r12, r7, r9 + add r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r10, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 
+ add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + str r2, [r0] + str r3, [r0, #16] + str r4, [r0, #32] + str r5, [r0, #48] + str r6, [r0, #64] + str r7, [r0, #80] + str r8, [r0, #96] + str r9, [r0, #112] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp] + ldr r3, [sp, #4] +#else + ldrd r2, r3, [sp] +#endif + subs r2, r2, #1 + add r0, r0, #4 + bne L_kyber_arm32_invntt_loop_4_i + add r3, r3, #0x40 + rsbs r12, r3, #0x100 + add r0, r0, #0x70 + bne L_kyber_arm32_invntt_loop_4_j + sub r0, r0, #0x200 + mov r2, #16 +L_kyber_arm32_invntt_loop_321: + str r2, [sp] + ldrh r11, [r1, #2] + ldr r2, [r0] + ldr r3, [r0, #64] + ldr r4, [r0, #128] + ldr r5, [r0, #192] + ldr r6, [r0, #256] + ldr r7, [r0, #320] + ldr r8, [r0, #384] + ldr r9, [r0, #448] + ldr r11, [r1, #240] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r3 + sadd16 r2, r2, r3 + smulbt r3, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r3 + smlabb r3, r10, lr, r3 + pkhtb r3, r3, r12, ASR #16 +#else + sub lr, r2, r3 + add r10, r2, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r3 + add r2, r2, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, 
#16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r3, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r3, #16 + asr lr, lr, #16 +#else + sbfx lr, r3, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r3, r10, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* 
WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r4, r5 + sadd16 r4, r4, r5 + smultt r5, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb r5, r10, lr, r5 + pkhtb r5, r5, r12, ASR #16 +#else + sub lr, r4, r5 + add r10, r4, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif + sub r12, r4, r5 + add r4, r4, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r10, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r5, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r5, r10, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #244] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r6, r7 + sadd16 r6, r6, r7 + smulbt r7, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb r7, r10, lr, r7 + pkhtb r7, r7, r12, ASR #16 +#else + sub lr, r6, r7 + add r10, r6, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif + sub r12, r6, r7 + add r6, r6, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r10, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 
+#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r7, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r7, r10, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r8, r9 + sadd16 r8, r8, r9 + smultt r9, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r8, r9 + 
add r10, r8, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif + sub r12, r8, r9 + add r8, r8, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r10, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, 
#0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #248] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r4 + sadd16 r2, r2, r4 + smulbt r4, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb r4, r10, lr, r4 + pkhtb r4, r4, r12, ASR #16 +#else + sub lr, r2, r4 + add r10, r2, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r4 + add r2, r2, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r4, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + 
mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r4, r10, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r3, r5 + sadd16 r3, r3, r5 + smulbt r5, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb r5, r10, lr, r5 + pkhtb r5, r5, r12, ASR #16 +#else + sub lr, r3, r5 + add r10, r3, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif + sub r12, r3, r5 + add r3, r3, r5 +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r10, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r5, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r5, r10, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic 
r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r6, r8 + sadd16 r6, r6, r8 + smultt r8, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb r8, r10, lr, r8 + pkhtb r8, r8, r12, ASR #16 +#else + sub lr, r6, r8 + add r10, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif + sub r12, r6, r8 + add r6, r6, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r10, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r8, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r8, r10, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r7, r9 + sadd16 r7, r7, r9 + smultt r9, r11, r12 + smultb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r7, r9 + add r10, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif + sub r12, r7, r9 + add r7, r7, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r10, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r10, #0, #16 +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r11, #16 +#else + sbfx lr, r11, #16, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r11, #0xaf + lsl r11, r11, #8 + add r11, r11, #0xc0 +#else + mov r11, #0xafc0 +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x130000 +#else + movt r11, #0x13 +#endif +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r11, #0x4e + lsl r11, r11, #8 + add r11, r11, #0xbf +#else + mov r11, #0x4ebf +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r2 + smulwt lr, r11, r2 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r2, r2, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r2, #16 + asr r12, r12, #16 +#else + sbfx r12, r2, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r2, #16 +#else + sbfx lr, r2, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r2, lr, lsl #16 + sub r2, r2, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff0000 + bic r2, r2, #0xff000000 + orr r2, r2, lr, lsl #16 +#else + bfi r2, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r3 + smulwt lr, r11, r3 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r3, r3, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r3, #16 + asr r12, r12, #16 +#else + sbfx r12, r3, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r3, #16 +#else + sbfx lr, r3, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r3, lr, lsl #16 + sub r3, r3, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff0000 + bic r3, r3, #0xff000000 + orr r3, r3, lr, lsl #16 +#else + bfi r3, lr, #16, #16 +#endif +#endif /* 
WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r4 + smulwt lr, r11, r4 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r4, r4, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r4, #16 +#else + sbfx lr, r4, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r4, lr, lsl #16 + sub r4, r4, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff0000 + bic r4, r4, #0xff000000 + orr r4, r4, lr, lsl #16 +#else + bfi r4, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulwb r12, r11, r5 + smulwt lr, r11, r5 + smulbt r12, r10, r12 + smulbt lr, r10, lr + pkhbt r12, r12, lr, LSL #16 + ssub16 r5, r5, r12 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr lr, r5, #16 +#else + sbfx lr, r5, #16, #16 +#endif + mul r12, r11, r12 + mul lr, r11, lr + asr r12, r12, #26 + asr lr, lr, #26 + mul r12, r10, r12 + mul lr, r10, lr + sub lr, r5, lr, lsl #16 + sub r5, r5, r12 + lsr lr, lr, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff0000 + bic r5, r5, #0xff000000 + orr r5, r5, lr, lsl #16 +#else + bfi r5, lr, #16, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #252] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r2, r6 + sadd16 r2, r2, r6 + smulbt r6, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r6 + smlabb 
r6, r10, lr, r6 + pkhtb r6, r6, r12, ASR #16 +#else + sub lr, r2, r6 + add r10, r2, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 +#else + bfc r2, #0, #16 +#endif + sub r12, r2, r6 + add r2, r2, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r6, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r6, #16 + asr lr, lr, #16 +#else + sbfx lr, r6, #0, #16 +#endif 
+ mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r6, r10, lr, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r3, r7 + sadd16 r3, r3, r7 + smulbt r7, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb r7, r10, lr, r7 + pkhtb r7, r7, r12, ASR #16 +#else + sub lr, r3, r7 + add r10, r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 +#else + bfc r3, #0, #16 +#endif + sub r12, r3, r7 + add r3, r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r10, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r7, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r7, r10, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r4, r8 + sadd16 r4, r4, r8 + smulbt r8, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb r8, r10, lr, r8 + pkhtb r8, r8, r12, ASR #16 +#else + sub lr, r4, r8 + add r10, r4, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 +#else + bfc r4, #0, #16 +#endif + sub r12, r4, 
r8 + add r4, r4, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r10, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r8, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r8, r10, lr, r8 +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r12, r5, r9 + sadd16 r5, r5, r9 + smulbt r9, r11, r12 + smulbb r12, r11, r12 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else + sub lr, r5, r9 + add r10, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 +#else + bfc r5, #0, #16 +#endif + sub r12, r5, r9 + add r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r12, r12, #0xff + bic r12, r12, #0xff00 + ror r12, r12, #16 + orr r12, r12, lr, lsl #16 + ror r12, r12, #16 +#else + bfi r12, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r10, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r10, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif + asr r10, r12, #16 + mul r9, lr, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r12, #16 + asr r12, r12, #16 +#else + sbfx r12, r12, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r12, #16 + asr lr, lr, #16 +#else + sbfx lr, r12, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, 
r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + ldr r11, [r1, #254] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r2 + smulbt r2, r11, r2 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r2 + smlabb r2, r10, lr, r2 + pkhtb r2, r2, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r2, #16 + asr r12, r12, #16 +#else + sbfx r12, r2, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r2, r2, #16 +#else + sbfx r2, r2, #16, #16 +#endif + mul r2, lr, r2 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 
+ add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r2, #16 + asr lr, lr, #16 +#else + sbfx lr, r2, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r2, r10, lr, r2 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r12, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r3 + smulbt r3, r11, r3 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r3 + smlabb r3, r10, lr, r3 + pkhtb r3, r3, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r3, #16 + asr r12, r12, #16 +#else + sbfx r12, r3, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r3, r3, #16 +#else + sbfx r3, r3, #16, #16 +#endif + mul r3, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, 
r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r3, #16 + asr lr, lr, #16 +#else + sbfx lr, r3, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r3, r10, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r12, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r4 + smulbt r4, r11, r4 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r4 + smlabb r4, r10, lr, r4 + pkhtb r4, r4, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r4, #16 + asr r12, r12, #16 +#else + sbfx r12, r4, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r4, r4, #16 +#else + sbfx r4, r4, #16, #16 +#endif + mul r4, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 
+#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r4, #16 + asr lr, lr, #16 +#else + sbfx lr, r4, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r4, r10, lr, r4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r12, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r5 + smulbt r5, r11, r5 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r5 + smlabb r5, r10, lr, r5 + pkhtb r5, r5, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r5, #16 + asr r12, r12, #16 +#else + sbfx r12, r5, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r5, r5, #16 +#else + sbfx r5, r5, #16, #16 +#endif + mul r5, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + 
mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r5, #16 + asr lr, lr, #16 +#else + sbfx lr, r5, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r5, r10, lr, r5 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r12, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r6 + smulbt r6, r11, r6 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r6 + smlabb r6, r10, lr, r6 + pkhtb r6, r6, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r6, r6, #16 +#else + sbfx r6, r6, #16, #16 +#endif + mul r6, lr, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, 
#0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r6, #16 + asr lr, lr, #16 +#else + sbfx lr, r6, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r6, r10, lr, r6 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 + ror r6, r6, #16 + orr r6, r6, r12, lsl #16 + ror r6, r6, #16 +#else + bfi r6, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r7 + smulbt r7, r11, r7 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r7 + smlabb r7, r10, lr, r7 + pkhtb r7, r7, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r7, r7, #16 +#else + sbfx r7, r7, #16, #16 +#endif + mul r7, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 
+#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r7, #16 + asr lr, lr, #16 +#else + sbfx lr, r7, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r7, r10, lr, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 + ror r7, r7, #16 + orr r7, r7, r12, lsl #16 + ror r7, r7, #16 +#else + bfi r7, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r8 + smulbt r8, r11, r8 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r8 + smlabb r8, r10, lr, r8 + pkhtb r8, r8, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r8, #16 + asr r12, r12, #16 +#else + sbfx r12, r8, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r8, r8, #16 +#else + sbfx r8, r8, #16, #16 +#endif + mul r8, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r8, #16 + asr lr, lr, #16 +#else + sbfx lr, r8, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r8, r10, lr, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 + ror r8, r8, #16 + orr r8, r8, r12, lsl #16 + ror r8, r8, #16 +#else + bfi r8, r12, #0, #16 +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smulbb r12, r11, r9 + smulbt r9, r11, r9 + smultb lr, r10, r12 + smlabb r12, r10, lr, r12 + smultb lr, r10, r9 + smlabb r9, r10, lr, r9 + pkhtb r9, r9, r12, ASR #16 +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r11, #16 + asr lr, lr, #16 +#else + sbfx lr, r11, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r9, #16 + asr r12, r12, #16 +#else + sbfx r12, r9, #0, #16 +#endif + mul r12, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + asr r9, r9, #16 +#else + sbfx r9, r9, #16, #16 +#endif + mul r9, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif + mul lr, r10, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + mla r12, r10, lr, r12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xc + lsl r10, r10, #8 + add r10, r10, #0xff +#else + mov r10, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, r9, #16 + asr lr, lr, #16 +#else + sbfx lr, r9, #0, #16 +#endif + mul lr, r10, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r10, #0xd + lsl r10, r10, #8 + add r10, r10, #0x1 +#else + mov r10, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl lr, lr, #16 + asr lr, lr, #16 +#else + sbfx lr, lr, #0, #16 +#endif + lsr r12, r12, #16 + mla r9, r10, lr, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 + ror r9, r9, #16 + orr r9, r9, r12, lsl #16 + ror r9, r9, #16 +#else + bfi r9, r12, #0, #16 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + str r2, [r0] + str r3, [r0, #64] + str r4, [r0, #128] + str r5, [r0, #192] + str r6, [r0, #256] + str r7, [r0, #320] + str r8, [r0, #384] + str r9, [r0, #448] + ldr r2, [sp] + subs r2, r2, #1 + add r0, r0, #4 + bne L_kyber_arm32_invntt_loop_321 + add sp, sp, #8 + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size kyber_arm32_invntt,.-kyber_arm32_invntt + .text + .type L_kyber_arm32_basemul_mont_zetas, %object + .size L_kyber_arm32_basemul_mont_zetas, 256 + .align 4 +L_kyber_arm32_basemul_mont_zetas: /* Table of 128 halfword constants (256 bytes, as declared by the .size directive). kyber_arm32_basemul_mont and kyber_arm32_basemul_mont_add read it starting at byte offset 0x80, i.e. from entry 64 onward. */ + .short 0x8ed + .short 0xa0b + .short 0xb9a + .short 0x714 + .short 0x5d5 + .short 0x58e + .short 0x11f + .short 0xca + .short 0xc56 + .short 0x26e + .short 0x629 + .short 0xb6 + .short 0x3c2 + .short 0x84f + .short 0x73f + .short 0x5bc + .short 0x23d + .short 0x7d4 + .short 0x108 + .short 0x17f + .short 0x9c4 + .short 0x5b2 + .short 0x6bf + .short 0xc7f + .short 0xa58 + .short 0x3f9 + .short 0x2dc + .short 0x260 + .short 0x6fb + .short 0x19b + .short 0xc34 + .short
0x6de + .short 0x4c7 + .short 0x28c + .short 0xad9 + .short 0x3f7 + .short 0x7f4 + .short 0x5d3 + .short 0xbe7 + .short 0x6f9 + .short 0x204 + .short 0xcf9 + .short 0xbc1 + .short 0xa67 + .short 0x6af + .short 0x877 + .short 0x7e + .short 0x5bd + .short 0x9ac + .short 0xca7 + .short 0xbf2 + .short 0x33e + .short 0x6b + .short 0x774 + .short 0xc0a + .short 0x94a + .short 0xb73 + .short 0x3c1 + .short 0x71d + .short 0xa2c + .short 0x1c0 + .short 0x8d8 + .short 0x2a5 + .short 0x806 + .short 0x8b2 + .short 0x1ae + .short 0x22b + .short 0x34b + .short 0x81e + .short 0x367 + .short 0x60e + .short 0x69 + .short 0x1a6 + .short 0x24b + .short 0xb1 + .short 0xc16 + .short 0xbde + .short 0xb35 + .short 0x626 + .short 0x675 + .short 0xc0b + .short 0x30a + .short 0x487 + .short 0xc6e + .short 0x9f8 + .short 0x5cb + .short 0xaa7 + .short 0x45f + .short 0x6cb + .short 0x284 + .short 0x999 + .short 0x15d + .short 0x1a2 + .short 0x149 + .short 0xc65 + .short 0xcb6 + .short 0x331 + .short 0x449 + .short 0x25b + .short 0x262 + .short 0x52a + .short 0x7fc + .short 0x748 + .short 0x180 + .short 0x842 + .short 0xc79 + .short 0x4c2 + .short 0x7ca + .short 0x997 + .short 0xdc + .short 0x85e + .short 0x686 + .short 0x860 + .short 0x707 + .short 0x803 + .short 0x31a + .short 0x71b + .short 0x9ab + .short 0x99b + .short 0x1de + .short 0xc95 + .short 0xbcd + .short 0x3e4 + .short 0x3df + .short 0x3be + .short 0x74d + .short 0x5f2 + .short 0x65c + .text + .align 4 + .globl kyber_arm32_basemul_mont + .type kyber_arm32_basemul_mont, %function /* kyber_arm32_basemul_mont: r0 = output pointer (written with stm, post-incremented), r1 and r2 = input pointers (read with ldm, post-incremented). The loop runs 64 times; each pass reads two words from r1, two words from r2 and one table constant, and stores two words to r0. Every word is treated as a pair of signed 16-bit values. Judging by the name and the constants 0xd01 (3329) and 0xcff this is presumably the Kyber base multiplication with Montgomery reduction - confirm against the C implementation. */ +kyber_arm32_basemul_mont: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + adr r3, L_kyber_arm32_basemul_mont_zetas + add r3, r3, #0x80 /* r3 = address of table entry 64 (byte offset 0x80) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r12, r12, #0xc000000 + orr r12, r12, #0xff0000 +#else +
movt r12, #0xcff +#endif /* r12 = 0x0cff0d01: bottom half 0xd01, top half 0xcff */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + mov r8, #0 /* r8 = byte offset into the table, relative to r3 */ +L_kyber_arm32_basemul_mont_loop: + ldm r1!, {r4, r5} + ldm r2!, {r6, r7} + ldr lr, [r3, r8] /* loads a full word, but only its low halfword is used below. NOTE(review): r8 advances by 2, so every other load is only 2-byte aligned and the last one (offset 0x7e) reads 2 bytes past the 256-byte table; a halfword load looks like it would avoid both - confirm before changing. */ + add r8, r8, #2 + push {r8} /* r8 is reused as scratch below; the saved offset is popped again just before the loop branch */ + cmp r8, #0x80 /* sets the flags tested by the bne at the bottom of the loop; no instruction in between is a flag-setting (S) form */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultt r8, r4, r6 + smultt r10, r5, r7 + smultb r9, r12, r8 /* top half of r12 is 0xcff */ + smultb r11, r12, r10 + smlabb r8, r12, r9, r8 /* bottom half of r12 is 0xd01 */ + smlabb r10, r12, r11, r10 + rsb r11, lr, #0 /* r11 = 0 minus lr (negated table constant) */ + smulbt r8, lr, r8 + smulbt r10, r11, r10 + smlabb r8, r4, r6, r8 + smlabb r10, r5, r7, r10 + smultb r9, r12, r8 + smultb r11, r12, r10 + smlabb r8, r12, r9, r8 + smlabb r10, r12, r11, r10 + smulbt r9, r4, r6 + smulbt r11, r5, r7 + smlatb r9, r4, r6, r9 + smlatb r11, r5, r7, r11 + smultb r6, r12, r9 + smultb r7, r12, r11 + smlabb r9, r12, r6, r9 + smlabb r11, r12, r7, r11 + pkhtb r4, r9, r8, ASR #16 /* output word: top half from r9, bottom half from bits 16-31 of r8 */ + pkhtb r5, r11, r10, ASR #16 +#else + asr r8, r4, #16 + asr r10, r5, #16 + asr r9, r6, #16 + asr r11, r7, #16 + mul r8, r9, r8 + mul r10, r11, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xc + lsl r12, r12, #8 + add r12, r12, #0xff +#else + mov r12, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r8, #16 + asr r9, r9, #16 +#else + sbfx r9, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r10, #16 + asr r11, r11, #16 +#else + sbfx r11, r10, #0, #16 +#endif + mul r9, r12, r8 /* NOTE(review): multiplies by r8, whereas the r11 lane and the later reduction steps multiply the sign-extended copy; only bits 0-15 of the product are kept by the extraction that follows, so the r9 computed just above is unused */ + mul r11, r12, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r9, #16 + asr r9, r9, #16 +#else + sbfx r9, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r11, #16 + asr r11, r11, #16 +#else + sbfx r11, r11, #0, #16 +#endif + mla r8, r12, r9, r8 + mla r10, r12, r11, r10 + rsb r11, lr, #0 /* r11 = 0 minus lr (negated table constant) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9,
lr, #16 + asr r9, r9, #16 +#else + sbfx r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r11, #16 + asr r11, r11, #16 +#else + sbfx r11, r11, #0, #16 +#endif + asr r8, r8, #16 + asr r10, r10, #16 + mul r8, r9, r8 + mul r10, r11, r10 /* r8 = (sign-extended low half of lr) times (old r8 >> 16); r10 = (sign-extended low half of the negated lr) times (old r10 >> 16) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r4, #16 + asr r9, r9, #16 +#else + sbfx r9, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r5, #16 + asr r11, r11, #16 +#else + sbfx r11, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif + mla r8, r9, r12, r8 /* accumulate low half of r4 times low half of r6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif + mla r10, r11, r12, r10 /* accumulate low half of r5 times low half of r7 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xc + lsl r12, r12, #8 + add r12, r12, #0xff +#else + mov r12, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r8, #16 + asr r9, r9, #16 +#else + sbfx r9, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r10, #16 + asr r11, r11, #16 +#else + sbfx r11, r10, #0, #16 +#endif + mul r9, r12, r9 + mul r11, r12, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r9, #16 + asr r9, r9, #16 +#else + sbfx r9, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r11, #16 + asr r11, r11, #16 +#else + sbfx r11, r11, #0, #16 +#endif + mla r8, r12, r9, r8 + mla r10, r12, r11, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r4, #16 + asr r9, r9, #16 +#else + sbfx r9, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r5, #16 + asr r11, r11, #16
+#else + sbfx r11, r5, #0, #16 +#endif + asr r12, r6, #16 + mul r9, r12, r9 + asr r12, r7, #16 + mul r11, r12, r11 + asr r4, r4, #16 + asr r5, r5, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif + mla r9, r4, r12, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif + mla r11, r5, r12, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xc + lsl r12, r12, #8 + add r12, r12, #0xff +#else + mov r12, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r6, r9, #16 + asr r6, r6, #16 +#else + sbfx r6, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r11, #16 + asr r7, r7, #16 +#else + sbfx r7, r11, #0, #16 +#endif + mul r6, r12, r6 + mul r7, r12, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r6, #16 + asr r4, r4, #16 +#else + sbfx r4, r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r5, r7, #16 + asr r5, r5, #16 +#else + sbfx r5, r7, #0, #16 +#endif + mla r9, r12, r4, r9 + mla r11, r12, r5, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r11, r11, #0xff + bic r11, r11, #0xff00 +#else + bfc r11, #0, #16 +#endif + orr r4, r9, r8, lsr #16 /* output word: top half from r9, bottom half from bits 16-31 of r8 */ + orr r5, r11, r10, lsr #16 +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + stm r0!, {r4, r5} /* store the two result words and advance r0 */ + pop {r8} /* restore the table offset saved at the top of the loop */ + bne L_kyber_arm32_basemul_mont_loop + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size kyber_arm32_basemul_mont,.-kyber_arm32_basemul_mont + .text + .align 4 + .globl kyber_arm32_basemul_mont_add + .type
kyber_arm32_basemul_mont_add, %function /* kyber_arm32_basemul_mont_add: same register roles and loop structure as kyber_arm32_basemul_mont (r0 = output pointer, r1 and r2 = input pointers, 64 passes of two words each), except that each computed pair of 16-bit results is added to the words already stored at r0 before being written back. */ +kyber_arm32_basemul_mont_add: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + adr r3, L_kyber_arm32_basemul_mont_zetas + add r3, r3, #0x80 /* r3 = address of table entry 64 (byte offset 0x80) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r12, r12, #0xc000000 + orr r12, r12, #0xff0000 +#else + movt r12, #0xcff +#endif /* r12 = 0x0cff0d01: bottom half 0xd01, top half 0xcff */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + mov r8, #0 /* r8 = byte offset into the table, relative to r3 */ +L_kyber_arm32_basemul_mont_add_loop: + ldm r1!, {r4, r5} + ldm r2!, {r6, r7} + ldr lr, [r3, r8] /* loads a full word, but only its low halfword is used below. NOTE(review): r8 advances by 2, so every other load is only 2-byte aligned and the last one (offset 0x7e) reads 2 bytes past the 256-byte table - confirm this is intended. */ + add r8, r8, #2 + push {r8} /* r8 is reused as scratch below; the saved offset is popped again just before the loop branch */ + cmp r8, #0x80 /* sets the flags tested by the bne at the bottom of the loop; no instruction in between is a flag-setting (S) form */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + smultt r8, r4, r6 + smultt r10, r5, r7 + smultb r9, r12, r8 + smultb r11, r12, r10 + smlabb r8, r12, r9, r8 + smlabb r10, r12, r11, r10 + rsb r11, lr, #0 /* r11 = 0 minus lr (negated table constant) */ + smulbt r8, lr, r8 + smulbt r10, r11, r10 + smlabb r8, r4, r6, r8 + smlabb r10, r5, r7, r10 + smultb r9, r12, r8 + smultb r11, r12, r10 + smlabb r8, r12, r9, r8 + smlabb r10, r12, r11, r10 + smulbt r9, r4, r6 + smulbt r11, r5, r7 + smlatb r9, r4, r6, r9 + smlatb r11, r5, r7, r11 + smultb r6, r12, r9 + smultb r7, r12, r11 + smlabb r9, r12, r6, r9 + smlabb r11, r12, r7, r11 + ldm r0, {r4, r5} /* current contents of the two output words (no writeback) */ + pkhtb r9, r9, r8, ASR #16 + pkhtb r11, r11, r10, ASR #16 + sadd16 r4, r4, r9 /* 16-bit lane-wise add into the existing output */ + sadd16 r5, r5, r11 +#else + asr r8, r4, #16 + asr r10, r5, #16 + asr r9, r6, #16 + asr r11, r7, #16 + mul r8, r9, r8 + mul r10, r11, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xc + lsl r12, r12, #8 + add r12, r12, #0xff +#else + mov r12, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r8, #16 + asr r9, r9, #16 +#else + sbfx r9, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r10, #16 + asr r11, r11, #16 +#else + sbfx r11, r10, #0, #16 +#endif + mul r9, r12, r8 /* NOTE(review): multiplies by r8, whereas the r11 lane and the later reduction steps multiply the sign-extended copy; only bits 0-15 of the product are kept by the extraction that follows, so the r9 computed just above is unused */
+ mul r11, r12, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r9, #16 + asr r9, r9, #16 +#else + sbfx r9, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r11, #16 + asr r11, r11, #16 +#else + sbfx r11, r11, #0, #16 +#endif + mla r8, r12, r9, r8 + mla r10, r12, r11, r10 + rsb r11, lr, #0 /* r11 = 0 minus lr (negated table constant) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, lr, #16 + asr r9, r9, #16 +#else + sbfx r9, lr, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r11, #16 + asr r11, r11, #16 +#else + sbfx r11, r11, #0, #16 +#endif + asr r8, r8, #16 + asr r10, r10, #16 + mul r8, r9, r8 + mul r10, r11, r10 /* r8 = (sign-extended low half of lr) times (old r8 >> 16); r10 = (sign-extended low half of the negated lr) times (old r10 >> 16) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r4, #16 + asr r9, r9, #16 +#else + sbfx r9, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r5, #16 + asr r11, r11, #16 +#else + sbfx r11, r5, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif + mla r8, r9, r12, r8 /* accumulate low half of r4 times low half of r6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif + mla r10, r11, r12, r10 /* accumulate low half of r5 times low half of r7 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xc + lsl r12, r12, #8 + add r12, r12, #0xff +#else + mov r12, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r8, #16 + asr r9, r9, #16 +#else + sbfx r9, r8, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r10, #16 + asr r11, r11, #16 +#else + sbfx r11, r10, #0, #16 +#endif + mul r9, r12, r9 + mul r11, r12, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else +
mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r9, #16 + asr r9, r9, #16 +#else + sbfx r9, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r11, #16 + asr r11, r11, #16 +#else + sbfx r11, r11, #0, #16 +#endif + mla r8, r12, r9, r8 + mla r10, r12, r11, r10 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r9, r4, #16 + asr r9, r9, #16 +#else + sbfx r9, r4, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r11, r5, #16 + asr r11, r11, #16 +#else + sbfx r11, r5, #0, #16 +#endif + asr r12, r6, #16 + mul r9, r12, r9 + asr r12, r7, #16 + mul r11, r12, r11 + asr r4, r4, #16 + asr r5, r5, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r6, #16 + asr r12, r12, #16 +#else + sbfx r12, r6, #0, #16 +#endif + mla r9, r4, r12, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r12, r7, #16 + asr r12, r12, #16 +#else + sbfx r12, r7, #0, #16 +#endif + mla r11, r5, r12, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xc + lsl r12, r12, #8 + add r12, r12, #0xff +#else + mov r12, #0xcff +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r6, r9, #16 + asr r6, r6, #16 +#else + sbfx r6, r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r11, #16 + asr r7, r7, #16 +#else + sbfx r7, r11, #0, #16 +#endif + mul r6, r12, r6 + mul r7, r12, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r6, #16 + asr r4, r4, #16 +#else + sbfx r4, r6, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r5, r7, #16 + asr r5, r5, #16 +#else + sbfx r5, r7, #0, #16 +#endif + mla r9, r12, r4, r9 + mla r11, r12, r5, r11 + ldm r0, {r4, r5} /* current contents of the two output words (no writeback); r4 and r5 were used as scratch above */ +#if defined(WOLFSSL_ARM_ARCH) &&
(WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r11, r11, #0xff + bic r11, r11, #0xff00 +#else + bfc r11, #0, #16 +#endif + orr r9, r9, r8, lsr #16 /* pack: top half from r9, bottom half from bits 16-31 of r8 */ + orr r11, r11, r10, lsr #16 + add r8, r4, r9 /* lane-wise 16-bit add done in two steps: this sum supplies the low half */ + add r10, r5, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r11, r11, #0xff + bic r11, r11, #0xff00 +#else + bfc r11, #0, #16 +#endif + add r4, r4, r9 /* with the low half of r9 cleared this sum supplies the high half */ + add r5, r5, r11 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r8, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r8, #0, #16 +#endif /* low half of r4 replaced by the low half of r8 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r10, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r10, #0, #16 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + stm r0!, {r4, r5} /* store the two updated words and advance r0 */ + pop {r8} /* restore the table offset saved at the top of the loop */ + bne L_kyber_arm32_basemul_mont_add_loop + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size kyber_arm32_basemul_mont_add,.-kyber_arm32_basemul_mont_add + .text + .align 4 + .globl kyber_arm32_csubq + .type kyber_arm32_csubq, %function /* kyber_arm32_csubq: r0 = pointer to data that is updated in place. The loop runs 32 times (r1 counts down from 0x100 in steps of 8) and each pass handles four words, i.e. eight 16-bit values: 0xd01 (3329) is subtracted from every value and added back to those whose result has bit 15 set. */ +kyber_arm32_csubq: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r12, #0xd + lsl r12, r12, #8 + add r12, r12, #0x1 +#else + mov r12, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov lr, #0xd + lsl lr, lr, #8 + add lr, lr, #0x1 +#else + mov lr, #0xd01 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr lr, lr, #0xd000000 + orr lr, lr, #0x10000 +#else + movt lr, #0xd01 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ /* when WOLFSSL_ARM_ARCH >= 6, lr = 0x0d010d01 (0xd01 in both halves); otherwise lr = 0xd01 */ +#if defined(WOLFSSL_ARM_ARCH) &&
(WOLFSSL_ARM_ARCH < 7) + mov r11, #0x80 + lsl r11, r11, #8 + add r11, r11, #0x0 +#else + mov r11, #0x8000 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + orr r11, r11, #0x80000000 +#else + movt r11, #0x8000 +#endif /* r11 = 0x80008000: bit 15 of each 16-bit half */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r1, #0x1 + lsl r1, r1, #8 + add r1, r1, #0x0 +#else + mov r1, #0x100 +#endif /* r1 = 0x100, loop counter decremented by 8 per pass */ +L_kyber_arm32_csubq_loop: /* each pass: load four words from r0, subtract 0xd01 from every 16-bit half, add 0xd01 back to halves whose result has bit 15 set, store the four words and advance r0 */ + ldm r0, {r2, r3, r4, r5} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + ssub16 r2, r2, lr + ssub16 r3, r3, lr + ssub16 r4, r4, lr + ssub16 r5, r5, lr + and r6, r2, r11 /* keep only bit 15 of each half */ + and r7, r3, r11 + and r8, r4, r11 + and r9, r5, r11 + lsr r6, r6, #15 /* move those bits to bit 0 and bit 16 */ + lsr r7, r7, #15 + lsr r8, r8, #15 + lsr r9, r9, #15 + mul r6, r12, r6 /* 0xd01 in every half whose bit 15 was set, 0 elsewhere */ + mul r7, r12, r7 + mul r8, r12, r8 + mul r9, r12, r9 + sadd16 r2, r2, r6 + sadd16 r3, r3, r7 + sadd16 r4, r4, r8 + sadd16 r5, r5, r9 +#else + sub r6, r2, lr /* low half difference (lr = 0xd01 on this path) */ + sub r2, r2, lr, lsl #16 /* high half difference */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r6, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r6, #0, #16 +#endif /* low half of r2 replaced by the low half of r6 */ + sub r7, r3, lr + sub r3, r3, lr, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r7, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r7, #0, #16 +#endif + sub r8, r4, lr + sub r4, r4, lr, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r8, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r8, #0, #16 +#endif + sub r9, r5, lr + sub r5, r5, lr, lsl #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r9, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r9, #0, #16 +#endif + and r6, r2, r11 /* keep only bit 15 of each half */ + and r7, r3, r11 + and r8, r4, r11 + and r9, r5, r11 + lsr r6, r6, #15 /* move those bits to bit 0 and bit 16 */ + lsr r7, r7, #15 + lsr r8, r8, #15 + lsr r9, r9, #15 + mul r6, r12, r6 /* 0xd01 in every half whose bit 15 was set, 0 elsewhere */ +
mul r7, r12, r7 + mul r8, r12, r8 + mul r9, r12, r9 + add r10, r2, r6 /* this sum supplies the low half of the result */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r6, r6, #0xff + bic r6, r6, #0xff00 +#else + bfc r6, #0, #16 +#endif + add r2, r2, r6 /* with the low half of r6 cleared this sum supplies the high half */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r2, r2, #0xff + bic r2, r2, #0xff00 + ror r2, r2, #16 + orr r2, r2, r10, lsl #16 + ror r2, r2, #16 +#else + bfi r2, r10, #0, #16 +#endif /* low half of r2 replaced by the low half of r10 */ + add r10, r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff + bic r7, r7, #0xff00 +#else + bfc r7, #0, #16 +#endif + add r3, r3, r7 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r3, r3, #0xff + bic r3, r3, #0xff00 + ror r3, r3, #16 + orr r3, r3, r10, lsl #16 + ror r3, r3, #16 +#else + bfi r3, r10, #0, #16 +#endif + add r10, r4, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r8, r8, #0xff + bic r8, r8, #0xff00 +#else + bfc r8, #0, #16 +#endif + add r4, r4, r8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r4, r4, #0xff + bic r4, r4, #0xff00 + ror r4, r4, #16 + orr r4, r4, r10, lsl #16 + ror r4, r4, #16 +#else + bfi r4, r10, #0, #16 +#endif + add r10, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r9, r9, #0xff + bic r9, r9, #0xff00 +#else + bfc r9, #0, #16 +#endif + add r5, r5, r9 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r5, r5, #0xff + bic r5, r5, #0xff00 + ror r5, r5, #16 + orr r5, r5, r10, lsl #16 + ror r5, r5, #16 +#else + bfi r5, r10, #0, #16 +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + stm r0!, {r2, r3, r4, r5} /* write the four words back and advance r0 by 16 bytes */ + subs r1, r1, #8 /* eight 16-bit values handled per pass */ + bne L_kyber_arm32_csubq_loop + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size kyber_arm32_csubq,.-kyber_arm32_csubq + .text + .align 4 + .globl kyber_arm32_rej_uniform + .type kyber_arm32_rej_uniform, %function +kyber_arm32_rej_uniform: + push {r4, r5, r6, r7, r8, lr} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + mov r8, #0xd + lsl r8, r8, #8 + add r8, r8,
#0x1 +#else + mov r8, #0xd01 +#endif + mov r12, #0 +L_kyber_arm32_rej_uniform_loop_no_fail: + cmp r1, #8 + blt L_kyber_arm32_rej_uniform_done_no_fail + ldm r2!, {r4, r5, r6} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r4, #20 + lsr r7, r7, #20 +#else + ubfx r7, r4, #0, #12 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r4, #8 + lsr r7, r7, #20 +#else + ubfx r7, r4, #12, #12 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsr r7, r4, #24 +#else + ubfx r7, r4, #24, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xf00 + ror r7, r7, #12 + orr r7, r7, r5, lsl #28 + ror r7, r7, #20 +#else + bfi r7, r5, #8, #4 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r5, #16 + lsr r7, r7, #20 +#else + ubfx r7, r5, #4, #12 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r5, #4 + lsr r7, r7, #20 +#else + ubfx r7, r5, #16, #12 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsr r7, r5, #28 +#else + ubfx r7, r5, #28, #4 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff0 + ror r7, r7, #12 + orr r7, r7, r6, lsl #24 + ror r7, r7, #20 +#else + bfi r7, r6, #4, #8 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r6, #12 + lsr r7, r7, #20 +#else + ubfx r7, r6, #8, #12 
+#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsr r7, r6, #20 +#else + ubfx r7, r6, #20, #12 +#endif + strh r7, [r0, r12] + sub lr, r7, r8 + lsr lr, lr, #31 + sub r1, r1, lr + add r12, r12, lr, lsl #1 + subs r3, r3, #12 + bne L_kyber_arm32_rej_uniform_loop_no_fail + b L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_done_no_fail: + cmp r1, #0 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_loop: + ldm r2!, {r4, r5, r6} +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r4, #20 + lsr r7, r7, #20 +#else + ubfx r7, r4, #0, #12 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_0 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_0: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r4, #8 + lsr r7, r7, #20 +#else + ubfx r7, r4, #12, #12 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_1 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_1: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsr r7, r4, #24 +#else + ubfx r7, r4, #24, #8 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xf00 + ror r7, r7, #12 + orr r7, r7, r5, lsl #28 + ror r7, r7, #20 +#else + bfi r7, r5, #8, #4 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_2 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_2: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r5, #16 + lsr r7, r7, #20 +#else + ubfx r7, r5, #4, #12 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_3 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_3: +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r5, #4 + lsr r7, r7, #20 +#else + ubfx r7, r5, #16, #12 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_4 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_4: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsr r7, r5, #28 +#else + ubfx r7, r5, #28, #4 +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + bic r7, r7, #0xff0 + ror r7, r7, #12 + orr r7, r7, r6, lsl #24 + ror r7, r7, #20 +#else + bfi r7, r6, #4, #8 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_5 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_5: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r7, r6, #12 + lsr r7, r7, #20 +#else + ubfx r7, r6, #8, #12 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_6 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_6: +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsr r7, r6, #20 +#else + ubfx r7, r6, #20, #12 +#endif + cmp r7, r8 + bge L_kyber_arm32_rej_uniform_fail_7 + strh r7, [r0, r12] + subs r1, r1, #1 + add r12, r12, #2 + beq L_kyber_arm32_rej_uniform_done +L_kyber_arm32_rej_uniform_fail_7: + subs r3, r3, #12 + bgt L_kyber_arm32_rej_uniform_loop +L_kyber_arm32_rej_uniform_done: + lsr r0, r12, #1 + pop {r4, r5, r6, r7, r8, pc} + .size kyber_arm32_rej_uniform,.-kyber_arm32_rej_uniform +#endif /* WOLFSSL_WC_KYBER */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c new file mode 100644 index 000000000..df4285015 --- /dev/null +++ 
b/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c @@ -0,0 +1,9239 @@ +/* armv8-32-kyber-asm + * + * Copyright (C) 2006-2024 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./kyber/kyber.rb arm32 \ + * ../wolfssl/wolfcrypt/src/port/arm/armv8-32-kyber-asm.c + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) +#include +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include +#include +#ifdef WOLFSSL_ARMASM_INLINE + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) + +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif /* __KEIL__ */ +#include + +#ifdef WOLFSSL_WC_KYBER +static const uint16_t L_kyber_arm32_ntt_zetas[] = { /* 128 entries, all below 0xd01. kyber_arm32_ntt receives this table in r1 and reads it with both ldrh and ldr. Presumably the NTT twiddle factors (zetas) emitted by kyber.rb; confirm there. NOTE(review): the 32-bit ldr reads rely on more than the 2-byte alignment this declaration guarantees, unless unaligned word loads are acceptable on every target; confirm. */ + 0x08ed, 0x0a0b, 0x0b9a, 0x0714, + 0x05d5, 0x058e, 0x011f, 0x00ca, + 0x0c56, 0x026e, 0x0629, 0x00b6, + 0x03c2, 0x084f, 0x073f, 0x05bc, + 0x023d, 0x07d4, 0x0108, 0x017f, + 0x09c4, 0x05b2, 0x06bf, 0x0c7f, + 0x0a58, 0x03f9, 0x02dc, 0x0260, + 0x06fb, 
0x019b, 0x0c34, 0x06de, + 0x04c7, 0x028c, 0x0ad9, 0x03f7, + 0x07f4, 0x05d3, 0x0be7, 0x06f9, + 0x0204, 0x0cf9, 0x0bc1, 0x0a67, + 0x06af, 0x0877, 0x007e, 0x05bd, + 0x09ac, 0x0ca7, 0x0bf2, 0x033e, + 0x006b, 0x0774, 0x0c0a, 0x094a, + 0x0b73, 0x03c1, 0x071d, 0x0a2c, + 0x01c0, 0x08d8, 0x02a5, 0x0806, + 0x08b2, 0x01ae, 0x022b, 0x034b, + 0x081e, 0x0367, 0x060e, 0x0069, + 0x01a6, 0x024b, 0x00b1, 0x0c16, + 0x0bde, 0x0b35, 0x0626, 0x0675, + 0x0c0b, 0x030a, 0x0487, 0x0c6e, + 0x09f8, 0x05cb, 0x0aa7, 0x045f, + 0x06cb, 0x0284, 0x0999, 0x015d, + 0x01a2, 0x0149, 0x0c65, 0x0cb6, + 0x0331, 0x0449, 0x025b, 0x0262, + 0x052a, 0x07fc, 0x0748, 0x0180, + 0x0842, 0x0c79, 0x04c2, 0x07ca, + 0x0997, 0x00dc, 0x085e, 0x0686, + 0x0860, 0x0707, 0x0803, 0x031a, + 0x071b, 0x09ab, 0x099b, 0x01de, + 0x0c95, 0x0bcd, 0x03e4, 0x03df, + 0x03be, 0x074d, 0x05f2, 0x065c, +}; + +void kyber_arm32_ntt(sword16* r_p) +{ + register sword16* r asm ("r0") = (sword16*)r_p; + register uint16_t* L_kyber_arm32_ntt_zetas_c asm ("r1") = + (uint16_t*)&L_kyber_arm32_ntt_zetas; + + __asm__ __volatile__ ( + "sub sp, sp, #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xc000000\n\t" + "orr r10, r10, #0xff0000\n\t" +#else + "movt r10, #0xcff\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "mov r2, #16\n\t" + "\n" + "L_kyber_arm32_ntt_loop_123_%=: \n\t" + "str r2, [sp]\n\t" + "ldrh r11, [r1, #2]\n\t" + "ldr r2, [%[r]]\n\t" + "ldr r3, [%[r], #64]\n\t" + "ldr r4, [%[r], #128]\n\t" + "ldr r5, [%[r], #192]\n\t" + "ldr r6, [%[r], #256]\n\t" + "ldr r7, [%[r], #320]\n\t" + "ldr r8, [%[r], #384]\n\t" + "ldr r9, [%[r], #448]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r6\n\t" + "smulbt r6, r11, r6\n\t" + "smultb lr, 
r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r6\n\t" + "smlabb lr, r10, lr, r6\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r6, r2, r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r6, r6, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r6, lr, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r6, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r6, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r6\n\t" + "sub r6, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, 
#0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, lr, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r7\n\t" + "smulbt r7, r11, r7\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb lr, r10, lr, r7\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r7, r3, r12\n\t" + "sadd16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r7, r7, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r7, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, 
r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r7\n\t" + "sub r7, r3, lr\n\t" + "add r3, r3, lr\n\t" + "sub lr, r3, r12, lsr #16\n\t" + "add r12, r3, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, lr, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r8\n\t" + "smulbt r8, r11, r8\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb lr, r10, lr, r8\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r8, r4, r12\n\t" + "sadd16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r8, r8, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r8, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov 
r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r8\n\t" + "sub r8, r4, lr\n\t" + "add r4, r4, lr\n\t" + "sub lr, r4, r12, lsr #16\n\t" + "add r12, r4, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, lr, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r9\n\t" + "smulbt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r5, r12\n\t" + "sadd16 r5, r5, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r5, lr\n\t" + "add r5, r5, lr\n\t" + "sub lr, r5, r12, lsr #16\n\t" + "add r12, r5, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, 
r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r4\n\t" + "smulbt r4, r11, r4\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb lr, r10, lr, r4\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r4, r2, r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r4, r4, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r4, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r4, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r4\n\t" + "sub r4, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, lr, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r5\n\t" + "smulbt r5, r11, r5\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb lr, r10, lr, r5\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r5, r3, r12\n\t" + "sadd16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r5, r5, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r5, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + 
"sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r5\n\t" + "sub r5, r3, lr\n\t" + "add r3, r3, lr\n\t" + "sub lr, r3, r12, lsr #16\n\t" + "add r12, r3, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, lr, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r8\n\t" + "smultt r8, r11, r8\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb lr, r10, lr, r8\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r8, r6, r12\n\t" + "sadd16 r6, r6, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" 
+#endif + "asr r8, r8, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r8, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r8\n\t" + "sub r8, r6, lr\n\t" + "add r6, r6, lr\n\t" + "sub lr, r6, r12, lsr #16\n\t" + "add r12, r6, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, lr, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r9\n\t" + "smultt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r7, r12\n\t" + "sadd16 r7, r7, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r7, lr\n\t" + "add r7, r7, lr\n\t" + "sub lr, r7, r12, lsr #16\n\t" + "add r12, r7, r12, lsr #16\n\t" 
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #8]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r3\n\t" + "smulbt r3, r11, r3\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r3\n\t" + "smlabb lr, r10, lr, r3\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r3, r2, r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r3, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r3, r3, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r3, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r3, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r3, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r3\n\t" + "sub r3, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, lr, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r5\n\t" + "smultt r5, r11, r5\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb lr, r10, lr, r5\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r5, r4, r12\n\t" + "sadd16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r5, r5, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r5, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" 
+#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r5\n\t" + "sub r5, r4, lr\n\t" + "add r4, r4, lr\n\t" + "sub lr, r4, r12, lsr #16\n\t" + "add r12, r4, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, lr, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #12]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r7\n\t" + "smulbt r7, r11, r7\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb lr, r10, lr, r7\n\t" + "pkhtb r12, lr, r12, ASR 
#16\n\t" + "ssub16 r7, r6, r12\n\t" + "sadd16 r6, r6, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r7, r7, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r7, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r7\n\t" + "sub r7, r6, lr\n\t" + "add r6, r6, lr\n\t" + "sub lr, r6, r12, lsr #16\n\t" + "add r12, r6, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, lr, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi 
r7, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r9\n\t" + "smultt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r8, r12\n\t" + "sadd16 r8, r8, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + 
"add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r8, lr\n\t" + "add r8, r8, lr\n\t" + "sub lr, r8, r12, lsr #16\n\t" + "add r12, r8, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "str r2, [%[r]]\n\t" + "str r3, [%[r], #64]\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #192]\n\t" + "str r6, [%[r], #256]\n\t" + "str r7, [%[r], #320]\n\t" + "str r8, [%[r], #384]\n\t" + "str r9, [%[r], #448]\n\t" + "ldr r2, [sp]\n\t" + "subs r2, r2, #1\n\t" + "add %[r], %[r], #4\n\t" + "bne L_kyber_arm32_ntt_loop_123_%=\n\t" + "sub %[r], %[r], #0x40\n\t" + "mov r3, #0\n\t" + "\n" + "L_kyber_arm32_ntt_loop_4_j_%=: \n\t" + "str r3, [sp, #4]\n\t" + "add r11, r1, r3, lsr #4\n\t" + "mov r2, #4\n\t" + "ldr r11, [r11, #16]\n\t" + "\n" + "L_kyber_arm32_ntt_loop_4_i_%=: \n\t" + "str r2, [sp]\n\t" + "ldr r2, [%[r]]\n\t" + "ldr r3, [%[r], #16]\n\t" + "ldr r4, [%[r], #32]\n\t" + "ldr r5, [%[r], #48]\n\t" + "ldr r6, [%[r], #64]\n\t" + "ldr r7, [%[r], #80]\n\t" + "ldr r8, [%[r], #96]\n\t" + "ldr r9, [%[r], #112]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r4\n\t" + "smulbt r4, r11, r4\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb lr, r10, lr, r4\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r4, r2, 
r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r4, r4, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r4, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r4, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r4\n\t" + "sub r4, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, lr, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, lr, #0, #16\n\t" 
+#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r5\n\t" + "smulbt r5, r11, r5\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb lr, r10, lr, r5\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r5, r3, r12\n\t" + "sadd16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r5, r5, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r5, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + 
"add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r5\n\t" + "sub r5, r3, lr\n\t" + "add r3, r3, lr\n\t" + "sub lr, r3, r12, lsr #16\n\t" + "add r12, r3, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, lr, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r8\n\t" + "smultt r8, r11, r8\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb lr, r10, lr, r8\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r8, r6, r12\n\t" + "sadd16 r6, r6, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r8, r8, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r8, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, 
#16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r8\n\t" + "sub r8, r6, lr\n\t" + "add r6, r6, lr\n\t" + "sub lr, r6, r12, lsr #16\n\t" + "add r12, r6, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, lr, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r9\n\t" + "smultt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r7, r12\n\t" + "sadd16 r7, r7, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, 
#16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r7, lr\n\t" + "add r7, r7, lr\n\t" + "sub lr, r7, r12, lsr #16\n\t" + "add r12, r7, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 
*/ + "str r2, [%[r]]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #48]\n\t" + "str r6, [%[r], #64]\n\t" + "str r7, [%[r], #80]\n\t" + "str r8, [%[r], #96]\n\t" + "str r9, [%[r], #112]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp]\n\t" + "ldr r3, [sp, #4]\n\t" +#else + "ldrd r2, r3, [sp]\n\t" +#endif + "subs r2, r2, #1\n\t" + "add %[r], %[r], #4\n\t" + "bne L_kyber_arm32_ntt_loop_4_i_%=\n\t" + "add r3, r3, #0x40\n\t" + "rsbs r12, r3, #0x100\n\t" + "add %[r], %[r], #0x70\n\t" + "bne L_kyber_arm32_ntt_loop_4_j_%=\n\t" + "sub %[r], %[r], #0x200\n\t" + "mov r3, #0\n\t" + "\n" + "L_kyber_arm32_ntt_loop_567_%=: \n\t" + "add r11, r1, r3, lsr #3\n\t" + "str r3, [sp, #4]\n\t" + "ldrh r11, [r11, #32]\n\t" + "ldr r2, [%[r]]\n\t" + "ldr r3, [%[r], #4]\n\t" + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" + "ldr r6, [%[r], #16]\n\t" + "ldr r7, [%[r], #20]\n\t" + "ldr r8, [%[r], #24]\n\t" + "ldr r9, [%[r], #28]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r6\n\t" + "smulbt r6, r11, r6\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r6\n\t" + "smlabb lr, r10, lr, r6\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r6, r2, r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r6, r6, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r6, lr, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add 
r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r6, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r6, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r6\n\t" + "sub r6, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, lr, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r7\n\t" + "smulbt r7, r11, r7\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb lr, r10, lr, r7\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r7, r3, r12\n\t" + "sadd16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, 
#16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r7, r7, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r7, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r7\n\t" + "sub r7, r3, lr\n\t" + "add r3, r3, lr\n\t" + "sub lr, r3, r12, lsr #16\n\t" + "add r12, r3, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, lr, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + 
"orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r8\n\t" + "smulbt r8, r11, r8\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb lr, r10, lr, r8\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r8, r4, r12\n\t" + "sadd16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r8, r8, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r8, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" 
+ "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r8\n\t" + "sub r8, r4, lr\n\t" + "add r4, r4, lr\n\t" + "sub lr, r4, r12, lsr #16\n\t" + "add r12, r4, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, lr, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r9\n\t" + "smulbt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r5, r12\n\t" + "sadd16 r5, r5, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r5, lr\n\t" + "add r5, r5, lr\n\t" + "sub lr, r5, r12, lsr #16\n\t" + "add r12, r5, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [sp, #4]\n\t" + "add r11, r1, r11, lsr #2\n\t" + "ldr r11, [r11, #64]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r4\n\t" + "smulbt r4, r11, r4\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb lr, r10, lr, r4\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r4, r2, r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" 
+#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r4, r4, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r4, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r4, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r4\n\t" + "sub r4, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, lr, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r5\n\t" + "smulbt r5, r11, r5\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb lr, r10, lr, r5\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r5, r3, r12\n\t" + "sadd16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r5, r5, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r5, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r5\n\t" + "sub r5, r3, lr\n\t" + "add r3, r3, lr\n\t" + "sub lr, r3, 
r12, lsr #16\n\t" + "add r12, r3, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, lr, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r8\n\t" + "smultt r8, r11, r8\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb lr, r10, lr, r8\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r8, r6, r12\n\t" + "sadd16 r6, r6, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r8, r8, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r8, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r8\n\t" + "sub r8, r6, lr\n\t" + "add r6, r6, lr\n\t" + "sub lr, r6, r12, lsr #16\n\t" + "add r12, r6, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, lr, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r9\n\t" + "smultt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r7, r12\n\t" + "sadd16 r7, r7, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" 
+#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r7, lr\n\t" + "add r7, r7, lr\n\t" + "sub lr, r7, r12, lsr #16\n\t" + "add r12, r7, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [sp, #4]\n\t" + "add r11, r1, r11, lsr #1\n\t" + "ldr r11, [r11, #128]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r3\n\t" + "smulbt r3, r11, r3\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, 
r3\n\t" + "smlabb lr, r10, lr, r3\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r3, r2, r12\n\t" + "sadd16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r3, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r3, r3, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r3, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r3, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r3, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r3\n\t" + "sub r3, r2, lr\n\t" + "add r2, r2, lr\n\t" + "sub lr, r2, r12, lsr #16\n\t" + "add r12, r2, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + 
"orr r3, r3, lr, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r5\n\t" + "smultt r5, r11, r5\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb lr, r10, lr, r5\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r5, r4, r12\n\t" + "sadd16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r5, r5, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r5, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r5\n\t" + "sub r5, r4, lr\n\t" + "add r4, r4, lr\n\t" + "sub lr, r4, r12, lsr #16\n\t" + "add r12, r4, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, lr, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [sp, #4]\n\t" + "add r11, r1, r11, lsr #1\n\t" + "ldr r11, [r11, #132]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r7\n\t" + "smulbt r7, r11, r7\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb lr, r10, lr, r7\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r7, r6, r12\n\t" + "sadd16 r6, r6, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r7, r7, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r7, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
"mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r7\n\t" + "sub r7, r6, lr\n\t" + "add r6, r6, lr\n\t" + "sub lr, r6, r12, lsr #16\n\t" + "add r12, r6, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, lr, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultb r12, r11, r9\n\t" + "smultt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb lr, r10, lr, r9\n\t" + "pkhtb r12, lr, r12, ASR #16\n\t" + "ssub16 r9, r8, r12\n\t" + "sadd16 r8, r8, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r9, r9, #16\n\t" + "mul r12, lr, r12\n\t" + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla lr, r10, lr, r9\n\t" + "sub r9, r8, lr\n\t" + "add r8, r8, lr\n\t" + "sub lr, r8, r12, lsr #16\n\t" + "add r12, r8, r12, lsr #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, lr, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, 
#0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0xaf\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0xc0\n\t" +#else + "mov r11, #0xafc0\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x130000\n\t" +#else + "movt r11, #0x13\n\t" +#endif +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0x4e\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0xbf\n\t" +#else + "mov r11, #0x4ebf\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r2\n\t" + "smulwt lr, r11, r2\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r2, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r2, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r2, #16\n\t" +#else + "sbfx lr, r2, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r2, lr, lsl #16\n\t" + "sub r2, r2, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff0000\n\t" + "bic r2, r2, #0xff000000\n\t" + "orr r2, r2, lr, lsl #16\n\t" +#else + "bfi r2, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r3\n\t" + "smulwt lr, r11, r3\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r3, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r3, #16\n\t" +#else + "sbfx lr, r3, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r3, lr, lsl #16\n\t" + "sub r3, r3, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff0000\n\t" + "bic r3, r3, #0xff000000\n\t" + "orr r3, r3, lr, lsl #16\n\t" +#else + "bfi r3, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r4\n\t" + "smulwt lr, r11, r4\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r4, #16\n\t" +#else + "sbfx lr, r4, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r4, lr, lsl #16\n\t" + "sub r4, r4, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff0000\n\t" + "bic r4, r4, #0xff000000\n\t" + "orr r4, r4, lr, lsl #16\n\t" +#else + "bfi r4, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r5\n\t" + "smulwt lr, r11, r5\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r5, r5, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r5, #16\n\t" +#else + "sbfx lr, r5, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r5, lr, lsl #16\n\t" + "sub r5, r5, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff0000\n\t" + "bic r5, r5, #0xff000000\n\t" + "orr r5, r5, lr, lsl #16\n\t" +#else + "bfi r5, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r6\n\t" + "smulwt lr, r11, r6\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r6, r6, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r6, #16\n\t" +#else + "sbfx lr, r6, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r6, lr, lsl #16\n\t" + "sub r6, r6, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff0000\n\t" + "bic r6, r6, #0xff000000\n\t" + "orr r6, r6, lr, lsl #16\n\t" +#else + "bfi r6, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r7\n\t" + "smulwt lr, r11, r7\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r7, r7, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r7, #16\n\t" +#else + "sbfx lr, r7, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r7, lr, lsl #16\n\t" + "sub r7, r7, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff0000\n\t" + "bic r7, r7, #0xff000000\n\t" + "orr r7, r7, lr, lsl #16\n\t" +#else + "bfi r7, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r8\n\t" + "smulwt lr, r11, r8\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r8, r8, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r8, #16\n\t" +#else + "sbfx lr, r8, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r8, lr, lsl #16\n\t" + "sub r8, r8, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff0000\n\t" + "bic r8, r8, #0xff000000\n\t" + "orr r8, r8, lr, lsl #16\n\t" +#else + "bfi r8, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r9\n\t" + "smulwt lr, r11, r9\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r9, r9, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r9, #16\n\t" +#else + "sbfx lr, r9, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r9, lr, lsl #16\n\t" + "sub r9, r9, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff0000\n\t" + "bic r9, r9, #0xff000000\n\t" + "orr r9, r9, lr, lsl #16\n\t" +#else + "bfi r9, lr, #16, #16\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xc000000\n\t" + "orr r10, r10, #0xff0000\n\t" +#else + "movt r10, #0xcff\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" + "ldr r3, [sp, #4]\n\t" + "add r3, r3, #16\n\t" + "rsbs r12, r3, #0x100\n\t" + "add %[r], %[r], #32\n\t" + "bne L_kyber_arm32_ntt_loop_567_%=\n\t" + "add sp, sp, #8\n\t" + : [r] "+r" (r), + [L_kyber_arm32_ntt_zetas] "+r" (L_kyber_arm32_ntt_zetas_c) + : + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", + "r9", "r10", "r11" + ); +} +/* 128 u16 constants; kyber_arm32_invntt loads them in pairs through r1. */ +static
const uint16_t L_kyber_arm32_invntt_zetas_inv[] = { + 0x06a5, 0x070f, 0x05b4, 0x0943, + 0x0922, 0x091d, 0x0134, 0x006c, + 0x0b23, 0x0366, 0x0356, 0x05e6, + 0x09e7, 0x04fe, 0x05fa, 0x04a1, + 0x067b, 0x04a3, 0x0c25, 0x036a, + 0x0537, 0x083f, 0x0088, 0x04bf, + 0x0b81, 0x05b9, 0x0505, 0x07d7, + 0x0a9f, 0x0aa6, 0x08b8, 0x09d0, + 0x004b, 0x009c, 0x0bb8, 0x0b5f, + 0x0ba4, 0x0368, 0x0a7d, 0x0636, + 0x08a2, 0x025a, 0x0736, 0x0309, + 0x0093, 0x087a, 0x09f7, 0x00f6, + 0x068c, 0x06db, 0x01cc, 0x0123, + 0x00eb, 0x0c50, 0x0ab6, 0x0b5b, + 0x0c98, 0x06f3, 0x099a, 0x04e3, + 0x09b6, 0x0ad6, 0x0b53, 0x044f, + 0x04fb, 0x0a5c, 0x0429, 0x0b41, + 0x02d5, 0x05e4, 0x0940, 0x018e, + 0x03b7, 0x00f7, 0x058d, 0x0c96, + 0x09c3, 0x010f, 0x005a, 0x0355, + 0x0744, 0x0c83, 0x048a, 0x0652, + 0x029a, 0x0140, 0x0008, 0x0afd, + 0x0608, 0x011a, 0x072e, 0x050d, + 0x090a, 0x0228, 0x0a75, 0x083a, + 0x0623, 0x00cd, 0x0b66, 0x0606, + 0x0aa1, 0x0a25, 0x0908, 0x02a9, + 0x0082, 0x0642, 0x074f, 0x033d, + 0x0b82, 0x0bf9, 0x052d, 0x0ac4, + 0x0745, 0x05c2, 0x04b2, 0x093f, + 0x0c4b, 0x06d8, 0x0a93, 0x00ab, + 0x0c37, 0x0be2, 0x0773, 0x072c, + 0x05ed, 0x0167, 0x02f6, 0x05a1, +}; /* NOTE(review): presumably inverse-NTT zetas; confirm with generator */ + +void kyber_arm32_invntt(sword16* r_p) +{ + register sword16* r asm ("r0") = (sword16*)r_p; + register uint16_t* L_kyber_arm32_invntt_zetas_inv_c asm ("r1") = + (uint16_t*)&L_kyber_arm32_invntt_zetas_inv; + + __asm__ __volatile__ ( + "sub sp, sp, #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r10, r10, #0xc000000\n\t" + "orr r10, r10, #0xff0000\n\t" +#else + "movt r10, #0xcff\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "mov r3, #0\n\t" + "\n" + "L_kyber_arm32_invntt_loop_765_%=: \n\t" + "add r11, r1, r3, lsr #1\n\t" + "str r3, [sp, #4]\n\t" + "ldr r2, [%[r]]\n\t" +
"ldr r3, [%[r], #4]\n\t" + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" + "ldr r6, [%[r], #16]\n\t" + "ldr r7, [%[r], #20]\n\t" + "ldr r8, [%[r], #24]\n\t" + "ldr r9, [%[r], #28]\n\t" + "ldr r11, [r11]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r3\n\t" + "sadd16 r2, r2, r3\n\t" + "smulbt r3, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r3\n\t" + "smlabb r3, r10, lr, r3\n\t" + "pkhtb r3, r3, r12, ASR #16\n\t" +#else + "sub lr, r2, r3\n\t" + "add r10, r2, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r3\n\t" + "add r2, r2, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r3, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r3, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r3, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r3, r10, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r4, r5\n\t" + "sadd16 r4, r4, r5\n\t" + "smultt r5, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb r5, r10, lr, r5\n\t" + "pkhtb r5, r5, r12, ASR #16\n\t" +#else + "sub lr, r4, r5\n\t" + "add r10, r4, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, 
r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" +#else + "bfc r4, #0, #16\n\t" +#endif + "sub r12, r4, r5\n\t" + "add r4, r4, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r10, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r5, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r5, r10, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [sp, #4]\n\t" + "add r11, r1, r11, lsr #1\n\t" + "ldr r11, [r11, #4]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r6, r7\n\t" + "sadd16 r6, r6, r7\n\t" + "smulbt r7, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb r7, r10, lr, r7\n\t" + "pkhtb r7, r7, r12, ASR #16\n\t" +#else + "sub lr, r6, r7\n\t" + "add r10, r6, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif + "sub r12, r6, r7\n\t" + "add r6, r6, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr 
r6, r6, r10, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r7, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r7, r10, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + 
"ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r8, r9\n\t" + "sadd16 r8, r8, r9\n\t" + "smultt r9, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r8, r9\n\t" + "add r10, r8, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif + "sub r12, r8, r9\n\t" + "add r8, r8, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r10, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + 
"asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [sp, #4]\n\t" + "add r11, r1, r11, lsr #2\n\t" + "ldr r11, [r11, #128]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r4\n\t" + "sadd16 r2, r2, r4\n\t" + "smulbt r4, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb r4, r10, lr, r4\n\t" + "pkhtb r4, r4, r12, ASR #16\n\t" +#else + "sub lr, r2, r4\n\t" + "add r10, r2, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + 
"bic r4, r4, #0xff00\n\t" +#else + "bfc r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r4\n\t" + "add r2, r2, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r4, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r4, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r4, r10, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r3, r5\n\t" + "sadd16 r3, r3, r5\n\t" + "smulbt r5, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb r5, r10, lr, r5\n\t" + "pkhtb r5, r5, r12, ASR #16\n\t" +#else + "sub lr, r3, r5\n\t" + "add r10, r3, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif + "sub r12, r3, r5\n\t" + "add r3, r3, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r10, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else 
+ "bfi r3, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r5, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r5, r10, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + 
"ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r6, r8\n\t" + "sadd16 r6, r6, r8\n\t" + "smultt r8, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb r8, r10, lr, r8\n\t" + "pkhtb r8, r8, r12, ASR #16\n\t" +#else + "sub lr, r6, r8\n\t" + "add r10, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif + "sub r12, r6, r8\n\t" + "add r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r10, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r8, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" 
+#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r8, r10, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r7, r9\n\t" + "sadd16 r7, r7, r9\n\t" + "smultt r9, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r7, r9\n\t" + "add r10, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" 
+ "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif + "sub r12, r7, r9\n\t" + "add r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r10, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [sp, #4]\n\t" + "add r11, r1, r11, lsr #3\n\t" + "ldr r11, [r11, #192]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r6\n\t" + "sadd16 r2, r2, r6\n\t" + "smulbt r6, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r6\n\t" + "smlabb r6, r10, lr, r6\n\t" + "pkhtb r6, r6, r12, ASR #16\n\t" +#else + "sub lr, r2, r6\n\t" + "add r10, r2, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r6\n\t" + "add r2, r2, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r6, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r6, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r6, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r6, r10, lr, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif 
+#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r3, r7\n\t" + "sadd16 r3, r3, r7\n\t" + "smulbt r7, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb r7, r10, lr, r7\n\t" + "pkhtb r7, r7, r12, ASR #16\n\t" +#else + "sub lr, r3, r7\n\t" + "add r10, r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif + "sub r12, r3, r7\n\t" + "add r3, r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r10, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r7, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r7, r10, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r4, r8\n\t" + "sadd16 r4, r4, r8\n\t" + "smulbt r8, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb r8, r10, lr, r8\n\t" + "pkhtb r8, r8, r12, ASR #16\n\t" +#else + "sub lr, r4, r8\n\t" + "add r10, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" +#else + 
"bfc r4, #0, #16\n\t" +#endif + "sub r12, r4, r8\n\t" + "add r4, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r10, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r8, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r8, r10, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r5, r9\n\t" + "sadd16 r5, r5, r9\n\t" + "smulbt r9, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r5, r9\n\t" + "add r10, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif + "sub r12, r5, r9\n\t" + "add r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r10, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, 
#16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0xaf\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0xc0\n\t" +#else + "mov r11, #0xafc0\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x130000\n\t" +#else + "movt r11, #0x13\n\t" +#endif +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0x4e\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0xbf\n\t" +#else + "mov r11, #0x4ebf\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r2\n\t" + "smulwt lr, r11, r2\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r2, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r2, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r2, #16\n\t" +#else + "sbfx lr, r2, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r2, lr, lsl #16\n\t" + "sub r2, r2, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff0000\n\t" + "bic r2, r2, #0xff000000\n\t" + "orr r2, r2, lr, lsl #16\n\t" +#else + "bfi r2, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r3\n\t" + "smulwt lr, r11, r3\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r3, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "asr lr, r3, #16\n\t" +#else + "sbfx lr, r3, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r3, lr, lsl #16\n\t" + "sub r3, r3, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff0000\n\t" + "bic r3, r3, #0xff000000\n\t" + "orr r3, r3, lr, lsl #16\n\t" +#else + "bfi r3, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r4\n\t" + "smulwt lr, r11, r4\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r4, #16\n\t" +#else + "sbfx lr, r4, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r4, lr, lsl #16\n\t" + "sub r4, r4, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff0000\n\t" + "bic r4, r4, #0xff000000\n\t" + "orr r4, r4, lr, lsl #16\n\t" +#else + "bfi r4, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r5\n\t" + "smulwt lr, r11, r5\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r5, r5, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "asr lr, r5, #16\n\t" +#else + "sbfx lr, r5, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r5, lr, lsl #16\n\t" + "sub r5, r5, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff0000\n\t" + "bic r5, r5, #0xff000000\n\t" + "orr r5, r5, lr, lsl #16\n\t" +#else + "bfi r5, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" + "ldr r3, [sp, #4]\n\t" + "add r3, r3, #16\n\t" + "rsbs r12, r3, #0x100\n\t" + "add %[r], %[r], #32\n\t" + "bne L_kyber_arm32_invntt_loop_765_%=\n\t" + "sub %[r], %[r], #0x200\n\t" + "mov r3, #0\n\t" + "\n" + "L_kyber_arm32_invntt_loop_4_j_%=: \n\t" + "str r3, [sp, #4]\n\t" + "add r11, r1, r3, lsr #4\n\t" + "mov r2, #4\n\t" + "ldr r11, [r11, #224]\n\t" + "\n" + "L_kyber_arm32_invntt_loop_4_i_%=: \n\t" + "str r2, [sp]\n\t" + "ldr r2, [%[r]]\n\t" + "ldr r3, [%[r], #16]\n\t" + "ldr r4, [%[r], #32]\n\t" + "ldr r5, [%[r], #48]\n\t" + "ldr r6, [%[r], #64]\n\t" + "ldr r7, [%[r], #80]\n\t" + "ldr r8, [%[r], #96]\n\t" + "ldr r9, [%[r], #112]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r4\n\t" + "sadd16 r2, r2, r4\n\t" + "smulbt r4, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb r4, r10, lr, r4\n\t" + "pkhtb r4, r4, r12, ASR #16\n\t" +#else + "sub lr, r2, r4\n\t" + "add r10, r2, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" +#else + "bfc r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r4\n\t" + "add r2, r2, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r4, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" 
+#else + "sbfx lr, r4, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r4, r10, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r3, r5\n\t" + "sadd16 r3, r3, r5\n\t" + "smulbt r5, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb r5, r10, lr, r5\n\t" + "pkhtb r5, r5, r12, ASR #16\n\t" +#else + "sub lr, r3, r5\n\t" + "add r10, r3, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif + "sub r12, r3, r5\n\t" + "add r3, r3, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r10, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
"lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r5, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r5, r10, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH 
&& WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r6, r8\n\t" + "sadd16 r6, r6, r8\n\t" + "smultt r8, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb r8, r10, lr, r8\n\t" + "pkhtb r8, r8, r12, ASR #16\n\t" +#else + "sub lr, r6, r8\n\t" + "add r10, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif + "sub r12, r6, r8\n\t" + "add r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r10, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r8, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov 
r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r8, r10, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r7, r9\n\t" + "sadd16 r7, r7, r9\n\t" + "smultt r9, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r7, r9\n\t" + "add r10, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif + "sub r12, r7, r9\n\t" + 
"add r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r10, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, 
#8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "str r2, [%[r]]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #48]\n\t" + "str r6, [%[r], #64]\n\t" + "str r7, [%[r], #80]\n\t" + "str r8, [%[r], #96]\n\t" + "str r9, [%[r], #112]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp]\n\t" + "ldr r3, [sp, #4]\n\t" +#else + "ldrd r2, r3, [sp]\n\t" +#endif + "subs r2, r2, #1\n\t" + "add %[r], %[r], #4\n\t" + "bne L_kyber_arm32_invntt_loop_4_i_%=\n\t" + "add r3, r3, #0x40\n\t" + "rsbs r12, r3, #0x100\n\t" + "add %[r], %[r], #0x70\n\t" + "bne L_kyber_arm32_invntt_loop_4_j_%=\n\t" + "sub %[r], %[r], #0x200\n\t" + "mov r2, #16\n\t" + "\n" + "L_kyber_arm32_invntt_loop_321_%=: \n\t" + "str r2, [sp]\n\t" + "ldrh r11, [r1, #2]\n\t" + "ldr r2, [%[r]]\n\t" + "ldr r3, [%[r], #64]\n\t" + "ldr r4, [%[r], #128]\n\t" + "ldr r5, [%[r], #192]\n\t" + "ldr r6, [%[r], #256]\n\t" + "ldr r7, [%[r], #320]\n\t" + "ldr r8, [%[r], #384]\n\t" + "ldr r9, [%[r], #448]\n\t" + "ldr r11, [r1, #240]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r3\n\t" + "sadd16 r2, r2, r3\n\t" + "smulbt r3, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r3\n\t" + "smlabb r3, r10, lr, r3\n\t" + "pkhtb r3, r3, r12, ASR #16\n\t" +#else + "sub lr, r2, r3\n\t" + "add r10, r2, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + 
"bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r3\n\t" + "add r2, r2, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r3, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, 
#0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r3, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r3, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r3, r10, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r4, r5\n\t" + "sadd16 r4, r4, r5\n\t" + "smultt r5, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb r5, r10, lr, r5\n\t" + "pkhtb r5, r5, r12, ASR #16\n\t" +#else + "sub lr, r4, r5\n\t" + "add r10, r4, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" +#else + "bfc r4, #0, #16\n\t" +#endif + "sub r12, r4, r5\n\t" + "add r4, r4, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r10, lsl #16\n\t" + 
"ror r4, r4, #16\n\t" +#else + "bfi r4, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r5, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r5, r10, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl 
#16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #244]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r6, r7\n\t" + "sadd16 r6, r6, r7\n\t" + "smulbt r7, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb r7, r10, lr, r7\n\t" + "pkhtb r7, r7, r12, ASR #16\n\t" +#else + "sub lr, r6, r7\n\t" + "add r10, r6, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif + "sub r12, r6, r7\n\t" + "add r6, r6, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r10, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r7, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" 
+ "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r7, r10, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r8, r9\n\t" + "sadd16 r8, r8, r9\n\t" + "smultt r9, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r8, r9\n\t" + "add r10, r8, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif + "sub r12, r8, r9\n\t" + "add r8, r8, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r10, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, 
#16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #248]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r4\n\t" + "sadd16 r2, r2, r4\n\t" + "smulbt r4, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb r4, r10, lr, r4\n\t" + "pkhtb r4, r4, r12, ASR #16\n\t" +#else + "sub lr, r2, r4\n\t" + "add r10, r2, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" +#else + "bfc r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r4\n\t" + "add r2, r2, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r4, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r4, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r4, r10, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, 
r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r3, r5\n\t" + "sadd16 r3, r3, r5\n\t" + "smulbt r5, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb r5, r10, lr, r5\n\t" + "pkhtb r5, r5, r12, ASR #16\n\t" +#else + "sub lr, r3, r5\n\t" + "add r10, r3, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif + "sub r12, r3, r5\n\t" + "add r3, r3, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r10, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r5, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, 
r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r5, r10, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r6, r8\n\t" + "sadd16 r6, r6, r8\n\t" + "smultt r8, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb r8, r10, lr, r8\n\t" + "pkhtb r8, r8, r12, ASR #16\n\t" +#else + "sub lr, r6, r8\n\t" + "add r10, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, 
#0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif + "sub r12, r6, r8\n\t" + "add r6, r6, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r10, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r8, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r8, r10, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r7, r9\n\t" + "sadd16 r7, r7, r9\n\t" + "smultt r9, r11, r12\n\t" + "smultb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r7, r9\n\t" + "add r10, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif + "sub r12, r7, r9\n\t" + "add r7, r7, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r10, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r11, #16\n\t" +#else + "sbfx lr, r11, #16, #16\n\t" +#endif + "asr 
r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0xaf\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0xc0\n\t" +#else + "mov r11, #0xafc0\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x130000\n\t" +#else + "movt r11, #0x13\n\t" +#endif +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0x4e\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0xbf\n\t" +#else + "mov r11, #0x4ebf\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r2\n\t" + "smulwt lr, r11, r2\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r2, r2, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r2, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r2, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r2, #16\n\t" +#else + "sbfx lr, r2, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r2, lr, lsl #16\n\t" + "sub r2, r2, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff0000\n\t" + "bic r2, r2, #0xff000000\n\t" + "orr r2, r2, lr, lsl #16\n\t" +#else + "bfi r2, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r3\n\t" + "smulwt lr, r11, r3\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r3, r3, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r3, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r3, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r3, 
#16\n\t" +#else + "sbfx lr, r3, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r3, lr, lsl #16\n\t" + "sub r3, r3, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff0000\n\t" + "bic r3, r3, #0xff000000\n\t" + "orr r3, r3, lr, lsl #16\n\t" +#else + "bfi r3, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r4\n\t" + "smulwt lr, r11, r4\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r4, r4, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r4, #16\n\t" +#else + "sbfx lr, r4, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r4, lr, lsl #16\n\t" + "sub r4, r4, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff0000\n\t" + "bic r4, r4, #0xff000000\n\t" + "orr r4, r4, lr, lsl #16\n\t" +#else + "bfi r4, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulwb r12, r11, r5\n\t" + "smulwt lr, r11, r5\n\t" + "smulbt r12, r10, r12\n\t" + "smulbt lr, r10, lr\n\t" + "pkhbt r12, r12, lr, LSL #16\n\t" + "ssub16 r5, r5, r12\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr lr, r5, 
#16\n\t" +#else + "sbfx lr, r5, #16, #16\n\t" +#endif + "mul r12, r11, r12\n\t" + "mul lr, r11, lr\n\t" + "asr r12, r12, #26\n\t" + "asr lr, lr, #26\n\t" + "mul r12, r10, r12\n\t" + "mul lr, r10, lr\n\t" + "sub lr, r5, lr, lsl #16\n\t" + "sub r5, r5, r12\n\t" + "lsr lr, lr, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff0000\n\t" + "bic r5, r5, #0xff000000\n\t" + "orr r5, r5, lr, lsl #16\n\t" +#else + "bfi r5, lr, #16, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #252]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r2, r6\n\t" + "sadd16 r2, r2, r6\n\t" + "smulbt r6, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r6\n\t" + "smlabb r6, r10, lr, r6\n\t" + "pkhtb r6, r6, r12, ASR #16\n\t" +#else + "sub lr, r2, r6\n\t" + "add r10, r2, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" +#else + "bfc r2, #0, #16\n\t" +#endif + "sub r12, r2, r6\n\t" + "add r2, r2, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r6, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) 
&& (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r6, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r6, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r6, r10, lr, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r3, r7\n\t" + "sadd16 r3, r3, r7\n\t" + "smulbt r7, r11, r12\n\t" + 
"smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb r7, r10, lr, r7\n\t" + "pkhtb r7, r7, r12, ASR #16\n\t" +#else + "sub lr, r3, r7\n\t" + "add r10, r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" +#else + "bfc r3, #0, #16\n\t" +#endif + "sub r12, r3, r7\n\t" + "add r3, r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r10, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r7, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r7, r10, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r4, r8\n\t" + "sadd16 r4, r4, r8\n\t" + "smulbt r8, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb r8, r10, lr, r8\n\t" + "pkhtb r8, r8, r12, ASR #16\n\t" +#else + "sub lr, r4, r8\n\t" + "add r10, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" +#else + "bfc r4, #0, #16\n\t" +#endif + "sub r12, r4, r8\n\t" + "add r4, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror 
r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r10, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r8, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r8, r10, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r12, r5, r9\n\t" + "sadd16 r5, r5, r9\n\t" + "smulbt r9, r11, r12\n\t" + "smulbb r12, r11, r12\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else + "sub lr, r5, r9\n\t" + "add r10, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" +#else + "bfc r5, #0, #16\n\t" +#endif + "sub r12, r5, r9\n\t" + "add r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r12, r12, #0xff\n\t" + "bic r12, r12, #0xff00\n\t" + "ror r12, r12, #16\n\t" + "orr r12, r12, lr, lsl #16\n\t" + "ror r12, r12, #16\n\t" +#else + "bfi r12, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r10, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r10, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif + "asr r10, r12, #16\n\t" + "mul r9, lr, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r12, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r12, 
#0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r12, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r12, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "ldr r11, [r1, #254]\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r2\n\t" + "smulbt r2, r11, r2\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r2\n\t" + "smlabb r2, r10, 
lr, r2\n\t" + "pkhtb r2, r2, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r2, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r2, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r2, r2, #16\n\t" +#else + "sbfx r2, r2, #16, #16\n\t" +#endif + "mul r2, lr, r2\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r2, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r2, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r2, r10, lr, r2\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r12, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi 
r2, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r3\n\t" + "smulbt r3, r11, r3\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r3\n\t" + "smlabb r3, r10, lr, r3\n\t" + "pkhtb r3, r3, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r3, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r3, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r3, r3, #16\n\t" +#else + "sbfx r3, r3, #16, #16\n\t" +#endif + "mul r3, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r3, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r3, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, 
#16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r3, r10, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r12, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r4\n\t" + "smulbt r4, r11, r4\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r4\n\t" + "smlabb r4, r10, lr, r4\n\t" + "pkhtb r4, r4, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r4, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r4, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r4, r4, #16\n\t" +#else + "sbfx r4, r4, #16, #16\n\t" +#endif + "mul r4, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r4, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r4, #0, 
#16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r4, r10, lr, r4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r12, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r5\n\t" + "smulbt r5, r11, r5\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r5\n\t" + "smlabb r5, r10, lr, r5\n\t" + "pkhtb r5, r5, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r5, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r5, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r5, r5, #16\n\t" +#else + "sbfx r5, r5, #16, #16\n\t" +#endif + "mul r5, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, 
r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r5, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r5, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r5, r10, lr, r5\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r12, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r6\n\t" + "smulbt r6, r11, r6\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r6\n\t" + "smlabb r6, r10, lr, r6\n\t" + "pkhtb r6, r6, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r6, r6, #16\n\t" +#else + "sbfx r6, r6, #16, #16\n\t" +#endif + "mul r6, lr, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r6, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r6, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r6, r10, lr, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" + "ror r6, r6, #16\n\t" + "orr r6, r6, r12, lsl #16\n\t" + "ror r6, r6, #16\n\t" +#else + "bfi r6, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r7\n\t" + "smulbt r7, r11, r7\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r7\n\t" + "smlabb r7, r10, lr, r7\n\t" + "pkhtb r7, r7, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r7, r7, 
#16\n\t" +#else + "sbfx r7, r7, #16, #16\n\t" +#endif + "mul r7, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r7, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r7, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r7, r10, lr, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" + "ror r7, r7, #16\n\t" + "orr r7, r7, r12, lsl #16\n\t" + "ror r7, r7, #16\n\t" +#else + "bfi r7, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smulbb r12, r11, r8\n\t" + "smulbt r8, r11, r8\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r8\n\t" + "smlabb r8, r10, lr, r8\n\t" + "pkhtb r8, r8, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, 
lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r8, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r8, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r8, r8, #16\n\t" +#else + "sbfx r8, r8, #16, #16\n\t" +#endif + "mul r8, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r8, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r8, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r8, r10, lr, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" + "ror r8, r8, #16\n\t" + "orr r8, r8, r12, lsl #16\n\t" + "ror r8, r8, #16\n\t" +#else + "bfi r8, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + 
"smulbb r12, r11, r9\n\t" + "smulbt r9, r11, r9\n\t" + "smultb lr, r10, r12\n\t" + "smlabb r12, r10, lr, r12\n\t" + "smultb lr, r10, r9\n\t" + "smlabb r9, r10, lr, r9\n\t" + "pkhtb r9, r9, r12, ASR #16\n\t" +#else +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r11, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r11, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r9, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r9, #0, #16\n\t" +#endif + "mul r12, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r9, #16, #16\n\t" +#endif + "mul r9, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif + "mul lr, r10, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "mla r12, r10, lr, r12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xc\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0xff\n\t" +#else + "mov r10, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, r9, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, r9, #0, #16\n\t" +#endif + "mul lr, r10, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r10, #0xd\n\t" + "lsl r10, r10, #8\n\t" + "add r10, r10, #0x1\n\t" +#else + "mov r10, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl lr, lr, #16\n\t" + "asr lr, lr, #16\n\t" +#else + "sbfx lr, lr, #0, #16\n\t" +#endif + "lsr r12, r12, #16\n\t" + "mla r9, r10, lr, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" + "ror r9, r9, #16\n\t" + "orr r9, r9, r12, lsl #16\n\t" + "ror r9, r9, #16\n\t" +#else + "bfi r9, r12, #0, #16\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "str r2, [%[r]]\n\t" + "str r3, [%[r], #64]\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #192]\n\t" + "str r6, [%[r], #256]\n\t" + "str r7, [%[r], #320]\n\t" + "str r8, [%[r], #384]\n\t" + "str r9, [%[r], #448]\n\t" + "ldr r2, [sp]\n\t" + "subs r2, r2, #1\n\t" + "add %[r], %[r], #4\n\t" + "bne L_kyber_arm32_invntt_loop_321_%=\n\t" + "add sp, sp, #8\n\t" + : [r] "+r" (r), + [L_kyber_arm32_invntt_zetas_inv] "+r" (L_kyber_arm32_invntt_zetas_inv_c) + : + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", + "r9", "r10", "r11" + ); +} + +static const uint16_t L_kyber_arm32_basemul_mont_zetas[] = { + 0x08ed, 0x0a0b, 0x0b9a, 0x0714, + 0x05d5, 0x058e, 0x011f, 0x00ca, + 0x0c56, 0x026e, 0x0629, 0x00b6, + 0x03c2, 0x084f, 0x073f, 0x05bc, + 0x023d, 0x07d4, 0x0108, 0x017f, + 0x09c4, 0x05b2, 0x06bf, 0x0c7f, + 0x0a58, 0x03f9, 0x02dc, 0x0260, + 0x06fb, 0x019b, 0x0c34, 0x06de, + 0x04c7, 0x028c, 0x0ad9, 0x03f7, + 0x07f4, 0x05d3, 0x0be7, 0x06f9, + 0x0204, 0x0cf9, 0x0bc1, 0x0a67, + 0x06af, 0x0877, 0x007e, 0x05bd, + 0x09ac, 0x0ca7, 0x0bf2, 0x033e, + 0x006b, 0x0774, 0x0c0a, 0x094a, + 0x0b73, 0x03c1, 0x071d, 0x0a2c, + 0x01c0, 0x08d8, 0x02a5, 0x0806, + 0x08b2, 0x01ae, 0x022b, 0x034b, + 0x081e, 0x0367, 0x060e, 0x0069, + 0x01a6, 0x024b, 0x00b1, 0x0c16, + 0x0bde, 0x0b35, 0x0626, 0x0675, + 0x0c0b, 0x030a, 0x0487, 0x0c6e, + 0x09f8, 0x05cb, 0x0aa7, 0x045f, + 0x06cb, 0x0284, 0x0999, 0x015d, + 0x01a2, 0x0149, 0x0c65, 0x0cb6, + 0x0331, 0x0449, 0x025b, 0x0262, + 0x052a, 0x07fc, 0x0748, 0x0180, + 0x0842, 0x0c79, 0x04c2, 0x07ca, + 0x0997, 0x00dc, 0x085e, 0x0686, + 0x0860, 0x0707, 0x0803, 0x031a, + 0x071b, 0x09ab, 0x099b, 0x01de, + 0x0c95, 0x0bcd, 0x03e4, 0x03df, + 0x03be, 0x074d, 0x05f2, 0x065c, +}; + +void 
kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, + const sword16* b_p) +{ + register sword16* r asm ("r0") = (sword16*)r_p; + register const sword16* a asm ("r1") = (const sword16*)a_p; + register const sword16* b asm ("r2") = (const sword16*)b_p; + register uint16_t* L_kyber_arm32_basemul_mont_zetas_c asm ("r3") = + (uint16_t*)&L_kyber_arm32_basemul_mont_zetas; + + __asm__ __volatile__ ( + "add r3, r3, #0x80\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r12, r12, #0xc000000\n\t" + "orr r12, r12, #0xff0000\n\t" +#else + "movt r12, #0xcff\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "mov r8, #0\n\t" + "\n" + "L_kyber_arm32_basemul_mont_loop_%=: \n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "ldr lr, [r3, r8]\n\t" + "add r8, r8, #2\n\t" + "push {r8}\n\t" + "cmp r8, #0x80\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultt r8, r4, r6\n\t" + "smultt r10, r5, r7\n\t" + "smultb r9, r12, r8\n\t" + "smultb r11, r12, r10\n\t" + "smlabb r8, r12, r9, r8\n\t" + "smlabb r10, r12, r11, r10\n\t" + "rsb r11, lr, #0\n\t" + "smulbt r8, lr, r8\n\t" + "smulbt r10, r11, r10\n\t" + "smlabb r8, r4, r6, r8\n\t" + "smlabb r10, r5, r7, r10\n\t" + "smultb r9, r12, r8\n\t" + "smultb r11, r12, r10\n\t" + "smlabb r8, r12, r9, r8\n\t" + "smlabb r10, r12, r11, r10\n\t" + "smulbt r9, r4, r6\n\t" + "smulbt r11, r5, r7\n\t" + "smlatb r9, r4, r6, r9\n\t" + "smlatb r11, r5, r7, r11\n\t" + "smultb r6, r12, r9\n\t" + "smultb r7, r12, r11\n\t" + "smlabb r9, r12, r6, r9\n\t" + "smlabb r11, r12, r7, r11\n\t" + "pkhtb r4, r9, r8, ASR #16\n\t" + "pkhtb r5, r11, r10, ASR #16\n\t" +#else + "asr r8, r4, #16\n\t" + "asr r10, r5, #16\n\t" + "asr r9, r6, #16\n\t" + "asr r11, r7, #16\n\t" + "mul 
r8, r9, r8\n\t" + "mul r10, r11, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xc\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r8, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r10, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r10, #0, #16\n\t" +#endif + "mul r9, r12, r8\n\t" + "mul r11, r12, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r9, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r11, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r11, #0, #16\n\t" +#endif + "mla r8, r12, r9, r8\n\t" + "mla r10, r12, r11, r10\n\t" + "rsb r11, lr, #0\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, lr, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r11, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r11, #0, #16\n\t" +#endif + "asr r8, r8, #16\n\t" + "asr r10, r10, #16\n\t" + "mul r8, r9, r8\n\t" + "mul r10, r11, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r4, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r5, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif + "mla r8, r9, 
r12, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif + "mla r10, r11, r12, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xc\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r8, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r10, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r10, #0, #16\n\t" +#endif + "mul r9, r12, r9\n\t" + "mul r11, r12, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r9, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r11, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r11, #0, #16\n\t" +#endif + "mla r8, r12, r9, r8\n\t" + "mla r10, r12, r11, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r4, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r5, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r5, #0, #16\n\t" +#endif + "asr r12, r6, #16\n\t" + "mul r9, r12, r9\n\t" + "asr r12, r7, #16\n\t" + "mul r11, r12, r11\n\t" + "asr r4, r4, #16\n\t" + "asr r5, r5, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif + "mla r9, r4, r12, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" 
+#else + "sbfx r12, r7, #0, #16\n\t" +#endif + "mla r11, r5, r12, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xc\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r6, r9, #16\n\t" + "asr r6, r6, #16\n\t" +#else + "sbfx r6, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r11, #16\n\t" + "asr r7, r7, #16\n\t" +#else + "sbfx r7, r11, #0, #16\n\t" +#endif + "mul r6, r12, r6\n\t" + "mul r7, r12, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r6, #16\n\t" + "asr r4, r4, #16\n\t" +#else + "sbfx r4, r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r5, r7, #16\n\t" + "asr r5, r5, #16\n\t" +#else + "sbfx r5, r7, #0, #16\n\t" +#endif + "mla r9, r12, r4, r9\n\t" + "mla r11, r12, r5, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r11, r11, #0xff\n\t" + "bic r11, r11, #0xff00\n\t" +#else + "bfc r11, #0, #16\n\t" +#endif + "orr r4, r9, r8, lsr #16\n\t" + "orr r5, r11, r10, lsr #16\n\t" +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "stm %[r]!, {r4, r5}\n\t" + "pop {r8}\n\t" + "bne L_kyber_arm32_basemul_mont_loop_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" + ); +} + +void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, + const sword16* b_p) +{ + register sword16* r asm ("r0") = 
(sword16*)r_p; + register const sword16* a asm ("r1") = (const sword16*)a_p; + register const sword16* b asm ("r2") = (const sword16*)b_p; + register uint16_t* L_kyber_arm32_basemul_mont_zetas_c asm ("r3") = + (uint16_t*)&L_kyber_arm32_basemul_mont_zetas; + + __asm__ __volatile__ ( + "add r3, r3, #0x80\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r12, r12, #0xc000000\n\t" + "orr r12, r12, #0xff0000\n\t" +#else + "movt r12, #0xcff\n\t" +#endif +#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "mov r8, #0\n\t" + "\n" + "L_kyber_arm32_basemul_mont_add_loop_%=: \n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "ldr lr, [r3, r8]\n\t" + "add r8, r8, #2\n\t" + "push {r8}\n\t" + "cmp r8, #0x80\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "smultt r8, r4, r6\n\t" + "smultt r10, r5, r7\n\t" + "smultb r9, r12, r8\n\t" + "smultb r11, r12, r10\n\t" + "smlabb r8, r12, r9, r8\n\t" + "smlabb r10, r12, r11, r10\n\t" + "rsb r11, lr, #0\n\t" + "smulbt r8, lr, r8\n\t" + "smulbt r10, r11, r10\n\t" + "smlabb r8, r4, r6, r8\n\t" + "smlabb r10, r5, r7, r10\n\t" + "smultb r9, r12, r8\n\t" + "smultb r11, r12, r10\n\t" + "smlabb r8, r12, r9, r8\n\t" + "smlabb r10, r12, r11, r10\n\t" + "smulbt r9, r4, r6\n\t" + "smulbt r11, r5, r7\n\t" + "smlatb r9, r4, r6, r9\n\t" + "smlatb r11, r5, r7, r11\n\t" + "smultb r6, r12, r9\n\t" + "smultb r7, r12, r11\n\t" + "smlabb r9, r12, r6, r9\n\t" + "smlabb r11, r12, r7, r11\n\t" + "ldm %[r], {r4, r5}\n\t" + "pkhtb r9, r9, r8, ASR #16\n\t" + "pkhtb r11, r11, r10, ASR #16\n\t" + "sadd16 r4, r4, r9\n\t" + "sadd16 r5, r5, r11\n\t" +#else + "asr r8, r4, #16\n\t" + "asr r10, r5, #16\n\t" + "asr r9, r6, #16\n\t" + "asr r11, r7, #16\n\t" + "mul r8, r9, r8\n\t" + "mul r10, r11, 
r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xc\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r8, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r10, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r10, #0, #16\n\t" +#endif + "mul r9, r12, r8\n\t" + "mul r11, r12, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r9, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r11, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r11, #0, #16\n\t" +#endif + "mla r8, r12, r9, r8\n\t" + "mla r10, r12, r11, r10\n\t" + "rsb r11, lr, #0\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, lr, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, lr, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r11, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r11, #0, #16\n\t" +#endif + "asr r8, r8, #16\n\t" + "asr r10, r10, #16\n\t" + "mul r8, r9, r8\n\t" + "mul r10, r11, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r4, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r5, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r5, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif + "mla r8, r9, r12, r8\n\t" +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7, #0, #16\n\t" +#endif + "mla r10, r11, r12, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xc\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r8, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r10, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r10, #0, #16\n\t" +#endif + "mul r9, r12, r9\n\t" + "mul r11, r12, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r9, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r11, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r11, #0, #16\n\t" +#endif + "mla r8, r12, r9, r8\n\t" + "mla r10, r12, r11, r10\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r9, r4, #16\n\t" + "asr r9, r9, #16\n\t" +#else + "sbfx r9, r4, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r11, r5, #16\n\t" + "asr r11, r11, #16\n\t" +#else + "sbfx r11, r5, #0, #16\n\t" +#endif + "asr r12, r6, #16\n\t" + "mul r9, r12, r9\n\t" + "asr r12, r7, #16\n\t" + "mul r11, r12, r11\n\t" + "asr r4, r4, #16\n\t" + "asr r5, r5, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r6, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r6, #0, #16\n\t" +#endif + "mla r9, r4, r12, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r12, r7, #16\n\t" + "asr r12, r12, #16\n\t" +#else + "sbfx r12, r7,
#0, #16\n\t" +#endif + "mla r11, r5, r12, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xc\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" +#else + "mov r12, #0xcff\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r6, r9, #16\n\t" + "asr r6, r6, #16\n\t" +#else + "sbfx r6, r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r11, #16\n\t" + "asr r7, r7, #16\n\t" +#else + "sbfx r7, r11, #0, #16\n\t" +#endif + "mul r6, r12, r6\n\t" + "mul r7, r12, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r6, #16\n\t" + "asr r4, r4, #16\n\t" +#else + "sbfx r4, r6, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r5, r7, #16\n\t" + "asr r5, r5, #16\n\t" +#else + "sbfx r5, r7, #0, #16\n\t" +#endif + "mla r9, r12, r4, r9\n\t" + "mla r11, r12, r5, r11\n\t" + "ldm %[r], {r4, r5}\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r11, r11, #0xff\n\t" + "bic r11, r11, #0xff00\n\t" +#else + "bfc r11, #0, #16\n\t" +#endif + "orr r9, r9, r8, lsr #16\n\t" + "orr r11, r11, r10, lsr #16\n\t" + "add r8, r4, r9\n\t" + "add r10, r5, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r11, r11, #0xff\n\t" + "bic r11, r11, #0xff00\n\t" +#else + "bfc r11, #0, #16\n\t" +#endif + "add r4, r4, r9\n\t" + "add r5, r5, r11\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4,
#0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r8, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r8, #0, #16\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r10, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r10, #0, #16\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "stm %[r]!, {r4, r5}\n\t" + "pop {r8}\n\t" + "bne L_kyber_arm32_basemul_mont_add_loop_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" + ); +} + /* Conditional subtraction of q (0xd01): subtracts q from each of the 256 signed 16-bit values at p_p, in place, and adds q back wherever the difference has its sign bit set. */ +void kyber_arm32_csubq(sword16* p_p) +{ + register sword16* p asm ("r0") = (sword16*)p_p; + register uint16_t* L_kyber_arm32_basemul_mont_zetas_c asm ("r1") = + (uint16_t*)&L_kyber_arm32_basemul_mont_zetas; + /* r1 is bound to the zetas table address here, but the assembly overwrites it and uses it as the loop counter. */ + __asm__ __volatile__ ( +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r12, #0xd\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0x1\n\t" +#else + "mov r12, #0xd01\n\t" +#endif /* r12 = 0xd01 (q) */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov lr, #0xd\n\t" + "lsl lr, lr, #8\n\t" + "add lr, lr, #0x1\n\t" +#else + "mov lr, #0xd01\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) /* also place 0xd01 in the upper halfword of lr for the packed ssub16 in the loop */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr lr, lr, #0xd000000\n\t" + "orr lr, lr, #0x10000\n\t" +#else + "movt lr, #0xd01\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r11, #0x80\n\t" + "lsl r11, r11, #8\n\t" + "add r11, r11, #0x0\n\t" +#else + "mov r11, #0x8000\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "orr r11, r11, #0x80000000\n\t" +#else + "movt r11, #0x8000\n\t" +#endif /* r11 = 0x80008000: the sign bit of each halfword */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r1, #0x1\n\t" + "lsl r1, r1, #8\n\t" + "add
r1, r1, #0x0\n\t" +#else + "mov r1, #0x100\n\t" +#endif /* r1 = 0x100 (256): loop counter, reduced by 8 on every pass */ + "\n" + "L_kyber_arm32_csubq_loop_%=: \n\t" + "ldm %[p], {r2, r3, r4, r5}\n\t" /* four words = eight 16-bit values */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) + "ssub16 r2, r2, lr\n\t" + "ssub16 r3, r3, lr\n\t" + "ssub16 r4, r4, lr\n\t" + "ssub16 r5, r5, lr\n\t" + "and r6, r2, r11\n\t" + "and r7, r3, r11\n\t" + "and r8, r4, r11\n\t" + "and r9, r5, r11\n\t" + "lsr r6, r6, #15\n\t" + "lsr r7, r7, #15\n\t" + "lsr r8, r8, #15\n\t" + "lsr r9, r9, #15\n\t" + "mul r6, r12, r6\n\t" + "mul r7, r12, r7\n\t" + "mul r8, r12, r8\n\t" + "mul r9, r12, r9\n\t" /* r6-r9 = q in every halfword whose difference was negative, 0 elsewhere */ + "sadd16 r2, r2, r6\n\t" + "sadd16 r3, r3, r7\n\t" + "sadd16 r4, r4, r8\n\t" + "sadd16 r5, r5, r9\n\t" +#else /* no ssub16/sadd16 here: subtract q from the low and high halfwords separately and merge the two results */ + "sub r6, r2, lr\n\t" + "sub r2, r2, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r6, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r6, #0, #16\n\t" +#endif + "sub r7, r3, lr\n\t" + "sub r3, r3, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r7, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r7, #0, #16\n\t" +#endif + "sub r8, r4, lr\n\t" + "sub r4, r4, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r8, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r8, #0, #16\n\t" +#endif + "sub r9, r5, lr\n\t" + "sub r5, r5, lr, lsl #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r9, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r9, #0, #16\n\t" +#endif + "and r6, r2, r11\n\t" + "and r7, r3, r11\n\t" + "and r8, r4, r11\n\t" + "and r9, r5, r11\n\t" + "lsr r6, r6, #15\n\t" + "lsr r7, r7, #15\n\t" +
"lsr r8, r8, #15\n\t" + "lsr r9, r9, #15\n\t" + "mul r6, r12, r6\n\t" + "mul r7, r12, r7\n\t" + "mul r8, r12, r8\n\t" + "mul r9, r12, r9\n\t" /* r6-r9 = 0xd01 (r12) in every halfword that went negative; added back one halfword at a time below */ + "add r10, r2, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r6, r6, #0xff\n\t" + "bic r6, r6, #0xff00\n\t" +#else + "bfc r6, #0, #16\n\t" +#endif + "add r2, r2, r6\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r2, r2, #0xff\n\t" + "bic r2, r2, #0xff00\n\t" + "ror r2, r2, #16\n\t" + "orr r2, r2, r10, lsl #16\n\t" + "ror r2, r2, #16\n\t" +#else + "bfi r2, r10, #0, #16\n\t" +#endif + "add r10, r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff\n\t" + "bic r7, r7, #0xff00\n\t" +#else + "bfc r7, #0, #16\n\t" +#endif + "add r3, r3, r7\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r3, r3, #0xff\n\t" + "bic r3, r3, #0xff00\n\t" + "ror r3, r3, #16\n\t" + "orr r3, r3, r10, lsl #16\n\t" + "ror r3, r3, #16\n\t" +#else + "bfi r3, r10, #0, #16\n\t" +#endif + "add r10, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r8, r8, #0xff\n\t" + "bic r8, r8, #0xff00\n\t" +#else + "bfc r8, #0, #16\n\t" +#endif + "add r4, r4, r8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r4, r4, #0xff\n\t" + "bic r4, r4, #0xff00\n\t" + "ror r4, r4, #16\n\t" + "orr r4, r4, r10, lsl #16\n\t" + "ror r4, r4, #16\n\t" +#else + "bfi r4, r10, #0, #16\n\t" +#endif + "add r10, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r9, r9, #0xff\n\t" + "bic r9, r9, #0xff00\n\t" +#else + "bfc r9, #0, #16\n\t" +#endif + "add r5, r5, r9\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r5, r5, #0xff\n\t" + "bic r5, r5, #0xff00\n\t" + "ror r5, r5, #16\n\t" + "orr r5, r5, r10, lsl #16\n\t" + "ror r5, r5, #16\n\t" +#else + "bfi r5, r10, #0, #16\n\t" +#endif +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ + "stm %[p]!, {r2, r3, r4, r5}\n\t" + "subs r1, r1, #8\n\t" /* four words = eight 16-bit values written back per pass */ + "bne
L_kyber_arm32_csubq_loop_%=\n\t" + : [p] "+r" (p), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", + "r9", "r10", "r11" + ); +} + /* Rejection sampling: reads r_p twelve bytes at a time as eight 12-bit values and stores those below 0xd01 (q) to p_p as 16-bit values, until len_p values are stored or rLen_p is used up. Returns the number of values stored. */ +unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, + const byte* r_p, unsigned int rLen_p) +{ + register sword16* p asm ("r0") = (sword16*)p_p; + register unsigned int len asm ("r1") = (unsigned int)len_p; + register const byte* r asm ("r2") = (const byte*)r_p; + register unsigned int rLen asm ("r3") = (unsigned int)rLen_p; + register uint16_t* L_kyber_arm32_basemul_mont_zetas_c asm ("r4") = + (uint16_t*)&L_kyber_arm32_basemul_mont_zetas; + /* r4 is bound to the zetas table address here, but the ldm loads below overwrite it with input data. */ + __asm__ __volatile__ ( +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "mov r8, #0xd\n\t" + "lsl r8, r8, #8\n\t" + "add r8, r8, #0x1\n\t" +#else + "mov r8, #0xd01\n\t" +#endif /* r8 = 0xd01 (q) */ + "mov r12, #0\n\t" /* r12 = byte offset into p of the next store */ + "\n" + "L_kyber_arm32_rej_uniform_loop_no_fail_%=: \n\t" + "cmp %[len], #8\n\t" + "blt L_kyber_arm32_rej_uniform_done_no_fail_%=\n\t" /* this loop runs only while at least 8 more values are wanted */ + "ldm %[r]!, {r4, r5, r6}\n\t" /* 12 input bytes = eight 12-bit values */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r4, #20\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r4, #0, #12\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" /* the value is always stored; len and the offset move only when lr = 1, i.e. value < q */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r4, #8\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r4, #12, #12\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsr r7, r4, #24\n\t" +#else + "ubfx r7, r4, #24, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xf00\n\t" + "ror r7, r7, #12\n\t" + "orr r7, r7, r5, lsl #28\n\t" + "ror r7, r7, #20\n\t" +#else + "bfi r7, r5, #8, #4\n\t" +#endif + "strh
r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r5, #16\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r5, #4, #12\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r5, #4\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r5, #16, #12\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsr r7, r5, #28\n\t" +#else + "ubfx r7, r5, #28, #4\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff0\n\t" + "ror r7, r7, #12\n\t" + "orr r7, r7, r6, lsl #24\n\t" + "ror r7, r7, #20\n\t" +#else + "bfi r7, r6, #4, #8\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r6, #12\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r6, #8, #12\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsr r7, r6, #20\n\t" +#else + "ubfx r7, r6, #20, #12\n\t" +#endif + "strh r7, [%[p], r12]\n\t" + "sub lr, r7, r8\n\t" + "lsr lr, lr, #31\n\t" + "sub %[len], %[len], lr\n\t" + "add r12, r12, lr, lsl #1\n\t" + "subs %[rLen], %[rLen], #12\n\t" + "bne L_kyber_arm32_rej_uniform_loop_no_fail_%=\n\t" /* NOTE(review): this exit needs rLen to reach exactly 0, while the checked loop below uses bgt; presumably rLen_p is always a multiple of 12 - confirm with callers */ + "b L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_done_no_fail_%=: \n\t" + "cmp %[len], #0\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" /* fewer than 8 values still wanted: finish if none, otherwise fall into the checked loop */ +
"\n" + "L_kyber_arm32_rej_uniform_loop_%=: \n\t" + "ldm %[r]!, {r4, r5, r6}\n\t" /* checked loop: each value is compared with r8 before it is stored */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r4, #20\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r4, #0, #12\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_0_%=\n\t" /* skip values >= r8 */ + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" /* add leaves the flags alone, so beq tests the subs result: done once len reaches 0 */ + "\n" + "L_kyber_arm32_rej_uniform_fail_0_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r4, #8\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r4, #12, #12\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_1_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_1_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsr r7, r4, #24\n\t" +#else + "ubfx r7, r4, #24, #8\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xf00\n\t" + "ror r7, r7, #12\n\t" + "orr r7, r7, r5, lsl #28\n\t" + "ror r7, r7, #20\n\t" +#else + "bfi r7, r5, #8, #4\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_2_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_2_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r5, #16\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r5, #4, #12\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_3_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_3_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r5, #4\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r5, #16,
#12\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_4_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_4_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsr r7, r5, #28\n\t" +#else + "ubfx r7, r5, #28, #4\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "bic r7, r7, #0xff0\n\t" + "ror r7, r7, #12\n\t" + "orr r7, r7, r6, lsl #24\n\t" + "ror r7, r7, #20\n\t" +#else + "bfi r7, r6, #4, #8\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_5_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_5_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r7, r6, #12\n\t" + "lsr r7, r7, #20\n\t" +#else + "ubfx r7, r6, #8, #12\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_6_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_6_%=: \n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsr r7, r6, #20\n\t" +#else + "ubfx r7, r6, #20, #12\n\t" +#endif + "cmp r7, r8\n\t" + "bge L_kyber_arm32_rej_uniform_fail_7_%=\n\t" + "strh r7, [%[p], r12]\n\t" + "subs %[len], %[len], #1\n\t" + "add r12, r12, #2\n\t" + "beq L_kyber_arm32_rej_uniform_done_%=\n\t" + "\n" + "L_kyber_arm32_rej_uniform_fail_7_%=: \n\t" + "subs %[rLen], %[rLen], #12\n\t" + "bgt L_kyber_arm32_rej_uniform_loop_%=\n\t" /* take another 12-byte group while rLen is still positive */ + "\n" + "L_kyber_arm32_rej_uniform_done_%=: \n\t" + "lsr r0, r12, #1\n\t" /* r0 (p) = byte offset / 2 = number of values stored */ + : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : + : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8" +
); + return (uint32_t)(size_t)p; +} + +#endif /* WOLFSSL_WC_KYBER */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */ +#endif /* WOLFSSL_ARMASM */ + +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S index d7225828f..63409c849 100644 --- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S @@ -218,7 +218,7 @@ L_poly1305_arm32_16_loop: # Load length ldr r2, [sp, #20] # Reduce mod 2^130 - 5 - bic r3, r8, #3 + bic r3, r8, #0x3 and r8, r8, #3 adds r4, r4, r3 lsr r3, r3, #2 diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c index da604101b..b1985b9a2 100644 --- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c @@ -243,7 +243,7 @@ void poly1305_blocks_arm32_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, /* Load length */ "ldr %[len], [sp, #20]\n\t" /* Reduce mod 2^130 - 5 */ - "bic %[notLast], r8, #3\n\t" + "bic %[notLast], r8, #0x3\n\t" "and r8, r8, #3\n\t" "adds r4, r4, %[notLast]\n\t" "lsr %[notLast], %[notLast], #2\n\t" diff --git a/wolfcrypt/src/wc_kyber_poly.c b/wolfcrypt/src/wc_kyber_poly.c index 8c8c97dd8..ef38d8cd6 100644 --- a/wolfcrypt/src/wc_kyber_poly.c +++ b/wolfcrypt/src/wc_kyber_poly.c @@ -182,7 +182,7 @@ const sword16 zetas_inv[KYBER_N / 2] = { "SSUB16 " #a ", " #a ", r10\n\t" -#if !(defined(__thumb__) || (defined(__aarch64__)) && defined(WOLFSSL_ARMASM)) +#if !defined(WOLFSSL_ARMASM) /* Number-Theoretic Transform. * * @param [in, out] r Polynomial to transform. 
@@ -2154,7 +2154,7 @@ int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) } #endif -#if !(defined(WOLFSSL_ARMASM) && (defined(__aarch64__) || defined(__thumb__))) +#if !defined(WOLFSSL_ARMASM) /* Rejection sampling on uniform random bytes to generate uniform random * integers mod q. * @@ -3350,7 +3350,7 @@ int kyber_cmp(const byte* a, const byte* b, int sz) /******************************************************************************/ -#if !(defined(__thumb__) || (defined(__aarch64__)) && defined(WOLFSSL_ARMASM)) +#if !defined(WOLFSSL_ARMASM) /* Conditional subtraction of q to each coefficient of a polynomial. * @@ -3371,10 +3371,14 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p) #define kyber_csubq_c kyber_csubq_neon -#else +#elif defined(__thumb__) #define kyber_csubq_c kyber_thumb2_csubq +#else + +#define kyber_csubq_c kyber_arm32_csubq + #endif /******************************************************************************/ diff --git a/wolfssl/wolfcrypt/wc_kyber.h b/wolfssl/wolfcrypt/wc_kyber.h index 354fc8b01..71d3fe73b 100644 --- a/wolfssl/wolfcrypt/wc_kyber.h +++ b/wolfssl/wolfcrypt/wc_kyber.h @@ -320,12 +320,28 @@ WOLFSSL_LOCAL void kyber_to_msg_neon(byte* msg, sword16* p); WOLFSSL_LOCAL void kyber_thumb2_ntt(sword16* r); WOLFSSL_LOCAL void kyber_thumb2_invntt(sword16* r); WOLFSSL_LOCAL void kyber_thumb2_basemul_mont(sword16* r, const sword16* a, - const sword16* b); + const sword16* b); WOLFSSL_LOCAL void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a, - const sword16* b); + const sword16* b); WOLFSSL_LOCAL void kyber_thumb2_csubq(sword16* p); WOLFSSL_LOCAL unsigned int kyber_thumb2_rej_uniform(sword16* p, unsigned int len, const byte* r, unsigned int rLen); +#elif defined(WOLFSSL_ARMASM) +#define kyber_ntt kyber_arm32_ntt +#define kyber_invntt kyber_arm32_invntt +#define kyber_basemul_mont kyber_arm32_basemul_mont +#define kyber_basemul_mont_add kyber_arm32_basemul_mont_add +#define kyber_rej_uniform_c 
kyber_arm32_rej_uniform + +WOLFSSL_LOCAL void kyber_arm32_ntt(sword16* r); +WOLFSSL_LOCAL void kyber_arm32_invntt(sword16* r); +WOLFSSL_LOCAL void kyber_arm32_basemul_mont(sword16* r, const sword16* a, + const sword16* b); +WOLFSSL_LOCAL void kyber_arm32_basemul_mont_add(sword16* r, const sword16* a, + const sword16* b); +WOLFSSL_LOCAL void kyber_arm32_csubq(sword16* p); +WOLFSSL_LOCAL unsigned int kyber_arm32_rej_uniform(sword16* p, unsigned int len, + const byte* r, unsigned int rLen); #endif #ifdef __cplusplus