mirror of https://github.com/wolfSSL/wolfssl.git
Espressif benchmark update
parent
2c9208b0c6
commit
4bd78e5e31
|
@ -49,6 +49,10 @@
|
||||||
*
|
*
|
||||||
* set the default devId for cryptocb to the value instead of INVALID_DEVID
|
* set the default devId for cryptocb to the value instead of INVALID_DEVID
|
||||||
* WC_USE_DEVID=0x1234
|
* WC_USE_DEVID=0x1234
|
||||||
|
*
|
||||||
|
* Turn on benchmark timing debugging (CPU Cycles, RTOS ticks, etc)
|
||||||
|
* DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
@ -226,6 +230,7 @@
|
||||||
|
|
||||||
/* default units per second. See WOLFSSL_BENCHMARK_FIXED_UNITS_* to change */
|
/* default units per second. See WOLFSSL_BENCHMARK_FIXED_UNITS_* to change */
|
||||||
#define WOLFSSL_FIXED_UNIT "MB" /* may be re-set by fixed units */
|
#define WOLFSSL_FIXED_UNIT "MB" /* may be re-set by fixed units */
|
||||||
|
#define MILLION_VALUE 1000000.0
|
||||||
|
|
||||||
#ifdef BENCH_MICROSECOND
|
#ifdef BENCH_MICROSECOND
|
||||||
#define WOLFSSL_FIXED_TIME_UNIT "μs"
|
#define WOLFSSL_FIXED_TIME_UNIT "μs"
|
||||||
|
@ -296,20 +301,46 @@
|
||||||
#endif /* WOLFSSL_NO_FLOAT_FMT */
|
#endif /* WOLFSSL_NO_FLOAT_FMT */
|
||||||
|
|
||||||
#ifdef WOLFSSL_ESPIDF
|
#ifdef WOLFSSL_ESPIDF
|
||||||
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C6)
|
#ifdef configTICK_RATE_HZ
|
||||||
|
/* Define CPU clock cycles per tick of FreeRTOS clock
|
||||||
|
* CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ is typically a value like 240
|
||||||
|
* configTICK_RATE_HZ is typically 100 or 1000.
|
||||||
|
**/
|
||||||
|
#define CPU_TICK_CYCLES ( \
|
||||||
|
(CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ * MILLION_VALUE) \
|
||||||
|
/ configTICK_RATE_HZ \
|
||||||
|
)
|
||||||
|
#endif
|
||||||
|
#if defined(CONFIG_IDF_TARGET_ESP32C2)
|
||||||
#include "driver/gptimer.h"
|
#include "driver/gptimer.h"
|
||||||
static gptimer_handle_t esp_gptimer = NULL;
|
static gptimer_handle_t esp_gptimer = NULL;
|
||||||
static gptimer_config_t esp_timer_config = {
|
static gptimer_config_t esp_timer_config = {
|
||||||
.clk_src = GPTIMER_CLK_SRC_DEFAULT,
|
.clk_src = GPTIMER_CLK_SRC_DEFAULT,
|
||||||
.direction = GPTIMER_COUNT_UP,
|
.direction = GPTIMER_COUNT_UP,
|
||||||
.resolution_hz = CONFIG_XTAL_FREQ * 1000000,
|
.resolution_hz = CONFIG_XTAL_FREQ * 100000,
|
||||||
};
|
};
|
||||||
|
#elif defined(CONFIG_IDF_TARGET_ESP32C3) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C6)
|
||||||
|
#include <esp_cpu.h>
|
||||||
|
#include "driver/gptimer.h"
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
#define RESOLUTION_SCALE 100
|
||||||
|
static gptimer_handle_t esp_gptimer = NULL;
|
||||||
|
static gptimer_config_t esp_timer_config = {
|
||||||
|
.clk_src = GPTIMER_CLK_SRC_DEFAULT,
|
||||||
|
.direction = GPTIMER_COUNT_UP,
|
||||||
|
.resolution_hz = CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ * (MILLION_VALUE / RESOLUTION_SCALE), /* CONFIG_XTAL_FREQ = 40, CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ = 160 */
|
||||||
|
};
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
|
||||||
#elif defined(CONFIG_IDF_TARGET_ESP32) || \
|
#elif defined(CONFIG_IDF_TARGET_ESP32) || \
|
||||||
defined(CONFIG_IDF_TARGET_ESP32S2) || \
|
defined(CONFIG_IDF_TARGET_ESP32S2) || \
|
||||||
defined(CONFIG_IDF_TARGET_ESP32S3)
|
defined(CONFIG_IDF_TARGET_ESP32S3)
|
||||||
#include <xtensa/hal.h>
|
#include <xtensa/hal.h>
|
||||||
|
#elif defined(CONFIG_IDF_TARGET_ESP32H2)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error "CONFIG_IDF_TARGET not implemented"
|
|
||||||
#endif
|
#endif
|
||||||
#include <esp_log.h>
|
#include <esp_log.h>
|
||||||
#endif /* WOLFSSL_ESPIDF */
|
#endif /* WOLFSSL_ESPIDF */
|
||||||
|
@ -1212,32 +1243,57 @@ static const char* bench_result_words3[][5] = {
|
||||||
(void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \
|
(void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \
|
||||||
FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s)))
|
FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s)))
|
||||||
#elif defined(WOLFSSL_ESPIDF)
|
#elif defined(WOLFSSL_ESPIDF)
|
||||||
static THREAD_LS_T word64 begin_cycles;
|
|
||||||
static THREAD_LS_T word64 total_cycles;
|
|
||||||
|
|
||||||
/* the return value */
|
|
||||||
static THREAD_LS_T word64 _xthal_get_ccount_ex = 0;
|
|
||||||
|
|
||||||
/* the last value seen, adjusted for an overflow */
|
|
||||||
static THREAD_LS_T word64 _xthal_get_ccount_last = 0;
|
|
||||||
|
|
||||||
/* TAG for ESP_LOGx() */
|
/* TAG for ESP_LOGx() */
|
||||||
static const char* TAG = "wolfssl_benchmark";
|
static const char* TAG = "wolfssl_benchmark";
|
||||||
|
|
||||||
#define HAVE_GET_CYCLES
|
static THREAD_LS_T word64 begin_cycles;
|
||||||
#define INIT_CYCLE_COUNTER
|
static THREAD_LS_T word64 begin_cycles_ticks;
|
||||||
static WC_INLINE word64 get_xtensa_cycles(void);
|
static THREAD_LS_T word64 end_cycles;
|
||||||
|
static THREAD_LS_T word64 total_cycles;
|
||||||
|
|
||||||
/* WARNING the hal UINT xthal_get_ccount() quietly rolls over. */
|
/* the return value, as a global var */
|
||||||
#define BEGIN_ESP_CYCLES begin_cycles = (get_xtensa_cycles());
|
static THREAD_LS_T word64 _esp_get_cycle_count_ex = 0;
|
||||||
|
|
||||||
|
/* the last value seen, adjusted for an overflow, as a global var */
|
||||||
|
static THREAD_LS_T word64 _esp_cpu_count_last = 0;
|
||||||
|
|
||||||
|
static THREAD_LS_T TickType_t last_tickCount = 0; /* last FreeRTOS value */
|
||||||
|
|
||||||
|
/* esp_get_cpu_benchmark_cycles(void):
|
||||||
|
*
|
||||||
|
* Architecture-independent CPU clock counter.
|
||||||
|
* WARNING: the hal UINT xthal_get_ccount() quietly rolls over. */
|
||||||
|
static WC_INLINE word64 esp_get_cpu_benchmark_cycles(void);
|
||||||
|
|
||||||
|
/* Some vars for debugging, compare ticks to cycles */
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
static THREAD_LS_T word64 _esp_cpu_timer_last = 0;
|
||||||
|
static THREAD_LS_T word64 _esp_cpu_timer_diff = 0;
|
||||||
|
static THREAD_LS_T word64 _xthal_get_ccount_exAlt = 0;
|
||||||
|
static THREAD_LS_T word64 _xthal_get_ccount_exDiff = 0;
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
|
||||||
|
/* The ESP32 (both Xtensa and RISC-V) has raw CPU counters. */
|
||||||
|
#define HAVE_GET_CYCLES
|
||||||
|
#define INIT_CYCLE_COUNTER do { \
|
||||||
|
ESP_LOGV(TAG, "INIT_CYCLE_COUNTER"); \
|
||||||
|
esp_cpu_set_cycle_count(0); \
|
||||||
|
} while (0);
|
||||||
|
|
||||||
|
#define BEGIN_ESP_CYCLES do { \
|
||||||
|
ESP_LOGV(TAG, "BEGIN_ESP_CYCLES"); \
|
||||||
|
begin_cycles = esp_get_cpu_benchmark_cycles(); \
|
||||||
|
begin_cycles_ticks = xTaskGetTickCount(); \
|
||||||
|
} while (0);
|
||||||
|
|
||||||
/* since it rolls over, we have something that will tolerate one */
|
/* since it rolls over, we have something that will tolerate one */
|
||||||
#define END_ESP_CYCLES \
|
#define END_ESP_CYCLES \
|
||||||
ESP_LOGV(TAG,"%llu - %llu", \
|
end_cycles = esp_get_cpu_benchmark_cycles(); \
|
||||||
get_xtensa_cycles(), \
|
ESP_LOGV(TAG,"END_ESP_CYCLES %llu - %llu", \
|
||||||
|
end_cycles, \
|
||||||
begin_cycles \
|
begin_cycles \
|
||||||
); \
|
); \
|
||||||
total_cycles = (get_xtensa_cycles() - begin_cycles);
|
total_cycles = (end_cycles - begin_cycles);
|
||||||
|
|
||||||
#define SHOW_ESP_CYCLES(b, n, s) \
|
#define SHOW_ESP_CYCLES(b, n, s) \
|
||||||
(void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), \
|
(void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), \
|
||||||
|
@ -1250,29 +1306,86 @@ static const char* bench_result_words3[][5] = {
|
||||||
(void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \
|
(void)XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), FLT_FMT_PREC ",\n", \
|
||||||
FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s)))
|
FLT_FMT_PREC_ARGS(6, (double)total_cycles / (count*s)))
|
||||||
|
|
||||||
/* xthal_get_ccount_ex() is a single-overflow-tolerant extension to
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
** the Espressif `unsigned xthal_get_ccount()` which is known to overflow
|
/* 64 bit, unsigned, absolute difference
|
||||||
|
* used in CPU cycle counter debug calcs. */
|
||||||
|
static uint64_t esp_cycle_abs_diff(uint64_t x, uint64_t y)
|
||||||
|
{
|
||||||
|
uint64_t ret;
|
||||||
|
ret = (x > y) ? (x - y) : (y - x);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* esp_get_cycle_count_ex() is a single-overflow-tolerant extension to
|
||||||
|
** the Espressif `unsigned xthal_get_ccount()` (Xtensa) or
|
||||||
|
** `esp_cpu_get_cycle_count` (RISC-V) which are known to overflow
|
||||||
** at least once during full benchmark tests.
|
** at least once during full benchmark tests.
|
||||||
|
**
|
||||||
|
** To test timing overflow, add a delay longer than max cycles:
|
||||||
|
** vTaskDelay( (const TickType_t)(configTICK_RATE_HZ * 17 * 5) );
|
||||||
*/
|
*/
|
||||||
uint64_t xthal_get_ccount_ex()
|
uint64_t esp_get_cycle_count_ex()
|
||||||
{
|
{
|
||||||
/* reminder: unsigned long long max = 18,446,744,073,709,551,615 */
|
/* reminder: unsigned long long max = 18,446,744,073,709,551,615 */
|
||||||
|
/* unsigned int max = 4,294,967,295 */
|
||||||
/* the currently observed clock counter value */
|
uint64_t thisVal = 0; /* CPU counter, "this current value" as read. */
|
||||||
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C6)
|
uint64_t thisIncrement = 0; /* The adjusted increment amount. */
|
||||||
uint64_t thisVal = 0;
|
uint64_t expected_diff = 0; /* FreeRTOS estimated expected CPU diff. */
|
||||||
ESP_ERROR_CHECK(gptimer_get_raw_count(esp_gptimer, &thisVal));
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
#else
|
uint32_t tickCount = 0; /* Current RTOS tick counter. */
|
||||||
/* reminder unsupported CONFIG_IDF_TARGET captured above */
|
uint32_t tickDiff = 0; /* Tick difference from last check. */
|
||||||
uint64_t thisVal = xthal_get_ccount();
|
uint32_t tickBeginDiff = 0; /* Tick difference from beginning. */
|
||||||
#endif
|
#endif
|
||||||
/* if the current value is less than the previous value,
|
|
||||||
** we likely overflowed at least once.
|
#if defined(CONFIG_IDF_TARGET_ESP32C2) || \
|
||||||
*/
|
defined(CONFIG_IDF_TARGET_ESP32C3) || \
|
||||||
if (thisVal < _xthal_get_ccount_last)
|
defined(CONFIG_IDF_TARGET_ESP32C6)
|
||||||
|
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
uint64_t thisTimerVal = 0; /* Timer Value as alternate to compare */
|
||||||
|
uint64_t diffDiff = 0; /* Difference between CPU & Timer differences:
|
||||||
|
* (current - last) */
|
||||||
|
ESP_ERROR_CHECK(gptimer_get_raw_count(esp_gptimer, &thisTimerVal));
|
||||||
|
thisTimerVal = thisTimerVal * RESOLUTION_SCALE;
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
|
||||||
|
thisVal = esp_cpu_get_cycle_count();
|
||||||
|
|
||||||
|
#elif defined(CONFIG_IDF_TARGET_ESP32H2)
|
||||||
|
thisVal = esp_cpu_get_cycle_count();
|
||||||
|
#else
|
||||||
|
/* TODO: Why doesn't esp_cpu_get_cycle_count work for Xtensa?
|
||||||
|
* Calling current_time(1) to reset time causes thisVal overflow,
|
||||||
|
* on Xtensa, but not on RISC-V architecture. See also, below */
|
||||||
|
#ifndef __XTENSA__
|
||||||
|
thisVal = esp_cpu_get_cycle_count();
|
||||||
|
#else
|
||||||
|
thisVal = xthal_get_ccount(); /* or esp_cpu_get_cycle_count(); */
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
{
|
{
|
||||||
/* Warning: we assume the return type of xthal_get_ccount()
|
tickCount = xTaskGetTickCount(); /* Our local FreeRTOS tick count */
|
||||||
** will always be unsigned int to add UINT_MAX.
|
tickDiff = tickCount - last_tickCount; /* ticks since bench start */
|
||||||
|
expected_diff = CPU_TICK_CYCLES * tickDiff; /* CPU expected count */
|
||||||
|
ESP_LOGV(TAG, "CPU_TICK_CYCLES = %d", (int)CPU_TICK_CYCLES);
|
||||||
|
ESP_LOGV(TAG, "tickCount = %lu", tickCount);
|
||||||
|
ESP_LOGV(TAG, "last_tickCount = %lu", last_tickCount);
|
||||||
|
ESP_LOGV(TAG, "tickDiff = %lu", tickDiff);
|
||||||
|
ESP_LOGV(TAG, "expected_diff1 = %llu", expected_diff);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If either thisVal is smaller than last (overflow), and/or the
|
||||||
|
* expected value calculated from FreeRTOS tick difference that would
|
||||||
|
* have never fit into an unsigned 32 bit integer anyhow... then we
|
||||||
|
* need to adjust thisVal to save. */
|
||||||
|
if ( (thisVal < _esp_cpu_count_last) || (expected_diff > UINT_MAX) )
|
||||||
|
{
|
||||||
|
/* Warning: we assume the return type of esp_cpu_get_cycle_count()
|
||||||
|
** will always be unsigned int (or uint32_t) to add UINT_MAX.
|
||||||
**
|
**
|
||||||
** NOTE for long duration between calls with multiple overflows:
|
** NOTE for long duration between calls with multiple overflows:
|
||||||
**
|
**
|
||||||
|
@ -1283,22 +1396,109 @@ static const char* bench_result_words3[][5] = {
|
||||||
** as well call xthal_get_ccount_ex() with no more than one
|
** as well call xthal_get_ccount_ex() with no more than one
|
||||||
** overflow CPU tick count, all will be well.
|
** overflow CPU tick count, all will be well.
|
||||||
*/
|
*/
|
||||||
ESP_LOGV(TAG, "Alert: Detected xthal_get_ccount overflow, "
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
"adding %ull", UINT_MAX);
|
ESP_LOGW(TAG,
|
||||||
thisVal += (word64)UINT_MAX;
|
"Alert: Detected xthal_get_ccount overflow at %llu, "
|
||||||
|
"adding UINT_MAX.",
|
||||||
|
thisVal);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* double check expected diff calc */
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
expected_diff = (CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ * MILLION_VALUE)
|
||||||
|
* tickDiff / configTICK_RATE_HZ;
|
||||||
|
ESP_LOGI(TAG, "expected_diff2 = %llu", expected_diff);
|
||||||
|
#endif
|
||||||
|
if (expected_diff > UINT_MAX) {
|
||||||
|
/* The number of cycles expected from FreeRTOS ticks is
|
||||||
|
* greater than the maximum size of an unsigned 32-bit
|
||||||
|
* integer, meaning multiple overflows occurred. */
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGW(TAG, "expected_diff > UINT_MAX (%u)", UINT_MAX);
|
||||||
|
#endif
|
||||||
|
thisVal += expected_diff; /* FreeRTOS calc to our 64 bit val */
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
thisVal += (word64)UINT_MAX; /* add 32 bit max to our 64 bit */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* adjust our actual returned value that takes into account overflow */
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
_xthal_get_ccount_ex += (thisVal - _xthal_get_ccount_last);
|
{
|
||||||
|
tickBeginDiff = tickCount - begin_cycles_ticks;
|
||||||
|
|
||||||
/* all of this took some time, so reset the "last seen" value */
|
ESP_LOGI(TAG, "begin_cycles_ticks = %llu", begin_cycles_ticks);
|
||||||
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C6)
|
ESP_LOGI(TAG, "tickDiff = %lu", tickDiff);
|
||||||
ESP_ERROR_CHECK(gptimer_get_raw_count(esp_gptimer,
|
ESP_LOGI(TAG, "expected_diff = %llu", expected_diff);
|
||||||
&_xthal_get_ccount_last));
|
ESP_LOGI(TAG, "tickBeginDiff = %lu", tickBeginDiff);
|
||||||
#else
|
|
||||||
_xthal_get_ccount_last = xthal_get_ccount();
|
ESP_LOGW(TAG, "");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return _xthal_get_ccount_ex;
|
}
|
||||||
|
else {
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGI(TAG, "thisVal, read CPU = %llu", thisVal);
|
||||||
|
#endif
|
||||||
|
} /* if thisVal adjustment check */
|
||||||
|
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
if (thisTimerVal < _esp_cpu_timer_last)
|
||||||
|
{
|
||||||
|
ESP_LOGW(TAG, "Alert: Detected xthal_get_ccountAlt overflow, "
|
||||||
|
"adding %ull", UINT_MAX);
|
||||||
|
thisTimerVal += (word64)UINT_MAX;
|
||||||
|
}
|
||||||
|
/* Check an alternate counter using a timer */
|
||||||
|
|
||||||
|
_esp_cpu_timer_diff = esp_cycle_abs_diff(_esp_cpu_count_last, _esp_cpu_timer_last);
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
|
||||||
|
/* Adjust our actual returned value that takes into account overflow,
|
||||||
|
* increment 64 bit extended total by this 32 bit differential: */
|
||||||
|
thisIncrement = (thisVal - _esp_cpu_count_last);
|
||||||
|
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGI(TAG, "thisIncrement = %llu", thisIncrement);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Add our adjustment, taking into account overflows (see above) */
|
||||||
|
_esp_get_cycle_count_ex += thisIncrement;
|
||||||
|
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
_xthal_get_ccount_exDiff = esp_cycle_abs_diff(_esp_get_cycle_count_ex, _xthal_get_ccount_exAlt);
|
||||||
|
_xthal_get_ccount_exAlt += (thisTimerVal - _esp_cpu_timer_last);
|
||||||
|
diffDiff = esp_cycle_abs_diff(_xthal_get_ccount_exDiff, _esp_cpu_timer_diff);
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
|
||||||
|
/* all of this took some time, so reset the "last seen" value
|
||||||
|
* for the next measurement. */
|
||||||
|
#if defined(CONFIG_IDF_TARGET_ESP32C2) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C3) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C6)
|
||||||
|
{
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
ESP_ERROR_CHECK(gptimer_get_raw_count(esp_gptimer,
|
||||||
|
&_esp_cpu_timer_last));
|
||||||
|
ESP_LOGI(TAG, "thisVal = %llu", thisVal);
|
||||||
|
ESP_LOGI(TAG, "thisTimerVal = %llu", thisTimerVal);
|
||||||
|
ESP_LOGI(TAG, "diffDiff = %llu", diffDiff);
|
||||||
|
ESP_LOGI(TAG, "_xthal_get_ccount_exDiff = %llu", _xthal_get_ccount_exDiff);
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
_esp_cpu_count_last = esp_cpu_get_cycle_count();
|
||||||
|
ESP_LOGV(TAG, "_xthal_get_ccount_last = %llu", _esp_cpu_count_last);
|
||||||
|
}
|
||||||
|
#elif defined(CONFIG_IDF_TARGET_ESP32H2)
|
||||||
|
_esp_cpu_count_last = esp_cpu_get_cycle_count();
|
||||||
|
#else
|
||||||
|
/* TODO: Why doesn't esp_cpu_get_cycle_count work for Xtensa
|
||||||
|
* when resetting CPU cycle counter? FreeRTOS tick collision?
|
||||||
|
* thisVal = esp_cpu_get_cycle_count(); See also, above
|
||||||
|
* or thisVal = xthal_get_ccount(); */
|
||||||
|
_esp_cpu_count_last = esp_cpu_get_cycle_count();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Return the 64 bit extended total from 32 bit counter. */
|
||||||
|
return _esp_get_cycle_count_ex;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* implement other architecture cycle counters here */
|
/* implement other architecture cycle counters here */
|
||||||
|
@ -1911,9 +2111,10 @@ static WC_INLINE void bench_stats_start(int* count, double* start)
|
||||||
*start = current_time(1);
|
*start = current_time(1);
|
||||||
|
|
||||||
#ifdef WOLFSSL_ESPIDF
|
#ifdef WOLFSSL_ESPIDF
|
||||||
ESP_LOGV(TAG, "finish total_cycles = %llu, start=" FLT_FMT,
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGI(TAG, "bench_stats_start total_cycles = %llu, start=" FLT_FMT,
|
||||||
total_cycles, FLT_FMT_ARGS(*start) );
|
total_cycles, FLT_FMT_ARGS(*start) );
|
||||||
|
#endif
|
||||||
BEGIN_ESP_CYCLES
|
BEGIN_ESP_CYCLES
|
||||||
#else
|
#else
|
||||||
BEGIN_INTEL_CYCLES
|
BEGIN_INTEL_CYCLES
|
||||||
|
@ -1931,11 +2132,23 @@ static WC_INLINE void bench_stats_start(int* count, double* start)
|
||||||
|
|
||||||
static WC_INLINE int bench_stats_check(double start)
|
static WC_INLINE int bench_stats_check(double start)
|
||||||
{
|
{
|
||||||
return ((current_time(0) - start) < BENCH_MIN_RUNTIME_SEC
|
int ret = 0;
|
||||||
|
double this_current_time;
|
||||||
|
this_current_time = current_time(0); /* get the timestamp, no reset */
|
||||||
|
#if defined(DEBUG_WOLFSSL_BENCHMARK_TIMING)
|
||||||
|
#if (WOLFSSL_ESPIDF)
|
||||||
|
ESP_LOGI(TAG, "bench_stats_check Current time %f, start %f",
|
||||||
|
this_current_time, start );
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ret = ((this_current_time - start) < BENCH_MIN_RUNTIME_SEC
|
||||||
#ifdef BENCH_MICROSECOND
|
#ifdef BENCH_MICROSECOND
|
||||||
* 1000000
|
* 1000000
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return text for units and scale the value of blocks as needed */
|
/* return text for units and scale the value of blocks as needed */
|
||||||
|
@ -2106,8 +2319,8 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID,
|
||||||
|
|
||||||
total = current_time(0) - start;
|
total = current_time(0) - start;
|
||||||
|
|
||||||
#ifdef WOLFSSL_ESPIDF
|
#if defined(WOLFSSL_ESPIDF) && defined(DEBUG_WOLFSSL_BENCHMARK_TIMING)
|
||||||
ESP_LOGV(TAG, "%s total_cycles = %llu", desc, total_cycles);
|
ESP_LOGI(TAG, "%s total_cycles = %llu", desc, total_cycles);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef LINUX_RUSAGE_UTIME
|
#ifdef LINUX_RUSAGE_UTIME
|
||||||
|
@ -2229,9 +2442,15 @@ static void bench_stats_sym_finish(const char* desc, int useDeviceID,
|
||||||
|
|
||||||
#ifdef WOLFSSL_ESPIDF
|
#ifdef WOLFSSL_ESPIDF
|
||||||
SHOW_ESP_CYCLES_CSV(msg, sizeof(msg), countSz);
|
SHOW_ESP_CYCLES_CSV(msg, sizeof(msg), countSz);
|
||||||
ESP_LOGV(TAG, "finish total_cycles = %llu", total_cycles);
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGI(TAG, "bench_stats_sym_finish total_cycles = %llu",
|
||||||
|
total_cycles);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* implement other cycle counters here */
|
/* implement other cycle counters here */
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
/* the default cycle counter is Intel */
|
||||||
SHOW_INTEL_CYCLES_CSV(msg, sizeof(msg), (unsigned)countSz);
|
SHOW_INTEL_CYCLES_CSV(msg, sizeof(msg), (unsigned)countSz);
|
||||||
#endif
|
#endif
|
||||||
} /* if (csv_format == 1) */
|
} /* if (csv_format == 1) */
|
||||||
|
@ -11950,20 +12169,93 @@ void bench_sphincsKeySign(byte level, byte optim)
|
||||||
int construct_argv();
|
int construct_argv();
|
||||||
extern char* __argv[22];
|
extern char* __argv[22];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* current_time(reset)
|
||||||
|
*
|
||||||
|
* Benchmark passage of time, in fractional seconds.
|
||||||
|
* [reset] is non zero to adjust timer or counter to zero
|
||||||
|
*
|
||||||
|
* Use care when calling repeatedly. See implementation. */
|
||||||
double current_time(int reset)
|
double current_time(int reset)
|
||||||
{
|
{
|
||||||
|
double ret;
|
||||||
#if ESP_IDF_VERSION_MAJOR >= 4
|
#if ESP_IDF_VERSION_MAJOR >= 4
|
||||||
TickType_t tickCount;
|
TickType_t tickCount; /* typically 32 bit, local FreeRTOS ticks */
|
||||||
#else
|
#else
|
||||||
portTickType tickCount;
|
portTickType tickCount;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(void) reset;
|
#if defined(__XTENSA__)
|
||||||
|
(void)reset;
|
||||||
|
|
||||||
|
if (reset) {
|
||||||
|
/* TODO: Determine a mechanism for reset that does not interfere
|
||||||
|
* with freeRTOS tick. Using this code for Xtensa appears to cause
|
||||||
|
* RTOS tick timer to stick. See "last_tickCount unchanged".
|
||||||
|
ESP_LOGW(TAG, "Current_time() reset!");
|
||||||
|
portTICK_TYPE_ENTER_CRITICAL();
|
||||||
|
{
|
||||||
|
esp_cpu_set_cycle_count((esp_cpu_cycle_count_t)0);
|
||||||
|
_esp_cpu_count_last = xthal_get_ccount();
|
||||||
|
_esp_cpu_count_last = esp_cpu_get_cycle_count();
|
||||||
|
}
|
||||||
|
portTICK_TYPE_EXIT_CRITICAL();
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* Only reset the CPU counter for RISC-V */
|
||||||
|
if (reset) {
|
||||||
|
ESP_LOGV(TAG, "current_time() reset!");
|
||||||
|
/* TODO: why does Espressif esp_cpu_get_cycle_count() cause
|
||||||
|
* unexpected rollovers in return values for Xtensa but not RISC-V?
|
||||||
|
* See also esp_get_cycle_count_ex() */
|
||||||
|
#ifdef __XTENSA__
|
||||||
|
_esp_cpu_count_last = xthal_get_ccount();
|
||||||
|
#else
|
||||||
|
esp_cpu_set_cycle_count((esp_cpu_cycle_count_t)0);
|
||||||
|
_esp_cpu_count_last = esp_cpu_get_cycle_count();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* tick count == ms, if configTICK_RATE_HZ is set to 1000 */
|
/* tick count == ms, if configTICK_RATE_HZ is set to 1000 */
|
||||||
tickCount = xTaskGetTickCount();
|
tickCount = xTaskGetTickCount(); /* RTOS ticks, not CPU cycles!
|
||||||
return (double)tickCount / 1000;
|
The count of ticks since vTaskStartScheduler was called,
|
||||||
|
typically in app_startup.c */
|
||||||
|
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGV(TAG, "tickCount = %lu", tickCount);
|
||||||
|
if (tickCount == last_tickCount) {
|
||||||
|
ESP_LOGW(TAG, "last_tickCount unchanged? %lu", tickCount);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
if (tickCount < last_tickCount) {
|
||||||
|
ESP_LOGW(TAG, "last_tickCount overflow?");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (reset) {
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGW(TAG, "Assign last_tickCount = %lu", tickCount);
|
||||||
|
#endif
|
||||||
|
last_tickCount = tickCount;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
#ifdef DEBUG_WOLFSSL_BENCHMARK_TIMING
|
||||||
|
ESP_LOGW(TAG, "No Reset last_tickCount = %lu", tickCount);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(configTICK_RATE_HZ) && defined(CONFIG_FREERTOS_HZ)
|
||||||
|
ret = (double)tickCount / configTICK_RATE_HZ;
|
||||||
|
#else
|
||||||
|
ESP_LOGW(TAG, "Warning: configTICK_RATE_HZ not defined,"
|
||||||
|
"assuming 1000 Hz.");
|
||||||
|
ret = (double)(tickCount / 1000.0);
|
||||||
|
#endif /* configTICK_RATE_HZ */
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
} /* current_time */
|
||||||
|
|
||||||
#elif defined (WOLFSSL_TIRTOS)
|
#elif defined (WOLFSSL_TIRTOS)
|
||||||
|
|
||||||
|
@ -12002,7 +12294,7 @@ void bench_sphincsKeySign(byte level, byte optim)
|
||||||
{
|
{
|
||||||
double time_now;
|
double time_now;
|
||||||
double current_s = OS_GetTime() / 1000.0;
|
double current_s = OS_GetTime() / 1000.0;
|
||||||
double current_us = OS_GetTime_us() / 1000000.0;
|
double current_us = OS_GetTime_us() / MILLION_VALUE;
|
||||||
time_now = (double)( current_s + current_us);
|
time_now = (double)( current_s + current_us);
|
||||||
|
|
||||||
(void) reset;
|
(void) reset;
|
||||||
|
@ -12129,20 +12421,20 @@ void bench_sphincsKeySign(byte level, byte optim)
|
||||||
* outside wolfcrypt.
|
* outside wolfcrypt.
|
||||||
*/
|
*/
|
||||||
return (double)rusage.ru_utime.tv_sec +
|
return (double)rusage.ru_utime.tv_sec +
|
||||||
(double)rusage.ru_utime.tv_usec / 1000000.0;
|
(double)rusage.ru_utime.tv_usec / MILLION_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_for_excessive_stime(const char *desc,
|
static void check_for_excessive_stime(const char *desc,
|
||||||
const char *desc_extra)
|
const char *desc_extra)
|
||||||
{
|
{
|
||||||
double start_utime = (double)base_rusage.ru_utime.tv_sec +
|
double start_utime = (double)base_rusage.ru_utime.tv_sec +
|
||||||
(double)base_rusage.ru_utime.tv_usec / 1000000.0;
|
(double)base_rusage.ru_utime.tv_usec / MILLION_VALUE;
|
||||||
double start_stime = (double)base_rusage.ru_stime.tv_sec +
|
double start_stime = (double)base_rusage.ru_stime.tv_sec +
|
||||||
(double)base_rusage.ru_stime.tv_usec / 1000000.0;
|
(double)base_rusage.ru_stime.tv_usec / MILLION_VALUE;
|
||||||
double cur_utime = (double)cur_rusage.ru_utime.tv_sec +
|
double cur_utime = (double)cur_rusage.ru_utime.tv_sec +
|
||||||
(double)cur_rusage.ru_utime.tv_usec / 1000000.0;
|
(double)cur_rusage.ru_utime.tv_usec / MILLION_VALUE;
|
||||||
double cur_stime = (double)cur_rusage.ru_stime.tv_sec +
|
double cur_stime = (double)cur_rusage.ru_stime.tv_sec +
|
||||||
(double)cur_rusage.ru_stime.tv_usec / 1000000.0;
|
(double)cur_rusage.ru_stime.tv_usec / MILLION_VALUE;
|
||||||
double stime_utime_ratio =
|
double stime_utime_ratio =
|
||||||
(cur_stime - start_stime) / (cur_utime - start_utime);
|
(cur_stime - start_stime) / (cur_utime - start_utime);
|
||||||
if (stime_utime_ratio > .1)
|
if (stime_utime_ratio > .1)
|
||||||
|
@ -12185,9 +12477,12 @@ void bench_sphincsKeySign(byte level, byte optim)
|
||||||
#if defined(HAVE_GET_CYCLES)
|
#if defined(HAVE_GET_CYCLES)
|
||||||
|
|
||||||
#if defined(WOLFSSL_ESPIDF)
|
#if defined(WOLFSSL_ESPIDF)
|
||||||
static WC_INLINE word64 get_xtensa_cycles(void)
|
/* Generic CPU cycle counter for either Xtensa or RISC-V */
|
||||||
|
static WC_INLINE word64 esp_get_cpu_benchmark_cycles(void)
|
||||||
{
|
{
|
||||||
return xthal_get_ccount_ex();
|
/* Reminder: a long duration between calls with
|
||||||
|
* multiple overflows will not be detected. */
|
||||||
|
return esp_get_cycle_count_ex();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* implement other architectures here */
|
/* implement other architectures here */
|
||||||
|
@ -12390,25 +12685,18 @@ static int string_matches(const char* arg, const char* str)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
/* Code for main() or wolf_benchmark_task() */
|
||||||
#ifdef WOLFSSL_ESPIDF
|
#ifdef WOLFSSL_ESPIDF
|
||||||
int argc = construct_argv();
|
int argc = construct_argv();
|
||||||
char** argv = (char**)__argv;
|
char** argv = (char**)__argv;
|
||||||
|
|
||||||
#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C6)
|
|
||||||
ESP_ERROR_CHECK(gptimer_new_timer(&esp_timer_config, &esp_gptimer));
|
|
||||||
ESP_LOGI(TAG, "Enable ESP32-C3 timer ");
|
|
||||||
ESP_ERROR_CHECK(gptimer_enable(esp_gptimer));
|
|
||||||
ESP_ERROR_CHECK(gptimer_start(esp_gptimer));
|
|
||||||
#endif
|
|
||||||
#elif defined(MAIN_NO_ARGS)
|
#elif defined(MAIN_NO_ARGS)
|
||||||
int argc = 0;
|
int argc = 0;
|
||||||
char** argv = NULL;
|
char** argv = NULL;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return wolfcrypt_benchmark_main(argc, argv);
|
return wolfcrypt_benchmark_main(argc, argv);
|
||||||
}
|
}
|
||||||
#endif /* NO_MAIN_DRIVER && NO_MAIN_FUNCTION */
|
#endif /* !NO_MAIN_DRIVER && !NO_MAIN_FUNCTION */
|
||||||
|
|
||||||
int wolfcrypt_benchmark_main(int argc, char** argv)
|
int wolfcrypt_benchmark_main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
|
@ -12671,6 +12959,22 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
#if defined(CONFIG_IDF_TARGET_ESP32C2) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C3) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C6)
|
||||||
|
{
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
if (esp_gptimer == NULL) {
|
||||||
|
ESP_ERROR_CHECK(gptimer_new_timer(&esp_timer_config,
|
||||||
|
&esp_gptimer) );
|
||||||
|
}
|
||||||
|
ESP_ERROR_CHECK(gptimer_enable(esp_gptimer));
|
||||||
|
ESP_ERROR_CHECK(gptimer_start(esp_gptimer));
|
||||||
|
ESP_LOGI(TAG, "Enable %s timer", CONFIG_IDF_TARGET);
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_STACK_SIZE
|
#ifdef HAVE_STACK_SIZE
|
||||||
ret = StackSizeCheck(NULL, benchmark_test);
|
ret = StackSizeCheck(NULL, benchmark_test);
|
||||||
#else
|
#else
|
||||||
|
@ -12678,6 +12982,17 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(CONFIG_IDF_TARGET_ESP32C2) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C3) || \
|
||||||
|
defined(CONFIG_IDF_TARGET_ESP32C6)
|
||||||
|
{
|
||||||
|
#ifdef WOLFSSL_BENCHMARK_TIMER_DEBUG
|
||||||
|
ESP_ERROR_CHECK(gptimer_stop(esp_gptimer));
|
||||||
|
ESP_ERROR_CHECK(gptimer_disable(esp_gptimer));
|
||||||
|
#endif /* WOLFSSL_BENCHMARK_TIMER_DEBUG */
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#endif /* !NO_MAIN_DRIVER */
|
#endif /* !NO_MAIN_DRIVER */
|
||||||
|
|
Loading…
Reference in New Issue