diff --git a/native/com_wolfssl_WolfSSLSession.h b/native/com_wolfssl_WolfSSLSession.h index 12a4cd4..95b6dda 100644 --- a/native/com_wolfssl_WolfSSLSession.h +++ b/native/com_wolfssl_WolfSSLSession.h @@ -7,6 +7,10 @@ #ifdef __cplusplus extern "C" { #endif +#undef com_wolfssl_WolfSSLSession_MAX_POOL_SIZE +#define com_wolfssl_WolfSSLSession_MAX_POOL_SIZE 32L +#undef com_wolfssl_WolfSSLSession_BUFFER_SIZE +#define com_wolfssl_WolfSSLSession_BUFFER_SIZE 17408L /* * Class: com_wolfssl_WolfSSLSession * Method: newSSL diff --git a/src/java/com/wolfssl/WolfSSLSession.java b/src/java/com/wolfssl/WolfSSLSession.java index 3d7b44a..62a72c5 100644 --- a/src/java/com/wolfssl/WolfSSLSession.java +++ b/src/java/com/wolfssl/WolfSSLSession.java @@ -30,6 +30,7 @@ import java.net.SocketTimeoutException; import java.lang.StringBuilder; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.concurrent.ConcurrentLinkedQueue; /** * Wraps a native WolfSSL session object and contains methods directly related @@ -108,6 +109,74 @@ public class WolfSSLSession { /* lock around native WOLFSSL pointer use */ private final Object sslLock = new Object(); + /* Maximum direct ByteBuffer pool size */ + private static final int MAX_POOL_SIZE = 32; + + /* Size of each direct ByteBuffer in the pool. This is set to 17KB, which + * is slightly larger than the maximum SSL record size (16KB). This + * allows for some overhead (SSL record header, etc) */ + private static final int BUFFER_SIZE = 17 * 1024; + + /* Thread-local direct ByteBuffer pool for optimized JNI direct memory + * access. Passing byte[] and offset down to JNI, on some systems this + * will cause unaligned memory access, with pointer addition + * (buffer + offset). Unaligned memory access can be considerably slower + * (ex: Aarch64). To avoid this, we use a thread-local pool of ByteBuffers + * here so native JNI does not do unaligned memory access and to eliminate + * cross-thread contention. */ + private static final ThreadLocal> directBufferPool = + ThreadLocal.withInitial(() -> new ConcurrentLinkedQueue<>()); + + /** + * Get a DirectByteBuffer from the thread-local pool or allocate a new one + * if the pool is empty. + * + * @return a direct ByteBuffer ready to use + */ + private static ByteBuffer acquireDirectBuffer() { + ConcurrentLinkedQueue threadPool = directBufferPool.get(); + ByteBuffer buffer = threadPool.poll(); + if (buffer == null) { + WolfSSLDebug.log(WolfSSLSession.class, WolfSSLDebug.Component.JNI, + WolfSSLDebug.INFO, 0, + () -> "Thread-local DirectByteBuffer pool empty, " + + "allocating new buffer"); + buffer = ByteBuffer.allocateDirect(BUFFER_SIZE); + } else { + WolfSSLDebug.log(WolfSSLSession.class, WolfSSLDebug.Component.JNI, + WolfSSLDebug.INFO, 0, + () -> "Reusing DirectByteBuffer from thread-local pool, " + + "pool size: " + threadPool.size()); + buffer.clear(); + } + return buffer; + } + + /** + * Return a DirectByteBuffer to the thread-local pool for reuse. + * + * If the pool is full, the ByteBuffer will be garbage collected. + * + * @param buffer the buffer to return to the pool + */ + private static void releaseDirectBuffer(ByteBuffer buffer) { + + if (buffer != null && buffer.isDirect()) { + + buffer.clear(); + ConcurrentLinkedQueue threadPool = + directBufferPool.get(); + + if (threadPool.size() < MAX_POOL_SIZE) { + WolfSSLDebug.log(WolfSSLSession.class, + WolfSSLDebug.Component.JNI, WolfSSLDebug.INFO, 0, + () -> "Returning DirectByteBuffer to thread-local pool, " + + "pool size: " + threadPool.size()); + threadPool.offer(buffer); + } + } + } + /* SNI requested by this WolfSSLSession if client side and useSNI() * was called successfully. */ private byte[] clientSNIRequested = null; @@ -1118,38 +1187,7 @@ public class WolfSSLSession { public int read(byte[] data, int sz) throws IllegalStateException, SocketTimeoutException, SocketException { - final int ret; - final int err; - final int readSz = sz; - long localPtr; - - confirmObjectIsActive(); - - /* Fix for Infer scan, since not synchronizing on sslLock for - * access to this.sslPtr, see note below */ - synchronized (sslLock) { - localPtr = this.sslPtr; - } - - WolfSSLDebug.log(getClass(), WolfSSLDebug.Component.JNI, - WolfSSLDebug.INFO, localPtr, () -> "entered read(sz: " + - readSz + ")"); - - /* not synchronizing on sslLock here since JNI read() locks - * session mutex around native wolfSSL_read() call. If sslLock - * is locked here, since we call select() inside native JNI we - * could timeout waiting for corresponding write() operation to - * occur if needed */ - ret = read(localPtr, data, 0, readSz, 0); - err = getError(ret); - - WolfSSLDebug.log(getClass(), WolfSSLDebug.Component.JNI, - WolfSSLDebug.INFO, localPtr, - () -> "read() ret: " + ret + ", err: " + err); - - throwExceptionFromIOReturnValue(ret, "wolfSSL_read()"); - - return ret; + return read(data, 0, sz, 0); } /** @@ -1236,12 +1274,14 @@ public class WolfSSLSession { public int read(byte[] data, int offset, int sz, int timeout) throws IllegalStateException, SocketTimeoutException, SocketException { - final int ret; - final int err; + int ret; + int err; + int readSz = sz; final int readOff = offset; - final int readSz = sz; + final int tmpReadSz = sz; final int readTimeout = timeout; long localPtr; + ByteBuffer directBuffer = null; confirmObjectIsActive(); @@ -1253,20 +1293,64 @@ public class WolfSSLSession { WolfSSLDebug.log(getClass(), WolfSSLDebug.Component.JNI, WolfSSLDebug.INFO, localPtr, - () -> "entered read(offset: " + readOff + ", sz: " + readSz + + () -> "entered read(offset: " + readOff + ", sz: " + tmpReadSz + ", timeout: " + readTimeout + ")"); - /* not synchronizing on sslLock here since JNI read() locks - * session mutex around native wolfSSL_read() call. If sslLock - * is locked here, since we call select() inside native JNI we - * could timeout waiting for corresponding write() operation to - * occur if needed */ - ret = read(localPtr, data, readOff, readSz, readTimeout); - err = getError(ret); + /* Use a DirectByteBuffer from the pool to avoid unaligned + * memory access. Otherwise our native JNI code may need to + * do "buffer + offset" and end up with unaligned memory which + * can be slow on some targets (ex: ARM/Aarch64) */ + try { + /* Get a buffer from the pool */ + directBuffer = acquireDirectBuffer(); + WolfSSLDebug.log(getClass(), + WolfSSLDebug.Component.JNI, WolfSSLDebug.INFO, localPtr, + () -> "read() using thread-local ByteBuffer pool: pool size: " + + directBufferPool.get().size()); + + /* Only read up to the size of the buffer or readSz, + * whichever is smaller. */ + readSz = Math.min(readSz, directBuffer.capacity()); + + /* Use direct buffer for JNI call */ + directBuffer.limit(readSz); + + /* Call native read with DirectByteBuffer */ + ret = read(localPtr, directBuffer, 0, readSz, false, + readSz, readTimeout); + + if (ret > 0) { + /* Copy data from direct buffer to user array */ + directBuffer.flip(); + directBuffer.get(data, offset, ret); + } + + err = getError(ret); + + } catch (Exception e) { + + WolfSSLDebug.log(getClass(), WolfSSLDebug.Component.JNI, + WolfSSLDebug.INFO, localPtr, + () -> "read() falling back to use byte[]"); + + /* Fall back to original implementation on errors */ + ret = read(localPtr, data, readOff, readSz, readTimeout); + err = getError(ret); + + } finally { + + /* Return buffer to pool */ + if (directBuffer != null) { + releaseDirectBuffer(directBuffer); + } + } + + final int finalRet = ret; + final int finalErr = err; WolfSSLDebug.log(getClass(), WolfSSLDebug.Component.JNI, WolfSSLDebug.INFO, localPtr, - () -> "read() ret: " + ret + ", err: " + err); + () -> "read() ret: " + finalRet + ", err: " + finalErr); throwExceptionFromIOReturnValue(ret, "wolfSSL_read()");