Merge pull request #63 from drowe67/drowe67-patch-1

Repo Status
Update README.md
2025-01-20 06:19:54 +10:30 · 2023-09-05 05:29:39 +09:30 · 2023-09-04 17:35:14 +09:30 · 2023-08-31 18:32:08 -07:00 · 2023-09-01 05:50:10 +09:00 · 2023-08-31 20:13:57 +09:00
5 changed files with 10 additions and 216 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,10 @@
 # Status
 Note: This repo is not being actively maintained by the authors, as the FreeDV 2020 mode for which it was developed does not perform well on HF channels and is not widely used. You are welcome to use this repo as is, or fork it, but regrettably we are unavailable to assist with Issues or to consider Pull Requests.  It has been an interesting experiment which we have now completed.  The only exceptions are:
 * Fixing any build problems on supported platforms (x86/ARM) and OSes (Linux/macOS and MinGW for Windows).
 * If you can demonstrate use of the code in this repo that we are not aware of.
 # LPCNet for FreeDV
 Experimental version of LPCNet that has been used to develop FreeDV 2020 - a HF radio Digital Voice mode for over the air experimentation with Neural Net speech coding.  Possibly the first use of Neural Net speech coding in real world operation.
--- a/src/nnet.c
+++ b/src/nnet.c
@ -41,10 +41,8 @@
 #define SOFTMAX_HACK
-#ifdef __AVX__
+#if defined(__AVX__) || (__SSE__)
 #include "vec_avx.h"
 #elif __SSE__
 #include "vec_sse.h"
 #elif __ARM_NEON__ || __aarch64__
 #include "vec_neon.h"
 #else
--- a/src/test_vec.c
+++ b/src/test_vec.c
@ -29,7 +29,7 @@ const char simd[]="AVX2";
 const char simd[]="AVX";
 #endif
 #elif __SSE__
-#include "vec_sse.h"
+#include "vec_avx.h"
 const char simd[]="SSE";
 #elif __ARM_NEON__ || __aarch64__
 #include "vec_neon.h"
--- a/src/vec_avx.h
+++ b/src/vec_avx.h
@ -81,7 +81,7 @@ static inline void mm256_storeu_ps(float *dst, mm256_emu src) {
 #define _mm256_storeu_ps(dst, src) mm256_storeu_ps(dst, src)
-static inline mm256_emu mm256_setzero_ps() {
+static inline mm256_emu mm256_setzero_ps(void) {
  mm256_emu ret;
  ret.lo = _mm_setzero_ps();
  ret.hi = ret.lo;
--- a/src/vec_sse.h
+++ b/src/vec_sse.h
@ -1,211 +0,0 @@
 /* Copyright (c) 2020 SASANO Takayoshi
                 2018 David Rowe
                 2018 Mozilla
                 2008-2011 Octasic Inc.
                 2012-2017 Jean-Marc Valin */
 /*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 /*
  SSE implementation of vector operations, compile with -msse
  port from Arm NEON support
 */
 #include <xmmintrin.h>
 #ifndef LPCNET_TEST
 static float celt_exp2(float x)
 {
    int integer;
    float frac;
    union {
 	float f;
 	opus_uint32 i;
    } res;
    integer = floor(x);
    if (integer < -50)
 	return 0;
    frac = x-integer;
    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
    res.f = 0.99992522f + frac * (0.69583354f
 				  + frac * (0.22606716f + 0.078024523f*frac));
    res.i = (res.i + (integer<<23)) & 0x7fffffff;
    return res.f;
 }
 #define celt_exp_sse(x) celt_exp2((x)*1.44269504f)
 /* Table-driven tanh approximation.
    Inputs that are not below 8 yield 1 and inputs that are not above -8
    yield -1. Otherwise the magnitude of x is rounded to the nearest multiple
    of 0.04, tansig_table is read at that index, and the table value is
    corrected using the remaining offset. The sign is restored on return. */
 static float tansig_approx(float x)
 {
    float sign, y, dy;
    int slot;
    /* Negated comparisons so that a NaN input is caught by the first test */
    if (!(x<8))
       return 1;
    if (!(x>-8))
       return -1;
 #ifndef FIXED_POINT
    /* Extra guard for builds where -ffast-math defeats the tests above */
    if (celt_isnan(x))
       return 0;
 #endif
    /* Work on the magnitude and remember the sign for the end */
    sign = 1;
    if (x<0)
    {
       sign = -1;
       x = -x;
    }
    /* Nearest table slot (spacing 0.04) and the leftover offset from it */
    slot = (int)floor(.5f+25*x);
    x -= .04f*slot;
    y = tansig_table[slot];
    dy = 1-y*y;
    y = y + x*dy*(1 - y*x);
    return sign*y;
 }
 /* Sigmoid approximation computed from tansig_approx:
    0.5 + 0.5 * tansig_approx(0.5 * x). */
 static OPUS_INLINE float sigmoid_approx(float x)
 {
    const float half_x = .5f*x;
    return .5f + .5f*tansig_approx(half_x);
 }
 /* Writes celt_exp_sse(x[k]) to y[k] for each of the N elements.
    No normalisation is applied in this function. */
 static void softmax(float *y, const float *x, int N)
 {
    int k = 0;
    while (k < N)
    {
       y[k] = celt_exp_sse(x[k]);
       k++;
    }
 }
 /* Applies tansig_approx to each of the N elements of x, storing into y. */
 static void vec_tanh(float *y, const float *x, int N)
 {
    int k = 0;
    while (k < N)
    {
       y[k] = tansig_approx(x[k]);
       k++;
    }
 }
 /* Applies sigmoid_approx to each of the N elements of x, storing into y. */
 static void vec_sigmoid(float *y, const float *x, int N)
 {
    int k = 0;
    while (k < N)
    {
       y[k] = sigmoid_approx(x[k]);
       k++;
    }
 }
 #endif
 /* Accumulating matrix-vector product, processed 16 output rows at a time:
    out[i] += sum over j of weights[j*col_stride + i] * x[j].
    The loop steps rows in units of 16 and always touches a full group of 16
    outputs and weights. */
 static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
 {
    int row, col, q;
    for (row = 0; row < rows; row += 16)
    {
       float * restrict dst = &out[row];
       /* four 4-lane accumulators hold dst[0..15] across the column loop */
       __m128 acc[4];
       for (q = 0; q < 4; q++)
          acc[q] = _mm_loadu_ps(&dst[4*q]);
       for (col = 0; col < cols; col++)
       {
          const float * restrict wcol = &weights[col*col_stride + row];
          const __m128 xs = _mm_set1_ps(x[col]);
          for (q = 0; q < 4; q++)
          {
             __m128 prod = _mm_mul_ps(_mm_loadu_ps(&wcol[4*q]), xs);
             acc[q] = _mm_add_ps(acc[q], prod);
          }
       }
       /* write the 16 accumulated outputs back */
       for (q = 0; q < 4; q++)
          _mm_storeu_ps(&dst[4*q], acc[q]);
    }
 }
 /* Sparse accumulating matrix-vector product, 16 output rows at a time.
    For each group of 16 rows, idx supplies a count followed by that many
    indices into x; w supplies 16 consecutive weights per listed index.
    Each listed entry adds w[0..15] * x[index] to the group's 16 outputs. */
 static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x)
 {
    const float *wp = w;
    const int *ip = idx;
    int row, q;
    for (row = 0; row < rows; row += 16)
    {
       /* number of index entries that follow for this group of rows */
       int remaining = *ip++;
       float * restrict dst = &out[row];
       /* four 4-lane accumulators hold dst[0..15] across the entry loop */
       __m128 acc[4];
       for (q = 0; q < 4; q++)
          acc[q] = _mm_loadu_ps(&dst[4*q]);
       while (remaining-- > 0)
       {
          const __m128 xs = _mm_set1_ps(x[*ip++]);
          for (q = 0; q < 4; q++)
          {
             __m128 prod = _mm_mul_ps(_mm_loadu_ps(&wp[4*q]), xs);
             acc[q] = _mm_add_ps(acc[q], prod);
          }
          wp += 16;
       }
       /* write the 16 accumulated outputs back */
       for (q = 0; q < 4; q++)
          _mm_storeu_ps(&dst[4*q], acc[q]);
    }
 }
Author	SHA1	Message	Date
drowe67	c8e51ac5e2	Merge pull request #63 from drowe67/drowe67-patch-1 Repo Status	2025-01-20 06:19:54 +10:30
drowe67	023e19278d	Update README.md Adding a few exceptions	2023-09-05 05:29:39 +09:30
drowe67	5d75aae3c0	Notice around status of this repo	2023-09-04 17:35:14 +09:30
Mooneer Salem	255b9f4aab	Merge pull request #60 from jg1uaa/master deprectate vec_sse.h	2023-08-31 18:32:08 -07:00
SASANO Takayoshi	97a0df10fc	cherry-pick xiph/LPCNet 42a8a649d5d43457193195e5e9b4e83e75629e8c	2023-09-01 05:50:10 +09:00
SASANO Takayoshi	c0f30d843a	cherry-pick xiph/LPCNet 55d6f1b5eb9902301ecbef9e726303d252a620b7	2023-08-31 20:13:57 +09:00
SASANO Takayoshi	172a033a3b	remove vec_sse.h and use vec_avx.h for SSE(4.1) support	2023-08-31 20:13:18 +09:00