Integrated the direct-split VQ into lpcnet_enc/lpcnet_dec; it is indeed better than the previous predictive (pred) scheme in the presence of bit errors. It stands up OK to a 1% BER, with few pops and crackles.

2019-03-23 19:54:39 +10:30 · 2019-03-23 19:54:39 +10:30 · 998cacc7e6
parent 80e9c3821c
commit 998cacc7e6
4 changed files with 57 additions and 21 deletions
--- a/README.md
+++ b/README.md
@ -24,7 +24,7 @@ Clone this repo, then:
 $ make
 ```

-The quantiser files these experiments (pred_v2.tgz and split.tgz) are [here](http://rowetel.com/downloads/deep/lpcnet_quant)
+The quantiser files used for these experiments (pred_v2.tgz and split.tgz) are [here](http://rowetel.com/downloads/deep/lpcnet_quant)

 ## Exploring Features

@ -118,7 +118,7 @@ Four stage VQ of log magnitudes (Ly), 11 bits (2048 entries) per stage, First 3
 sox ~/Desktop/deep/quant/wia.wav -t raw - | ./dump_data --c2pitch --test - - | ./quant_feat -d 3 -i -p 0 --mbest 5 -q split_stage1.f32,split_stage2.f32,split_stage3.f32,split_stage4.f32 | ./test_lpcnet - - | aplay -f S16_LE -r 16000
 ```

-Four stage VQ of Cepstrals (DCT of Ly), 11 bits (2048 entries) per stage, 18 element wide vectors.  We quantise the predictor output.
+Compare this to four stage predictive VQ of Cepstrals (DCT of Ly), 11 bits (2048 entries) per stage, 18 element wide vectors.  We quantise the predictor output.

 ```
 sox ~/Desktop/deep/quant/wia.wav -t raw -  | ./dump_data --c2pitch --test - - | ./quant_feat -d 3 -w --mbest 5 -q pred_v2_stage1.f32,pred_v2_stage2.f32,pred_v2_stage3.f32,pred_v2_stage4.f32 | ./test_lpcnet - - | aplay -f S16_LE -r 16000
--- a/src/lpcnet_dec.c
+++ b/src/lpcnet_dec.c
@ -44,9 +44,13 @@
 #undef NB_FEATURES 
 #include "lpcnet.h"

+// Two sorts of VQs available
 extern int   pred_num_stages;
 extern float pred_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
 extern int   pred_m[MAX_STAGES];
+extern int   direct_split_num_stages;
+extern float direct_split_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
+extern int   direct_split_m[MAX_STAGES];

 int main(int argc, char **argv) {
    FILE *fin, *fout;
@ -60,23 +64,27 @@ int main(int argc, char **argv) {
    int   pitch_bits = 6;
    float ber = 0.0;
    int   num_stages = pred_num_stages;
+    int   *m = pred_m;
+    float *vq = pred_vq;
+    int   logmag = 0;
    
    /* quantiser options */
    
    static struct option long_options[] = {
-        {"ber",        required_argument, 0, 'b'},
-        {"decimate",   required_argument, 0, 'd'},
-        {"numstages",  required_argument, 0, 'n'},
-        {"pitchquant", required_argument, 0, 'o'},
-        {"pred",       required_argument, 0, 'p'},
-        {"verbose",    no_argument,       0, 'v'},
+        {"ber",         required_argument, 0, 'b'},
+        {"decimate",    required_argument, 0, 'd'},
+        {"numstages",   required_argument, 0, 'n'},
+        {"pitchquant",  required_argument, 0, 'o'},
+        {"pred",        required_argument, 0, 'p'},
+        {"directsplit", no_argument,       0, 's'},
+        {"verbose",     no_argument,       0, 'v'},
        {0, 0, 0, 0}
    };

    int   c;
    int opt_index = 0;

-    while ((c = getopt_long (argc, argv, "b:d:n:o:p:v", long_options, &opt_index)) != -1) {
+    while ((c = getopt_long (argc, argv, "b:d:n:o:p:sv", long_options, &opt_index)) != -1) {
        switch (c) {
        case 'b':
            ber = atof(optarg);
@ -98,6 +106,10 @@ int main(int argc, char **argv) {
            pred = atof(optarg);
            fprintf(stderr, "pred = %f\n", pred);
            break;
+        case 's':
+            m = direct_split_m; vq = direct_split_vq; pred = 0.0; logmag = 1; weight = 1.0;
+            fprintf(stderr, "split VQ\n");
+            break;
        case 'v':
            lpcnet_verbose = 1;
            break;
@ -107,13 +119,13 @@ int main(int argc, char **argv) {
            fprintf(stderr,"  [-d --decimation 1/2/3...]\n");
            fprintf(stderr,"  [-n --numstages]\n  [-o --pitchbits nBits]\n");
            fprintf(stderr,"  [-p --pred predCoff]\n");
+            fprintf(stderr,"  [-s --split]\n");
            fprintf(stderr,"  [-v --verbose]\n");
            exit(1);
        }
    }

-
-    LPCNET_QUANT *q = lpcnet_quant_create(num_stages, pred_m, pred_vq);
+    LPCNET_QUANT *q = lpcnet_quant_create(num_stages, m, vq);
    q->weight = weight; q->pred = pred; q->mbest = mbest_survivors;
    q->pitch_bits = pitch_bits; q->dec = dec;
    lpcnet_quant_compute_bits_per_frame(q);
@ -129,7 +141,7 @@ int main(int argc, char **argv) {

    fin = stdin;
    fout = stdout;
-    int nbits = 0, nerrs = 0;
+    int nbits = 0, nerrs = 0, i;

    do {
        float in_features[NB_TOTAL_FEATURES];
@ -151,6 +163,12 @@ int main(int argc, char **argv) {
            
        }
        lpcnet_frame_to_features(q, frame, in_features);
+        /* optionally convert log magnitudes back to cepstrals */
+        if (logmag) {
+            float tmp[NB_BANDS];
+            dct(tmp, in_features);
+            for(i=0; i<NB_BANDS; i++) in_features[i] = tmp[i];
+       }
       
        RNN_COPY(features, in_features, NB_TOTAL_FEATURES);
        RNN_CLEAR(&features[18], 18);
--- a/src/lpcnet_enc.c
+++ b/src/lpcnet_enc.c
@ -39,9 +39,13 @@
 #include "lpcnet_dump.h"
 #include "lpcnet_quant.h"

+// Two sorts of VQs available
 extern int   pred_num_stages;
 extern float pred_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
 extern int   pred_m[MAX_STAGES];
+extern int   direct_split_num_stages;
+extern float direct_split_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
+extern int   direct_split_m[MAX_STAGES];

 int main(int argc, char **argv) {
    FILE *fin, *fout;
@ -54,22 +58,26 @@ int main(int argc, char **argv) {
    float weight = 1.0/sqrt(NB_BANDS);    
    int   pitch_bits = 6;
    int   num_stages = pred_num_stages;
+    int   *m = pred_m;
+    float *vq = pred_vq;
+    int   logmag = 0;
    
    /* quantiser options */
    
    static struct option long_options[] = {
-        {"decimate",   required_argument, 0, 'd'},
-        {"numstages",  required_argument, 0, 'n'},
-        {"pitchquant", required_argument, 0, 'o'},
-        {"pred",       required_argument, 0, 'p'},
-        {"verbose",    no_argument,       0, 'v'},
+        {"decimate",     required_argument, 0, 'd'},
+        {"numstages",    required_argument, 0, 'n'},
+        {"pitchquant",   required_argument, 0, 'o'},
+        {"pred",         required_argument, 0, 'p'},
+        {"directsplit",  no_argument,       0, 's'},
+        {"verbose",      no_argument,       0, 'v'},
        {0, 0, 0, 0}
    };

    int   c;
    int opt_index = 0;

-    while ((c = getopt_long (argc, argv, "d:n:o:p:v", long_options, &opt_index)) != -1) {
+    while ((c = getopt_long (argc, argv, "d:n:o:p:sv", long_options, &opt_index)) != -1) {
        switch (c) {
        case 'd':
            dec = atoi(optarg);
@ -87,20 +95,24 @@ int main(int argc, char **argv) {
            pred = atof(optarg);
            fprintf(stderr, "pred = %f\n", pred);
            break;
+        case 's':
+            m = direct_split_m; vq = direct_split_vq; pred = 0.0; logmag = 1; weight = 1.0;
+            fprintf(stderr, "split VQ\n");
+            break;
        case 'v':
            lpcnet_verbose = 1;
            break;
         default:
            fprintf(stderr,"usage: %s [Options]:\n  [-d --decimation 1/2/3...]\n", argv[0]);
            fprintf(stderr,"  [-n --numstages]\n  [-o --pitchbits nBits]\n");
-            fprintf(stderr,"  [-p --pred predCoff]\n");
+            fprintf(stderr,"  [-p --pred predCoff] [-s --split]\n");
            fprintf(stderr,"  [-v --verbose]\n");
            exit(1);
        }
    }

    LPCNET_DUMP  *d = lpcnet_dump_create();
-    LPCNET_QUANT *q = lpcnet_quant_create(num_stages, pred_m, pred_vq);
+    LPCNET_QUANT *q = lpcnet_quant_create(num_stages, m, vq);
    q->weight = weight; q->pred = pred; q->mbest = mbest_survivors;
    q->pitch_bits = pitch_bits; q->dec = dec;
    lpcnet_quant_compute_bits_per_frame(q);
@ -125,6 +137,12 @@ int main(int argc, char **argv) {
        int nread = fread(&d->tmp, sizeof(short), FRAME_SIZE, fin);
        if (nread != FRAME_SIZE) break;
        lpcnet_dump(d,x,features);
+        /* optionally convert cepstrals to log magnitudes */
+        if (logmag) {
+            float tmp[NB_BANDS];
+            idct(tmp, features);
+            for(i=0; i<NB_BANDS; i++) features[i] = tmp[i];
+        }
        if (lpcnet_features_to_frame(q, features, frame))
            bits_written += fwrite(frame, sizeof(char), q->bits_per_frame, fout);       
        fflush(stdin);
--- a/src/quant_feat.c
+++ b/src/quant_feat.c
@ -417,7 +417,7 @@ int main(int argc, char *argv[]) {
        for(i=0; i<NB_BANDS; i++)
            features_out[i] *= 1/10.0;

-        /* if optionally log magnitudes convert back to cepstrals */
+        /* optionally convert log magnitudes back to cepstrals */
        if (logmag) {
            float tmp[NB_BANDS];
            dct(tmp, features_out);