adding support for split VQ

2019-03-21 18:41:39 +10:30 · 2019-03-21 18:41:39 +10:30 · 446b5afd13
parent 52a2198ad7
commit 446b5afd13
4 changed files with 125 additions and 24 deletions
--- a/9
+++ b/9
@ -21,7 +21,7 @@ CFLAGS+=-mfpu=neon -march=armv8-a -mtune=cortex-a53
 endif
 PROG=dump_data test_lpcnet test_vec quant_feat tcodec2_pitch weight tdump tweak_pitch quant_test \
-     quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec
+     quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec idct
 all: $(PROG)
 dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/codec2_pitch.o
@ -106,9 +106,14 @@ weight_objs := src/weight.o
 weight_deps := $(weight_objs:.o=.d)
 weight: $(weight_objs)
 	gcc -o $@ $(CFLAGS) $(weight_objs) -lm
 -include $(weight_deps)
 # idct: program built from src/idct.c plus the src objects listed below.
 # Laid out like the other per-program sections in this file: an object list,
 # the matching .d dependency files, a link rule, and an optional include of
 # the .d files (the leading '-' keeps make quiet when they do not exist yet).
 idct_objs := src/idct.o src/freq.o src/kiss_fft.o src/celt_lpc.o src/pitch.o
 idct_deps := $(idct_objs:.o=.d)
 idct: $(idct_objs)
 	gcc -o $@ $(CFLAGS) $(idct_objs) -lm
 -include $(idct_deps)
 tweak_pitch_objs := src/tweak_pitch.o
 tweak_pitch_deps := $(tweak_pitch_objs:.o=.d)
 tweak_pitch: $(tweak_pitch_objs)
--- a/src/idct.c
+++ b/src/idct.c
@ -0,0 +1,28 @@
 /*
  idct.c
  David Rowe Mar 2019
  inverse DCT so we can experiment with training in the Ly (log magnitude) domain.
 */
 #include <assert.h>
 #include <stdio.h>
 #include <math.h>
 #include "freq.h"
 #define NB_BANDS 18
 /*
   Filter program: reads frames of NB_BANDS floats (dctLy) from stdin, passes
   each frame through idct(), and writes the NB_BANDS float results (Ly) to
   stdout.  A trailing partial frame (fewer than NB_BANDS floats) is not
   processed.

   Returns 0 on success, 1 if reading the input or writing the output fails.
 */
 int main(void) {
    FILE *fin, *fout;
    float dctLy[NB_BANDS], Ly[NB_BANDS];
    fin = stdin; fout = stdout;

    while(fread(dctLy, sizeof(float), NB_BANDS, fin) == NB_BANDS) {
        idct(Ly, dctLy);
        /* explicit check rather than assert(): assert() is compiled out when
           NDEBUG is defined, which would let write errors pass silently */
        if (fwrite(Ly, sizeof(float), NB_BANDS, fout) != NB_BANDS) {
            fprintf(stderr, "idct: error writing output\n");
            return 1;
        }
    }

    /* fread() returns a short count for both EOF and error; tell them apart */
    if (ferror(fin)) {
        fprintf(stderr, "idct: error reading input\n");
        return 1;
    }

    /* stdout may be buffered, so a failed write can first show up here */
    if (fflush(fout) != 0) {
        fprintf(stderr, "idct: error writing output\n");
        return 1;
    }
    return 0;
 }
--- a/src/quant_feat.c
+++ b/src/quant_feat.c
@ -52,6 +52,7 @@ int main(int argc, char *argv[]) {
    FILE *fpitch = NULL;
    float Fs = 16000.0;
    float uniform_step = 0.0;
    float uniform_step2 = 0.0;
    int   mbest_survivors = 0;
    char label[80] = "";
    /* experimental limits for dctLy[0], first cepstral */
@ -62,6 +63,7 @@ int main(int argc, char *argv[]) {
    float pitch_gain_bias = 0.0;
    int   pitch_bits = 0;
    int   small_vec = 0;
    int   logmag = 0;
    for(i=0; i<MAX_STAGES*NB_BANDS*MAX_ENTRIES; i++) vq[i] = 0.0;
@ -74,19 +76,21 @@ int main(int argc, char *argv[]) {
        {"hard",       required_argument, 0, 'h'},
        {"label",      required_argument, 0, 'l'},
        {"mbest",      required_argument, 0, 'm'},
        /* --mag is a plain flag: the short option string lists 'i' without a
           ':' and the 'i' case never reads optarg, so it must not take an
           argument here either (otherwise it swallows the next argument) */
        {"mag",        no_argument,       0, 'i'},
        {"pitchquant", required_argument, 0, 'o'},
        {"pred",       required_argument, 0, 'p'},
        {"quant",      required_argument, 0, 'q'},
        {"stagevar",   required_argument, 0, 's'},
        {"uniform",    required_argument, 0, 'u'},
        {"verbose",    no_argument,       0, 'v'},
        {"uniform2",   required_argument, 0, 'x'},
        {"weight",     no_argument,       0, 'w'},
        {0, 0, 0, 0}
    };
    int opt_index = 0;
-    while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:", long_options, &opt_index)) != -1) {
+    while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:ix:", long_options, &opt_index)) != -1) {
        switch (c) {
        case 'a':
            /* small cepstral vectors - zero out several bands */
@ -120,6 +124,10 @@ int main(int argc, char *argv[]) {
            lower_limit = atof(optarg);            
            fprintf(stderr, "lower_limit: %f upper_limit: %f\n", lower_limit, upper_limit);
            break;
        case 'i':
            /* work in log mag rather than cepstral domain */
            logmag = 1;
            break;
        case 'l':
            /* text label to print with results; bounded copy because label is
               a fixed-size array and optarg comes straight from the command
               line (an over-long label is truncated rather than overflowing) */
            snprintf(label, sizeof(label), "%s", optarg);
@ -156,10 +164,13 @@ int main(int argc, char *argv[]) {
                    fprintf(stderr, "Couldn't open: %s\n", fn);
                    exit(1);
                }
                /* count how many entries m of dimension k are in this VQ file */
                m[num_stages] = 0;
-                while (fread(features, sizeof(float), k, fq) == (size_t)k) m[num_stages]++;
+                while (fread(features, sizeof(float), k, fq) == (size_t)k)
                    m[num_stages]++;
                assert(m[num_stages] <= MAX_ENTRIES);
                fprintf(stderr, "%d entries of vectors width %d\n", m[num_stages], k);
                /* now load VQ into memory */
                rewind(fq);                       
                int rd = fread(&vq[num_stages*k*MAX_ENTRIES], sizeof(float), m[num_stages]*k, fq);
                assert(rd == m[num_stages]*k);
@ -170,6 +181,11 @@ int main(int argc, char *argv[]) {
        case 'u':
            uniform_step = atof(optarg);
            fprintf(stderr, "uniform quant step size: %3.2f dB\n", uniform_step);
            uniform_step2 = uniform_step;
            break;
        case 'x':
            uniform_step2 = atof(optarg);
            fprintf(stderr, "uniform quant step size 12..17: %3.2f dB\n", uniform_step2);
            break;
        case 'v':
            lpcnet_verbose = 1;
@ -181,6 +197,7 @@ int main(int argc, char *argv[]) {
            /* usage text: long option names must match the long_options table
               (the -o option is registered as --pitchquant) */
            fprintf(stderr,"usage: %s [Options]:\n  [-d --decimation 1/2/3...]\n  [-q --quant quantfile1,quantfile2,....]\n", argv[0]);
            fprintf(stderr,"  [-g --gain pitch gain bias]\n");
            fprintf(stderr,"  [-h --hard lowerLimitdB]\n");
            fprintf(stderr,"  [-i --mag]\n");
            fprintf(stderr,"  [-l --label txtLabel]\n");
            fprintf(stderr,"  [-m --mbest survivors]\n  [-o --pitchquant nBits]\n");
            fprintf(stderr,"  [-p --pred predCoff]\n  [-f --first firstElement]\n  [-s --stagevar TxtFile]\n");
@ -190,19 +207,14 @@ int main(int argc, char *argv[]) {
        }
    }
-    fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d", dec, pred, num_stages, mbest_survivors, small_vec);
+    fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d logmag: %d",
            dec, pred, num_stages, mbest_survivors, small_vec, logmag);
    fprintf(stderr, "\n");
    /* delay line so we can pass some features (like pitch and voicing) through unmodified */
    float features_prev[dec+1][NB_FEATURES];
    /* adjacent vectors used for linear interpolation, note only 0..17 and 38,39 used */
    float features_lin[2][NB_FEATURES];
    /* used for optional smoothing of features */
    /*
    float features_mem[NB_BANDS];
    for(i=0; i<NB_BANDS; i++)
        features_mem[i] = 0.0;
    */
    for(d=0; d<dec+1; d++)
        for(i=0; i<NB_FEATURES; i++)
@ -258,6 +270,13 @@ int main(int argc, char *argv[]) {
            }
        }
        /* optionally convert cepstrals to log magnitudes */
        if (logmag) {
            float tmp[NB_BANDS];
            idct(tmp, features);
            for(i=0; i<NB_BANDS; i++) features[i] = tmp[i];
        }
        /* convert cepstrals to dB */
        for(i=0; i<NB_BANDS; i++)
            features[i] *= 10.0;
@ -324,9 +343,11 @@ int main(int argc, char *argv[]) {
                        features_quant[i] = features[i];
                }
                if (uniform_step != 0.0) {
-                    for(i=0; i<NB_BANDS; i++) {
+                    for(i=0; i<12; i++) {
                        features_quant[i] = uniform_step*round(features[i]/uniform_step);
-                        //fprintf(stderr, "%d %f %f\n", i, features[i], features_quant[i]);
+                    }
                    for(; i<NB_BANDS; i++) {
                        features_quant[i] = uniform_step2*round(features[i]/uniform_step2);
                    }
                }
            }
@ -400,6 +421,13 @@ int main(int argc, char *argv[]) {
        for(i=0; i<NB_BANDS; i++)
            features_out[i] *= 1/10.0;
        /* if optionally log magnitudes convert back to cepstrals */
        if (logmag) {
            float tmp[NB_BANDS];
            dct(tmp, features_out);
            for(i=0; i<NB_BANDS; i++) features_out[i] = tmp[i];
       }
        /* need to recompute LPCs after every frame, as we have quantised, or interpolated */
        lpc_from_cepstrum(&features_out[2*NB_BANDS+3], features_out);
--- a/train_direct.sh
+++ b/train_direct.sh
@ -1,17 +1,57 @@
 #!/bin/sh
 # train_direct.sh
 # David Rowe March 2019
 # Train multi-stage VQ direct (non predictive) for LPCNet
-PATH=/home/david/codec2-dev/build_linux/misc/
+PATH=$PATH:/home/david/codec2-dev/build_linux/misc/
-VQTRAIN=$PATH/vqtrain
+
-EXTRACT=$PATH/extract
+if [ $# -lt 1 ]; then
-VQTRAIN=/home/david/codec2-dev/build_linux/misc/vqtrain
+    echo "usage: ./train_direct.sh [-i] VQprefix"
-K=8
+    echo "       $ ./train_direct.sh direct_v1"
    echo "  -i   work in Ly (log magnitude) domain"
    exit 1
 fi
 # Scan the command line for the optional -i flag.  "$@" is expanded once,
 # before the loop body runs, so the shift below does not disturb the
 # iteration itself.
 # NOTE: shift always discards the current $1, whichever position -i was
 # found at, so -i has to come before VQprefix (as the usage text shows).
 for i in "$@"
 do
 case $i in
    -i)
        LOGMAG=1
        shift # drop one positional parameter so $1 becomes the next argument
    ;;
 esac
 done
 VQ_NAME=$1
 echo $VQ_NAME
 K=18
 FINAL_K=12
 STOP=1E-3
 echo "*********"
 echo "Direct"
 echo "*********"
-$EXTRACT all_speech_features.f32 all_speech_direct.f32 0 7 10 0
+t=$(mktemp)
-$VQTRAIN all_speech_direct.f32 $K 2048 direct_stage1.f32 sd1.f32
+extract -e `expr $K - 1` -g 10 all_speech_features_5e6.f32 $t 
-$VQTRAIN sd1.f32 $K 2048 direct_stage2.f32 sd2.f32
+if [ -z "$LOGMAG" ]; then
-$VQTRAIN sd2.f32 $K 2048 direct_stage3.f32 sd3.f32
+  echo "weighting dctLy[0] ...."
-$VQTRAIN sd3.f32 $K 2048 direct_stage4.f32 sd4.f32
+  cat $t | ./weight > $VQ_NAME'_s0.f32'
-$VQTRAIN sd4.f32 $K 2048 direct_stage5.f32 sd5.f32
+else
  echo "working in Ly (log magnitude) domain"
  cat $t | ./idct > $VQ_NAME'_s0.f32'
 fi
 vqtrain $VQ_NAME'_s0.f32' $K 2048 $VQ_NAME'_stage1.f32' -r $VQ_NAME'_s1.f32' -s $STOP 
 vqtrain $VQ_NAME'_s1.f32' $K 2048 $VQ_NAME'_stage2.f32' -r $VQ_NAME'_s2.f32' -s $STOP
 vqtrain $VQ_NAME'_s2.f32' $K 2048 $VQ_NAME'_stage3.f32' -r $VQ_NAME'_s3.f32' -s $STOP 
 # Final VQ stage(s).  As in stages 1-3, the file written via -r by one stage
 # is the training input of the next stage.
 if [ -z "$LOGMAG" ]; then
  echo "final two stages $K elements"
  vqtrain $VQ_NAME'_s3.f32' $K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s5.f32' -s $STOP
  # stage 5 must read the file stage 4 just wrote (_s5); nothing in this
  # script ever creates a _s4 file
  vqtrain $VQ_NAME'_s5.f32' $K 2048 $VQ_NAME'_stage5.f32' -r $VQ_NAME'_s6.f32' -s $STOP
 else
  echo "final stage $FINAL_K elements"
  # presumably keeps only the first $FINAL_K of every $K elements of the
  # stage 3 output before training the last stage -- confirm against extract
  t=$(mktemp)
  extract -e `expr $FINAL_K - 1` -t $K $VQ_NAME'_s3.f32' $t
  vqtrain $t $FINAL_K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s5.f32' -s $STOP
 fi