mirror of https://github.com/drowe67/LPCNet.git
adding support for split VQ
parent
52a2198ad7
commit
446b5afd13
9
Makefile
9
Makefile
|
@ -21,7 +21,7 @@ CFLAGS+=-mfpu=neon -march=armv8-a -mtune=cortex-a53
|
||||||
endif
|
endif
|
||||||
|
|
||||||
PROG=dump_data test_lpcnet test_vec quant_feat tcodec2_pitch weight tdump tweak_pitch quant_test \
|
PROG=dump_data test_lpcnet test_vec quant_feat tcodec2_pitch weight tdump tweak_pitch quant_test \
|
||||||
quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec
|
quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec idct
|
||||||
all: $(PROG)
|
all: $(PROG)
|
||||||
|
|
||||||
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/codec2_pitch.o
|
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/codec2_pitch.o
|
||||||
|
@ -106,9 +106,14 @@ weight_objs := src/weight.o
|
||||||
weight_deps := $(weight_objs:.o=.d)
|
weight_deps := $(weight_objs:.o=.d)
|
||||||
weight: $(weight_objs)
|
weight: $(weight_objs)
|
||||||
gcc -o $@ $(CFLAGS) $(weight_objs) -lm
|
gcc -o $@ $(CFLAGS) $(weight_objs) -lm
|
||||||
|
|
||||||
-include $(weight_deps)
|
-include $(weight_deps)
|
||||||
|
|
||||||
|
idct_objs := src/idct.o src/freq.o src/kiss_fft.o src/celt_lpc.o src/pitch.o
|
||||||
|
idct_deps := $(idct_objs:.o=.d)
|
||||||
|
idct: $(idct_objs)
|
||||||
|
gcc -o $@ $(CFLAGS) $(idct_objs) -lm
|
||||||
|
-include $(idct_deps)
|
||||||
|
|
||||||
tweak_pitch_objs := src/tweak_pitch.o
|
tweak_pitch_objs := src/tweak_pitch.o
|
||||||
tweak_pitch_deps := $(tweak_pitch_objs:.o=.d)
|
tweak_pitch_deps := $(tweak_pitch_objs:.o=.d)
|
||||||
tweak_pitch: $(tweak_pitch_objs)
|
tweak_pitch: $(tweak_pitch_objs)
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
/*
|
||||||
|
idct.c
|
||||||
|
David Rowe Mar 2019
|
||||||
|
|
||||||
|
inverse DCT so we can experiment with training in the Ly (log magnitude) domain.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include "freq.h"
|
||||||
|
|
||||||
|
#define NB_BANDS 18
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
FILE *fin, *fout;
|
||||||
|
float dctLy[NB_BANDS], Ly[NB_BANDS];
|
||||||
|
fin = stdin; fout = stdout;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
while(fread(dctLy, sizeof(float), NB_BANDS, fin) == NB_BANDS) {
|
||||||
|
idct(Ly, dctLy);
|
||||||
|
ret = fwrite(Ly, sizeof(float), NB_BANDS, fout);
|
||||||
|
assert(ret == NB_BANDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -52,6 +52,7 @@ int main(int argc, char *argv[]) {
|
||||||
FILE *fpitch = NULL;
|
FILE *fpitch = NULL;
|
||||||
float Fs = 16000.0;
|
float Fs = 16000.0;
|
||||||
float uniform_step = 0.0;
|
float uniform_step = 0.0;
|
||||||
|
float uniform_step2 = 0.0;
|
||||||
int mbest_survivors = 0;
|
int mbest_survivors = 0;
|
||||||
char label[80] = "";
|
char label[80] = "";
|
||||||
/* experimental limits for dctLy[0], first cepstral */
|
/* experimental limits for dctLy[0], first cepstral */
|
||||||
|
@ -62,6 +63,7 @@ int main(int argc, char *argv[]) {
|
||||||
float pitch_gain_bias = 0.0;
|
float pitch_gain_bias = 0.0;
|
||||||
int pitch_bits = 0;
|
int pitch_bits = 0;
|
||||||
int small_vec = 0;
|
int small_vec = 0;
|
||||||
|
int logmag = 0;
|
||||||
|
|
||||||
for(i=0; i<MAX_STAGES*NB_BANDS*MAX_ENTRIES; i++) vq[i] = 0.0;
|
for(i=0; i<MAX_STAGES*NB_BANDS*MAX_ENTRIES; i++) vq[i] = 0.0;
|
||||||
|
|
||||||
|
@ -74,19 +76,21 @@ int main(int argc, char *argv[]) {
|
||||||
{"hard", required_argument, 0, 'h'},
|
{"hard", required_argument, 0, 'h'},
|
||||||
{"label", required_argument, 0, 'l'},
|
{"label", required_argument, 0, 'l'},
|
||||||
{"mbest", required_argument, 0, 'm'},
|
{"mbest", required_argument, 0, 'm'},
|
||||||
|
/* --mag is a plain flag: the short form 'i' has no ':' in the optstring and
   case 'i' never reads optarg, so the long form must not consume an argument */
{"mag", no_argument, 0, 'i'},
|
||||||
{"pitchquant", required_argument, 0, 'o'},
|
{"pitchquant", required_argument, 0, 'o'},
|
||||||
{"pred", required_argument, 0, 'p'},
|
{"pred", required_argument, 0, 'p'},
|
||||||
{"quant", required_argument, 0, 'q'},
|
{"quant", required_argument, 0, 'q'},
|
||||||
{"stagevar", required_argument, 0, 's'},
|
{"stagevar", required_argument, 0, 's'},
|
||||||
{"uniform", required_argument, 0, 'u'},
|
{"uniform", required_argument, 0, 'u'},
|
||||||
{"verbose", no_argument, 0, 'v'},
|
{"verbose", no_argument, 0, 'v'},
|
||||||
|
{"uniform2", required_argument, 0, 'x'},
|
||||||
{"weight", no_argument, 0, 'w'},
|
{"weight", no_argument, 0, 'w'},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
int opt_index = 0;
|
int opt_index = 0;
|
||||||
|
|
||||||
while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:", long_options, &opt_index)) != -1) {
|
while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:ix:", long_options, &opt_index)) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'a':
|
case 'a':
|
||||||
/* small cepstral vectors - zero out several bands */
|
/* small cepstral vectors - zero out several bands */
|
||||||
|
@ -120,6 +124,10 @@ int main(int argc, char *argv[]) {
|
||||||
lower_limit = atof(optarg);
|
lower_limit = atof(optarg);
|
||||||
fprintf(stderr, "lower_limit: %f upper_limit: %f\n", lower_limit, upper_limit);
|
fprintf(stderr, "lower_limit: %f upper_limit: %f\n", lower_limit, upper_limit);
|
||||||
break;
|
break;
|
||||||
|
case 'i':
|
||||||
|
/* work in log mag rather than cepstral domain */
|
||||||
|
logmag = 1;
|
||||||
|
break;
|
||||||
case 'l':
|
case 'l':
|
||||||
/* text label to print with results */
|
/* text label to print with results */
|
||||||
strcpy(label, optarg);
|
strcpy(label, optarg);
|
||||||
|
@ -156,10 +164,13 @@ int main(int argc, char *argv[]) {
|
||||||
fprintf(stderr, "Couldn't open: %s\n", fn);
|
fprintf(stderr, "Couldn't open: %s\n", fn);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
/* count how many entries m of dimension k are in this VQ file */
|
||||||
m[num_stages] = 0;
|
m[num_stages] = 0;
|
||||||
while (fread(features, sizeof(float), k, fq) == (size_t)k) m[num_stages]++;
|
while (fread(features, sizeof(float), k, fq) == (size_t)k)
|
||||||
|
m[num_stages]++;
|
||||||
assert(m[num_stages] <= MAX_ENTRIES);
|
assert(m[num_stages] <= MAX_ENTRIES);
|
||||||
fprintf(stderr, "%d entries of vectors width %d\n", m[num_stages], k);
|
fprintf(stderr, "%d entries of vectors width %d\n", m[num_stages], k);
|
||||||
|
/* now load VQ into memory */
|
||||||
rewind(fq);
|
rewind(fq);
|
||||||
int rd = fread(&vq[num_stages*k*MAX_ENTRIES], sizeof(float), m[num_stages]*k, fq);
|
int rd = fread(&vq[num_stages*k*MAX_ENTRIES], sizeof(float), m[num_stages]*k, fq);
|
||||||
assert(rd == m[num_stages]*k);
|
assert(rd == m[num_stages]*k);
|
||||||
|
@ -170,6 +181,11 @@ int main(int argc, char *argv[]) {
|
||||||
case 'u':
|
case 'u':
|
||||||
uniform_step = atof(optarg);
|
uniform_step = atof(optarg);
|
||||||
fprintf(stderr, "uniform quant step size: %3.2f dB\n", uniform_step);
|
fprintf(stderr, "uniform quant step size: %3.2f dB\n", uniform_step);
|
||||||
|
uniform_step2 = uniform_step;
|
||||||
|
break;
|
||||||
|
case 'x':
|
||||||
|
uniform_step2 = atof(optarg);
|
||||||
|
fprintf(stderr, "uniform quant step size 12..17: %3.2f dB\n", uniform_step2);
|
||||||
break;
|
break;
|
||||||
case 'v':
|
case 'v':
|
||||||
lpcnet_verbose = 1;
|
lpcnet_verbose = 1;
|
||||||
|
@ -181,6 +197,7 @@ int main(int argc, char *argv[]) {
|
||||||
fprintf(stderr,"usage: %s [Options]:\n [-d --decimation 1/2/3...]\n [-q --quant quantfile1,quantfile2,....]\n", argv[0]);
|
fprintf(stderr,"usage: %s [Options]:\n [-d --decimation 1/2/3...]\n [-q --quant quantfile1,quantfile2,....]\n", argv[0]);
|
||||||
fprintf(stderr," [-g --gain pitch gain bias]\n");
|
fprintf(stderr," [-g --gain pitch gain bias]\n");
|
||||||
fprintf(stderr," [-h --hard lowerLimitdB\n");
|
fprintf(stderr," [-h --hard lowerLimitdB\n");
|
||||||
|
fprintf(stderr," [-i --mag\n");
|
||||||
fprintf(stderr," [-l --label txtLabel]\n");
|
fprintf(stderr," [-l --label txtLabel]\n");
|
||||||
fprintf(stderr," [-m --mbest survivors]\n [-o --pitchbits nBits]\n");
|
fprintf(stderr," [-m --mbest survivors]\n [-o --pitchbits nBits]\n");
|
||||||
fprintf(stderr," [-p --pred predCoff]\n [-f --first firstElement]\n [-s --stagevar TxtFile]\n");
|
fprintf(stderr," [-p --pred predCoff]\n [-f --first firstElement]\n [-s --stagevar TxtFile]\n");
|
||||||
|
@ -190,19 +207,14 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d", dec, pred, num_stages, mbest_survivors, small_vec);
|
fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d logmag: %d",
|
||||||
|
dec, pred, num_stages, mbest_survivors, small_vec, logmag);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
|
||||||
/* delay line so we can pass some features (like pitch and voicing) through unmodified */
|
/* delay line so we can pass some features (like pitch and voicing) through unmodified */
|
||||||
float features_prev[dec+1][NB_FEATURES];
|
float features_prev[dec+1][NB_FEATURES];
|
||||||
/* adjacent vectors used for linear interpolation, note only 0..17 and 38,39 used */
|
/* adjacent vectors used for linear interpolation, note only 0..17 and 38,39 used */
|
||||||
float features_lin[2][NB_FEATURES];
|
float features_lin[2][NB_FEATURES];
|
||||||
/* used for optiona smoothing of features */
|
|
||||||
/*
|
|
||||||
float features_mem[NB_BANDS];
|
|
||||||
for(i=0; i<NB_BANDS; i++)
|
|
||||||
features_mem[i] = 0.0;
|
|
||||||
*/
|
|
||||||
|
|
||||||
for(d=0; d<dec+1; d++)
|
for(d=0; d<dec+1; d++)
|
||||||
for(i=0; i<NB_FEATURES; i++)
|
for(i=0; i<NB_FEATURES; i++)
|
||||||
|
@ -258,6 +270,13 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* optionally convert cepstrals to log magnitudes */
|
||||||
|
if (logmag) {
|
||||||
|
float tmp[NB_BANDS];
|
||||||
|
idct(tmp, features);
|
||||||
|
for(i=0; i<NB_BANDS; i++) features[i] = tmp[i];
|
||||||
|
}
|
||||||
|
|
||||||
/* convert cepstrals to dB */
|
/* convert cepstrals to dB */
|
||||||
for(i=0; i<NB_BANDS; i++)
|
for(i=0; i<NB_BANDS; i++)
|
||||||
features[i] *= 10.0;
|
features[i] *= 10.0;
|
||||||
|
@ -324,9 +343,11 @@ int main(int argc, char *argv[]) {
|
||||||
features_quant[i] = features[i];
|
features_quant[i] = features[i];
|
||||||
}
|
}
|
||||||
if (uniform_step != 0.0) {
|
if (uniform_step != 0.0) {
|
||||||
for(i=0; i<NB_BANDS; i++) {
|
for(i=0; i<12; i++) {
|
||||||
features_quant[i] = uniform_step*round(features[i]/uniform_step);
|
features_quant[i] = uniform_step*round(features[i]/uniform_step);
|
||||||
//fprintf(stderr, "%d %f %f\n", i, features[i], features_quant[i]);
|
}
|
||||||
|
for(; i<NB_BANDS; i++) {
|
||||||
|
features_quant[i] = uniform_step2*round(features[i]/uniform_step2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -400,6 +421,13 @@ int main(int argc, char *argv[]) {
|
||||||
for(i=0; i<NB_BANDS; i++)
|
for(i=0; i<NB_BANDS; i++)
|
||||||
features_out[i] *= 1/10.0;
|
features_out[i] *= 1/10.0;
|
||||||
|
|
||||||
|
/* if optionally log magnitudes convert back to cepstrals */
|
||||||
|
if (logmag) {
|
||||||
|
float tmp[NB_BANDS];
|
||||||
|
dct(tmp, features_out);
|
||||||
|
for(i=0; i<NB_BANDS; i++) features_out[i] = tmp[i];
|
||||||
|
}
|
||||||
|
|
||||||
/* need to recompute LPCs after every frame, as we have quantised, or interpolated */
|
/* need to recompute LPCs after every frame, as we have quantised, or interpolated */
|
||||||
lpc_from_cepstrum(&features_out[2*NB_BANDS+3], features_out);
|
lpc_from_cepstrum(&features_out[2*NB_BANDS+3], features_out);
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,57 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
# train_direct.sh
|
||||||
|
# David Rowe March 2019
|
||||||
|
# Train multi-stage VQ direct (non predictive) for LPCNet
|
||||||
|
|
||||||
PATH=/home/david/codec2-dev/build_linux/misc/
|
PATH=$PATH:/home/david/codec2-dev/build_linux/misc/
|
||||||
VQTRAIN=$PATH/vqtrain
|
|
||||||
EXTRACT=$PATH/extract
|
if [ $# -lt 1 ]; then
|
||||||
VQTRAIN=/home/david/codec2-dev/build_linux/misc/vqtrain
|
echo "usage: ./train_direct.sh [-i] VQprefix"
|
||||||
K=8
|
echo " $ ./train_direct.sh direct_v1"
|
||||||
|
echo " -i work in Ly (log magnitude) domain"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
for i in "$@"
|
||||||
|
do
|
||||||
|
case $i in
|
||||||
|
-i)
|
||||||
|
LOGMAG=1
|
||||||
|
shift # past argument=value
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
VQ_NAME=$1
|
||||||
|
echo $VQ_NAME
|
||||||
|
|
||||||
|
K=18
|
||||||
|
FINAL_K=12
|
||||||
|
STOP=1E-3
|
||||||
|
|
||||||
echo "*********"
|
echo "*********"
|
||||||
echo "Direct"
|
echo "Direct"
|
||||||
echo "*********"
|
echo "*********"
|
||||||
$EXTRACT all_speech_features.f32 all_speech_direct.f32 0 7 10 0
|
t=$(mktemp)
|
||||||
$VQTRAIN all_speech_direct.f32 $K 2048 direct_stage1.f32 sd1.f32
|
extract -e `expr $K - 1` -g 10 all_speech_features_5e6.f32 $t
|
||||||
$VQTRAIN sd1.f32 $K 2048 direct_stage2.f32 sd2.f32
|
if [ -z "$LOGMAG" ]; then
|
||||||
$VQTRAIN sd2.f32 $K 2048 direct_stage3.f32 sd3.f32
|
echo "weighting dctLy[0] ...."
|
||||||
$VQTRAIN sd3.f32 $K 2048 direct_stage4.f32 sd4.f32
|
cat $t | ./weight > $VQ_NAME'_s0.f32'
|
||||||
$VQTRAIN sd4.f32 $K 2048 direct_stage5.f32 sd5.f32
|
else
|
||||||
|
echo "working in Ly (log magnitude) domain"
|
||||||
|
cat $t | ./idct > $VQ_NAME'_s0.f32'
|
||||||
|
fi
|
||||||
|
|
||||||
|
vqtrain $VQ_NAME'_s0.f32' $K 2048 $VQ_NAME'_stage1.f32' -r $VQ_NAME'_s1.f32' -s $STOP
|
||||||
|
vqtrain $VQ_NAME'_s1.f32' $K 2048 $VQ_NAME'_stage2.f32' -r $VQ_NAME'_s2.f32' -s $STOP
|
||||||
|
vqtrain $VQ_NAME'_s2.f32' $K 2048 $VQ_NAME'_stage3.f32' -r $VQ_NAME'_s3.f32' -s $STOP
|
||||||
|
if [ -z "$LOGMAG" ]; then
|
||||||
|
echo "final two stages $K elements"
|
||||||
|
vqtrain $VQ_NAME'_s3.f32' $K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s5.f32' -s $STOP
|
||||||
|
# stage 5 trains on the residual that stage 4 wrote with -r ($VQ_NAME'_s5.f32');
# no earlier step in this script produces a '_s4.f32' file
vqtrain $VQ_NAME'_s5.f32' $K 2048 $VQ_NAME'_stage5.f32' -r $VQ_NAME'_s6.f32' -s $STOP
|
||||||
|
else
|
||||||
|
echo "final stage $FINAL_K elements"
|
||||||
|
t=$(mktemp)
|
||||||
|
extract -e `expr $FINAL_K - 1` -t $K $VQ_NAME'_s3.f32' $t
|
||||||
|
vqtrain $t $FINAL_K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s5.f32' -s $STOP
|
||||||
|
fi
|
||||||
|
|
Loading…
Reference in New Issue