mirror of https://github.com/drowe67/LPCNet.git
adding support for split VQ
parent
52a2198ad7
commit
446b5afd13
9
Makefile
9
Makefile
|
@ -21,7 +21,7 @@ CFLAGS+=-mfpu=neon -march=armv8-a -mtune=cortex-a53
|
|||
endif
|
||||
|
||||
PROG=dump_data test_lpcnet test_vec quant_feat tcodec2_pitch weight tdump tweak_pitch quant_test \
|
||||
quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec
|
||||
quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec idct
|
||||
all: $(PROG)
|
||||
|
||||
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/codec2_pitch.o
|
||||
|
@ -106,9 +106,14 @@ weight_objs := src/weight.o
|
|||
weight_deps := $(weight_objs:.o=.d)
|
||||
weight: $(weight_objs)
|
||||
gcc -o $@ $(CFLAGS) $(weight_objs) -lm
|
||||
|
||||
-include $(weight_deps)
|
||||
|
||||
# idct: command line tool linked from src/idct.o and the objects it needs.
idct_objs := src/idct.o \
             src/freq.o \
             src/kiss_fft.o \
             src/celt_lpc.o \
             src/pitch.o
# One .d file per object, same path with the suffix swapped.
idct_deps := $(patsubst %.o,%.d,$(idct_objs))

# $^ expands to the prerequisite list, i.e. exactly $(idct_objs).
idct: $(idct_objs)
	gcc -o $@ $(CFLAGS) $^ -lm

# Leading '-' means a missing .d file is not an error.
-include $(idct_deps)
|
||||
|
||||
tweak_pitch_objs := src/tweak_pitch.o
|
||||
tweak_pitch_deps := $(tweak_pitch_objs:.o=.d)
|
||||
tweak_pitch: $(tweak_pitch_objs)
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
idct.c
|
||||
David Rowe Mar 2019
|
||||
|
||||
inverse DCT so we can experiment with training in the Ly (log magnitude) domain.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "freq.h"
|
||||
|
||||
#define NB_BANDS 18
|
||||
|
||||
int main(void) {
|
||||
FILE *fin, *fout;
|
||||
float dctLy[NB_BANDS], Ly[NB_BANDS];
|
||||
fin = stdin; fout = stdout;
|
||||
int ret;
|
||||
|
||||
while(fread(dctLy, sizeof(float), NB_BANDS, fin) == NB_BANDS) {
|
||||
idct(Ly, dctLy);
|
||||
ret = fwrite(Ly, sizeof(float), NB_BANDS, fout);
|
||||
assert(ret == NB_BANDS);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -52,6 +52,7 @@ int main(int argc, char *argv[]) {
|
|||
FILE *fpitch = NULL;
|
||||
float Fs = 16000.0;
|
||||
float uniform_step = 0.0;
|
||||
float uniform_step2 = 0.0;
|
||||
int mbest_survivors = 0;
|
||||
char label[80] = "";
|
||||
/* experimental limits for dctLy[0], first cepstral */
|
||||
|
@ -62,6 +63,7 @@ int main(int argc, char *argv[]) {
|
|||
float pitch_gain_bias = 0.0;
|
||||
int pitch_bits = 0;
|
||||
int small_vec = 0;
|
||||
int logmag = 0;
|
||||
|
||||
for(i=0; i<MAX_STAGES*NB_BANDS*MAX_ENTRIES; i++) vq[i] = 0.0;
|
||||
|
||||
|
@ -74,19 +76,21 @@ int main(int argc, char *argv[]) {
|
|||
{"hard", required_argument, 0, 'h'},
|
||||
{"label", required_argument, 0, 'l'},
|
||||
{"mbest", required_argument, 0, 'm'},
|
||||
{"mag", required_argument, 0, 'i'},
|
||||
{"pitchquant", required_argument, 0, 'o'},
|
||||
{"pred", required_argument, 0, 'p'},
|
||||
{"quant", required_argument, 0, 'q'},
|
||||
{"stagevar", required_argument, 0, 's'},
|
||||
{"uniform", required_argument, 0, 'u'},
|
||||
{"verbose", no_argument, 0, 'v'},
|
||||
{"uniform2", required_argument, 0, 'x'},
|
||||
{"weight", no_argument, 0, 'w'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
int opt_index = 0;
|
||||
|
||||
while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:", long_options, &opt_index)) != -1) {
|
||||
while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:ix:", long_options, &opt_index)) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
/* small cepstral vectors - zero out several bands */
|
||||
|
@ -120,6 +124,10 @@ int main(int argc, char *argv[]) {
|
|||
lower_limit = atof(optarg);
|
||||
fprintf(stderr, "lower_limit: %f upper_limit: %f\n", lower_limit, upper_limit);
|
||||
break;
|
||||
case 'i':
|
||||
/* work in log mag rather than cepstral domain */
|
||||
logmag = 1;
|
||||
break;
|
||||
case 'l':
|
||||
/* text label to print with results */
|
||||
strcpy(label, optarg);
|
||||
|
@ -156,10 +164,13 @@ int main(int argc, char *argv[]) {
|
|||
fprintf(stderr, "Couldn't open: %s\n", fn);
|
||||
exit(1);
|
||||
}
|
||||
/* count how many entries m of dimension k are in this VQ file */
|
||||
m[num_stages] = 0;
|
||||
while (fread(features, sizeof(float), k, fq) == (size_t)k) m[num_stages]++;
|
||||
while (fread(features, sizeof(float), k, fq) == (size_t)k)
|
||||
m[num_stages]++;
|
||||
assert(m[num_stages] <= MAX_ENTRIES);
|
||||
fprintf(stderr, "%d entries of vectors width %d\n", m[num_stages], k);
|
||||
/* now load VQ into memory */
|
||||
rewind(fq);
|
||||
int rd = fread(&vq[num_stages*k*MAX_ENTRIES], sizeof(float), m[num_stages]*k, fq);
|
||||
assert(rd == m[num_stages]*k);
|
||||
|
@ -170,6 +181,11 @@ int main(int argc, char *argv[]) {
|
|||
case 'u':
|
||||
uniform_step = atof(optarg);
|
||||
fprintf(stderr, "uniform quant step size: %3.2f dB\n", uniform_step);
|
||||
uniform_step2 = uniform_step;
|
||||
break;
|
||||
case 'x':
|
||||
uniform_step2 = atof(optarg);
|
||||
fprintf(stderr, "uniform quant step size 12..17: %3.2f dB\n", uniform_step2);
|
||||
break;
|
||||
case 'v':
|
||||
lpcnet_verbose = 1;
|
||||
|
@ -181,6 +197,7 @@ int main(int argc, char *argv[]) {
|
|||
fprintf(stderr,"usage: %s [Options]:\n [-d --decimation 1/2/3...]\n [-q --quant quantfile1,quantfile2,....]\n", argv[0]);
|
||||
fprintf(stderr," [-g --gain pitch gain bias]\n");
|
||||
fprintf(stderr," [-h --hard lowerLimitdB\n");
|
||||
fprintf(stderr," [-i --mag\n");
|
||||
fprintf(stderr," [-l --label txtLabel]\n");
|
||||
fprintf(stderr," [-m --mbest survivors]\n [-o --pitchbits nBits]\n");
|
||||
fprintf(stderr," [-p --pred predCoff]\n [-f --first firstElement]\n [-s --stagevar TxtFile]\n");
|
||||
|
@ -190,19 +207,14 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d", dec, pred, num_stages, mbest_survivors, small_vec);
|
||||
fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d logmag: %d",
|
||||
dec, pred, num_stages, mbest_survivors, small_vec, logmag);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
/* delay line so we can pass some features (like pitch and voicing) through unmodified */
|
||||
float features_prev[dec+1][NB_FEATURES];
|
||||
/* adjacent vectors used for linear interpolation, note only 0..17 and 38,39 used */
|
||||
float features_lin[2][NB_FEATURES];
|
||||
/* used for optiona smoothing of features */
|
||||
/*
|
||||
float features_mem[NB_BANDS];
|
||||
for(i=0; i<NB_BANDS; i++)
|
||||
features_mem[i] = 0.0;
|
||||
*/
|
||||
|
||||
for(d=0; d<dec+1; d++)
|
||||
for(i=0; i<NB_FEATURES; i++)
|
||||
|
@ -258,6 +270,13 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
/* optionally convert cepstrals to log magnitudes */
|
||||
if (logmag) {
|
||||
float tmp[NB_BANDS];
|
||||
idct(tmp, features);
|
||||
for(i=0; i<NB_BANDS; i++) features[i] = tmp[i];
|
||||
}
|
||||
|
||||
/* convert cepstrals to dB */
|
||||
for(i=0; i<NB_BANDS; i++)
|
||||
features[i] *= 10.0;
|
||||
|
@ -324,9 +343,11 @@ int main(int argc, char *argv[]) {
|
|||
features_quant[i] = features[i];
|
||||
}
|
||||
if (uniform_step != 0.0) {
|
||||
for(i=0; i<NB_BANDS; i++) {
|
||||
for(i=0; i<12; i++) {
|
||||
features_quant[i] = uniform_step*round(features[i]/uniform_step);
|
||||
//fprintf(stderr, "%d %f %f\n", i, features[i], features_quant[i]);
|
||||
}
|
||||
for(; i<NB_BANDS; i++) {
|
||||
features_quant[i] = uniform_step2*round(features[i]/uniform_step2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -400,6 +421,13 @@ int main(int argc, char *argv[]) {
|
|||
for(i=0; i<NB_BANDS; i++)
|
||||
features_out[i] *= 1/10.0;
|
||||
|
||||
/* if optionally log magnitudes convert back to cepstrals */
|
||||
if (logmag) {
|
||||
float tmp[NB_BANDS];
|
||||
dct(tmp, features_out);
|
||||
for(i=0; i<NB_BANDS; i++) features_out[i] = tmp[i];
|
||||
}
|
||||
|
||||
/* need to recompute LPCs after every frame, as we have quantised, or interpolated */
|
||||
lpc_from_cepstrum(&features_out[2*NB_BANDS+3], features_out);
|
||||
|
||||
|
|
|
@ -1,17 +1,57 @@
|
|||
#!/bin/sh
|
||||
# train_direct.sh
|
||||
# David Rowe March 2019
|
||||
# Train multi-stage VQ direct (non predictive) for LPCNet
|
||||
|
||||
PATH=/home/david/codec2-dev/build_linux/misc/
|
||||
VQTRAIN=$PATH/vqtrain
|
||||
EXTRACT=$PATH/extract
|
||||
VQTRAIN=/home/david/codec2-dev/build_linux/misc/vqtrain
|
||||
K=8
|
||||
PATH=$PATH:/home/david/codec2-dev/build_linux/misc/
|
||||
|
||||
# Print command line help.
usage() {
    echo "usage: ./train_direct.sh [-i] VQprefix"
    echo " $ ./train_direct.sh direct_v1"
    echo " -i work in Ly (log magnitude) domain"
}

# Scan the arguments without shifting: -i sets LOGMAG, and the first
# argument that is not -i becomes the VQ file name prefix.  Calling "shift"
# inside a for-loop over "$@" always drops the *first* positional parameter,
# which is the prefix itself whenever -i is not given first.
VQ_NAME=
for arg in "$@"
do
    case $arg in
        -i)
            LOGMAG=1
            ;;
        *)
            if [ -z "$VQ_NAME" ]; then
                VQ_NAME=$arg
            fi
            ;;
    esac
done

# Covers both "no arguments" and "only -i given".
if [ -z "$VQ_NAME" ]; then
    usage
    exit 1
fi

echo "$VQ_NAME"
|
||||
|
||||
K=18
|
||||
FINAL_K=12
|
||||
STOP=1E-3
|
||||
|
||||
echo "*********"
|
||||
echo "Direct"
|
||||
echo "*********"
|
||||
$EXTRACT all_speech_features.f32 all_speech_direct.f32 0 7 10 0
|
||||
$VQTRAIN all_speech_direct.f32 $K 2048 direct_stage1.f32 sd1.f32
|
||||
$VQTRAIN sd1.f32 $K 2048 direct_stage2.f32 sd2.f32
|
||||
$VQTRAIN sd2.f32 $K 2048 direct_stage3.f32 sd3.f32
|
||||
$VQTRAIN sd3.f32 $K 2048 direct_stage4.f32 sd4.f32
|
||||
$VQTRAIN sd4.f32 $K 2048 direct_stage5.f32 sd5.f32
|
||||
# Build the stage 0 training file $VQ_NAME_s0.f32: run extract on
# all_speech_features_5e6.f32 into a temporary file, then filter it through
# ./weight (default) or ./idct (when LOGMAG is set).
t=$(mktemp)
extract -e $((K - 1)) -g 10 all_speech_features_5e6.f32 "$t"
if [ -z "$LOGMAG" ]; then
    echo "weighting dctLy[0] ...."
    ./weight < "$t" > "$VQ_NAME"'_s0.f32'
else
    echo "working in Ly (log magnitude) domain"
    ./idct < "$t" > "$VQ_NAME"'_s0.f32'
fi
# The temporary file is only an input to the filter above; remove it so
# repeated runs do not accumulate files in the temp directory.
rm -f "$t"
|
||||
|
||||
# Train the VQ stages in sequence.  Each vqtrain call takes the file named
# with -r by the previous call as its input (presumably the residual left
# after that stage - confirm against vqtrain's usage).
vqtrain "$VQ_NAME"'_s0.f32' $K 2048 "$VQ_NAME"'_stage1.f32' -r "$VQ_NAME"'_s1.f32' -s $STOP
vqtrain "$VQ_NAME"'_s1.f32' $K 2048 "$VQ_NAME"'_stage2.f32' -r "$VQ_NAME"'_s2.f32' -s $STOP
vqtrain "$VQ_NAME"'_s2.f32' $K 2048 "$VQ_NAME"'_stage3.f32' -r "$VQ_NAME"'_s3.f32' -s $STOP
if [ -z "$LOGMAG" ]; then
    echo "final two stages $K elements"
    vqtrain "$VQ_NAME"'_s3.f32' $K 2048 "$VQ_NAME"'_stage4.f32' -r "$VQ_NAME"'_s5.f32' -s $STOP
    # Stage 5 reads the file stage 4 just wrote (_s5.f32).  It used to read
    # _s4.f32, which no command in this script produces.
    vqtrain "$VQ_NAME"'_s5.f32' $K 2048 "$VQ_NAME"'_stage5.f32' -r "$VQ_NAME"'_s6.f32' -s $STOP
else
    echo "final stage $FINAL_K elements"
    # Stage 4 is trained on FINAL_K-wide vectors that extract derives from
    # the K-wide stage 3 output, via a temporary file.
    t=$(mktemp)
    extract -e $((FINAL_K - 1)) -t $K "$VQ_NAME"'_s3.f32' "$t"
    vqtrain "$t" $FINAL_K 2048 "$VQ_NAME"'_stage4.f32' -r "$VQ_NAME"'_s5.f32' -s $STOP
    # the temporary file is not needed once training has read it
    rm -f "$t"
fi
|
||||
|
|
Loading…
Reference in New Issue