adding support for split VQ

pull/1/head
David 2019-03-21 18:41:39 +10:30
parent 52a2198ad7
commit 446b5afd13
4 changed files with 125 additions and 24 deletions

View File

@ -21,7 +21,7 @@ CFLAGS+=-mfpu=neon -march=armv8-a -mtune=cortex-a53
endif
PROG=dump_data test_lpcnet test_vec quant_feat tcodec2_pitch weight tdump tweak_pitch quant_test \
quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec
quant2c diff32 quant_enc quant_dec lpcnet_enc lpcnet_dec idct
all: $(PROG)
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/codec2_pitch.o
@ -106,9 +106,14 @@ weight_objs := src/weight.o
weight_deps := $(weight_objs:.o=.d)
weight: $(weight_objs)
gcc -o $@ $(CFLAGS) $(weight_objs) -lm
-include $(weight_deps)
# idct: command line tool linked from src/idct.o and the src/ objects listed below, plus libm (-lm)
idct_objs := src/idct.o src/freq.o src/kiss_fft.o src/celt_lpc.o src/pitch.o
idct_deps := $(idct_objs:.o=.d)
idct: $(idct_objs)
gcc -o $@ $(CFLAGS) $(idct_objs) -lm
# leading '-' keeps make going when the .d dependency files do not exist yet
-include $(idct_deps)
tweak_pitch_objs := src/tweak_pitch.o
tweak_pitch_deps := $(tweak_pitch_objs:.o=.d)
tweak_pitch: $(tweak_pitch_objs)

28
src/idct.c 100644
View File

@ -0,0 +1,28 @@
/*
idct.c
David Rowe Mar 2019
inverse DCT so we can experiment with training in the Ly (log magnitude) domain.
*/
#include <assert.h>
#include <stdio.h>
#include <math.h>
#include "freq.h"
#define NB_BANDS 18
/*
  Filter: reads frames of NB_BANDS floats (dctLy) from stdin, applies idct()
  to each frame, and writes the resulting NB_BANDS floats (Ly) to stdout.

  A trailing partial frame (fewer than NB_BANDS floats) is dropped, as the
  loop only runs while a full frame was read.

  Returns 0 on success, 1 if a read or write error occurred.
*/
int main(void) {
    float dctLy[NB_BANDS], Ly[NB_BANDS];
    FILE *fin = stdin;
    FILE *fout = stdout;

    while (fread(dctLy, sizeof(float), NB_BANDS, fin) == NB_BANDS) {
        idct(Ly, dctLy);
        /* explicit check rather than assert(), so a failed write is still
           detected when the program is compiled with NDEBUG */
        if (fwrite(Ly, sizeof(float), NB_BANDS, fout) != NB_BANDS) {
            fprintf(stderr, "idct: error writing output\n");
            return 1;
        }
    }

    /* fread() returning short means EOF or error; tell the two apart */
    if (ferror(fin)) {
        fprintf(stderr, "idct: error reading input\n");
        return 1;
    }
    return 0;
}

View File

@ -52,6 +52,7 @@ int main(int argc, char *argv[]) {
FILE *fpitch = NULL;
float Fs = 16000.0;
float uniform_step = 0.0;
float uniform_step2 = 0.0;
int mbest_survivors = 0;
char label[80] = "";
/* experimental limits for dctLy[0], first cepstral */
@ -62,6 +63,7 @@ int main(int argc, char *argv[]) {
float pitch_gain_bias = 0.0;
int pitch_bits = 0;
int small_vec = 0;
int logmag = 0;
for(i=0; i<MAX_STAGES*NB_BANDS*MAX_ENTRIES; i++) vq[i] = 0.0;
@ -74,19 +76,21 @@ int main(int argc, char *argv[]) {
{"hard", required_argument, 0, 'h'},
{"label", required_argument, 0, 'l'},
{"mbest", required_argument, 0, 'm'},
{"mag", no_argument, 0, 'i'}, /* flag only: 'i' has no ':' in the optstring and its handler ignores optarg */
{"pitchquant", required_argument, 0, 'o'},
{"pred", required_argument, 0, 'p'},
{"quant", required_argument, 0, 'q'},
{"stagevar", required_argument, 0, 's'},
{"uniform", required_argument, 0, 'u'},
{"verbose", no_argument, 0, 'v'},
{"uniform2", required_argument, 0, 'x'},
{"weight", no_argument, 0, 'w'},
{0, 0, 0, 0}
};
int opt_index = 0;
while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:", long_options, &opt_index)) != -1) {
while ((c = getopt_long (argc, argv, "ad:q:vs:f:p:e:u:l:m:h:wg:o:ix:", long_options, &opt_index)) != -1) {
switch (c) {
case 'a':
/* small cepstral vectors - zero out several bands */
@ -120,6 +124,10 @@ int main(int argc, char *argv[]) {
lower_limit = atof(optarg);
fprintf(stderr, "lower_limit: %f upper_limit: %f\n", lower_limit, upper_limit);
break;
case 'i':
/* work in log mag rather than cepstral domain */
logmag = 1;
break;
case 'l':
/* text label to print with results */
strcpy(label, optarg);
@ -156,10 +164,13 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Couldn't open: %s\n", fn);
exit(1);
}
/* count how many entries m of dimension k are in this VQ file */
m[num_stages] = 0;
while (fread(features, sizeof(float), k, fq) == (size_t)k) m[num_stages]++;
while (fread(features, sizeof(float), k, fq) == (size_t)k)
m[num_stages]++;
assert(m[num_stages] <= MAX_ENTRIES);
fprintf(stderr, "%d entries of vectors width %d\n", m[num_stages], k);
/* now load VQ into memory */
rewind(fq);
int rd = fread(&vq[num_stages*k*MAX_ENTRIES], sizeof(float), m[num_stages]*k, fq);
assert(rd == m[num_stages]*k);
@ -170,6 +181,11 @@ int main(int argc, char *argv[]) {
case 'u':
uniform_step = atof(optarg);
fprintf(stderr, "uniform quant step size: %3.2f dB\n", uniform_step);
uniform_step2 = uniform_step;
break;
case 'x':
uniform_step2 = atof(optarg);
fprintf(stderr, "uniform quant step size 12..17: %3.2f dB\n", uniform_step2);
break;
case 'v':
lpcnet_verbose = 1;
@ -181,6 +197,7 @@ int main(int argc, char *argv[]) {
fprintf(stderr,"usage: %s [Options]:\n [-d --decimation 1/2/3...]\n [-q --quant quantfile1,quantfile2,....]\n", argv[0]);
fprintf(stderr," [-g --gain pitch gain bias]\n");
fprintf(stderr," [-h --hard lowerLimitdB\n");
fprintf(stderr," [-i --mag\n");
fprintf(stderr," [-l --label txtLabel]\n");
fprintf(stderr," [-m --mbest survivors]\n [-o --pitchbits nBits]\n");
fprintf(stderr," [-p --pred predCoff]\n [-f --first firstElement]\n [-s --stagevar TxtFile]\n");
@ -190,19 +207,14 @@ int main(int argc, char *argv[]) {
}
}
fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d", dec, pred, num_stages, mbest_survivors, small_vec);
fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d small: %d logmag: %d",
dec, pred, num_stages, mbest_survivors, small_vec, logmag);
fprintf(stderr, "\n");
/* delay line so we can pass some features (like pitch and voicing) through unmodified */
float features_prev[dec+1][NB_FEATURES];
/* adjacent vectors used for linear interpolation, note only 0..17 and 38,39 used */
float features_lin[2][NB_FEATURES];
/* used for optional smoothing of features */
/*
float features_mem[NB_BANDS];
for(i=0; i<NB_BANDS; i++)
features_mem[i] = 0.0;
*/
for(d=0; d<dec+1; d++)
for(i=0; i<NB_FEATURES; i++)
@ -258,6 +270,13 @@ int main(int argc, char *argv[]) {
}
}
/* optionally convert cepstrals to log magnitudes */
if (logmag) {
float tmp[NB_BANDS];
idct(tmp, features);
for(i=0; i<NB_BANDS; i++) features[i] = tmp[i];
}
/* convert cepstrals to dB */
for(i=0; i<NB_BANDS; i++)
features[i] *= 10.0;
@ -324,9 +343,11 @@ int main(int argc, char *argv[]) {
features_quant[i] = features[i];
}
if (uniform_step != 0.0) {
for(i=0; i<NB_BANDS; i++) {
for(i=0; i<12; i++) {
features_quant[i] = uniform_step*round(features[i]/uniform_step);
//fprintf(stderr, "%d %f %f\n", i, features[i], features_quant[i]);
}
for(; i<NB_BANDS; i++) {
features_quant[i] = uniform_step2*round(features[i]/uniform_step2);
}
}
}
@ -400,6 +421,13 @@ int main(int argc, char *argv[]) {
for(i=0; i<NB_BANDS; i++)
features_out[i] *= 1/10.0;
/* if optionally log magnitudes convert back to cepstrals */
if (logmag) {
float tmp[NB_BANDS];
dct(tmp, features_out);
for(i=0; i<NB_BANDS; i++) features_out[i] = tmp[i];
}
/* need to recompute LPCs after every frame, as we have quantised, or interpolated */
lpc_from_cepstrum(&features_out[2*NB_BANDS+3], features_out);

View File

@ -1,17 +1,57 @@
#!/bin/sh
# train_direct.sh
# David Rowe March 2019
# Train multi-stage VQ direct (non predictive) for LPCNet
PATH=/home/david/codec2-dev/build_linux/misc/
VQTRAIN=$PATH/vqtrain
EXTRACT=$PATH/extract
VQTRAIN=/home/david/codec2-dev/build_linux/misc/vqtrain
K=8
PATH=$PATH:/home/david/codec2-dev/build_linux/misc/
if [ $# -lt 1 ]; then
echo "usage: ./train_direct.sh [-i] VQprefix"
echo " $ ./train_direct.sh direct_v1"
echo " -i work in Ly (log magnitude) domain"
exit 1
fi
# Scan the command line for the optional -i flag; any other argument is ignored here.
# NOTE(review): shift always drops the first positional parameter, whichever
# argument matched, so this only works when -i is given before VQprefix.
for i in "$@"
do
case $i in
-i)
# select the Ly (log magnitude) domain, tested later with [ -z "$LOGMAG" ]
LOGMAG=1
shift # past argument=value
;;
esac
done
VQ_NAME=$1
echo $VQ_NAME
K=18
FINAL_K=12
STOP=1E-3
echo "*********"
echo "Direct"
echo "*********"
$EXTRACT all_speech_features.f32 all_speech_direct.f32 0 7 10 0
$VQTRAIN all_speech_direct.f32 $K 2048 direct_stage1.f32 sd1.f32
$VQTRAIN sd1.f32 $K 2048 direct_stage2.f32 sd2.f32
$VQTRAIN sd2.f32 $K 2048 direct_stage3.f32 sd3.f32
$VQTRAIN sd3.f32 $K 2048 direct_stage4.f32 sd4.f32
$VQTRAIN sd4.f32 $K 2048 direct_stage5.f32 sd5.f32
# Build the stage 1 training file $VQ_NAME'_s0.f32': run extract into a
# temporary file, then pipe that through ./weight (default) or ./idct (-i).
# NOTE(review): $t is never removed after use.
t=$(mktemp)
# -e is given K-1; presumably the index of the last element to keep - TODO confirm against extract's usage
extract -e `expr $K - 1` -g 10 all_speech_features_5e6.f32 $t
if [ -z "$LOGMAG" ]; then
echo "weighting dctLy[0] ...."
cat $t | ./weight > $VQ_NAME'_s0.f32'
else
echo "working in Ly (log magnitude) domain"
cat $t | ./idct > $VQ_NAME'_s0.f32'
fi
vqtrain $VQ_NAME'_s0.f32' $K 2048 $VQ_NAME'_stage1.f32' -r $VQ_NAME'_s1.f32' -s $STOP
vqtrain $VQ_NAME'_s1.f32' $K 2048 $VQ_NAME'_stage2.f32' -r $VQ_NAME'_s2.f32' -s $STOP
vqtrain $VQ_NAME'_s2.f32' $K 2048 $VQ_NAME'_stage3.f32' -r $VQ_NAME'_s3.f32' -s $STOP
# Final stage(s), trained on the -r output of stage 3 ($VQ_NAME'_s3.f32').
# Without -i: two more stages of $K elements.
# With -i: one stage of $FINAL_K elements, trained on a temporary file that
# extract produces from the stage 3 output.
if [ -z "$LOGMAG" ]; then
    echo "final two stages $K elements"
    vqtrain $VQ_NAME'_s3.f32' $K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s5.f32' -s $STOP
    # stage 5 reads the file stage 4 wrote with -r (_s5.f32); no command in
    # this script ever produces _s4.f32
    vqtrain $VQ_NAME'_s5.f32' $K 2048 $VQ_NAME'_stage5.f32' -r $VQ_NAME'_s6.f32' -s $STOP
else
    echo "final stage $FINAL_K elements"
    t=$(mktemp)
    extract -e `expr $FINAL_K - 1` -t $K $VQ_NAME'_s3.f32' $t
    vqtrain $t $FINAL_K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s5.f32' -s $STOP
fi