direct-split integrated into lpcnet_enc/lpcnet_dec, and it is indeed better than the previous pred scheme in the presence of bit errors. Stands up to 1% BER OK, with few pops and crackles

pull/1/head
David 2019-03-23 19:54:39 +10:30
parent 80e9c3821c
commit 998cacc7e6
4 changed files with 57 additions and 21 deletions

View File

@ -24,7 +24,7 @@ Clone this repo, then:
$ make
```
The quantiser files these experiments (pred_v2.tgz and split.tgz) are [here](http://rowetel.com/downloads/deep/lpcnet_quant)
The quantiser files used for these experiments (pred_v2.tgz and split.tgz) are [here](http://rowetel.com/downloads/deep/lpcnet_quant)
## Exploring Features
@ -118,7 +118,7 @@ Four stage VQ of log magnitudes (Ly), 11 bits (2048 entries) per stage, First 3
sox ~/Desktop/deep/quant/wia.wav -t raw - | ./dump_data --c2pitch --test - - | ./quant_feat -d 3 -i -p 0 --mbest 5 -q split_stage1.f32,split_stage2.f32,split_stage3.f32,split_stage4.f32 | ./test_lpcnet - - | aplay -f S16_LE -r 16000
```
Four stage VQ of Cepstrals (DCT of Ly), 11 bits (2048 entries) per stage, 18 element wide vectors. We quantise the predictor output.
Compare this to four stage predictive VQ of Cepstrals (DCT of Ly), 11 bits (2048 entries) per stage, 18 element wide vectors. We quantise the predictor output.
```
sox ~/Desktop/deep/quant/wia.wav -t raw - | ./dump_data --c2pitch --test - - | ./quant_feat -d 3 -w --mbest 5 -q pred_v2_stage1.f32,pred_v2_stage2.f32,pred_v2_stage3.f32,pred_v2_stage4.f32 | ./test_lpcnet - - | aplay -f S16_LE -r 16000

View File

@ -44,9 +44,13 @@
#undef NB_FEATURES
#include "lpcnet.h"
// Two sorts of VQs available
extern int pred_num_stages;
extern float pred_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
extern int pred_m[MAX_STAGES];
extern int direct_split_num_stages;
extern float direct_split_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
extern int direct_split_m[MAX_STAGES];
int main(int argc, char **argv) {
FILE *fin, *fout;
@ -60,23 +64,27 @@ int main(int argc, char **argv) {
int pitch_bits = 6;
float ber = 0.0;
int num_stages = pred_num_stages;
int *m = pred_m;
float *vq = pred_vq;
int logmag = 0;
/* quantiser options */
static struct option long_options[] = {
{"ber", required_argument, 0, 'b'},
{"decimate", required_argument, 0, 'd'},
{"numstages", required_argument, 0, 'n'},
{"pitchquant", required_argument, 0, 'o'},
{"pred", required_argument, 0, 'p'},
{"verbose", no_argument, 0, 'v'},
{"ber", required_argument, 0, 'b'},
{"decimate", required_argument, 0, 'd'},
{"numstages", required_argument, 0, 'n'},
{"pitchquant", required_argument, 0, 'o'},
{"pred", required_argument, 0, 'p'},
{"directsplit", no_argument, 0, 's'},
{"verbose", no_argument, 0, 'v'},
{0, 0, 0, 0}
};
int c;
int opt_index = 0;
while ((c = getopt_long (argc, argv, "b:d:n:o:p:v", long_options, &opt_index)) != -1) {
while ((c = getopt_long (argc, argv, "b:d:n:o:p:sv", long_options, &opt_index)) != -1) {
switch (c) {
case 'b':
ber = atof(optarg);
@ -98,6 +106,10 @@ int main(int argc, char **argv) {
pred = atof(optarg);
fprintf(stderr, "pred = %f\n", pred);
break;
case 's':
m = direct_split_m; vq = direct_split_vq; pred = 0.0; logmag = 1; weight = 1.0;
fprintf(stderr, "split VQ\n");
break;
case 'v':
lpcnet_verbose = 1;
break;
@ -107,13 +119,13 @@ int main(int argc, char **argv) {
fprintf(stderr," [-d --decimation 1/2/3...]\n");
fprintf(stderr," [-n --numstages]\n [-o --pitchbits nBits]\n");
fprintf(stderr," [-p --pred predCoff]\n");
fprintf(stderr," [-s --split]\n");
fprintf(stderr," [-v --verbose]\n");
exit(1);
}
}
LPCNET_QUANT *q = lpcnet_quant_create(num_stages, pred_m, pred_vq);
LPCNET_QUANT *q = lpcnet_quant_create(num_stages, m, vq);
q->weight = weight; q->pred = pred; q->mbest = mbest_survivors;
q->pitch_bits = pitch_bits; q->dec = dec;
lpcnet_quant_compute_bits_per_frame(q);
@ -129,7 +141,7 @@ int main(int argc, char **argv) {
fin = stdin;
fout = stdout;
int nbits = 0, nerrs = 0;
int nbits = 0, nerrs = 0, i;
do {
float in_features[NB_TOTAL_FEATURES];
@ -151,6 +163,12 @@ int main(int argc, char **argv) {
}
lpcnet_frame_to_features(q, frame, in_features);
/* optionally convert log magnitudes back to cepstrals */
if (logmag) {
float tmp[NB_BANDS];
dct(tmp, in_features);
for(i=0; i<NB_BANDS; i++) in_features[i] = tmp[i];
}
RNN_COPY(features, in_features, NB_TOTAL_FEATURES);
RNN_CLEAR(&features[18], 18);

View File

@ -39,9 +39,13 @@
#include "lpcnet_dump.h"
#include "lpcnet_quant.h"
// Two sorts of VQs available
extern int pred_num_stages;
extern float pred_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
extern int pred_m[MAX_STAGES];
extern int direct_split_num_stages;
extern float direct_split_vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES];
extern int direct_split_m[MAX_STAGES];
int main(int argc, char **argv) {
FILE *fin, *fout;
@ -54,22 +58,26 @@ int main(int argc, char **argv) {
float weight = 1.0/sqrt(NB_BANDS);
int pitch_bits = 6;
int num_stages = pred_num_stages;
int *m = pred_m;
float *vq = pred_vq;
int logmag = 0;
/* quantiser options */
static struct option long_options[] = {
{"decimate", required_argument, 0, 'd'},
{"numstages", required_argument, 0, 'n'},
{"pitchquant", required_argument, 0, 'o'},
{"pred", required_argument, 0, 'p'},
{"verbose", no_argument, 0, 'v'},
{"decimate", required_argument, 0, 'd'},
{"numstages", required_argument, 0, 'n'},
{"pitchquant", required_argument, 0, 'o'},
{"pred", required_argument, 0, 'p'},
{"directsplit", no_argument, 0, 's'},
{"verbose", no_argument, 0, 'v'},
{0, 0, 0, 0}
};
int c;
int opt_index = 0;
while ((c = getopt_long (argc, argv, "d:n:o:p:v", long_options, &opt_index)) != -1) {
while ((c = getopt_long (argc, argv, "d:n:o:p:sv", long_options, &opt_index)) != -1) {
switch (c) {
case 'd':
dec = atoi(optarg);
@ -87,20 +95,24 @@ int main(int argc, char **argv) {
pred = atof(optarg);
fprintf(stderr, "pred = %f\n", pred);
break;
case 's':
m = direct_split_m; vq = direct_split_vq; pred = 0.0; logmag = 1; weight = 1.0;
fprintf(stderr, "split VQ\n");
break;
case 'v':
lpcnet_verbose = 1;
break;
default:
fprintf(stderr,"usage: %s [Options]:\n [-d --decimation 1/2/3...]\n", argv[0]);
fprintf(stderr," [-n --numstages]\n [-o --pitchbits nBits]\n");
fprintf(stderr," [-p --pred predCoff]\n");
fprintf(stderr," [-p --pred predCoff] [-s --split]\n");
fprintf(stderr," [-v --verbose]\n");
exit(1);
}
}
LPCNET_DUMP *d = lpcnet_dump_create();
LPCNET_QUANT *q = lpcnet_quant_create(num_stages, pred_m, pred_vq);
LPCNET_QUANT *q = lpcnet_quant_create(num_stages, m, vq);
q->weight = weight; q->pred = pred; q->mbest = mbest_survivors;
q->pitch_bits = pitch_bits; q->dec = dec;
lpcnet_quant_compute_bits_per_frame(q);
@ -125,6 +137,12 @@ int main(int argc, char **argv) {
int nread = fread(&d->tmp, sizeof(short), FRAME_SIZE, fin);
if (nread != FRAME_SIZE) break;
lpcnet_dump(d,x,features);
/* optionally convert cepstrals to log magnitudes */
if (logmag) {
float tmp[NB_BANDS];
idct(tmp, features);
for(i=0; i<NB_BANDS; i++) features[i] = tmp[i];
}
if (lpcnet_features_to_frame(q, features, frame))
bits_written += fwrite(frame, sizeof(char), q->bits_per_frame, fout);
fflush(stdin);

View File

@ -417,7 +417,7 @@ int main(int argc, char *argv[]) {
for(i=0; i<NB_BANDS; i++)
features_out[i] *= 1/10.0;
/* if optionally log magnitudes convert back to cepstrals */
/* optionally convert log magnitudes back to cepstrals */
if (logmag) {
float tmp[NB_BANDS];
dct(tmp, features_out);