first pass as lpcnet_enc

pull/1/head
David 2019-02-26 16:54:10 +11:00
parent 0adc83d638
commit b372e8e57d
5 changed files with 153 additions and 3 deletions

View File

@ -21,7 +21,7 @@ CFLAGS+=-mfpu=neon -march=armv8-a -mtune=cortex-a53
endif
PROG=dump_data test_lpcnet test_vec quant_feat tcodec2_pitch weight tdump tweak_pitch quant_test \
quant2c diff32 quant_enc quant_dec
quant2c diff32 quant_enc quant_dec lpcnet_enc
all: $(PROG)
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/codec2_pitch.o
@ -76,6 +76,13 @@ quant_dec: $(quant_dec_objs)
gcc -o $@ $(CFLAGS) $(quant_dec_objs) -lm
-include $(quant_dec_deps)
# lpcnet_enc: command line encoder. Links the objects listed in
# lpcnet_enc_objs against libm and libcodec2; the matching .d files are
# pulled in (if present) for header dependency tracking.
lpcnet_enc_objs := src/lpcnet_enc.o src/freq.o src/kiss_fft.o src/celt_lpc.o src/pitch.o src/codec2_pitch.o src/mbest.o src/lpcnet_quant.o \
src/4stage_pred_vq.o src/lpcnet_dump.o
lpcnet_enc_deps := $(lpcnet_enc_objs:.o=.d)
lpcnet_enc: $(lpcnet_enc_objs)
gcc -o $@ $(CFLAGS) $(lpcnet_enc_objs) -lm -lcodec2
-include $(lpcnet_enc_deps)
quant2c_objs:= src/quant2c.o
quan2c_deps := $(quant2c_objs:.o=.d)
quant2c: $(quant2c_objs)

View File

@ -132,3 +132,7 @@ Fully quantised at (44+8)/0.03 = 1733 bits/s:
```sox -r 16000 ~/Downloads/wianews-2019-01-20.s16 -t raw - trim 200 | ./dump_data --c2pitch --test - - | ./quant_feat -g 0.25 -o 6 -d 3 -w --mbest 5 -q pred_v2_stage1.f32,pred_v2_stage2.f32,pred_v2_stage3.f32,pred_v2_stage4.f32 | ./test_lpcnet - - | aplay -f S16_LE -r 16000```
Same thing with the quantisation code packaged up into library functions. Between quant_enc and quant_dec are 52 bit frames every 30ms:
```cat ~/Downloads/speech_orig_16k.s16 | ./dump_data --c2pitch --test - - | ./quant_enc | ./quant_dec | ./test_lpcnet - - | aplay -f S16_LE -r 16000```

139
src/lpcnet_enc.c 100644
View File

@ -0,0 +1,139 @@
/*
lpcnet_enc.c
Feb 2019
LPCNet to bit stream encoder, takes 16 kHz signed 16 bit samples on
stdin, outputs fully quantised bit stream on stdout (in 1 bit per
char format).
*/
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <getopt.h>
#include "lpcnet_dump.h"
#include "lpcnet_quant.h"
/* Default VQ codebook description, defined in another translation unit
   (presumably the generated 4stage_pred_vq.c linked by the Makefile -
   confirm).  All three are handed to lpcnet_quant_create() in main(). */
extern int num_stages;                            /* number of VQ stages; can be overridden with -n */
extern float vq[MAX_STAGES*NB_BANDS*MAX_ENTRIES]; /* codebook storage for all stages */
extern int m[MAX_STAGES];                         /* per-stage table passed with vq (presumably entries per stage - confirm) */
/*
  Entry point for lpcnet_enc.

  Reads FRAME_SIZE blocks of 16 bit samples from stdin, converts each block
  to a feature vector with lpcnet_dump(), and passes the features to
  lpcnet_features_to_frame().  Whenever that call returns non-zero the
  resulting frame (q->bits_per_frame chars) is written to stdout.

  Options:
    -d --decimate   N   decimation, must be >= 1 (default 3)
    -n --numstages  N   number of VQ stages
    -o --pitchquant N   bits used for pitch (default 6)
    -p --pred       X   predictor coefficient (default 0.9)
    -v --verbose        sets lpcnet_verbose

  Returns 0 on success; exits with status 1 on a bad option or an invalid
  quantiser configuration.
*/
int main(int argc, char **argv) {
    FILE *fin, *fout;

    /* quantiser defaults */
    int   dec = 3;
    float pred = 0.9;
    int   mbest_survivors = 5;
    float weight = 1.0/sqrt(NB_BANDS);
    int   pitch_bits = 6;

    /* quantiser options */
    static struct option long_options[] = {
        {"decimate",   required_argument, 0, 'd'},
        {"numstages",  required_argument, 0, 'n'},
        {"pitchquant", required_argument, 0, 'o'},
        {"pred",       required_argument, 0, 'p'},
        {"verbose",    no_argument,       0, 'v'},
        {0, 0, 0, 0}
    };

    int c;
    int opt_index = 0;

    while ((c = getopt_long (argc, argv, "d:n:o:p:v", long_options, &opt_index)) != -1) {
        switch (c) {
        case 'd':
            dec = atoi(optarg);
            fprintf(stderr, "dec = %d\n", dec);
            break;
        case 'n':
            num_stages = atoi(optarg);
            fprintf(stderr, "%d VQ stages\n",  num_stages);
            break;
        case 'o':
            pitch_bits = atoi(optarg);
            fprintf(stderr, "pitch quantised to %d bits\n",  pitch_bits);
            break;
        case 'p':
            pred = atof(optarg);
            fprintf(stderr, "pred = %f\n", pred);
            break;
        case 'v':
            lpcnet_verbose = 1;
            break;
        default:
            /* long option names printed here match long_options[] above */
            fprintf(stderr,"usage: %s [Options]:\n  [-d --decimate 1/2/3...]\n", argv[0]);
            fprintf(stderr,"  [-n --numstages]\n  [-o --pitchquant nBits]\n");
            fprintf(stderr,"  [-p --pred predCoff]\n");
            fprintf(stderr,"  [-v --verbose]\n");
            exit(1);
        }
    }

    /* dec is used as a divisor for the reported bit rate and sets the
       frame period, so zero or negative values make no sense */
    if (dec < 1) {
        fprintf(stderr, "%s: decimation must be >= 1 (got %d)\n", argv[0], dec);
        exit(1);
    }

    LPCNET_DUMP  *d = lpcnet_dump_create();
    LPCNET_QUANT *q = lpcnet_quant_create(num_stages, m, vq);
    q->weight = weight; q->pred = pred; q->mbest = mbest_survivors;
    q->pitch_bits = pitch_bits; q->dec = dec;
    lpcnet_quant_compute_bits_per_frame(q);

    fprintf(stderr, "dec: %d pred: %3.2f num_stages: %d mbest: %d bits_per_frame: %d frame: %2d ms bit_rate: %5.2f bits/s",
            q->dec, q->pred, q->num_stages, q->mbest, q->bits_per_frame, dec*10, (float)q->bits_per_frame/(dec*0.01));
    fprintf(stderr, "\n");

    /* frame[] below is a variable length array; a non-positive size is
       undefined behaviour, so reject it before declaring the array */
    if (q->bits_per_frame <= 0) {
        fprintf(stderr, "%s: invalid bits_per_frame %d\n", argv[0], q->bits_per_frame);
        lpcnet_dump_destroy(d); lpcnet_quant_destroy(q);
        exit(1);
    }

    fin = stdin;
    fout = stdout;

    float x[FRAME_SIZE];
    float features[LPCNET_NB_FEATURES];
    char  frame[q->bits_per_frame];
    int   i;
    int   bits_written = 0;

    while (1) {
        /* note one frame delay: x[] takes the samples left in d->tmp by
           the previous iteration, then d->tmp is refilled from the input */
        for (i=0; i<FRAME_SIZE; i++) x[i] = d->tmp[i];
        size_t nread = fread(&d->tmp, sizeof(short), FRAME_SIZE, fin);
        if (nread != (size_t)FRAME_SIZE) break;   /* EOF or short final block */

        lpcnet_dump(d, x, features);

        /* a frame is only emitted when the quantiser reports one is ready */
        if (lpcnet_features_to_frame(q, features, frame))
            bits_written += (int)fwrite(frame, sizeof(char), q->bits_per_frame, fout);

        /* flush so a downstream process in a pipe sees each frame promptly;
           only the output stream is flushed, fflush() on an input stream
           is undefined */
        fflush(fout);
    }

    lpcnet_dump_destroy(d); lpcnet_quant_destroy(q);
    fprintf(stderr, "bits_written %d\n", bits_written);
    fclose(fin); fclose(fout);
    return 0;
}

View File

@ -99,7 +99,7 @@ int main(int argc, char *argv[]) {
}
fprintf(stderr, "bits_written %d\n", bits_written);
fclose(fin); fclose(fout); if (lpcnet_fsv != NULL) fclose(lpcnet_fsv);
fclose(fin); fclose(fout); lpcnet_quant_destroy(q);
}

View File

@ -72,7 +72,7 @@ int main(int argc, char **argv) {
int i;
int f=0;
while (1) {
/* note one frame delay */
/* note one frame delay */
for (i=0;i<FRAME_SIZE;i++) x[i] = d->tmp[i];
int nread = fread(&d->tmp, sizeof(short), FRAME_SIZE, f1);
if (nread != FRAME_SIZE) break;