#!/bin/bash -x
# process.sh
# David Rowe Jan 2019
#
# 1. Process an input set of wave files using LPCNet under a variety of
#    conditions.
# 2. Name output files to make them convenient to listen to in a file manager.
# 3. Generate a HTML table of samples for convenient replay on the web.
# 4. Generate a bunch of other HTML files and PNGs.
#
# usage:
#
#   ./process.sh [--lite] OutPath
#   ./process.sh ~/tmp/lpcnet_out
#
# To compare, generate new samples in OutPathA, and compare with those from a
# previous run of this script in OutPathB:
#
#   ./process.sh OutPathA OutPathB
#
# --lite generates a much smaller page with just the basic LPCNet model case.

# Print the command line synopsis and abort.
usage() {
  echo "usage: ./process.sh [--lite] /output/path/1 [/output/path/2]"
  echo "       $ ./process.sh ~/tmp/lpcnet_outA"
  exit 1
}

# Extract LPCNet features from $WAVIN_PATH/$1.wav and write them to stdout.
extract_features() {
  sox "$WAVIN_PATH/$1.wav" -t raw - | ./dump_data --test --c2pitch - -
}

# Synthesise the features arriving on stdin into $WAVOUT_PATH/$1.wav.
synthesise() {
  ./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/$1.wav"
}

# command line arguments ---------------------------------------------------

# Separate the --lite flag from the positional arguments.  The flag may appear
# anywhere on the command line; collecting the positionals into an array (and
# not shifting while iterating over "$@") guarantees that the output path is
# never the argument that gets dropped.
lite=0
positional=()
for arg in "$@"; do
  case "$arg" in
    --lite) lite=1 ;;
    *) positional+=("$arg") ;;
  esac
done
set -- "${positional[@]}"

# Count the arguments only after the flag has been removed, otherwise
# "./process.sh --lite" would continue with an empty OUTPATH and start
# creating/removing files directly under "/".
if [ $# -lt 1 ]; then
  usage
fi

OUTPATH=$1
if [ $# -eq 2 ]; then
  OUTPATHB=$2
fi

# set these paths to suit your system
CODEC2_PATH=$HOME/codec2-dev/build_linux/src
WAVIN_PATH=$HOME/Desktop/deep/quant
WAVOUT_PATH=$OUTPATH/wav
PATH=$PATH:$CODEC2_PATH
STATS=$OUTPATH/stats.txt
HTML=$OUTPATH/index.html
PNG_PATH=$OUTPATH/png
F32_PATH=$OUTPATH/f32
SV_PATH=$OUTPATH/sv

# Space separated list of sample names (without .wav); deliberately expanded
# unquoted below so that it word-splits into one name per iteration.
WAV_FILES="all birch canadian glue oak separately wanted wia"

# check we can find wave files
for f in $WAV_FILES; do
  if [ ! -e "$WAVIN_PATH/$f.wav" ]; then
    echo "$WAVIN_PATH/$f.wav Not found"
  fi
done

# check we can find codec 2 tools
if [ ! -e "$CODEC2_PATH/c2enc" ]; then
  echo "$CODEC2_PATH/c2enc not found"
fi

#
# OK lets start processing ------------------------------------------------
#

mkdir -p "$F32_PATH"
mkdir -p "$SV_PATH"
mkdir -p "$WAVOUT_PATH"
mkdir -p "$PNG_PATH"
rm -f "$STATS"

# base names of the VQ codebook files passed to quant_feat -q (stage number
# and .f32 are appended)
vq=pred_v2_stage
vq2=split_stage

# cp in originals
for f in $WAV_FILES; do
  cp "$WAVIN_PATH/$f.wav" "$WAVOUT_PATH/${f}_0_orig.wav"
done

# Unquantised, baseline analysis-synthesis model, 10ms updates
for f in $WAV_FILES; do
  extract_features "$f" | synthesise "${f}_1_uq"
done

if [ $lite -eq 0 ]; then

  # 3dB uniform quantiser, 10ms updates
  for f in $WAV_FILES; do
    label=$(printf "3dB %-10s" "$f")
    # only quant_feat's stderr is appended to the stats file
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 1 --uniform 3 2>>"$STATS" \
      | synthesise "${f}_2_3dB"
  done

  # decimate features to 20ms updates, then linearly interpolate back up to
  # 10ms updates
  for f in $WAV_FILES; do
    extract_features "$f" \
      | ./quant_feat -d 2 \
      | synthesise "${f}_3_20ms"
  done

  # decimate features to 20ms updates, then linearly interpolate back up to
  # 10ms updates, incl pitch + voicing quant
  for f in $WAV_FILES; do
    extract_features "$f" \
      | ./quant_feat -d 2 -o 6 \
      | synthesise "${f}_4_20ms_pq"
  done

  # 33 bit 3 stage VQ searched with mbest algorithm, 20ms updates
  for f in $WAV_FILES; do
    label=$(printf "33bit_20ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 2 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32" \
          -s "$SV_PATH/${f}_5_33bit_20ms.txt" 2>>"$STATS" \
      | synthesise "${f}_5_33bit_20ms"
  done

  # 33 bit 3 stage VQ searched with mbest algorithm, 30ms updates
  for f in $WAV_FILES; do
    label=$(printf "33bit_30ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32" \
          -s "$SV_PATH/${f}_6_33bit_30ms.txt" 2>>"$STATS" \
      | synthesise "${f}_6_33bit_30ms"
  done

  # 44 bit 4 stage VQ searched with mbest algorithm, 30ms updates
  for f in $WAV_FILES; do
    label=$(printf "44bit_30ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32,${vq}4.f32" \
          -s "$SV_PATH/${f}_7_44bit_30ms.txt" 2>>"$STATS" \
      | synthesise "${f}_7_44bit_30ms"
  done

  # non-predictive (direct) 44 bit 4 stage split VQ searched with mbest
  # algorithm, 30ms updates
  for f in $WAV_FILES; do
    label=$(printf "44bit_sp_30ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 3 -o 6 -i -p 0 --mbest 5 \
          -q "${vq2}1.f32,${vq2}2.f32,${vq2}3.f32,${vq2}4.f32" \
          -s "$SV_PATH/${f}_8_44bit_sp_30ms.txt" 2>>"$STATS" \
      | synthesise "${f}_8_44bit_sp_30ms"
  done

fi # ... if [ $lite -eq 0 ] ...

#
# Create a HTML table of samples ----------------------------------------------------
#

cat << EOF > "$HTML"
Term | Description |
---|---|
Orig | Original source input speech |
UQ | Baseline LPCNet synthesis using unquantised features |
3dB | Cepstral features uniform quantiser with 3dB steps |
20ms | Cepstral features decimated to 20ms frame rate, linear interpolation back to 10ms |
20ms_pq | As above but pitch quantised to 6 bits, pitch gain to 2 bits |
33bit_20ms | 3 stage VQ of prediction error, 11 bits/stage, at 20ms frame rate, (33+8)/0.02 = 2050 bits/s |
33bit_30ms | Same 33 bit VQ, but decimated down to 30ms rate, (33+8)/0.03 = 1367 bits/s |
44bit_30ms | 4 stage VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s |
44bit_sp_30ms | Direct (non predictive) 4 stage split VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s. First 3 stages are 18 elements wide, last stage is just 12. We quantise log magnitudes (Ly) rather than Cepstrals (dct(Ly)). Targeted at HF radio channel where predictive schemes perform poorly due to high bit error/packet error rate |
EOF fi function heading_row { w=$(echo $WAV_FILES | cut -d ' ' -f 1) headings=$(ls $WAVOUT_PATH/$w* | sed -r "s/.*$w.[[:digit:]]_//" | sed -r 's/.wav//') printf "
%s | \n" $f >> $HTML for w in $files do b=$(basename $w) if [ -z "${OUTPATHB}" ]; then # no comparison printf "play | \n" $b >> $HTML else # compare with another process.sh run printf "play (playB) | \n" $b $OUTPATHB/wav/$b >> $HTML fi done printf "
\n" >> $HTML # HTML table of results --------------------------------------------------------- function table_of_values { printf "
\n" >> $HTML } if [ $lite -eq 0 ]; then table_of_values "var" "Variance" table_of_values "sd" "Standard Deviation" table_of_values "outliers" "Outliers" # # Table of Speech/Pitch countours ---------------------------------------------- # printf "
\n" >> $HTML table_of_values "quant" "Quantiser Error Countours" fi printf "\n\n" >> $HTML