#!/bin/bash
# process.sh
# David Rowe Jan 2019
#
# 1. Process an input set of wave files using LPCNet under a variety of
#    conditions.
# 2. Name output files to make them convenient to listen to in a file manager.
# 3. Generate a HTML table of samples for convenient replay on the web.
# 4. Generate a bunch of other HTML files and PNGs.
#
# usage:
#
#   ./process.sh [--lite] OutPath
#   ./process.sh ~/tmp/lpcnet_out
#
# To compare generate new samples OutPathA, and compare with those from a
# previous run of this script in OutPathB:
#
#   ./process.sh OutPathA OutPathB
#
# --lite generates a much smaller page with just the basic LPCNet model case
#
# The script runs ./dump_data, ./quant_feat, ./test_lpcnet and the Octave
# scripts under src/ via relative paths, so start it from the directory that
# contains them.
#
# NOTE(review): the HTML tags emitted below were reconstructed from the cell
# text and the printf argument counts of a copy of this script whose markup
# had been stripped - confirm tag names/attributes against the original.

# Trace every command.  Done with "set" rather than on the shebang so that it
# also applies when the script is started as "bash process.sh".
set -x

# command line arguments ------------------------------------------------------

# Pull --lite out of the argument list wherever it appears; everything else is
# kept, in order, as a positional argument.
lite=0
args=()
for arg in "$@"; do
  case "$arg" in
    --lite) lite=1 ;;
    *) args+=("$arg") ;;
  esac
done

if (( ${#args[@]} < 1 )); then
  echo "usage: ./process.sh [--lite] /output/path/1 [/output/path/2]"
  echo "       $ ./process.sh ~/tmp/lpcnet_outA"
  exit 1
fi

OUTPATH=${args[0]}
# Second path (optional): output directory of a previous run to compare with.
OUTPATHB=""
if (( ${#args[@]} == 2 )); then
  OUTPATHB=${args[1]}
fi

# set these paths to suit your system
CODEC2_PATH=$HOME/codec2-dev/build_linux/src
WAVIN_PATH=$HOME/Desktop/deep/quant
WAVOUT_PATH=$OUTPATH/wav
PATH=$PATH:$CODEC2_PATH
STATS=$OUTPATH/stats.txt
HTML=$OUTPATH/index.html
PNG_PATH=$OUTPATH/png
F32_PATH=$OUTPATH/f32
SV_PATH=$OUTPATH/sv
WAV_FILES=(all birch canadian glue oak separately wanted wia)

# check we can find wave files (warning only, processing still goes ahead)
for f in "${WAV_FILES[@]}"; do
  if [[ ! -e "$WAVIN_PATH/$f.wav" ]]; then
    echo "$WAVIN_PATH/$f.wav Not found" >&2
  fi
done

# check we can find codec 2 tools (warning only)
if [[ ! -e "$CODEC2_PATH/c2enc" ]]; then
  echo "$CODEC2_PATH/c2enc not found" >&2
fi

#
# OK lets start processing ------------------------------------------------
#

mkdir -p "$F32_PATH" "$SV_PATH" "$WAVOUT_PATH" "$PNG_PATH"
rm -f "$STATS"

# Prefixes of the VQ codebook files handed to quant_feat via -q.
vq=pred_v2_stage
vq2=split_stage

# Convert input sample $1 to raw audio with sox and pipe it through
# ./dump_data; the result is written to stdout.
extract_features() {
  sox "$WAVIN_PATH/$1.wav" -t raw - | ./dump_data --test --c2pitch - -
}

# Pipe stdin through ./test_lpcnet and have sox wrap its output (read as
# 16000 Hz, signed 16 bit, mono) into $WAVOUT_PATH/$1.wav.
synthesise() {
  ./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - "$WAVOUT_PATH/$1.wav"
}

# cp in originals
for f in "${WAV_FILES[@]}"; do
  cp "$WAVIN_PATH/$f.wav" "$WAVOUT_PATH/${f}_0_orig.wav"
done

# Unquantised, baseline analysis-synthesis model, 10ms updates
for f in "${WAV_FILES[@]}"; do
  extract_features "$f" | synthesise "${f}_1_uq"
done

if (( lite == 0 )); then

  # 3dB uniform quantiser, 10ms updates
  for f in "${WAV_FILES[@]}"; do
    label=$(printf "3dB %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 1 --uniform 3 2>>"$STATS" \
      | synthesise "${f}_2_3dB"
  done

  # decimate features to 20ms updates, then linearly interpolate back up to
  # 10ms updates
  for f in "${WAV_FILES[@]}"; do
    extract_features "$f" \
      | ./quant_feat -d 2 \
      | synthesise "${f}_3_20ms"
  done

  # decimate features to 20ms updates, then linearly interpolate back up to
  # 10ms updates, incl pitch + voicing quant
  for f in "${WAV_FILES[@]}"; do
    extract_features "$f" \
      | ./quant_feat -d 2 -o 6 \
      | synthesise "${f}_4_20ms_pq"
  done

  # 33 bit 3 stage VQ searched with mbest algorithm, 20ms updates
  for f in "${WAV_FILES[@]}"; do
    label=$(printf "33bit_20ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 2 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32" \
          -s "$SV_PATH/${f}_5_33bit_20ms.txt" 2>>"$STATS" \
      | synthesise "${f}_5_33bit_20ms"
  done

  # 33 bit 3 stage VQ searched with mbest algorithm, 30ms updates
  for f in "${WAV_FILES[@]}"; do
    label=$(printf "33bit_30ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32" \
          -s "$SV_PATH/${f}_6_33bit_30ms.txt" 2>>"$STATS" \
      | synthesise "${f}_6_33bit_30ms"
  done

  # 44 bit 4 stage VQ searched with mbest algorithm, 30ms updates
  for f in "${WAV_FILES[@]}"; do
    label=$(printf "44bit_30ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 \
          -q "${vq}1.f32,${vq}2.f32,${vq}3.f32,${vq}4.f32" \
          -s "$SV_PATH/${f}_7_44bit_30ms.txt" 2>>"$STATS" \
      | synthesise "${f}_7_44bit_30ms"
  done

  # non-predictive (direct) 44 bit 4 stage split VQ searched with mbest
  # algorithm, 30ms updates
  for f in "${WAV_FILES[@]}"; do
    label=$(printf "44bit_sp_30ms %-10s" "$f")
    extract_features "$f" \
      | ./quant_feat -l "$label" -d 3 -o 6 -i -p 0 --mbest 5 \
          -q "${vq2}1.f32,${vq2}2.f32,${vq2}3.f32,${vq2}4.f32" \
          -s "$SV_PATH/${f}_8_44bit_sp_30ms.txt" 2>>"$STATS" \
      | synthesise "${f}_8_44bit_sp_30ms"
  done

fi # ... if (( lite == 0 )) ...

#
# Create a HTML table of samples ----------------------------------------------------
#

# Start the page; this is the only write that truncates $HTML.
cat << 'EOF' > "$HTML"
<html>
<head>
<title>LPCNet Samples</title>
</head>
<body>
EOF

if (( lite == 0 )); then
  # Append (>>): a plain ">" here would throw away the page header above.
  cat << 'EOF' >> "$HTML"
<table>
<caption>Glossary</caption>
<tr><th>Term</th><th>Description</th></tr>
<tr><td>Orig</td><td>Original source input speech</td></tr>
<tr><td>UQ</td><td>Baseline LPCNet synthesis using unquantised features</td></tr>
<tr><td>3dB</td><td>Cesptral features uniform quantiser with 3dB steps</td></tr>
<tr><td>20ms</td><td>Cesptral features decimated to 20ms frame rate, linear interpolation back to 10ms</td></tr>
<tr><td>20ms_pq</td><td>As above but pitch quantised to 6 bits, pitch gain to 2 bits</td></tr>
<tr><td>33bit_20ms</td><td>3 stage VQ of prediction error, 11 bits/stage, at 20ms frame rate, (33+8)/0.02 = 2050 bits/s</td></tr>
<tr><td>33bit_30ms</td><td>Same 33 bit VQ, but decimated down to 30ms rate, (33+8)/0.03 = 1367 bits/s</td></tr>
<tr><td>44bit_30ms</td><td>4 stage VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s</td></tr>
<tr><td>44bit_sp_30ms</td><td>Direct (non predictive) 4 stage split VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s. First 3 stages are 18 elements wide, last stage is just 12. We quantise log magnitudes (Ly) rather than Ceptrals (dct(Ly)). Targeted at HF radio channel where predictive schemes perform poorly due to high bit error/packet error rate</td></tr>
</table>
<p>
EOF
fi

# Append a table heading row to $HTML: a "Sample" column followed by one
# column per processing condition.
# Globals: reads WAV_FILES, WAVOUT_PATH, HTML; sets the array "headings" to the
#          condition names, taken from the output files of the first sample
#          (e.g. all_5_33bit_20ms.wav -> 33bit_20ms).  table_of_values reads it.
heading_row() {
  local first=${WAV_FILES[0]}
  local path name h

  headings=()
  for path in "$WAVOUT_PATH/$first"*; do
    [[ -e "$path" ]] || continue    # glob matched nothing
    name=${path##*/}                         # drop the directory
    name=${name#"$first"?[[:digit:]]_}      # drop "<sample>_<digit>_"
    name=${name%.wav}                        # drop the extension
    headings+=("$name")
  done

  printf '<tr>\n  <th>Sample</th>\n' >> "$HTML"
  for h in "${headings[@]}"; do
    printf '  <th>%s</th>\n' "$h" >> "$HTML"
  done
  printf '</tr>\n' >> "$HTML"
}

# for each wave file, create a row

printf '<table>\n' >> "$HTML"
printf '<caption>Samples</caption>\n' >> "$HTML"
heading_row
for f in "${WAV_FILES[@]}"; do
  printf '<tr>\n  <td>%s</td>\n' "$f" >> "$HTML"
  for w in "$WAVOUT_PATH/$f"*; do
    [[ -e "$w" ]] || continue
    b=$(basename "$w")
    if [[ -z "$OUTPATHB" ]]; then
      # no comparison
      printf '  <td><a href="wav/%s">play</a></td>\n' "$b" >> "$HTML"
    else
      # compare with another process.sh run
      printf '  <td><a href="wav/%s">play</a> (<a href="%s">playB</a>)</td>\n' \
        "$b" "$OUTPATHB/wav/$b" >> "$HTML"
    fi
  done
  printf '</tr>\n' >> "$HTML"
done
printf '</table>\n<p>\n' >> "$HTML"

# HTML table of results ---------------------------------------------------------

# Append a table to $HTML with one row per sample and one column per
# processing condition.
# Arguments: $1 - what to put in the cells:
#                   var      - text following "var:" on the matching RESULTS
#                              line of $STATS
#                   sd       - text following "sd:" on that line
#                   outliers - bar chart PNG drawn by octave from the values
#                              following "dB =" on that line
#                   quant    - PNG made by src/plot_speech_quant.m from the
#                              file quant_feat saved with -s
#                 any other value produces "-" in every cell
#            $2 - table caption
# Globals:   reads WAV_FILES, STATS, PNG_PATH, SV_PATH, F32_PATH, HTML and
#            "headings" (refreshed by the heading_row call below).
table_of_values() {
  local kind=$1 caption=$2
  local f h value outliers png cmd t
  local -a sv_files

  printf '<table>\n' >> "$HTML"
  printf '<caption>%s</caption>\n' "$caption" >> "$HTML"
  heading_row

  # for each wave file, create a row
  for f in "${WAV_FILES[@]}"; do
    printf '<tr>\n  <td>%s</td>\n' "$f" >> "$HTML"

    for h in "${headings[@]}"; do
      case "$kind" in
        outliers)
          outliers=$(sed -n "s/RESULTS $h $f.*dB = \(.*\)/\1/p" "$STATS")
          if [[ -n "$outliers" ]]; then
            png="$PNG_PATH/${f}_${h}.png"
            cmd="graphics_toolkit ('gnuplot'); o=[$outliers]; bar([1-sum(o) o],'hist'); axis([0 4 0 1]); print(\"$png\",'-dpng','-S120,120')"
            octave --no-gui -qf --eval "$cmd"
            printf '  <td><img src="png/%s"></td>\n' "$(basename "$png")" >> "$HTML"
          else
            printf '  <td></td>\n' >> "$HTML"
          fi
          ;;

        quant)
          # The digit in the file name is not known here, so glob for it.  An
          # unmatched glob stays literal and then fails the -e test.
          sv_files=("$SV_PATH/$f"_?_"$h.txt")
          if [[ -e "${sv_files[0]}" ]]; then
            png="$PNG_PATH/${f}_${h}_quant.png"
            # number in front of the trailing "ms", e.g. 33bit_20ms -> 20
            t=$(printf '%s\n' "$h" | sed -n "s/.*_\(.*\)ms/\1/p")
            octave --no-gui -p src -qf src/plot_speech_quant.m \
              "$F32_PATH/$f.raw" "${sv_files[@]}" "$png" "$t"
            # NOTE(review): link + inline image inferred from the two %s
            # arguments; any size attributes of the original are unknown.
            printf '  <td><a href="png/%s"><img src="png/%s"></a></td>\n' \
              "$(basename "$png")" "$(basename "$png")" >> "$HTML"
          else
            printf '  <td></td>\n' >> "$HTML"
          fi
          ;;

        *)
          # extract variance / standard deviation from stats file
          value=""
          if [[ "$kind" == "var" ]]; then
            value=$(sed -n "s/RESULTS $h $f.*var: \(.*\) sd.*/\1/p" "$STATS")
          elif [[ "$kind" == "sd" ]]; then
            value=$(sed -n "s/RESULTS $h $f.*sd: \(.*\) n.*/\1/p" "$STATS")
          fi
          [[ -n "$value" ]] || value="-"
          printf '  <td>%s</td>\n' "$value" >> "$HTML"
          ;;
      esac
    done

    printf '</tr>\n' >> "$HTML"
  done
  printf '</table>\n<p>\n' >> "$HTML"
}

if (( lite == 0 )); then
  table_of_values "var" "Variance"
  table_of_values "sd" "Standard Deviation"
  table_of_values "outliers" "Outliers"

  #
  # Table of Speech/Pitch countours ----------------------------------------------
  #

  printf '<table>\n' >> "$HTML"
  printf '<caption>Pitch Countours</caption>\n' >> "$HTML"

  # heading row
  printf '<tr>\n' >> "$HTML"
  for f in "${WAV_FILES[@]}"; do
    printf '  <th>%s</th>\n' "$f" >> "$HTML"
  done
  printf '</tr>\n' >> "$HTML"

  # manually set pitch axis to make plots easier to read. TODO this breaks
  # when we add new samples, need an associative array
  mx=(400 200 200 400 400 200 400 200)
  count=0

  # row of links to PNGs
  printf '<tr>\n' >> "$HTML"
  for f in "${WAV_FILES[@]}"; do
    # The .raw file written here is also read by table_of_values "quant".
    sox "$WAVIN_PATH/$f.wav" -t raw "$F32_PATH/$f.raw"
    ./dump_data --test --c2pitch "$F32_PATH/$f.raw" "$F32_PATH/${f}_c2.f32"
    # ${mx[count]} is unquoted on purpose: a sample without an entry in mx
    # then passes no axis argument at all instead of an empty one.
    # shellcheck disable=SC2086
    octave --no-gui -p src -qf src/plot_speech_pitch.m \
      "$F32_PATH/$f.raw" "$F32_PATH/${f}_c2.f32" - "$PNG_PATH/${f}_pitch.png" ${mx[count]}
    count=$(( count + 1 ))
    b="${f}_pitch.png"
    printf '  <td><a href="png/%s"><img src="png/%s"></a></td>\n' "$b" "$b" >> "$HTML"
  done
  printf '</tr>\n' >> "$HTML"
  printf '</table>\n<p>\n' >> "$HTML"

  table_of_values "quant" "Quantiser Error Countours"
fi

printf '</body>\n</html>\n' >> "$HTML"