mirror of https://github.com/drowe67/LPCNet.git
354 lines
11 KiB
Bash
Executable File
354 lines
11 KiB
Bash
Executable File
#!/bin/bash -x
|
|
# process.sh
|
|
# David Rowe Jan 2019
|
|
#
|
|
: '
|
|
1. Process an input set of wave files using LPCNet under a variety of conditions.
|
|
2. Name output files to make them convenient to listen to in a file manager.
|
|
3. Generate a HTML table of samples for convenient replay on the web.
|
|
4. Generate a bunch of other HTML files and PNGs.
|
|
|
|
usage: ./process.sh [--lite] OutPath
|
|
./process.sh ~/tmp/lpcnet_out
|
|
|
|
To compare generate new samples OutPathA, and compare with those from a previous
|
|
run of this script in OutPathB:
|
|
|
|
./process.sh OutPathA OutPathB
|
|
|
|
--lite generates a much smaller page with just the basic LPCNet model case
|
|
'
|
|
|
|
# command line arguments
|
|
|
|
if [ $# -lt 1 ]; then
|
|
echo "usage: ./process2.sh [--lite] /output/path/1 [/output/path/2]"
|
|
echo " $ ./process.sh ~/tmp/lpcnet_outA"
|
|
exit 1
|
|
fi
|
|
|
|
lite=0
|
|
for i in "$@"
|
|
do
|
|
case $i in
|
|
--lite)
|
|
lite=1
|
|
shift
|
|
;;
|
|
esac
|
|
done
|
|
|
|
OUTPATH=$1
|
|
if [ $# -eq 2 ]; then
|
|
OUTPATHB=$2
|
|
fi
|
|
|
|
# set these paths to suit your system
|
|
CODEC2_PATH=$HOME/codec2-dev/build_linux/src
|
|
WAVIN_PATH=$HOME/Desktop/deep/quant
|
|
|
|
WAVOUT_PATH=$OUTPATH/wav
|
|
PATH=$PATH:$CODEC2_PATH
|
|
STATS=$OUTPATH/stats.txt
|
|
HTML=$OUTPATH/index.html
|
|
PNG_PATH=$OUTPATH/png
|
|
F32_PATH=$OUTPATH/f32
|
|
SV_PATH=$OUTPATH/sv
|
|
WAV_FILES="all birch canadian glue oak separately wanted wia"
|
|
|
|
# check we can find wave files
|
|
for f in $WAV_INFILES
|
|
do
|
|
if [ ! -e $WAVIN_PATH/$f.wav ]; then
|
|
echo "$WAVIN_PATH/$f.wav Not found"
|
|
fi
|
|
done
|
|
|
|
# check we can find codec 2 tools
|
|
if [ ! -e $CODEC2_PATH/c2enc ]; then
|
|
echo "$CODEC2_PATH/c2enc not found"
|
|
fi
|
|
|
|
#
|
|
# OK lets start processing ------------------------------------------------
|
|
#
|
|
mkdir -p $F32_PATH
|
|
mkdir -p $SV_PATH
|
|
|
|
mkdir -p $WAVOUT_PATH
|
|
mkdir -p $PNG_PATH
|
|
rm -f $STATS
|
|
|
|
vq=pred_v2_stage
|
|
vq2=split_stage
|
|
|
|
# cp in originals
|
|
for f in $WAV_FILES
|
|
do
|
|
cp $WAVIN_PATH/$f.wav $WAVOUT_PATH/$f'_0_orig.wav'
|
|
done
|
|
|
|
# Unquantised, baseline analysis-synthesis model, 10ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_1_uq'.wav
|
|
done
|
|
|
|
if [ $lite -eq 0 ]; then
|
|
|
|
# 3dB uniform quantiser, 10ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
label=$(printf "3dB %-10s" "$f")
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -l "$label" -d 1 --uniform 3 2>>$STATS | ./test_lpcnet - - | \
|
|
sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_2_3dB'.wav
|
|
|
|
done
|
|
|
|
# decimate features to 20ms updates, then linearly interpolate back up to 10ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -d 2 | ./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_3_20ms'.wav
|
|
|
|
done
|
|
|
|
# decimate features to 20ms updates, then linearly interpolate back up to 10ms updates, incl pitch + voicing quant
|
|
for f in $WAV_FILES
|
|
do
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -d 2 -o 6 | ./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_4_20ms_pq'.wav
|
|
|
|
done
|
|
|
|
# 33 bit 3 stage VQ searched with mbest algorithm, 20ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
label=$(printf "33bit_20ms %-10s" "$f")
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -l "$label" -d 2 -o 6 -w --mbest 5 -q $vq'1.f32',$vq'2.f32',$vq'3.f32' -s $SV_PATH/$f'_5_33bit_20ms'.txt 2>>$STATS | \
|
|
./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_5_33bit_20ms'.wav
|
|
done
|
|
|
|
# 33 bit 3 stage VQ searched with mbest algorithm, 30ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
label=$(printf "33bit_30ms %-10s" "$f")
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 -q $vq'1.f32',$vq'2.f32',$vq'3.f32' -s $SV_PATH/$f'_6_33bit_30ms'.txt 2>>$STATS | \
|
|
./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_6_33bit_30ms'.wav
|
|
done
|
|
|
|
# 44 bit 4 stage VQ searched with mbest algorithm, 30ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
label=$(printf "44bit_30ms %-10s" "$f")
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -l "$label" -d 3 -o 6 -w --mbest 5 -q $vq'1.f32',$vq'2.f32',$vq'3.f32',$vq'4.f32' -s $SV_PATH/$f'_7_44bit_30ms'.txt 2>>$STATS | \
|
|
./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_7_44bit_30ms'.wav
|
|
done
|
|
|
|
# non-predictive (direct) 44 bit 4 stage split VQ searched with mbest algorithm, 30ms updates
|
|
for f in $WAV_FILES
|
|
do
|
|
label=$(printf "44bit_sp_30ms %-10s" "$f")
|
|
sox $WAVIN_PATH/$f.wav -t raw - | ./dump_data --test --c2pitch - - | \
|
|
./quant_feat -l "$label" -d 3 -o 6 -i -p 0 --mbest 5 -q $vq2'1.f32',$vq2'2.f32',$vq2'3.f32',$vq2'4.f32' -s $SV_PATH/$f'_8_44bit_sp_30ms'.txt 2>>$STATS | \
|
|
./test_lpcnet - - | sox -r 16000 -t .s16 -c 1 - $WAVOUT_PATH/$f'_8_44bit_sp_30ms'.wav
|
|
done
|
|
|
|
fi # ... if [ $lite -eq 0 ] ...
|
|
|
|
#
|
|
# Create a HTML table of samples ----------------------------------------------------
|
|
#
|
|
|
|
cat << EOF > $HTML
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
|
|
<html>
|
|
<head>
|
|
<title>LPCNet Samples</title>
|
|
|
|
</head>
|
|
<body>
|
|
EOF
|
|
|
|
if [ $lite -eq 0 ]; then
|
|
cat << EOF > $HTML
|
|
<table>
|
|
<col width="10%">
|
|
<col width="70%">
|
|
<caption>Glossary</caption>
|
|
<tr><th align="left">Term</th><th align="left">Description</th></tr>
|
|
<tr><td>Orig</td><td>Original source input speech</td></tr>
|
|
<tr><td>UQ</td><td>Baseline LPCNet synthesis using unquantised features</td></tr>
|
|
<tr><td>3dB</td><td>Cesptral features uniform quantiser with 3dB steps</td></tr>
|
|
<tr>
|
|
<td>20ms</td>
|
|
<td>Cesptral features decimated to 20ms frame rate, linear interpolation back to 10ms</td>
|
|
</tr>
|
|
<tr>
|
|
<td>20ms_pq</td>
|
|
<td>As above but pitch quantised to 6 bits, pitch gain to 2 bits</td>
|
|
</tr>
|
|
<tr>
|
|
<td>33bit_20ms</td>
|
|
<td>3 stage VQ of prediction error, 11 bits/stage, at 20ms frame rate, (33+8)/0.02 = 2050 bits/s</td>
|
|
</tr>
|
|
<tr>
|
|
<td>33bit_30ms</td>
|
|
<td>Same 33 bit VQ, but decimated down to 30ms rate, (33+8)/0.03 = 1367 bits/s</td>
|
|
</tr>
|
|
<tr>
|
|
<td>44bit_30ms</td>
|
|
<td>4 stage VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s</td>
|
|
</tr>
|
|
<tr>
|
|
<td>44bit_sp_30ms</td>
|
|
<td>Direct (non predictive) 4 stage split VQ, at 30ms update rate, (44+8)/0.03 = 1733 bits/s. First 3 stages are 18 elements wide, last stage is just 12. We quantise log magnitudes (Ly) rather than Ceptrals (dct(Ly)). Targeted at HF radio channel where predictive schemes perform poorly due to high bit error/packet error rate</td>
|
|
</tr>
|
|
</table>
|
|
<p>
|
|
EOF
|
|
fi
|
|
|
|
function heading_row {
|
|
w=$(echo $WAV_FILES | cut -d ' ' -f 1)
|
|
headings=$(ls $WAVOUT_PATH/$w* | sed -r "s/.*$w.[[:digit:]]_//" | sed -r 's/.wav//')
|
|
printf "<tr>\n <th align="left">Sample</th>\n" >> $HTML
|
|
for h in $headings
|
|
do
|
|
printf " <th>%s</th>\n" $h >> $HTML
|
|
done
|
|
printf "</tr>\n" >> $HTML
|
|
}
|
|
|
|
# for each wave file, create a row
|
|
|
|
printf "<table>\n" >> $HTML
|
|
printf "<caption>Samples</caption>\n" >> $HTML
|
|
|
|
heading_row
|
|
|
|
for f in $WAV_FILES
|
|
do
|
|
files=$(ls $WAVOUT_PATH/$f*);
|
|
printf "<tr>\n <td>%s</td>\n" $f >> $HTML
|
|
for w in $files
|
|
do
|
|
b=$(basename $w)
|
|
if [ -z "${OUTPATHB}" ]; then
|
|
# no comparison
|
|
printf " <td align="center"><a href=\"wav/%s\">play</a></td>\n" $b >> $HTML
|
|
else
|
|
# compare with another process.sh run
|
|
printf " <td align="center"><a href=\"wav/%s\">play</a> (<a href=\"%s\">playB</a>) </td>\n" $b $OUTPATHB/wav/$b >> $HTML
|
|
fi
|
|
done
|
|
printf "</tr>\n" >> $HTML
|
|
done
|
|
printf "</table><p>\n" >> $HTML
|
|
|
|
# HTML table of results ---------------------------------------------------------
|
|
|
|
function table_of_values {
|
|
printf "<table>\n" >> $HTML
|
|
printf "<caption>%s</caption>\n" "$2" >> $HTML
|
|
|
|
heading_row
|
|
|
|
# for each wave file, create a row
|
|
|
|
for f in $WAV_FILES
|
|
do
|
|
files=$(ls $WAVOUT_PATH/$f*);
|
|
printf "<tr>\n <td>%s</td>\n" $f >> $HTML
|
|
for h in $headings
|
|
do
|
|
# extract variance from stats file
|
|
if [ "$1" = "var" ]; then
|
|
s=$(cat $STATS | sed -n "s/RESULTS $h $f.*var: \(.*\) sd.*/\1/p")
|
|
fi
|
|
if [ "$1" = "sd" ]; then
|
|
s=$(cat $STATS | sed -n "s/RESULTS $h $f.*sd: \(.*\) n.*/\1/p")
|
|
fi
|
|
if [ "$s" = "" ]; then
|
|
s="-"
|
|
fi
|
|
if [ $1 = "outliers" ]; then
|
|
outliers=$(cat $STATS | sed -n "s/RESULTS $h $f.*dB = \(.*\)/\1/p")
|
|
if [ ! "$outliers" = "" ]; then
|
|
png=$PNG_PATH/$f'_'$h'.png'
|
|
cmd="graphics_toolkit ('gnuplot'); o=[$outliers]; bar([1-sum(o) o],'hist'); axis([0 4 0 1]); print(\"$png\",'-dpng','-S120,120')"
|
|
octave --no-gui -qf --eval "$cmd"
|
|
b=$(basename $png)
|
|
printf " <td align=center><img src=\"png/%s\" ></img></td>\n" $b >> $HTML
|
|
else
|
|
printf " <td></td>\n" >> $HTML
|
|
fi
|
|
elif [ $1 = "quant" ]; then
|
|
sf=$SV_PATH/$f'_?_'$h.txt
|
|
if [ -e $sf ]; then
|
|
png=$PNG_PATH/$f'_'$h'_quant.png'
|
|
t=$(echo $h | sed -n "s/.*_\(.*\)ms/\1/p")
|
|
octave --no-gui -p src -qf src/plot_speech_quant.m $F32_PATH/$f.raw $sf $png $t
|
|
b=$(basename $png)
|
|
printf " <td align=center><a href="png/%s"><img width=100 height=100 src=\"png/%s\" ></img></a></td>\n" $b $b >> $HTML
|
|
else
|
|
printf " <td></td>\n" >> $HTML
|
|
fi
|
|
else
|
|
printf " <td align="center">%s</td>\n" $s >> $HTML
|
|
fi
|
|
done
|
|
printf "</tr>\n" >> $HTML
|
|
done
|
|
|
|
printf "</table><p>\n" >> $HTML
|
|
}
|
|
|
|
if [ $lite -eq 0 ]; then
|
|
table_of_values "var" "Variance"
|
|
table_of_values "sd" "Standard Deviation"
|
|
table_of_values "outliers" "Outliers"
|
|
|
|
#
|
|
# Table of Speech/Pitch countours ----------------------------------------------
|
|
#
|
|
|
|
printf "<table>\n" >> $HTML
|
|
printf "<caption>Pitch Countours</caption>\n" >> $HTML
|
|
|
|
# heading row
|
|
printf "<tr>\n" >> $HTML
|
|
for f in $WAV_FILES
|
|
do
|
|
printf " <th>%s</th>\n" $f >> $HTML
|
|
done
|
|
printf "</tr>\n" >> $HTML
|
|
|
|
# manually set pitch axis to make plots easier to read. TODO this breaks when we add new samples, need an associative array
|
|
mx=(400 200 200 400 400 200 400 200)
|
|
count=0
|
|
|
|
# row of links to PNGs
|
|
printf "<tr>\n" >> $HTML
|
|
for f in $WAV_FILES
|
|
do
|
|
sox $WAVIN_PATH/$f.wav -t raw $F32_PATH/$f.raw
|
|
./dump_data --test --c2pitch $F32_PATH/$f.raw $F32_PATH/$f'_c2'.f32
|
|
octave --no-gui -p src -qf src/plot_speech_pitch.m $F32_PATH/$f.raw $F32_PATH/$f'_c2'.f32 - $PNG_PATH/$f'_pitch.png' ${mx[count]}
|
|
count=$(( $count + 1 ))
|
|
b=$f'_pitch.png'
|
|
printf " <td align="center"><a href=\"png/%s\"><img width=100 height=100 src=\"png/%s\" /></a></td>\n" $b $b >> $HTML
|
|
done
|
|
printf "</tr>\n" >> $HTML
|
|
printf "</table><p>\n" >> $HTML
|
|
|
|
table_of_values "quant" "Quantiser Error Countours"
|
|
fi
|
|
|
|
printf "</body>\n</html>\n" >> $HTML
|