cq_16kHz fine, peter a bit spiky; slightly better than 190920c, but not as good as 190920a. However, quite remarkably, all.wav sounds fine, even outliers like canadian. This is puzzling, as this training material doesn't include resampling or the wianews sample.

2019-09-23 06:51:26 +09:30 · 2019-09-23 06:51:26 +09:30 · 49c95df13d
parent 24bc72efef
commit 49c95df13d
2 changed files with 10 additions and 11 deletions
--- a/src/full_train.sh
+++ b/src/full_train.sh
@@ -14,7 +14,7 @@ SRC4=wianews-2019-01-20.s16      # 62 minutes
 SRC5=soldersmoke202.sw           # 76 minutes, lot of material with target voices
 SRC=train_src

-DATE=190920c
+DATE=190922a

 synth() {
  ./src/dump_data --test --c2pitch --c2voicing ~/Downloads/$1.sw $1.f32
@@ -22,15 +22,13 @@ synth() {
 }

 train() {
-    # repeat David and Peter to get 60 minutes worth, so it weights training
+    # repeat David and Peter to get 142 minutes worth, so it weights training
    # for this type of speaker that we are struggling with
    x=$(mktemp)
-    sox ~/Downloads/$SRC1 ~/Downloads/$SRC2 $x'.wav' repeat 20
+    sox ~/Downloads/$SRC1 ~/Downloads/$SRC2 $x'.wav' repeat 60
    ls -l $x.wav
    # combine all samples, evaluation data at end of larger database of mixed speakers
    sox $x.wav \
-	-t sw -r 16000 -c 1 ~/Downloads/$SRC4 \
-        -t sw -r 16000 -c 1 ~/Downloads/$SRC5 \
 	-t sw -r 16000 -c 1 ~/Downloads/$SRC3 \
 	-t sw $SRC.sw
    ls -l $SRC.sw
@@ -41,8 +39,9 @@ train() {
    make test_lpcnet
 }

-train
-synth c01_01 $DATE'_f'
-synth mk61_01 $DATE'_m'
-synth cq_16kHz $DATE'_cq_16kHz'
-synth peter $DATE'_peter'
+#train
+#synth c01_01 $DATE'_f'
+#synth mk61_01 $DATE'_m'
+#synth cq_16kHz $DATE'_cq_16kHz'
+#synth peter $DATE'_peter'
+synth wia $DATE'_wia'
--- a/src/train_lpcnet.py
+++ b/src/train_lpcnet.py
@@ -46,7 +46,7 @@ config = tf.ConfigProto()

 set_session(tf.Session(config=config))

-nb_epochs = 20
+nb_epochs = 10

 # Try reducing batch_size if you run out of memory on your GPU
 batch_size = 32