finally - high quality from tinytrain on m & f samples, same quality and loss as 4a3998

2019-08-04 15:18:58 +09:30 · 2019-08-04 15:18:58 +09:30 · 6ac9b1902d
parent 45d70671d1
commit 6ac9b1902d
4 changed files with 45 additions and 7 deletions
--- a/src/dump_data.c
+++ b/src/dump_data.c
@ -266,7 +266,9 @@ int main(int argc, char **argv) {
  int nvec = 5000000;
  int fuzz = 1;
  int logmag = 0;
-  
+  int  nb_passes = 0;
+  int  nb_frames_per_pass = 0;  
+
  st = rnnoise_create();

  int o = 0;
@ -290,7 +292,7 @@ int main(int argc, char **argv) {
          training = 1;
          break;
      case 'n':
-	  nvec = atoi(optarg);
+	  nvec = atof(optarg);
 	  assert(nvec > 0);
 	  fprintf(stderr, "nvec: %d\n", nvec);
 	  break;
@ -386,6 +388,9 @@ int main(int argc, char **argv) {
      rewind(f1);
      nread = fread(tmp, sizeof(short), FRAME_SIZE, f1);
      one_pass_completed = 1;
+      nb_passes++;
+      if (!nb_frames_per_pass)
+	  nb_frames_per_pass = count;
    }
    for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
    if (training) {
@ -460,6 +465,11 @@ int main(int argc, char **argv) {
  fclose(ffeat);
  if (fpcm) fclose(fpcm);
  if (c2pitch_en) { free(c2_Sn); codec2_pitch_destroy(c2pitch); }
+  if (training) {
+      int minutes = (float)nb_frames_per_pass*0.01/60.0;     
+      fprintf(stderr, "frames_per_pass: %d (%d minutes) nb_passes: %d\n",
+	      nb_frames_per_pass, minutes, nb_passes);
+  }
  rnnoise_destroy(st);
  return 0;
 }
--- a/src/tinytrain.sh
+++ b/src/tinytrain.sh
@ -0,0 +1,28 @@
+#!/bin/bash -x
+# tinytrain.sh
+# Train using a tiny database, then synthesise a few samples from within
+# the training database.  Used to perform quick sanity checks with a few hrs training
+#
+# usage:
+#   $ cd LPCNet/build_linux
+#   $ ../src/tinytrain.sh
+
+SRC=all_speech
+DATE=190804a
+
+synth() {
+  ./src/dump_data --mag --test --c2pitch ~/Downloads/$1.sw $1.f32
+  ./src/test_lpcnet --mag $1.f32 "$2".raw
+}
+
+train() {
+  ./src/dump_data --mag --train --c2pitch -z 0 -n 1E6 ~/Downloads/$SRC.sw $SRC.f32 $SRC.pcm
+  ../src/train_lpcnet.py $SRC.f32 $SRC.pcm lpcnet_$DATE
+  ../src/dump_lpcnet.py lpcnet_"$DATE"_10.h5
+  cp nnet_data.c src
+  make test_lpcnet
+}
+
+train
+synth c01_01 $DATE'_f'
+synth mk61_01 $DATE'_m'
--- a/src/train_lpcnet.py
+++ b/src/train_lpcnet.py
@ -90,7 +90,7 @@ features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
 features = features[:, :, :nb_used_features]
 # 0..37 features total
 # 0..17 cepstrals, 36 = pitch, 37 = pitch gain, 38 = lpc-gain
-# nb_used_features=38, so 0...37
+# nb_used_features=38, so 0...37, so lpc-gain not used
 features[:,:,18:36] = 0   # zero out 18..35, so pitch and pitch gain being fed in, lpc gain ignored

 fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
@ -100,8 +100,6 @@ features = np.concatenate([fpad1, features, fpad2], axis=1)
 # pitch feature uses as well as cesptrals
 periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')

-features[:,:,36:] = 0      # DR experiment - lets try zeroing out pitch and pitch gain
-
 in_data = np.concatenate([sig, pred, in_exc], axis=-1)

 del sig
--- a/unittest/test_core_nn.sh
+++ b/unittest/test_core_nn.sh
@ -1,9 +1,11 @@
 #!/bin/bash
 # test_core_nn.sh
 #
-# Some tests for core NN, e.g. generation of test data using
-# dump_data, and unquantised synthesis using and test_lpcnet

+# Some tests for core NN, e.g. generation of test data using
+# dump_data, and unquantised synthesis using test_lpcnet.  Used to
+# ensure no existing features are broken during experimentation and
+# development.

 # test generation of training data (doesn't really test training as that takes hours)
 # TODO: This test not working yet