building up synthesis tools

2019-12-08 17:18:04 +10:30 · 2019-12-08 17:18:04 +10:30 · 67adf33ebe
parent 62c5474cf6
commit 67adf33ebe
2 changed files with 79 additions and 4 deletions
--- a/phasenn_out.py
+++ b/phasenn_out.py
@ -0,0 +1,70 @@
+#!/usr/bin/python3
+# phasenn_out.py
+#
+# David Rowe Dec 2019
+#
+# Generate phasenn output sample from an input Codec 2 model, and phaseNN .h5.
+#
+
+import numpy as np
+import sys
+import codec2_model
+import argparse
+import os
+from keras.models import load_model
+from keras.layers import Input, Dense, Concatenate
+from keras import models,layers
+from keras import initializers
+from keras import backend as K
+
+# Less verbose TensorFlow logging.
+# NOTE(review): this is set after the keras imports above; the variable may
+# need to be set before TensorFlow is first loaded to take effect - confirm.
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+# constants TODO: put these in a python module
+# width: number of frequency bins per frame in the network input (amp) array
+width             = 256
+# pairs: two values per bin; read back below as (real, imag) to form a phase
+pairs             = 2*width
+# Fs: presumably the sample rate in Hz; not referenced elsewhere in this script
+Fs                = 8000
+
+def list_str(values):
+    """Split a comma-separated string into a list of its substrings.
+
+    Not referenced by the rest of this script.
+    """
+    return values.split(',')
+
+parser = argparse.ArgumentParser(description='Train a NN to model Codec 2 phases')
+parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed)')
+parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
+parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
+parser.add_argument('--start', type=int, default=0, help='start frame')
+parser.add_argument('--length', type=int, default=300, help='Number of frames')
+args = parser.parse_args()
+
+# read in model file records
+Wo, L, A, phase, voiced = codec2_model.read(args.modelin)
+nb_samples = Wo.size;
+print("nb_samples: %d" % (nb_samples))
+
+amp = np.zeros((nb_samples, width))
+phase_rect = np.zeros((nb_samples, pairs))
+for i in range(nb_samples):
+    for m in range(1,L[i]+1):
+        bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
+        amp[i,bin] = np.log10(A[i,m])
+
+# our model TODO: make a python module
+model = models.Sequential()
+model.add(layers.Dense(pairs, activation='relu', input_dim=width))
+model.add(layers.Dense(4*pairs, activation='relu'))
+model.add(layers.Dense(pairs))
+model.summary()
+model.load_weights(args.phasenn)
+
+# compute rate L output phases
+phase_rect_est = model.predict(amp)
+phase_est = np.zeros((nb_samples, width))
+st = args.start; en = args.start+args.length;
+for i in range(st,en):
+    for m in range(1,L[i]+1):
+        bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
+        phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
+        
+# save to output model file for synthesis
+if args.modelout:
+    codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)
--- a/synth.sh
+++ b/synth.sh
@ -5,13 +5,18 @@

 PATH=$PATH:~/codec2/build_linux/src:~/codec2/build_linux/misc

-if [ "$#" -ne 1 ]; then
-    echo "usage: ./synth.sh rawFile"
+# Require exactly four arguments; print usage and stop rather than running the
+# pipeline with empty variables.
+if [ "$#" -ne 4 ]; then
+    echo "usage: ./synth.sh rawFile nn.h5 startSecs lengthSecs"
+    exit 1
 fi
 speech=$1
+nn=$2
+st=$3
+len=$4
 x=$(basename $speech)
 base="${x%.*}"
 out_model=out.model

-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelout - | est_n0 -a $out_model > $base'_comb.model'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
+./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'