From 712b1d5198f93d8303c0cc0e90de60a1a5fabde0 Mon Sep 17 00:00:00 2001
From: David <david@rowetel.com>
Date: Fri, 13 Dec 2019 09:08:34 +1030
Subject: [PATCH] using voiced/unvoiced model in synth

---
 codec2_model.py |  2 +-
 phasenn_out.py  | 19 +++++++++++++++----
 synth.sh        | 15 ++++++++++-----
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/codec2_model.py b/codec2_model.py
index 89ac3e3..3c521b7 100755
--- a/codec2_model.py
+++ b/codec2_model.py
@@ -21,7 +21,7 @@ codec2_model = construct.Struct(
     "voiced" / construct.Int32sl
     )
 
-def read(filename, max_nb_samples):
+def read(filename, max_nb_samples=1E32):
     
     # Determine number of records in file, not very Pythonic I know :-)
 
diff --git a/phasenn_out.py b/phasenn_out.py
index 2aa2816..1f1af6e 100755
--- a/phasenn_out.py
+++ b/phasenn_out.py
@@ -33,7 +33,7 @@ parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed
 parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
 parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
 parser.add_argument('--start', type=int, default=0, help='start frame')
-parser.add_argument('--length', type=int, default=300, help='Number of frames')
+parser.add_argument('--length', type=int, help='Number of frames')
 args = parser.parse_args()
 
 # read in model file records
@@ -59,12 +59,23 @@ model.load_weights(args.phasenn)
 # compute rate L output phases
 phase_rect_est = model.predict(amp)
 phase_est = np.zeros((nb_samples, width))
-st = args.start; en = args.start+args.length;
+st = args.start
+if args.length is None:  # --length omitted: run through to the last frame
+    en = nb_samples
+else:  # clamp so start+length cannot run past the nb_samples rows of phase_est
+    en = min(nb_samples, args.start + args.length)
+v = 0; uv = 0
 for i in range(st,en):
     for m in range(1,L[i]+1):
         bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
-        phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
-        
+        if voiced[i]:
+            phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
+            v += 1
+        else:
+            # unvoiced frame: ignore the NN estimate, use a random phase in [-pi, pi)
+            phase_est[i,m] = np.random.uniform(-np.pi, np.pi)
+            uv += 1
+print(v,uv)
 # save to output model file for synthesis
 if args.modelout:
     codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)
diff --git a/synth.sh b/synth.sh
index 66276ab..8ea198b 100755
--- a/synth.sh
+++ b/synth.sh
@@ -15,10 +15,15 @@ len=$4
 x=$(basename $speech)
 base="${x%.*}"
 out_model=out.model
+seg=$(mktemp --suffix=.sw)  # create the temp file with its .sw name; no orphaned extra file
+echo "$seg"
 
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len > $seg
+c2sim $seg --modelout - | est_n0 -r > $base'_nolinear.model'
 ./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_outnn.sw'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - -o $base'_out.sw'
-sox -t .sw $base'_outnn.sw' -t .sw $base'_out.sw' $base'_both.sw'
+c2sim $seg --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
+c2sim $seg --modelin $base'_comb.model' -o $base'_outnn.sw'
+
+# concatenate in order: original speech, sinusoidal (original phases), sinusoidal (phaseNN phases)
+c2sim $seg -o $base'_out.sw'
+sox $seg $base'_out.sw' $base'_outnn.sw' $base'_all.sw'