using voiced/unvoiced model in synth

2019-12-13 09:08:34 +10:30 · 2019-12-13 09:08:34 +10:30 · 712b1d5198
parent a2832ecb6a
commit 712b1d5198
3 changed files with 26 additions and 10 deletions
--- a/codec2_model.py
+++ b/codec2_model.py
@ -21,7 +21,7 @@ codec2_model = construct.Struct(
    "voiced" / construct.Int32sl
    )

-def read(filename, max_nb_samples):
+def read(filename, max_nb_samples=1E32):
    
    # Determine number of records in file, not very Pythonic I know :-)

--- a/phasenn_out.py
+++ b/phasenn_out.py
@ -33,7 +33,7 @@ parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed
 parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
 parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
 parser.add_argument('--start', type=int, default=0, help='start frame')
-parser.add_argument('--length', type=int, default=300, help='Number of frames')
+parser.add_argument('--length', type=int, help='Number of frames')
 args = parser.parse_args()

 # read in model file records
@ -59,12 +59,23 @@ model.load_weights(args.phasenn)
 # compute rate L output phases
 phase_rect_est = model.predict(amp)
 phase_est = np.zeros((nb_samples, width))
-st = args.start; en = args.start+args.length;
+st = args.start
+if args.length:
+    en = args.start + args.length
+else:
+    en = nb_samples
+v = 0; uv = 0
 for i in range(st,en):
    for m in range(1,L[i]+1):
        bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
-        phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
-        
+        if voiced[i]:
+            phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
+            v += 1
+        else:
+            r = np.random.rand(1)
+            phase_est[i,m] = -np.pi + 2*r[0]*np.pi
+            uv += 1
+print(v,uv)        
 # save to output model file for synthesis
 if args.modelout:
    codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)
--- a/synth.sh
+++ b/synth.sh
@ -15,10 +15,15 @@ len=$4
 x=$(basename $speech)
 base="${x%.*}"
 out_model=out.model
+seg=$(mktemp)'.sw'
+echo $seg

-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len > $seg
+c2sim $seg --modelout - | est_n0 -r > $base'_nolinear.model'
 ./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_outnn.sw'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - -o $base'_out.sw'
-sox -t .sw $base'_outnn.sw' -t .sw $base'_out.sw' $base'_both.sw'
+c2sim $seg --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
+c2sim $seg --modelin $base'_comb.model' -o $base'_outnn.sw'
+
+# concatenate in order: original speech, sinusoidal synthesis with original phases, sinusoidal synthesis with PhaseNN phases
+c2sim $seg -o $base'_out.sw'
+sox $seg $base'_out.sw' $base'_outnn.sw' $base'_all.sw'