From 712b1d5198f93d8303c0cc0e90de60a1a5fabde0 Mon Sep 17 00:00:00 2001 From: David Date: Fri, 13 Dec 2019 09:08:34 +1030 Subject: [PATCH] using voiced/unvoiced model in synth --- codec2_model.py | 2 +- phasenn_out.py | 19 +++++++++++++++---- synth.sh | 15 ++++++++++----- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/codec2_model.py b/codec2_model.py index 89ac3e3..3c521b7 100755 --- a/codec2_model.py +++ b/codec2_model.py @@ -21,7 +21,7 @@ codec2_model = construct.Struct( "voiced" / construct.Int32sl ) -def read(filename, max_nb_samples): +def read(filename, max_nb_samples=1E32): # Determine number of records in file, not very Pythonic I know :-) diff --git a/phasenn_out.py b/phasenn_out.py index 2aa2816..1f1af6e 100755 --- a/phasenn_out.py +++ b/phasenn_out.py @@ -33,7 +33,7 @@ parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed parser.add_argument('phasenn', help='PhaseNN trained .h5 file') parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)') parser.add_argument('--start', type=int, default=0, help='start frame') -parser.add_argument('--length', type=int, default=300, help='Number of frames') +parser.add_argument('--length', type=int, help='Number of frames') args = parser.parse_args() # read in model file records @@ -59,12 +59,23 @@ model.load_weights(args.phasenn) # compute rate L output phases phase_rect_est = model.predict(amp) phase_est = np.zeros((nb_samples, width)) -st = args.start; en = args.start+args.length; +st = args.start +if args.length: + en = args.start + args.length +else: + en = nb_samples +v = 0; uv = 0 for i in range(st,en): for m in range(1,L[i]+1): bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin) - phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1]) - + if voiced[i]: + phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1]) + v += 1 + else: + r = np.random.rand(1) + phase_est[i,m] = -np.pi + 2*r[0]*np.pi + uv += 1 +print(v,uv) # save to output model file for synthesis if args.modelout: codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout) diff --git a/synth.sh b/synth.sh index 66276ab..8ea198b 100755 --- a/synth.sh +++ b/synth.sh @@ -15,10 +15,15 @@ len=$4 x=$(basename $speech) base="${x%.*}" out_model=out.model +seg=$(mktemp)'.sw' +echo $seg -sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model' +sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len > $seg +c2sim $seg --modelout - | est_n0 -r > $base'_nolinear.model' ./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model' -sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model' -sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_outnn.sw' -sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - -o $base'_out.sw' -sox -t .sw $base'_outnn.sw' -t .sw $base'_out.sw' $base'_both.sw' +c2sim $seg --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model' +c2sim $seg --modelin $base'_comb.model' -o $base'_outnn.sw' + +# orig speech - sinusoidal orig phases - sinusoidal phaseNN +c2sim $seg -o $base'_out.sw' +sox $seg $base'_out.sw' $base'_outnn.sw' $base'_all.sw'