using voiced/unvoiced model in synth

master
David 2019-12-13 09:08:34 +10:30
parent a2832ecb6a
commit 712b1d5198
3 changed files with 26 additions and 10 deletions

View File

@@ -21,7 +21,7 @@ codec2_model = construct.Struct(
"voiced" / construct.Int32sl "voiced" / construct.Int32sl
) )
def read(filename, max_nb_samples): def read(filename, max_nb_samples=1E32):
# Determine number of records in file, not very Pythonic I know :-) # Determine number of records in file, not very Pythonic I know :-)

View File

@@ -33,7 +33,7 @@ parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed
parser.add_argument('phasenn', help='PhaseNN trained .h5 file') parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)') parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
parser.add_argument('--start', type=int, default=0, help='start frame') parser.add_argument('--start', type=int, default=0, help='start frame')
parser.add_argument('--length', type=int, default=300, help='Number of frames') parser.add_argument('--length', type=int, help='Number of frames')
args = parser.parse_args() args = parser.parse_args()
# read in model file records # read in model file records
@@ -59,12 +59,23 @@ model.load_weights(args.phasenn)
# compute rate L output phases # compute rate L output phases
phase_rect_est = model.predict(amp) phase_rect_est = model.predict(amp)
phase_est = np.zeros((nb_samples, width)) phase_est = np.zeros((nb_samples, width))
st = args.start; en = args.start+args.length; st = args.start
if args.length:
en = args.start + args.length
else:
en = nb_samples
v = 0; uv = 0
for i in range(st,en): for i in range(st,en):
for m in range(1,L[i]+1): for m in range(1,L[i]+1):
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin) bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1]) if voiced[i]:
phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
v += 1
else:
r = np.random.rand(1)
phase_est[i,m] = -np.pi + 2*r[0]*np.pi
uv += 1
print(v,uv)
# save to output model file for synthesis # save to output model file for synthesis
if args.modelout: if args.modelout:
codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout) codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)

View File

@@ -15,10 +15,15 @@ len=$4
x=$(basename $speech) x=$(basename $speech)
base="${x%.*}" base="${x%.*}"
out_model=out.model out_model=out.model
seg=$(mktemp)'.sw'
echo $seg
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model' sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len > $seg
c2sim $seg --modelout - | est_n0 -r > $base'_nolinear.model'
./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model' ./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model' c2sim $seg --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_outnn.sw' c2sim $seg --modelin $base'_comb.model' -o $base'_outnn.sw'
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - -o $base'_out.sw'
sox -t .sw $base'_outnn.sw' -t .sw $base'_out.sw' $base'_both.sw' # orig speech - sinusoidal orig phases - sinusoidal phaseNN
c2sim $seg -o $base'_out.sw'
sox $seg $base'_out.sw' $base'_outnn.sw' $base'_all.sw'