mirror of https://github.com/drowe67/phasenn.git
using voiced/unvoiced model in synth
parent
a2832ecb6a
commit
712b1d5198
|
@ -21,7 +21,7 @@ codec2_model = construct.Struct(
|
|||
"voiced" / construct.Int32sl
|
||||
)
|
||||
|
||||
def read(filename, max_nb_samples):
|
||||
def read(filename, max_nb_samples=1E32):
|
||||
|
||||
# Determine number of records in file, not very Pythonic I know :-)
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed
|
|||
parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
|
||||
parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
|
||||
parser.add_argument('--start', type=int, default=0, help='start frame')
|
||||
parser.add_argument('--length', type=int, default=300, help='Number of frames')
|
||||
parser.add_argument('--length', type=int, help='Number of frames')
|
||||
args = parser.parse_args()
|
||||
|
||||
# read in model file records
|
||||
|
@ -59,12 +59,23 @@ model.load_weights(args.phasenn)
|
|||
# compute rate L output phases
|
||||
phase_rect_est = model.predict(amp)
|
||||
phase_est = np.zeros((nb_samples, width))
|
||||
st = args.start; en = args.start+args.length;
|
||||
st = args.start
|
||||
if args.length:
|
||||
en = args.start + args.length
|
||||
else:
|
||||
en = nb_samples
|
||||
v = 0; uv = 0
|
||||
for i in range(st,en):
|
||||
for m in range(1,L[i]+1):
|
||||
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
|
||||
phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
|
||||
|
||||
if voiced[i]:
|
||||
phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
|
||||
v += 1
|
||||
else:
|
||||
r = np.random.rand(1)
|
||||
phase_est[i,m] = -np.pi + 2*r[0]*np.pi
|
||||
uv += 1
|
||||
print(v,uv)
|
||||
# save to output model file for synthesis
|
||||
if args.modelout:
|
||||
codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)
|
||||
|
|
15
synth.sh
15
synth.sh
|
@ -15,10 +15,15 @@ len=$4
|
|||
x=$(basename $speech)
|
||||
base="${x%.*}"
|
||||
out_model=out.model
|
||||
seg=$(mktemp)'.sw'
|
||||
echo $seg
|
||||
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len > $seg
|
||||
c2sim $seg --modelout - | est_n0 -r > $base'_nolinear.model'
|
||||
./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_outnn.sw'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - -o $base'_out.sw'
|
||||
sox -t .sw $base'_outnn.sw' -t .sw $base'_out.sw' $base'_both.sw'
|
||||
c2sim $seg --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
|
||||
c2sim $seg --modelin $base'_comb.model' -o $base'_outnn.sw'
|
||||
|
||||
# orig speech - sinusoidal orig phases - sinusoidal phaseNN
|
||||
c2sim $seg -o $base'_out.sw'
|
||||
sox $seg $base'_out.sw' $base'_outnn.sw' $base'_all.sw'
|
||||
|
|
Loading…
Reference in New Issue