mirror of https://github.com/drowe67/phasenn.git
building up synthesis tools
parent
62c5474cf6
commit
67adf33ebe
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_out.py
|
||||
#
|
||||
# David Rowe Dec 2019
|
||||
#
|
||||
# Generate phasenn output sample from an input Codec 2 model, and phaseNN .h5.
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import codec2_model
|
||||
import argparse
|
||||
import os
|
||||
from keras.models import load_model
|
||||
from keras.layers import Input, Dense, Concatenate
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
from keras import backend as K
|
||||
|
||||
# Less verbose TensorFlow logging.
# NOTE(review): this assignment runs after the keras imports at the top of
# the file; TensorFlow normally reads TF_CPP_MIN_LOG_LEVEL when it is first
# imported, so this may need to move ahead of those imports -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Constants.  TODO: put these in a python module
width = 256        # number of bins in the sparse amplitude vector fed to the NN
pairs = 2*width    # NN output size: two values per bin (read back as real, imag)
Fs = 8000          # presumably the sample rate in Hz -- not referenced in this script
|
||||
|
||||
def list_str(values):
    """Split a comma-separated string into a list of its fields.

    The fields are returned as strings, in order, without stripping
    whitespace; an empty input yields ``['']``.

    NOTE(review): not referenced elsewhere in this script; it looks like an
    argparse ``type=`` helper carried over from a sibling script -- confirm.
    """
    return values.split(',')
|
||||
|
||||
# Command line: input model file, trained weights, output model file, and an
# optional range of frames to process.
parser = argparse.ArgumentParser(
    description='Generate PhaseNN output phases from a Codec 2 model file and a trained .h5')
parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed)')
parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
parser.add_argument('--start', type=int, default=0, help='start frame')
parser.add_argument('--length', type=int, default=300, help='Number of frames')
args = parser.parse_args()
|
||||
|
||||
# Read in the model file records; the returned arrays are indexed by frame
# number below (Wo[i], L[i], A[i,m]).
Wo, L, A, phase, voiced = codec2_model.read(args.modelin)
nb_samples = Wo.size
print("nb_samples: %d" % (nb_samples))

# Build the NN input: one sparse vector of `width` bins per frame, holding
# the log10 amplitude of each harmonic at the bin nearest its frequency.
# Bins with no harmonic stay at zero.
amp = np.zeros((nb_samples, width))
for i in range(nb_samples):
    for m in range(1, L[i]+1):
        # harmonic m is at m*Wo[i]; scale so that pi maps to `width`, then
        # clip to the last valid bin
        b = min(width-1, int(np.round(m*Wo[i]*width/np.pi)))
        # NOTE(review): a zero amplitude would give -inf here; assumes
        # A[i,m] > 0 for 1 <= m <= L[i] -- confirm against the model writer
        amp[i, b] = np.log10(A[i, m])
|
||||
|
||||
# Our model.  TODO: make a python module
# Three fully connected layers: `width` inputs -> pairs -> 4*pairs -> pairs
# outputs, ReLU on the first two and a linear output layer.  Only weights are
# loaded from the .h5 file, so this architecture has to match the one that
# was used to produce it.
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
model.add(layers.Dense(4*pairs, activation='relu'))
model.add(layers.Dense(pairs))
model.summary()
model.load_weights(args.phasenn)
|
||||
|
||||
# Compute rate L output phases.
# The network emits two values per bin; they are read back below as the real
# and imaginary part of a complex number whose angle is the phase estimate.
phase_rect_est = model.predict(amp)
phase_est = np.zeros((nb_samples, width))

# Frame range to process.  The end is clamped to the number of frames that
# were actually read, so the default --length no longer indexes past the end
# of a shorter input file.
st = args.start
en = min(args.start + args.length, nb_samples)
for i in range(st, en):
    for m in range(1, L[i]+1):
        # same harmonic -> bin mapping that was used to build `amp`
        b = min(width-1, int(np.round(m*Wo[i]*width/np.pi)))
        # output is indexed by harmonic number m, not by bin
        phase_est[i, m] = np.angle(phase_rect_est[i, 2*b] + 1j*phase_rect_est[i, 2*b+1])
|
||||
|
||||
# Save the selected frames to the output model file for synthesis.  Wo, L, A
# and voiced are passed through as read; only the phases are replaced by the
# NN estimates.
if args.modelout:
    codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)
|
13
synth.sh
13
synth.sh
|
@ -5,13 +5,18 @@
|
|||
|
||||
PATH=$PATH:~/codec2/build_linux/src:~/codec2/build_linux/misc
|
||||
|
||||
if [ "$#" -ne 1 ]; then
|
||||
echo "usage: ./synth.sh rawFile"
|
||||
# Require exactly four arguments; stop here rather than running the pipeline
# below with empty file names and trim values.
if [ "$#" -ne 4 ]; then
    echo "usage: ./synth.sh rawFile nn.h5 startSecs lengthSecs"
    exit 1
fi
|
||||
speech=$1
|
||||
nn=$2
|
||||
st=$3
|
||||
len=$4
|
||||
x=$(basename $speech)
|
||||
base="${x%.*}"
|
||||
out_model=out.model
|
||||
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelout - | est_n0 -a $out_model > $base'_comb.model'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
|
||||
./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
|
||||
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'
|
||||
|
|
Loading…
Reference in New Issue