building up synthesis tools

master
David 2019-12-08 17:18:04 +10:30
parent 62c5474cf6
commit 67adf33ebe
2 changed files with 79 additions and 4 deletions

70
phasenn_out.py 100755
View File

@ -0,0 +1,70 @@
#!/usr/bin/python3
# phasenn_out.py
#
# David Rowe Dec 2019
#
# Generate PhaseNN output phases from an input Codec 2 model file and a trained PhaseNN .h5 file.
#
import numpy as np
import sys
import codec2_model
import argparse
import os
from keras.models import load_model
from keras.layers import Input, Dense, Concatenate
from keras import models,layers
from keras import initializers
from keras import backend as K
# less verbose tensorflow ....
# NOTE(review): this is assigned after the keras imports above; if the backend
# reads the variable when it is imported, the setting may have no effect here
# -- confirm, and consider moving it ahead of the keras imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# constants TODO: put these in a python module
# width: number of frequency bins spanning 0..pi; also the NN input dimension
width = 256
# pairs: NN output dimension, two values per bin (read back below as the
# real and imaginary parts at indexes 2*bin and 2*bin+1)
pairs = 2*width
# Fs: presumably the sample rate in Hz -- not referenced in the visible code
Fs = 8000
def list_str(values):
    """Split a comma-separated string into a list of its fields.

    Suitable as an argparse ``type=`` callable for list-valued options.

    Args:
        values: the raw string, e.g. ``"a,b,c"``.

    Returns:
        A list of substrings; an empty input yields ``['']``, exactly as
        ``str.split(',')`` does.
    """
    return values.split(',')
# Command line interface.  The description states what this script does
# (it runs an already trained network; it does not train one).
parser = argparse.ArgumentParser(
    description='Generate PhaseNN output phases from a Codec 2 model file and a trained PhaseNN .h5 file')
# positional arguments: input model file, trained weights, output model file
parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed)')
parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
# optional frame window that is converted and written to the output file
parser.add_argument('--start', type=int, default=0, help='start frame')
parser.add_argument('--length', type=int, default=300, help='Number of frames')
args = parser.parse_args()
# read in model file records
Wo, L, A, phase, voiced = codec2_model.read(args.modelin)
nb_samples = Wo.size
print("nb_samples: %d" % (nb_samples))

# NN input: one row per frame, width columns, zero except at the bins that
# hold a harmonic, which carry the log10 of that harmonic's amplitude
amp = np.zeros((nb_samples, width))
# NOTE(review): phase_rect is allocated but not used anywhere in the visible
# code; kept so the module namespace is unchanged.
phase_rect = np.zeros((nb_samples, pairs))
for i in range(nb_samples):
    for m in range(1, L[i]+1):
        # map harmonic m (frequency m*Wo[i] on a 0..pi scale) to the nearest
        # bin, clipping to the last bin; if two harmonics land on the same
        # bin the later one overwrites the earlier one
        harm_bin = min(width-1, int(np.round(m*Wo[i]*width/np.pi)))
        amp[i, harm_bin] = np.log10(A[i, m])
# our model TODO: make a python module
# Fully connected network: width inputs -> pairs (relu) -> 4*pairs (relu)
# -> pairs outputs with no activation on the final layer.
model = models.Sequential([
    layers.Dense(pairs, activation='relu', input_dim=width),
    layers.Dense(4*pairs, activation='relu'),
    layers.Dense(pairs),
])
model.summary()
# weights come from the .h5 file named on the command line
model.load_weights(args.phasenn)
# compute rate L output phases
# The network emits, for every frame, pairs values laid out as two numbers
# per bin; each pair is treated below as the real and imaginary part of a
# complex number whose angle is the estimated phase.
phase_rect_est = model.predict(amp)
phase_est = np.zeros((nb_samples, width))
st = args.start
# Clamp the end frame to the number of frames that were read, so that a
# --start/--length window running past the end of the file (the default
# length is 300) no longer raises IndexError.  The slices used when writing
# below already stop at the end of the arrays, so the output is unchanged
# for windows that fit.
en = min(args.start + args.length, nb_samples)
for i in range(st, en):
    for m in range(1, L[i]+1):
        # same harmonic -> bin mapping as used to build the NN input
        harm_bin = min(width-1, int(np.round(m*Wo[i]*width/np.pi)))
        re_part = phase_rect_est[i, 2*harm_bin]
        im_part = phase_rect_est[i, 2*harm_bin+1]
        # result is stored per harmonic index m, not per bin
        phase_est[i, m] = np.angle(re_part + 1j*im_part)

# save to output model file for synthesis
if args.modelout:
    codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)

View File

@ -5,13 +5,18 @@
PATH=$PATH:~/codec2/build_linux/src:~/codec2/build_linux/misc
if [ "$#" -ne 1 ]; then
echo "usage: ./synth.sh rawFile"
# All four arguments are required; print the usage line and stop rather than
# carrying on with empty variables.
if [ "$#" -ne 4 ]; then
    echo "usage: ./synth.sh rawFile nn.h5 startSecs lengthSecs"
    exit 1
fi
# Positional arguments, in the order given by the usage message:
# input speech file, NN .h5 file, and the start/length handed to "sox ... trim".
speech=$1
nn=$2
st=$3
len=$4
# Strip the directory and the extension from the input name; $base prefixes
# every file produced below.
x=$(basename $speech)
base="${x%.*}"
out_model=out.model
# NOTE(review): this text is a diff view without +/- markers. The next two
# pipelines use a fixed "trim 0 3" and $out_model and appear to be the
# pre-change lines that the four commands after them replace -- confirm.
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelout - | est_n0 -a $out_model > $base'_comb.model'
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'
# 1. Trim the input to the requested window, have c2sim write its model
#    records to stdout, and pass them through "est_n0 -r" into
#    <base>_nolinear.model.
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
# 2. Run the NN script on that model file to produce <base>_out.model.
#    NOTE(review): --start/--length are not passed, so the script's own
#    defaults (frame 0, 300 frames) apply -- confirm this is intended.
./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
# 3. Re-analyse the same window and pass the records through "est_n0 -a"
#    together with the NN output model, writing <base>_comb.model.
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
# 4. Run c2sim on the same window with <base>_comb.model as its model input
#    and write the result to <base>_out.raw.
sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'