diff --git a/phasenn_out.py b/phasenn_out.py
new file mode 100755
index 0000000..2aa2816
--- /dev/null
+++ b/phasenn_out.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python3
+# phasenn_out.py
+#
+# David Rowe Dec 2019
+#
+# Generate phasenn output sample from an input Codec 2 model, and phaseNN .h5.
+#
+
+import numpy as np
+import sys
+import codec2_model
+import argparse
+import os
+from keras.models import load_model
+from keras.layers import Input, Dense, Concatenate
+from keras import models,layers
+from keras import initializers
+from keras import backend as K
+
+# less verbose tensorflow ....
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+# constants TODO: put these is a python module
+width = 256
+pairs = 2*width
+Fs = 8000
+
+def list_str(values):
+    return values.split(',')
+
+parser = argparse.ArgumentParser(description='Train a NN to model Codec 2 phases')
+parser.add_argument('modelin', help='Codec 2 model file in (linear phase removed)')
+parser.add_argument('phasenn', help='PhaseNN trained .h5 file')
+parser.add_argument('modelout', help='Codec 2 model file out (linear phase removed)')
+parser.add_argument('--start', type=int, default=0, help='start frame')
+parser.add_argument('--length', type=int, default=300, help='Number of frames')
+args = parser.parse_args()
+
+# read in model file records
+Wo, L, A, phase, voiced = codec2_model.read(args.modelin)
+nb_samples = Wo.size;
+print("nb_samples: %d" % (nb_samples))
+
+amp = np.zeros((nb_samples, width))
+phase_rect = np.zeros((nb_samples, pairs))
+for i in range(nb_samples):
+    for m in range(1,L[i]+1):
+        bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
+        amp[i,bin] = np.log10(A[i,m])
+
+# our model TODO: make a python module
+model = models.Sequential()
+model.add(layers.Dense(pairs, activation='relu', input_dim=width))
+model.add(layers.Dense(4*pairs, activation='relu'))
+model.add(layers.Dense(pairs))
+model.summary()
+model.load_weights(args.phasenn)
+
+# compute rate L output phases 
+phase_rect_est = model.predict(amp)
+phase_est = np.zeros((nb_samples, width))
+st = args.start; en = args.start+args.length;
+for i in range(st,en):
+    for m in range(1,L[i]+1):
+        bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
+        phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
+
+# save to output model file for synthesis
+if args.modelout:
+    codec2_model.write(Wo[st:en], L[st:en], A[st:en], phase_est[st:en], voiced[st:en], args.modelout)
diff --git a/synth.sh b/synth.sh
index 0fdf350..8793a20 100755
--- a/synth.sh
+++ b/synth.sh
@@ -5,13 +5,18 @@
 PATH=$PATH:~/codec2/build_linux/src:~/codec2/build_linux/misc
 
-if [ "$#" -ne 1 ]; then
-    echo "usage: ./synth.sh rawFile"
+if [ "$#" -ne 4 ]; then
+    echo "usage: ./synth.sh rawFile nn.h5 startSecs lengthSecs"
 fi
 
 speech=$1
+nn=$2
+st=$3
+len=$4
 x=$(basename $speech)
 base="${x%.*}"
 out_model=out.model
 
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelout - | est_n0 -a $out_model > $base'_comb.model'
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 3 | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
+./phasenn_out.py $base'_nolinear.model' $nn $base'_out.model'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelout - | est_n0 -a $base'_out.model' > $base'_comb.model'
+sox -t .sw -r 8000 -c 1 $speech -t .sw - trim $st $len | c2sim - --modelin $base'_comb.model' -o $base'_out.raw'