From 99da10f0d4062fe61e27fabc3131c94a086b4fe9 Mon Sep 17 00:00:00 2001 From: David Date: Sun, 17 Nov 2019 12:05:58 +1030 Subject: [PATCH] moved out of codec2 repo --- phasenn_test1.py | 88 +++++++++++++++++++++++ phasenn_test2.py | 90 +++++++++++++++++++++++ phasenn_test3.py | 90 +++++++++++++++++++++++ phasenn_test4.py | 132 ++++++++++++++++++++++++++++++++++ phasenn_test5.py | 125 ++++++++++++++++++++++++++++++++ phasenn_test6.py | 157 ++++++++++++++++++++++++++++++++++++++++ phasenn_test7.py | 177 ++++++++++++++++++++++++++++++++++++++++++++++ phasenn_test7a.py | 117 ++++++++++++++++++++++++++++++ phasenn_test8.py | 161 +++++++++++++++++++++++++++++++++++++++++ phasenn_test9.py | 166 +++++++++++++++++++++++++++++++++++++++++++ phasenn_train.py | 114 +++++++++++++++++++++++++++++ 11 files changed, 1417 insertions(+) create mode 100755 phasenn_test1.py create mode 100755 phasenn_test2.py create mode 100755 phasenn_test3.py create mode 100755 phasenn_test4.py create mode 100755 phasenn_test5.py create mode 100755 phasenn_test6.py create mode 100755 phasenn_test7.py create mode 100755 phasenn_test7a.py create mode 100755 phasenn_test8.py create mode 100755 phasenn_test9.py create mode 100755 phasenn_train.py diff --git a/phasenn_test1.py b/phasenn_test1.py new file mode 100755 index 0000000..3f8919f --- /dev/null +++ b/phasenn_test1.py @@ -0,0 +1,88 @@ +#!/usr/bin/python3 +# phasenn_test1.py +# +# David Rowe August 2019 + +# Keras model for testing phase modelling using NNs. Self contained, generates +# it's own training data. Can model linear phase at freq up to pi/4 + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of samples in frames +nb_samples = 1000000 +nb_batch = 512 +nb_epochs = 400 +np_input = 3 +nb_output = 2 + +# Generate training data. 
Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +def quant(val, levels): + return np.round(val*levels)/levels + +w = np.zeros(nb_samples) +phase_start = np.zeros(nb_samples) +phase_end = np.zeros(nb_samples) +for i in range(nb_samples): + x=np.random.rand(2) + w[i] = x[0]*np.pi/4 + phase_start[i] = -np.pi + x[1]*2*np.pi + phase_end[i] = phase_start[i] + N*w[i]; + +train = np.column_stack((w, np.cos(phase_start), np.sin(phase_start))) +target = np.column_stack((np.cos(phase_end), np.sin(phase_end))) +print(train.shape) +print(target.shape) + +model = models.Sequential() +model.add(layers.Dense(256, activation='relu', input_dim=np_input)) +model.add(layers.Dense(256, activation='relu', input_dim=np_input)) +model.add(layers.Dense(nb_output, activation='linear')) +model.summary() + +# Compile our model + +from keras import optimizers +model.compile(loss='mse', optimizer='adam') + +# fit model, using 20% of our data for validation + +history = model.fit(train, target, validation_split=0.2, batch_size=nb_batch, epochs=nb_epochs) + +# test actual error in angle over training data + +train_out = model.predict(train) +err = (train_out - target) +var = np.var(err) +std = np.std(err) +print("rect coord var: %f std: %f" % (var,std)) + +err_angle = np.arctan2(np.sin(err[:,0]),np.cos(err[:,1])) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %f std: %f" % (var,std)) + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(10*np.log10(history.history['loss'])) + plt.plot(10*np.log10(history.history['val_loss'])) + plt.title('model loss (dB)') + plt.xlabel('epoch') + plt.legend(['train', 'valid'], loc='upper right') + + plt.figure(2) + plt.hist(err_angle, bins=20) + plt.show() + + #plt.figure(2) + #plt.hist(target) + #plt.show() diff --git a/phasenn_test2.py b/phasenn_test2.py new file mode 100755 index 0000000..363c174 --- /dev/null +++ b/phasenn_test2.py @@ -0,0 +1,90 @@ +#!/usr/bin/python3 +# phasenn_test2.py +# +# David Rowe Oct 2019 + +# Keras model for testing phase modelling using NNs. A simple four weight +# linear model to map a constant phase shift, arriving at the same model as +# linear algera. Good sanity check + +# | cos(end) | = | cos(Nw) -sin(Nw) | | cos(start) | +# | sin(end) | | sin(Nw) cos(Nw) | | sin(start) | + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import keras.backend as K + +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 1000 +nb_batch = 1 +nb_epochs = 3 +width = 1 + +# Generate training data. 
Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +# phase encoded as cos,sin pairs +phase_start = np.zeros((nb_samples, 2)) +phase_end = np.zeros((nb_samples, 2)) + +# a shift of pi/4 across the frame of N samples +w = np.pi/(4*N); +for i in range(nb_samples): + r = np.random.rand(1) + phase_start_pol = -np.pi + r[0]*2*np.pi + phase_start[i,0] = np.cos(phase_start_pol) + phase_start[i,1] = np.sin(phase_start_pol) + phase_end_pol = phase_start_pol + N*w + phase_end[i,0] = np.cos(phase_end_pol) + phase_end[i,1] = np.sin(phase_end_pol) + +#print(phase_start.shape) +#print(phase_end.shape) +#print(phase_start[:10]) +model = models.Sequential() +model.add(layers.Dense(2, bias=False, input_dim=2)) +model.summary() + +# Compile and fit our model + +from keras import optimizers +model.compile(loss='mse', optimizer='sgd') +history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs) + +print(model.get_weights()) + +# measure error over all samples + +phase_end_est = model.predict(phase_start) +err = (phase_end_est - phase_end) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) +#print(err[:5,:]) + +err_angle = np.arctan2(phase_end_est[:,1], phase_end_est[:,0]) - np.arctan2(phase_end[:,1], phase_end[:,0]) +#print(err[:5,:]) +print(err_angle.shape) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %f std: %f" % (var,std)) + +plot_en = 0; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + + plt.figure(2) + plt.hist(err_angle, bins=20) + plt.title('phase angle error') + plt.show() + diff --git a/phasenn_test3.py b/phasenn_test3.py new file mode 100755 index 0000000..ac99afc --- /dev/null +++ b/phasenn_test3.py @@ -0,0 +1,90 @@ +#!/usr/bin/python3 +# phasenn_test3.py +# +# David Rowe Oct 2019 + +# Keras model for testing phase modelling using NNs. Extending four +# weights per phase test2 to a vector of phases, each with a different +# shift across the frame + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import keras.backend as K + +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 10000 +nb_batch = 1 +nb_epochs = 5 +width = 256 +pairs = 2*width + +# Generate training data. 
Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +# phase encoded as cos,sin pairs +phase_start = np.zeros((nb_samples, pairs)) +phase_end = np.zeros((nb_samples, pairs)) + +# Map 0...width-1 to 0...pi +w = np.pi/width; +for i in range(nb_samples): + for m in range(width): + r = np.random.rand(1) + phase_start_pol = -np.pi + r[0]*2*np.pi + phase_start[i,2*m] = np.cos(phase_start_pol) + phase_start[i,2*m+1] = np.sin(phase_start_pol) + phase_end_pol = phase_start_pol + N*w*m + phase_end[i,2*m] = np.cos(phase_end_pol) + phase_end[i,2*m+1] = np.sin(phase_end_pol) + +print(phase_start.shape) +print(phase_end.shape) + +# note most of these weights whould end up being 0, as we only need 4 weights to map each phase +# rotation from an input to output pair +model = models.Sequential() +model.add(layers.Dense(pairs, bias=False, input_dim=pairs)) +model.summary() + +# Compile and fit our model + +from keras import optimizers +sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='mse', optimizer=sgd) +history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs) + +# measure error over all samples + +phase_end_est = model.predict(phase_start) +err = (phase_end_est - phase_end) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) +#print(err[:5,:]) + +err_angle = np.arctan2(phase_end_est[:,1], phase_end_est[:,0]) - np.arctan2(phase_end[:,1], phase_end[:,0]) +#print(err[:5,:]) +print(err_angle.shape) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %f std: %f" % (var,std)) + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + + plt.figure(2) + plt.hist(err_angle, bins=20) + plt.title('phase angle error') + plt.show() + diff --git a/phasenn_test4.py b/phasenn_test4.py new file mode 100755 index 0000000..029fb25 --- /dev/null +++ b/phasenn_test4.py @@ -0,0 +1,132 @@ +#!/usr/bin/python3 +# phasenn_test4.py +# +# David Rowe Oct 2019 + +# Keras model for testing phase modelling using NNs. Like test3 but +# with sparse input vectors of harmonic phases, as this is how phases +# will be presented from Codec 2. + +# Lessons learned: +# 1. We train slower than test3, as we have less information per samples +# with a sparse subset of discrete harmonics +# 2. We need to quantise the frequency of each bin to get low error, ow +# we introduce a +/- 0.5bin random error in the training data, with is +# theoretically 8 degrees, but about 10-11 in practice. + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import keras.backend as K + +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 100000 +nb_batch = 32 +nb_epochs = 50 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data. 
Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +# phase encoded as cos,sin pairs +phase_start = np.zeros((nb_samples, pairs)) +phase_end = np.zeros((nb_samples, pairs)) +Wo = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) + +# Map 0...width-1 to 0...pi +for i in range(nb_samples): + r = np.random.rand(1) + fo = fo_min + (fo_max-fo_min)*r[0] + Wo[i] = fo*2*np.pi/Fs + L[i] = int(np.floor(np.pi/Wo[i])) + for m in range(1, L[i]): + wm = m*Wo[i] + bin = int(np.round(wm*width/np.pi)) + # Quantise frequency to this bin, this step is important to + # match results of test3. Without it we are adding +/- 0.5 bin + # uncertainty in the frequency, which will randomise phase shifts + # across bins + wm = bin*np.pi/width + r = np.random.rand(1) + phase_start_pol = -np.pi + r[0]*2*np.pi + phase_start[i,2*bin] = np.cos(phase_start_pol) + phase_start[i,2*bin+1] = np.sin(phase_start_pol) + phase_end_pol = phase_start_pol + N*wm + phase_end[i,2*bin] = np.cos(phase_end_pol) + phase_end[i,2*bin+1] = np.sin(phase_end_pol) + +print(phase_start.shape) +print(phase_end.shape) + +# note most of these weights whould end up being 0, as we only need +# two wieghts to map each phase rotation +model = models.Sequential() +model.add(layers.Dense(pairs, bias=False, input_dim=pairs)) +model.summary() + +import keras.backend as K + +# Compile and fit our model + +from keras import optimizers +sgd = optimizers.SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='mse', optimizer=sgd) +history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs) + +# measure error over non-zero samples + +phase_end_est = model.predict(phase_start) +ind = np.nonzero(phase_start) +err = (phase_end_est[ind] - phase_end[ind]) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) +#print(err[:5,:]) + +# approximation if error is small +err_angle = np.arctan2(err[1::2], 1) +#print(err[:5,:]) +print(err_angle.shape) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %f std: %f" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + + plt.figure(2) + plt.hist(err_angle, bins=20) + plt.title('phase angle error') + + plt.figure(3) + r = 0 + phase = np.zeros(width) + phase_est = np.zeros(width) + for m in range(1,L[r]): + wm = m*Wo[r] + bin = int(np.round(wm*width/np.pi)) + #print("bin: %d %f %f" % (bin, phase_end[r,2*bin], phase_end[r,2*bin+1])) + phase[m] = np.arctan2(phase_end[r,2*bin+1], phase_end[r,2*bin]) + phase_est[m] = np.arctan2(phase_end_est[r,2*bin+1], phase_end_est[r,2*bin]) + + plt.plot(phase[1:L[r]+1],'g') + plt.plot(phase_est[1:L[r]+1],'r') + + plt.show() + diff --git a/phasenn_test5.py b/phasenn_test5.py new file mode 100755 index 0000000..16a6516 --- /dev/null +++ b/phasenn_test5.py @@ -0,0 +1,125 @@ +#!/usr/bin/python3 +# phasenn_test5.py +# +# David Rowe Oct 2019 + +# Sparse model like test4 but with non-linear layers (activation functions) as +# these will be required later when we add amplitude information. 
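+
+# Illustrative sketch (not used by the training code below): the map each
+# cos,sin pair has to learn is a 2x2 rotation by N*w, as written out in the
+# header of phasenn_test2.py.  A quick numpy-only check of that identity,
+# with N, w and theta chosen arbitrarily here:
+
+import numpy as np   # numpy is imported again below; repeated so this sketch is self contained
+
+def rotation_check_demo(N=80, w=np.pi/(4*80), theta=0.3):
+    R = np.array([[np.cos(N*w), -np.sin(N*w)],
+                  [np.sin(N*w),  np.cos(N*w)]])
+    end = R.dot([np.cos(theta), np.sin(theta)])
+    # advancing the phase angle directly must give the same cos,sin pair
+    assert np.allclose(end, [np.cos(theta + N*w), np.sin(theta + N*w)])
+
+rotation_check_demo()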
+ +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import keras.backend as K + +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 100000 +nb_batch = 32 +nb_epochs = 50 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data. Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +# phase encoded as cos,sin pairs +phase_start = np.zeros((nb_samples, pairs)) +phase_end = np.zeros((nb_samples, pairs)) +Wo = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) + +# Map 0...width-1 to 0...pi +for i in range(nb_samples): + r = np.random.rand(1) + fo = fo_min + (fo_max-fo_min)*r[0] + Wo[i] = fo*2*np.pi/Fs + L[i] = int(np.floor(np.pi/Wo[i])) + for m in range(1, L[i]): + wm = m*Wo[i] + bin = int(np.round(wm*width/np.pi)) + # Quantise frequency to this bin, this step is important to + # match results of test3. Without it we are adding +/- 0.5 bin + # uncertainty in the frequency, which will randomise phase shifts + # across bins + wm = bin*np.pi/width + r = np.random.rand(1) + phase_start_pol = -np.pi + r[0]*2*np.pi + phase_start[i,2*bin] = np.cos(phase_start_pol) + phase_start[i,2*bin+1] = np.sin(phase_start_pol) + phase_end_pol = phase_start_pol + N*wm + phase_end[i,2*bin] = np.cos(phase_end_pol) + phase_end[i,2*bin+1] = np.sin(phase_end_pol) + +print(phase_start.shape) +print(phase_end.shape) + +# note most of these weights whould end up being 0, as we only need +# two wieghts to map each phase rotation +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=pairs)) +model.add(layers.Dense(pairs, input_dim=pairs)) +model.summary() + +import keras.backend as K + +# Compile and fit our model + +from keras import optimizers +sgd = optimizers.SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='mse', optimizer=sgd) +history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs) + +# measure error over non-zero samples + +phase_end_est = model.predict(phase_start) +ind = np.nonzero(phase_start) +err = (phase_end_est[ind] - phase_end[ind]) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) +#print(err[:5,:]) + +# approximation if error is small +err_angle = np.arctan2(err[1::2], 1) +#print(err[:5,:]) +print(err_angle.shape) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %f std: %f" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + + plt.figure(2) + plt.hist(err_angle, bins=20) + plt.title('phase angle error') + + plt.figure(3) + r = 0 + phase = np.zeros(width) + phase_est = np.zeros(width) + for m in range(1,L[r]): + wm = m*Wo[r] + bin = int(np.round(wm*width/np.pi)) + #print("bin: %d %f %f" % (bin, phase_end[r,2*bin], phase_end[r,2*bin+1])) + phase[m] = np.arctan2(phase_end[r,2*bin+1], phase_end[r,2*bin]) + phase_est[m] = np.arctan2(phase_end_est[r,2*bin+1], phase_end_est[r,2*bin]) + + plt.plot(phase[1:L[r]+1],'g') + plt.plot(phase_est[1:L[r]+1],'r') + + plt.show() + diff --git a/phasenn_test6.py b/phasenn_test6.py new file mode 100755 index 0000000..ab37076 --- /dev/null +++ b/phasenn_test6.py @@ -0,0 +1,157 @@ +#!/usr/bin/python3 +# phasenn_test6.py +# +# David Rowe Oct 
2019 + +# Extending test5 to deal with input/output vectors that have slightly +# different "rates", i.e. Wo changing across the frame which is usual +# for voiced speech. + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 100000 +nb_batch = 32 +nb_epochs = 25 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 +dfo = 0.02 + +# Generate training data. Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +# phase encoded as cos,sin pairs ref: +phase_start = np.zeros((nb_samples, pairs)) +phase_end = np.zeros((nb_samples, pairs)) +Wo_N = np.zeros(nb_samples) +Wo_0 = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) + +for i in range(nb_samples): + + # parameters at time 0 (start of current frame) + # distribute fo randomnly on a log scale + r = np.random.rand(1) + log_fo_0 = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0] + fo_0 = 10 ** log_fo_0 + Wo_0[i] = fo_0*2*np.pi/Fs + L_0 = int(np.floor(np.pi/Wo_0[i])) + + # parameters at time N (end of current frame), allow a df0 freq change + # across frame, typical of voiced speech + r = np.random.rand(1) + fo_N = fo_0 + (-2*dfo + dfo*r[0])*fo_0 + fo_N = np.max((fo_min, fo_N)) + fo_N = np.min((fo_max, fo_N)) + #fo_N = fo_0 + Wo_N[i] = fo_N*2*np.pi/Fs + L_N = int(np.floor(np.pi/Wo_N[i])) + L[i] = np.min((L_0, L_N)) + #print("fo: %f %f L: %d %d min: %d" % (fo_0, fo_N, L_0, L_N, L[i])) + + for m in range(1,L[i]): + bin_0 = int(np.round(m*Wo_0[i]*width/np.pi)) + mWo_0 = bin_0*np.pi/width + bin_N = int(np.round(m*Wo_N[i]*width/np.pi)) + mWo_N = bin_N*np.pi/width + #print("m: %d bin_0: %d bin_N: %d" % (m, bin_0,bin_N)) + + r = np.random.rand(1) + phase_start_pol = -np.pi + r[0]*2*np.pi + phase_start[i,2*bin_0] = np.cos(phase_start_pol) + phase_start[i,2*bin_0+1] = np.sin(phase_start_pol) + + # phase shift average of two frequencies + phase_end_pol = phase_start_pol + N*(mWo_0 + mWo_N)/2 + + phase_end[i,2*bin_N] = np.cos(phase_end_pol) + phase_end[i,2*bin_N+1] = np.sin(phase_end_pol) + +print(Wo_0.shape, Wo_N.shape, phase_start.shape) +input = np.column_stack([Wo_0, Wo_N, phase_start]) +print(input.shape) +print(phase_end.shape) + +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=(pairs+2))) +model.add(layers.Dense(pairs)) +model.summary() + +# Compile and fit our model + +from keras import optimizers +sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='mse', optimizer=sgd) +history = model.fit(input, phase_end, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in rectangular coordinates over all samples + +phase_end_est = model.predict(input) +ind = np.nonzero(phase_end) +err = (phase_end[ind] - phase_end_est[ind]) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) + +print(phase_end_est.shape, err.shape) +c1 = phase_end[ind]; c1 = c1[::2] + 1j*c1[1::2] +c2 = phase_end_est[ind]; c2 = c2[::2] + 1j*c2[1::2] +err_angle = np.angle(c1 * np.conj(c2)) + +print(err_angle[:5],err_angle.shape) + +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %4.2f std: %4.2f rads" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +def sample_model(r): + phase = np.zeros(width, dtype=complex) + phase_est = np.zeros(width, 
dtype=complex) + phase_err = np.zeros(width, dtype=complex) + for m in range(1,L[r]): + wm = m*Wo_N[r] + bin = int(np.round(wm*width/np.pi)) + phase[m] = phase_end[r,2*bin] + 1j*phase_end[r,2*bin+1] + phase_est[m] = phase_end_est[r,2*bin] + 1j*phase_end_est[r,2*bin+1] + phase_err[m] = phase[m] * np.conj(phase_est[m]) + return phase, phase_err + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + plt.show(block=False) + + plt.figure(2) + plt.subplot(211) + plt.hist(err_angle*180/np.pi, bins=20) + plt.subplot(212) + plt.hist(Wo_0*(Fs/2)/np.pi, bins=20) + plt.title('phase angle error (deg) and fo (Hz)') + plt.show(block=False) + + plt.figure(3) + plt.title('sample vectors and error') + for r in range(12): + plt.subplot(3,4,r+1) + phase, phase_err = sample_model(r) + plt.plot(np.angle(phase[1:L[r]+1])*180/np.pi,'g') + plt.plot(np.angle(phase_err[1:L[r]+1])*180/np.pi,'r') + plt.show(block=False) + + # click on last figure to close all and finish + plt.waitforbuttonpress(0) + plt.close() diff --git a/phasenn_test7.py b/phasenn_test7.py new file mode 100755 index 0000000..7923231 --- /dev/null +++ b/phasenn_test7.py @@ -0,0 +1,177 @@ +#!/usr/bin/python3 +# phasenn_test7.py +# +# David Rowe Oct 2019 + +# Extending PhaseNN model to have a linear and dispersive phase +# component. The dispersive component is from a simple 2nd order IIR +# filter. + +# TODO: [ ] remove df0 support +# [ ] remove Wo_0, Wo_N +# [ ] replace random starting phases with phase realted to t0 + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import matplotlib.pyplot as plt +from scipy import signal + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 100000 +nb_batch = 32 +nb_epochs = 25 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 +dfo = 0.00 # dfo models aren't solved yet + +# Generate training data. 
Given the phase at the start of the frame, +# and the frequency, determine the phase at the end of the frame + +# phase encoded as cos,sin pairs ref: +phase_start = np.zeros((nb_samples, pairs)) +phase_end = np.zeros((nb_samples, pairs)) +filter_amp = np.zeros((nb_samples, width)) +filter_phase = np.zeros((nb_samples, width)) +Wo_N = np.zeros(nb_samples) +Wo_0 = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) + +for i in range(nb_samples): + + # parameters at time 0 (start of current frame) + # distribute fo randomnly on a log scale + r = np.random.rand(1) + log_fo_0 = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0] + fo_0 = 10 ** log_fo_0 + Wo_0[i] = fo_0*2*np.pi/Fs + L_0 = int(np.floor(np.pi/Wo_0[i])) + + # parameters at time N (end of current frame), allow a df0 freq change + # across frame, typical of voiced speech + r = np.random.rand(1) + fo_N = fo_0 + (-2*dfo + dfo*r[0])*fo_0 + fo_N = np.max((fo_min, fo_N)) + fo_N = np.min((fo_max, fo_N)) + #fo_N = fo_0 + Wo_N[i] = fo_N*2*np.pi/Fs + L_N = int(np.floor(np.pi/Wo_N[i])) + L[i] = np.min((L_0, L_N)) + #print("fo: %f %f L: %d %d min: %d" % (fo_0, fo_N, L_0, L_N, L[i])) + + # sample 2nd order IIR filter with random peak freq and amplitude + + r = np.random.rand(2) + alpha = 0.1*np.pi + 0.8*np.pi*r[0] + gamma = 0.8 + #alpha = np.pi/4; gamma = 0.8 + w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo_0[i]) + + for m in range(1,L[i]): + bin_0 = int(np.round(m*Wo_0[i]*width/np.pi)) + mWo_0 = bin_0*np.pi/width + bin_N = int(np.round(m*Wo_N[i]*width/np.pi)) + mWo_N = bin_N*np.pi/width + #print("m: %d bin_0: %d bin_N: %d" % (m, bin_0,bin_N)) + + r = np.random.rand(1) + phase_start_pol = -np.pi + r[0]*2*np.pi + phase_start[i,2*bin_0] = np.cos(phase_start_pol) + phase_start[i,2*bin_0+1] = np.sin(phase_start_pol) + + # phase shift average of two frequencies + phase_end_pol = phase_start_pol + N*(mWo_0 + mWo_N)/2 + np.angle(h[m-1]) + + phase_end[i,2*bin_N] = np.cos(phase_end_pol) + phase_end[i,2*bin_N+1] = np.sin(phase_end_pol) + + filter_amp[i,bin_0] = np.log10(abs(h[m-1])) + filter_phase[i,bin_0] = np.angle(h[m-1]) + +input = np.column_stack([filter_amp, phase_start]) + +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=(width+pairs))) +model.add(layers.Dense(pairs)) +model.summary() + +# Compile and fit our model + +from keras import optimizers +sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='mse', optimizer=sgd) +history = model.fit(input, phase_end, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in rectangular coordinates over all samples + +phase_end_est = model.predict(input) +ind = np.nonzero(phase_end) +err = (phase_end[ind] - phase_end_est[ind]) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) + +print(phase_end_est.shape, err.shape) +c1 = phase_end[ind]; c1 = c1[::2] + 1j*c1[1::2] +c2 = phase_end_est[ind]; c2 = c2[::2] + 1j*c2[1::2] +err_angle = np.angle(c1 * np.conj(c2)) + +print(err_angle[:5],err_angle.shape) + +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %4.2f std: %4.2f rads" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +def sample_model(r): + phase = np.zeros(width, dtype=complex) + phase_est = np.zeros(width, dtype=complex) + phase_err = np.zeros(width, dtype=complex) + phase_filt = np.zeros(width) + + for m in range(1,L[r]): + wm = m*Wo_N[r] + bin = int(np.round(wm*width/np.pi)) + phase[m] = 
phase_end[r,2*bin] + 1j*phase_end[r,2*bin+1] + phase_est[m] = phase_end_est[r,2*bin] + 1j*phase_end_est[r,2*bin+1] + phase_err[m] = phase[m] * np.conj(phase_est[m]) + phase_filt[m] = filter_phase[r,bin] + return phase, phase_err, phase_filt + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + plt.show(block=False) + + plt.figure(2) + plt.subplot(211) + plt.hist(err_angle*180/np.pi, bins=20) + plt.subplot(212) + plt.hist(Wo_0*(Fs/2)/np.pi, bins=20) + plt.title('phase angle error (deg) and fo (Hz)') + plt.show(block=False) + + plt.figure(3) + plt.title('sample vectors and error') + for r in range(12): + plt.subplot(3,4,r+1) + phase, phase_err, phase_filt = sample_model(r) + plt.plot(np.angle(phase[1:L[r]+1])*180/np.pi,'g') + plt.plot(np.angle(phase_err[1:L[r]+1])*180/np.pi,'r') + plt.plot(phase_filt[1:L[r]+1]*180/np.pi,'b') + plt.ylim(-180,180) + plt.show(block=False) + + # click on last figure to close all and finish + plt.waitforbuttonpress(0) + plt.close() diff --git a/phasenn_test7a.py b/phasenn_test7a.py new file mode 100755 index 0000000..af6dda7 --- /dev/null +++ b/phasenn_test7a.py @@ -0,0 +1,117 @@ +#!/usr/bin/python3 +# phasenn_test7.py +# +# David Rowe Oct 2019 + +# Keras model for testing phase modelling using NNs. Here we try +# estimating the phase of a 2nd order system from it's amplitudes. +# This models the dispersive component of speech phase spectra, up +# until now we have been testing with the linear phase component. +# This script emulates a Hilbert Transform, note however in practice +# speech is not minimum phase so HTs have there limitations for real +# speech signals. + +import numpy as np +from scipy import signal +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import keras.backend as K + +import matplotlib.pyplot as plt + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 10000 +nb_batch = 64 +nb_epochs = 10 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data. 
Sparse log magnitude spectrum is input, +# phase spectrum of 2nd order system the output/target + +mag = np.zeros((nb_samples, width)) +phase = np.zeros((nb_samples, pairs)) + +for i in range(nb_samples): + + # choose a random fo + r = np.random.rand(1) + fo = fo_min + (fo_max-fo_min)*r[0] + Wo = fo*2*np.pi/Fs + L = int(np.floor(np.pi/Wo)) + + # sample 2nd order IIR filter with random peak freq and amplitude + + r = np.random.rand(2) + alpha = 0.1*np.pi + 0.8*np.pi*r[0] + gamma = r[1] + w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L)*Wo) + + # map to sparse input and output arrays + + for m in range(1,L): + bin = int(np.floor(m*Wo*width/np.pi)) + mag[i,bin] = np.log10(np.abs(h[m-1])) + phase_rect = h[m-1]/np.abs(h[m-1]) + phase[i,2*bin] = phase_rect.real + phase[i,2*bin+1] = phase_rect.imag + +print(mag.shape) +print(phase.shape) + +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=width)) +model.add(layers.Dense(pairs, input_dim=pairs)) +model.summary() + +# Compile and fit our model + +from keras import optimizers +sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='mse', optimizer=sgd) +history = model.fit(mag, phase, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in rectangular coordinates over all samples + +phase_est = model.predict(mag) +err = (phase_est - phase) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) + +err_angle = np.arctan2(err[:,1], 1) +print(err_angle.shape) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %4.2f std: %4.2f rads" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + + plt.figure(2) + plt.hist(err_angle*180/np.pi, bins=20) + plt.title('phase angle error (deg)') + + fig = plt.figure(3) + ax1 = fig.add_subplot(111) + plt.plot(20*mag[1,:]) + ax2 = ax1.twinx() + phase = np.unwrap(np.arctan2(phase[1::2], phase[::2])) + plt.plot(phase, 'g') + phase_est = np.unwrap(np.arctan2(phase_est[1::2], phase_est[::2])) + plt.plot(phase, 'r') + + plt.show() + diff --git a/phasenn_test8.py b/phasenn_test8.py new file mode 100755 index 0000000..426394c --- /dev/null +++ b/phasenn_test8.py @@ -0,0 +1,161 @@ +#!/usr/bin/python3 +# phasenn_test8.py +# +# David Rowe Oct 2019 + +# Estimate phase spectra from amplitude spectra for a 2nd order IIR +# filter, just like a Hilbert Transform. + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import matplotlib.pyplot as plt +from scipy import signal +from keras import backend as K + +# custom loss function +def sparse_loss(y_true, y_pred): + mask = K.cast( K.not_equal(y_pred, 0), dtype='float32') + n = K.sum(mask) + return K.sum(K.square((y_pred - y_true)*mask))/n + +# testing custom loss function +x = layers.Input(shape=(None,)) +y = layers.Input(shape=(None,)) +loss_func = K.Function([x, y], [sparse_loss(x, y)]) +assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1]) +assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1]) + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 400000 +nb_batch = 32 +nb_epochs = 100 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data. 
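+
+# Illustrative sketch (uses Fs and width defined above): harmonic m of a frame
+# with fundamental Wo lands in sparse bin round(m*Wo*width/pi), so the
+# width-point vectors below cover 0..pi (0..Fs/2).  For example, a 110 Hz
+# fundamental maps its harmonics to bins 7, 14, 21, ... 246:
+
+Wo_demo = 110*2*np.pi/Fs                  # 110 Hz fundamental in radians/sample
+L_demo = int(np.floor(np.pi/Wo_demo))     # 36 harmonics below Fs/2
+bins_demo = [int(np.round(m*Wo_demo*width/np.pi)) for m in range(1,L_demo)]
+#print(bins_demo)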
+ +filter_amp = np.zeros((nb_samples, width)) +# phase as an angle +filter_phase = np.zeros((nb_samples, width)) +# phase encoded as cos,sin pairs: +filter_phase_rect = np.zeros((nb_samples, pairs)) +Wo = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) + +for i in range(nb_samples): + + # distribute fo randomly on a log scale, gives us more training + # data with low freq frames which have more harmonics and are + # harder to match + r = np.random.rand(1) + log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0] + fo = 10 ** log_fo + Wo[i] = fo*2*np.pi/Fs + L[i] = int(np.floor(np.pi/Wo[i])) + + # sample 2nd order IIR filter with random peak freq + + r = np.random.rand(2) + alpha = 0.1*np.pi + 0.8*np.pi*r[0] + gamma = r[1] + w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i]) + + for m in range(1,L[i]): + bin = int(np.round(m*Wo[i]*width/np.pi)) + mWo = bin*np.pi/width + + filter_amp[i,bin] = np.log10(abs(h[m-1])) + filter_phase[i,bin] = np.angle(h[m-1]) + filter_phase_rect[i,2*bin] = np.cos(filter_phase[i,bin]) + filter_phase_rect[i,2*bin+1] = np.sin(filter_phase[i,bin]) + +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=width)) +model.add(layers.Dense(4*pairs, activation='relu')) +model.add(layers.Dense(pairs)) +model.summary() + +from keras import optimizers +sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss=sparse_loss, optimizer=sgd) +history = model.fit(filter_amp, filter_phase_rect, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in rectangular coordinates over all samples + +filter_phase_rect_est = model.predict(filter_amp) +ind = np.nonzero(filter_phase_rect) +err = (filter_phase_rect[ind] - filter_phase_rect_est[ind]) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) + +c1 = filter_phase_rect[ind]; c1 = c1[::2] + 1j*c1[1::2] +c2 = filter_phase_rect_est[ind]; c2 = c2[::2] + 1j*c2[1::2] +err_angle = np.angle(c1 * np.conj(c2)) + +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %4.2f std: %4.2f rads" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +def sample_model(r): + phase = np.zeros(width, dtype=complex) + phase_est = np.zeros(width, dtype=complex) + phase_err = np.zeros(width, dtype=complex) + phase_filt = np.zeros(width) + amp_filt = np.zeros(width) + + for m in range(1,L[r]): + wm = m*Wo[r] + bin = int(np.round(wm*width/np.pi)) + phase[m] = filter_phase_rect[r,2*bin] + 1j*filter_phase_rect[r,2*bin+1] + phase_est[m] = filter_phase_rect_est[r,2*bin] + 1j*filter_phase_rect_est[r,2*bin+1] + phase_err[m] = phase[m] * np.conj(phase_est[m]) + amp_filt[m] = filter_amp[r,bin] + return phase, phase_err, amp_filt + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + plt.show(block=False) + + plt.figure(2) + plt.subplot(211) + plt.hist(err_angle*180/np.pi, bins=20) + plt.subplot(212) + plt.hist(Wo*(Fs/2)/np.pi, bins=20) + plt.title('phase angle error (deg) and fo (Hz)') + plt.show(block=False) + + plt.figure(3) + plt.title('sample vectors and error') + for r in range(12): + plt.subplot(3,4,r+1) + phase, phase_err, amp_filt = sample_model(r) + plt.plot(np.angle(phase[1:L[r]])*180/np.pi,'g') + plt.plot(np.angle(phase_err[1:L[r]])*180/np.pi,'r') + plt.ylim(-180,180) + plt.show(block=False) + + plt.figure(4) + plt.title('filter amplitudes') + for r in range(12): + 
plt.subplot(3,4,r+1) + phase, phase_err, amp_filt = sample_model(r) + plt.plot(amp_filt[1:L[r]],'g') + plt.show(block=False) + + # click on last figure to close all and finish + plt.waitforbuttonpress(0) + plt.close() diff --git a/phasenn_test9.py b/phasenn_test9.py new file mode 100755 index 0000000..e3f7cdd --- /dev/null +++ b/phasenn_test9.py @@ -0,0 +1,166 @@ +#!/usr/bin/python3 +# phasenn_test9.py +# +# David Rowe Nov 2019 + +# Estimate an impulse position from the phase spectra of a 2nd order system excited by an impulse +# +# periodic impulse train Wo at time offset n0 -> 2nd order system -> discrete phase specta -> NN -> n0 + +# TODO: +# [ ] sanity check n0 phases + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers +import matplotlib.pyplot as plt +from scipy import signal +from keras import backend as K + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 10000 +nb_batch = 32 +nb_epochs = 10 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data + +amp = np.zeros((nb_samples, width)) +# phase as an angle +phase = np.zeros((nb_samples, width)) +# phase encoded as cos,sin pairs: +phase_rect = np.zeros((nb_samples, pairs)) +Wo = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) +n0 = np.zeros(nb_samples, dtype=int) +target = np.zeros((nb_samples,1)) + +for i in range(nb_samples): + + # distribute fo randomly on a log scale, gives us more training + # data with low freq frames which have more harmonics and are + # harder to match + r = np.random.rand(1) + log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0] + fo = 10 ** log_fo + Wo[i] = fo*2*np.pi/Fs + L[i] = int(np.floor(np.pi/Wo[i])) + # pitch period in samples + P = 2*L[i] + + r = np.random.rand(3) + + # sample 2nd order IIR filter with random peak freq (alpha) and peak amplitude (gamma) + alpha = 0.1*np.pi + 0.4*np.pi*r[0] + gamma = 0.9 + 0.09*r[1] + w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i]) + + # select n0 between 0...P-1 (it's periodic) + n0[i] = r[2]*P + e = np.exp(-1j*n0[i]*range(1,L[i])*Wo[i]) + + for m in range(1,L[i]): + bin = int(np.round(m*Wo[i]*width/np.pi)) + mWo = bin*np.pi/width + + amp[i,bin] = np.log10(abs(h[m-1])) + phase[i,bin] = np.angle(h[m-1]*e[m-1]) + phase_rect[i,2*bin] = np.cos(phase[i,bin]) + phase_rect[i,2*bin+1] = np.sin(phase[i,bin]) + + # target is n0 in rec coords + target[i] = n0[i]/P + +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=pairs)) +model.add(layers.Dense(128, activation='relu')) +model.add(layers.Dense(1)) +model.summary() + +from keras import optimizers +sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss="mse", optimizer=sgd) +history = model.fit(phase_rect, target, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in rectangular coordinates over all samples + +target_est = model.predict(phase_rect) +err = target - target_est +var = np.var(err) +std = np.std(err) +print("var: %f std: %f" % (var,std)) + +def sample_freq(r): + phase_L = np.zeros(L[r], dtype=complex) + amp_L = np.zeros(L[r]) + + for m in range(1,L[r]): + wm = m*Wo[r] + bin = int(np.round(wm*width/np.pi)) + phase_L[m] = phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1] + amp_L[m] = amp[r,bin] + return phase_L, amp_L + +# synthesise time domain signal +def sample_time(r): + s = np.zeros(2*N); + + for m in range(1,L[r]): + wm = 
m*Wo[r] + bin = int(np.round(wm*width/np.pi)) + Am = 10 ** amp[r,bin] + phi_m = np.angle(phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1]) + s = s + Am*np.cos(wm*(range(2*N)) + phi_m) + return s + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + plt.show(block=False) + + plt.figure(2) + plt.hist(err, bins=20) + plt.show(block=False) + + plt.figure(3) + plt.plot(target[:12],'b') + plt.plot(target_est[:12],'g') + plt.show(block=False) + + plt.figure(4) + plt.title('Freq Domain') + for r in range(12): + plt.subplot(3,4,r+1) + phase_L, amp_L = sample_freq(r) + plt.plot(20*amp_L,'g') + plt.ylim(-20,20) + plt.show(block=False) + + plt.figure(5) + plt.title('Time Domain') + for r in range(12): + plt.subplot(3,4,r+1) + s = sample_time(r) + P = 2*L[r] + n0_ = target_est[r]*P + print("F0: %5.1f P: %3d L: %3d n0: %3d n0_est: %5.1f" % (Wo[r]*(Fs/2)/np.pi, P, L[r], n0[r], n0_)) + plt.plot(s,'g') + plt.plot([n0[r],n0[r]], [-25,25],'r') + plt.plot([n0_,n0_], [-25,25],'b') + plt.ylim(-50,50) + plt.show(block=False) + + # click on last figure to close all and finish + plt.waitforbuttonpress(0) + plt.close() diff --git a/phasenn_train.py b/phasenn_train.py new file mode 100755 index 0000000..b9fda7f --- /dev/null +++ b/phasenn_train.py @@ -0,0 +1,114 @@ +#!/usr/bin/python3 +# phasenn_train.py +# +# David Rowe August 2019 +# +# Keras model for estimating the phase of sinusoidally modelled speech + +# To generate features: +# $ ./c2sim ~/Downloads/all_speech_8k.sw --dumpphase_nnl train.f32 + +import numpy as np +import sys +from keras.layers import Dense +from keras import models,layers +from keras import initializers + +if len(sys.argv) < 2: + print("usage: phasenn_train.py train.f32") + sys.exit(0) + +# constants + +max_amp = 80 # sparsevector covering 0 ... Fs/2 +nb_features = 243 # number of sparse features/row in input training data file +nb_epochs = 10 + +# load training data + +feature_file = sys.argv[1] +features = np.fromfile(feature_file, dtype='float32') +nb_frames = int(len(features)/nb_features) +print("nb_frames: %d" % (nb_frames)) + +# 0..80 log10(A) +# 81..161 cos(phi) +# 162..242 sin(phi) + +features = np.reshape(features, (nb_frames, nb_features)) +print("features shape:") +print(features.shape) + +# So the idea is we can predict the next frames phases from the +# current frame, and the magnitude spectrum. For voiced speech, the +# sinusoids are continuous, so can be predicted from frame to frame if +# you know the frequency and previous phase. We encode the frequency +# as the position in the sprase vector. + +# Cascased with that is phase spectra due to dispersion of the phase +# response of the vocal tract filter, e.g. a large dispersion around +# resonances. We supply the magnitude spectra to help model the vocal +# tract filter phase. + +# Unvoiced speech has more random phase. Hopefully the NN can work +# out if the speech is voiced or unvoiced from the magnitide spectra. + +# The phase is encoded using cos and sin of the phase, as these are +# bounded by +/-1 + +# So input features are this frame's log(A), and last frames phase. +# The output features we are trying to model are this frames phase. 
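+
+# Illustrative sketch (not used below): recovering the sparse log amplitude
+# and phase angle vectors from one frame of the feature matrix, using the
+# layout documented above:
+
+frame_demo = features[0]
+logA_demo = frame_demo[:max_amp+1]              # 0..80   log10(A)
+cos_demo = frame_demo[max_amp+1:2*max_amp+2]    # 81..161 cos(phi)
+sin_demo = frame_demo[2*max_amp+2:]             # 162..242 sin(phi)
+phi_demo = np.arctan2(sin_demo, cos_demo)       # phase angles in radians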
+
+train = np.concatenate((features[1:,:max_amp+1], features[:-1,max_amp+1:]), axis=1)
+target = features[1:,max_amp+1:]
+nb_input = train.shape[1]
+nb_output = target.shape[1]
+
+model = models.Sequential()
+model.add(layers.Dense(256, activation='relu', input_dim=nb_input))
+model.add(layers.Dense(256, activation='relu'))
+model.add(layers.Dense(256, activation='relu'))
+model.add(layers.Dense(nb_output, activation='linear'))
+
+# Custom loss function that measures the difference in phase only at the
+# non-zero elements of the target (yTrue).  This could be extended to
+# weight each phase error by the (log) amplitude of each harmonic
+
+import keras.backend as K
+def customLoss(yTrue, yPred):
+    # generate a mask vector with 1's on the non-zero values of yTrue
+    mask = abs(K.sign(yTrue))
+    # collect error in cos() and sin() terms, ignoring yPred values outside of
+    # the harmonics we care about
+    error = yTrue - mask * yPred
+    return K.sum(error * error)
+
+# Compile our model
+
+from keras import optimizers
+model.compile(loss=customLoss, optimizer='sgd')
+
+# fit model, using 20% of our data for validation
+
+history = model.fit(train, target, validation_split=0.2, batch_size=32, epochs=nb_epochs)
+model.save("phasenn_model.h5")
+
+import matplotlib.pyplot as plt
+
+plot_en = 0;
+if plot_en:
+    plt.figure(1)
+    plt.plot(10*np.sqrt(history.history['loss']))
+    plt.plot(10*np.sqrt(history.history['val_loss']))
+    plt.title('model loss')
+    plt.ylabel('rms error (rad)')
+    plt.xlabel('epoch')
+    plt.legend(['train', 'valid'], loc='upper right')
+    plt.show()
+
+# run model on the training data and measure variance, should be similar to the training "loss"
+
+train_out = model.predict(train)
+err = (train_out - target)
+var = np.var(err)
+std = np.std(err)
+print("var: %f std: %f" % (var,std))
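+
+# Optional sketch: reloading the saved model for inference.  Keras needs the
+# custom loss handed back via custom_objects, and the frames passed in are
+# assumed to use the same sparse feature layout as the training data above.
+
+reload_demo = 0;
+if reload_demo:
+    from keras.models import load_model
+    model2 = load_model("phasenn_model.h5", custom_objects={'customLoss': customLoss})
+    phase_out = model2.predict(train[:1])   # cos,sin pairs for one frame
+    print(phase_out.shape)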