mirror of https://github.com/drowe67/phasenn.git
moved out of codec2 repo
commit
99da10f0d4
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test1.py
|
||||
#
|
||||
# David Rowe August 2019
|
||||
|
||||
# Keras model for testing phase modelling using NNs. Self contained, generates
|
||||
# it's own training data. Can model linear phase at freq up to pi/4
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of samples in frames
|
||||
nb_samples = 1000000
|
||||
nb_batch = 512
|
||||
nb_epochs = 400
|
||||
np_input = 3
|
||||
nb_output = 2
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
def quant(val, levels):
|
||||
return np.round(val*levels)/levels
|
||||
|
||||
w = np.zeros(nb_samples)
|
||||
phase_start = np.zeros(nb_samples)
|
||||
phase_end = np.zeros(nb_samples)
|
||||
for i in range(nb_samples):
|
||||
x=np.random.rand(2)
|
||||
w[i] = x[0]*np.pi/4
|
||||
phase_start[i] = -np.pi + x[1]*2*np.pi
|
||||
phase_end[i] = phase_start[i] + N*w[i];
|
||||
|
||||
train = np.column_stack((w, np.cos(phase_start), np.sin(phase_start)))
|
||||
target = np.column_stack((np.cos(phase_end), np.sin(phase_end)))
|
||||
print(train.shape)
|
||||
print(target.shape)
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(256, activation='relu', input_dim=np_input))
|
||||
model.add(layers.Dense(256, activation='relu', input_dim=np_input))
|
||||
model.add(layers.Dense(nb_output, activation='linear'))
|
||||
model.summary()
|
||||
|
||||
# Compile our model
|
||||
|
||||
from keras import optimizers
|
||||
model.compile(loss='mse', optimizer='adam')
|
||||
|
||||
# fit model, using 20% of our data for validation
|
||||
|
||||
history = model.fit(train, target, validation_split=0.2, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# test actual error in angle over training data
|
||||
|
||||
train_out = model.predict(train)
|
||||
err = (train_out - target)
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect coord var: %f std: %f" % (var,std))
|
||||
|
||||
err_angle = np.arctan2(np.sin(err[:,0]),np.cos(err[:,1]))
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %f std: %f" % (var,std))
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(10*np.log10(history.history['loss']))
|
||||
plt.plot(10*np.log10(history.history['val_loss']))
|
||||
plt.title('model loss (dB)')
|
||||
plt.xlabel('epoch')
|
||||
plt.legend(['train', 'valid'], loc='upper right')
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err_angle, bins=20)
|
||||
plt.show()
|
||||
|
||||
#plt.figure(2)
|
||||
#plt.hist(target)
|
||||
#plt.show()
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test2.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Keras model for testing phase modelling using NNs. A simple four weight
|
||||
# linear model to map a constant phase shift, arriving at the same model as
|
||||
# linear algera. Good sanity check
|
||||
|
||||
# | cos(end) | = | cos(Nw) -sin(Nw) | | cos(start) |
|
||||
# | sin(end) | | sin(Nw) cos(Nw) | | sin(start) |
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import keras.backend as K
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 1000
|
||||
nb_batch = 1
|
||||
nb_epochs = 3
|
||||
width = 1
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
# phase encoded as cos,sin pairs
|
||||
phase_start = np.zeros((nb_samples, 2))
|
||||
phase_end = np.zeros((nb_samples, 2))
|
||||
|
||||
# a shift of pi/4 across the frame of N samples
|
||||
w = np.pi/(4*N);
|
||||
for i in range(nb_samples):
|
||||
r = np.random.rand(1)
|
||||
phase_start_pol = -np.pi + r[0]*2*np.pi
|
||||
phase_start[i,0] = np.cos(phase_start_pol)
|
||||
phase_start[i,1] = np.sin(phase_start_pol)
|
||||
phase_end_pol = phase_start_pol + N*w
|
||||
phase_end[i,0] = np.cos(phase_end_pol)
|
||||
phase_end[i,1] = np.sin(phase_end_pol)
|
||||
|
||||
#print(phase_start.shape)
|
||||
#print(phase_end.shape)
|
||||
#print(phase_start[:10])
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(2, bias=False, input_dim=2))
|
||||
model.summary()
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
print(model.get_weights())
|
||||
|
||||
# measure error over all samples
|
||||
|
||||
phase_end_est = model.predict(phase_start)
|
||||
err = (phase_end_est - phase_end)
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
#print(err[:5,:])
|
||||
|
||||
err_angle = np.arctan2(phase_end_est[:,1], phase_end_est[:,0]) - np.arctan2(phase_end[:,1], phase_end[:,0])
|
||||
#print(err[:5,:])
|
||||
print(err_angle.shape)
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %f std: %f" % (var,std))
|
||||
|
||||
plot_en = 0;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err_angle, bins=20)
|
||||
plt.title('phase angle error')
|
||||
plt.show()
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test3.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Keras model for testing phase modelling using NNs. Extending four
|
||||
# weights per phase test2 to a vector of phases, each with a different
|
||||
# shift across the frame
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import keras.backend as K
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 10000
|
||||
nb_batch = 1
|
||||
nb_epochs = 5
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
# phase encoded as cos,sin pairs
|
||||
phase_start = np.zeros((nb_samples, pairs))
|
||||
phase_end = np.zeros((nb_samples, pairs))
|
||||
|
||||
# Map 0...width-1 to 0...pi
|
||||
w = np.pi/width;
|
||||
for i in range(nb_samples):
|
||||
for m in range(width):
|
||||
r = np.random.rand(1)
|
||||
phase_start_pol = -np.pi + r[0]*2*np.pi
|
||||
phase_start[i,2*m] = np.cos(phase_start_pol)
|
||||
phase_start[i,2*m+1] = np.sin(phase_start_pol)
|
||||
phase_end_pol = phase_start_pol + N*w*m
|
||||
phase_end[i,2*m] = np.cos(phase_end_pol)
|
||||
phase_end[i,2*m+1] = np.sin(phase_end_pol)
|
||||
|
||||
print(phase_start.shape)
|
||||
print(phase_end.shape)
|
||||
|
||||
# note most of these weights whould end up being 0, as we only need 4 weights to map each phase
|
||||
# rotation from an input to output pair
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, bias=False, input_dim=pairs))
|
||||
model.summary()
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mse', optimizer=sgd)
|
||||
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error over all samples
|
||||
|
||||
phase_end_est = model.predict(phase_start)
|
||||
err = (phase_end_est - phase_end)
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
#print(err[:5,:])
|
||||
|
||||
err_angle = np.arctan2(phase_end_est[:,1], phase_end_est[:,0]) - np.arctan2(phase_end[:,1], phase_end[:,0])
|
||||
#print(err[:5,:])
|
||||
print(err_angle.shape)
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %f std: %f" % (var,std))
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err_angle, bins=20)
|
||||
plt.title('phase angle error')
|
||||
plt.show()
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test4.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Keras model for testing phase modelling using NNs. Like test3 but
|
||||
# with sparse input vectors of harmonic phases, as this is how phases
|
||||
# will be presented from Codec 2.
|
||||
|
||||
# Lessons learned:
|
||||
# 1. We train slower than test3, as we have less information per samples
|
||||
# with a sparse subset of discrete harmonics
|
||||
# 2. We need to quantise the frequency of each bin to get low error, ow
|
||||
# we introduce a +/- 0.5bin random error in the training data, with is
|
||||
# theoretically 8 degrees, but about 10-11 in practice.
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import keras.backend as K
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 100000
|
||||
nb_batch = 32
|
||||
nb_epochs = 50
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
# phase encoded as cos,sin pairs
|
||||
phase_start = np.zeros((nb_samples, pairs))
|
||||
phase_end = np.zeros((nb_samples, pairs))
|
||||
Wo = np.zeros(nb_samples)
|
||||
L = np.zeros(nb_samples, dtype=int)
|
||||
|
||||
# Map 0...width-1 to 0...pi
|
||||
for i in range(nb_samples):
|
||||
r = np.random.rand(1)
|
||||
fo = fo_min + (fo_max-fo_min)*r[0]
|
||||
Wo[i] = fo*2*np.pi/Fs
|
||||
L[i] = int(np.floor(np.pi/Wo[i]))
|
||||
for m in range(1, L[i]):
|
||||
wm = m*Wo[i]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
# Quantise frequency to this bin, this step is important to
|
||||
# match results of test3. Without it we are adding +/- 0.5 bin
|
||||
# uncertainty in the frequency, which will randomise phase shifts
|
||||
# across bins
|
||||
wm = bin*np.pi/width
|
||||
r = np.random.rand(1)
|
||||
phase_start_pol = -np.pi + r[0]*2*np.pi
|
||||
phase_start[i,2*bin] = np.cos(phase_start_pol)
|
||||
phase_start[i,2*bin+1] = np.sin(phase_start_pol)
|
||||
phase_end_pol = phase_start_pol + N*wm
|
||||
phase_end[i,2*bin] = np.cos(phase_end_pol)
|
||||
phase_end[i,2*bin+1] = np.sin(phase_end_pol)
|
||||
|
||||
print(phase_start.shape)
|
||||
print(phase_end.shape)
|
||||
|
||||
# note most of these weights whould end up being 0, as we only need
|
||||
# two wieghts to map each phase rotation
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, bias=False, input_dim=pairs))
|
||||
model.summary()
|
||||
|
||||
import keras.backend as K
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mse', optimizer=sgd)
|
||||
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error over non-zero samples
|
||||
|
||||
phase_end_est = model.predict(phase_start)
|
||||
ind = np.nonzero(phase_start)
|
||||
err = (phase_end_est[ind] - phase_end[ind])
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
#print(err[:5,:])
|
||||
|
||||
# approximation if error is small
|
||||
err_angle = np.arctan2(err[1::2], 1)
|
||||
#print(err[:5,:])
|
||||
print(err_angle.shape)
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %f std: %f" % (var,std))
|
||||
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err_angle, bins=20)
|
||||
plt.title('phase angle error')
|
||||
|
||||
plt.figure(3)
|
||||
r = 0
|
||||
phase = np.zeros(width)
|
||||
phase_est = np.zeros(width)
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
#print("bin: %d %f %f" % (bin, phase_end[r,2*bin], phase_end[r,2*bin+1]))
|
||||
phase[m] = np.arctan2(phase_end[r,2*bin+1], phase_end[r,2*bin])
|
||||
phase_est[m] = np.arctan2(phase_end_est[r,2*bin+1], phase_end_est[r,2*bin])
|
||||
|
||||
plt.plot(phase[1:L[r]+1],'g')
|
||||
plt.plot(phase_est[1:L[r]+1],'r')
|
||||
|
||||
plt.show()
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test5.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Sparse model like test4 but with non-linear layers (activation functions) as
|
||||
# these will be required later when we add amplitude information.
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import keras.backend as K
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 100000
|
||||
nb_batch = 32
|
||||
nb_epochs = 50
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
# phase encoded as cos,sin pairs
|
||||
phase_start = np.zeros((nb_samples, pairs))
|
||||
phase_end = np.zeros((nb_samples, pairs))
|
||||
Wo = np.zeros(nb_samples)
|
||||
L = np.zeros(nb_samples, dtype=int)
|
||||
|
||||
# Map 0...width-1 to 0...pi
|
||||
for i in range(nb_samples):
|
||||
r = np.random.rand(1)
|
||||
fo = fo_min + (fo_max-fo_min)*r[0]
|
||||
Wo[i] = fo*2*np.pi/Fs
|
||||
L[i] = int(np.floor(np.pi/Wo[i]))
|
||||
for m in range(1, L[i]):
|
||||
wm = m*Wo[i]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
# Quantise frequency to this bin, this step is important to
|
||||
# match results of test3. Without it we are adding +/- 0.5 bin
|
||||
# uncertainty in the frequency, which will randomise phase shifts
|
||||
# across bins
|
||||
wm = bin*np.pi/width
|
||||
r = np.random.rand(1)
|
||||
phase_start_pol = -np.pi + r[0]*2*np.pi
|
||||
phase_start[i,2*bin] = np.cos(phase_start_pol)
|
||||
phase_start[i,2*bin+1] = np.sin(phase_start_pol)
|
||||
phase_end_pol = phase_start_pol + N*wm
|
||||
phase_end[i,2*bin] = np.cos(phase_end_pol)
|
||||
phase_end[i,2*bin+1] = np.sin(phase_end_pol)
|
||||
|
||||
print(phase_start.shape)
|
||||
print(phase_end.shape)
|
||||
|
||||
# note most of these weights whould end up being 0, as we only need
|
||||
# two wieghts to map each phase rotation
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, activation='relu', input_dim=pairs))
|
||||
model.add(layers.Dense(pairs, input_dim=pairs))
|
||||
model.summary()
|
||||
|
||||
import keras.backend as K
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mse', optimizer=sgd)
|
||||
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error over non-zero samples
|
||||
|
||||
phase_end_est = model.predict(phase_start)
|
||||
ind = np.nonzero(phase_start)
|
||||
err = (phase_end_est[ind] - phase_end[ind])
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
#print(err[:5,:])
|
||||
|
||||
# approximation if error is small
|
||||
err_angle = np.arctan2(err[1::2], 1)
|
||||
#print(err[:5,:])
|
||||
print(err_angle.shape)
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %f std: %f" % (var,std))
|
||||
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err_angle, bins=20)
|
||||
plt.title('phase angle error')
|
||||
|
||||
plt.figure(3)
|
||||
r = 0
|
||||
phase = np.zeros(width)
|
||||
phase_est = np.zeros(width)
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
#print("bin: %d %f %f" % (bin, phase_end[r,2*bin], phase_end[r,2*bin+1]))
|
||||
phase[m] = np.arctan2(phase_end[r,2*bin+1], phase_end[r,2*bin])
|
||||
phase_est[m] = np.arctan2(phase_end_est[r,2*bin+1], phase_end_est[r,2*bin])
|
||||
|
||||
plt.plot(phase[1:L[r]+1],'g')
|
||||
plt.plot(phase_est[1:L[r]+1],'r')
|
||||
|
||||
plt.show()
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test6.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Extending test5 to deal with input/output vectors that have slightly
|
||||
# different "rates", i.e. Wo changing across the frame which is usual
|
||||
# for voiced speech.
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 100000
|
||||
nb_batch = 32
|
||||
nb_epochs = 25
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
dfo = 0.02
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
# phase encoded as cos,sin pairs ref:
|
||||
phase_start = np.zeros((nb_samples, pairs))
|
||||
phase_end = np.zeros((nb_samples, pairs))
|
||||
Wo_N = np.zeros(nb_samples)
|
||||
Wo_0 = np.zeros(nb_samples)
|
||||
L = np.zeros(nb_samples, dtype=int)
|
||||
|
||||
for i in range(nb_samples):
|
||||
|
||||
# parameters at time 0 (start of current frame)
|
||||
# distribute fo randomnly on a log scale
|
||||
r = np.random.rand(1)
|
||||
log_fo_0 = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
|
||||
fo_0 = 10 ** log_fo_0
|
||||
Wo_0[i] = fo_0*2*np.pi/Fs
|
||||
L_0 = int(np.floor(np.pi/Wo_0[i]))
|
||||
|
||||
# parameters at time N (end of current frame), allow a df0 freq change
|
||||
# across frame, typical of voiced speech
|
||||
r = np.random.rand(1)
|
||||
fo_N = fo_0 + (-2*dfo + dfo*r[0])*fo_0
|
||||
fo_N = np.max((fo_min, fo_N))
|
||||
fo_N = np.min((fo_max, fo_N))
|
||||
#fo_N = fo_0
|
||||
Wo_N[i] = fo_N*2*np.pi/Fs
|
||||
L_N = int(np.floor(np.pi/Wo_N[i]))
|
||||
L[i] = np.min((L_0, L_N))
|
||||
#print("fo: %f %f L: %d %d min: %d" % (fo_0, fo_N, L_0, L_N, L[i]))
|
||||
|
||||
for m in range(1,L[i]):
|
||||
bin_0 = int(np.round(m*Wo_0[i]*width/np.pi))
|
||||
mWo_0 = bin_0*np.pi/width
|
||||
bin_N = int(np.round(m*Wo_N[i]*width/np.pi))
|
||||
mWo_N = bin_N*np.pi/width
|
||||
#print("m: %d bin_0: %d bin_N: %d" % (m, bin_0,bin_N))
|
||||
|
||||
r = np.random.rand(1)
|
||||
phase_start_pol = -np.pi + r[0]*2*np.pi
|
||||
phase_start[i,2*bin_0] = np.cos(phase_start_pol)
|
||||
phase_start[i,2*bin_0+1] = np.sin(phase_start_pol)
|
||||
|
||||
# phase shift average of two frequencies
|
||||
phase_end_pol = phase_start_pol + N*(mWo_0 + mWo_N)/2
|
||||
|
||||
phase_end[i,2*bin_N] = np.cos(phase_end_pol)
|
||||
phase_end[i,2*bin_N+1] = np.sin(phase_end_pol)
|
||||
|
||||
print(Wo_0.shape, Wo_N.shape, phase_start.shape)
|
||||
input = np.column_stack([Wo_0, Wo_N, phase_start])
|
||||
print(input.shape)
|
||||
print(phase_end.shape)
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, activation='relu', input_dim=(pairs+2)))
|
||||
model.add(layers.Dense(pairs))
|
||||
model.summary()
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mse', optimizer=sgd)
|
||||
history = model.fit(input, phase_end, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error in rectangular coordinates over all samples
|
||||
|
||||
phase_end_est = model.predict(input)
|
||||
ind = np.nonzero(phase_end)
|
||||
err = (phase_end[ind] - phase_end_est[ind])
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
|
||||
print(phase_end_est.shape, err.shape)
|
||||
c1 = phase_end[ind]; c1 = c1[::2] + 1j*c1[1::2]
|
||||
c2 = phase_end_est[ind]; c2 = c2[::2] + 1j*c2[1::2]
|
||||
err_angle = np.angle(c1 * np.conj(c2))
|
||||
|
||||
print(err_angle[:5],err_angle.shape)
|
||||
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %4.2f std: %4.2f rads" % (var,std))
|
||||
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
|
||||
|
||||
def sample_model(r):
|
||||
phase = np.zeros(width, dtype=complex)
|
||||
phase_est = np.zeros(width, dtype=complex)
|
||||
phase_err = np.zeros(width, dtype=complex)
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo_N[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
phase[m] = phase_end[r,2*bin] + 1j*phase_end[r,2*bin+1]
|
||||
phase_est[m] = phase_end_est[r,2*bin] + 1j*phase_end_est[r,2*bin+1]
|
||||
phase_err[m] = phase[m] * np.conj(phase_est[m])
|
||||
return phase, phase_err
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(2)
|
||||
plt.subplot(211)
|
||||
plt.hist(err_angle*180/np.pi, bins=20)
|
||||
plt.subplot(212)
|
||||
plt.hist(Wo_0*(Fs/2)/np.pi, bins=20)
|
||||
plt.title('phase angle error (deg) and fo (Hz)')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(3)
|
||||
plt.title('sample vectors and error')
|
||||
for r in range(12):
|
||||
plt.subplot(3,4,r+1)
|
||||
phase, phase_err = sample_model(r)
|
||||
plt.plot(np.angle(phase[1:L[r]+1])*180/np.pi,'g')
|
||||
plt.plot(np.angle(phase_err[1:L[r]+1])*180/np.pi,'r')
|
||||
plt.show(block=False)
|
||||
|
||||
# click on last figure to close all and finish
|
||||
plt.waitforbuttonpress(0)
|
||||
plt.close()
|
|
@ -0,0 +1,177 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test7.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Extending PhaseNN model to have a linear and dispersive phase
|
||||
# component. The dispersive component is from a simple 2nd order IIR
|
||||
# filter.
|
||||
|
||||
# TODO: [ ] remove df0 support
|
||||
# [ ] remove Wo_0, Wo_N
|
||||
# [ ] replace random starting phases with phase realted to t0
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import signal
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 100000
|
||||
nb_batch = 32
|
||||
nb_epochs = 25
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
dfo = 0.00 # dfo models aren't solved yet
|
||||
|
||||
# Generate training data. Given the phase at the start of the frame,
|
||||
# and the frequency, determine the phase at the end of the frame
|
||||
|
||||
# phase encoded as cos,sin pairs ref:
|
||||
phase_start = np.zeros((nb_samples, pairs))
|
||||
phase_end = np.zeros((nb_samples, pairs))
|
||||
filter_amp = np.zeros((nb_samples, width))
|
||||
filter_phase = np.zeros((nb_samples, width))
|
||||
Wo_N = np.zeros(nb_samples)
|
||||
Wo_0 = np.zeros(nb_samples)
|
||||
L = np.zeros(nb_samples, dtype=int)
|
||||
|
||||
for i in range(nb_samples):
|
||||
|
||||
# parameters at time 0 (start of current frame)
|
||||
# distribute fo randomnly on a log scale
|
||||
r = np.random.rand(1)
|
||||
log_fo_0 = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
|
||||
fo_0 = 10 ** log_fo_0
|
||||
Wo_0[i] = fo_0*2*np.pi/Fs
|
||||
L_0 = int(np.floor(np.pi/Wo_0[i]))
|
||||
|
||||
# parameters at time N (end of current frame), allow a df0 freq change
|
||||
# across frame, typical of voiced speech
|
||||
r = np.random.rand(1)
|
||||
fo_N = fo_0 + (-2*dfo + dfo*r[0])*fo_0
|
||||
fo_N = np.max((fo_min, fo_N))
|
||||
fo_N = np.min((fo_max, fo_N))
|
||||
#fo_N = fo_0
|
||||
Wo_N[i] = fo_N*2*np.pi/Fs
|
||||
L_N = int(np.floor(np.pi/Wo_N[i]))
|
||||
L[i] = np.min((L_0, L_N))
|
||||
#print("fo: %f %f L: %d %d min: %d" % (fo_0, fo_N, L_0, L_N, L[i]))
|
||||
|
||||
# sample 2nd order IIR filter with random peak freq and amplitude
|
||||
|
||||
r = np.random.rand(2)
|
||||
alpha = 0.1*np.pi + 0.8*np.pi*r[0]
|
||||
gamma = 0.8
|
||||
#alpha = np.pi/4; gamma = 0.8
|
||||
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo_0[i])
|
||||
|
||||
for m in range(1,L[i]):
|
||||
bin_0 = int(np.round(m*Wo_0[i]*width/np.pi))
|
||||
mWo_0 = bin_0*np.pi/width
|
||||
bin_N = int(np.round(m*Wo_N[i]*width/np.pi))
|
||||
mWo_N = bin_N*np.pi/width
|
||||
#print("m: %d bin_0: %d bin_N: %d" % (m, bin_0,bin_N))
|
||||
|
||||
r = np.random.rand(1)
|
||||
phase_start_pol = -np.pi + r[0]*2*np.pi
|
||||
phase_start[i,2*bin_0] = np.cos(phase_start_pol)
|
||||
phase_start[i,2*bin_0+1] = np.sin(phase_start_pol)
|
||||
|
||||
# phase shift average of two frequencies
|
||||
phase_end_pol = phase_start_pol + N*(mWo_0 + mWo_N)/2 + np.angle(h[m-1])
|
||||
|
||||
phase_end[i,2*bin_N] = np.cos(phase_end_pol)
|
||||
phase_end[i,2*bin_N+1] = np.sin(phase_end_pol)
|
||||
|
||||
filter_amp[i,bin_0] = np.log10(abs(h[m-1]))
|
||||
filter_phase[i,bin_0] = np.angle(h[m-1])
|
||||
|
||||
input = np.column_stack([filter_amp, phase_start])
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, activation='relu', input_dim=(width+pairs)))
|
||||
model.add(layers.Dense(pairs))
|
||||
model.summary()
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mse', optimizer=sgd)
|
||||
history = model.fit(input, phase_end, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error in rectangular coordinates over all samples
|
||||
|
||||
phase_end_est = model.predict(input)
|
||||
ind = np.nonzero(phase_end)
|
||||
err = (phase_end[ind] - phase_end_est[ind])
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
|
||||
print(phase_end_est.shape, err.shape)
|
||||
c1 = phase_end[ind]; c1 = c1[::2] + 1j*c1[1::2]
|
||||
c2 = phase_end_est[ind]; c2 = c2[::2] + 1j*c2[1::2]
|
||||
err_angle = np.angle(c1 * np.conj(c2))
|
||||
|
||||
print(err_angle[:5],err_angle.shape)
|
||||
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %4.2f std: %4.2f rads" % (var,std))
|
||||
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
|
||||
|
||||
def sample_model(r):
|
||||
phase = np.zeros(width, dtype=complex)
|
||||
phase_est = np.zeros(width, dtype=complex)
|
||||
phase_err = np.zeros(width, dtype=complex)
|
||||
phase_filt = np.zeros(width)
|
||||
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo_N[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
phase[m] = phase_end[r,2*bin] + 1j*phase_end[r,2*bin+1]
|
||||
phase_est[m] = phase_end_est[r,2*bin] + 1j*phase_end_est[r,2*bin+1]
|
||||
phase_err[m] = phase[m] * np.conj(phase_est[m])
|
||||
phase_filt[m] = filter_phase[r,bin]
|
||||
return phase, phase_err, phase_filt
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(2)
|
||||
plt.subplot(211)
|
||||
plt.hist(err_angle*180/np.pi, bins=20)
|
||||
plt.subplot(212)
|
||||
plt.hist(Wo_0*(Fs/2)/np.pi, bins=20)
|
||||
plt.title('phase angle error (deg) and fo (Hz)')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(3)
|
||||
plt.title('sample vectors and error')
|
||||
for r in range(12):
|
||||
plt.subplot(3,4,r+1)
|
||||
phase, phase_err, phase_filt = sample_model(r)
|
||||
plt.plot(np.angle(phase[1:L[r]+1])*180/np.pi,'g')
|
||||
plt.plot(np.angle(phase_err[1:L[r]+1])*180/np.pi,'r')
|
||||
plt.plot(phase_filt[1:L[r]+1]*180/np.pi,'b')
|
||||
plt.ylim(-180,180)
|
||||
plt.show(block=False)
|
||||
|
||||
# click on last figure to close all and finish
|
||||
plt.waitforbuttonpress(0)
|
||||
plt.close()
|
|
@ -0,0 +1,117 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test7.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Keras model for testing phase modelling using NNs. Here we try
|
||||
# estimating the phase of a 2nd order system from it's amplitudes.
|
||||
# This models the dispersive component of speech phase spectra, up
|
||||
# until now we have been testing with the linear phase component.
|
||||
# This script emulates a Hilbert Transform, note however in practice
|
||||
# speech is not minimum phase so HTs have there limitations for real
|
||||
# speech signals.
|
||||
|
||||
import numpy as np
|
||||
from scipy import signal
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import keras.backend as K
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 10000
|
||||
nb_batch = 64
|
||||
nb_epochs = 10
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
|
||||
# Generate training data. Sparse log magnitude spectrum is input,
|
||||
# phase spectrum of 2nd order system the output/target
|
||||
|
||||
mag = np.zeros((nb_samples, width))
|
||||
phase = np.zeros((nb_samples, pairs))
|
||||
|
||||
for i in range(nb_samples):
|
||||
|
||||
# choose a random fo
|
||||
r = np.random.rand(1)
|
||||
fo = fo_min + (fo_max-fo_min)*r[0]
|
||||
Wo = fo*2*np.pi/Fs
|
||||
L = int(np.floor(np.pi/Wo))
|
||||
|
||||
# sample 2nd order IIR filter with random peak freq and amplitude
|
||||
|
||||
r = np.random.rand(2)
|
||||
alpha = 0.1*np.pi + 0.8*np.pi*r[0]
|
||||
gamma = r[1]
|
||||
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L)*Wo)
|
||||
|
||||
# map to sparse input and output arrays
|
||||
|
||||
for m in range(1,L):
|
||||
bin = int(np.floor(m*Wo*width/np.pi))
|
||||
mag[i,bin] = np.log10(np.abs(h[m-1]))
|
||||
phase_rect = h[m-1]/np.abs(h[m-1])
|
||||
phase[i,2*bin] = phase_rect.real
|
||||
phase[i,2*bin+1] = phase_rect.imag
|
||||
|
||||
print(mag.shape)
|
||||
print(phase.shape)
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
|
||||
model.add(layers.Dense(pairs, input_dim=pairs))
|
||||
model.summary()
|
||||
|
||||
# Compile and fit our model
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss='mse', optimizer=sgd)
|
||||
history = model.fit(mag, phase, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error in rectangular coordinates over all samples
|
||||
|
||||
phase_est = model.predict(mag)
|
||||
err = (phase_est - phase)
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
|
||||
err_angle = np.arctan2(err[:,1], 1)
|
||||
print(err_angle.shape)
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %4.2f std: %4.2f rads" % (var,std))
|
||||
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err_angle*180/np.pi, bins=20)
|
||||
plt.title('phase angle error (deg)')
|
||||
|
||||
fig = plt.figure(3)
|
||||
ax1 = fig.add_subplot(111)
|
||||
plt.plot(20*mag[1,:])
|
||||
ax2 = ax1.twinx()
|
||||
phase = np.unwrap(np.arctan2(phase[1::2], phase[::2]))
|
||||
plt.plot(phase, 'g')
|
||||
phase_est = np.unwrap(np.arctan2(phase_est[1::2], phase_est[::2]))
|
||||
plt.plot(phase, 'r')
|
||||
|
||||
plt.show()
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test8.py
|
||||
#
|
||||
# David Rowe Oct 2019
|
||||
|
||||
# Estimate phase spectra from amplitude spectra for a 2nd order IIR
|
||||
# filter, just like a Hilbert Transform.
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import signal
|
||||
from keras import backend as K
|
||||
|
||||
# custom loss function
|
||||
def sparse_loss(y_true, y_pred):
|
||||
mask = K.cast( K.not_equal(y_pred, 0), dtype='float32')
|
||||
n = K.sum(mask)
|
||||
return K.sum(K.square((y_pred - y_true)*mask))/n
|
||||
|
||||
# testing custom loss function
|
||||
x = layers.Input(shape=(None,))
|
||||
y = layers.Input(shape=(None,))
|
||||
loss_func = K.Function([x, y], [sparse_loss(x, y)])
|
||||
assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1])
|
||||
assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 400000
|
||||
nb_batch = 32
|
||||
nb_epochs = 100
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
|
||||
# Generate training data.
|
||||
|
||||
filter_amp = np.zeros((nb_samples, width))
|
||||
# phase as an angle
|
||||
filter_phase = np.zeros((nb_samples, width))
|
||||
# phase encoded as cos,sin pairs:
|
||||
filter_phase_rect = np.zeros((nb_samples, pairs))
|
||||
Wo = np.zeros(nb_samples)
|
||||
L = np.zeros(nb_samples, dtype=int)
|
||||
|
||||
for i in range(nb_samples):
|
||||
|
||||
# distribute fo randomly on a log scale, gives us more training
|
||||
# data with low freq frames which have more harmonics and are
|
||||
# harder to match
|
||||
r = np.random.rand(1)
|
||||
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
|
||||
fo = 10 ** log_fo
|
||||
Wo[i] = fo*2*np.pi/Fs
|
||||
L[i] = int(np.floor(np.pi/Wo[i]))
|
||||
|
||||
# sample 2nd order IIR filter with random peak freq
|
||||
|
||||
r = np.random.rand(2)
|
||||
alpha = 0.1*np.pi + 0.8*np.pi*r[0]
|
||||
gamma = r[1]
|
||||
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i])
|
||||
|
||||
for m in range(1,L[i]):
|
||||
bin = int(np.round(m*Wo[i]*width/np.pi))
|
||||
mWo = bin*np.pi/width
|
||||
|
||||
filter_amp[i,bin] = np.log10(abs(h[m-1]))
|
||||
filter_phase[i,bin] = np.angle(h[m-1])
|
||||
filter_phase_rect[i,2*bin] = np.cos(filter_phase[i,bin])
|
||||
filter_phase_rect[i,2*bin+1] = np.sin(filter_phase[i,bin])
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
|
||||
model.add(layers.Dense(4*pairs, activation='relu'))
|
||||
model.add(layers.Dense(pairs))
|
||||
model.summary()
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss=sparse_loss, optimizer=sgd)
|
||||
history = model.fit(filter_amp, filter_phase_rect, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error in rectangular coordinates over all samples
|
||||
|
||||
filter_phase_rect_est = model.predict(filter_amp)
|
||||
ind = np.nonzero(filter_phase_rect)
|
||||
err = (filter_phase_rect[ind] - filter_phase_rect_est[ind])
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("rect var: %f std: %f" % (var,std))
|
||||
|
||||
c1 = filter_phase_rect[ind]; c1 = c1[::2] + 1j*c1[1::2]
|
||||
c2 = filter_phase_rect_est[ind]; c2 = c2[::2] + 1j*c2[1::2]
|
||||
err_angle = np.angle(c1 * np.conj(c2))
|
||||
|
||||
var = np.var(err_angle)
|
||||
std = np.std(err_angle)
|
||||
print("angle var: %4.2f std: %4.2f rads" % (var,std))
|
||||
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
|
||||
|
||||
def sample_model(r):
|
||||
phase = np.zeros(width, dtype=complex)
|
||||
phase_est = np.zeros(width, dtype=complex)
|
||||
phase_err = np.zeros(width, dtype=complex)
|
||||
phase_filt = np.zeros(width)
|
||||
amp_filt = np.zeros(width)
|
||||
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
phase[m] = filter_phase_rect[r,2*bin] + 1j*filter_phase_rect[r,2*bin+1]
|
||||
phase_est[m] = filter_phase_rect_est[r,2*bin] + 1j*filter_phase_rect_est[r,2*bin+1]
|
||||
phase_err[m] = phase[m] * np.conj(phase_est[m])
|
||||
amp_filt[m] = filter_amp[r,bin]
|
||||
return phase, phase_err, amp_filt
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(2)
|
||||
plt.subplot(211)
|
||||
plt.hist(err_angle*180/np.pi, bins=20)
|
||||
plt.subplot(212)
|
||||
plt.hist(Wo*(Fs/2)/np.pi, bins=20)
|
||||
plt.title('phase angle error (deg) and fo (Hz)')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(3)
|
||||
plt.title('sample vectors and error')
|
||||
for r in range(12):
|
||||
plt.subplot(3,4,r+1)
|
||||
phase, phase_err, amp_filt = sample_model(r)
|
||||
plt.plot(np.angle(phase[1:L[r]])*180/np.pi,'g')
|
||||
plt.plot(np.angle(phase_err[1:L[r]])*180/np.pi,'r')
|
||||
plt.ylim(-180,180)
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(4)
|
||||
plt.title('filter amplitudes')
|
||||
for r in range(12):
|
||||
plt.subplot(3,4,r+1)
|
||||
phase, phase_err, amp_filt = sample_model(r)
|
||||
plt.plot(amp_filt[1:L[r]],'g')
|
||||
plt.show(block=False)
|
||||
|
||||
# click on last figure to close all and finish
|
||||
plt.waitforbuttonpress(0)
|
||||
plt.close()
|
|
@ -0,0 +1,166 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_test9.py
|
||||
#
|
||||
# David Rowe Nov 2019
|
||||
|
||||
# Estimate an impulse position from the phase spectra of a 2nd order system excited by an impulse
|
||||
#
|
||||
# periodic impulse train Wo at time offset n0 -> 2nd order system -> discrete phase specta -> NN -> n0
|
||||
|
||||
# TODO:
|
||||
# [ ] sanity check n0 phases
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import signal
|
||||
from keras import backend as K
|
||||
|
||||
# constants
|
||||
|
||||
N = 80 # number of time domain samples in frame
|
||||
nb_samples = 10000
|
||||
nb_batch = 32
|
||||
nb_epochs = 10
|
||||
width = 256
|
||||
pairs = 2*width
|
||||
fo_min = 50
|
||||
fo_max = 400
|
||||
Fs = 8000
|
||||
|
||||
# Generate training data
|
||||
|
||||
amp = np.zeros((nb_samples, width))
|
||||
# phase as an angle
|
||||
phase = np.zeros((nb_samples, width))
|
||||
# phase encoded as cos,sin pairs:
|
||||
phase_rect = np.zeros((nb_samples, pairs))
|
||||
Wo = np.zeros(nb_samples)
|
||||
L = np.zeros(nb_samples, dtype=int)
|
||||
n0 = np.zeros(nb_samples, dtype=int)
|
||||
target = np.zeros((nb_samples,1))
|
||||
|
||||
for i in range(nb_samples):
|
||||
|
||||
# distribute fo randomly on a log scale, gives us more training
|
||||
# data with low freq frames which have more harmonics and are
|
||||
# harder to match
|
||||
r = np.random.rand(1)
|
||||
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
|
||||
fo = 10 ** log_fo
|
||||
Wo[i] = fo*2*np.pi/Fs
|
||||
L[i] = int(np.floor(np.pi/Wo[i]))
|
||||
# pitch period in samples
|
||||
P = 2*L[i]
|
||||
|
||||
r = np.random.rand(3)
|
||||
|
||||
# sample 2nd order IIR filter with random peak freq (alpha) and peak amplitude (gamma)
|
||||
alpha = 0.1*np.pi + 0.4*np.pi*r[0]
|
||||
gamma = 0.9 + 0.09*r[1]
|
||||
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i])
|
||||
|
||||
# select n0 between 0...P-1 (it's periodic)
|
||||
n0[i] = r[2]*P
|
||||
e = np.exp(-1j*n0[i]*range(1,L[i])*Wo[i])
|
||||
|
||||
for m in range(1,L[i]):
|
||||
bin = int(np.round(m*Wo[i]*width/np.pi))
|
||||
mWo = bin*np.pi/width
|
||||
|
||||
amp[i,bin] = np.log10(abs(h[m-1]))
|
||||
phase[i,bin] = np.angle(h[m-1]*e[m-1])
|
||||
phase_rect[i,2*bin] = np.cos(phase[i,bin])
|
||||
phase_rect[i,2*bin+1] = np.sin(phase[i,bin])
|
||||
|
||||
# target is n0 in rec coords
|
||||
target[i] = n0[i]/P
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(pairs, activation='relu', input_dim=pairs))
|
||||
model.add(layers.Dense(128, activation='relu'))
|
||||
model.add(layers.Dense(1))
|
||||
model.summary()
|
||||
|
||||
from keras import optimizers
|
||||
sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
|
||||
model.compile(loss="mse", optimizer=sgd)
|
||||
history = model.fit(phase_rect, target, batch_size=nb_batch, epochs=nb_epochs)
|
||||
|
||||
# measure error in rectangular coordinates over all samples
|
||||
|
||||
target_est = model.predict(phase_rect)
|
||||
err = target - target_est
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("var: %f std: %f" % (var,std))
|
||||
|
||||
def sample_freq(r):
|
||||
phase_L = np.zeros(L[r], dtype=complex)
|
||||
amp_L = np.zeros(L[r])
|
||||
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
phase_L[m] = phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1]
|
||||
amp_L[m] = amp[r,bin]
|
||||
return phase_L, amp_L
|
||||
|
||||
# synthesise time domain signal
|
||||
def sample_time(r):
|
||||
s = np.zeros(2*N);
|
||||
|
||||
for m in range(1,L[r]):
|
||||
wm = m*Wo[r]
|
||||
bin = int(np.round(wm*width/np.pi))
|
||||
Am = 10 ** amp[r,bin]
|
||||
phi_m = np.angle(phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1])
|
||||
s = s + Am*np.cos(wm*(range(2*N)) + phi_m)
|
||||
return s
|
||||
|
||||
plot_en = 1;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(history.history['loss'])
|
||||
plt.title('model loss')
|
||||
plt.xlabel('epoch')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(2)
|
||||
plt.hist(err, bins=20)
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(3)
|
||||
plt.plot(target[:12],'b')
|
||||
plt.plot(target_est[:12],'g')
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(4)
|
||||
plt.title('Freq Domain')
|
||||
for r in range(12):
|
||||
plt.subplot(3,4,r+1)
|
||||
phase_L, amp_L = sample_freq(r)
|
||||
plt.plot(20*amp_L,'g')
|
||||
plt.ylim(-20,20)
|
||||
plt.show(block=False)
|
||||
|
||||
plt.figure(5)
|
||||
plt.title('Time Domain')
|
||||
for r in range(12):
|
||||
plt.subplot(3,4,r+1)
|
||||
s = sample_time(r)
|
||||
P = 2*L[r]
|
||||
n0_ = target_est[r]*P
|
||||
print("F0: %5.1f P: %3d L: %3d n0: %3d n0_est: %5.1f" % (Wo[r]*(Fs/2)/np.pi, P, L[r], n0[r], n0_))
|
||||
plt.plot(s,'g')
|
||||
plt.plot([n0[r],n0[r]], [-25,25],'r')
|
||||
plt.plot([n0_,n0_], [-25,25],'b')
|
||||
plt.ylim(-50,50)
|
||||
plt.show(block=False)
|
||||
|
||||
# click on last figure to close all and finish
|
||||
plt.waitforbuttonpress(0)
|
||||
plt.close()
|
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/python3
|
||||
# phasenn_train.py
|
||||
#
|
||||
# David Rowe August 2019
|
||||
#
|
||||
# Keras model for estimating the phase of sinusoidally modelled speech
|
||||
|
||||
# To generate features:
|
||||
# $ ./c2sim ~/Downloads/all_speech_8k.sw --dumpphase_nnl train.f32
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
from keras.layers import Dense
|
||||
from keras import models,layers
|
||||
from keras import initializers
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("usage: phasenn_train.py train.f32")
|
||||
sys.exit(0)
|
||||
|
||||
# constants
|
||||
|
||||
max_amp = 80 # sparsevector covering 0 ... Fs/2
|
||||
nb_features = 243 # number of sparse features/row in input training data file
|
||||
nb_epochs = 10
|
||||
|
||||
# load training data
|
||||
|
||||
feature_file = sys.argv[1]
|
||||
features = np.fromfile(feature_file, dtype='float32')
|
||||
nb_frames = int(len(features)/nb_features)
|
||||
print("nb_frames: %d" % (nb_frames))
|
||||
|
||||
# 0..80 log10(A)
|
||||
# 81..161 cos(phi)
|
||||
# 162..242 sin(phi)
|
||||
|
||||
features = np.reshape(features, (nb_frames, nb_features))
|
||||
print("features shape:")
|
||||
print(features.shape)
|
||||
|
||||
# So the idea is we can predict the next frames phases from the
|
||||
# current frame, and the magnitude spectrum. For voiced speech, the
|
||||
# sinusoids are continuous, so can be predicted from frame to frame if
|
||||
# you know the frequency and previous phase. We encode the frequency
|
||||
# as the position in the sprase vector.
|
||||
|
||||
# Cascased with that is phase spectra due to dispersion of the phase
|
||||
# response of the vocal tract filter, e.g. a large dispersion around
|
||||
# resonances. We supply the magnitude spectra to help model the vocal
|
||||
# tract filter phase.
|
||||
|
||||
# Unvoiced speech has more random phase. Hopefully the NN can work
|
||||
# out if the speech is voiced or unvoiced from the magnitide spectra.
|
||||
|
||||
# The phase is encoded using cos and sin of the phase, as these are
|
||||
# bounded by +/-1
|
||||
|
||||
# So input features are this frame's log(A), and last frames phase.
|
||||
# The output features we are trying to model are this frames phase.
|
||||
|
||||
train = np.concatenate( (features[1:,:max_amp+1], features[:-1,max_amp+1:]) )
|
||||
target = features([1:,max_amp+1:])
|
||||
|
||||
model = models.Sequential()
|
||||
model.add(layers.Dense(256, activation='relu', input_dim=nb_input))
|
||||
model.add(layers.Dense(256, activation='relu'))
|
||||
model.add(layers.Dense(256, activation='relu'))
|
||||
model.add(layers.Dense(nb_ouput, activation='linear'))
|
||||
|
||||
# Custom loss function that measures difference in phase just at
|
||||
# non-zero elements of target (ytrue). This could be extended to
|
||||
# weight each phase error by the (log) Amplitude of each harmonic
|
||||
|
||||
import keras.backend as K
|
||||
def customLoss(yTrue, yPred):
|
||||
# generate a mask vector with 1's on non zero values of yTrue
|
||||
mask = abs(K.sign(yTrue))
|
||||
# collect error in cos() and sin() terms, ignoring yPred values outside of
|
||||
# harmonics we care about
|
||||
error = yTrue - mask * yPred
|
||||
return K.sum(error * error)
|
||||
|
||||
# Compile our model
|
||||
|
||||
from keras import optimizers
|
||||
model.compile(loss=customLoss, optimizer='sge')
|
||||
|
||||
# fit model, using 20% of our data for validation
|
||||
|
||||
history = model.fit(train, target, validation_split=0.2, batch_size=32, epochs=nb_epochs)
|
||||
model.save("phasenn_model.h5")
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
plot_en = 0;
|
||||
if plot_en:
|
||||
plt.figure(1)
|
||||
plt.plot(10*np.sqrt(history.history['loss']))
|
||||
plt.plot(10*np.sqrt(history.history['val_loss']))
|
||||
plt.title('model loss')
|
||||
plt.ylabel('rms error (rad)')
|
||||
plt.xlabel('epoch')
|
||||
plt.legend(['train', 'valid'], loc='upper right')
|
||||
plt.show()
|
||||
|
||||
# run model on training data and measure variance, should be similar to training "loss"
|
||||
|
||||
train_out = model.predict(train)
|
||||
err = (train_out - target)
|
||||
var = np.var(err)
|
||||
std = np.std(err)
|
||||
print("var: %f std: %f" % (var,std))
|
||||
|
Loading…
Reference in New Issue