moved out of codec2 repo

master
David 2019-11-17 12:05:58 +10:30
commit 99da10f0d4
11 changed files with 1417 additions and 0 deletions

88
phasenn_test1.py 100755
View File

@ -0,0 +1,88 @@
#!/usr/bin/python3
# phasenn_test1.py
#
# David Rowe August 2019
# Keras model for testing phase modelling using NNs. Self contained, generates
# it's own training data. Can model linear phase at freq up to pi/4
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import matplotlib.pyplot as plt
# constants
N = 80 # number of samples in frames
nb_samples = 1000000
nb_batch = 512
nb_epochs = 400
np_input = 3
nb_output = 2
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
def quant(val, levels):
return np.round(val*levels)/levels
w = np.zeros(nb_samples)
phase_start = np.zeros(nb_samples)
phase_end = np.zeros(nb_samples)
for i in range(nb_samples):
x=np.random.rand(2)
w[i] = x[0]*np.pi/4
phase_start[i] = -np.pi + x[1]*2*np.pi
phase_end[i] = phase_start[i] + N*w[i];
train = np.column_stack((w, np.cos(phase_start), np.sin(phase_start)))
target = np.column_stack((np.cos(phase_end), np.sin(phase_end)))
print(train.shape)
print(target.shape)
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=np_input))
model.add(layers.Dense(256, activation='relu', input_dim=np_input))
model.add(layers.Dense(nb_output, activation='linear'))
model.summary()
# Compile our model
from keras import optimizers
model.compile(loss='mse', optimizer='adam')
# fit model, using 20% of our data for validation
history = model.fit(train, target, validation_split=0.2, batch_size=nb_batch, epochs=nb_epochs)
# test actual error in angle over training data
train_out = model.predict(train)
err = (train_out - target)
var = np.var(err)
std = np.std(err)
print("rect coord var: %f std: %f" % (var,std))
err_angle = np.arctan2(np.sin(err[:,0]),np.cos(err[:,1]))
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %f std: %f" % (var,std))
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(10*np.log10(history.history['loss']))
plt.plot(10*np.log10(history.history['val_loss']))
plt.title('model loss (dB)')
plt.xlabel('epoch')
plt.legend(['train', 'valid'], loc='upper right')
plt.figure(2)
plt.hist(err_angle, bins=20)
plt.show()
#plt.figure(2)
#plt.hist(target)
#plt.show()

90
phasenn_test2.py 100755
View File

@ -0,0 +1,90 @@
#!/usr/bin/python3
# phasenn_test2.py
#
# David Rowe Oct 2019
# Keras model for testing phase modelling using NNs. A simple four weight
# linear model to map a constant phase shift, arriving at the same model as
# linear algera. Good sanity check
# | cos(end) | = | cos(Nw) -sin(Nw) | | cos(start) |
# | sin(end) | | sin(Nw) cos(Nw) | | sin(start) |
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import keras.backend as K
import matplotlib.pyplot as plt
# constants
N = 80 # number of time domain samples in frame
nb_samples = 1000
nb_batch = 1
nb_epochs = 3
width = 1
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
# phase encoded as cos,sin pairs
phase_start = np.zeros((nb_samples, 2))
phase_end = np.zeros((nb_samples, 2))
# a shift of pi/4 across the frame of N samples
w = np.pi/(4*N);
for i in range(nb_samples):
r = np.random.rand(1)
phase_start_pol = -np.pi + r[0]*2*np.pi
phase_start[i,0] = np.cos(phase_start_pol)
phase_start[i,1] = np.sin(phase_start_pol)
phase_end_pol = phase_start_pol + N*w
phase_end[i,0] = np.cos(phase_end_pol)
phase_end[i,1] = np.sin(phase_end_pol)
#print(phase_start.shape)
#print(phase_end.shape)
#print(phase_start[:10])
model = models.Sequential()
model.add(layers.Dense(2, bias=False, input_dim=2))
model.summary()
# Compile and fit our model
from keras import optimizers
model.compile(loss='mse', optimizer='sgd')
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
print(model.get_weights())
# measure error over all samples
phase_end_est = model.predict(phase_start)
err = (phase_end_est - phase_end)
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
#print(err[:5,:])
err_angle = np.arctan2(phase_end_est[:,1], phase_end_est[:,0]) - np.arctan2(phase_end[:,1], phase_end[:,0])
#print(err[:5,:])
print(err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %f std: %f" % (var,std))
plot_en = 0;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.figure(2)
plt.hist(err_angle, bins=20)
plt.title('phase angle error')
plt.show()

90
phasenn_test3.py 100755
View File

@ -0,0 +1,90 @@
#!/usr/bin/python3
# phasenn_test3.py
#
# David Rowe Oct 2019
# Keras model for testing phase modelling using NNs. Extending four
# weights per phase test2 to a vector of phases, each with a different
# shift across the frame
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import keras.backend as K
import matplotlib.pyplot as plt
# constants
N = 80 # number of time domain samples in frame
nb_samples = 10000
nb_batch = 1
nb_epochs = 5
width = 256
pairs = 2*width
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
# phase encoded as cos,sin pairs
phase_start = np.zeros((nb_samples, pairs))
phase_end = np.zeros((nb_samples, pairs))
# Map 0...width-1 to 0...pi
w = np.pi/width;
for i in range(nb_samples):
for m in range(width):
r = np.random.rand(1)
phase_start_pol = -np.pi + r[0]*2*np.pi
phase_start[i,2*m] = np.cos(phase_start_pol)
phase_start[i,2*m+1] = np.sin(phase_start_pol)
phase_end_pol = phase_start_pol + N*w*m
phase_end[i,2*m] = np.cos(phase_end_pol)
phase_end[i,2*m+1] = np.sin(phase_end_pol)
print(phase_start.shape)
print(phase_end.shape)
# note most of these weights whould end up being 0, as we only need 4 weights to map each phase
# rotation from an input to output pair
model = models.Sequential()
model.add(layers.Dense(pairs, bias=False, input_dim=pairs))
model.summary()
# Compile and fit our model
from keras import optimizers
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
# measure error over all samples
phase_end_est = model.predict(phase_start)
err = (phase_end_est - phase_end)
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
#print(err[:5,:])
err_angle = np.arctan2(phase_end_est[:,1], phase_end_est[:,0]) - np.arctan2(phase_end[:,1], phase_end[:,0])
#print(err[:5,:])
print(err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %f std: %f" % (var,std))
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.figure(2)
plt.hist(err_angle, bins=20)
plt.title('phase angle error')
plt.show()

132
phasenn_test4.py 100755
View File

@ -0,0 +1,132 @@
#!/usr/bin/python3
# phasenn_test4.py
#
# David Rowe Oct 2019
# Keras model for testing phase modelling using NNs. Like test3 but
# with sparse input vectors of harmonic phases, as this is how phases
# will be presented from Codec 2.
# Lessons learned:
# 1. We train slower than test3, as we have less information per samples
# with a sparse subset of discrete harmonics
# 2. We need to quantise the frequency of each bin to get low error, ow
# we introduce a +/- 0.5bin random error in the training data, with is
# theoretically 8 degrees, but about 10-11 in practice.
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import keras.backend as K
import matplotlib.pyplot as plt
# constants
N = 80 # number of time domain samples in frame
nb_samples = 100000
nb_batch = 32
nb_epochs = 50
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
# phase encoded as cos,sin pairs
phase_start = np.zeros((nb_samples, pairs))
phase_end = np.zeros((nb_samples, pairs))
Wo = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
# Map 0...width-1 to 0...pi
for i in range(nb_samples):
r = np.random.rand(1)
fo = fo_min + (fo_max-fo_min)*r[0]
Wo[i] = fo*2*np.pi/Fs
L[i] = int(np.floor(np.pi/Wo[i]))
for m in range(1, L[i]):
wm = m*Wo[i]
bin = int(np.round(wm*width/np.pi))
# Quantise frequency to this bin, this step is important to
# match results of test3. Without it we are adding +/- 0.5 bin
# uncertainty in the frequency, which will randomise phase shifts
# across bins
wm = bin*np.pi/width
r = np.random.rand(1)
phase_start_pol = -np.pi + r[0]*2*np.pi
phase_start[i,2*bin] = np.cos(phase_start_pol)
phase_start[i,2*bin+1] = np.sin(phase_start_pol)
phase_end_pol = phase_start_pol + N*wm
phase_end[i,2*bin] = np.cos(phase_end_pol)
phase_end[i,2*bin+1] = np.sin(phase_end_pol)
print(phase_start.shape)
print(phase_end.shape)
# note most of these weights whould end up being 0, as we only need
# two wieghts to map each phase rotation
model = models.Sequential()
model.add(layers.Dense(pairs, bias=False, input_dim=pairs))
model.summary()
import keras.backend as K
# Compile and fit our model
from keras import optimizers
sgd = optimizers.SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
# measure error over non-zero samples
phase_end_est = model.predict(phase_start)
ind = np.nonzero(phase_start)
err = (phase_end_est[ind] - phase_end[ind])
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
#print(err[:5,:])
# approximation if error is small
err_angle = np.arctan2(err[1::2], 1)
#print(err[:5,:])
print(err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %f std: %f" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.figure(2)
plt.hist(err_angle, bins=20)
plt.title('phase angle error')
plt.figure(3)
r = 0
phase = np.zeros(width)
phase_est = np.zeros(width)
for m in range(1,L[r]):
wm = m*Wo[r]
bin = int(np.round(wm*width/np.pi))
#print("bin: %d %f %f" % (bin, phase_end[r,2*bin], phase_end[r,2*bin+1]))
phase[m] = np.arctan2(phase_end[r,2*bin+1], phase_end[r,2*bin])
phase_est[m] = np.arctan2(phase_end_est[r,2*bin+1], phase_end_est[r,2*bin])
plt.plot(phase[1:L[r]+1],'g')
plt.plot(phase_est[1:L[r]+1],'r')
plt.show()

125
phasenn_test5.py 100755
View File

@ -0,0 +1,125 @@
#!/usr/bin/python3
# phasenn_test5.py
#
# David Rowe Oct 2019
# Sparse model like test4 but with non-linear layers (activation functions) as
# these will be required later when we add amplitude information.
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import keras.backend as K
import matplotlib.pyplot as plt
# constants
N = 80 # number of time domain samples in frame
nb_samples = 100000
nb_batch = 32
nb_epochs = 50
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
# phase encoded as cos,sin pairs
phase_start = np.zeros((nb_samples, pairs))
phase_end = np.zeros((nb_samples, pairs))
Wo = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
# Map 0...width-1 to 0...pi
for i in range(nb_samples):
r = np.random.rand(1)
fo = fo_min + (fo_max-fo_min)*r[0]
Wo[i] = fo*2*np.pi/Fs
L[i] = int(np.floor(np.pi/Wo[i]))
for m in range(1, L[i]):
wm = m*Wo[i]
bin = int(np.round(wm*width/np.pi))
# Quantise frequency to this bin, this step is important to
# match results of test3. Without it we are adding +/- 0.5 bin
# uncertainty in the frequency, which will randomise phase shifts
# across bins
wm = bin*np.pi/width
r = np.random.rand(1)
phase_start_pol = -np.pi + r[0]*2*np.pi
phase_start[i,2*bin] = np.cos(phase_start_pol)
phase_start[i,2*bin+1] = np.sin(phase_start_pol)
phase_end_pol = phase_start_pol + N*wm
phase_end[i,2*bin] = np.cos(phase_end_pol)
phase_end[i,2*bin+1] = np.sin(phase_end_pol)
print(phase_start.shape)
print(phase_end.shape)
# note most of these weights whould end up being 0, as we only need
# two wieghts to map each phase rotation
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=pairs))
model.add(layers.Dense(pairs, input_dim=pairs))
model.summary()
import keras.backend as K
# Compile and fit our model
from keras import optimizers
sgd = optimizers.SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)
history = model.fit(phase_start, phase_end, batch_size=nb_batch, epochs=nb_epochs)
# measure error over non-zero samples
phase_end_est = model.predict(phase_start)
ind = np.nonzero(phase_start)
err = (phase_end_est[ind] - phase_end[ind])
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
#print(err[:5,:])
# approximation if error is small
err_angle = np.arctan2(err[1::2], 1)
#print(err[:5,:])
print(err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %f std: %f" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.figure(2)
plt.hist(err_angle, bins=20)
plt.title('phase angle error')
plt.figure(3)
r = 0
phase = np.zeros(width)
phase_est = np.zeros(width)
for m in range(1,L[r]):
wm = m*Wo[r]
bin = int(np.round(wm*width/np.pi))
#print("bin: %d %f %f" % (bin, phase_end[r,2*bin], phase_end[r,2*bin+1]))
phase[m] = np.arctan2(phase_end[r,2*bin+1], phase_end[r,2*bin])
phase_est[m] = np.arctan2(phase_end_est[r,2*bin+1], phase_end_est[r,2*bin])
plt.plot(phase[1:L[r]+1],'g')
plt.plot(phase_est[1:L[r]+1],'r')
plt.show()

157
phasenn_test6.py 100755
View File

@ -0,0 +1,157 @@
#!/usr/bin/python3
# phasenn_test6.py
#
# David Rowe Oct 2019
# Extending test5 to deal with input/output vectors that have slightly
# different "rates", i.e. Wo changing across the frame which is usual
# for voiced speech.
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import matplotlib.pyplot as plt
# constants
N = 80 # number of time domain samples in frame
nb_samples = 100000
nb_batch = 32
nb_epochs = 25
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
dfo = 0.02
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
# phase encoded as cos,sin pairs ref:
phase_start = np.zeros((nb_samples, pairs))
phase_end = np.zeros((nb_samples, pairs))
Wo_N = np.zeros(nb_samples)
Wo_0 = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
for i in range(nb_samples):
# parameters at time 0 (start of current frame)
# distribute fo randomnly on a log scale
r = np.random.rand(1)
log_fo_0 = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
fo_0 = 10 ** log_fo_0
Wo_0[i] = fo_0*2*np.pi/Fs
L_0 = int(np.floor(np.pi/Wo_0[i]))
# parameters at time N (end of current frame), allow a df0 freq change
# across frame, typical of voiced speech
r = np.random.rand(1)
fo_N = fo_0 + (-2*dfo + dfo*r[0])*fo_0
fo_N = np.max((fo_min, fo_N))
fo_N = np.min((fo_max, fo_N))
#fo_N = fo_0
Wo_N[i] = fo_N*2*np.pi/Fs
L_N = int(np.floor(np.pi/Wo_N[i]))
L[i] = np.min((L_0, L_N))
#print("fo: %f %f L: %d %d min: %d" % (fo_0, fo_N, L_0, L_N, L[i]))
for m in range(1,L[i]):
bin_0 = int(np.round(m*Wo_0[i]*width/np.pi))
mWo_0 = bin_0*np.pi/width
bin_N = int(np.round(m*Wo_N[i]*width/np.pi))
mWo_N = bin_N*np.pi/width
#print("m: %d bin_0: %d bin_N: %d" % (m, bin_0,bin_N))
r = np.random.rand(1)
phase_start_pol = -np.pi + r[0]*2*np.pi
phase_start[i,2*bin_0] = np.cos(phase_start_pol)
phase_start[i,2*bin_0+1] = np.sin(phase_start_pol)
# phase shift average of two frequencies
phase_end_pol = phase_start_pol + N*(mWo_0 + mWo_N)/2
phase_end[i,2*bin_N] = np.cos(phase_end_pol)
phase_end[i,2*bin_N+1] = np.sin(phase_end_pol)
print(Wo_0.shape, Wo_N.shape, phase_start.shape)
input = np.column_stack([Wo_0, Wo_N, phase_start])
print(input.shape)
print(phase_end.shape)
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=(pairs+2)))
model.add(layers.Dense(pairs))
model.summary()
# Compile and fit our model
from keras import optimizers
sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)
history = model.fit(input, phase_end, batch_size=nb_batch, epochs=nb_epochs)
# measure error in rectangular coordinates over all samples
phase_end_est = model.predict(input)
ind = np.nonzero(phase_end)
err = (phase_end[ind] - phase_end_est[ind])
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
print(phase_end_est.shape, err.shape)
c1 = phase_end[ind]; c1 = c1[::2] + 1j*c1[1::2]
c2 = phase_end_est[ind]; c2 = c2[::2] + 1j*c2[1::2]
err_angle = np.angle(c1 * np.conj(c2))
print(err_angle[:5],err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %4.2f std: %4.2f rads" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
def sample_model(r):
phase = np.zeros(width, dtype=complex)
phase_est = np.zeros(width, dtype=complex)
phase_err = np.zeros(width, dtype=complex)
for m in range(1,L[r]):
wm = m*Wo_N[r]
bin = int(np.round(wm*width/np.pi))
phase[m] = phase_end[r,2*bin] + 1j*phase_end[r,2*bin+1]
phase_est[m] = phase_end_est[r,2*bin] + 1j*phase_end_est[r,2*bin+1]
phase_err[m] = phase[m] * np.conj(phase_est[m])
return phase, phase_err
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.show(block=False)
plt.figure(2)
plt.subplot(211)
plt.hist(err_angle*180/np.pi, bins=20)
plt.subplot(212)
plt.hist(Wo_0*(Fs/2)/np.pi, bins=20)
plt.title('phase angle error (deg) and fo (Hz)')
plt.show(block=False)
plt.figure(3)
plt.title('sample vectors and error')
for r in range(12):
plt.subplot(3,4,r+1)
phase, phase_err = sample_model(r)
plt.plot(np.angle(phase[1:L[r]+1])*180/np.pi,'g')
plt.plot(np.angle(phase_err[1:L[r]+1])*180/np.pi,'r')
plt.show(block=False)
# click on last figure to close all and finish
plt.waitforbuttonpress(0)
plt.close()

177
phasenn_test7.py 100755
View File

@ -0,0 +1,177 @@
#!/usr/bin/python3
# phasenn_test7.py
#
# David Rowe Oct 2019
# Extending PhaseNN model to have a linear and dispersive phase
# component. The dispersive component is from a simple 2nd order IIR
# filter.
# TODO: [ ] remove df0 support
# [ ] remove Wo_0, Wo_N
# [ ] replace random starting phases with phase realted to t0
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import matplotlib.pyplot as plt
from scipy import signal
# constants
N = 80 # number of time domain samples in frame
nb_samples = 100000
nb_batch = 32
nb_epochs = 25
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
dfo = 0.00 # dfo models aren't solved yet
# Generate training data. Given the phase at the start of the frame,
# and the frequency, determine the phase at the end of the frame
# phase encoded as cos,sin pairs ref:
phase_start = np.zeros((nb_samples, pairs))
phase_end = np.zeros((nb_samples, pairs))
filter_amp = np.zeros((nb_samples, width))
filter_phase = np.zeros((nb_samples, width))
Wo_N = np.zeros(nb_samples)
Wo_0 = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
for i in range(nb_samples):
# parameters at time 0 (start of current frame)
# distribute fo randomnly on a log scale
r = np.random.rand(1)
log_fo_0 = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
fo_0 = 10 ** log_fo_0
Wo_0[i] = fo_0*2*np.pi/Fs
L_0 = int(np.floor(np.pi/Wo_0[i]))
# parameters at time N (end of current frame), allow a df0 freq change
# across frame, typical of voiced speech
r = np.random.rand(1)
fo_N = fo_0 + (-2*dfo + dfo*r[0])*fo_0
fo_N = np.max((fo_min, fo_N))
fo_N = np.min((fo_max, fo_N))
#fo_N = fo_0
Wo_N[i] = fo_N*2*np.pi/Fs
L_N = int(np.floor(np.pi/Wo_N[i]))
L[i] = np.min((L_0, L_N))
#print("fo: %f %f L: %d %d min: %d" % (fo_0, fo_N, L_0, L_N, L[i]))
# sample 2nd order IIR filter with random peak freq and amplitude
r = np.random.rand(2)
alpha = 0.1*np.pi + 0.8*np.pi*r[0]
gamma = 0.8
#alpha = np.pi/4; gamma = 0.8
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo_0[i])
for m in range(1,L[i]):
bin_0 = int(np.round(m*Wo_0[i]*width/np.pi))
mWo_0 = bin_0*np.pi/width
bin_N = int(np.round(m*Wo_N[i]*width/np.pi))
mWo_N = bin_N*np.pi/width
#print("m: %d bin_0: %d bin_N: %d" % (m, bin_0,bin_N))
r = np.random.rand(1)
phase_start_pol = -np.pi + r[0]*2*np.pi
phase_start[i,2*bin_0] = np.cos(phase_start_pol)
phase_start[i,2*bin_0+1] = np.sin(phase_start_pol)
# phase shift average of two frequencies
phase_end_pol = phase_start_pol + N*(mWo_0 + mWo_N)/2 + np.angle(h[m-1])
phase_end[i,2*bin_N] = np.cos(phase_end_pol)
phase_end[i,2*bin_N+1] = np.sin(phase_end_pol)
filter_amp[i,bin_0] = np.log10(abs(h[m-1]))
filter_phase[i,bin_0] = np.angle(h[m-1])
input = np.column_stack([filter_amp, phase_start])
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=(width+pairs)))
model.add(layers.Dense(pairs))
model.summary()
# Compile and fit our model
from keras import optimizers
sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)
history = model.fit(input, phase_end, batch_size=nb_batch, epochs=nb_epochs)
# measure error in rectangular coordinates over all samples
phase_end_est = model.predict(input)
ind = np.nonzero(phase_end)
err = (phase_end[ind] - phase_end_est[ind])
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
print(phase_end_est.shape, err.shape)
c1 = phase_end[ind]; c1 = c1[::2] + 1j*c1[1::2]
c2 = phase_end_est[ind]; c2 = c2[::2] + 1j*c2[1::2]
err_angle = np.angle(c1 * np.conj(c2))
print(err_angle[:5],err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %4.2f std: %4.2f rads" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
def sample_model(r):
phase = np.zeros(width, dtype=complex)
phase_est = np.zeros(width, dtype=complex)
phase_err = np.zeros(width, dtype=complex)
phase_filt = np.zeros(width)
for m in range(1,L[r]):
wm = m*Wo_N[r]
bin = int(np.round(wm*width/np.pi))
phase[m] = phase_end[r,2*bin] + 1j*phase_end[r,2*bin+1]
phase_est[m] = phase_end_est[r,2*bin] + 1j*phase_end_est[r,2*bin+1]
phase_err[m] = phase[m] * np.conj(phase_est[m])
phase_filt[m] = filter_phase[r,bin]
return phase, phase_err, phase_filt
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.show(block=False)
plt.figure(2)
plt.subplot(211)
plt.hist(err_angle*180/np.pi, bins=20)
plt.subplot(212)
plt.hist(Wo_0*(Fs/2)/np.pi, bins=20)
plt.title('phase angle error (deg) and fo (Hz)')
plt.show(block=False)
plt.figure(3)
plt.title('sample vectors and error')
for r in range(12):
plt.subplot(3,4,r+1)
phase, phase_err, phase_filt = sample_model(r)
plt.plot(np.angle(phase[1:L[r]+1])*180/np.pi,'g')
plt.plot(np.angle(phase_err[1:L[r]+1])*180/np.pi,'r')
plt.plot(phase_filt[1:L[r]+1]*180/np.pi,'b')
plt.ylim(-180,180)
plt.show(block=False)
# click on last figure to close all and finish
plt.waitforbuttonpress(0)
plt.close()

117
phasenn_test7a.py 100755
View File

@ -0,0 +1,117 @@
#!/usr/bin/python3
# phasenn_test7.py
#
# David Rowe Oct 2019
# Keras model for testing phase modelling using NNs. Here we try
# estimating the phase of a 2nd order system from it's amplitudes.
# This models the dispersive component of speech phase spectra, up
# until now we have been testing with the linear phase component.
# This script emulates a Hilbert Transform, note however in practice
# speech is not minimum phase so HTs have there limitations for real
# speech signals.
import numpy as np
from scipy import signal
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import keras.backend as K
import matplotlib.pyplot as plt
# constants
N = 80 # number of time domain samples in frame
nb_samples = 10000
nb_batch = 64
nb_epochs = 10
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data. Sparse log magnitude spectrum is input,
# phase spectrum of 2nd order system the output/target
mag = np.zeros((nb_samples, width))
phase = np.zeros((nb_samples, pairs))
for i in range(nb_samples):
# choose a random fo
r = np.random.rand(1)
fo = fo_min + (fo_max-fo_min)*r[0]
Wo = fo*2*np.pi/Fs
L = int(np.floor(np.pi/Wo))
# sample 2nd order IIR filter with random peak freq and amplitude
r = np.random.rand(2)
alpha = 0.1*np.pi + 0.8*np.pi*r[0]
gamma = r[1]
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L)*Wo)
# map to sparse input and output arrays
for m in range(1,L):
bin = int(np.floor(m*Wo*width/np.pi))
mag[i,bin] = np.log10(np.abs(h[m-1]))
phase_rect = h[m-1]/np.abs(h[m-1])
phase[i,2*bin] = phase_rect.real
phase[i,2*bin+1] = phase_rect.imag
print(mag.shape)
print(phase.shape)
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
model.add(layers.Dense(pairs, input_dim=pairs))
model.summary()
# Compile and fit our model
from keras import optimizers
sgd = optimizers.SGD(lr=0.04, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)
history = model.fit(mag, phase, batch_size=nb_batch, epochs=nb_epochs)
# measure error in rectangular coordinates over all samples
phase_est = model.predict(mag)
err = (phase_est - phase)
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
err_angle = np.arctan2(err[:,1], 1)
print(err_angle.shape)
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %4.2f std: %4.2f rads" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.figure(2)
plt.hist(err_angle*180/np.pi, bins=20)
plt.title('phase angle error (deg)')
fig = plt.figure(3)
ax1 = fig.add_subplot(111)
plt.plot(20*mag[1,:])
ax2 = ax1.twinx()
phase = np.unwrap(np.arctan2(phase[1::2], phase[::2]))
plt.plot(phase, 'g')
phase_est = np.unwrap(np.arctan2(phase_est[1::2], phase_est[::2]))
plt.plot(phase, 'r')
plt.show()

161
phasenn_test8.py 100755
View File

@ -0,0 +1,161 @@
#!/usr/bin/python3
# phasenn_test8.py
#
# David Rowe Oct 2019
# Estimate phase spectra from amplitude spectra for a 2nd order IIR
# filter, just like a Hilbert Transform.
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import matplotlib.pyplot as plt
from scipy import signal
from keras import backend as K
# custom loss function
def sparse_loss(y_true, y_pred):
mask = K.cast( K.not_equal(y_pred, 0), dtype='float32')
n = K.sum(mask)
return K.sum(K.square((y_pred - y_true)*mask))/n
# testing custom loss function
x = layers.Input(shape=(None,))
y = layers.Input(shape=(None,))
loss_func = K.Function([x, y], [sparse_loss(x, y)])
assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1])
assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
# constants
N = 80 # number of time domain samples in frame
nb_samples = 400000
nb_batch = 32
nb_epochs = 100
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data.
filter_amp = np.zeros((nb_samples, width))
# phase as an angle
filter_phase = np.zeros((nb_samples, width))
# phase encoded as cos,sin pairs:
filter_phase_rect = np.zeros((nb_samples, pairs))
Wo = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
for i in range(nb_samples):
# distribute fo randomly on a log scale, gives us more training
# data with low freq frames which have more harmonics and are
# harder to match
r = np.random.rand(1)
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
fo = 10 ** log_fo
Wo[i] = fo*2*np.pi/Fs
L[i] = int(np.floor(np.pi/Wo[i]))
# sample 2nd order IIR filter with random peak freq
r = np.random.rand(2)
alpha = 0.1*np.pi + 0.8*np.pi*r[0]
gamma = r[1]
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i])
for m in range(1,L[i]):
bin = int(np.round(m*Wo[i]*width/np.pi))
mWo = bin*np.pi/width
filter_amp[i,bin] = np.log10(abs(h[m-1]))
filter_phase[i,bin] = np.angle(h[m-1])
filter_phase_rect[i,2*bin] = np.cos(filter_phase[i,bin])
filter_phase_rect[i,2*bin+1] = np.sin(filter_phase[i,bin])
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
model.add(layers.Dense(4*pairs, activation='relu'))
model.add(layers.Dense(pairs))
model.summary()
from keras import optimizers
sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=sparse_loss, optimizer=sgd)
history = model.fit(filter_amp, filter_phase_rect, batch_size=nb_batch, epochs=nb_epochs)
# measure error in rectangular coordinates over all samples
filter_phase_rect_est = model.predict(filter_amp)
ind = np.nonzero(filter_phase_rect)
err = (filter_phase_rect[ind] - filter_phase_rect_est[ind])
var = np.var(err)
std = np.std(err)
print("rect var: %f std: %f" % (var,std))
c1 = filter_phase_rect[ind]; c1 = c1[::2] + 1j*c1[1::2]
c2 = filter_phase_rect_est[ind]; c2 = c2[::2] + 1j*c2[1::2]
err_angle = np.angle(c1 * np.conj(c2))
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %4.2f std: %4.2f rads" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi))
def sample_model(r):
phase = np.zeros(width, dtype=complex)
phase_est = np.zeros(width, dtype=complex)
phase_err = np.zeros(width, dtype=complex)
phase_filt = np.zeros(width)
amp_filt = np.zeros(width)
for m in range(1,L[r]):
wm = m*Wo[r]
bin = int(np.round(wm*width/np.pi))
phase[m] = filter_phase_rect[r,2*bin] + 1j*filter_phase_rect[r,2*bin+1]
phase_est[m] = filter_phase_rect_est[r,2*bin] + 1j*filter_phase_rect_est[r,2*bin+1]
phase_err[m] = phase[m] * np.conj(phase_est[m])
amp_filt[m] = filter_amp[r,bin]
return phase, phase_err, amp_filt
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.show(block=False)
plt.figure(2)
plt.subplot(211)
plt.hist(err_angle*180/np.pi, bins=20)
plt.subplot(212)
plt.hist(Wo*(Fs/2)/np.pi, bins=20)
plt.title('phase angle error (deg) and fo (Hz)')
plt.show(block=False)
plt.figure(3)
plt.title('sample vectors and error')
for r in range(12):
plt.subplot(3,4,r+1)
phase, phase_err, amp_filt = sample_model(r)
plt.plot(np.angle(phase[1:L[r]])*180/np.pi,'g')
plt.plot(np.angle(phase_err[1:L[r]])*180/np.pi,'r')
plt.ylim(-180,180)
plt.show(block=False)
plt.figure(4)
plt.title('filter amplitudes')
for r in range(12):
plt.subplot(3,4,r+1)
phase, phase_err, amp_filt = sample_model(r)
plt.plot(amp_filt[1:L[r]],'g')
plt.show(block=False)
# click on last figure to close all and finish
plt.waitforbuttonpress(0)
plt.close()

166
phasenn_test9.py 100755
View File

@ -0,0 +1,166 @@
#!/usr/bin/python3
# phasenn_test9.py
#
# David Rowe Nov 2019
# Estimate an impulse position from the phase spectra of a 2nd order system excited by an impulse
#
# periodic impulse train Wo at time offset n0 -> 2nd order system -> discrete phase specta -> NN -> n0
# TODO:
# [ ] sanity check n0 phases
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
import matplotlib.pyplot as plt
from scipy import signal
from keras import backend as K
# constants
N = 80 # number of time domain samples in frame
nb_samples = 10000
nb_batch = 32
nb_epochs = 10
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data
amp = np.zeros((nb_samples, width))
# phase as an angle
phase = np.zeros((nb_samples, width))
# phase encoded as cos,sin pairs:
phase_rect = np.zeros((nb_samples, pairs))
Wo = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
n0 = np.zeros(nb_samples, dtype=int)
target = np.zeros((nb_samples,1))
for i in range(nb_samples):
# distribute fo randomly on a log scale, gives us more training
# data with low freq frames which have more harmonics and are
# harder to match
r = np.random.rand(1)
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
fo = 10 ** log_fo
Wo[i] = fo*2*np.pi/Fs
L[i] = int(np.floor(np.pi/Wo[i]))
# pitch period in samples
P = 2*L[i]
r = np.random.rand(3)
# sample 2nd order IIR filter with random peak freq (alpha) and peak amplitude (gamma)
alpha = 0.1*np.pi + 0.4*np.pi*r[0]
gamma = 0.9 + 0.09*r[1]
w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i])
# select n0 between 0...P-1 (it's periodic)
n0[i] = r[2]*P
e = np.exp(-1j*n0[i]*range(1,L[i])*Wo[i])
for m in range(1,L[i]):
bin = int(np.round(m*Wo[i]*width/np.pi))
mWo = bin*np.pi/width
amp[i,bin] = np.log10(abs(h[m-1]))
phase[i,bin] = np.angle(h[m-1]*e[m-1])
phase_rect[i,2*bin] = np.cos(phase[i,bin])
phase_rect[i,2*bin+1] = np.sin(phase[i,bin])
# target is n0 in rec coords
target[i] = n0[i]/P
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=pairs))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1))
model.summary()
from keras import optimizers
sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss="mse", optimizer=sgd)
history = model.fit(phase_rect, target, batch_size=nb_batch, epochs=nb_epochs)
# measure error in rectangular coordinates over all samples
target_est = model.predict(phase_rect)
err = target - target_est
var = np.var(err)
std = np.std(err)
print("var: %f std: %f" % (var,std))
def sample_freq(r):
phase_L = np.zeros(L[r], dtype=complex)
amp_L = np.zeros(L[r])
for m in range(1,L[r]):
wm = m*Wo[r]
bin = int(np.round(wm*width/np.pi))
phase_L[m] = phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1]
amp_L[m] = amp[r,bin]
return phase_L, amp_L
# synthesise time domain signal
def sample_time(r):
s = np.zeros(2*N);
for m in range(1,L[r]):
wm = m*Wo[r]
bin = int(np.round(wm*width/np.pi))
Am = 10 ** amp[r,bin]
phi_m = np.angle(phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1])
s = s + Am*np.cos(wm*(range(2*N)) + phi_m)
return s
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.show(block=False)
plt.figure(2)
plt.hist(err, bins=20)
plt.show(block=False)
plt.figure(3)
plt.plot(target[:12],'b')
plt.plot(target_est[:12],'g')
plt.show(block=False)
plt.figure(4)
plt.title('Freq Domain')
for r in range(12):
plt.subplot(3,4,r+1)
phase_L, amp_L = sample_freq(r)
plt.plot(20*amp_L,'g')
plt.ylim(-20,20)
plt.show(block=False)
plt.figure(5)
plt.title('Time Domain')
for r in range(12):
plt.subplot(3,4,r+1)
s = sample_time(r)
P = 2*L[r]
n0_ = target_est[r]*P
print("F0: %5.1f P: %3d L: %3d n0: %3d n0_est: %5.1f" % (Wo[r]*(Fs/2)/np.pi, P, L[r], n0[r], n0_))
plt.plot(s,'g')
plt.plot([n0[r],n0[r]], [-25,25],'r')
plt.plot([n0_,n0_], [-25,25],'b')
plt.ylim(-50,50)
plt.show(block=False)
# click on last figure to close all and finish
plt.waitforbuttonpress(0)
plt.close()

114
phasenn_train.py 100755
View File

@ -0,0 +1,114 @@
#!/usr/bin/python3
# phasenn_train.py
#
# David Rowe August 2019
#
# Keras model for estimating the phase of sinusoidally modelled speech
# To generate features:
# $ ./c2sim ~/Downloads/all_speech_8k.sw --dumpphase_nnl train.f32
import numpy as np
import sys
from keras.layers import Dense
from keras import models,layers
from keras import initializers
if len(sys.argv) < 2:
print("usage: phasenn_train.py train.f32")
sys.exit(0)
# constants
max_amp = 80 # sparsevector covering 0 ... Fs/2
nb_features = 243 # number of sparse features/row in input training data file
nb_epochs = 10
# load training data
feature_file = sys.argv[1]
features = np.fromfile(feature_file, dtype='float32')
nb_frames = int(len(features)/nb_features)
print("nb_frames: %d" % (nb_frames))
# 0..80 log10(A)
# 81..161 cos(phi)
# 162..242 sin(phi)
features = np.reshape(features, (nb_frames, nb_features))
print("features shape:")
print(features.shape)
# So the idea is we can predict the next frames phases from the
# current frame, and the magnitude spectrum. For voiced speech, the
# sinusoids are continuous, so can be predicted from frame to frame if
# you know the frequency and previous phase. We encode the frequency
# as the position in the sprase vector.
# Cascased with that is phase spectra due to dispersion of the phase
# response of the vocal tract filter, e.g. a large dispersion around
# resonances. We supply the magnitude spectra to help model the vocal
# tract filter phase.
# Unvoiced speech has more random phase. Hopefully the NN can work
# out if the speech is voiced or unvoiced from the magnitide spectra.
# The phase is encoded using cos and sin of the phase, as these are
# bounded by +/-1
# So input features are this frame's log(A), and last frames phase.
# The output features we are trying to model are this frames phase.
train = np.concatenate( (features[1:,:max_amp+1], features[:-1,max_amp+1:]) )
target = features([1:,max_amp+1:])
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=nb_input))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(nb_ouput, activation='linear'))
# Custom loss function that measures difference in phase just at
# non-zero elements of target (ytrue). This could be extended to
# weight each phase error by the (log) Amplitude of each harmonic
import keras.backend as K
def customLoss(yTrue, yPred):
# generate a mask vector with 1's on non zero values of yTrue
mask = abs(K.sign(yTrue))
# collect error in cos() and sin() terms, ignoring yPred values outside of
# harmonics we care about
error = yTrue - mask * yPred
return K.sum(error * error)
# Compile our model
from keras import optimizers
model.compile(loss=customLoss, optimizer='sge')
# fit model, using 20% of our data for validation
history = model.fit(train, target, validation_split=0.2, batch_size=32, epochs=nb_epochs)
model.save("phasenn_model.h5")
import matplotlib.pyplot as plt
plot_en = 0;
if plot_en:
plt.figure(1)
plt.plot(10*np.sqrt(history.history['loss']))
plt.plot(10*np.sqrt(history.history['val_loss']))
plt.title('model loss')
plt.ylabel('rms error (rad)')
plt.xlabel('epoch')
plt.legend(['train', 'valid'], loc='upper right')
plt.show()
# run model on training data and measure variance, should be similar to training "loss"
train_out = model.predict(train)
err = (train_out - target)
var = np.var(err)
std = np.std(err)
print("var: %f std: %f" % (var,std))