mirror of https://github.com/drowe67/phasenn.git
224 lines
7.0 KiB
Python
Executable File
224 lines
7.0 KiB
Python
Executable File
#!/usr/bin/python3
|
|
# phasenn_test11.py
|
|
#
|
|
# David Rowe Nov 2019
|
|
|
|
# test10 didn't work due to n0 -> DFT sample mapping not converging
|
|
# (test9b/9c).
|
|
#
|
|
# This is another attempt using regular DSP to extract n0, then
|
|
# fitting the dispersive model after the n0 component of phase has
|
|
# been removed.
|
|
|
|
# Combine test8 and and test9c:
|
|
# + excite a 2nd order system with a impulse train
|
|
# + pitch (Wo), pulse onset time (n0), 2nd order system parameters
|
|
# (alpha and gamma) random
|
|
# + estimate and extract n0 component of phase using regular DSP
|
|
# + Estimate dispersive part using amplitude spectra
|
|
# + Add n0 component back again
|
|
|
|
import numpy as np
|
|
import sys
|
|
from keras.layers import Input, Dense, Concatenate
|
|
from keras import models,layers
|
|
from keras import initializers
|
|
import matplotlib.pyplot as plt
|
|
from scipy import signal
|
|
from keras import backend as K
|
|
# less verbose tensorflow ....
|
|
import os
|
|
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
|
|
|
# custom loss function
|
|
def sparse_loss(y_true, y_pred):
|
|
mask = K.cast( K.not_equal(y_pred, 0), dtype='float32')
|
|
n = K.sum(mask)
|
|
return K.sum(K.square((y_pred - y_true)*mask))/n
|
|
|
|
# testing custom loss function
|
|
x = Input(shape=(None,))
|
|
y = Input(shape=(None,))
|
|
loss_func = K.Function([x, y], [sparse_loss(x, y)])
|
|
assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1])
|
|
assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
|
|
|
|
# constants
|
|
|
|
N = 80 # number of time domain samples in frame
|
|
nb_samples = 100000
|
|
nb_batch = 32
|
|
nb_epochs = 100
|
|
width = 256
|
|
pairs = 2*width
|
|
fo_min = 50
|
|
fo_max = 400
|
|
Fs = 8000
|
|
|
|
# Generate training data.
|
|
|
|
print("Generate training data")
|
|
|
|
# amplitude and phase at rate L
|
|
amp = np.zeros((nb_samples, width))
|
|
phase = np.zeros((nb_samples, width))
|
|
phase_disp = np.zeros((nb_samples, width))
|
|
|
|
# rate "width" phase encoded as cos,sin pairs:
|
|
phase_rect = np.zeros((nb_samples, pairs))
|
|
amp_ = np.zeros((nb_samples, width))
|
|
|
|
# side information
|
|
Wo = np.zeros(nb_samples)
|
|
L = np.zeros(nb_samples, dtype=int)
|
|
n0 = np.zeros(nb_samples, dtype=int)
|
|
voiced = np.zeros(nb_samples, dtype=int)
|
|
|
|
for i in range(nb_samples):
|
|
|
|
# distribute fo randomly on a log scale, gives us more training
|
|
# data with low freq frames which have more harmonics and are
|
|
# harder to match
|
|
r = np.random.rand(1)
|
|
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
|
|
fo = fo_min
|
|
fo = 10 ** log_fo
|
|
Wo[i] = fo*2*np.pi/Fs
|
|
L[i] = int(np.floor(np.pi/Wo[i]))
|
|
# pitch period in samples
|
|
P = 2*L[i]
|
|
|
|
r = np.random.rand(1)
|
|
voiced[i] = r[0] > 0.5
|
|
|
|
# sample 2nd order IIR filter with random peak freq
|
|
|
|
r1 = np.random.rand(2)
|
|
r2 = np.random.rand(2)
|
|
if voiced[i]:
|
|
# choose alpha and gamma to get something like voiced speech
|
|
alpha1 = 0.05*np.pi + 0.25*np.pi*r1[0]
|
|
gamma1 = 0.9 + 0.09*r1[1]
|
|
alpha2 = alpha1 + 0.4*np.pi*r2[0]
|
|
gamma2 = 0.9 + 0.05*r2[1]
|
|
else:
|
|
alpha1 = 0.5*np.pi + 0.4*np.pi*r1[0]
|
|
gamma1 = 0.8 + 0.1*r1[1]
|
|
alpha2 = 0.5*np.pi + 0.4*np.pi*r2[0]
|
|
gamma2 = 0.8 + 0.1*r2[1]
|
|
|
|
w1,h1 = signal.freqz(1, [1, -2*gamma1*np.cos(alpha1), gamma1*gamma1], range(1,L[i]+1)*Wo[i])
|
|
w2,h2 = signal.freqz(1, [1, -2*gamma2*np.cos(alpha2), gamma2*gamma2], range(1,L[i]+1)*Wo[i])
|
|
|
|
# select n0 between 0...P-1 (it's periodic)
|
|
r = np.random.rand(1)
|
|
n0[i] = r[0]*P
|
|
e = np.exp(-1j*n0[i]*range(1,L[i]+1)*Wo[i])
|
|
|
|
for m in range(1,L[i]+1):
|
|
amp[i,m] = np.log10(np.abs(h1[m-1]*h2[m-1]))
|
|
if voiced[i]:
|
|
phase[i,m] = np.angle(h1[m-1]*h2[m-1]*e[m-1])
|
|
phase_disp[i,m] = np.angle(h1[m-1]*h2[m-1])
|
|
else:
|
|
r = np.random.rand(1)
|
|
phase[i,m] = r[0]*2*np.pi
|
|
phase_disp[i,m] = phase[i,m]
|
|
|
|
|
|
# use regular DSP to estimate n0, and remove effect of linear phase
|
|
|
|
print("estimate and remove n0")
|
|
n0_est = np.zeros((nb_samples))
|
|
phase_n0_removed = np.zeros((nb_samples, width))
|
|
for i in range(nb_samples):
|
|
err_min = 1E32
|
|
P = 2*L[i]
|
|
'''
|
|
for test_n0 in np.arange(0,P,0.25):
|
|
e = np.exp(-1j*test_n0*np.arange(1,L[i]+1)*Wo[i])
|
|
err = np.dot(10**amp[i,1:L[i]+1], np.abs(np.exp(1j*phase[i,1:L[i]+1]) - e)**2)
|
|
if err < err_min:
|
|
err_min = err
|
|
n0_est[i] = test_n0
|
|
print(n0[i], n0_est[i])
|
|
'''
|
|
r = np.random.rand(1)
|
|
n0_est[i] = n0[i] + 2*r - 1
|
|
|
|
# remove n0_est and set up rect training data
|
|
for m in range(1,L[i]+1):
|
|
phase_n0_removed[i,m] = np.angle(np.exp(1j*phase[i,m]) * np.conj(np.exp(-1j*n0_est[i]*Wo[i]*m)))
|
|
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
|
|
phase_rect[i,2*bin] = np.cos(phase_n0_removed[i,m])
|
|
phase_rect[i,2*bin+1] = np.sin(phase_n0_removed[i,m])
|
|
amp_[i,bin] = amp[i,m]
|
|
|
|
model = models.Sequential()
|
|
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
|
|
model.add(layers.Dense(4*pairs, activation='relu'))
|
|
model.add(layers.Dense(pairs))
|
|
model.summary()
|
|
|
|
from keras import optimizers
|
|
sgd = optimizers.SGD(lr=0.8, decay=1e-6, momentum=0.9, nesterov=True)
|
|
model.compile(loss=sparse_loss, optimizer=sgd)
|
|
history = model.fit(amp_, phase_rect, batch_size=nb_batch, epochs=nb_epochs)
|
|
|
|
# measure error in angle over all samples
|
|
|
|
phase_rect_est = model.predict(amp_)
|
|
phase_est = np.zeros((nb_samples, width))
|
|
used_bins = np.zeros((nb_samples, width), dtype=int)
|
|
for i in range(nb_samples):
|
|
for m in range(1,L[i]+1):
|
|
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
|
|
phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
|
|
used_bins[i,m] = 1
|
|
|
|
ind = np.nonzero(used_bins)
|
|
c1 = np.exp(1j*phase_disp[ind]); c2 = np.exp(1j*phase_est[ind]);
|
|
err_angle = np.angle(c1 * np.conj(c2))
|
|
var = np.var(err_angle)
|
|
std = np.std(err_angle)
|
|
print("angle var: %4.2f std: %4.2f rads" % (var,std))
|
|
print("angle var: %4.2f std: %4.2f degs" % ((std*180/np.pi)**2,std*180/np.pi))
|
|
|
|
plot_en = 1;
|
|
if plot_en:
|
|
plt.figure(1)
|
|
plt.plot(history.history['loss'])
|
|
plt.title('model loss')
|
|
plt.xlabel('epoch')
|
|
plt.show(block=False)
|
|
|
|
plt.figure(2)
|
|
plt.subplot(211)
|
|
plt.hist(err_angle*180/np.pi, bins=20)
|
|
plt.title('phase angle error (deg) and fo (Hz)')
|
|
plt.subplot(212)
|
|
plt.hist(Wo*(Fs/2)/np.pi, bins=20)
|
|
plt.show(block=False)
|
|
|
|
plt.figure(3)
|
|
plt.title('filter amplitudes')
|
|
for r in range(12):
|
|
plt.subplot(3,4,r+1)
|
|
plt.plot(amp[r,:L[r]],'g')
|
|
plt.show(block=False)
|
|
|
|
plt.figure(4)
|
|
plt.title('sample vectors and error')
|
|
for r in range(12):
|
|
plt.subplot(3,4,r+1)
|
|
plt.plot(phase_disp[r,:L[r]]*180/np.pi,'g')
|
|
if voiced[r]:
|
|
plt.plot(phase_n0_removed[r,:L[r]]*180/np.pi,'r')
|
|
plt.plot(phase_est[r,:L[r]]*180/np.pi,'b')
|
|
plt.ylim(-180,180)
|
|
plt.show(block=False)
|
|
|
|
# click on last figure to close all and finish
|
|
plt.waitforbuttonpress(0)
|
|
plt.close()
|