diff --git a/phasenn_test11.py b/phasenn_test11.py new file mode 100644 index 0000000..8e6930d --- /dev/null +++ b/phasenn_test11.py @@ -0,0 +1,222 @@ +#!/usr/bin/python3 +# phasenn_test11.py +# +# David Rowe Nov 2019 + +# test10 didn't work due to n0 -> DFT sample mapping not converging +# (test9b/9c). +# +# This is another attempt using regular DSP to extract n0, then +# fitting the dispersive model after the n0 component of phase has +# been removed. + +# Combine test8 and and test9c: +# + excite a 2nd order system with a impulse train +# + pitch (Wo), pulse onset time (n0), 2nd order system parameters +# (alpha and gamma) random +# + estimate and extract n0 component of phase using regular DSP +# + Estimate dispersive part using amplitude spectra +# + Add n0 component back again + +import numpy as np +import sys +from keras.layers import Input, Dense, Concatenate +from keras import models,layers +from keras import initializers +import matplotlib.pyplot as plt +from scipy import signal +from keras import backend as K +# less verbose tensorflow .... +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +# custom loss function +def sparse_loss(y_true, y_pred): + mask = K.cast( K.not_equal(y_pred, 0), dtype='float32') + n = K.sum(mask) + return K.sum(K.square((y_pred - y_true)*mask))/n + +# testing custom loss function +x = Input(shape=(None,)) +y = Input(shape=(None,)) +loss_func = K.Function([x, y], [sparse_loss(x, y)]) +assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1]) +assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1]) + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 1000 +nb_batch = 32 +nb_epochs = 10 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data. + +print("Generate training data") + +# amplitude and phase at rate L +amp = np.zeros((nb_samples, width)) +phase = np.zeros((nb_samples, width)) +phase_disp = np.zeros((nb_samples, width)) + +# rate "width" phase encoded as cos,sin pairs: +phase_rect = np.zeros((nb_samples, pairs)) +amp_ = np.zeros((nb_samples, width)) + +# side information +Wo = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) +n0 = np.zeros(nb_samples, dtype=int) +voiced = np.zeros(nb_samples, dtype=int) + +for i in range(nb_samples): + + # distribute fo randomly on a log scale, gives us more training + # data with low freq frames which have more harmonics and are + # harder to match + r = np.random.rand(1) + log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0] + fo = fo_min + fo = 10 ** log_fo + Wo[i] = fo*2*np.pi/Fs + L[i] = int(np.floor(np.pi/Wo[i])) + # pitch period in samples + P = 2*L[i] + + r = np.random.rand(1) + voiced = r[0] > 0.5 + + # sample 2nd order IIR filter with random peak freq + + r1 = np.random.rand(2) + r2 = np.random.rand(2) + if voiced: + # choose alpha and gamma to get something like voiced speech + alpha1 = 0.05*np.pi + 0.25*np.pi*r1[0] + gamma1 = 0.9 + 0.09*r1[1] + alpha2 = alpha1 + 0.4*np.pi*r2[0] + gamma2 = 0.9 + 0.05*r2[1] + else: + alpha1 = 0.5*np.pi + 0.5*np.pi*r1[0] + gamma1 = 0.8 + 0.1*r1[1] + alpha2 = 0.5*np.pi + 0.5*np.pi*r2[0] + gamma2 = 0.8 + 0.1*r2[1] + + w1,h1 = signal.freqz(1, [1, -2*gamma1*np.cos(alpha1), gamma1*gamma1], range(1,L[i]+1)*Wo[i]) + w2,h2 = signal.freqz(1, [1, -2*gamma2*np.cos(alpha2), gamma2*gamma2], range(1,L[i]+1)*Wo[i]) + + # select n0 between 0...P-1 (it's periodic) + r = np.random.rand(1) + n0[i] = r[0]*P + e = np.exp(-1j*n0[i]*range(1,L[i]+1)*Wo[i]) + + for m in range(1,L[i]+1): + amp[i,m] = np.log10(np.abs(h1[m-1]*h2[m-1])) + if voiced: + phase[i,m] = np.angle(h1[m-1]*h2[m-1]*e[m-1]) + phase_disp[i,m] = np.angle(h1[m-1]*h2[m-1]) + else: + r = np.random.rand(1) + phase[i,m] = r[0]*2*np.pi + phase_disp[i,m] = phase[i,m] + + +# use regular DSP to estimate n0, and remove effect of linear phase + +print("estimate and remove n0") +n0_est = np.zeros((nb_samples)) +phase_n0_removed = np.zeros((nb_samples, width)) +for i in range(nb_samples): + err_min = 1E32 + P = 2*L[i] + ''' + for test_n0 in np.arange(0,P,0.25): + e = np.exp(-1j*test_n0*np.arange(1,L[i]+1)*Wo[i]) + err = np.dot(10**amp[i,1:L[i]+1], np.abs(np.exp(1j*phase[i,1:L[i]+1]) - e)**2) + if err < err_min: + err_min = err + n0_est[i] = test_n0 + print(n0[i], n0_est[i]) + ''' + r = np.random.rand(1) + n0_est[i] = n0[i] + 2*r - 1 + + # remove n0_est and set up rect training data + for m in range(1,L[i]+1): + phase_n0_removed[i,m] = np.angle(np.exp(1j*phase[i,m]) * np.conj(np.exp(-1j*n0_est[i]*Wo[i]*m))) + bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin) + phase_rect[i,2*bin] = np.cos(phase_n0_removed[i,m]) + phase_rect[i,2*bin+1] = np.sin(phase_n0_removed[i,m]) + amp_[i,bin] = amp[i,m] + +model = models.Sequential() +model.add(layers.Dense(pairs, activation='relu', input_dim=width)) +model.add(layers.Dense(4*pairs, activation='relu')) +model.add(layers.Dense(pairs)) +model.summary() + +from keras import optimizers +sgd = optimizers.SGD(lr=0.8, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss=sparse_loss, optimizer=sgd) +history = model.fit(amp_, phase_rect, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in angle over all samples + +phase_rect_est = model.predict(amp_) +phase_est = np.zeros((nb_samples, width)) +used_bins = np.zeros((nb_samples, width), dtype=int) +for i in range(nb_samples): + for m in range(1,L[i]+1): + bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin) + phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1]) + used_bins[i,m] = 1 + +ind = np.nonzero(used_bins) +c1 = np.exp(1j*phase_disp[ind]); c2 = np.exp(1j*phase_est[ind]); +err_angle = np.angle(c1 * np.conj(c2)) +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %4.2f std: %4.2f rads" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % ((std*180/np.pi)**2,std*180/np.pi)) + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + plt.show(block=False) + + plt.figure(2) + plt.subplot(211) + plt.hist(err_angle*180/np.pi, bins=20) + plt.title('phase angle error (deg) and fo (Hz)') + plt.subplot(212) + plt.hist(Wo*(Fs/2)/np.pi, bins=20) + plt.show(block=False) + + plt.figure(3) + plt.title('filter amplitudes') + for r in range(12): + plt.subplot(3,4,r+1) + plt.plot(amp[r,:L[r]],'g') + plt.show(block=False) + + plt.figure(4) + plt.title('sample vectors and error') + for r in range(12): + plt.subplot(3,4,r+1) + plt.plot(phase_disp[r,:L[r]]*180/np.pi,'g') + plt.plot(phase_n0_removed[r,:L[r]]*180/np.pi,'r') + plt.plot(phase_est[r,:L[r]]*180/np.pi,'b') + plt.ylim(-180,180) + plt.show(block=False) + + # click on last figure to close all and finish + plt.waitforbuttonpress(0) + plt.close()