diff --git a/phasenn_test10.py b/phasenn_test10.py new file mode 100755 index 0000000..94ac3c6 --- /dev/null +++ b/phasenn_test10.py @@ -0,0 +1,190 @@ +#!/usr/bin/python3 +# phasenn_test8.py +# +# David Rowe Oct 2019 + +# Combine test8 and and test9: +# + excite a 2nd order system with a impulse train +# + pitch (Wo), pulse onset time (n0), 2nd order system parameters +# (alpha and gamma) random +# + Estimate phase spectra using the amplitude spectra and (previous) frames +# phase spectra to extract n0. +# + Note in this test the input phase spectra is actually the correct +# output - but we constrict the information flowing through this part of the +# network to ensure just n0 passes through. Future work: it should also +# work with other input phase spectra with the same n0 + +import numpy as np +import sys +from keras.layers import Input, Dense, Concatenate +from keras import Model +from keras import initializers +import matplotlib.pyplot as plt +from scipy import signal +from keras import backend as K + +# custom loss function +def sparse_loss(y_true, y_pred): + mask = K.cast( K.not_equal(y_pred, 0), dtype='float32') + n = K.sum(mask) + return K.sum(K.square((y_pred - y_true)*mask))/n + +# testing custom loss function +x = Input(shape=(None,)) +y = Input(shape=(None,)) +loss_func = K.Function([x, y], [sparse_loss(x, y)]) +assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1]) +assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1]) + +# constants + +N = 80 # number of time domain samples in frame +nb_samples = 400000 +nb_batch = 32 +nb_epochs = 10 +width = 256 +pairs = 2*width +fo_min = 50 +fo_max = 400 +Fs = 8000 + +# Generate training data. + +amp = np.zeros((nb_samples, width)) +# phase as an angle +phase = np.zeros((nb_samples, width)) +# phase encoded as cos,sin pairs: +phase_rect = np.zeros((nb_samples, pairs)) +Wo = np.zeros(nb_samples) +L = np.zeros(nb_samples, dtype=int) +n0 = np.zeros(nb_samples, dtype=int) + +for i in range(nb_samples): + + # distribute fo randomly on a log scale, gives us more training + # data with low freq frames which have more harmonics and are + # harder to match + r = np.random.rand(1) + log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0] + fo = fo_min + fo = 10 ** log_fo + Wo[i] = fo*2*np.pi/Fs + L[i] = int(np.floor(np.pi/Wo[i])) + # pitch period in samples + P = 2*L[i] + + r = np.random.rand(3) + + # sample 2nd order IIR filter with random peak freq, choose alpha + # and gamma to get something like voiced speech + alpha = 0.1*np.pi + 0.4*np.pi*r[0] + gamma = 0.9 + 0.09*r[1] + w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i]) + + # select n0 between 0...P-1 (it's periodic) + n0[i] = r[2]*10 + e = np.exp(-1j*n0[i]*range(1,L[i])*Wo[i]) + + for m in range(1,L[i]): + bin = int(np.round(m*Wo[i]*width/np.pi)) + mWo = bin*np.pi/width + + amp[i,bin] = np.log10(abs(h[m-1])) + phase[i,bin] = np.angle(h[m-1]*e[m-1]) + phase_rect[i,2*bin] = np.cos(phase[i,bin]) + phase_rect[i,2*bin+1] = np.sin(phase[i,bin]) + +# estimate n0 from input phases in this part of network +phase_input = Input(shape=(pairs,), name='phase_input') +y = Dense(pairs, activation='relu')(phase_input) +y = Dense(128, activation='relu')(y) +y = Dense(1)(y) + +# estimate dispersive part of phase from amplitudes in this part of network +amp_input = Input(shape=(width,), name='amp_input') +x = Dense(pairs, activation='relu')(amp_input) +x = Dense(4*pairs, activation='relu')(x) +x = Dense(pairs)(x) + +# combine in final stage, should be some sort of freq dep rotation, function of n0 +z = Concatenate()([y,x]) +output = Dense(pairs, name='main_output')(z) + +model = Model(inputs=[phase_input, amp_input], outputs=[output]) +model.summary() + +from keras import optimizers +sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss=sparse_loss, optimizer=sgd) +history = model.fit([phase_rect, amp], phase_rect, batch_size=nb_batch, epochs=nb_epochs) + +# measure error in rectangular coordinates over all samples + +phase_rect_est = model.predict([phase_rect, amp]) +ind = np.nonzero(phase_rect) +err = (phase_rect[ind] - phase_rect_est[ind]) +var = np.var(err) +std = np.std(err) +print("rect var: %f std: %f" % (var,std)) + +c1 = phase_rect[ind]; c1 = c1[::2] + 1j*c1[1::2] +c2 = phase_rect_est[ind]; c2 = c2[::2] + 1j*c2[1::2] +err_angle = np.angle(c1 * np.conj(c2)) + +var = np.var(err_angle) +std = np.std(err_angle) +print("angle var: %4.2f std: %4.2f rads" % (var,std)) +print("angle var: %4.2f std: %4.2f degs" % (var*180/np.pi,std*180/np.pi)) + +def sample_model(r): + phase_L = np.zeros(width, dtype=complex) + phase_L_est = np.zeros(width, dtype=complex) + phase_L_err = np.zeros(width, dtype=complex) + amp_L = np.zeros(width) + + for m in range(1,L[r]): + wm = m*Wo[r] + bin = int(np.round(wm*width/np.pi)) + phase_L[m] = phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1] + phase_L_est[m] = phase_rect_est[r,2*bin] + 1j*phase_rect_est[r,2*bin+1] + phase_L_err[m] = phase_L[m] * np.conj(phase_L_est[m]) + amp_L[m] = amp[r,bin] + return phase_L, phase_L_err, amp_L + +plot_en = 1; +if plot_en: + plt.figure(1) + plt.plot(history.history['loss']) + plt.title('model loss') + plt.xlabel('epoch') + plt.show(block=False) + + plt.figure(2) + plt.subplot(211) + plt.hist(err_angle*180/np.pi, bins=20) + plt.subplot(212) + plt.hist(Wo*(Fs/2)/np.pi, bins=20) + plt.title('phase angle error (deg) and fo (Hz)') + plt.show(block=False) + + plt.figure(3) + plt.title('sample vectors and error') + for r in range(12): + plt.subplot(3,4,r+1) + phase, phase_err, amp_filt = sample_model(r) + plt.plot(np.angle(phase[1:L[r]])*180/np.pi,'g') + plt.plot(np.angle(phase_err[1:L[r]])*180/np.pi,'r') + plt.ylim(-180,180) + plt.show(block=False) + + plt.figure(4) + plt.title('filter amplitudes') + for r in range(12): + plt.subplot(3,4,r+1) + phase, phase_err, amp_filt = sample_model(r) + plt.plot(amp_filt[1:L[r]],'g') + plt.show(block=False) + + # click on last figure to close all and finish + plt.waitforbuttonpress(0) + plt.close()