work in progress combination of n0 est and dispersive part

2019-11-22 12:43:59 +10:30 · 2019-11-22 12:43:59 +10:30 · a82226f9da
parent 371d73d4d4
commit a82226f9da
1 changed files with 190 additions and 0 deletions
--- a/phasenn_test10.py
+++ b/phasenn_test10.py
@ -0,0 +1,190 @@
+#!/usr/bin/python3
+# phasenn_test10.py
+#
+# David Rowe Oct 2019
+
+# Combine test8 and test9:
+#   + excite a 2nd order system with an impulse train
+#   + pitch (Wo), pulse onset time (n0), 2nd order system parameters
+#     (alpha and gamma) random
+#   + Estimate phase spectra using the amplitude spectra and (previous) frames
+#     phase spectra to extract n0.
+#   + Note in this test the input phase spectra is actually the correct
+#     output - but we constrict the information flowing through this part of the
+#     network to ensure just n0 passes through.  Future work: it should also
+#     work with other input phase spectra with the same n0
+
+import numpy as np
+import sys
+from keras.layers import Input, Dense, Concatenate
+from keras import Model
+from keras import initializers
+import matplotlib.pyplot as plt
+from scipy import signal
+from keras import backend as K
+
+# custom loss function
+def sparse_loss(y_true, y_pred):
+    mask = K.cast( K.not_equal(y_pred, 0), dtype='float32')
+    n = K.sum(mask)
+    return K.sum(K.square((y_pred - y_true)*mask))/n
+
+# testing custom loss function
+#
+# Wrap sparse_loss in a backend function of two symbolic inputs so that it
+# can be evaluated on small hand-written vectors.  x is passed as y_true
+# and y as y_pred.  Both cases below are expected to evaluate to 1.
+# Note the names x and y are rebound further down when the network layers
+# are built.
+x = Input(shape=(None,))
+y = Input(shape=(None,))
+loss_func = K.Function([x, y], [sparse_loss(x, y)])
+assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1])
+assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
+
+# constants
+
+N                 = 80      # number of time domain samples in frame (not referenced elsewhere in this script)
+nb_samples        = 400000  # number of training vectors generated
+nb_batch          = 32      # batch size passed to model.fit()
+nb_epochs         = 10      # number of epochs passed to model.fit()
+width             = 256     # number of frequency bins spanning 0..pi in the amp/phase vectors
+pairs             = 2*width # one (cos, sin) pair per frequency bin
+fo_min            = 50      # lower limit of the random fundamental frequency
+fo_max            = 400     # upper limit of the random fundamental frequency
+Fs                = 8000    # used to convert fo to Wo = 2*pi*fo/Fs; presumably the sample rate in Hz
+
+# Generate training data.
+
+amp = np.zeros((nb_samples, width))
+# phase as an angle
+phase = np.zeros((nb_samples, width))
+# phase encoded as cos,sin pairs:
+phase_rect = np.zeros((nb_samples, pairs))
+Wo = np.zeros(nb_samples)
+L = np.zeros(nb_samples, dtype=int)
+n0 = np.zeros(nb_samples, dtype=int)
+
+for i in range(nb_samples):
+
+    # distribute fo randomly on a log scale, gives us more training
+    # data with low freq frames which have more harmonics and are
+    # harder to match
+    r = np.random.rand(1)
+    log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
+    fo = fo_min
+    fo = 10 ** log_fo
+    Wo[i] = fo*2*np.pi/Fs
+    L[i] = int(np.floor(np.pi/Wo[i]))
+    # pitch period in samples
+    P = 2*L[i]
+ 
+    r = np.random.rand(3)
+    
+    # sample 2nd order IIR filter with random peak freq, choose alpha
+    # and gamma to get something like voiced speech
+    alpha = 0.1*np.pi + 0.4*np.pi*r[0]
+    gamma = 0.9 + 0.09*r[1]
+    w,h = signal.freqz(1, [1, -2*gamma*np.cos(alpha), gamma*gamma], range(1,L[i])*Wo[i])
+
+    # select n0 between 0...P-1 (it's periodic)
+    n0[i] = r[2]*10
+    e = np.exp(-1j*n0[i]*range(1,L[i])*Wo[i])
+
+    for m in range(1,L[i]):
+        bin = int(np.round(m*Wo[i]*width/np.pi))
+        mWo = bin*np.pi/width
+        
+        amp[i,bin] = np.log10(abs(h[m-1]))
+        phase[i,bin] = np.angle(h[m-1]*e[m-1])
+        phase_rect[i,2*bin]   = np.cos(phase[i,bin])
+        phase_rect[i,2*bin+1] = np.sin(phase[i,bin])
+
+# estimate n0 from input phases in this part of network
+phase_input = Input(shape=(pairs,), name='phase_input')
+y = Dense(pairs, activation='relu')(phase_input)
+y = Dense(128, activation='relu')(y)
+# single linear unit: this is the constriction that limits what the phase
+# branch can pass on to the output stage
+y = Dense(1)(y)
+
+# estimate dispersive part of phase from amplitudes in this part of network
+amp_input = Input(shape=(width,), name='amp_input')
+x = Dense(pairs, activation='relu')(amp_input)
+x = Dense(4*pairs, activation='relu')(x)
+x = Dense(pairs)(x)
+
+# combine in final stage, should be some sort of freq dep rotation, function of n0
+# (currently a single linear Dense layer over the concatenated branches)
+z = Concatenate()([y,x])
+output = Dense(pairs, name='main_output')(z)
+                             
+model = Model(inputs=[phase_input, amp_input], outputs=[output])
+model.summary()
+
+from keras import optimizers
+sgd = optimizers.SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
+model.compile(loss=sparse_loss, optimizer=sgd)
+# the training target is phase_rect, the same array that feeds phase_input
+history = model.fit([phase_rect, amp], phase_rect, batch_size=nb_batch, epochs=nb_epochs)
+
+# measure error in rectangular coordinates over all samples
+
+phase_rect_est = model.predict([phase_rect, amp])
+# only the entries that were written during data generation are scored
+ind = np.nonzero(phase_rect)
+err = (phase_rect[ind] - phase_rect_est[ind])
+var = np.var(err)
+std = np.std(err)
+print("rect var: %f std: %f" % (var,std))
+
+# measure error as an angle.  Select whole (cos, sin) pairs rather than
+# striding over the flat non-zero selection: a pair is used if either of
+# its components is non-zero, so a cos or sin that happens to be exactly 0
+# cannot shift the pairing of everything that follows it.
+target_pairs = phase_rect.reshape(-1, width, 2)
+est_pairs = phase_rect_est.reshape(-1, width, 2)
+pair_used = np.any(target_pairs != 0, axis=2)
+c1 = target_pairs[pair_used]; c1 = c1[:,0] + 1j*c1[:,1]
+c2 = est_pairs[pair_used]; c2 = c2[:,0] + 1j*c2[:,1]
+# angle of target * conj(estimate) is the phase error wrapped to +/- pi
+err_angle = np.angle(c1 * np.conj(c2))
+
+var = np.var(err_angle)
+std = np.std(err_angle)
+print("angle var: %4.2f std: %4.2f rads" % (var,std))
+# a variance scales with the square of the unit conversion factor
+print("angle var: %4.2f std: %4.2f degs" % (var*(180/np.pi)**2,std*180/np.pi))
+
+def sample_model(r):
+    phase_L = np.zeros(width, dtype=complex)
+    phase_L_est = np.zeros(width, dtype=complex)
+    phase_L_err = np.zeros(width, dtype=complex)
+    amp_L = np.zeros(width)
+    
+    for m in range(1,L[r]):
+        wm = m*Wo[r]
+        bin = int(np.round(wm*width/np.pi))
+        phase_L[m] = phase_rect[r,2*bin] + 1j*phase_rect[r,2*bin+1]
+        phase_L_est[m] = phase_rect_est[r,2*bin] + 1j*phase_rect_est[r,2*bin+1]
+        phase_L_err[m] = phase_L[m] * np.conj(phase_L_est[m])
+        amp_L[m] = amp[r,bin]
+    return phase_L, phase_L_err, amp_L
+    
+# set plot_en to 0 to skip all figures
+plot_en = 1;
+if plot_en:
+    # figure 1: training loss per epoch, from the history returned by fit()
+    plt.figure(1)
+    plt.plot(history.history['loss'])
+    plt.title('model loss')
+    plt.xlabel('epoch')
+    plt.show(block=False)
+ 
+    # figure 2: histogram of the angle error in degrees (top) and of the
+    # fundamental frequency (bottom); Wo*(Fs/2)/pi converts Wo back to fo.
+    # The title call comes after subplot(212) is selected.
+    plt.figure(2)
+    plt.subplot(211)
+    plt.hist(err_angle*180/np.pi, bins=20)
+    plt.subplot(212)
+    plt.hist(Wo*(Fs/2)/np.pi, bins=20)
+    plt.title('phase angle error (deg) and fo (Hz)')
+    plt.show(block=False)
+
+    # figure 3: first 12 training vectors, target phase in green and phase
+    # error in red, both in degrees, plotted against harmonic number
+    plt.figure(3)
+    plt.title('sample vectors and error')
+    for r in range(12):
+        plt.subplot(3,4,r+1)
+        # note: this rebinds the module level name phase (the training
+        # phase array) to the per-vector array returned by sample_model()
+        phase, phase_err, amp_filt = sample_model(r)    
+        plt.plot(np.angle(phase[1:L[r]])*180/np.pi,'g')
+        plt.plot(np.angle(phase_err[1:L[r]])*180/np.pi,'r')
+        plt.ylim(-180,180)
+    plt.show(block=False)
+
+    # figure 4: stored (log10) filter amplitudes of the same 12 vectors
+    plt.figure(4)
+    plt.title('filter amplitudes')
+    for r in range(12):
+        plt.subplot(3,4,r+1)
+        phase, phase_err, amp_filt = sample_model(r)    
+        plt.plot(amp_filt[1:L[r]],'g')
+    plt.show(block=False)
+    
+    # click on last figure to close all and finish
+    plt.waitforbuttonpress(0)
+    plt.close()