phasenn/phasenn_test11.py

224 lines
7.0 KiB
Python
Executable File

#!/usr/bin/python3
# phasenn_test11.py
#
# David Rowe Nov 2019
# test10 didn't work due to n0 -> DFT sample mapping not converging
# (test9b/9c).
#
# This is another attempt using regular DSP to extract n0, then
# fitting the dispersive model after the n0 component of phase has
# been removed.
# Combine test8 and and test9c:
# + excite a 2nd order system with a impulse train
# + pitch (Wo), pulse onset time (n0), 2nd order system parameters
# (alpha and gamma) random
# + estimate and extract n0 component of phase using regular DSP
# + Estimate dispersive part using amplitude spectra
# + Add n0 component back again
import numpy as np
import sys
from keras.layers import Input, Dense, Concatenate
from keras import models,layers
from keras import initializers
import matplotlib.pyplot as plt
from scipy import signal
from keras import backend as K
# less verbose tensorflow ....
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# custom loss function
def sparse_loss(y_true, y_pred):
mask = K.cast( K.not_equal(y_pred, 0), dtype='float32')
n = K.sum(mask)
return K.sum(K.square((y_pred - y_true)*mask))/n
# testing custom loss function
x = Input(shape=(None,))
y = Input(shape=(None,))
loss_func = K.Function([x, y], [sparse_loss(x, y)])
assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1])
assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
# constants
N = 80 # number of time domain samples in frame
nb_samples = 100000
nb_batch = 32
nb_epochs = 100
width = 256
pairs = 2*width
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data.
print("Generate training data")
# amplitude and phase at rate L
amp = np.zeros((nb_samples, width))
phase = np.zeros((nb_samples, width))
phase_disp = np.zeros((nb_samples, width))
# rate "width" phase encoded as cos,sin pairs:
phase_rect = np.zeros((nb_samples, pairs))
amp_ = np.zeros((nb_samples, width))
# side information
Wo = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
n0 = np.zeros(nb_samples, dtype=int)
voiced = np.zeros(nb_samples, dtype=int)
for i in range(nb_samples):
# distribute fo randomly on a log scale, gives us more training
# data with low freq frames which have more harmonics and are
# harder to match
r = np.random.rand(1)
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
fo = fo_min
fo = 10 ** log_fo
Wo[i] = fo*2*np.pi/Fs
L[i] = int(np.floor(np.pi/Wo[i]))
# pitch period in samples
P = 2*L[i]
r = np.random.rand(1)
voiced[i] = r[0] > 0.5
# sample 2nd order IIR filter with random peak freq
r1 = np.random.rand(2)
r2 = np.random.rand(2)
if voiced[i]:
# choose alpha and gamma to get something like voiced speech
alpha1 = 0.05*np.pi + 0.25*np.pi*r1[0]
gamma1 = 0.9 + 0.09*r1[1]
alpha2 = alpha1 + 0.4*np.pi*r2[0]
gamma2 = 0.9 + 0.05*r2[1]
else:
alpha1 = 0.5*np.pi + 0.4*np.pi*r1[0]
gamma1 = 0.8 + 0.1*r1[1]
alpha2 = 0.5*np.pi + 0.4*np.pi*r2[0]
gamma2 = 0.8 + 0.1*r2[1]
w1,h1 = signal.freqz(1, [1, -2*gamma1*np.cos(alpha1), gamma1*gamma1], range(1,L[i]+1)*Wo[i])
w2,h2 = signal.freqz(1, [1, -2*gamma2*np.cos(alpha2), gamma2*gamma2], range(1,L[i]+1)*Wo[i])
# select n0 between 0...P-1 (it's periodic)
r = np.random.rand(1)
n0[i] = r[0]*P
e = np.exp(-1j*n0[i]*range(1,L[i]+1)*Wo[i])
for m in range(1,L[i]+1):
amp[i,m] = np.log10(np.abs(h1[m-1]*h2[m-1]))
if voiced[i]:
phase[i,m] = np.angle(h1[m-1]*h2[m-1]*e[m-1])
phase_disp[i,m] = np.angle(h1[m-1]*h2[m-1])
else:
r = np.random.rand(1)
phase[i,m] = r[0]*2*np.pi
phase_disp[i,m] = phase[i,m]
# use regular DSP to estimate n0, and remove effect of linear phase
print("estimate and remove n0")
n0_est = np.zeros((nb_samples))
phase_n0_removed = np.zeros((nb_samples, width))
for i in range(nb_samples):
err_min = 1E32
P = 2*L[i]
'''
for test_n0 in np.arange(0,P,0.25):
e = np.exp(-1j*test_n0*np.arange(1,L[i]+1)*Wo[i])
err = np.dot(10**amp[i,1:L[i]+1], np.abs(np.exp(1j*phase[i,1:L[i]+1]) - e)**2)
if err < err_min:
err_min = err
n0_est[i] = test_n0
print(n0[i], n0_est[i])
'''
r = np.random.rand(1)
n0_est[i] = n0[i] + 2*r - 1
# remove n0_est and set up rect training data
for m in range(1,L[i]+1):
phase_n0_removed[i,m] = np.angle(np.exp(1j*phase[i,m]) * np.conj(np.exp(-1j*n0_est[i]*Wo[i]*m)))
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
phase_rect[i,2*bin] = np.cos(phase_n0_removed[i,m])
phase_rect[i,2*bin+1] = np.sin(phase_n0_removed[i,m])
amp_[i,bin] = amp[i,m]
model = models.Sequential()
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
model.add(layers.Dense(4*pairs, activation='relu'))
model.add(layers.Dense(pairs))
model.summary()
from keras import optimizers
sgd = optimizers.SGD(lr=0.8, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=sparse_loss, optimizer=sgd)
history = model.fit(amp_, phase_rect, batch_size=nb_batch, epochs=nb_epochs)
# measure error in angle over all samples
phase_rect_est = model.predict(amp_)
phase_est = np.zeros((nb_samples, width))
used_bins = np.zeros((nb_samples, width), dtype=int)
for i in range(nb_samples):
for m in range(1,L[i]+1):
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
phase_est[i,m] = np.angle(phase_rect_est[i,2*bin] + 1j*phase_rect_est[i,2*bin+1])
used_bins[i,m] = 1
ind = np.nonzero(used_bins)
c1 = np.exp(1j*phase_disp[ind]); c2 = np.exp(1j*phase_est[ind]);
err_angle = np.angle(c1 * np.conj(c2))
var = np.var(err_angle)
std = np.std(err_angle)
print("angle var: %4.2f std: %4.2f rads" % (var,std))
print("angle var: %4.2f std: %4.2f degs" % ((std*180/np.pi)**2,std*180/np.pi))
plot_en = 1;
if plot_en:
plt.figure(1)
plt.plot(history.history['loss'])
plt.title('model loss')
plt.xlabel('epoch')
plt.show(block=False)
plt.figure(2)
plt.subplot(211)
plt.hist(err_angle*180/np.pi, bins=20)
plt.title('phase angle error (deg) and fo (Hz)')
plt.subplot(212)
plt.hist(Wo*(Fs/2)/np.pi, bins=20)
plt.show(block=False)
plt.figure(3)
plt.title('filter amplitudes')
for r in range(12):
plt.subplot(3,4,r+1)
plt.plot(amp[r,:L[r]],'g')
plt.show(block=False)
plt.figure(4)
plt.title('sample vectors and error')
for r in range(12):
plt.subplot(3,4,r+1)
plt.plot(phase_disp[r,:L[r]]*180/np.pi,'g')
if voiced[r]:
plt.plot(phase_n0_removed[r,:L[r]]*180/np.pi,'r')
plt.plot(phase_est[r,:L[r]]*180/np.pi,'b')
plt.ylim(-180,180)
plt.show(block=False)
# click on last figure to close all and finish
plt.waitforbuttonpress(0)
plt.close()