mirror of https://github.com/drowe67/phasenn.git
Training on just the high-energy voiced (V) frames didn't improve the randomness of unvoiced (UV) speech
parent
837de6367c
commit
a2832ecb6a
|
@ -21,20 +21,20 @@ codec2_model = construct.Struct(
|
||||||
"voiced" / construct.Int32sl
|
"voiced" / construct.Int32sl
|
||||||
)
|
)
|
||||||
|
|
||||||
def read(filename):
|
def read(filename, max_nb_samples):
|
||||||
|
|
||||||
# Determine number of records in file, not very Pythonic I know :-)
|
# Determine number of records in file, not very Pythonic I know :-)
|
||||||
|
|
||||||
nb_samples = 0
|
nb_samples = 0
|
||||||
with open(filename, 'rb') as f:
|
with open(filename, 'rb') as f:
|
||||||
while True:
|
while True and (nb_samples < max_nb_samples):
|
||||||
try:
|
try:
|
||||||
model = codec2_model.parse_stream(f)
|
model = codec2_model.parse_stream(f)
|
||||||
nb_samples += 1
|
nb_samples += 1
|
||||||
except:
|
except:
|
||||||
f.close()
|
f.close()
|
||||||
break
|
break
|
||||||
|
|
||||||
Wo = np.zeros(nb_samples)
|
Wo = np.zeros(nb_samples)
|
||||||
L = np.zeros(nb_samples, dtype=int)
|
L = np.zeros(nb_samples, dtype=int)
|
||||||
A = np.zeros((nb_samples, width))
|
A = np.zeros((nb_samples, width))
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
|
from mpl_toolkits.mplot3d import axes3d
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from scipy import signal
|
from scipy import signal
|
||||||
import codec2_model
|
import codec2_model
|
||||||
|
@ -35,8 +36,9 @@ def list_str(values):
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Train a NN to model Codec 2 phases')
|
parser = argparse.ArgumentParser(description='Train a NN to model Codec 2 phases')
|
||||||
parser.add_argument('modelfile', help='Codec 2 model file with linear phase removed')
|
parser.add_argument('modelfile', help='Codec 2 model file with linear phase removed')
|
||||||
|
parser.add_argument('--nb_samples', type=int, default=1000000, help='Number of frames to train on')
|
||||||
parser.add_argument('--frames', type=list_str, default="30,31,32,33,34,35", help='Frames to view')
|
parser.add_argument('--frames', type=list_str, default="30,31,32,33,34,35", help='Frames to view')
|
||||||
parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs')
|
parser.add_argument('--epochs', type=int, default=10, help='Number of training epochs')
|
||||||
parser.add_argument('--nnout', type=str, default="phasenn.h5", help='Name of output Codec 2 model file')
|
parser.add_argument('--nnout', type=str, default="phasenn.h5", help='Name of output Codec 2 model file')
|
||||||
parser.add_argument('--plotunvoiced', action='store_true', help='plot unvoiced frames')
|
parser.add_argument('--plotunvoiced', action='store_true', help='plot unvoiced frames')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
@ -44,9 +46,20 @@ args = parser.parse_args()
|
||||||
assert nb_plots == len(args.frames)
|
assert nb_plots == len(args.frames)
|
||||||
|
|
||||||
# read in model file records
|
# read in model file records
|
||||||
Wo, L, A, phase, voiced = codec2_model.read(args.modelfile)
|
Wo, L, A, phase, voiced = codec2_model.read(args.modelfile, args.nb_samples)
|
||||||
nb_samples = Wo.size;
|
nb_samples = Wo.size;
|
||||||
print("nb_samples: %d" % (nb_samples))
|
nb_voiced = np.count_nonzero(voiced)
|
||||||
|
print("nb_samples: %d voiced %d" % (nb_samples, nb_voiced))
|
||||||
|
|
||||||
|
# work out average energy for each frame (in dB)
|
||||||
|
energy_thresh = 10
|
||||||
|
energy = np.zeros(nb_samples)
|
||||||
|
nb_train = 0
|
||||||
|
for i in range(nb_samples):
|
||||||
|
energy[i] = np.mean(20*np.log10(A[i,1:L[i]+1]))
|
||||||
|
if (energy[i] > energy_thresh) and voiced[i]:
|
||||||
|
nb_train += 1
|
||||||
|
print("energy mean: %4.2f thresh: %4.2f nb_train: %d" % (np.mean(energy),energy_thresh, nb_train))
|
||||||
|
|
||||||
# set up sparse vectors, phase represented by cos(), sin() pairs
|
# set up sparse vectors, phase represented by cos(), sin() pairs
|
||||||
amp = np.zeros((nb_samples, width))
|
amp = np.zeros((nb_samples, width))
|
||||||
|
@ -55,13 +68,19 @@ for i in range(nb_samples):
|
||||||
for m in range(1,L[i]+1):
|
for m in range(1,L[i]+1):
|
||||||
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
|
bin = int(np.round(m*Wo[i]*width/np.pi)); bin = min(width-1, bin)
|
||||||
amp[i,bin] = np.log10(A[i,m])
|
amp[i,bin] = np.log10(A[i,m])
|
||||||
#phase_rect[i,2*bin] = np.max((1,amp[i,bin]))*np.cos(phase[i,m])
|
|
||||||
#phase_rect[i,2*bin+1] = np.max((1,amp[i,bin]))*np.sin(phase[i,m])
|
|
||||||
#phase_rect[i,2*bin] = amp[i,bin]*np.cos(phase[i,m])
|
|
||||||
#phase_rect[i,2*bin+1] = amp[i,bin]*np.sin(phase[i,m])
|
|
||||||
phase_rect[i,2*bin] = np.cos(phase[i,m])
|
phase_rect[i,2*bin] = np.cos(phase[i,m])
|
||||||
phase_rect[i,2*bin+1] = np.sin(phase[i,m])
|
phase_rect[i,2*bin+1] = np.sin(phase[i,m])
|
||||||
|
|
||||||
|
# extract voiced frames above energy threshold for training
|
||||||
|
amp_train = np.zeros((nb_train, width))
|
||||||
|
phase_train_rect = np.zeros((nb_train, pairs))
|
||||||
|
j = 0
|
||||||
|
for i in range(nb_samples):
|
||||||
|
if (energy[i] > energy_thresh) and voiced[i]:
|
||||||
|
amp_train[j,:] = amp[i,:]
|
||||||
|
phase_train_rect[j,:] = phase_rect[i,:]
|
||||||
|
j += 1
|
||||||
|
|
||||||
# our model
|
# our model
|
||||||
model = models.Sequential()
|
model = models.Sequential()
|
||||||
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
|
model.add(layers.Dense(pairs, activation='relu', input_dim=width))
|
||||||
|
@ -90,7 +109,9 @@ assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
|
||||||
from keras import optimizers
|
from keras import optimizers
|
||||||
sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
|
sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
|
||||||
model.compile(loss=sparse_loss, optimizer=sgd)
|
model.compile(loss=sparse_loss, optimizer=sgd)
|
||||||
history = model.fit(amp, phase_rect, batch_size=nb_batch, epochs=args.epochs, validation_split=0.1)
|
|
||||||
|
# training proper with real phase data
|
||||||
|
history = model.fit(amp_train, phase_train_rect, batch_size=nb_batch, epochs=args.epochs, validation_split=0.1)
|
||||||
model.save(args.nnout)
|
model.save(args.nnout)
|
||||||
|
|
||||||
# measure error in angle over all samples
|
# measure error in angle over all samples
|
||||||
|
|
1
train.sh
1
train.sh
|
@ -17,3 +17,4 @@ else
|
||||||
c2sim $speech --modelout - | est_n0 -r > $base'_nolinear.model'
|
c2sim $speech --modelout - | est_n0 -r > $base'_nolinear.model'
|
||||||
fi
|
fi
|
||||||
./phasenn_train.py $base'_nolinear.model' --frames 1572,1908,6792,9600,24536,25116 --epochs 10
|
./phasenn_train.py $base'_nolinear.model' --frames 1572,1908,6792,9600,24536,25116 --epochs 10
|
||||||
|
#./phasenn_train.py $base'_nolinear.model' --epochs 10
|
||||||
|
|
Loading…
Reference in New Issue