diff --git a/phasenn_train.py b/phasenn_train.py
index 4e16dfb..9787386 100755
--- a/phasenn_train.py
+++ b/phasenn_train.py
@@ -56,8 +56,10 @@ for i in range(nb_samples):
         amp[i,bin] = np.log10(A[i,m])
         #phase_rect[i,2*bin]   = np.max((1,amp[i,bin]))*np.cos(phase[i,m])
         #phase_rect[i,2*bin+1] = np.max((1,amp[i,bin]))*np.sin(phase[i,m])
-        phase_rect[i,2*bin]   = amp[i,bin]*np.cos(phase[i,m])
-        phase_rect[i,2*bin+1] = amp[i,bin]*np.sin(phase[i,m])
+        #phase_rect[i,2*bin]   = amp[i,bin]*np.cos(phase[i,m])
+        #phase_rect[i,2*bin+1] = amp[i,bin]*np.sin(phase[i,m])
+        phase_rect[i,2*bin]   = np.cos(phase[i,m])
+        phase_rect[i,2*bin+1] = np.sin(phase[i,m])
 
 # our model
 model = models.Sequential()
@@ -66,24 +68,28 @@ model.add(layers.Dense(4*pairs, activation='relu'))
 model.add(layers.Dense(pairs))
 model.summary()
 
-# custom loss function
+# custom loss function - we only care about (cos,sin) outputs at the
+# non-zero positions in the sparse y_true vector.  To avoid driving the
+# other samples to 0 we use a sparse loss function.  The normalisation
+# term accounts for the time-varying number of non-zero samples.
 def sparse_loss(y_true, y_pred):
-    mask = K.cast( K.not_equal(y_pred, 0), dtype='float32')
+    mask = K.cast( K.not_equal(y_true, 0), dtype='float32')
     n = K.sum(mask)
     return K.sum(K.square((y_pred - y_true)*mask))/n
 
 # testing custom loss function
-x = Input(shape=(None,))
-y = Input(shape=(None,))
-loss_func = K.Function([x, y], [sparse_loss(x, y)])
-assert loss_func([[[1,1,1]], [[0,2,0]]]) == np.array([1])
+y_true = Input(shape=(None,))
+y_pred = Input(shape=(None,))
+loss_func = K.Function([y_true, y_pred], [sparse_loss(y_true, y_pred)])
+assert loss_func([[[0,1,0]], [[2,2,2]]]) == np.array([1])
+assert loss_func([[[1,1,0]], [[3,2,2]]]) == np.array([2.5])
 assert loss_func([[[0,1,0]], [[0,2,0]]]) == np.array([1])
 
 # fit the model
 from keras import optimizers
-sgd = optimizers.SGD(lr=0.8, decay=1e-6, momentum=0.9, nesterov=True)
+sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
 model.compile(loss=sparse_loss, optimizer=sgd)
-history = model.fit(amp, phase_rect, batch_size=nb_batch, epochs=args.epochs)
+history = model.fit(amp, phase_rect, batch_size=nb_batch, epochs=args.epochs, validation_split=0.1)
 model.save(args.nnout)
 
 # measure error in angle over all samples
@@ -113,11 +119,14 @@ def sample_time(r, phase):
         s = s + A[r,m]*np.cos(m*Wo[r]*range(-N,N) + phase[r,m])
     return s
 
-nb_plotsy = np.floor(np.sqrt(nb_plots)); nb_plotsx=nb_plots/nb_plotsy;
 frames = np.array(args.frames,dtype=int)
+nb_plots = frames.size
+nb_plotsy = np.floor(np.sqrt(nb_plots)); nb_plotsx=nb_plots/nb_plotsy;
 
 plt.figure(1)
 plt.plot(history.history['loss'])
+plt.plot(history.history['val_loss'])
+plt.legend(['train', 'valid'], loc='upper right')
 plt.title('model loss')
 plt.xlabel('epoch')
 plt.show(block=False)
diff --git a/train.sh b/train.sh
index 1c3a63d..1842bbe 100755
--- a/train.sh
+++ b/train.sh
@@ -4,9 +4,17 @@
 
 PATH=$PATH:~/codec2/build_linux/src:~/codec2/build_linux/misc
 
-speech=~/Downloads/train_8k.sw
+if [ "$#" -eq 0 ]; then
+    echo "usage: ./train.sh rawFile [secs]"
+    exit 1
+fi
+
+speech=$1
 x=$(basename $speech)
 base="${x%.*}"
-
-sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 6000 | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
-./phasenn_train.py $base'_nolinear.model' --frames 1572,1908,6792,9600,24536,25116 --epochs 200
+if [ "$#" -eq 2 ]; then
+    sox -t .sw -r 8000 -c 1 $speech -t .sw - trim 0 $2 | c2sim - --modelout - | est_n0 -r > $base'_nolinear.model'
+else
+    c2sim $speech --modelout - | est_n0 -r > $base'_nolinear.model'
+fi
+./phasenn_train.py $base'_nolinear.model' --frames 1572,1908,6792,9600,24536,25116 --epochs 10
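
The masking behaviour of sparse_loss can be checked outside Keras. Below is a minimal NumPy sketch (not part of the patch; sparse_loss_np is a hypothetical name) that mirrors the loss above and reproduces the three asserts: entries where y_true is zero are masked out, and the squared error is averaged over the surviving non-zero targets only.

    import numpy as np

    def sparse_loss_np(y_true, y_pred):
        # mask out entries where the target is zero, then average the
        # squared error over the surviving (non-zero) targets only
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        mask = (y_true != 0).astype(float)
        n = mask.sum()
        return np.square((y_pred - y_true)*mask).sum()/n

    assert sparse_loss_np([0,1,0], [2,2,2]) == 1.0   # only the middle entry counts
    assert sparse_loss_np([1,1,0], [3,2,2]) == 2.5   # (4 + 1)/2
    assert sparse_loss_np([0,1,0], [0,2,0]) == 1.0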
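The training script comments that it measures "error in angle over all samples". As a sketch of that idea (assumed, not lifted from phasenn_train.py; angle_error is a hypothetical helper), the phase can be recovered from the interleaved (cos,sin) pairs used for phase_rect with arctan2, with the difference wrapped to [-pi, pi) so large errors don't alias:

    import numpy as np

    def angle_error(y_true, y_pred):
        # y_true, y_pred: interleaved (cos,sin) pairs, as in phase_rect
        true_angle = np.arctan2(y_true[1::2], y_true[0::2])
        pred_angle = np.arctan2(y_pred[1::2], y_pred[0::2])
        # wrap the difference into [-pi, pi)
        return np.angle(np.exp(1j*(pred_angle - true_angle)))

    y_true = np.array([np.cos(0.1), np.sin(0.1), np.cos(2.0), np.sin(2.0)])
    y_pred = np.array([np.cos(0.3), np.sin(0.3), np.cos(1.5), np.sin(1.5)])
    print(np.degrees(angle_error(y_true, y_pred)))   # approx [ 11.5 -28.6 ]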