tests with synthetic speech to attempt to track down UV issues on real speech

2019-12-11 05:32:54 +10:30 · 2019-12-11 05:32:54 +10:30 · 837de6367c
parent c1484a76c1
commit 837de6367c
2 changed files with 17 additions and 5 deletions
--- a/gen_test_model.py
+++ b/gen_test_model.py
@@ -48,7 +48,7 @@ for i in range(nb_samples):
    P = 2*L[i]
 
    r = np.random.rand(1)
-    voiced[i] = r[0] > 0.5
+    voiced[i] = r[0] > 0.1
    
    # sample 2nd order IIR filter with random peak freq

@@ -60,14 +60,16 @@ for i in range(nb_samples):
        gamma1 = 0.9 + 0.09*r1[1]
        alpha2 = alpha1 + 0.4*np.pi*r2[0]
        gamma2 = 0.9 + 0.05*r2[1]
+        gain = 10
    else:
        alpha1 = 0.5*np.pi + 0.4*np.pi*r1[0]
        gamma1 = 0.8 + 0.1*r1[1]
        alpha2 = 0.5*np.pi + 0.4*np.pi*r2[0]
        gamma2 = 0.8 + 0.1*r2[1]
+        gain = 1
        
-    w1,h1 = signal.freqz(1, [1, -2*gamma1*np.cos(alpha1), gamma1*gamma1], range(1,L[i]+1)*Wo[i])
-    w2,h2 = signal.freqz(1, [1, -2*gamma2*np.cos(alpha2), gamma2*gamma2], range(1,L[i]+1)*Wo[i])
+    w1,h1 = signal.freqz(gain, [1, -2*gamma1*np.cos(alpha1), gamma1*gamma1], range(1,L[i]+1)*Wo[i])
+    w2,h2 = signal.freqz(gain, [1, -2*gamma2*np.cos(alpha2), gamma2*gamma2], range(1,L[i]+1)*Wo[i])
    
    for m in range(1,L[i]+1):
        A[i,m] = np.abs(h1[m-1]*h2[m-1])
--- a/phasenn_train.py
+++ b/phasenn_train.py
@@ -38,6 +38,7 @@ parser.add_argument('modelfile', help='Codec 2 model file with linear phase remo
 parser.add_argument('--frames', type=list_str, default="30,31,32,33,34,35", help='Frames to view')
 parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs')
 parser.add_argument('--nnout', type=str, default="phasenn.h5", help='Name of output Codec 2 model file')
+parser.add_argument('--plotunvoiced', action='store_true', help='plot unvoiced frames')
 args = parser.parse_args()

 assert nb_plots == len(args.frames)
@@ -119,8 +120,17 @@ def sample_time(r, phase):
        s = s + A[r,m]*np.cos(m*Wo[r]*range(-N,N) + phase[r,m])
    return s

-frames = np.array(args.frames,dtype=int)
-nb_plots = frames.size
+if args.plotunvoiced:
+    # find first 6 unvoiced frames
+    nb_plots = 6
+    frames = np.zeros(nb_plots, dtype=int)
+    uv = 0
+    for i in range(nb_samples):
+        if (voiced[i] == 0) and (uv < nb_plots):
+            frames[uv] = i; uv += 1
+else:    
+    frames = np.array(args.frames,dtype=int)
+    nb_plots = frames.size
 nb_plotsy = np.floor(np.sqrt(nb_plots)); nb_plotsx=nb_plots/nb_plotsy;

 plt.figure(1)