phasenn/gen_test_model.py

83 lines
2.3 KiB
Python
Executable File

#!/usr/bin/python3
# gen_test_model.py
#
# David Rowe Dec 2019
#
# Generate some contrived pusedo-speech test data for driving phasenn_train.py
import numpy as np
import sys
import matplotlib.pyplot as plt
from scipy import signal
import codec2_model
# constants
N = 80 # number of time domain samples in frame
width = 256
nb_samples = 100000
fo_min = 50
fo_max = 400
Fs = 8000
# Generate training data.
print("Generate training data")
# amplitude and phase at rate L
A = np.zeros((nb_samples, width))
phase = np.zeros((nb_samples, width))
# side information
Wo = np.zeros(nb_samples)
L = np.zeros(nb_samples, dtype=int)
voiced = np.zeros(nb_samples, dtype=int)
for i in range(nb_samples):
# distribute fo randomly on a log scale, gives us more training
# data with low freq frames which have more harmonics and are
# harder to match
r = np.random.rand(1)
log_fo = np.log10(fo_min) + (np.log10(fo_max)-np.log10(fo_min))*r[0]
fo = fo_min
fo = 10 ** log_fo
Wo[i] = fo*2*np.pi/Fs
L[i] = int(np.floor(np.pi/Wo[i]))
# pitch period in samples
P = 2*L[i]
r = np.random.rand(1)
voiced[i] = r[0] > 0.5
# sample 2nd order IIR filter with random peak freq
r1 = np.random.rand(2)
r2 = np.random.rand(2)
if voiced[i]:
# choose alpha and gamma to get something like voiced speech
alpha1 = 0.05*np.pi + 0.25*np.pi*r1[0]
gamma1 = 0.9 + 0.09*r1[1]
alpha2 = alpha1 + 0.4*np.pi*r2[0]
gamma2 = 0.9 + 0.05*r2[1]
else:
alpha1 = 0.5*np.pi + 0.4*np.pi*r1[0]
gamma1 = 0.8 + 0.1*r1[1]
alpha2 = 0.5*np.pi + 0.4*np.pi*r2[0]
gamma2 = 0.8 + 0.1*r2[1]
w1,h1 = signal.freqz(1, [1, -2*gamma1*np.cos(alpha1), gamma1*gamma1], range(1,L[i]+1)*Wo[i])
w2,h2 = signal.freqz(1, [1, -2*gamma2*np.cos(alpha2), gamma2*gamma2], range(1,L[i]+1)*Wo[i])
for m in range(1,L[i]+1):
A[i,m] = np.abs(h1[m-1]*h2[m-1])
if voiced[i]:
phase[i,m] = np.angle(h1[m-1]*h2[m-1])
else:
r = np.random.rand(1)
phase[i,m] = -np.pi + r[0]*2*np.pi
# write to a Codec 2 model file
codec2_model.write(Wo, L, A, phase, voiced, "gen_test.model")