diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..3729f6a8 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,15 @@ +# Makefile for codec2.pdf + +# set these externally with an env variable (e.g. for GitHub action) to override +# defaults below. Need to run cmake with -DDUMP +CODEC2_SRC ?= $(HOME)/codec2 +CODEC2_BINARY ?= $(HOME)/codec2/build_linux/src + +PATH := $(PATH):$(CODEC2_BINARY) + +PLOT_FILES := hts2a_37_sn.tex hts2a_37_sw.tex + +$(PLOT_FILES): + echo $(PATH) + c2sim $(CODEC2_SRC)/raw/hts2a.raw --dump hts2a + DISPLAY=""; printf "plamp('hts2a',f=37,epslatex=1)\nq\n" | octave-cli -qf -p $(CODEC2_SRC)/octave diff --git a/doc/codec2.pdf b/doc/codec2.pdf index 21fc6cdf..6b59d416 100644 Binary files a/doc/codec2.pdf and b/doc/codec2.pdf differ diff --git a/doc/codec2.tex b/doc/codec2.tex index 0c6bceea..b29a87c5 100644 --- a/doc/codec2.tex +++ b/doc/codec2.tex @@ -35,12 +35,42 @@ This production of this document was kindly supported by an ARDC grant \cite{ard \section{Codec 2 for the Radio Amateur} \label{sect:overview} +\subsection{Model Based Speech Coding} + +A speech codec takes speech samples from an A/D converter (e.g. 16 bit samples at 8 kHz, or 128 kbits/s) and compresses them down to a low bit rate that can be more easily sent over a narrow bandwidth channel (700 bits/s). Speech coding is the art of "what can we throw away". We need to lower the bit rate of the speech while retaining intelligible speech, and making it sound as natural as possible. + +At such low bit rates we use a speech production model. The input speech is analysed, and we extract model parameters, which are then sent over the channel. An example of a model based parameter is the pitch of the person speaking. We estimate the pitch of the speaker, quantise it to a 7 bit number, and send that over the channel every 20ms. + +The model based approach used by Codec 2 allows high compression, with some trade offs such as noticeable artefacts in the decoded speech. 
Higher bit rate codecs (above 5000 bit/s), such as those used for mobile telephony or voice on the Internet, tend to pay more attention to preserving the speech waveform, or use a hybrid approach of waveform and model based techniques. + +Recently, machine learning has been applied to speech coding. This technology promises high quality, artefact free speech at low bit rates, but currently (2023) requires significantly more memory and CPU than traditional speech coding technology such as Codec 2. However, the field is progressing rapidly, and with the progress of Moore's law it will soon be a viable technology for many low bit rate speech applications. + +\subsection{Speech in Time and Frequency} + +\begin{figure} +\caption{ A 40ms segment of the word "these" from a female speaker, sampled at 8 kHz. The waveform repeats itself every 4.3ms (230 Hz); this is the "pitch period" of this segment.} +\label{fig:hts2a_time} +\begin{center} +\input hts2a_37_sn.tex +\\ +\input hts2a_37_sw.tex +\end{center} +\end{figure} + +\subsection{Sinusoidal Speech Coding} + +\subsection{Spectral Magnitude Quantisation} + +\subsection{Bit Allocation} + \section{Signal Processing Details} \label{sect:details} \section{Further Work} - +\begin{enumerate} +\item How to use tools to single step through codec operation +\end{enumerate} \cite{griffin1988multiband} \bibliographystyle{plain} diff --git a/octave/plamp.m b/octave/plamp.m new file mode 100644 index 00000000..a224a5cc --- /dev/null +++ b/octave/plamp.m @@ -0,0 +1,136 @@ +% plamp.m +% Plot amplitude modelling information from c2sim dump files. 
+ +function plamp(samname, f, epslatex=0) % load the <samname>_*.txt dump files and plot frame f; a non-zero epslatex also prints both figures + + plot_sw = 1; % non-zero: overlay Sw on figure 2 (toggled by the 's' menu key) + + sn_name = strcat(samname,"_sn.txt"); + Sn = load(sn_name); + + sw_name = strcat(samname,"_sw.txt"); + Sw = load(sw_name); + + sw__name = strcat(samname,"_sw_.txt"); + if (file_in_path(".",sw__name)) % optional dump: loaded only if file_in_path finds it under "." + Sw_ = load(sw__name); + endif + + ew_name = strcat(samname,"_ew.txt"); + if (file_in_path(".",ew_name)) + Ew = load(ew_name); + endif + + rk_name = strcat(samname,"_rk.txt"); + if (file_in_path(".",rk_name)) + Rk = load(rk_name); + endif + + model_name = strcat(samname,"_model.txt"); + model = load(model_name); + + modelq_name = strcat(samname,"_qmodel.txt"); + if (file_in_path(".",modelq_name)) + modelq = load(modelq_name); + endif + + pw_name = strcat(samname,"_pw.txt"); + if (file_in_path(".",pw_name)) + Pw = load(pw_name); + endif + + lsp_name = strcat(samname,"_lsp.txt"); + if (file_in_path(".",lsp_name)) + lsp = load(lsp_name); + endif + + phase_name = strcat(samname,"_phase.txt"); + if (file_in_path(".",phase_name)) + phase = load(phase_name); + endif + + phase_name_ = strcat(samname,"_phase_.txt"); + if (file_in_path(".",phase_name_)) + phase_ = load(phase_name_); + endif + + snr_name = strcat(samname,"_snr.txt"); + if (file_in_path(".",snr_name)) + snr = load(snr_name); % NOTE(review): Sw_, Ew, Rk, modelq, Pw, lsp, phase, phase_ and snr are loaded but not referenced again in plamp + endif + + if epslatex, [textfontsize linewidth] = set_fonts(); end % previous defaults are kept for restore_fonts + + k = ' '; + do + figure(1); + clf; + s = [ Sn(2*f-1,:) Sn(2*f,:) ]; % rows 2f-1 and 2f of Sn joined end to end + plot(s,'b'); + axis([1 length(s) -30000 30000]); + + figure(2); + Wo = model(f,1); % model row f: col 1 = Wo, col 2 = L, the next L columns = Am + L = model(f,2); + Am = model(f,3:(L+2)); + plot((1:L)*Wo*4000/pi, 20*log10(Am),"+-r"); % Am in dB, plotted at m*Wo*4000/pi for m = 1..L + axis([1 4000 -10 80]); + hold on; + if plot_sw + plot((0:255)*4000/256, Sw(f,:),"b"); + legend('boxoff'); + end + + hold off; grid minor; + + % print figures 1 and 2 with the epslatex device (<samname>_<f>_sn.tex and <samname>_<f>_sw.tex) + + if epslatex + sz = "-S300,250"; + figure(1); + fn = sprintf("%s_%d_sn.tex",samname,f); + print(fn,"-depslatex",sz); printf("\nprinting... %s\n", fn); + + figure(2); + fn = sprintf("%s_%d_sw.tex",samname,f); + print(fn,"-depslatex",sz); printf("printing... 
%s\n", fn); + + restore_fonts(textfontsize,linewidth); % NOTE(review): set_fonts ran once before the loop, so later iterations print with the restored defaults + endif + + % interactive menu: one kbhit key per pass; n and b step f, s toggles plot_sw, q ends the loop + + printf("\rframe: %d menu: n-next b-back s-plot_sw q-quit", f); + fflush(stdout); + k = kbhit(); + if k == 'n'; f = f + 1; endif + if k == 'b'; f = f - 1; endif % NOTE(review): f is not range-checked after stepping + if k == 's' + if plot_sw; plot_sw = 0; else; plot_sw = 1; end + endif + + until (k == 'q') + printf("\n"); + +endfunction + +function [textfontsize linewidth] = set_fonts(font_size=12) % returns the previous default text font size and line width, then sets new defaults + textfontsize = get(0,"defaulttextfontsize"); + linewidth = get(0,"defaultlinelinewidth"); + set(0, "defaulttextfontsize", font_size); + set(0, "defaultaxesfontsize", font_size); + set(0, "defaultlinelinewidth", 0.5); +end + +function restore_fonts(textfontsize,linewidth) % puts back the defaults saved by set_fonts + set(0, "defaulttextfontsize", textfontsize); + set(0, "defaultaxesfontsize", textfontsize); % NOTE(review): axes size is restored from the saved text size; set_fonts does not save the axes size + set(0, "defaultlinelinewidth", linewidth); +end + +function print_eps_restore(fn,sz,textfontsize,linewidth) % NOTE(review): no caller in the visible part of this file + print(fn,"-depslatex",sz); + printf("printing... %s\n", fn); + restore_fonts(textfontsize,linewidth); +end + diff --git a/raw/hts2a.raw b/raw/hts2a.raw new file mode 100644 index 00000000..6d9cf17b Binary files /dev/null and b/raw/hts2a.raw differ