Merge pull request #31 from drowe67/dr-codec2-doc

WP2000 - Codec 2 Algorithm Description
2023-12-12 09:50:48 +10:30 · 2023-12-12 09:50:48 +10:30 · 93dbb62904
parent 4f4f730454 b8e4527093
commit 93dbb62904
15 changed files with 1379 additions and 93 deletions
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@ -22,7 +22,7 @@ jobs:
      run: |
           sudo apt-get update
           sudo apt-get install octave octave-common octave-signal liboctave-dev gnuplot sox p7zip-full python3-numpy valgrind clang-format
-
+   
    - name: Create Build Directory
      shell: bash
      run: mkdir $GITHUB_WORKSPACE/build_linux
@ -50,7 +50,7 @@ jobs:
    - name: Run ctests
      working-directory: ${{github.workspace}}/build_linux
      shell: bash
-      run: ctest --output-on-failure
+      run: ctest --output-on-failure -E test_codec2_doc

    - name: Test library installation
      working-directory: ${{github.workspace}}/build_linux
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -312,6 +312,11 @@ if(UNITTEST)
             COMMAND sh -c "cd ${CMAKE_CURRENT_SOURCE_DIR};
                            clang-format --dry-run --Werror src/*.c src/*.h unittest/*.c demo/*.c")

+    # Build the Codec 2 algorithm description from doc/ as a test.
+    # JOBNAME=test makes the doc Makefile pass -jobname=test to pdflatex, so the
+    # codec2.pdf kept in the repo is not overwritten by the test run.
+    # The steps are chained with && so that a failed cd (or a failed clean)
+    # fails the test instead of running make in the wrong directory.
+    add_test(NAME test_codec2_doc
+             COMMAND sh -c "cd ${CMAKE_CURRENT_SOURCE_DIR}/doc &&
+                             make clean &&
+                             CODEC2_SRC=${CMAKE_CURRENT_SOURCE_DIR} CODEC2_BINARY=${CMAKE_CURRENT_BINARY_DIR} JOBNAME=test make")
+
    add_test(NAME test_freedv_get_hash
             COMMAND sh -c "${CMAKE_CURRENT_BINARY_DIR}/unittest/thash")

--- a/README.md
+++ b/README.md
@ -110,6 +110,10 @@ We have standardized on C99 and develop and test using gcc on a Linux platform.
   make
   ```

+## Documentation
+
+An algorithm description can be found in `doc/codec2.pdf`.
+
 ## Programs

 + See `demo` directory for simple examples of using Codec and the FreeDV API.
@ -138,7 +142,7 @@ CTest is used as a test framework, with support from [GNU Octave](https://www.gn

 1. Install GNU Octave and libraries on Ubuntu with:
   ```
-   sudo apt install octave octave-common octave-signal liboctave-dev gnuplot python3-numpy sox valgrind clang-format
+   sudo apt install octave octave-common octave-signal liboctave-dev gnuplot python3-numpy sox valgrind clang-format texmaker texlive-bibtex-extra texlive-science
   ```
 1. To build and run the tests:
   ```
@ -180,6 +184,7 @@ CTest is used as a test framework, with support from [GNU Octave](https://www.gn
 ```
 cmake       - cmake support files
 demo        - Simple Codec 2 and FreeDv API demo applications
+doc         - documentation
 octave      - Octave scripts used to support ctests
 src         - C source code for Codec 2, FDMDV modem, COHPSK modem, FreeDV API
 raw         - speech files in raw format (16 bits signed linear 8 kHz)
--- a/doc/Makefile
+++ b/doc/Makefile
@ -0,0 +1,35 @@
+# Makefile for codec2.pdf
+#
+# usage:
+#   Build codec2 with -DUNITTEST=1 (see README)
+#   cd ~/codec2/doc
+#   make
+
+DOCNAME ?= codec2
+
+# Set these externally to override defaults. JOBNAME sets the output file basename
+# and avoids overwriting codec2.pdf (e.g. when we are running a doc build test but
+# don't actually want to change codec2.pdf in the repo).
+
+CODEC2_SRC ?= $(HOME)/codec2
+CODEC2_BINARY ?= $(HOME)/codec2/build_linux
+JOBNAME ?= $(DOCNAME)
+
+# Append the build tree's src directory to PATH; the plot recipe below
+# invokes c2sim by its bare name.
+PATH := $(PATH):$(CODEC2_BINARY)/src
+
+# LaTeX plot files included by the document. They are written by the Octave
+# plamp script for frame 37 of the hts2a sample (see the $(PLOT_FILES) rule).
+PLOT_FILES := hts2a_37_sn.tex hts2a_37_sw.tex hts2a_37_lpc_lsp.tex hts2a_37_lpc_pf.tex
+
+# Build the document: one pdflatex pass, bibtex on the resulting .aux file,
+# then two more pdflatex passes. All output files are named after $(JOBNAME),
+# which only equals the target name $(DOCNAME) when JOBNAME is not overridden.
+$(DOCNAME).pdf: $(PLOT_FILES) $(DOCNAME).tex $(DOCNAME)_refs.bib
+	pdflatex -jobname=$(JOBNAME) $(DOCNAME).tex 
+	bibtex   $(JOBNAME).aux
+	pdflatex -jobname=$(JOBNAME) $(DOCNAME).tex
+	pdflatex -jobname=$(JOBNAME) $(DOCNAME).tex
+
+# Generate the plot files: c2sim dumps its internal states for hts2a.raw to
+# hts2a_* files, then octave-cli runs plamp on frame 37 with epslatex=1.
+# The trailing "q" fed on stdin is the quit key of plamp's interactive menu.
+# DISPLAY is emptied before octave-cli is started.
+$(PLOT_FILES):
+	echo $(PATH)
+	c2sim $(CODEC2_SRC)/raw/hts2a.raw --dump hts2a --lpc 10 --lsp --lpcpf
+	DISPLAY=""; printf "plamp('hts2a',f=37,epslatex=1)\nq\n" | octave-cli -qf -p $(CODEC2_SRC)/octave
+
+# Remove LaTeX/BibTeX intermediates and every generated hts2a* file.
+.PHONY: clean
+clean:
+	rm -f *.blg *.bbl *.aux *.log *.out hts2a*
--- a/doc/codec2.pdf
+++ b/doc/codec2.pdf
--- a/doc/codec2.tex
+++ b/doc/codec2.tex
--- a/doc/codec2_refs.bib
+++ b/doc/codec2_refs.bib
@ -0,0 +1,84 @@
+@article{griffin1988multiband,
+  title={Multiband excitation vocoder},
+  author={Griffin, Daniel W and Lim, Jae S},
+  journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
+  volume={36},
+  number={8},
+  pages={1223--1235},
+  year={1988},
+  publisher={IEEE}
+}
+@book{rowe1997techniques,
+  title={Techniques for harmonic sinusoidal coding},
+  author={Rowe, David Grant},
+  year={1997},
+  publisher={Citeseer},
+  note = {\url{https://www.rowetel.com/downloads/1997_rowe_phd_thesis.pdf}}
+}
+
+@misc{ardc2023,
+  title = {{Enhancing HF Digital Voice with FreeDV}},
+  year = {2023}, 
+  note = {\url{https://www.ardc.net/apply/grants/2023-grants/enhancing-hf-digital-voice-with-freedv/}}
+}
+
+@article{mcaulay1986speech,
+  title={Speech analysis/synthesis based on a sinusoidal representation},
+  author={McAulay, Robert and Quatieri, Thomas},
+  journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
+  volume={34},
+  number={4},
+  pages={744--754},
+  year={1986},
+  publisher={IEEE}
+}
+
+@article{makhoul1975linear,
+  title={Linear prediction: A tutorial review},
+  author={Makhoul, John},
+  journal={Proceedings of the IEEE},
+  volume={63},
+  number={4},
+  pages={561--580},
+  year={1975},
+  publisher={IEEE}
+}
+
+@article{itakura1975line,
+  title={Line spectrum representation of linear predictor coefficients of speech signals},
+  author={Itakura, Fumitada},
+  journal={The Journal of the Acoustical Society of America},
+  volume={57},
+  number={S1},
+  pages={S35--S35},
+  year={1975},
+  publisher={AIP Publishing}
+}
+
+
+@book{o1997human,
+  title={Speech Communication - Human and machine},
+  author={O'Shaughnessy, Douglas},
+  publisher={Addison-Wesley Publishing Company},
+  year={1997}
+}
+
+@misc{rowe2023ratek,
+  title = {{FreeDV-015 Codec 2 Rate K Resampler}},
+  year = {2023}, 
+  note = {\url{https://github.com/drowe67/misc/blob/master/ratek_resampler/ratek_resampler.pdf}}
+}
+
+@book{kondoz1994digital,
+  title={Digital speech: coding for low bit rate communication systems},
+  author={Kondoz, Ahmet M},
+  year={1994},
+  publisher={John Wiley \& Sons}
+}
+
+@book{kleijn1995speech,
+  title={Speech coding and synthesis},
+  author={Kleijn, W Bastiaan and Paliwal, Kuldip K},
+  year={1995},
+  publisher={Elsevier Science Inc.}
+}
--- a/doc/ratek_mel_fhz.png
+++ b/doc/ratek_mel_fhz.png
--- a/doc/warp_fhz_k.png
+++ b/doc/warp_fhz_k.png
--- a/octave/plamp.m
+++ b/octave/plamp.m
@ -0,0 +1,178 @@
+% plamp.m
+% Plot amplitude modelling information from c2sim dump files.
+
+function plamp(samname, f, epslatex=0)
+  % Plot amplitude modelling information for one frame of the c2sim dump
+  % files <samname>_*.txt found in the current directory, then offer a small
+  % keyboard menu to step through frames.
+  %
+  %   samname   basename of the dump files, e.g. "hts2a"
+  %   f         frame number displayed first
+  %   epslatex  when non-zero, every displayed frame is also printed with
+  %             -depslatex to <samname>_<f>_sn.tex, _sw.tex, _lpc_lsp.tex and
+  %             _lpc_pf.tex, and the scaled samples are saved to
+  %             <samname>_<f>_sn.txt
+  %
+  % Figures:
+  %   1  time domain samples of the frame (two rows of Sn)
+  %   2  harmonic amplitudes Am in dB, optionally over the spectrum Sw
+  %   3  Sw, Am, E+10*log10(Pwb) and the LSP frequencies
+  %   4  E+10*log10(Pwb) against 10*log10(Pw)
+  %      NOTE(review): presumably the LPC spectrum before/after the post
+  %      filter - confirm against the c2sim dump code.
+  %
+  % The _sn, _sw and _model files are required.  The _E, _pw, _pwb and _lsp
+  % files are optional; curves that need a missing file are skipped.
+
+  plot_sw = 1;
+
+  % required dump files - load() raises an error if one is missing
+  Sn    = load(strcat(samname,"_sn.txt"));
+  Sw    = load(strcat(samname,"_sw.txt"));
+  model = load(strcat(samname,"_model.txt"));
+
+  % optional dump files - left empty when the file is not present
+  E   = load_if_present(strcat(samname,"_E.txt"));
+  Pw  = load_if_present(strcat(samname,"_pw.txt"));
+  Pwb = load_if_present(strcat(samname,"_pwb.txt"));
+  lsp = load_if_present(strcat(samname,"_lsp.txt"));
+
+  have_env = !isempty(E) && !isempty(Pwb);
+  if !have_env || isempty(Pw) || isempty(lsp)
+    warning("plamp: one or more of the _E/_pw/_pwb/_lsp dump files is missing, the matching curves are skipped");
+  endif
+
+  % range used to clamp the n/b keys; Sn holds two rows per frame
+  nframes = min([rows(model) rows(Sw) floor(rows(Sn)/2)]);
+
+  k = ' ';
+  do
+    % Applied on every pass, before anything is drawn, because the defaults
+    % are restored again once the frame has been printed.
+    if epslatex, [textfontsize linewidth] = set_fonts(); end
+
+    figure(1); clf;
+    s = [ Sn(2*f-1,:) Sn(2*f,:) ];
+    plot(s,'b');
+    axis([1 length(s) -30000 30000]);
+    xlabel('Time (samples)'); ylabel('Amplitude');
+
+    figure(2); clf;
+    Wo = model(f,1);
+    L = model(f,2);
+    Am = model(f,3:(L+2));
+    plot((1:L)*Wo*4000/pi, 20*log10(Am),"+-r");
+    axis([1 4000 -10 80]);
+    hold on;
+    if plot_sw
+      plot((0:255)*4000/256, Sw(f,:),"b");
+    end
+    hold off; grid minor;
+    ylabel ('Amplitude (dB)'); xlabel('Frequency (Hz)');
+
+    figure(3); clf;
+    hold on;
+    plot((0:255)*4000/256, Sw(f,:),"b");
+    plot((1:L)*Wo*4000/pi, 20*log10(Am),"+-r");
+    if have_env
+      plot((0:255)*4000/256, E(f)+10*log10(Pwb(f,:)),"g");
+    endif
+    if !isempty(lsp)
+      plot(lsp(f,:)*4000/pi, 75,"g+");
+    endif
+    hold off; grid minor;
+    axis([1 4000 -10 80]);
+    ylabel ('Amplitude (dB)'); xlabel('Frequency (Hz)');
+
+    figure(4); clf;
+    hold on;
+    if have_env
+      plot((0:255)*4000/256, E(f)+10*log10(Pwb(f,:)),"g");
+    endif
+    if !isempty(Pw)
+      plot((0:255)*4000/256, 10*log10(Pw(f,:)),"r");
+    endif
+    hold off; grid minor;
+    axis([1 4000 -10 80]);
+    ylabel ('Amplitude (dB)'); xlabel('Frequency (Hz)');
+
+    % print EPS file
+
+    if epslatex
+      sz = "-S300,200";
+      figure(1);
+      fn = sprintf("%s_%d_sn.tex",samname,f);
+      print(fn,"-depslatex",sz); printf("\nprinting... %s\n", fn);
+
+      % file of points to plot in sinusoidal model: samples scaled to a
+      % peak of s_max against a time axis of length t_length
+      fn = sprintf("%s_%d_sn.txt",samname,f);
+      t_length = 4; s_max = 2;
+      peak = max(abs(s));
+      if peak > 0   % an all-zero frame is saved unscaled rather than as NaNs
+        s = s*s_max/peak;
+      endif
+      N = length(s); t = (0:N-1)*t_length/N;
+      s_save = [t' s'];
+      save("-ascii",fn,"s_save"); printf("printing... %s\n", fn);
+
+      figure(2);
+      fn = sprintf("%s_%d_sw.tex",samname,f);
+      print(fn,"-depslatex",sz); printf("printing... %s\n", fn);
+
+      figure(3);
+      fn = sprintf("%s_%d_lpc_lsp.tex",samname,f);
+      print(fn,"-depslatex",sz); printf("printing... %s\n", fn);
+
+      figure(4);
+      fn = sprintf("%s_%d_lpc_pf.tex",samname,f);
+      print(fn,"-depslatex",sz); printf("printing... %s\n", fn);
+
+      restore_fonts(textfontsize,linewidth);
+    endif
+
+    % interactive menu
+
+    printf("\rframe: %d  menu: n-next  b-back  s-plot_sw  q-quit", f);
+    fflush(stdout);
+    k = kbhit();
+    % keep f inside the frames that can be indexed
+    if k == 'n'; f = min(f + 1, nframes); endif
+    if k == 'b'; f = max(f - 1, 1); endif
+    if k == 's'; plot_sw = !plot_sw; endif
+
+  until (k == 'q')
+  printf("\n");
+
+endfunction
+
+% Load the text file fn if it can be found in the current directory,
+% otherwise return an empty matrix.
+function x = load_if_present(fn)
+  x = [];
+  if !isempty(file_in_path(".",fn))
+    x = load(fn);
+  endif
+endfunction
+
+% Apply font_size to the root default text and axes font sizes and 0.5 to
+% the root default line width.  Returns the text font size and line width
+% that were in effect before the call so they can be handed to
+% restore_fonts() later.
+function [textfontsize linewidth] = set_fonts(font_size=12)
+  root = 0;
+
+  % remember the current defaults before changing anything
+  linewidth    = get(root, "defaultlinelinewidth");
+  textfontsize = get(root, "defaulttextfontsize");
+
+  set(root, "defaulttextfontsize", font_size, ...
+            "defaultaxesfontsize", font_size, ...
+            "defaultlinelinewidth", 0.5);
+endfunction
+
+% Write previously saved defaults back to the root graphics object.
+% Only the text font size is passed in, so the default axes font size is
+% set to that same value.
+function restore_fonts(textfontsize,linewidth)
+  root = 0;
+  set(root, "defaultlinelinewidth", linewidth, ...
+            "defaultaxesfontsize", textfontsize, ...
+            "defaulttextfontsize", textfontsize);
+endfunction
+
+% Print the current figure to fn with the epslatex device at size option sz,
+% report the file name on stdout, then put the saved font and line width
+% defaults back via restore_fonts().
+function print_eps_restore(fn,sz,textfontsize,linewidth)
+  device = "-depslatex";
+  print(fn, device, sz);
+  fprintf("printing... %s\n", fn);
+  restore_fonts(textfontsize, linewidth);
+endfunction
+
--- a/raw/hts2a.raw
+++ b/raw/hts2a.raw
--- a/src/c2sim.c
+++ b/src/c2sim.c
@ -1023,7 +1023,7 @@ int main(int argc, char *argv[]) {
        if (lpc_model) {
          lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order);
          aks_to_M2(fftr_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i],
-                    &snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
+                    &snr, 1, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
          apply_lpc_correction(&model_dec[i]);
          sum_snr += snr;
 #ifdef DUMP
--- a/src/codec2.c
+++ b/src/codec2.c
@ -46,7 +46,6 @@
 #include "lpc.h"
 #include "lsp.h"
 #include "machdep.h"
-#include "newamp2.h"
 #include "nlp.h"
 #include "phase.h"
 #include "postfilter.h"
--- a/src/codec2_internal.h
+++ b/src/codec2_internal.h
@ -32,7 +32,6 @@

 #include "codec2_fft.h"
 #include "newamp1.h"
-#include "newamp2.h"

 struct CODEC2 {
  int mode;
@ -87,13 +86,6 @@ struct CODEC2 {
  float eq[NEWAMP1_K]; /* optional equaliser */
  bool eq_en;

-  /*newamp2 states (also uses newamp1 states )*/
-  float energy_prev;
-  float n2_rate_K_sample_freqs_kHz[NEWAMP2_K];
-  float n2_prev_rate_K_vec_[NEWAMP2_K];
-  float n2_pwb_rate_K_sample_freqs_kHz[NEWAMP2_16K_K];
-  float n2_pwb_prev_rate_K_vec_[NEWAMP2_16K_K];
-
  /* used to dump features for deep learning experiments */
  FILE *fmlfeat, *fmlmodel;

--- a/src/newamp2.h
+++ b/src/newamp2.h
@ -1,80 +0,0 @@
-/*---------------------------------------------------------------------------*\
-
-  FILE........: newamp2.h
-  AUTHOR......: Thomas Kurin and Stefan Erhardt
-  INSTITUTE...:	Institute for Electronics Engineering, University of
-Erlangen-Nuremberg DATE CREATED: July 2018 BASED ON....:	"newamp1.h" by
-David Rowe
-
-  Quantisation functions for the sinusoidal coder, using "newamp1"
-  algorithm that resamples variable rate L [Am} to a fixed rate K then
-  VQs.
-
-\*---------------------------------------------------------------------------*/
-
-/*
-  Copyright Thomas Kurin and Stefan Erhardt 2018
-
-  All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU Lesser General Public License version 2.1, as
-  published by the Free Software Foundation.  This program is
-  distributed in the hope that it will be useful, but WITHOUT ANY
-  WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-  License for more details.
-
-  You should have received a copy of the GNU Lesser General Public License
-  along with this program; if not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __NEWAMP2__
-#define __NEWAMP2__
-
-#define NEWAMP2_N_INDEXES \
-  4 /* Number of indexes to pack: vq1, vq2, energy, Wo */
-#define NEWAMP2_PHASE_NFFT \
-  128                /* size of FFT used for phase synthesis            */
-#define NEWAMP2_K 29 /* rate K vector length */
-#define NEWAMP2_16K_K \
-  40 /* rate K vector length	for 16k Mode			   */
-
-#include "codec2_fft.h"
-#include "comp.h"
-
-void n2_mel_sample_freqs_kHz(float rate_K_sample_freqs_kHz[], int K);
-void n2_resample_const_rate_f(C2CONST *c2const, MODEL *model,
-                              float rate_K_vec[],
-                              float rate_K_sample_freqs_kHz[], int K);
-void n2_rate_K_mbest_encode(int *indexes, float *x, float *xq, int ndim);
-void n2_resample_rate_L(C2CONST *c2const, MODEL *model, float rate_K_vec[],
-                        float rate_K_sample_freqs_kHz[], int K,
-                        int plosive_flag);
-void n2_post_filter_newamp2(float vec[], float sample_freq_kHz[], int K,
-                            float pf_gain);
-void newamp2_interpolate(float interpolated_surface_[], float left_vec[],
-                         float right_vec[], int K, int plosive_flag);
-void newamp2_model_to_indexes(C2CONST *c2const, int indexes[], MODEL *model,
-                              float rate_K_vec[],
-                              float rate_K_sample_freqs_kHz[], int K,
-                              float *mean, float rate_K_vec_no_mean[],
-                              float rate_K_vec_no_mean_[], int plosiv);
-void newamp2_indexes_to_rate_K_vec(float rate_K_vec_[],
-                                   float rate_K_vec_no_mean_[],
-                                   float rate_K_sample_freqs_kHz[], int K,
-                                   float *mean_, int indexes[], float pf_gain);
-void newamp2_16k_indexes_to_rate_K_vec(float rate_K_vec_[],
-                                       float rate_K_vec_no_mean_[],
-                                       float rate_K_sample_freqs_kHz[], int K,
-                                       float *mean_, int indexes[],
-                                       float pf_gain);
-void newamp2_indexes_to_model(C2CONST *c2const, MODEL model_[], COMP H[],
-                              float interpolated_surface_[],
-                              float prev_rate_K_vec_[], float *Wo_left,
-                              int *voicing_left,
-                              float rate_K_sample_freqs_kHz[], int K,
-                              codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg,
-                              int indexes[], float pf_gain, int flag16k);
-
-#endif