Merge pull request #31 from drowe67/dr-codec2-doc

WP2000 - Codec 2 Algorithm Description
pull/49/head
drowe67 2023-12-12 09:50:48 +10:30 committed by GitHub
commit 93dbb62904
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1379 additions and 93 deletions

View File

@ -22,7 +22,7 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install octave octave-common octave-signal liboctave-dev gnuplot sox p7zip-full python3-numpy valgrind clang-format
- name: Create Build Directory
shell: bash
run: mkdir $GITHUB_WORKSPACE/build_linux
@ -50,7 +50,7 @@ jobs:
- name: Run ctests
working-directory: ${{github.workspace}}/build_linux
shell: bash
run: ctest --output-on-failure
run: ctest --output-on-failure -E test_codec2_doc
- name: Test library installation
working-directory: ${{github.workspace}}/build_linux

View File

@ -312,6 +312,11 @@ if(UNITTEST)
COMMAND sh -c "cd ${CMAKE_CURRENT_SOURCE_DIR};
clang-format --dry-run --Werror src/*.c src/*.h unittest/*.c demo/*.c")
# Build the algorithm description in doc/ as a test.  JOBNAME=test makes the
# doc Makefile write test.* output files rather than overwriting the
# checked-in codec2.pdf.  WORKING_DIRECTORY and "&&" guarantee that neither
# make invocation can run in the wrong directory (e.g. the build tree) when
# doc/ cannot be entered or the clean step fails.
add_test(NAME test_codec2_doc
         COMMAND sh -c "make clean &&
                        CODEC2_SRC=${CMAKE_CURRENT_SOURCE_DIR} CODEC2_BINARY=${CMAKE_CURRENT_BINARY_DIR} JOBNAME=test make"
         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc)
add_test(NAME test_freedv_get_hash
COMMAND sh -c "${CMAKE_CURRENT_BINARY_DIR}/unittest/thash")

View File

@ -110,6 +110,10 @@ We have standardized on C99 and develop and test using gcc on a Linux platform.
make
```
## Documentation
An algorithm description can be found in `doc/codec2.pdf`.
## Programs
+ See the `demo` directory for simple examples of using Codec 2 and the FreeDV API.
@ -138,7 +142,7 @@ CTest is used as a test framework, with support from [GNU Octave](https://www.gn
1. Install GNU Octave and libraries on Ubuntu with:
```
sudo apt install octave octave-common octave-signal liboctave-dev gnuplot python3-numpy sox valgrind clang-format
sudo apt install octave octave-common octave-signal liboctave-dev gnuplot python3-numpy sox valgrind clang-format texmaker texlive-bibtex-extra texlive-science
```
1. To build and run the tests:
```
@ -180,6 +184,7 @@ CTest is used as a test framework, with support from [GNU Octave](https://www.gn
```
cmake - cmake support files
demo - Simple Codec 2 and FreeDV API demo applications
doc - documentation
octave - Octave scripts used to support ctests
src - C source code for Codec 2, FDMDV modem, COHPSK modem, FreeDV API
raw - speech files in raw format (16 bits signed linear 8 kHz)

35
doc/Makefile 100644
View File

@ -0,0 +1,35 @@
# Makefile for codec2.pdf
#
# usage:
# Build codec2 with -DUNITTEST=1 (see README)
# cd ~/codec2/doc
# make
DOCNAME ?= codec2
# Set these externally to override defaults. JOBNAME sets the output file basename,
# and avoids overwriting codec2.pdf (e.g. when we are running a doc build test, but don't actually
# want to change codec2.pdf in the repo)
CODEC2_SRC ?= $(HOME)/codec2
CODEC2_BINARY ?= $(HOME)/codec2/build_linux
JOBNAME ?= $(DOCNAME)
# Append the build tree's src directory to PATH; c2sim is invoked below without
# a directory, so it is presumably expected to be found there.
PATH := $(PATH):$(CODEC2_BINARY)/src
# Plot files included by the document; all are produced by the single
# c2sim + Octave run in the $(PLOT_FILES) rule below.
PLOT_FILES := hts2a_37_sn.tex hts2a_37_sw.tex hts2a_37_lpc_lsp.tex hts2a_37_lpc_pf.tex
# Build the PDF: pdflatex, then bibtex, then pdflatex twice more so that
# citations and cross references are resolved.
# Note: the output is $(JOBNAME).pdf, so when JOBNAME differs from DOCNAME this
# rule's target file is never created and the recipe runs on every invocation.
$(DOCNAME).pdf: $(PLOT_FILES) $(DOCNAME).tex $(DOCNAME)_refs.bib
pdflatex -jobname=$(JOBNAME) $(DOCNAME).tex
bibtex $(JOBNAME).aux
pdflatex -jobname=$(JOBNAME) $(DOCNAME).tex
pdflatex -jobname=$(JOBNAME) $(DOCNAME).tex
# Generate the plots: c2sim writes hts2a_* dump files, then plamp.m (from
# $(CODEC2_SRC)/octave) is run for frame 37 with epslatex=1; the piped "q"
# answers plamp's interactive menu so Octave exits.  DISPLAY is blanked,
# presumably to stop Octave opening plot windows (TODO confirm).
$(PLOT_FILES):
echo $(PATH)
c2sim $(CODEC2_SRC)/raw/hts2a.raw --dump hts2a --lpc 10 --lsp --lpcpf
DISPLAY=""; printf "plamp('hts2a',f=37,epslatex=1)\nq\n" | octave-cli -qf -p $(CODEC2_SRC)/octave
# Remove LaTeX intermediates and all generated hts2a* dump/plot files.
.PHONY: clean
clean:
rm -f *.blg *.bbl *.aux *.log *.out hts2a*

BIN
doc/codec2.pdf 100644

Binary file not shown.

1068
doc/codec2.tex 100644

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,84 @@
@article{griffin1988multiband,
title={Multiband excitation vocoder},
author={Griffin, Daniel W and Lim, Jae S},
journal={IEEE Transactions on acoustics, speech, and signal processing},
volume={36},
number={8},
pages={1223--1235},
year={1988},
publisher={IEEE}
}
@book{rowe1997techniques,
title={Techniques for harmonic sinusoidal coding},
author={Rowe, David Grant},
year={1997},
publisher={Citeseer},
note = {\url{https://www.rowetel.com/downloads/1997_rowe_phd_thesis.pdf}}
}
@misc{ardc2023,
title = {{Enhancing HF Digital Voice with FreeDV}},
year = {2023},
note = {\url{https://www.ardc.net/apply/grants/2023-grants/enhancing-hf-digital-voice-with-freedv/}}
}
@article{mcaulay1986speech,
title={Speech analysis/synthesis based on a sinusoidal representation},
author={McAulay, Robert and Quatieri, Thomas},
journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
volume={34},
number={4},
pages={744--754},
year={1986},
publisher={IEEE}
}
@article{makhoul1975linear,
title={Linear prediction: A tutorial review},
author={Makhoul, John},
journal={Proceedings of the IEEE},
volume={63},
number={4},
pages={561--580},
year={1975},
publisher={IEEE}
}
@article{itakura1975line,
title={Line spectrum representation of linear predictor coefficients of speech signals},
author={Itakura, Fumitada},
journal={The Journal of the Acoustical Society of America},
volume={57},
number={S1},
pages={S35--S35},
year={1975},
publisher={AIP Publishing}
}
@book{o1997human,
title={Speech Communication - Human and machine},
author={O'Shaughnessy, Douglas},
publisher={Addison-Wesley Publishing Company},
year={1997}
}
@misc{rowe2023ratek,
title = {{FreeDV-015 Codec 2 Rate K Resampler}},
year = {2023},
note = {\url{https://github.com/drowe67/misc/blob/master/ratek_resampler/ratek_resampler.pdf}}
}
@book{kondoz1994digital,
title={Digital speech: coding for low bit rate communication systems},
author={Kondoz, Ahmet M},
year={1994},
publisher={John Wiley \& Sons}
}
@book{kleijn1995speech,
title={Speech coding and synthesis},
author={Kleijn, W Bastiaan and Paliwal, Kuldip K},
year={1995},
publisher={Elsevier Science Inc.}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

BIN
doc/warp_fhz_k.png 100644

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.0 KiB

178
octave/plamp.m 100644
View File

@ -0,0 +1,178 @@
% plamp.m
% Plot amplitude modelling information from c2sim dump files.
%
% usage:
%   plamp(samname, f)      browse interactively, starting at frame f
%   plamp(samname, f, 1)   as above, and also write <samname>_<f>_*.tex
%                          (epslatex) plot files plus a <samname>_<f>_sn.txt
%                          table of the time domain samples for each frame
%                          displayed
%
% samname  - basename of the dump files (<samname>_sn.txt, <samname>_sw.txt,
%            <samname>_model.txt are required; _E, _pw, _pwb and _lsp files
%            are plotted when they exist in the current directory)
% f        - frame number to start at (1 based)
% epslatex - non-zero to print the figures as epslatex files
function plamp(samname, f, epslatex=0)
  plot_sw = 1;

  % dump files that must be present
  Sn    = load(strcat(samname,"_sn.txt"));
  Sw    = load(strcat(samname,"_sw.txt"));
  model = load(strcat(samname,"_model.txt"));

  % optional dump files, an empty matrix marks a file that is absent so the
  % plots that need it can be skipped instead of failing on an undefined
  % variable
  E   = plamp_load_optional(strcat(samname,"_E.txt"));
  Pw  = plamp_load_optional(strcat(samname,"_pw.txt"));
  Pwb = plamp_load_optional(strcat(samname,"_pwb.txt"));
  lsp = plamp_load_optional(strcat(samname,"_lsp.txt"));

  % each frame uses one row of model and Sw, and two rows of Sn
  nframes = min([rows(model) rows(Sw) floor(rows(Sn)/2)]);
  if (f < 1) || (f > nframes)
    error("plamp: frame %d is outside the range 1..%d", f, nframes);
  endif

  have_lpc = !isempty(E) && !isempty(Pwb);

  if epslatex, [textfontsize linewidth] = set_fonts(); end

  k = ' ';
  unwind_protect
    do
      % time domain speech for this frame
      figure(1); clf;
      s = [ Sn(2*f-1,:) Sn(2*f,:) ];
      plot(s,'b');
      axis([1 length(s) -30000 30000]);
      xlabel('Time (samples)'); ylabel('Amplitude');

      % harmonic amplitudes, optionally over the input spectrum
      figure(2); clf;
      Wo = model(f,1);
      L = model(f,2);
      Am = model(f,3:(L+2));
      plot((1:L)*Wo*4000/pi, 20*log10(Am),"+-r");
      axis([1 4000 -10 80]);
      hold on;
      if plot_sw
        plot((0:255)*4000/256, Sw(f,:),"b");
      end
      hold off; grid minor;
      ylabel ('Amplitude (dB)'); xlabel('Frequency (Hz)');

      % spectrum, harmonic amplitudes, and (when dumped) the _pwb curve
      % and LSP positions
      figure(3); clf;
      hold on;
      plot((0:255)*4000/256, Sw(f,:),"b");
      plot((1:L)*Wo*4000/pi, 20*log10(Am),"+-r");
      if have_lpc
        plot((0:255)*4000/256, E(f)+10*log10(Pwb(f,:)),"g");
      end
      if !isempty(lsp)
        plot(lsp(f,:)*4000/pi, 75,"g+");
      end
      hold off; grid minor;
      axis([1 4000 -10 80]);
      ylabel ('Amplitude (dB)'); xlabel('Frequency (Hz)');

      % _pwb curve against _pw curve (when dumped)
      figure(4); clf;
      hold on;
      if have_lpc
        plot((0:255)*4000/256, E(f)+10*log10(Pwb(f,:)),"g");
      end
      if !isempty(Pw)
        plot((0:255)*4000/256, 10*log10(Pw(f,:)),"r");
      end
      hold off; grid minor;
      axis([1 4000 -10 80]);
      ylabel ('Amplitude (dB)'); xlabel('Frequency (Hz)');

      % print EPS file
      if epslatex
        sz = "-S300,200";
        figure(1);
        fn = sprintf("%s_%d_sn.tex",samname,f);
        print(fn,"-depslatex",sz); printf("\nprinting... %s\n", fn);

        % file of points to plot in sinusoidal model
        fn = sprintf("%s_%d_sn.txt",samname,f);
        t_length = 4; s_max = 2; s=s*s_max/max(abs(s));
        N = length(s); t = (0:N-1)*t_length/N;
        s_save = [t' s']; size(s_save)
        save("-ascii",fn,"s_save"); printf("printing... %s\n", fn);

        figure(2);
        fn = sprintf("%s_%d_sw.tex",samname,f);
        print(fn,"-depslatex",sz); printf("printing... %s\n", fn);

        figure(3);
        fn = sprintf("%s_%d_lpc_lsp.tex",samname,f);
        print(fn,"-depslatex",sz); printf("printing... %s\n", fn);

        figure(4);
        fn = sprintf("%s_%d_lpc_pf.tex",samname,f);
        print(fn,"-depslatex",sz); printf("printing... %s\n", fn);
      endif

      % interactive menu
      printf("\rframe: %d menu: n-next b-back s-plot_sw q-quit", f);
      fflush(stdout);
      k = kbhit();
      % keep f inside the dumped data so the next pass cannot index out of
      % range
      if k == 'n'; f = min(f + 1, nframes); endif
      if k == 'b'; f = max(f - 1, 1); endif
      if k == 's'
        if plot_sw; plot_sw = 0; else; plot_sw = 1; end
      endif
    until (k == 'q')
  unwind_protect_cleanup
    % restore the graphics defaults once, after the last frame, so that
    % every frame is drawn and printed with the same fonts, and so they are
    % restored even if plotting fails
    if epslatex
      restore_fonts(textfontsize,linewidth);
    endif
  end_unwind_protect
  printf("\n");
endfunction

% Load dump file fname if it can be found in the current directory,
% otherwise return an empty matrix.
function data = plamp_load_optional(fname)
  data = [];
  if (file_in_path(".",fname))
    data = load(fname);
  endif
endfunction
% Apply the font size and line width used for printed figures to the root
% graphics defaults, returning the previous default text font size and
% default line width so the caller can pass them to restore_fonts().
function [textfontsize linewidth] = set_fonts(font_size=12)
  root = 0;

  % remember the current defaults before changing anything
  linewidth    = get(root, "defaultlinelinewidth");
  textfontsize = get(root, "defaulttextfontsize");

  % text and axes share one font size, lines are drawn 0.5 wide
  set(root, "defaultlinelinewidth", 0.5);
  set(root, "defaultaxesfontsize", font_size);
  set(root, "defaulttextfontsize", font_size);
endfunction
% Put back the root graphics defaults saved by set_fonts().
% NOTE(review): the default axes font size is set to the saved *text* font
% size, as set_fonts() does not return the axes value - confirm this is
% intended.
function restore_fonts(textfontsize,linewidth)
  root = 0;
  set(root, "defaultlinelinewidth", linewidth);
  set(root, "defaultaxesfontsize", textfontsize);
  set(root, "defaulttextfontsize", textfontsize);
endfunction
% Print the current figure to fn as an epslatex file using size option sz,
% report the file name on stdout, then restore the saved graphics defaults.
% (Not called by any code visible in this file.)
function print_eps_restore(fn,sz,textfontsize,linewidth)
  print(fn, "-depslatex", sz);
  fprintf("printing... %s\n", fn);
  restore_fonts(textfontsize, linewidth);
endfunction

BIN
raw/hts2a.raw 100644

Binary file not shown.

View File

@ -1023,7 +1023,7 @@ int main(int argc, char *argv[]) {
if (lpc_model) {
lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order);
aks_to_M2(fftr_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i],
&snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
&snr, 1, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
apply_lpc_correction(&model_dec[i]);
sum_snr += snr;
#ifdef DUMP

View File

@ -46,7 +46,6 @@
#include "lpc.h"
#include "lsp.h"
#include "machdep.h"
#include "newamp2.h"
#include "nlp.h"
#include "phase.h"
#include "postfilter.h"

View File

@ -32,7 +32,6 @@
#include "codec2_fft.h"
#include "newamp1.h"
#include "newamp2.h"
struct CODEC2 {
int mode;
@ -87,13 +86,6 @@ struct CODEC2 {
float eq[NEWAMP1_K]; /* optional equaliser */
bool eq_en;
/*newamp2 states (also uses newamp1 states )*/
float energy_prev;
float n2_rate_K_sample_freqs_kHz[NEWAMP2_K];
float n2_prev_rate_K_vec_[NEWAMP2_K];
float n2_pwb_rate_K_sample_freqs_kHz[NEWAMP2_16K_K];
float n2_pwb_prev_rate_K_vec_[NEWAMP2_16K_K];
/* used to dump features for deep learning experiments */
FILE *fmlfeat, *fmlmodel;

View File

@ -1,80 +0,0 @@
/*---------------------------------------------------------------------------*\
FILE........: newamp2.h
AUTHOR......: Thomas Kurin and Stefan Erhardt
INSTITUTE...: Institute for Electronics Engineering, University of
              Erlangen-Nuremberg
DATE CREATED: July 2018
BASED ON....: "newamp1.h" by David Rowe
Quantisation functions for the sinusoidal coder, using "newamp1"
algorithm that resamples variable rate L {Am} to a fixed rate K then
VQs.
\*---------------------------------------------------------------------------*/
/*
Copyright Thomas Kurin and Stefan Erhardt 2018
All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 2.1, as
published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Include guard */
#ifndef __NEWAMP2__
#define __NEWAMP2__
/* Sizes used by the newamp2 functions declared below */
#define NEWAMP2_N_INDEXES \
4 /* Number of indexes to pack: vq1, vq2, energy, Wo */
#define NEWAMP2_PHASE_NFFT \
128 /* size of FFT used for phase synthesis */
#define NEWAMP2_K 29 /* rate K vector length */
#define NEWAMP2_16K_K \
40 /* rate K vector length for 16k Mode */
/* NOTE(review): C2CONST and MODEL are used in the prototypes below; they are
   presumably declared via these headers or by the including file - confirm */
#include "codec2_fft.h"
#include "comp.h"
/* Helper functions operating on rate K vectors (n2_ prefix).  Only the
   declarations are visible here; the implementations live elsewhere. */
void n2_mel_sample_freqs_kHz(float rate_K_sample_freqs_kHz[], int K);
void n2_resample_const_rate_f(C2CONST *c2const, MODEL *model,
float rate_K_vec[],
float rate_K_sample_freqs_kHz[], int K);
void n2_rate_K_mbest_encode(int *indexes, float *x, float *xq, int ndim);
void n2_resample_rate_L(C2CONST *c2const, MODEL *model, float rate_K_vec[],
float rate_K_sample_freqs_kHz[], int K,
int plosive_flag);
void n2_post_filter_newamp2(float vec[], float sample_freq_kHz[], int K,
float pf_gain);
/* Functions converting between MODEL parameters, rate K vectors and the
   packed indexes (newamp2_ prefix).  Judging by the names these are
   presumably the encoder/decoder entry points - confirm against callers. */
void newamp2_interpolate(float interpolated_surface_[], float left_vec[],
float right_vec[], int K, int plosive_flag);
void newamp2_model_to_indexes(C2CONST *c2const, int indexes[], MODEL *model,
float rate_K_vec[],
float rate_K_sample_freqs_kHz[], int K,
float *mean, float rate_K_vec_no_mean[],
float rate_K_vec_no_mean_[], int plosiv);
void newamp2_indexes_to_rate_K_vec(float rate_K_vec_[],
float rate_K_vec_no_mean_[],
float rate_K_sample_freqs_kHz[], int K,
float *mean_, int indexes[], float pf_gain);
/* 16k mode variant of newamp2_indexes_to_rate_K_vec (same parameter list) */
void newamp2_16k_indexes_to_rate_K_vec(float rate_K_vec_[],
float rate_K_vec_no_mean_[],
float rate_K_sample_freqs_kHz[], int K,
float *mean_, int indexes[],
float pf_gain);
void newamp2_indexes_to_model(C2CONST *c2const, MODEL model_[], COMP H[],
float interpolated_surface_[],
float prev_rate_K_vec_[], float *Wo_left,
int *voicing_left,
float rate_K_sample_freqs_kHz[], int K,
codec2_fft_cfg fwd_cfg, codec2_fft_cfg inv_cfg,
int indexes[], float pf_gain, int flag16k);
#endif