Reposted from: https://www.cnblogs.com/cpuimage/p/8908551.html
I previously shared an article, "Audio gain loudness analysis: a complete ReplayGain C code example",
which is mainly used to evaluate the loudness of an audio clip of a given length.
Once the loudness has been analysed, the most common follow-up need is audio gain, i.e. raising the volume.
In real projects, however, it is genuinely hard to settle on a standard
for deciding under which conditions the volume should be raised or lowered.
In the communications industry, the common practice is to use silence detection (voice activity detection, VAD):
when a frame is detected as silence or noise it is left unprocessed; otherwise it is processed according to some strategy.
This article covers two algorithms: one for silence detection and one for audio gain.
There is not much to say about the gain itself; it is similar to normalizing (stretching) data.
In WebRTC, silence detection computes its features with a GMM (Gaussian Mixture Model).
Historically, there have been three main approaches to audio features:
GMM, spectrogram, and MFCC (Mel-frequency cepstral coefficients).
Frankly, the features extracted by a GMM are less robust than the latter two.
I will not go into more detail here; interested readers can consult Wikipedia to fill in the background.
Of course, in practical use these algorithms lead to a few useful tricks,
for example using silence detection to trim audio, or combining it with audio gain to do some audio enhancement.
In the WebRTC source code, automatic gain is implemented in analog_agc.c and digital_agc.c,
and silence detection is implemented in webrtc_vad.c.
There are historical reasons for this naming.
After tidying up,
the gain algorithm lives in agc.c and agc.h,
and silence detection lives in vad.c and vad.h.
Complete sample code for the gain algorithm:
#include <stdio.h> #include <stdlib.h> #include <stdint.h>//with https://github.com/mackron/dr_libs/ Blob/master/dr_wav.h Decoding # define Dr_wav_implementation#include "Dr_wav.h" #include "agc.h" #ifndef nullptr#define nullptr 0#endif#ifndef Min#define MIN (A, B) ((a) < (b)? (A): (B) #endif//write wav file void wavwrite_int16 (char *filename, int16_t *buffer, size_t samplerate, size_t totalsamplecount) {Drwav_data_format format = {}; Format.container = Drwav_container_riff; <--Drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64. Format.format = DR_WAVE_FORMAT_PCM; <--any of the dr_wave_format_* codes. Format.channels = 1; Format.samplerate = (drwav_uint32) samplerate; Format.bitspersample = 16; Drwav *pwav = drwav_open_file_write (filename, &format); if (pwav) {Drwav_uint64 Sampleswritten = Drwav_write (pwav, totalsamplecount, buffer); Drwav_uninit (pwav); if (sampleswritten! = TotalSamplecount) {fprintf (stderr, "error\n"); Exit (1); }}}//read wav file int16_t *wavread_int16 (char *filename, uint32_t *samplerate, uint64_t *totalsamplecount) {unsigned int Channels int16_t *buffer = drwav_open_and_read_file_s16 (filename, &channels, samplerate, Totalsamplecount); if (buffer = = nullptr) {printf ("failed to read WAV file."); }//Only process single-channel audio if (channels! = 1) {drwav_free (buffer); buffer = nullptr; *samplerate = 0; *totalsamplecount = 0; } return buffer; Partition path function void Splitpath (const char *path, Char *drv, Char *dir, Char *name, char *ext) {const char *end; const char *p; const char *s; if (Path[0] && path[1] = = ': ') {if (DRV) {*drv++ = *path++; *drv++ = *path++; *drv = ' + '; }} else if (DRV) *drv = ' + '; for (end = path; *end && *end! = ': ';) end++; for (p = end; p > Path && *--p! = ' \\ ' && *p! = '/';) if (*p = = '. 
') {end = P; Break } if (EXT) for (s = end; (*ext = *s++);) ext++; for (p = end; p > path;) if (*--p = = ' \ \ ' | | *p = = '/') {p++; Break } if (name) {for (s = p; s < end;) *name++ = *s++; *name = ' + '; } if (dir) {for (s = path; s < p;) *dir++ = *s++; *dir = ' + '; }}int agcprocess (int16_t *buffer, uint32_t samplerate, size_t samplescount, int16_t agcmode) {if (buffer = = nullptr) r eturn-1; if (Samplescount = = 0) return-1; Webrtcagcconfig Agcconfig; AGCCONFIG.COMPRESSIONGAINDB = 9; Default 9 DB agcconfig.limiterenable = 1; Default Kagctrue (ON) Agcconfig.targetleveldbfs = 3; Default 3 ( -3 dbov) int minlevel = 0; int maxlevel = 255; size_t samples = MIN (n, samplerate/100); if (samples = = 0) return-1; const int maxsamples = 320; Int16_t *input = buffer; size_t ntotal = (samplescount/samples); void *agcinst = Webrtcagc_create (); if (Agcinst = = NULL) return-1; int status = Webrtcagc_init (Agcinst, MinLevel, Maxlevel, Agcmode, samplerate); if (Status! = 0) {printf ("Webrtcagc_init fail\n"); Webrtcagc_free (Agcinst); return-1; } status = Webrtcagc_set_config (Agcinst, agcconfig); if (Status! = 0) {printf ("Webrtcagc_set_config fail\n"); Webrtcagc_free (Agcinst); return-1; } size_t num_bands = 1; int Inmiclevel, outmiclevel =-1; int16_t Out_buffer[maxsamples]; int16_t *out16 = Out_buffer; uint8_t saturationwarning = 1; If there is an overflow, the maximum value after the gain magnification exceeds 65536 int16_t echo = 0; Whether the gain amplification considers the echo effect for (int i = 0; i < ntotal; i++) {inmiclevel = 0; int nagcret = Webrtcagc_process (Agcinst, (const int16_t *const *) &input, num_bands, samples, (int16_t *const *) &out16, Inmiclevel, &outmiclevel, Echo, &saturation Warning); if (Nagcret! 
= 0) {printf ("failed in webrtcagc_process\n"); Webrtcagc_free (Agcinst); return-1; } memcpy (Input, out_buffer, samples * sizeof (int16_t)); Input + = samples; } webrtcagc_free (Agcinst); return 1;} void Auto_gain (char *in_file, char *out_file) {//Audio sample rate uint32_t samplerate = 0; Total number of audio samples uint64_t insamplecount = 0; int16_t *inbuffer = wavread_int16 (In_file, &samplerate, &insamplecount); If the load succeeds if (inbuffer! = nullptr) {//Kagcmodeadaptiveanalog analog volume adjustment//Kagcmodeadaptivedigital adaptive gain Kagcmodefixeddigital fixed gain agcprocess (Inbuffer, Samplerate, Insamplecount, kagcmodeadaptivedigital); Wavwrite_int16 (Out_file, Inbuffer, Samplerate, Insamplecount); Free (inbuffer); }}int Main (int argc, char *argv[]) { printf ("WebRTC Automatic Gain control\n"); printf ("Blog: http://cpuimage.cnblogs.com/\n"); printf ("Audio auto gain \ n"); if (ARGC < 2) return-1; Char *in_file = argv[1]; Char drive[3]; Char dir[256]; Char fname[256]; Char ext[256]; Char out_file[1024]; Splitpath (In_file, Drive, dir, fname, ext); sprintf (Out_file, "%s%s%s_out%s", Drive, dir, fname, ext); Auto_gain (In_file, out_file); printf ("Press any key to exit the program \ n"); GetChar (); return 0;}
Complete sample code for silence detection:
#include <stdio.h> #include <stdlib.h> #include <stdint.h>//with https://github.com/mackron/dr_libs/ Blob/master/dr_wav.h Decoding # define Dr_wav_implementation#include "Dr_wav.h" #include "vad.h" #ifndef nullptr#define nullptr 0#endif#ifndef Min#define MIN (A, B) ((a) < (b)? (a): (b) #endif #ifndef max#define MAX (A, B) ((a) > (b)? (A): (B) #endif//Read WAV file int16_t *wavread_int16 (char *filename, uint32_t *samplerate, uint64_t *totalsamplecount) {uns igned int channels; int16_t *buffer = drwav_open_and_read_file_s16 (filename, &channels, samplerate, Totalsamplecount); if (buffer = = nullptr) {printf ("failed to read WAV file."); }//Only process single-channel audio if (channels! = 1) {drwav_free (buffer); buffer = nullptr; *samplerate = 0; *totalsamplecount = 0; } return buffer; int vadprocess (int16_t *buffer, uint32_t samplerate, size_t samplescount, int16_t vad_mode, int per_ms_frames) {if (BU Ffer = = nullptr) return-1; if (SamplEscount = = 0) return-1; kvalidrates:8000, 16000, 32000, 48000//Ten, or-ms frames Per_ms_frames = MAX (MIN (10, Per_ms_frames), H-H (+); ; size_t samples = samplerate * per_ms_frames/1000; if (samples = = 0) return-1; int16_t *input = buffer; size_t ntotal = (samplescount/samples); void *vadinst = Webrtcvad_create (); if (Vadinst = = NULL) return-1; int status = Webrtcvad_init (Vadinst); if (Status! = 0) {printf ("Webrtcvad_init fail\n"); Webrtcvad_free (Vadinst); return-1; } status = Webrtcvad_set_mode (Vadinst, Vad_mode); if (Status! 
= 0) {printf ("Webrtcvad_set_mode fail\n"); Webrtcvad_free (Vadinst); return-1; } printf ("Activity: \ n"); for (int i = 0; i < ntotal; i++) {int nvadret = webrtcvad_process (vadinst, samplerate, input, samples); if (Nvadret = =-1) {printf ("failed in webrtcvad_process\n"); Webrtcvad_free (Vadinst); return-1; } else {//output result printf ("%d \ T", Nvadret); } input + = samples; } printf ("\ n"); Webrtcvad_free (Vadinst); return 1;} void Vad (char *in_file) {//Audio sample rate uint32_t samplerate = 0; Total number of audio samples uint64_t insamplecount = 0; int16_t *inbuffer = wavread_int16 (In_file, &samplerate, &insamplecount); If the load succeeds if (inbuffer! = nullptr) {//Aggressiveness mode (0, 1, 2, or 3) int16_t mode = 1; int Per_ms = 30; Vadprocess (Inbuffer, samplerate, Insamplecount, mode, Per_ms); Free (inbuffer); }}int Main (int argc, char *argv[]) {printf ("WebRTC Voice Activity detector\n"); printf ("Blog: http://cpuimage.cnblogs.com/\n"); printf ("Mute detection \ n"); if (ARGC < 2) return-1; Char *in_file = argv[1]; VAD (In_file); printf ("Press any key to exit the program \ n"); GetChar (); return 0;}
Automatic gain project: https://github.com/cpuimage/WebRTC_AGC
The workflow is:
load a WAV file (drag and drop it onto the executable), apply gain processing, and save the result as a _out.wav file.
Silence detection project: https://github.com/cpuimage/WebRTC_VAD
The workflow is:
load a WAV file (drag and drop it onto the executable) and print the silence detection result.
Note: 1 means non-silent and 0 means silent.
For caveats and parameters, see the code comments.
Build the sample code with CMake; see CMakeLists.txt for details.
If you have other related questions or needs, feel free to contact me by e-mail to discuss them.
The e-mail address is:
[Email protected]
Audio automatic gain and silence detection algorithms with complete C code [repost]