The FFmpeg command-line tool can mix two audio files into a single audio file with the following command:
ffmpeg -i aa.mp3 -i bb.mp3 -filter_complex amix=inputs=2:duration=first:dropout_transition=2 -f mp3 Remix.mp3
Code implementation:
extern "C" {#include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libavdevice/avdevice.h" Include "Libavfilter/avfilter.h" #include "libavfilter/avfiltergraph.h" #include "libavfilter/buffersink.h" #include "Libavfilter/buffersrc.h" #include "libavutil/audio_fifo.h" #include "libavutil/avutil.h" "#include" libavutil/fifo.h "} #pragma comment (lib," Avcodec.lib ") #pragma comment (lib," Avformat.lib ") #pragma comment (lib," Avutil.lib ") #pragma c Omment (Lib, "Avdevice.lib") #pragma comment (lib, "Avfilter.lib")//#pragma comment (lib, "Avfilter.lib")//#pragma Comment (lib, "Postproc.lib")//#pragma comment (lib, "Swresample.lib") #pragma comment (lib, "Swscale.lib") #include < windows.h> #include <conio.h> #include <time.h> enum capturestate {PREPARED, RUNNING, STOPPED
, finished};
typedef struct BUFFERSOURCECONTEXT {const Avclass *bscclass;
Avfifobuffer *fifo; Avrational time_base; < Time_base to Set in the output link avrational frame_rate;
< Frame_rate to set in the output link unsigned nb_failed_requests;
unsigned warning_limit;
/* Video only */int W, H;
Enum Avpixelformat pix_fmt;
Avrational Pixel_aspect;
Char *sws_param;
Avbufferref *hw_frames_ctx;
/* Audio only */int sample_rate;
Enum Avsampleformat sample_fmt;
int channels;
uint64_t channel_layout;
Char *channel_layout_str;
int got_format_from_params;
int EOF;
} Buffersourcecontext;
avformatcontext* _FMT_CTX_SPK = NULL;
avformatcontext* _fmt_ctx_mic = NULL;
avformatcontext* _fmt_ctx_out = NULL;
int _INDEX_SPK =-1;
int _index_mic =-1;
int _index_a_out =-1;
avfiltergraph* _filter_graph = NULL;
avfiltercontext* _FILTER_CTX_SRC_SPK = NULL;
avfiltercontext* _filter_ctx_src_mic = NULL;
avfiltercontext* _filter_ctx_sink = NULL;
Capturestate _state = capturestate::P repared; CRItical_section _SECTION_SPK;
Critical_section _section_mic;
avaudiofifo* _FIFO_SPK = NULL;
avaudiofifo* _fifo_mic = NULL;
void Initrecorder () {av_register_all ();
Avdevice_register_all ();
Avfilter_register_all ();
int Openspeakerinput (char* inputforamt, char* url) {avinputformat* ifmt = Av_find_input_format (InputForamt);
avdictionary* opt1 = NULL;
Av_dict_set (&OPT1, "Rtbufsize", "10M", 0);
int ret = 0;
ret = avformat_open_input (&_FMT_CTX_SPK, URL, ifmt, &opt1);
if (Ret < 0) {printf ("speaker:failed to call avformat_open_input\n");
return-1;
ret = Avformat_find_stream_info (_FMT_CTX_SPK, NULL);
if (Ret < 0) {printf ("speaker:failed to call avformat_find_stream_info\n");
return-1; for (int i = 0; i < _fmt_ctx_spk->nb_streams; i++) {if (_fmt_ctx_spk->streams[i]->codec-> ; codec_type = = Avmedia_type_audio) {_INDEX_SPK =I
Break
} if (_INDEX_SPK < 0) {printf ("Speaker:negative audio index\n");
return-1;
} avcodeccontext* Codec_ctx = _fmt_ctx_spk->streams[_index_spk]->codec;
avcodec* codec = Avcodec_find_decoder (codec_ctx->codec_id);
if (codec = = NULL) {printf ("Speaker:null audio decoder\n");
return-1;
ret = Avcodec_open2 (Codec_ctx, codec, NULL);
if (Ret < 0) {printf ("speaker:failed to call avcodec_open2\n");
return-1;
} av_dump_format (_FMT_CTX_SPK, _INDEX_SPK, url, 0);
return 0;
int Openmicrophoneinput (char* inputforamt, char* url) {avinputformat* ifmt = Av_find_input_format (InputForamt);
avdictionary* opt1 = NULL;
Av_dict_set (&OPT1, "Rtbufsize", "10M", 0);
int ret = 0;
ret = avformat_open_input (&_fmt_ctx_mic, URL, ifmt, &opt1); if (Ret < 0) {printf ("microphone:failed to call Avformat_open_Input\n ");
return-1;
ret = Avformat_find_stream_info (_fmt_ctx_mic, NULL);
if (Ret < 0) {printf ("microphone:failed to call avformat_find_stream_info\n");
return-1; for (int i = 0; i < _fmt_ctx_mic->nb_streams; i++) {if (_fmt_ctx_mic->streams[i]->codec->
; codec_type = = Avmedia_type_audio) {_index_mic = i;
Break
} if (_index_mic < 0) {printf ("Microphone:negative audio index\n");
return-1;
} avcodeccontext* Codec_ctx = _fmt_ctx_mic->streams[_index_mic]->codec;
avcodec* codec = Avcodec_find_decoder (codec_ctx->codec_id);
if (codec = = NULL) {printf ("Microphone:null audio decoder\n");
return-1;
ret = Avcodec_open2 (Codec_ctx, codec, NULL);
if (Ret < 0) {printf ("microphone:failed to call avcodec_open2\n");
return-1; } Av_dump_format (_fmt_ctX_mic, _index_mic, url, 0);
return 0;
int Openfileoutput (char* fileName) {int ret = 0;
ret = AVFORMAT_ALLOC_OUTPUT_CONTEXT2 (&_fmt_ctx_out, NULL, NULL, fileName);
if (Ret < 0) {printf ("mixer:failed to call avformat_alloc_output_context2\n");
return-1;
} avstream* stream_a = NULL;
Stream_a = Avformat_new_stream (_fmt_ctx_out, NULL);
if (stream_a = = NULL) {printf ("mixer:failed to call avformat_new_stream\n");
return-1;
} _index_a_out = 0;
Stream_a->codec->codec_type = Avmedia_type_audio;
avcodec* Codec_mp3 = Avcodec_find_encoder (Av_codec_id_mp3);
Stream_a->codec->codec = Codec_mp3;
Stream_a->codec->sample_rate = 16000;
Stream_a->codec->channels = 1;
Stream_a->codec->channel_layout = av_get_default_channel_layout (1);
STREAM_A->CODEC->SAMPLE_FMT = codec_mp3->sample_fmts[0];
Stream_a->codec->bit_rate = 16000; Stream_a->codec->time_base.num = 1;
Stream_a->codec->time_base.den = stream_a->codec->sample_rate;
Stream_a->codec->codec_tag = 0; if (_fmt_ctx_out->oformat->flags & Avfmt_globalheader) stream_a->codec->flags |= CODEC_FLAG_GLOBAL
_header; if (Avcodec_open2 (Stream_a->codec, Stream_a->codec->codec, NULL) < 0) {printf ("mixer:failed to C
All avcodec_open2\n ");
return-1; } if (! ( _fmt_ctx_out->oformat->flags & Avfmt_nofile)) {if (Avio_open (&_FMT_CTX_OUT->PB, FileName, AV
Io_flag_write) < 0) {printf ("mixer:failed to call avio_open\n");
return-1; } if (Avformat_write_header (_fmt_ctx_out, NULL) < 0) {printf ("mixer:failed to call AVFORMAT_WR
Ite_header\n ");
return-1; BOOL B = (!_fmt_ctx_out->streams[0]->time_base.num && _fmt_ctx_out->streams[0]->codec-> time_base.num);
Av_dump_format (_fmt_ctx_out, _index_a_out, FileName, 1); _FIFO_SPK = Av_audio_fifo_alloc (_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt, _fmt_ctx_spk->
Streams[_index_spk]->codec->channels, 30*_fmt_ctx_spk->streams[_index_spk]->codec->frame_size); _fifo_mic = Av_audio_fifo_alloc (_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt, _fmt_ctx_mic->
Streams[_index_mic]->codec->channels, 30*_fmt_ctx_mic->streams[_index_spk]->codec->frame_size);
return 0;
int Initfilter (char* filter_desc) {char args_spk[5120];
char* PAD_NAME_SPK = "In0";
Char args_mic[512];
char* pad_name_mic = "in1";
avfilter* FILTER_SRC_SPK = Avfilter_get_by_name ("Abuffer");
avfilter* filter_src_mic = Avfilter_get_by_name ("Abuffer");
avfilter* Filter_sink = Avfilter_get_by_name ("Abuffersink");
avfilterinout* FILTER_OUTPUT_SPK = Avfilter_inout_alloc (); avfilterinout* filter_output_mic = AVFIlter_inout_alloc ();
avfilterinout* filter_input = Avfilter_inout_alloc ();
_filter_graph = Avfilter_graph_alloc (); sprintf_s (ARGS_SPK, sizeof (ARGS_SPK), "Time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%i64x", _fmt _ctx_spk->streams[_index_spk]->codec->time_base.num, _fmt_ctx_spk->streams[_index_spk]->codec-& Gt;time_base.den, _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate, Av_get_sample_fmt_name (_FMT_CTX_SPK->STREAMS[_INDEX_SPK]->CODEC->SAMPLE_FMT), _fmt_ctx_spk->streams[_index_spk]->
Codec->channel_layout); sprintf_s (args_mic, sizeof (args_mic), "Time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%i64x", _fmt _ctx_mic->streams[_index_mic]->codec->time_base.num, _fmt_ctx_mic->streams[_index_mic]->codec-& Gt;time_base.den, _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate, Av_get_sample_fmt_name(_FMT_CTX_MIC->STREAMS[_INDEX_MIC]->CODEC->SAMPLE_FMT), _fmt_ctx_mic->streams[_index_mic]->
Codec->channel_layout); sprintf_s (ARGS_SPK, sizeof (ARGS_SPK), "Time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%i64x", _ Fmt_ctx_out->streams[_index_a_out]->codec->time_base.num, _fmt_ctx_out->streams[_index_a_out]-> Codec->time_base.den, _fmt_ctx_out->streams[_index_a_out]->codec->sample_rate, Av_get_sample_fmt_ Name (_FMT_CTX_OUT->STREAMS[_INDEX_A_OUT]->CODEC->SAMPLE_FMT), _fmt_ctx_out->streams[_index_a_out]-
>codec->channel_layout); sprintf_s (args_mic, sizeof (args_mic), "Time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%i64x", _ Fmt_ctx_out->streams[_index_a_out]->codec->time_base.num, _fmt_ctx_out->streams[_index_a_out]-> Codec->time_base.den, _fmt_ctx_out->streams[_index_a_out]->codec->sample_rate, Av_get_sample_fmt_ Name (_FMT_CTX_OUT->STREAMS[_INDEX_A_OUT]->CODEC->SAMPLE_FMT), _fmt_ctx_out->streams[_index_a_out]->codec->channel_layout);
int ret = 0; ret = Avfilter_graph_create_filter (&_FILTER_CTX_SRC_SPK, FILTER_SRC_SPK, PAD_NAME_SPK, ARGS_SPK, NULL, _filter_
Graph);
if (Ret < 0) {printf ("filter:failed to call Avfilter_graph_create_filter-src spk\n");
return-1; ret = Avfilter_graph_create_filter (&_filter_ctx_src_mic, Filter_src_mic, Pad_name_mic, Args_mic, NULL, _filter_g
Raph);
if (Ret < 0) {printf ("filter:failed to call Avfilter_graph_create_filter-src mic\n");
return-1;
ret = Avfilter_graph_create_filter (&_filter_ctx_sink, Filter_sink, "out", NULL, NULL, _filter_graph);
if (Ret < 0) {printf ("filter:failed to call Avfilter_graph_create_filter-sink\n");
return-1;
} avcodeccontext* Encodec_ctx = _fmt_ctx_out->streams[_index_a_out]->codec; ret = Av_opt_set_bin (_filter_ctX_sink, "Sample_fmts", (uint8_t*) &encodec_ctx->sample_fmt, sizeof (ENCODEC_CTX->SAMPLE_FMT), AV_OPT_
Search_children);
if (Ret < 0) {printf ("filter:failed to call Av_opt_set_bin-sample_fmts\n");
return-1; ret = Av_opt_set_bin (_filter_ctx_sink, "channel_layouts", (uint8_t*) &encodec_ctx->channel_layout, sizeof (
encodec_ctx->channel_layout), Av_opt_search_children);
if (Ret < 0) {printf ("filter:failed to call Av_opt_set_bin-channel_layouts\n");
return-1; ret = Av_opt_set_bin (_filter_ctx_sink, "Sample_rates", (uint8_t*) &encodec_ctx->sample_rate, sizeof (encodec_
ctx->sample_rate), Av_opt_search_children);
if (Ret < 0) {printf ("filter:failed to call Av_opt_set_bin-sample_rates\n");
return-1;
} filter_output_spk->name = Av_strdup (PAD_NAME_SPK);
Filter_output_spk->filter_ctx = _FILTER_CTX_SRC_SPK; Filter_output_spk->pad_idx = 0;
Filter_output_spk->next = filter_output_mic;
Filter_output_mic->name = Av_strdup (pad_name_mic);
Filter_output_mic->filter_ctx = _filter_ctx_src_mic;
Filter_output_mic->pad_idx = 0;
Filter_output_mic->next = NULL;
Filter_input->name = Av_strdup ("Out");
Filter_input->filter_ctx = _filter_ctx_sink;
Filter_input->pad_idx = 0;
Filter_input->next = NULL;
avfilterinout* filter_outputs[2];
Filter_outputs[0] = FILTER_OUTPUT_SPK;
FILTER_OUTPUTS[1] = filter_output_mic;
ret = Avfilter_graph_parse_ptr (_filter_graph, Filter_desc, &filter_input, filter_outputs, NULL);
if (Ret < 0) {printf ("filter:failed to call avfilter_graph_parse_ptr\n");
return-1;
ret = Avfilter_graph_config (_filter_graph, NULL);
if (Ret < 0) {printf ("filter:failed to call avfilter_graph_config\n");
return-1;
} avfilter_inout_free (&filter_input); Av_free (FILTER_SRC_SPK);
Av_free (filter_src_mic);
Avfilter_inout_free (filter_outputs);
Av_free (filter_outputs);
char* temp = Avfilter_graph_dump (_filter_graph, NULL);
printf ("%s\n", temp);
return 0;
/*
 * Thread entry point: reads packets from the speaker input, decodes the
 * audio stream and appends the decoded samples to _FIFO_SPK.
 *
 * lpparam  unused.
 *
 * Runs while _state is RUNNING and stops early on a decode error.
 * Returns 0, or 1 if the decode frame cannot be allocated.
 */
DWORD WINAPI Speakercapthreadproc(LPVOID lpparam)
{
    (void)lpparam;

    AVFrame* frame = av_frame_alloc();
    if (frame == NULL) {
        return 1;
    }

    AVCodecContext* decoder_ctx = _FMT_CTX_SPK->streams[_INDEX_SPK]->codec;
    AVPacket packet;
    av_init_packet(&packet);
    int got_sound = 0;

    while (_state == capturestate::RUNNING) {
        packet.data = NULL;
        packet.size = 0;

        if (av_read_frame(_FMT_CTX_SPK, &packet) < 0) {
            continue;
        }

        /* Packets of other streams are dropped, but must still be released. */
        if (packet.stream_index != _INDEX_SPK) {
            av_free_packet(&packet);
            continue;
        }

        /* Release the packet before checking the result so an error does not leak it. */
        int decode_ret = avcodec_decode_audio4(decoder_ctx, frame, &got_sound, &packet);
        av_free_packet(&packet);
        if (decode_ret < 0) {
            break;
        }
        if (!got_sound) {
            continue;
        }

        /* Wait until the FIFO can take the whole frame, or until capture stops. */
        int fifo_spk_space = av_audio_fifo_space(_FIFO_SPK);
        while (fifo_spk_space < frame->nb_samples && _state == capturestate::RUNNING) {
            Sleep(10);
            printf("_FIFO_SPK full!\n");
            fifo_spk_space = av_audio_fifo_space(_FIFO_SPK);
        }

        if (fifo_spk_space >= frame->nb_samples) {
            EnterCriticalSection(&_SECTION_SPK);
            av_audio_fifo_write(_FIFO_SPK, (void**)frame->data, frame->nb_samples);
            LeaveCriticalSection(&_SECTION_SPK);
        }
    }

    av_frame_free(&frame);
    return 0;
}
/*
 * Thread entry point: reads packets from the microphone input, decodes the
 * audio stream and appends the decoded samples to _fifo_mic.
 *
 * lpparam  unused.
 *
 * Waits while _state is PREPARED, then runs while it is RUNNING and stops
 * early on a decode error.
 * Returns 0, or 1 if the decode frame cannot be allocated.
 */
DWORD WINAPI Microphonecapthreadproc(LPVOID lpparam)
{
    (void)lpparam;

    AVFrame* frame = av_frame_alloc();
    if (frame == NULL) {
        return 1;
    }

    AVCodecContext* decoder_ctx = _fmt_ctx_mic->streams[_index_mic]->codec;
    AVPacket packet;
    av_init_packet(&packet);
    int got_sound = 0;

    /* Hold off until capture starts; sleep instead of spinning on a full core. */
    while (_state == capturestate::PREPARED) {
        Sleep(1);
    }

    while (_state == capturestate::RUNNING) {
        packet.data = NULL;
        packet.size = 0;

        if (av_read_frame(_fmt_ctx_mic, &packet) < 0) {
            continue;
        }

        /* Packets of other streams are dropped, but must still be released. */
        if (packet.stream_index != _index_mic) {
            av_free_packet(&packet);
            continue;
        }

        /* Release the packet before checking the result so an error does not leak it. */
        int decode_ret = avcodec_decode_audio4(decoder_ctx, frame, &got_sound, &packet);
        av_free_packet(&packet);
        if (decode_ret < 0) {
            break;
        }
        if (!got_sound) {
            continue;
        }

        /* Wait until the FIFO can take the whole frame, or until capture stops. */
        int fifo_mic_space = av_audio_fifo_space(_fifo_mic);
        while (fifo_mic_space < frame->nb_samples && _state == capturestate::RUNNING) {
            Sleep(10);
            printf("_fifo_mic full!\n");
            fifo_mic_space = av_audio_fifo_space(_fifo_mic);
        }

        if (fifo_mic_space >= frame->nb_samples) {
            EnterCriticalSection(&_section_mic);
            av_audio_fifo_write(_fifo_mic, (void**)frame->data, frame->nb_samples);
            LeaveCriticalSection(&_section_mic);
        }
    }

    av_frame_free(&frame);
    return 0;
}
int main () {int ret = 0;
Initrecorder ();
Char filename[128];
char* outfiletype = ". mp3";
time_t Rawtime;
tm* Timeinfo;
Time (&rawtime);
Timeinfo = LocalTime (&rawtime); sprintf_s (filename, sizeof (filename), "%d_%d_%d_%d_%d_%d%s", Timeinfo->tm_year + 1900, Timeinfo->tm_mon + 1,
Timeinfo->tm_mday, Timeinfo->tm_hour, Timeinfo->tm_min, Timeinfo->tm_sec, OutFileType);
char* Filter_desc = "[In0][in1]amix=inputs=2[out]";
ret = Openspeakerinput ("DShow", "Audio=virtual-audio-capturer");
ret = Openspeakerinput (NULL, "Aa.mp3");
if (Ret < 0) {goto release; }//ret = OpenmicrophoneiNput ("DShow", "audio=external microphone (Conexant S");
ret = Openmicrophoneinput (NULL, "Bb.mp3");
if (Ret < 0) {goto release;
ret = Openfileoutput (fileName);
if (Ret < 0) {goto release;
ret = Initfilter (FILTER_DESC);
if (Ret < 0) {goto release;
} _state = capturestate::running;
InitializeCriticalSection (&_SECTION_SPK);
InitializeCriticalSection (&_section_mic);
CreateThread (null, 0, Speakercapthreadproc, 0, 0, NULL);
CreateThread (null, 0, Microphonecapthreadproc, 0, 0, NULL);
int tmpfifofailed = 0;
int64_t frame_count = 0;
while (_state!= capturestate::finished) {if (_kbhit ()) {_state = capturestate::stopped;
Break
else {int ret = 0;
avframe* PFRAME_SPK = Av_frame_alloc ();