From e80d7676d495ec703f0cedc10599e7c0d49a4f92 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 4 Oct 2022 00:29:59 +0200 Subject: [PATCH] Livestream: add silent audio track if no audio input is provided (fixes youtube and other sites) --- project.conf | 2 +- src/main.cpp | 45 ++++++++++++++++++++++++++++++++++++++------- src/sound.cpp | 4 +++- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/project.conf b/project.conf index dc004f2..9f32d5f 100644 --- a/project.conf +++ b/project.conf @@ -1,7 +1,7 @@ [package] name = "gpu-screen-recorder" type = "executable" -version = "1.1.0" +version = "1.2.0" platforms = ["posix"] [config] diff --git a/src/main.cpp b/src/main.cpp index 682b896..79fee50 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -645,7 +645,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con codec_context->codec_tag = MKTAG('h', 'v', 'c', '1'); switch(video_quality) { case VideoQuality::MEDIUM: - codec_context->bit_rate = 5000000 + (codec_context->width * codec_context->height) / 2; + codec_context->bit_rate = 6000000 + (codec_context->width * codec_context->height) / 2; /* if(use_hevc) { codec_context->qmin = 20; @@ -796,7 +796,7 @@ static void open_video(AVCodecContext *codec_context, AVDictionary *options = nullptr; switch(video_quality) { case VideoQuality::MEDIUM: - av_dict_set_int(&options, "qp", 36, 0); + av_dict_set_int(&options, "qp", 35, 0); //av_dict_set(&options, "preset", "hq", 0); break; case VideoQuality::HIGH: @@ -815,7 +815,7 @@ static void open_video(AVCodecContext *codec_context, if(is_livestream) { av_dict_set_int(&options, "zerolatency", 1, 0); - av_dict_set(&options, "preset", "llhq", 0); + //av_dict_set(&options, "preset", "llhq", 0); } av_opt_set(&options, "rc", "vbr", 0); @@ -1432,6 +1432,12 @@ int main(int argc, char **argv) { const AVOutputFormat *output_format = av_format_context->oformat; const bool is_livestream = is_livestream_path(filename); + // (Some?) 
livestreaming services require at least one audio track to work. + // If no audio is provided then create one silent audio track. + if(is_livestream && requested_audio_inputs.empty()) { + fprintf(stderr, "Info: live streaming but no audio track was added. Adding a silent audio track\n"); + requested_audio_inputs.push_back({ "", "gsr-silent" }); + } //bool use_hevc = strcmp(window_str, "screen") == 0 || strcmp(window_str, "screen-direct") == 0; if(video_codec != VideoCodec::H264 && strcmp(container_format, "flv") == 0) { @@ -1472,9 +1478,14 @@ int main(int argc, char **argv) { const int num_channels = audio_codec_context->ch_layout.nb_channels; #endif - if(sound_device_get_by_name(&audio_tracks.back().sound_device, audio_input.name.c_str(), audio_input.description.c_str(), num_channels, audio_codec_context->frame_size) != 0) { - fprintf(stderr, "failed to get 'pulse' sound device\n"); - exit(1); + if(audio_input.name.empty()) { + audio_tracks.back().sound_device.handle = NULL; + audio_tracks.back().sound_device.frames = 0; + } else { + if(sound_device_get_by_name(&audio_tracks.back().sound_device, audio_input.name.c_str(), audio_input.description.c_str(), num_channels, audio_codec_context->frame_size) != 0) { + fprintf(stderr, "failed to get 'pulse' sound device\n"); + exit(1); + } } ++audio_stream_index; @@ -1609,10 +1620,13 @@ int main(int argc, char **argv) { int64_t pts = 0; const double target_audio_hz = 1.0 / (double)audio_track.codec_context->sample_rate; double received_audio_time = clock_get_monotonic_seconds(); + const int64_t timeout_ms = std::round((1000.0 / (double)audio_track.codec_context->sample_rate) * 1000.0); while(running) { void *sound_buffer; - int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer); + int sound_buffer_size = -1; + if(audio_track.sound_device.handle) + sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer); const bool got_audio_data = sound_buffer_size 
>= 0; const double this_audio_frame_time = clock_get_monotonic_seconds(); @@ -1647,6 +1661,23 @@ int main(int argc, char **argv) { } } + if(!audio_track.sound_device.handle) { + // TODO: + //audio_track.frame->data[0] = empty_audio; + received_audio_time = this_audio_frame_time; + swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&empty_audio, audio_track.codec_context->frame_size); + audio_track.frame->pts = pts; + pts += audio_track.frame->nb_samples; + ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame); + if(ret >= 0){ + receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_track.frame, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, *write_output_mutex); + } else { + fprintf(stderr, "Failed to encode audio!\n"); + } + + usleep(timeout_ms * 1000); + } + if(got_audio_data) { // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format? swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames); diff --git a/src/sound.cpp b/src/sound.cpp index 3eea498..e4086b1 100644 --- a/src/sound.cpp +++ b/src/sound.cpp @@ -283,7 +283,9 @@ int sound_device_get_by_name(SoundDevice *device, const char *device_name, const } void sound_device_close(SoundDevice *device) { - pa_sound_device_free((pa_handle*)device->handle); + if(device->handle) + pa_sound_device_free((pa_handle*)device->handle); + device->handle = NULL; } int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {