From e80d7676d495ec703f0cedc10599e7c0d49a4f92 Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Tue, 4 Oct 2022 00:29:59 +0200
Subject: [PATCH] Livestream: add silent audio track if no audio input is
 provided (fixes youtube and other sites)

---
 project.conf  |  2 +-
 src/main.cpp  | 45 ++++++++++++++++++++++++++++++++++++++-------
 src/sound.cpp |  4 +++-
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/project.conf b/project.conf
index dc004f2..9f32d5f 100644
--- a/project.conf
+++ b/project.conf
@@ -1,7 +1,7 @@
 [package]
 name = "gpu-screen-recorder"
 type = "executable"
-version = "1.1.0"
+version = "1.2.0"
 platforms = ["posix"]
 
 [config]
diff --git a/src/main.cpp b/src/main.cpp
index 682b896..79fee50 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -645,7 +645,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
         codec_context->codec_tag = MKTAG('h', 'v', 'c', '1');
     switch(video_quality) {
         case VideoQuality::MEDIUM:
-            codec_context->bit_rate = 5000000 + (codec_context->width * codec_context->height) / 2;
+            codec_context->bit_rate = 6000000 + (codec_context->width * codec_context->height) / 2;
             /*
             if(use_hevc) {
                 codec_context->qmin = 20;
@@ -796,7 +796,7 @@ static void open_video(AVCodecContext *codec_context,
     AVDictionary *options = nullptr;
     switch(video_quality) {
         case VideoQuality::MEDIUM:
-	        av_dict_set_int(&options, "qp", 36, 0);
+	        av_dict_set_int(&options, "qp", 35, 0);
             //av_dict_set(&options, "preset", "hq", 0);
             break;
         case VideoQuality::HIGH:
@@ -815,7 +815,7 @@ static void open_video(AVCodecContext *codec_context,
 
     if(is_livestream) {
         av_dict_set_int(&options, "zerolatency", 1, 0);
-        av_dict_set(&options, "preset", "llhq", 0);
+        //av_dict_set(&options, "preset", "llhq", 0);
     }
 
     av_opt_set(&options, "rc", "vbr", 0);
@@ -1432,6 +1432,12 @@ int main(int argc, char **argv) {
     const AVOutputFormat *output_format = av_format_context->oformat;
 
     const bool is_livestream = is_livestream_path(filename);
+    // (Some?) livestreaming services require at least one audio track to work.
+    // If no audio is provided then create one silent audio track.
+    if(is_livestream && requested_audio_inputs.empty()) {
+        fprintf(stderr, "Info: live streaming but no audio track was added. Adding a silent audio track\n");
+        requested_audio_inputs.push_back({ "", "gsr-silent" });
+    }
 
     //bool use_hevc = strcmp(window_str, "screen") == 0 || strcmp(window_str, "screen-direct") == 0;
     if(video_codec != VideoCodec::H264 && strcmp(container_format, "flv") == 0) {
@@ -1472,9 +1478,14 @@ int main(int argc, char **argv) {
         const int num_channels = audio_codec_context->ch_layout.nb_channels;
 #endif
 
-        if(sound_device_get_by_name(&audio_tracks.back().sound_device, audio_input.name.c_str(), audio_input.description.c_str(), num_channels, audio_codec_context->frame_size) != 0) {
-            fprintf(stderr, "failed to get 'pulse' sound device\n");
-            exit(1);
+        if(audio_input.name.empty()) {
+            audio_tracks.back().sound_device.handle = NULL;
+            audio_tracks.back().sound_device.frames = 0;
+        } else {
+            if(sound_device_get_by_name(&audio_tracks.back().sound_device, audio_input.name.c_str(), audio_input.description.c_str(), num_channels, audio_codec_context->frame_size) != 0) {
+                fprintf(stderr, "failed to get 'pulse' sound device\n");
+                exit(1);
+            }
         }
 
         ++audio_stream_index;
@@ -1609,10 +1620,13 @@ int main(int argc, char **argv) {
             int64_t pts = 0;
             const double target_audio_hz = 1.0 / (double)audio_track.codec_context->sample_rate;
             double received_audio_time = clock_get_monotonic_seconds();
+            const int64_t timeout_ms = std::round((1000.0 / (double)audio_track.codec_context->sample_rate) * 1000.0);
 
             while(running) {
                 void *sound_buffer;
-                int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
+                int sound_buffer_size = -1;
+                if(audio_track.sound_device.handle)
+                    sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
                 const bool got_audio_data = sound_buffer_size >= 0;
 
                 const double this_audio_frame_time = clock_get_monotonic_seconds();
@@ -1647,6 +1661,23 @@ int main(int argc, char **argv) {
                     }
                 }
 
+                if(!audio_track.sound_device.handle) {
+                    // TODO: Possibly skip the conversion below by pointing the frame directly at the silent buffer:
+                    //audio_track.frame->data[0] = empty_audio;
+                    received_audio_time = this_audio_frame_time;
+                    swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&empty_audio, audio_track.codec_context->frame_size);
+                    audio_track.frame->pts = pts;
+                    pts += audio_track.frame->nb_samples;
+                    ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame);
+                    if(ret >= 0){
+                        receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_track.frame, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, *write_output_mutex);
+                    } else {
+                        fprintf(stderr, "Failed to encode audio!\n");
+                    }
+
+                    usleep(timeout_ms * 1000);
+                }
+
                 if(got_audio_data) {
                     // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
                     swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames);
diff --git a/src/sound.cpp b/src/sound.cpp
index 3eea498..e4086b1 100644
--- a/src/sound.cpp
+++ b/src/sound.cpp
@@ -283,7 +283,9 @@ int sound_device_get_by_name(SoundDevice *device, const char *device_name, const
 }
 
 void sound_device_close(SoundDevice *device) {
-    pa_sound_device_free((pa_handle*)device->handle);
+    if(device->handle) // The handle is NULL for the silent audio track, which has no sound device to free
+        pa_sound_device_free((pa_handle*)device->handle);
+    device->handle = NULL; // Clear the handle so that closing the same device again does nothing
 }
 
 int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {