Fix replay video/audio desync, fix dummy audio when dropping audio input, give each audio stream a new name so it can be replaced with pipewire graphs

2022-09-20 03:39:15 +02:00 · 2022-09-20 03:39:15 +02:00 · 919890b7b2
commit 919890b7b2
parent 9f2ddf3802
6 changed files with 258 additions and 189 deletions
--- a/build.sh
+++ b/build.sh
@ -1,8 +1,8 @@
 #!/bin/sh -e

-dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample"
+dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample"
 includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include"
 libs="$(pkg-config --libs $dependencies) /usr/lib64/libcuda.so -ldl -pthread -lm"
-g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1
-g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1
+g++ -c src/sound.cpp -O2 $includes
+g++ -c src/main.cpp -O2 $includes
 g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs
--- a/install_ubuntu.sh
+++ b/install_ubuntu.sh
@ -15,11 +15,11 @@ apt-get -y install build-essential nvidia-cuda-dev\
 	libglew-dev libglfw3-dev\
 	libpulse-dev

-dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample"
+dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample"
 includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include"
 libs="$(pkg-config --libs $dependencies) /usr/lib/x86_64-linux-gnu/stubs/libcuda.so -ldl -pthread -lm"
-g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1
-g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1
+g++ -c src/sound.cpp -O2 $includes
+g++ -c src/main.cpp -O2 $includes
 g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs
 install -Dm755 "gpu-screen-recorder" "/usr/local/bin/gpu-screen-recorder"

--- a/project.conf
+++ b/project.conf
@ -8,9 +8,6 @@ platforms = ["posix"]
 include_dirs = ["/opt/cuda/targets/x86_64-linux/include"]
 libs = ["/usr/lib64/libcuda.so"]

-[define]
-PULSEAUDIO = "1"
-
 [dependencies]
 glew = ">=2"
 libavcodec = ">=58"
@ -23,7 +20,6 @@ xcomposite = ">=0.2"
 # TODO: Remove this dependency, this is needed right now for glfwMakeContextCurrent
 glfw3 = "3"

-#alsa = "1"
-libpulse-simple = ">=13"
+libpulse = ">=13"

 libswresample = ">=3"
--- a/src/main.cpp
+++ b/src/main.cpp
@ -62,7 +62,6 @@ extern "C" {

 #include <deque>
 #include <future>
-#include <condition_variable>

 //#include <CL/cl.h>

@ -434,7 +433,7 @@ static void receive_frames(AVCodecContext *av_codec_context, int stream_index, A
            } else {
                av_packet_rescale_ts(&av_packet, av_codec_context->time_base, stream->time_base);
                av_packet.stream_index = stream->index;
-                int ret = av_interleaved_write_frame(av_format_context, &av_packet);
+                int ret = av_write_frame(av_format_context, &av_packet);
                if(ret < 0) {
                    fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", av_packet.stream_index, av_error_to_string(ret), ret);
                }
@ -810,11 +809,17 @@ static std::future<void> save_replay_thread;
 static std::vector<AVPacket> save_replay_packets;
 static std::string save_replay_output_filepath;

-static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format) {
+static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format, std::mutex &write_output_mutex) {
    if(save_replay_thread.valid())
        return;
    
    size_t start_index = (size_t)-1;
+    int64_t video_pts_offset = 0;
+    int64_t audio_pts_offset = 0;
+
+    {
+        std::lock_guard<std::mutex> lock(write_output_mutex);
+        start_index = (size_t)-1;
        for(size_t i = 0; i < frame_data_queue.size(); ++i) {
            const AVPacket &av_packet = frame_data_queue[i];
            if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
@ -826,17 +831,29 @@ static void save_replay_async(AVCodecContext *video_codec_context, int video_str
        if(start_index == (size_t)-1)
            return;

-    int64_t pts_offset = 0;
-    if(frames_erased)
-        pts_offset = frame_data_queue[start_index].pts;
+        if(frames_erased) {
+            video_pts_offset = frame_data_queue[start_index].pts;
+            
+            // Find the next audio packet to use as audio pts offset
+            for(size_t i = start_index; i < frame_data_queue.size(); ++i) {
+                const AVPacket &av_packet = frame_data_queue[i];
+                if(av_packet.stream_index != video_stream_index) {
+                    audio_pts_offset = av_packet.pts;
+                    break;
+                }
+            }
+        } else {
+            start_index = 0;
+        }

        save_replay_packets.resize(frame_data_queue.size());
        for(size_t i = 0; i < frame_data_queue.size(); ++i) {
            av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
        }
+    }

    save_replay_output_filepath = output_dir + "/Replay_" + get_date_str() + "." + container_format;
-    save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, pts_offset, video_codec_context, &audio_tracks]() mutable {
+    save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, video_pts_offset, audio_pts_offset, video_codec_context, &audio_tracks]() mutable {
        AVFormatContext *av_format_context;
        // The output format is automatically guessed from the file extension
        avformat_alloc_output_context2(&av_format_context, nullptr, container_format.c_str(), nullptr);
@ -874,18 +891,22 @@ static void save_replay_async(AVCodecContext *video_codec_context, int video_str
            AVStream *stream = video_stream;
            AVCodecContext *codec_context = video_codec_context;

-            if(av_packet.stream_index != video_stream_index) {
+            if(av_packet.stream_index == video_stream_index) {
+                av_packet.pts -= video_pts_offset;
+                av_packet.dts -= video_pts_offset;
+            } else {
                AudioTrack *audio_track = stream_index_to_audio_track_map[av_packet.stream_index];
                stream = audio_track->stream;
                codec_context = audio_track->codec_context;
+
+                av_packet.pts -= audio_pts_offset;
+                av_packet.dts -= audio_pts_offset;
            }

            av_packet.stream_index = stream->index;
-            av_packet.pts -= pts_offset;
-            av_packet.dts -= pts_offset;
            av_packet_rescale_ts(&av_packet, codec_context->time_base, stream->time_base);

-            int ret = av_interleaved_write_frame(av_format_context, &av_packet);
+            int ret = av_write_frame(av_format_context, &av_packet);
            if(ret < 0)
                fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", stream->index, av_error_to_string(ret), ret);
        }
@ -1426,52 +1447,11 @@ int main(int argc, char **argv) {
            av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
            swr_init(swr);

-            std::deque<uint8_t*> buffered_audio;
-            std::mutex buffered_audio_mutex;
-            std::condition_variable buffered_audio_cv;
-            bool got_first_batch = false;
-
-            // TODO: Make the sound device read async instead of using a thread
-            std::thread sound_read_thread([&](){
            while(running) {
                void *sound_buffer;
                int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
-                    if(sound_buffer_size >= 0) {
-                        uint8_t *data = (uint8_t*)malloc(audio_track.sound_device.buffer_size);
-                        if(data) {
-                            memcpy(data, sound_buffer, audio_track.sound_device.buffer_size);
-                            std::unique_lock<std::mutex> lock(buffered_audio_mutex);
-                            buffered_audio.push_back(data);
-                            buffered_audio_cv.notify_one();
-                        }
-                    }
-                }
-            });
-
-            while(running) {
-                uint8_t *audio_buffer;
-                bool free_audio;
-                {
-                    // TODO: Not a good solution to lack of audio as it causes dropped frames, but it's better then complete audio desync.
-                    // The first packet is delayed for some reason...
-                    std::unique_lock<std::mutex> lock(buffered_audio_mutex);
-                    if(got_first_batch)
-                        buffered_audio_cv.wait(lock, [&]{ return !running || !buffered_audio.empty(); });
-                    else
-                        buffered_audio_cv.wait_for(lock, std::chrono::milliseconds(21), [&]{ return !running || !buffered_audio.empty(); });
-                    if(!running)
-                        break;
-
-                    if(buffered_audio.empty()) {
-                        audio_buffer = empty_audio;
-                        free_audio = false;
-                    } else {
-                        audio_buffer = buffered_audio.front();
-                        buffered_audio.pop_front();
-                        free_audio = true;
-                        got_first_batch = true;
-                    }
-                }
+                if(sound_buffer_size < 0)
+                    sound_buffer = empty_audio;

                int ret = av_frame_make_writable(audio_track.frame);
                if (ret < 0) {
@ -1480,7 +1460,7 @@ int main(int argc, char **argv) {
                }

                // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
-                swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&audio_buffer, audio_track.sound_device.frames);
+                swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames);
                audio_track.frame->pts = (clock_get_monotonic_seconds() - start_time_pts) * AV_TIME_BASE;

                ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame);
@ -1489,17 +1469,9 @@ int main(int argc, char **argv) {
                } else {
                    fprintf(stderr, "Failed to encode audio!\n");
                }
-
-                if(free_audio)
-                    free(audio_buffer);
-            }
-
-            sound_read_thread.join();
-            while(!buffered_audio.empty()) {
-                free(buffered_audio.front());
-                buffered_audio.pop_front();
            }

+            sound_device_close(&audio_track.sound_device);
            swr_free(&swr);
        }, av_format_context, &write_output_mutex);
    }
@ -1749,7 +1721,7 @@ int main(int argc, char **argv) {

        if(save_replay == 1 && !save_replay_thread.valid() && replay_buffer_size_secs != -1) {
            save_replay = 0;
-            save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format);
+            save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format, write_output_mutex);
        }

        // av_frame_free(&frame);
@ -1767,7 +1739,6 @@ int main(int argc, char **argv) {

    for(AudioTrack &audio_track : audio_tracks) {
        audio_track.thread.join();
-        sound_device_close(&audio_track.sound_device);
    }

    if (replay_buffer_size_secs == -1 && av_write_trailer(av_format_context) != 0) {
--- a/src/sound.cpp
+++ b/src/sound.cpp
@ -20,11 +20,193 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <time.h>
+#include <cmath>

-#ifdef PULSEAUDIO
-#include <pulse/simple.h>
+#include <pulse/pulseaudio.h>
+#include <pulse/mainloop.h>
+#include <pulse/xmalloc.h>
 #include <pulse/error.h>

+#define CHECK_DEAD_GOTO(p, rerror, label)                               \
+    do {                                                                \
+        if (!(p)->context || !PA_CONTEXT_IS_GOOD(pa_context_get_state((p)->context)) || \
+            !(p)->stream || !PA_STREAM_IS_GOOD(pa_stream_get_state((p)->stream))) { \
+            if (((p)->context && pa_context_get_state((p)->context) == PA_CONTEXT_FAILED) || \
+                ((p)->stream && pa_stream_get_state((p)->stream) == PA_STREAM_FAILED)) { \
+                if (rerror)                                             \
+                    *(rerror) = pa_context_errno((p)->context);         \
+            } else                                                      \
+                if (rerror)                                             \
+                    *(rerror) = PA_ERR_BADSTATE;                        \
+            goto label;                                                 \
+        }                                                               \
+    } while(false);
+
+static double clock_get_monotonic_seconds() {
+    struct timespec ts;
+    ts.tv_sec = 0;
+    ts.tv_nsec = 0;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001;
+}
+
+static int sound_device_index = 0;
+
+struct pa_handle {
+    pa_context *context;
+    pa_stream *stream;
+    pa_mainloop *mainloop;
+
+    const void *read_data;
+    size_t read_index, read_length;
+
+    int operation_success;
+};
+
+static void pa_sound_device_free(pa_handle *s) {
+    assert(s);
+
+    if (s->stream)
+        pa_stream_unref(s->stream);
+
+    if (s->context) {
+        pa_context_disconnect(s->context);
+        pa_context_unref(s->context);
+    }
+
+    if (s->mainloop)
+        pa_mainloop_free(s->mainloop);
+
+    pa_xfree(s);
+}
+
+static pa_handle* pa_sound_device_new(const char *server,
+        const char *name,
+        const char *dev,
+        const char *stream_name,
+        const pa_sample_spec *ss,
+        const pa_buffer_attr *attr,
+        int *rerror) {
+    pa_handle *p;
+    int error = PA_ERR_INTERNAL, r;
+
+    p = pa_xnew0(pa_handle, 1);
+
+    if (!(p->mainloop = pa_mainloop_new()))
+        goto fail;
+
+    if (!(p->context = pa_context_new(pa_mainloop_get_api(p->mainloop), name)))
+        goto fail;
+
+    if (pa_context_connect(p->context, server, PA_CONTEXT_NOFLAGS, NULL) < 0) {
+        error = pa_context_errno(p->context);
+        goto fail;
+    }
+
+    for (;;) {
+        pa_context_state_t state = pa_context_get_state(p->context);
+
+        if (state == PA_CONTEXT_READY)
+            break;
+
+        if (!PA_CONTEXT_IS_GOOD(state)) {
+            error = pa_context_errno(p->context);
+            goto fail;
+        }
+
+        pa_mainloop_iterate(p->mainloop, 1, NULL);
+    }
+
+    if (!(p->stream = pa_stream_new(p->context, stream_name, ss, NULL))) {
+        error = pa_context_errno(p->context);
+        goto fail;
+    }
+
+    r = pa_stream_connect_record(p->stream, dev, attr,
+        (pa_stream_flags_t)(PA_STREAM_INTERPOLATE_TIMING|PA_STREAM_ADJUST_LATENCY|PA_STREAM_AUTO_TIMING_UPDATE));
+
+    if (r < 0) {
+        error = pa_context_errno(p->context);
+        goto fail;
+    }
+
+    for (;;) {
+        pa_stream_state_t state = pa_stream_get_state(p->stream);
+
+        if (state == PA_STREAM_READY)
+            break;
+
+        if (!PA_STREAM_IS_GOOD(state)) {
+            error = pa_context_errno(p->context);
+            goto fail;
+        }
+
+        pa_mainloop_iterate(p->mainloop, 1, NULL);
+    }
+
+    return p;
+
+fail:
+    if (rerror)
+        *rerror = error;
+    pa_sound_device_free(p);
+    return NULL;
+}
+
+// Returns a negative value on failure. Always blocks a time specified matching the sampling rate of the audio.
+static int pa_sound_device_read(pa_handle *p, void *data, size_t length) {
+    assert(p);
+
+    int r = 0;
+    int *rerror = &r;
+    bool retry = true;
+
+    pa_mainloop_iterate(p->mainloop, 0, NULL);
+    const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0);
+
+    CHECK_DEAD_GOTO(p, rerror, fail);
+
+    while(true) {
+        if(pa_stream_readable_size(p->stream) < length) {
+            if(!retry)
+                break;
+
+            retry = false;
+
+            const double start_time = clock_get_monotonic_seconds();
+            while((clock_get_monotonic_seconds() - start_time) * 1000.0 < timeout_ms) {
+                pa_mainloop_prepare(p->mainloop, 1 * 1000);
+                pa_mainloop_poll(p->mainloop);
+                pa_mainloop_dispatch(p->mainloop);
+            }
+
+            continue;
+        }
+
+        r = pa_stream_peek(p->stream, &p->read_data, &p->read_length);
+        if(r != 0) {
+            if(retry)
+                usleep(timeout_ms * 1000);
+            return -1;
+        }
+
+        if(p->read_length < length || !p->read_data) {
+            pa_stream_drop(p->stream);
+            if(retry)
+                usleep(timeout_ms * 1000);
+            return -1;
+        }
+
+        memcpy(data, p->read_data, length);
+        pa_stream_drop(p->stream);
+        return 0;
+    }
+
+    fail:
+    return -1;
+}
+
 int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
    pa_sample_spec ss;
    ss.format = PA_SAMPLE_S16LE;
@ -39,8 +221,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
    buffer_attr.maxlength = period_frame_size * 2 * num_channels; // 2 bytes/sample, @num_channels channels
    buffer_attr.fragsize = buffer_attr.maxlength;

-    pa_simple *pa_handle = pa_simple_new(nullptr, "gpu-screen-recorder", PA_STREAM_RECORD, name, "record", &ss, nullptr, &buffer_attr, &error);
-    if(!pa_handle) {
+    // We want a unique stream name for every device which allows each input to be a different box in pipewire graph software
+    char stream_name[64];
+    snprintf(stream_name, sizeof(stream_name), "record-%d", sound_device_index);
+    ++sound_device_index;
+
+    pa_handle *handle = pa_sound_device_new(nullptr, "gpu-screen-recorder", name, stream_name, &ss, &buffer_attr, &error);
+    if(!handle) {
        fprintf(stderr, "pa_simple_new() failed: %s. Audio input device %s might not be valid\n", pa_strerror(error), name);
        return -1;
    }
@ -49,13 +236,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
    void *buffer = malloc(buffer_size);
    if(!buffer) {
        fprintf(stderr, "failed to allocate buffer for audio\n");
-        pa_simple_free(pa_handle);
+        pa_sound_device_free(handle);
        return -1;
    }

    fprintf(stderr, "Using pulseaudio\n");

-    device->handle = pa_handle;
+    device->handle = handle;
    device->buffer = buffer;
    device->buffer_size = buffer_size;
    device->frames = period_frame_size;
@ -63,100 +250,15 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
 }

 void sound_device_close(SoundDevice *device) {
-    pa_simple_free((pa_simple*)device->handle);
+    pa_sound_device_free((pa_handle*)device->handle);
    free(device->buffer);
 }

 int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
-    int error = 0;
-    if(pa_simple_read((pa_simple*)device->handle, device->buffer, device->buffer_size, &error) < 0) {
-        fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
+    if(pa_sound_device_read((pa_handle*)device->handle, device->buffer, device->buffer_size) < 0) {
+        //fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
        return -1;
    }
    *buffer = device->buffer;
    return device->frames;
 }
-#else
-#define ALSA_PCM_NEW_HW_PARAMS_API
-#include <alsa/asoundlib.h>
-
-int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
-    int rc;
-    snd_pcm_t *handle;
-
-    rc = snd_pcm_open(&handle, name, SND_PCM_STREAM_CAPTURE, 0);
-    if(rc < 0) {
-        fprintf(stderr, "unable to open pcm device 'default', reason: %s\n", snd_strerror(rc));
-        return rc;
-    }
-
-    snd_pcm_hw_params_t *params;
-    snd_pcm_hw_params_alloca(&params);
-    // Fill the params with default values
-    snd_pcm_hw_params_any(handle, params);
-    // Interleaved mode
-    snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
-    // Signed 16--bit little-endian format
-    snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
-    snd_pcm_hw_params_set_channels(handle, params, num_channels);
-
-    // 48000 bits/second samling rate (DVD quality)
-    unsigned int val = 48000;
-    int dir;
-    snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
-
-    snd_pcm_uframes_t frames = period_frame_size;
-    snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
-
-    // Write the parmeters to the driver
-    rc = snd_pcm_hw_params(handle, params);
-    if(rc < 0) {
-        fprintf(stderr, "unable to set hw parameters, reason: %s\n", snd_strerror(rc));
-        snd_pcm_close(handle);
-        return rc;
-    }
-
-    // Use a buffer large enough to hold one period
-    snd_pcm_hw_params_get_period_size(params, &frames, &dir);
-    int buffer_size = frames * 2 * num_channels; // 2 bytes/sample, @num_channels channels
-    void *buffer = malloc(buffer_size);
-    if(!buffer) {
-        fprintf(stderr, "failed to allocate buffer for audio\n");
-        snd_pcm_close(handle);
-        return -1;
-    }
-
-    fprintf(stderr, "Using alsa\n");
-
-    device->handle = handle;
-    device->buffer = buffer;
-    device->buffer_size = buffer_size;
-    device->frames = frames;
-    return 0;
-}
-
-void sound_device_close(SoundDevice *device) {
-    /* TODO: Is this also needed in @sound_device_get_by_name on failure? */
-    // TODO: This has been commented out since it causes the thread to block forever. Why?
-    //snd_pcm_drain((snd_pcm_t*)device->handle);
-    snd_pcm_close((snd_pcm_t*)device->handle);
-    free(device->buffer);
-}
-
-int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
-    int rc = snd_pcm_readi((snd_pcm_t*)device->handle, device->buffer, device->frames);
-    if (rc == -EPIPE) {
-        /* overrun */
-        fprintf(stderr, "overrun occured\n");
-        snd_pcm_prepare((snd_pcm_t*)device->handle);
-        return rc;
-    } else if(rc < 0) {
-        fprintf(stderr, "failed to read from sound device, reason: %s\n", snd_strerror(rc));
-        return rc;
-    } else if (rc != (int)device->frames) {
-        fprintf(stderr, "short read, read %d frames\n", rc);
-    }
-    *buffer = device->buffer;
-    return rc;
-}
-#endif