Fix replay video/audio desync, fix dummy audio when dropping audio input, give each audio stream a new name so it can be replaced with pipewire graphs

This commit is contained in:
dec05eba 2022-09-20 03:39:15 +02:00
parent 9f2ddf3802
commit 919890b7b2
6 changed files with 258 additions and 189 deletions

2
TODO
View File

@ -15,4 +15,4 @@ Use mov+faststart.
Allow recording all monitors/selected monitor without nvfbc by recording the compositor proxy window and only recording the part that matches the monitor(s).
Allow recording a region by recording the compositor proxy window / nvfbc window and copying part of it.
Resizing the target window to be smaller than the initial size is buggy. The window texture ends up duplicated in the video.
Handle frames (especially for applications with rounded client-side decorations, such as gnome applications. They are huge).
Handle frames (especially for applications with rounded client-side decorations, such as gnome applications. They are huge).

View File

@ -1,8 +1,8 @@
#!/bin/sh -e
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample"
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample"
includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include"
libs="$(pkg-config --libs $dependencies) /usr/lib64/libcuda.so -ldl -pthread -lm"
g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/sound.cpp -O2 $includes
g++ -c src/main.cpp -O2 $includes
g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs

View File

@ -15,11 +15,11 @@ apt-get -y install build-essential nvidia-cuda-dev\
libglew-dev libglfw3-dev\
libpulse-dev
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample"
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample"
includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include"
libs="$(pkg-config --libs $dependencies) /usr/lib/x86_64-linux-gnu/stubs/libcuda.so -ldl -pthread -lm"
g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/sound.cpp -O2 $includes
g++ -c src/main.cpp -O2 $includes
g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs
install -Dm755 "gpu-screen-recorder" "/usr/local/bin/gpu-screen-recorder"

View File

@ -8,9 +8,6 @@ platforms = ["posix"]
include_dirs = ["/opt/cuda/targets/x86_64-linux/include"]
libs = ["/usr/lib64/libcuda.so"]
[define]
PULSEAUDIO = "1"
[dependencies]
glew = ">=2"
libavcodec = ">=58"
@ -23,7 +20,6 @@ xcomposite = ">=0.2"
# TODO: Remove this dependency, this is needed right now for glfwMakeContextCurrent
glfw3 = "3"
#alsa = "1"
libpulse-simple = ">=13"
libpulse = ">=13"
libswresample = ">=3"

View File

@ -62,7 +62,6 @@ extern "C" {
#include <deque>
#include <future>
#include <condition_variable>
//#include <CL/cl.h>
@ -434,7 +433,7 @@ static void receive_frames(AVCodecContext *av_codec_context, int stream_index, A
} else {
av_packet_rescale_ts(&av_packet, av_codec_context->time_base, stream->time_base);
av_packet.stream_index = stream->index;
int ret = av_interleaved_write_frame(av_format_context, &av_packet);
int ret = av_write_frame(av_format_context, &av_packet);
if(ret < 0) {
fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", av_packet.stream_index, av_error_to_string(ret), ret);
}
@ -810,33 +809,51 @@ static std::future<void> save_replay_thread;
static std::vector<AVPacket> save_replay_packets;
static std::string save_replay_output_filepath;
static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format) {
static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format, std::mutex &write_output_mutex) {
if(save_replay_thread.valid())
return;
size_t start_index = (size_t)-1;
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
start_index = i;
break;
int64_t video_pts_offset = 0;
int64_t audio_pts_offset = 0;
{
std::lock_guard<std::mutex> lock(write_output_mutex);
start_index = (size_t)-1;
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
start_index = i;
break;
}
}
if(start_index == (size_t)-1)
return;
if(frames_erased) {
video_pts_offset = frame_data_queue[start_index].pts;
// Find the next audio packet to use as audio pts offset
for(size_t i = start_index; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if(av_packet.stream_index != video_stream_index) {
audio_pts_offset = av_packet.pts;
break;
}
}
} else {
start_index = 0;
}
save_replay_packets.resize(frame_data_queue.size());
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
}
}
if(start_index == (size_t)-1)
return;
int64_t pts_offset = 0;
if(frames_erased)
pts_offset = frame_data_queue[start_index].pts;
save_replay_packets.resize(frame_data_queue.size());
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
}
save_replay_output_filepath = output_dir + "/Replay_" + get_date_str() + "." + container_format;
save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, pts_offset, video_codec_context, &audio_tracks]() mutable {
save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, video_pts_offset, audio_pts_offset, video_codec_context, &audio_tracks]() mutable {
AVFormatContext *av_format_context;
// The output format is automatically guessed from the file extension
avformat_alloc_output_context2(&av_format_context, nullptr, container_format.c_str(), nullptr);
@ -874,18 +891,22 @@ static void save_replay_async(AVCodecContext *video_codec_context, int video_str
AVStream *stream = video_stream;
AVCodecContext *codec_context = video_codec_context;
if(av_packet.stream_index != video_stream_index) {
if(av_packet.stream_index == video_stream_index) {
av_packet.pts -= video_pts_offset;
av_packet.dts -= video_pts_offset;
} else {
AudioTrack *audio_track = stream_index_to_audio_track_map[av_packet.stream_index];
stream = audio_track->stream;
codec_context = audio_track->codec_context;
av_packet.pts -= audio_pts_offset;
av_packet.dts -= audio_pts_offset;
}
av_packet.stream_index = stream->index;
av_packet.pts -= pts_offset;
av_packet.dts -= pts_offset;
av_packet_rescale_ts(&av_packet, codec_context->time_base, stream->time_base);
int ret = av_interleaved_write_frame(av_format_context, &av_packet);
int ret = av_write_frame(av_format_context, &av_packet);
if(ret < 0)
fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", stream->index, av_error_to_string(ret), ret);
}
@ -1426,52 +1447,11 @@ int main(int argc, char **argv) {
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
swr_init(swr);
std::deque<uint8_t*> buffered_audio;
std::mutex buffered_audio_mutex;
std::condition_variable buffered_audio_cv;
bool got_first_batch = false;
// TODO: Make the sound device read async instead of using a thread
std::thread sound_read_thread([&](){
while(running) {
void *sound_buffer;
int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
if(sound_buffer_size >= 0) {
uint8_t *data = (uint8_t*)malloc(audio_track.sound_device.buffer_size);
if(data) {
memcpy(data, sound_buffer, audio_track.sound_device.buffer_size);
std::unique_lock<std::mutex> lock(buffered_audio_mutex);
buffered_audio.push_back(data);
buffered_audio_cv.notify_one();
}
}
}
});
while(running) {
uint8_t *audio_buffer;
bool free_audio;
{
// TODO: Not a good solution to lack of audio as it causes dropped frames, but it's better then complete audio desync.
// The first packet is delayed for some reason...
std::unique_lock<std::mutex> lock(buffered_audio_mutex);
if(got_first_batch)
buffered_audio_cv.wait(lock, [&]{ return !running || !buffered_audio.empty(); });
else
buffered_audio_cv.wait_for(lock, std::chrono::milliseconds(21), [&]{ return !running || !buffered_audio.empty(); });
if(!running)
break;
if(buffered_audio.empty()) {
audio_buffer = empty_audio;
free_audio = false;
} else {
audio_buffer = buffered_audio.front();
buffered_audio.pop_front();
free_audio = true;
got_first_batch = true;
}
}
void *sound_buffer;
int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
if(sound_buffer_size < 0)
sound_buffer = empty_audio;
int ret = av_frame_make_writable(audio_track.frame);
if (ret < 0) {
@ -1480,7 +1460,7 @@ int main(int argc, char **argv) {
}
// TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&audio_buffer, audio_track.sound_device.frames);
swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames);
audio_track.frame->pts = (clock_get_monotonic_seconds() - start_time_pts) * AV_TIME_BASE;
ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame);
@ -1489,17 +1469,9 @@ int main(int argc, char **argv) {
} else {
fprintf(stderr, "Failed to encode audio!\n");
}
if(free_audio)
free(audio_buffer);
}
sound_read_thread.join();
while(!buffered_audio.empty()) {
free(buffered_audio.front());
buffered_audio.pop_front();
}
sound_device_close(&audio_track.sound_device);
swr_free(&swr);
}, av_format_context, &write_output_mutex);
}
@ -1749,7 +1721,7 @@ int main(int argc, char **argv) {
if(save_replay == 1 && !save_replay_thread.valid() && replay_buffer_size_secs != -1) {
save_replay = 0;
save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format);
save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format, write_output_mutex);
}
// av_frame_free(&frame);
@ -1767,7 +1739,6 @@ int main(int argc, char **argv) {
for(AudioTrack &audio_track : audio_tracks) {
audio_track.thread.join();
sound_device_close(&audio_track.sound_device);
}
if (replay_buffer_size_secs == -1 && av_write_trailer(av_format_context) != 0) {

View File

@ -20,11 +20,193 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <cmath>
#ifdef PULSEAUDIO
#include <pulse/simple.h>
#include <pulse/pulseaudio.h>
#include <pulse/mainloop.h>
#include <pulse/xmalloc.h>
#include <pulse/error.h>
#define CHECK_DEAD_GOTO(p, rerror, label) \
do { \
if (!(p)->context || !PA_CONTEXT_IS_GOOD(pa_context_get_state((p)->context)) || \
!(p)->stream || !PA_STREAM_IS_GOOD(pa_stream_get_state((p)->stream))) { \
if (((p)->context && pa_context_get_state((p)->context) == PA_CONTEXT_FAILED) || \
((p)->stream && pa_stream_get_state((p)->stream) == PA_STREAM_FAILED)) { \
if (rerror) \
*(rerror) = pa_context_errno((p)->context); \
} else \
if (rerror) \
*(rerror) = PA_ERR_BADSTATE; \
goto label; \
} \
} while(false);
static double clock_get_monotonic_seconds() {
struct timespec ts;
ts.tv_sec = 0;
ts.tv_nsec = 0;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001;
}
static int sound_device_index = 0;
struct pa_handle {
pa_context *context;
pa_stream *stream;
pa_mainloop *mainloop;
const void *read_data;
size_t read_index, read_length;
int operation_success;
};
static void pa_sound_device_free(pa_handle *s) {
assert(s);
if (s->stream)
pa_stream_unref(s->stream);
if (s->context) {
pa_context_disconnect(s->context);
pa_context_unref(s->context);
}
if (s->mainloop)
pa_mainloop_free(s->mainloop);
pa_xfree(s);
}
static pa_handle* pa_sound_device_new(const char *server,
const char *name,
const char *dev,
const char *stream_name,
const pa_sample_spec *ss,
const pa_buffer_attr *attr,
int *rerror) {
pa_handle *p;
int error = PA_ERR_INTERNAL, r;
p = pa_xnew0(pa_handle, 1);
if (!(p->mainloop = pa_mainloop_new()))
goto fail;
if (!(p->context = pa_context_new(pa_mainloop_get_api(p->mainloop), name)))
goto fail;
if (pa_context_connect(p->context, server, PA_CONTEXT_NOFLAGS, NULL) < 0) {
error = pa_context_errno(p->context);
goto fail;
}
for (;;) {
pa_context_state_t state = pa_context_get_state(p->context);
if (state == PA_CONTEXT_READY)
break;
if (!PA_CONTEXT_IS_GOOD(state)) {
error = pa_context_errno(p->context);
goto fail;
}
pa_mainloop_iterate(p->mainloop, 1, NULL);
}
if (!(p->stream = pa_stream_new(p->context, stream_name, ss, NULL))) {
error = pa_context_errno(p->context);
goto fail;
}
r = pa_stream_connect_record(p->stream, dev, attr,
(pa_stream_flags_t)(PA_STREAM_INTERPOLATE_TIMING|PA_STREAM_ADJUST_LATENCY|PA_STREAM_AUTO_TIMING_UPDATE));
if (r < 0) {
error = pa_context_errno(p->context);
goto fail;
}
for (;;) {
pa_stream_state_t state = pa_stream_get_state(p->stream);
if (state == PA_STREAM_READY)
break;
if (!PA_STREAM_IS_GOOD(state)) {
error = pa_context_errno(p->context);
goto fail;
}
pa_mainloop_iterate(p->mainloop, 1, NULL);
}
return p;
fail:
if (rerror)
*rerror = error;
pa_sound_device_free(p);
return NULL;
}
// Returns a negative value on failure. Always blocks a time specified matching the sampling rate of the audio.
static int pa_sound_device_read(pa_handle *p, void *data, size_t length) {
assert(p);
int r = 0;
int *rerror = &r;
bool retry = true;
pa_mainloop_iterate(p->mainloop, 0, NULL);
const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0);
CHECK_DEAD_GOTO(p, rerror, fail);
while(true) {
if(pa_stream_readable_size(p->stream) < length) {
if(!retry)
break;
retry = false;
const double start_time = clock_get_monotonic_seconds();
while((clock_get_monotonic_seconds() - start_time) * 1000.0 < timeout_ms) {
pa_mainloop_prepare(p->mainloop, 1 * 1000);
pa_mainloop_poll(p->mainloop);
pa_mainloop_dispatch(p->mainloop);
}
continue;
}
r = pa_stream_peek(p->stream, &p->read_data, &p->read_length);
if(r != 0) {
if(retry)
usleep(timeout_ms * 1000);
return -1;
}
if(p->read_length < length || !p->read_data) {
pa_stream_drop(p->stream);
if(retry)
usleep(timeout_ms * 1000);
return -1;
}
memcpy(data, p->read_data, length);
pa_stream_drop(p->stream);
return 0;
}
fail:
return -1;
}
int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
pa_sample_spec ss;
ss.format = PA_SAMPLE_S16LE;
@ -39,8 +221,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
buffer_attr.maxlength = period_frame_size * 2 * num_channels; // 2 bytes/sample, @num_channels channels
buffer_attr.fragsize = buffer_attr.maxlength;
pa_simple *pa_handle = pa_simple_new(nullptr, "gpu-screen-recorder", PA_STREAM_RECORD, name, "record", &ss, nullptr, &buffer_attr, &error);
if(!pa_handle) {
// We want a unique stream name for every device which allows each input to be a different box in pipewire graph software
char stream_name[64];
snprintf(stream_name, sizeof(stream_name), "record-%d", sound_device_index);
++sound_device_index;
pa_handle *handle = pa_sound_device_new(nullptr, "gpu-screen-recorder", name, stream_name, &ss, &buffer_attr, &error);
if(!handle) {
fprintf(stderr, "pa_simple_new() failed: %s. Audio input device %s might not be valid\n", pa_strerror(error), name);
return -1;
}
@ -49,13 +236,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
void *buffer = malloc(buffer_size);
if(!buffer) {
fprintf(stderr, "failed to allocate buffer for audio\n");
pa_simple_free(pa_handle);
pa_sound_device_free(handle);
return -1;
}
fprintf(stderr, "Using pulseaudio\n");
device->handle = pa_handle;
device->handle = handle;
device->buffer = buffer;
device->buffer_size = buffer_size;
device->frames = period_frame_size;
@ -63,100 +250,15 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
}
void sound_device_close(SoundDevice *device) {
pa_simple_free((pa_simple*)device->handle);
pa_sound_device_free((pa_handle*)device->handle);
free(device->buffer);
}
int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
int error = 0;
if(pa_simple_read((pa_simple*)device->handle, device->buffer, device->buffer_size, &error) < 0) {
fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
if(pa_sound_device_read((pa_handle*)device->handle, device->buffer, device->buffer_size) < 0) {
//fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
return -1;
}
*buffer = device->buffer;
return device->frames;
}
#else
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
int rc;
snd_pcm_t *handle;
rc = snd_pcm_open(&handle, name, SND_PCM_STREAM_CAPTURE, 0);
if(rc < 0) {
fprintf(stderr, "unable to open pcm device 'default', reason: %s\n", snd_strerror(rc));
return rc;
}
snd_pcm_hw_params_t *params;
snd_pcm_hw_params_alloca(&params);
// Fill the params with default values
snd_pcm_hw_params_any(handle, params);
// Interleaved mode
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
// Signed 16--bit little-endian format
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
snd_pcm_hw_params_set_channels(handle, params, num_channels);
// 48000 bits/second samling rate (DVD quality)
unsigned int val = 48000;
int dir;
snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
snd_pcm_uframes_t frames = period_frame_size;
snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
// Write the parmeters to the driver
rc = snd_pcm_hw_params(handle, params);
if(rc < 0) {
fprintf(stderr, "unable to set hw parameters, reason: %s\n", snd_strerror(rc));
snd_pcm_close(handle);
return rc;
}
// Use a buffer large enough to hold one period
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
int buffer_size = frames * 2 * num_channels; // 2 bytes/sample, @num_channels channels
void *buffer = malloc(buffer_size);
if(!buffer) {
fprintf(stderr, "failed to allocate buffer for audio\n");
snd_pcm_close(handle);
return -1;
}
fprintf(stderr, "Using alsa\n");
device->handle = handle;
device->buffer = buffer;
device->buffer_size = buffer_size;
device->frames = frames;
return 0;
}
void sound_device_close(SoundDevice *device) {
/* TODO: Is this also needed in @sound_device_get_by_name on failure? */
// TODO: This has been commented out since it causes the thread to block forever. Why?
//snd_pcm_drain((snd_pcm_t*)device->handle);
snd_pcm_close((snd_pcm_t*)device->handle);
free(device->buffer);
}
int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
int rc = snd_pcm_readi((snd_pcm_t*)device->handle, device->buffer, device->frames);
if (rc == -EPIPE) {
/* overrun */
fprintf(stderr, "overrun occured\n");
snd_pcm_prepare((snd_pcm_t*)device->handle);
return rc;
} else if(rc < 0) {
fprintf(stderr, "failed to read from sound device, reason: %s\n", snd_strerror(rc));
return rc;
} else if (rc != (int)device->frames) {
fprintf(stderr, "short read, read %d frames\n", rc);
}
*buffer = device->buffer;
return rc;
}
#endif
}