Fix replay video/audio desync, fix dummy audio when dropping audio input, give each audio stream a new name so it can be replaced with pipewire graphs

This commit is contained in:
dec05eba 2022-09-20 03:39:15 +02:00
parent 9f2ddf3802
commit 919890b7b2
6 changed files with 258 additions and 189 deletions

View File

@ -1,8 +1,8 @@
#!/bin/sh -e
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample"
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample"
includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include"
libs="$(pkg-config --libs $dependencies) /usr/lib64/libcuda.so -ldl -pthread -lm"
g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/sound.cpp -O2 $includes
g++ -c src/main.cpp -O2 $includes
g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs

View File

@ -15,11 +15,11 @@ apt-get -y install build-essential nvidia-cuda-dev\
libglew-dev libglfw3-dev\
libpulse-dev
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample"
dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample"
includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include"
libs="$(pkg-config --libs $dependencies) /usr/lib/x86_64-linux-gnu/stubs/libcuda.so -ldl -pthread -lm"
g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1
g++ -c src/sound.cpp -O2 $includes
g++ -c src/main.cpp -O2 $includes
g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs
install -Dm755 "gpu-screen-recorder" "/usr/local/bin/gpu-screen-recorder"

View File

@ -8,9 +8,6 @@ platforms = ["posix"]
include_dirs = ["/opt/cuda/targets/x86_64-linux/include"]
libs = ["/usr/lib64/libcuda.so"]
[define]
PULSEAUDIO = "1"
[dependencies]
glew = ">=2"
libavcodec = ">=58"
@ -23,7 +20,6 @@ xcomposite = ">=0.2"
# TODO: Remove this dependency, this is needed right now for glfwMakeContextCurrent
glfw3 = "3"
#alsa = "1"
libpulse-simple = ">=13"
libpulse = ">=13"
libswresample = ">=3"

View File

@ -62,7 +62,6 @@ extern "C" {
#include <deque>
#include <future>
#include <condition_variable>
//#include <CL/cl.h>
@ -434,7 +433,7 @@ static void receive_frames(AVCodecContext *av_codec_context, int stream_index, A
} else {
av_packet_rescale_ts(&av_packet, av_codec_context->time_base, stream->time_base);
av_packet.stream_index = stream->index;
int ret = av_interleaved_write_frame(av_format_context, &av_packet);
int ret = av_write_frame(av_format_context, &av_packet);
if(ret < 0) {
fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", av_packet.stream_index, av_error_to_string(ret), ret);
}
@ -810,33 +809,51 @@ static std::future<void> save_replay_thread;
static std::vector<AVPacket> save_replay_packets;
static std::string save_replay_output_filepath;
static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format) {
static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format, std::mutex &write_output_mutex) {
if(save_replay_thread.valid())
return;
size_t start_index = (size_t)-1;
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
start_index = i;
break;
int64_t video_pts_offset = 0;
int64_t audio_pts_offset = 0;
{
std::lock_guard<std::mutex> lock(write_output_mutex);
start_index = (size_t)-1;
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
start_index = i;
break;
}
}
if(start_index == (size_t)-1)
return;
if(frames_erased) {
video_pts_offset = frame_data_queue[start_index].pts;
// Find the next audio packet to use as audio pts offset
for(size_t i = start_index; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if(av_packet.stream_index != video_stream_index) {
audio_pts_offset = av_packet.pts;
break;
}
}
} else {
start_index = 0;
}
save_replay_packets.resize(frame_data_queue.size());
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
}
}
if(start_index == (size_t)-1)
return;
int64_t pts_offset = 0;
if(frames_erased)
pts_offset = frame_data_queue[start_index].pts;
save_replay_packets.resize(frame_data_queue.size());
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
}
save_replay_output_filepath = output_dir + "/Replay_" + get_date_str() + "." + container_format;
save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, pts_offset, video_codec_context, &audio_tracks]() mutable {
save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, video_pts_offset, audio_pts_offset, video_codec_context, &audio_tracks]() mutable {
AVFormatContext *av_format_context;
// The output format is automatically guessed from the file extension
avformat_alloc_output_context2(&av_format_context, nullptr, container_format.c_str(), nullptr);
@ -874,18 +891,22 @@ static void save_replay_async(AVCodecContext *video_codec_context, int video_str
AVStream *stream = video_stream;
AVCodecContext *codec_context = video_codec_context;
if(av_packet.stream_index != video_stream_index) {
if(av_packet.stream_index == video_stream_index) {
av_packet.pts -= video_pts_offset;
av_packet.dts -= video_pts_offset;
} else {
AudioTrack *audio_track = stream_index_to_audio_track_map[av_packet.stream_index];
stream = audio_track->stream;
codec_context = audio_track->codec_context;
av_packet.pts -= audio_pts_offset;
av_packet.dts -= audio_pts_offset;
}
av_packet.stream_index = stream->index;
av_packet.pts -= pts_offset;
av_packet.dts -= pts_offset;
av_packet_rescale_ts(&av_packet, codec_context->time_base, stream->time_base);
int ret = av_interleaved_write_frame(av_format_context, &av_packet);
int ret = av_write_frame(av_format_context, &av_packet);
if(ret < 0)
fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", stream->index, av_error_to_string(ret), ret);
}
@ -1426,52 +1447,11 @@ int main(int argc, char **argv) {
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
swr_init(swr);
std::deque<uint8_t*> buffered_audio;
std::mutex buffered_audio_mutex;
std::condition_variable buffered_audio_cv;
bool got_first_batch = false;
// TODO: Make the sound device read async instead of using a thread
std::thread sound_read_thread([&](){
while(running) {
void *sound_buffer;
int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
if(sound_buffer_size >= 0) {
uint8_t *data = (uint8_t*)malloc(audio_track.sound_device.buffer_size);
if(data) {
memcpy(data, sound_buffer, audio_track.sound_device.buffer_size);
std::unique_lock<std::mutex> lock(buffered_audio_mutex);
buffered_audio.push_back(data);
buffered_audio_cv.notify_one();
}
}
}
});
while(running) {
uint8_t *audio_buffer;
bool free_audio;
{
// TODO: Not a good solution to lack of audio as it causes dropped frames, but it's better then complete audio desync.
// The first packet is delayed for some reason...
std::unique_lock<std::mutex> lock(buffered_audio_mutex);
if(got_first_batch)
buffered_audio_cv.wait(lock, [&]{ return !running || !buffered_audio.empty(); });
else
buffered_audio_cv.wait_for(lock, std::chrono::milliseconds(21), [&]{ return !running || !buffered_audio.empty(); });
if(!running)
break;
if(buffered_audio.empty()) {
audio_buffer = empty_audio;
free_audio = false;
} else {
audio_buffer = buffered_audio.front();
buffered_audio.pop_front();
free_audio = true;
got_first_batch = true;
}
}
void *sound_buffer;
int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
if(sound_buffer_size < 0)
sound_buffer = empty_audio;
int ret = av_frame_make_writable(audio_track.frame);
if (ret < 0) {
@ -1480,7 +1460,7 @@ int main(int argc, char **argv) {
}
// TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&audio_buffer, audio_track.sound_device.frames);
swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames);
audio_track.frame->pts = (clock_get_monotonic_seconds() - start_time_pts) * AV_TIME_BASE;
ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame);
@ -1489,17 +1469,9 @@ int main(int argc, char **argv) {
} else {
fprintf(stderr, "Failed to encode audio!\n");
}
if(free_audio)
free(audio_buffer);
}
sound_read_thread.join();
while(!buffered_audio.empty()) {
free(buffered_audio.front());
buffered_audio.pop_front();
}
sound_device_close(&audio_track.sound_device);
swr_free(&swr);
}, av_format_context, &write_output_mutex);
}
@ -1749,7 +1721,7 @@ int main(int argc, char **argv) {
if(save_replay == 1 && !save_replay_thread.valid() && replay_buffer_size_secs != -1) {
save_replay = 0;
save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format);
save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format, write_output_mutex);
}
// av_frame_free(&frame);
@ -1767,7 +1739,6 @@ int main(int argc, char **argv) {
for(AudioTrack &audio_track : audio_tracks) {
audio_track.thread.join();
sound_device_close(&audio_track.sound_device);
}
if (replay_buffer_size_secs == -1 && av_write_trailer(av_format_context) != 0) {

View File

@ -20,11 +20,193 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <cmath>
#ifdef PULSEAUDIO
#include <pulse/simple.h>
#include <pulse/pulseaudio.h>
#include <pulse/mainloop.h>
#include <pulse/xmalloc.h>
#include <pulse/error.h>
// Jumps to @label when the connection held by @p is no longer usable, i.e. when
// the context or stream is missing or not in a "good" state.
// If @rerror is non-null it receives pa_context_errno() of the context when the
// context or stream is in its FAILED state, and PA_ERR_BADSTATE otherwise.
// The macro deliberately does not end with a semicolon so that
// `CHECK_DEAD_GOTO(...);` is a single statement and can be used as the body of
// an unbraced if/else.
#define CHECK_DEAD_GOTO(p, rerror, label) \
    do { \
        if (!(p)->context || !PA_CONTEXT_IS_GOOD(pa_context_get_state((p)->context)) || \
            !(p)->stream || !PA_STREAM_IS_GOOD(pa_stream_get_state((p)->stream))) { \
            if (((p)->context && pa_context_get_state((p)->context) == PA_CONTEXT_FAILED) || \
                ((p)->stream && pa_stream_get_state((p)->stream) == PA_STREAM_FAILED)) { \
                if (rerror) \
                    *(rerror) = pa_context_errno((p)->context); \
            } else \
                if (rerror) \
                    *(rerror) = PA_ERR_BADSTATE; \
            goto label; \
        } \
    } while(false)
// Returns the current CLOCK_MONOTONIC reading as fractional seconds.
// The timespec starts out zeroed, so if clock_gettime() does not fill it in
// the result is 0.
static double clock_get_monotonic_seconds() {
    const double seconds_per_nanosecond = 0.000000001;

    struct timespec now = {};
    clock_gettime(CLOCK_MONOTONIC, &now);

    const double whole_seconds = (double)now.tv_sec;
    const double fractional_seconds = (double)now.tv_nsec * seconds_per_nanosecond;
    return whole_seconds + fractional_seconds;
}
// Running counter of opened sound devices. sound_device_get_by_name() formats it
// into the stream name ("record-<index>") and then increments it, so every
// stream gets a different name.
static int sound_device_index = 0;
// State for one PulseAudio record connection, created by pa_sound_device_new()
// and released by pa_sound_device_free().
struct pa_handle {
// Connection to the PulseAudio server, created on @mainloop's API
pa_context *context;
// Record stream created on @context
pa_stream *stream;
// Main loop that is iterated manually by the code using this handle
pa_mainloop *mainloop;
// Fragment pointer filled in by pa_stream_peek() in pa_sound_device_read()
const void *read_data;
// @read_length is the size reported by pa_stream_peek() for @read_data.
// @read_index is not referenced by the code visible here.
size_t read_index, read_length;
// Not referenced by the code visible here
int operation_success;
};
// Tears down a handle created by pa_sound_device_new().
// Each member is released only if it was set, so this is also usable on a
// partially constructed handle. Order: stream, then context (disconnect before
// unref), then the main loop, and finally the handle memory itself.
static void pa_sound_device_free(pa_handle *s) {
    assert(s);

    if (s->stream != nullptr)
        pa_stream_unref(s->stream);

    if (s->context != nullptr) {
        pa_context_disconnect(s->context);
        pa_context_unref(s->context);
    }

    if (s->mainloop != nullptr)
        pa_mainloop_free(s->mainloop);

    pa_xfree(s);
}
// Creates a PulseAudio record connection and blocks until it is ready.
//
// @server       server to connect to, passed to pa_context_connect()
// @name         application name given to the context
// @dev          source to record from, passed to pa_stream_connect_record()
// @stream_name  name given to the record stream
// @ss           sample format of the stream
// @attr         buffer attributes of the stream
// @rerror       optional; receives a PulseAudio error code on failure
//
// Returns the new handle, or NULL on failure. A handle must be released with
// pa_sound_device_free().
static pa_handle* pa_sound_device_new(const char *server,
        const char *name,
        const char *dev,
        const char *stream_name,
        const pa_sample_spec *ss,
        const pa_buffer_attr *attr,
        int *rerror) {
    // Declared up front so that no `goto fail` jumps over an initialization
    pa_handle *p;
    int error = PA_ERR_INTERNAL, r;

    p = pa_xnew0(pa_handle, 1);

    if (!(p->mainloop = pa_mainloop_new()))
        goto fail;

    if (!(p->context = pa_context_new(pa_mainloop_get_api(p->mainloop), name)))
        goto fail;

    if (pa_context_connect(p->context, server, PA_CONTEXT_NOFLAGS, NULL) < 0) {
        error = pa_context_errno(p->context);
        goto fail;
    }

    // Drive the main loop by hand until the context is connected
    for (;;) {
        pa_context_state_t state = pa_context_get_state(p->context);

        if (state == PA_CONTEXT_READY)
            break;

        if (!PA_CONTEXT_IS_GOOD(state)) {
            error = pa_context_errno(p->context);
            goto fail;
        }

        // A failed iteration can leave the state "good" forever; bail out
        // instead of spinning in this loop.
        if (pa_mainloop_iterate(p->mainloop, 1, NULL) < 0) {
            error = PA_ERR_INTERNAL;
            goto fail;
        }
    }

    if (!(p->stream = pa_stream_new(p->context, stream_name, ss, NULL))) {
        error = pa_context_errno(p->context);
        goto fail;
    }

    r = pa_stream_connect_record(p->stream, dev, attr,
        (pa_stream_flags_t)(PA_STREAM_INTERPOLATE_TIMING|PA_STREAM_ADJUST_LATENCY|PA_STREAM_AUTO_TIMING_UPDATE));

    if (r < 0) {
        error = pa_context_errno(p->context);
        goto fail;
    }

    // Drive the main loop by hand until the record stream is set up
    for (;;) {
        pa_stream_state_t state = pa_stream_get_state(p->stream);

        if (state == PA_STREAM_READY)
            break;

        if (!PA_STREAM_IS_GOOD(state)) {
            error = pa_context_errno(p->context);
            goto fail;
        }

        // Same reasoning as for the context loop above
        if (pa_mainloop_iterate(p->mainloop, 1, NULL) < 0) {
            error = PA_ERR_INTERNAL;
            goto fail;
        }
    }

    return p;

fail:
    if (rerror)
        *rerror = error;
    pa_sound_device_free(p);
    return NULL;
}
// Copies @length bytes of recorded audio from the stream of @p into @data.
// Returns 0 on success and -1 on failure (dead connection, not enough data after
// waiting once, peek failure, or a fragment that is missing or shorter than @length).
// Except for the dead-connection case, the failure paths wait roughly
// @timeout_ms before returning, either in the poll loop or through usleep().
// NOTE(review): usleep() needs <unistd.h>, which is not among the includes visible here — confirm.
static int pa_sound_device_read(pa_handle *p, void *data, size_t length) {
assert(p);
int r = 0;
// CHECK_DEAD_GOTO writes its error code through this pointer; the value is not used afterwards
int *rerror = &r;
// True until the wait loop below has been run once
bool retry = true;
// Non-blocking iteration (second argument is 0) before looking at the stream
pa_mainloop_iterate(p->mainloop, 0, NULL);
// (1000 / rate) seconds expressed in milliseconds, i.e. the duration of 1000 samples at the stream's rate.
// NOTE(review): p->stream is dereferenced here before CHECK_DEAD_GOTO verifies that it is non-null — confirm it can never be null.
const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0);
CHECK_DEAD_GOTO(p, rerror, fail);
while(true) {
if(pa_stream_readable_size(p->stream) < length) {
// Second time without enough data: leave the loop, which falls through to the fail label
if(!retry)
break;
retry = false;
// Pump the main loop for @timeout_ms to give the data a chance to arrive, then check again
const double start_time = clock_get_monotonic_seconds();
while((clock_get_monotonic_seconds() - start_time) * 1000.0 < timeout_ms) {
// presumably the timeout argument is in microseconds (1 ms here) — TODO confirm against the PulseAudio docs
pa_mainloop_prepare(p->mainloop, 1 * 1000);
pa_mainloop_poll(p->mainloop);
pa_mainloop_dispatch(p->mainloop);
}
continue;
}
r = pa_stream_peek(p->stream, &p->read_data, &p->read_length);
if(r != 0) {
// retry is still true only if the wait loop above never ran, so sleep here instead
if(retry)
usleep(timeout_ms * 1000);
return -1;
}
// A missing fragment or one shorter than @length is discarded and reported as a failure.
// NOTE(review): a fragment longer than @length is accepted, but only its first @length bytes are copied before it is dropped — confirm this is intended.
if(p->read_length < length || !p->read_data) {
pa_stream_drop(p->stream);
if(retry)
usleep(timeout_ms * 1000);
return -1;
}
memcpy(data, p->read_data, length);
pa_stream_drop(p->stream);
return 0;
}
fail:
return -1;
}
int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
pa_sample_spec ss;
ss.format = PA_SAMPLE_S16LE;
@ -39,8 +221,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
buffer_attr.maxlength = period_frame_size * 2 * num_channels; // 2 bytes/sample, @num_channels channels
buffer_attr.fragsize = buffer_attr.maxlength;
pa_simple *pa_handle = pa_simple_new(nullptr, "gpu-screen-recorder", PA_STREAM_RECORD, name, "record", &ss, nullptr, &buffer_attr, &error);
if(!pa_handle) {
// We want a unique stream name for every device which allows each input to be a different box in pipewire graph software
char stream_name[64];
snprintf(stream_name, sizeof(stream_name), "record-%d", sound_device_index);
++sound_device_index;
pa_handle *handle = pa_sound_device_new(nullptr, "gpu-screen-recorder", name, stream_name, &ss, &buffer_attr, &error);
if(!handle) {
fprintf(stderr, "pa_simple_new() failed: %s. Audio input device %s might not be valid\n", pa_strerror(error), name);
return -1;
}
@ -49,13 +236,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
void *buffer = malloc(buffer_size);
if(!buffer) {
fprintf(stderr, "failed to allocate buffer for audio\n");
pa_simple_free(pa_handle);
pa_sound_device_free(handle);
return -1;
}
fprintf(stderr, "Using pulseaudio\n");
device->handle = pa_handle;
device->handle = handle;
device->buffer = buffer;
device->buffer_size = buffer_size;
device->frames = period_frame_size;
@ -63,100 +250,15 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
}
void sound_device_close(SoundDevice *device) {
pa_simple_free((pa_simple*)device->handle);
pa_sound_device_free((pa_handle*)device->handle);
free(device->buffer);
}
int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
int error = 0;
if(pa_simple_read((pa_simple*)device->handle, device->buffer, device->buffer_size, &error) < 0) {
fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
if(pa_sound_device_read((pa_handle*)device->handle, device->buffer, device->buffer_size) < 0) {
//fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
return -1;
}
*buffer = device->buffer;
return device->frames;
}
#else
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
// Opens the ALSA capture device @name and fills in @device.
// The device is configured for interleaved signed 16-bit little-endian samples
// with @num_channels channels, a rate near 48000 Hz and a period size near
// @period_frame_size frames. A buffer holding one period is allocated and
// stored in @device together with the handle.
// Returns 0 on success and a negative value on failure; on failure nothing is
// left open and @device is not modified.
int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
    int rc;
    snd_pcm_t *handle;

    rc = snd_pcm_open(&handle, name, SND_PCM_STREAM_CAPTURE, 0);
    if(rc < 0) {
        // Report the device that was actually requested, not a hard-coded 'default'
        fprintf(stderr, "unable to open pcm device '%s', reason: %s\n", name, snd_strerror(rc));
        return rc;
    }

    snd_pcm_hw_params_t *params;
    snd_pcm_hw_params_alloca(&params);
    // Fill the params with default values
    snd_pcm_hw_params_any(handle, params);
    // Interleaved mode
    snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    // Signed 16-bit little-endian format
    snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
    snd_pcm_hw_params_set_channels(handle, params, num_channels);

    // 48000 samples/second sampling rate (DVD quality)
    unsigned int val = 48000;
    // The *_near functions read @dir as the requested rounding direction, so it
    // must be initialized; 0 asks for the exact value when available.
    int dir = 0;
    snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);

    snd_pcm_uframes_t frames = period_frame_size;
    snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);

    // Write the parameters to the driver
    rc = snd_pcm_hw_params(handle, params);
    if(rc < 0) {
        fprintf(stderr, "unable to set hw parameters, reason: %s\n", snd_strerror(rc));
        snd_pcm_close(handle);
        return rc;
    }

    // Use a buffer large enough to hold one period
    snd_pcm_hw_params_get_period_size(params, &frames, &dir);
    int buffer_size = frames * 2 * num_channels; // 2 bytes/sample, @num_channels channels
    void *buffer = malloc(buffer_size);
    if(!buffer) {
        fprintf(stderr, "failed to allocate buffer for audio\n");
        snd_pcm_close(handle);
        return -1;
    }

    fprintf(stderr, "Using alsa\n");

    device->handle = handle;
    device->buffer = buffer;
    device->buffer_size = buffer_size;
    device->frames = frames;
    return 0;
}
// Closes the ALSA device stored in @device and frees its read buffer.
void sound_device_close(SoundDevice *device) {
    snd_pcm_t *pcm = (snd_pcm_t*)device->handle;

    /* TODO: Is this also needed in @sound_device_get_by_name on failure? */
    // TODO: Draining is disabled because it makes the thread block forever. Why?
    //snd_pcm_drain(pcm);
    snd_pcm_close(pcm);

    free(device->buffer);
}
// Reads one period of audio from the ALSA device into the buffer owned by @device.
// On success @buffer is pointed at that buffer and the number of frames read is
// returned (a short read is logged but still counts as success).
// On failure a negative value is returned and @buffer is left untouched; after
// an overrun the device is re-prepared before returning.
int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
    snd_pcm_t *pcm = (snd_pcm_t*)device->handle;
    const int frames_read = snd_pcm_readi(pcm, device->buffer, device->frames);

    if(frames_read == -EPIPE) {
        /* overrun */
        fprintf(stderr, "overrun occured\n");
        snd_pcm_prepare(pcm);
        return frames_read;
    }

    if(frames_read < 0) {
        fprintf(stderr, "failed to read from sound device, reason: %s\n", snd_strerror(frames_read));
        return frames_read;
    }

    if(frames_read != (int)device->frames)
        fprintf(stderr, "short read, read %d frames\n", frames_read);

    *buffer = device->buffer;
    return frames_read;
}
#endif