Add audio support

dec05eba 2020-04-01 19:25:16 +02:00
parent 5e60805c36
commit 2fcd3ee3e5
5 changed files with 233 additions and 64 deletions

View File

@@ -2,6 +2,8 @@
 This is a screen recorder that has minimal impact on system performance by recording a window using the GPU only,
 similar to shadowplay on windows.
+The output is an h264 encoded video with aac audio.
 This project is still early in development.
 # Performance
@@ -12,11 +14,10 @@ the fps remains at 30.
 `gpu-screen-recorder 0x1c00001 mp4 60 > test_video.mp4`
 # Requirements
-X11, Nvidia (cuda)
+X11, Nvidia (cuda), pulseaudio
 # TODO
 * Scale video when the window is rescaled.
-* Use the sound source in src/sound.cpp to record audio and mux it with ffmpeg to the final video.
 * Support AMD and Intel, using VAAPI. cuda and vaapi should be loaded at runtime using dlopen instead of linking to those
 libraries at compile-time.
 * Clean up the code!

View File

@@ -3,7 +3,7 @@
 typedef struct {
     void *handle;
-    char *buffer;
+    void *buffer;
     int buffer_size;
     unsigned int frames;
 } SoundDevice;
@@ -14,14 +14,16 @@ typedef struct {
     to clean up internal resources.
     Returns 0 on success, or a negative value on failure.
 */
-int sound_device_get_by_name(SoundDevice *device, const char *name = "default", unsigned int num_channels = 1, unsigned int period_frame_size = 32);
+int sound_device_get_by_name(SoundDevice *device, const char *name = "default", unsigned int num_channels = 2, unsigned int period_frame_size = 32);
 void sound_device_close(SoundDevice *device);
 /*
     Returns the next chunk of audio into @buffer.
-    Returns the size of the buffer, or a negative value on failure.
+    Returns the number of frames read, or a negative value on failure.
 */
-int sound_device_read_next_chunk(SoundDevice *device, char **buffer);
+int sound_device_read_next_chunk(SoundDevice *device, void **buffer);
+
+int sound_device_get_buffer_size(SoundDevice *device);
 #endif /* GPU_SCREEN_RECORDER_H */
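For reference, here is a minimal standalone capture sketch against the updated API. It is not part of this commit; the "pulse" device name, the 2 channels and the 1024-frame period are assumed values mirroring how src/main.cpp drives the device, and it expects the S16LE interleaved format configured in src/sound.cpp (build it together with src/sound.cpp and -lasound).

```cpp
#include <cstdio>
#include "../include/sound.hpp"

int main() {
    SoundDevice device;
    // 2 channels, 1024-frame periods (1024 is what an AAC encoder ends up requesting)
    if(sound_device_get_by_name(&device, "pulse", 2, 1024) != 0) {
        fprintf(stderr, "failed to open capture device\n");
        return 1;
    }
    fprintf(stderr, "period buffer: %d bytes\n", sound_device_get_buffer_size(&device));

    for(int i = 0; i < 100; ++i) {
        void *chunk = nullptr;
        int frames_read = sound_device_read_next_chunk(&device, &chunk);
        if(frames_read < 0)
            continue; // error/overrun was already reported by sound.cpp
        // @chunk now holds frames_read frames of interleaved 16-bit samples,
        // i.e. frames_read * 2 channels * 2 bytes of data.
        fprintf(stderr, "read %d frames\n", frames_read);
    }

    sound_device_close(&device);
    return 0;
}
```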

View File

@@ -9,7 +9,6 @@ include_dirs = ["/opt/cuda/targets/x86_64-linux/include"]
 libs = ["/usr/lib/libcuda.so"]
 [dependencies]
-ffnvcodec = ">=9"
 glew = ">=2"
 glx = ">=1"
 libavcodec = ">=58"
@@ -23,3 +22,5 @@ xdamage = "1"
 glfw3 = "3"
 alsa = "1"
+libswresample = "3"

View File

@@ -3,8 +3,13 @@
 #include <stdlib.h>
 #include <string>
 #include <vector>
+#include <thread>
+#include <mutex>
 #include <unistd.h>
+#include "../include/sound.hpp"
 #define GLX_GLXEXT_PROTOTYPES
 #include <GL/glew.h>
 #include <GL/glx.h>
@@ -19,6 +24,8 @@ extern "C" {
 #include <libavformat/avformat.h>
 #include <libavutil/hwcontext.h>
 #include <libavutil/hwcontext_cuda.h>
+#include <libavutil/opt.h>
+#include <libswresample/swresample.h>
 }
 #include <cudaGL.h>
@@ -28,6 +35,14 @@ extern "C" {
 //#include <CL/cl.h>
+static char av_error_buffer[AV_ERROR_MAX_STRING_SIZE];
+
+static char* av_error_to_string(int err) {
+    if(av_strerror(err, av_error_buffer, sizeof(av_error_buffer)) < 0)
+        strcpy(av_error_buffer, "Unknown error");
+    return av_error_buffer;
+}
+
 struct ScopedGLXFBConfig {
     ~ScopedGLXFBConfig() {
         if (configs)
@@ -236,7 +251,8 @@ std::vector<std::string> get_hardware_acceleration_device_names() {
 }
 static void receive_frames(AVCodecContext *av_codec_context, AVStream *stream,
-                           AVFormatContext *av_format_context) {
+                           AVFormatContext *av_format_context,
+                           std::mutex &write_output_mutex) {
     AVPacket av_packet;
     av_init_packet(&av_packet);
     for (;;) {
@@ -244,14 +260,17 @@
         av_packet.size = 0;
         int res = avcodec_receive_packet(av_codec_context, &av_packet);
         if (res == 0) { // we have a packet, send the packet to the muxer
+            assert(av_packet.stream_index == stream->id);
             av_packet_rescale_ts(&av_packet, av_codec_context->time_base,
                                  stream->time_base);
             av_packet.stream_index = stream->index;
             // Write the encoded video frame to disk
             // av_write_frame(av_format_context, &av_packet)
             // write(STDOUT_FILENO, av_packet.data, av_packet.size)
-            if (av_write_frame(av_format_context, &av_packet) < 0) {
-                fprintf(stderr, "Error: Failed to write frame to muxer\n");
+            std::lock_guard<std::mutex> lock(write_output_mutex);
+            int ret = av_write_frame(av_format_context, &av_packet);
+            if(ret < 0) {
+                fprintf(stderr, "Error: Failed to write video frame to muxer, reason: %s (%d)\n", av_error_to_string(ret), ret);
             }
             av_packet_unref(&av_packet);
         } else if (res == AVERROR(EAGAIN)) { // we have no packet
@@ -268,7 +287,46 @@ static void receive_frames(AVCodecContext *av_codec_context, AVStream *stream,
     //av_packet_unref(&av_packet);
 }
-static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
+static AVStream *add_audio_stream(AVFormatContext *av_format_context, AVCodec **codec,
+                                  enum AVCodecID codec_id) {
+    *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
+    if (!*codec) {
+        fprintf(
+            stderr,
+            "Error: Could not find aac encoder\n");
+        exit(1);
+    }
+
+    AVStream *stream = avformat_new_stream(av_format_context, *codec);
+    if (!stream) {
+        fprintf(stderr, "Error: Could not allocate stream\n");
+        exit(1);
+    }
+    stream->id = av_format_context->nb_streams - 1;
+    fprintf(stderr, "audio stream id: %d\n", stream->id);
+    AVCodecContext *codec_context = stream->codec;
+
+    assert((*codec)->type == AVMEDIA_TYPE_AUDIO);
+    /*
+    codec_context->sample_fmt = (*codec)->sample_fmts
+                                    ? (*codec)->sample_fmts[0]
+                                    : AV_SAMPLE_FMT_FLTP;
+    */
+    codec_context->codec_id = AV_CODEC_ID_AAC;
+    codec_context->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    //codec_context->bit_rate = 64000;
+    codec_context->sample_rate = 48000;
+    codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
+    codec_context->channels = 2;
+
+    // Some formats want stream headers to be seperate
+    //if (av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
+    //    av_format_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+    return stream;
+}
+
+static AVStream *add_video_stream(AVFormatContext *av_format_context, AVCodec **codec,
                             enum AVCodecID codec_id,
                             const WindowPixmap &window_pixmap,
                             int fps) {
@@ -280,8 +338,7 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
     if (!*codec) {
         fprintf(
             stderr,
-            "Error: Could not find h264_nvenc or nvenc_h264 encoder for %s\n",
-            avcodec_get_name(codec_id));
+            "Error: Could not find h264_nvenc or nvenc_h264 encoder\n");
         exit(1);
     }
@@ -291,25 +348,15 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
         exit(1);
     }
     stream->id = av_format_context->nb_streams - 1;
+    fprintf(stderr, "video stream id: %d\n", stream->id);
     AVCodecContext *codec_context = stream->codec;
-    switch ((*codec)->type) {
-    case AVMEDIA_TYPE_AUDIO: {
-        codec_context->sample_fmt = (*codec)->sample_fmts
-                                        ? (*codec)->sample_fmts[0]
-                                        : AV_SAMPLE_FMT_FLTP;
-        codec_context->bit_rate = 64000;
-        codec_context->sample_rate = 44100;
-        codec_context->channels = 2;
-        break;
-    }
-    case AVMEDIA_TYPE_VIDEO: {
-        codec_context->codec_id = codec_id;
-        // TODO: Scale bitrate by resolution. For 4k, 8000000 is a better value
-        codec_context->bit_rate = 5000000;
-        // Resolution must be a multiple of two
+    assert((*codec)->type == AVMEDIA_TYPE_VIDEO);
+    codec_context->codec_id = (*codec)->id;
+    fprintf(stderr, "codec id: %d\n", (*codec)->id);
     codec_context->width = window_pixmap.texture_width & ~1;
     codec_context->height = window_pixmap.texture_height & ~1;
+    codec_context->bit_rate = codec_context->width * codec_context->height; //5000000;
     // Timebase: This is the fundamental unit of time (in seconds) in terms
     // of which frame timestamps are represented. For fixed-fps content,
     // timebase should be 1/framerate and timestamp increments should be
@@ -328,11 +375,6 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
     // stream->time_base = codec_context->time_base;
     // codec_context->ticks_per_frame = 30;
-        break;
-    }
-    default:
-        break;
-    }
     // Some formats want stream headers to be seperate
     if (av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
@@ -341,6 +383,36 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
     return stream;
 }
+static AVFrame* open_audio(AVCodec *codec, AVStream *stream) {
+    int ret;
+    AVCodecContext *codec_context = stream->codec;
+
+    ret = avcodec_open2(codec_context, codec, nullptr);
+    if(ret < 0) {
+        fprintf(stderr, "failed to open codec, reason: %s\n", av_error_to_string(ret));
+        exit(1);
+    }
+
+    AVFrame *frame = av_frame_alloc();
+    if(!frame) {
+        fprintf(stderr, "failed to allocate audio frame\n");
+        exit(1);
+    }
+
+    frame->nb_samples = codec_context->frame_size;
+    frame->format = codec_context->sample_fmt;
+    frame->channels = codec_context->channels;
+    frame->channel_layout = codec_context->channel_layout;
+
+    ret = av_frame_get_buffer(frame, 0);
+    if(ret < 0) {
+        fprintf(stderr, "failed to allocate audio data buffers, reason: %s\n", av_error_to_string(ret));
+        exit(1);
+    }
+
+    return frame;
+}
+
 static void open_video(AVCodec *codec, AVStream *stream,
                        WindowPixmap &window_pixmap, AVBufferRef **device_ctx,
                        CUgraphicsResource *cuda_graphics_resource) {
@@ -528,15 +600,24 @@ int main(int argc, char **argv) {
     }
     AVOutputFormat *output_format = av_format_context->oformat;
     AVCodec *video_codec;
     AVStream *video_stream =
-        add_stream(av_format_context, &video_codec, output_format->video_codec,
+        add_video_stream(av_format_context, &video_codec, output_format->video_codec,
                    window_pixmap, fps);
     if (!video_stream) {
         fprintf(stderr, "Error: Failed to create video stream\n");
         return 1;
     }
+
+    AVCodec *audio_codec;
+    AVStream *audio_stream =
+        add_audio_stream(av_format_context, &audio_codec, output_format->audio_codec);
+    if (!audio_stream) {
+        fprintf(stderr, "Error: Failed to create audio stream\n");
+        return 1;
+    }
+
     if (cuInit(0) < 0) {
         fprintf(stderr, "Error: cuInit failed\n");
         return {};
@@ -547,7 +628,9 @@ int main(int argc, char **argv) {
     open_video(video_codec, video_stream, window_pixmap, &device_ctx,
                &cuda_graphics_resource);
-    av_dump_format(av_format_context, 0, filename, 1);
+    AVFrame *audio_frame = open_audio(audio_codec, audio_stream);
+
+    //av_dump_format(av_format_context, 0, filename, 1);
     if (!(output_format->flags & AVFMT_NOFILE)) {
         int ret = avio_open(&av_format_context->pb, filename, AVIO_FLAG_WRITE);
@@ -635,6 +718,69 @@ int main(int argc, char **argv) {
     int window_width = xwa.width;
     int window_height = xwa.height;
+
+    SoundDevice sound_device;
+    if(sound_device_get_by_name(&sound_device, "pulse", audio_stream->codec->channels, audio_stream->codec->frame_size) != 0) {
+        fprintf(stderr, "failed to get 'pulse' sound device\n");
+        exit(1);
+    }
+
+    int audio_buffer_size = av_samples_get_buffer_size(NULL, audio_stream->codec->channels, audio_stream->codec->frame_size, audio_stream->codec->sample_fmt, 1);
+    uint8_t *audio_frame_buf = (uint8_t *)av_malloc(audio_buffer_size);
+    avcodec_fill_audio_frame(audio_frame, audio_stream->codec->channels, audio_stream->codec->sample_fmt, (const uint8_t*)audio_frame_buf, audio_buffer_size, 1);
+
+    AVPacket audio_packet;
+    av_new_packet(&audio_packet, audio_buffer_size);
+
+    std::mutex write_output_mutex;
+
+    bool running = true;
+    std::thread audio_thread([&running](AVFormatContext *av_format_context, AVStream *audio_stream, AVPacket *audio_packet, uint8_t *audio_frame_buf, SoundDevice *sound_device, AVFrame *audio_frame, std::mutex *write_output_mutex) {
+        SwrContext *swr = swr_alloc();
+        if(!swr) {
+            fprintf(stderr, "Failed to create SwrContext\n");
+            exit(1);
+        }
+        av_opt_set_int(swr, "in_channel_layout", audio_stream->codec->channel_layout, 0);
+        av_opt_set_int(swr, "out_channel_layout", audio_stream->codec->channel_layout, 0);
+        av_opt_set_int(swr, "in_sample_rate", audio_stream->codec->sample_rate, 0);
+        av_opt_set_int(swr, "out_sample_rate", audio_stream->codec->sample_rate, 0);
+        av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+        av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
+        swr_init(swr);
+
+        while(running) {
+            void *sound_buffer;
+            int sound_buffer_size = sound_device_read_next_chunk(sound_device, &sound_buffer);
+            if(sound_buffer_size >= 0) {
+                // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
+                swr_convert(swr, &audio_frame_buf, audio_frame->nb_samples, (const uint8_t**)&sound_buffer, sound_buffer_size);
+                audio_frame->extended_data = &audio_frame_buf;
+                // TODO: Fix this. Warning from ffmpeg:
+                // Timestamps are unset in a packet for stream 1. This is deprecated and will stop working in the future. Fix your code to set the timestamps properly
+                //audio_frame->pts=audio_frame_index*100;
+                //++audio_frame_index;
+
+                int got_frame = 0;
+                int ret = avcodec_encode_audio2(audio_stream->codec, audio_packet, audio_frame, &got_frame);
+                if(ret < 0){
+                    printf("Failed to encode!\n");
+                    break;
+                }
+                if (got_frame==1){
+                    //printf("Succeed to encode 1 frame! \tsize:%5d\n",pkt.size);
+                    audio_packet->stream_index = audio_stream->index;
+                    std::lock_guard<std::mutex> lock(*write_output_mutex);
+                    ret = av_write_frame(av_format_context, audio_packet);
+                    av_free_packet(audio_packet);
+                }
+            } else {
+                fprintf(stderr, "failed to read sound from device, error: %d\n", sound_buffer_size);
+            }
+        }
+
+        swr_free(&swr);
+    }, av_format_context, audio_stream, &audio_packet, audio_frame_buf, &sound_device, audio_frame, &write_output_mutex);
+
     XEvent e;
     while (!glfwWindowShouldClose(window)) {
         glClear(GL_COLOR_BUFFER_BIT);
@@ -719,7 +865,7 @@ int main(int argc, char **argv) {
                     "Error: cuGraphicsGLRegisterImage failed, error %s, texture "
                     "id: %u\n",
                     err_str, window_pixmap.target_texture_id);
-                exit(1);
+                break;
             }
             res = cuGraphicsResourceSetMapFlags(
@@ -730,7 +876,7 @@ int main(int argc, char **argv) {
             av_frame_unref(frame);
             if (av_hwframe_get_buffer(video_stream->codec->hw_frames_ctx, frame, 0) < 0) {
                 fprintf(stderr, "Error: av_hwframe_get_buffer failed\n");
-                exit(1);
+                break;
             }
         }
@@ -741,7 +887,7 @@ int main(int argc, char **argv) {
             frame_count += 1;
             if (avcodec_send_frame(video_stream->codec, frame) >= 0) {
                 receive_frames(video_stream->codec, video_stream,
-                               av_format_context);
+                               av_format_context, write_output_mutex);
             } else {
                 fprintf(stderr, "Error: avcodec_send_frame failed\n");
             }
@@ -752,6 +898,20 @@ int main(int argc, char **argv) {
         usleep(5000);
     }
+
+    running = false;
+    audio_thread.join();
+    sound_device_close(&sound_device);
+
+    //Flush Encoder
+    #if 0
+    ret = flush_encoder(pFormatCtx,0);
+    if (ret < 0) {
+        printf("Flushing encoder failed\n");
+        return -1;
+    }
+    #endif
+
     if (av_write_trailer(av_format_context) != 0) {
         fprintf(stderr, "Failed to write trailer\n");
     }
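The audio thread above leaves audio_frame->pts unset, which is what produces the "Timestamps are unset in a packet for stream 1" warning quoted in the TODO. A common fix, shown here only as an illustration and not as part of this commit, is to advance a counter by nb_samples per encoded frame, since an audio encoder's time base is effectively 1/sample_rate; the sketch below just demonstrates that arithmetic with the 48000 Hz / 1024-sample values used above.

```cpp
#include <cstdio>
#include <cstdint>

int main() {
    const int sample_rate = 48000; // matches add_audio_stream()
    const int nb_samples = 1024;   // AAC frame size
    int64_t pts = 0;               // in units of 1/sample_rate

    for(int i = 0; i < 5; ++i) {
        // In main.cpp this value would be assigned to audio_frame->pts right before
        // avcodec_encode_audio2(), and the packet rescaled to the stream time base
        // before av_write_frame(), mirroring what receive_frames() does for video.
        fprintf(stderr, "audio frame %d: pts=%lld (%.3f s)\n", i, (long long)pts, (double)pts / sample_rate);
        pts += nb_samples;
    }
    return 0;
}
```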

View File

@@ -26,8 +26,8 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
     snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
     snd_pcm_hw_params_set_channels(handle, params, num_channels);
-    // 44100 bits/second samling rate (CD quality)
-    unsigned int val = 44100;
+    // 48000 bits/second samling rate (DVD quality)
+    unsigned int val = 48000;
     int dir;
     snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
@@ -45,7 +45,7 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
     // Use a buffer large enough to hold one period
     snd_pcm_hw_params_get_period_size(params, &frames, &dir);
     int buffer_size = frames * 2 * num_channels; // 2 bytes/sample, @num_channels channels
-    char *buffer = (char*)malloc(buffer_size);
+    void *buffer = malloc(buffer_size);
     if(!buffer) {
         fprintf(stderr, "failed to allocate buffer for audio\n");
         snd_pcm_close(handle);
@@ -61,18 +61,19 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
 void sound_device_close(SoundDevice *device) {
     /* TODO: Is this also needed in @sound_device_get_by_name on failure? */
-    snd_pcm_drain((snd_pcm_t*)device->handle);
+    // TODO: This has been commented out since it causes the thread to block forever. Why?
+    //snd_pcm_drain((snd_pcm_t*)device->handle);
     snd_pcm_close((snd_pcm_t*)device->handle);
     free(device->buffer);
 }
-int sound_device_read_next_chunk(SoundDevice *device, char **buffer) {
+int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
     int rc = snd_pcm_readi((snd_pcm_t*)device->handle, device->buffer, device->frames);
     if (rc == -EPIPE) {
         /* overrun */
         fprintf(stderr, "overrun occured\n");
         snd_pcm_prepare((snd_pcm_t*)device->handle);
-        return 0;
+        return rc;
     } else if(rc < 0) {
         fprintf(stderr, "failed to read from sound device, reason: %s\n", snd_strerror(rc));
         return rc;
@@ -80,5 +81,9 @@ int sound_device_read_next_chunk(SoundDevice *device, char **buffer) {
         fprintf(stderr, "short read, read %d frames\n", rc);
     }
     *buffer = device->buffer;
-    return 0;
+    return rc;
 }
+
+int sound_device_get_buffer_size(SoundDevice *device) {
+    return device->buffer_size;
+}
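The S16LE interleaved samples produced by this capture code are converted to the planar float (FLTP) layout that the AAC encoder expects via libswresample in src/main.cpp. Below is a self-contained sketch of just that conversion step, assuming the same 48000 Hz stereo parameters and feeding silent input; note that FLTP output is planar, so it needs one plane per channel (build with something like -lswresample -lavutil).

```cpp
#include <cstdio>
#include <cstdint>
#include <vector>
extern "C" {
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
}

int main() {
    SwrContext *swr = swr_alloc();
    av_opt_set_int(swr, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(swr, "in_sample_rate", 48000, 0);
    av_opt_set_int(swr, "out_sample_rate", 48000, 0);
    av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
    av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
    if(swr_init(swr) < 0) {
        fprintf(stderr, "swr_init failed\n");
        return 1;
    }

    const int num_frames = 1024;                   // one AAC frame worth of samples
    std::vector<int16_t> input(num_frames * 2, 0); // silent interleaved stereo S16

    // FLTP is planar: allocate one float plane per channel
    uint8_t *output[2] = { nullptr, nullptr };
    int out_linesize = 0;
    if(av_samples_alloc(output, &out_linesize, 2, num_frames, AV_SAMPLE_FMT_FLTP, 0) < 0) {
        fprintf(stderr, "failed to allocate output samples\n");
        return 1;
    }

    const uint8_t *in_planes[1] = { (const uint8_t*)input.data() };
    int converted = swr_convert(swr, output, num_frames, in_planes, num_frames);
    fprintf(stderr, "converted %d frames\n", converted);

    av_freep(&output[0]);
    swr_free(&swr);
    return 0;
}
```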