Add audio support
This commit is contained in:
parent
5e60805c36
commit
2fcd3ee3e5
@ -2,6 +2,8 @@
|
||||
This is a screen recorder that has minimal impact on system performance by recording a window using the GPU only,
|
||||
similar to ShadowPlay on Windows.
|
||||
|
||||
The output is an h264 encoded video with aac audio.
|
||||
|
||||
This project is still early in development.
|
||||
|
||||
# Performance
|
||||
@ -12,11 +14,10 @@ the fps remains at 30.
|
||||
`gpu-screen-recorder 0x1c00001 mp4 60 > test_video.mp4`
|
||||
|
||||
# Requirements
|
||||
X11, Nvidia (cuda)
|
||||
X11, Nvidia (cuda), pulseaudio
|
||||
|
||||
# TODO
|
||||
* Scale video when the window is rescaled.
|
||||
* Use the sound source in src/sound.cpp to record audio and mux it with ffmpeg to the final video.
|
||||
* Support AMD and Intel, using VAAPI. cuda and vaapi should be loaded at runtime using dlopen instead of linking to those
|
||||
libraries at compile-time.
|
||||
* Clean up the code!
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
typedef struct {
|
||||
void *handle;
|
||||
char *buffer;
|
||||
void *buffer;
|
||||
int buffer_size;
|
||||
unsigned int frames;
|
||||
} SoundDevice;
|
||||
@ -14,14 +14,16 @@ typedef struct {
|
||||
to clean up internal resources.
|
||||
Returns 0 on success, or a negative value on failure.
|
||||
*/
|
||||
int sound_device_get_by_name(SoundDevice *device, const char *name = "default", unsigned int num_channels = 1, unsigned int period_frame_size = 32);
|
||||
int sound_device_get_by_name(SoundDevice *device, const char *name = "default", unsigned int num_channels = 2, unsigned int period_frame_size = 32);
|
||||
|
||||
void sound_device_close(SoundDevice *device);
|
||||
|
||||
/*
|
||||
Returns the next chunk of audio into @buffer.
|
||||
Returns the size of the buffer, or a negative value on failure.
|
||||
Returns the number of frames read, or a negative value on failure.
|
||||
*/
|
||||
int sound_device_read_next_chunk(SoundDevice *device, char **buffer);
|
||||
int sound_device_read_next_chunk(SoundDevice *device, void **buffer);
|
||||
|
||||
int sound_device_get_buffer_size(SoundDevice *device);
|
||||
|
||||
#endif /* GPU_SCREEN_RECORDER_H */
|
||||
|
@ -9,7 +9,6 @@ include_dirs = ["/opt/cuda/targets/x86_64-linux/include"]
|
||||
libs = ["/usr/lib/libcuda.so"]
|
||||
|
||||
[dependencies]
|
||||
ffnvcodec = ">=9"
|
||||
glew = ">=2"
|
||||
glx = ">=1"
|
||||
libavcodec = ">=58"
|
||||
@ -23,3 +22,5 @@ xdamage = "1"
|
||||
glfw3 = "3"
|
||||
|
||||
alsa = "1"
|
||||
|
||||
libswresample = "3"
|
222
src/main.cpp
222
src/main.cpp
@ -3,8 +3,13 @@
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../include/sound.hpp"
|
||||
|
||||
#define GLX_GLXEXT_PROTOTYPES
|
||||
#include <GL/glew.h>
|
||||
#include <GL/glx.h>
|
||||
@ -19,6 +24,8 @@ extern "C" {
|
||||
#include <libavformat/avformat.h>
|
||||
#include <libavutil/hwcontext.h>
|
||||
#include <libavutil/hwcontext_cuda.h>
|
||||
#include <libavutil/opt.h>
|
||||
#include <libswresample/swresample.h>
|
||||
}
|
||||
#include <cudaGL.h>
|
||||
|
||||
@ -28,6 +35,14 @@ extern "C" {
|
||||
|
||||
//#include <CL/cl.h>
|
||||
|
||||
static char av_error_buffer[AV_ERROR_MAX_STRING_SIZE];
|
||||
|
||||
static char* av_error_to_string(int err) {
|
||||
if(av_strerror(err, av_error_buffer, sizeof(av_error_buffer) < 0))
|
||||
strcpy(av_error_buffer, "Unknown error");
|
||||
return av_error_buffer;
|
||||
}
|
||||
|
||||
struct ScopedGLXFBConfig {
|
||||
~ScopedGLXFBConfig() {
|
||||
if (configs)
|
||||
@ -236,7 +251,8 @@ std::vector<std::string> get_hardware_acceleration_device_names() {
|
||||
}
|
||||
|
||||
static void receive_frames(AVCodecContext *av_codec_context, AVStream *stream,
|
||||
AVFormatContext *av_format_context) {
|
||||
AVFormatContext *av_format_context,
|
||||
std::mutex &write_output_mutex) {
|
||||
AVPacket av_packet;
|
||||
av_init_packet(&av_packet);
|
||||
for (;;) {
|
||||
@ -244,14 +260,17 @@ static void receive_frames(AVCodecContext *av_codec_context, AVStream *stream,
|
||||
av_packet.size = 0;
|
||||
int res = avcodec_receive_packet(av_codec_context, &av_packet);
|
||||
if (res == 0) { // we have a packet, send the packet to the muxer
|
||||
assert(av_packet.stream_index == stream->id);
|
||||
av_packet_rescale_ts(&av_packet, av_codec_context->time_base,
|
||||
stream->time_base);
|
||||
av_packet.stream_index = stream->index;
|
||||
// Write the encoded video frame to disk
|
||||
// av_write_frame(av_format_context, &av_packet)
|
||||
// write(STDOUT_FILENO, av_packet.data, av_packet.size)
|
||||
if (av_write_frame(av_format_context, &av_packet) < 0) {
|
||||
fprintf(stderr, "Error: Failed to write frame to muxer\n");
|
||||
std::lock_guard<std::mutex> lock(write_output_mutex);
|
||||
int ret = av_write_frame(av_format_context, &av_packet);
|
||||
if(ret < 0) {
|
||||
fprintf(stderr, "Error: Failed to write video frame to muxer, reason: %s (%d)\n", av_error_to_string(ret), ret);
|
||||
}
|
||||
av_packet_unref(&av_packet);
|
||||
} else if (res == AVERROR(EAGAIN)) { // we have no packet
|
||||
@ -268,7 +287,46 @@ static void receive_frames(AVCodecContext *av_codec_context, AVStream *stream,
|
||||
//av_packet_unref(&av_packet);
|
||||
}
|
||||
|
||||
static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
|
||||
/*
    Adds the audio stream to @av_format_context and fills in its codec context.
    The AAC encoder is looked up and stored in @codec; @codec_id is not used.
    The codec context is set to 48000 Hz stereo with planar float samples.
    Exits the process if the encoder can't be found or the stream can't be allocated.
    Returns the newly created stream.
*/
static AVStream *add_audio_stream(AVFormatContext *av_format_context, AVCodec **codec,
                                  enum AVCodecID codec_id) {
    *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (*codec == nullptr) {
        fprintf(stderr, "Error: Could not find aac encoder\n");
        exit(1);
    }

    AVStream *audio_stream = avformat_new_stream(av_format_context, *codec);
    if (audio_stream == nullptr) {
        fprintf(stderr, "Error: Could not allocate stream\n");
        exit(1);
    }

    // The stream was just appended, so its id is the last index in the container
    audio_stream->id = av_format_context->nb_streams - 1;
    fprintf(stderr, "audio stream id: %d\n", audio_stream->id);

    assert((*codec)->type == AVMEDIA_TYPE_AUDIO);

    // The sample format is fixed to planar float instead of being taken from
    // (*codec)->sample_fmts, and bit_rate is not assigned here.
    AVCodecContext *audio_context = audio_stream->codec;
    audio_context->channels = 2;
    audio_context->channel_layout = AV_CH_LAYOUT_STEREO;
    audio_context->sample_rate = 48000;
    audio_context->sample_fmt = AV_SAMPLE_FMT_FLTP;
    audio_context->codec_id = AV_CODEC_ID_AAC;

    // Some formats want stream headers to be separate. Currently disabled:
    //if (av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
    //    av_format_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    return audio_stream;
}
|
||||
|
||||
static AVStream *add_video_stream(AVFormatContext *av_format_context, AVCodec **codec,
|
||||
enum AVCodecID codec_id,
|
||||
const WindowPixmap &window_pixmap,
|
||||
int fps) {
|
||||
@ -280,8 +338,7 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
|
||||
if (!*codec) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"Error: Could not find h264_nvenc or nvenc_h264 encoder for %s\n",
|
||||
avcodec_get_name(codec_id));
|
||||
"Error: Could not find h264_nvenc or nvenc_h264 encoder\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -291,25 +348,15 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
|
||||
exit(1);
|
||||
}
|
||||
stream->id = av_format_context->nb_streams - 1;
|
||||
fprintf(stderr, "video stream id: %d\n", stream->id);
|
||||
AVCodecContext *codec_context = stream->codec;
|
||||
|
||||
switch ((*codec)->type) {
|
||||
case AVMEDIA_TYPE_AUDIO: {
|
||||
codec_context->sample_fmt = (*codec)->sample_fmts
|
||||
? (*codec)->sample_fmts[0]
|
||||
: AV_SAMPLE_FMT_FLTP;
|
||||
codec_context->bit_rate = 64000;
|
||||
codec_context->sample_rate = 44100;
|
||||
codec_context->channels = 2;
|
||||
break;
|
||||
}
|
||||
case AVMEDIA_TYPE_VIDEO: {
|
||||
codec_context->codec_id = codec_id;
|
||||
// TODO: Scale bitrate by resolution. For 4k, 8000000 is a better value
|
||||
codec_context->bit_rate = 5000000;
|
||||
// Resolution must be a multiple of two
|
||||
assert((*codec)->type == AVMEDIA_TYPE_VIDEO);
|
||||
codec_context->codec_id = (*codec)->id;
|
||||
fprintf(stderr, "codec id: %d\n", (*codec)->id);
|
||||
codec_context->width = window_pixmap.texture_width & ~1;
|
||||
codec_context->height = window_pixmap.texture_height & ~1;
|
||||
codec_context->bit_rate = codec_context->width * codec_context->height; //5000000;
|
||||
// Timebase: This is the fundamental unit of time (in seconds) in terms
|
||||
// of which frame timestamps are represented. For fixed-fps content,
|
||||
// timebase should be 1/framerate and timestamp increments should be
|
||||
@ -328,11 +375,6 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
|
||||
|
||||
// stream->time_base = codec_context->time_base;
|
||||
// codec_context->ticks_per_frame = 30;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// Some formats want stream headers to be separate
|
||||
if (av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
|
||||
@ -341,6 +383,36 @@ static AVStream *add_stream(AVFormatContext *av_format_context, AVCodec **codec,
|
||||
return stream;
|
||||
}
|
||||
|
||||
/*
    Opens the encoder @codec on the codec context of @stream and allocates an
    audio frame whose sample count, sample format, channel count and channel
    layout are copied from that codec context.
    Exits the process if the codec can't be opened or an allocation fails.
    Returns the allocated frame.
*/
static AVFrame* open_audio(AVCodec *codec, AVStream *stream) {
    AVCodecContext *audio_context = stream->codec;

    const int open_result = avcodec_open2(audio_context, codec, nullptr);
    if(open_result < 0) {
        fprintf(stderr, "failed to open codec, reason: %s\n", av_error_to_string(open_result));
        exit(1);
    }

    AVFrame *audio_frame = av_frame_alloc();
    if(audio_frame == nullptr) {
        fprintf(stderr, "failed to allocate audio frame\n");
        exit(1);
    }

    // The frame has to describe its samples before its data buffers are requested
    audio_frame->channel_layout = audio_context->channel_layout;
    audio_frame->channels = audio_context->channels;
    audio_frame->format = audio_context->sample_fmt;
    audio_frame->nb_samples = audio_context->frame_size;

    const int buffer_result = av_frame_get_buffer(audio_frame, 0);
    if(buffer_result < 0) {
        fprintf(stderr, "failed to allocate audio data buffers, reason: %s\n", av_error_to_string(buffer_result));
        exit(1);
    }

    return audio_frame;
}
|
||||
|
||||
static void open_video(AVCodec *codec, AVStream *stream,
|
||||
WindowPixmap &window_pixmap, AVBufferRef **device_ctx,
|
||||
CUgraphicsResource *cuda_graphics_resource) {
|
||||
@ -528,15 +600,24 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
AVOutputFormat *output_format = av_format_context->oformat;
|
||||
|
||||
AVCodec *video_codec;
|
||||
AVStream *video_stream =
|
||||
add_stream(av_format_context, &video_codec, output_format->video_codec,
|
||||
add_video_stream(av_format_context, &video_codec, output_format->video_codec,
|
||||
window_pixmap, fps);
|
||||
if (!video_stream) {
|
||||
fprintf(stderr, "Error: Failed to create video stream\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
AVCodec *audio_codec;
|
||||
AVStream *audio_stream =
|
||||
add_audio_stream(av_format_context, &audio_codec, output_format->audio_codec);
|
||||
if (!audio_stream) {
|
||||
fprintf(stderr, "Error: Failed to create audio stream\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (cuInit(0) < 0) {
|
||||
fprintf(stderr, "Error: cuInit failed\n");
|
||||
return {};
|
||||
@ -547,7 +628,9 @@ int main(int argc, char **argv) {
|
||||
open_video(video_codec, video_stream, window_pixmap, &device_ctx,
|
||||
&cuda_graphics_resource);
|
||||
|
||||
av_dump_format(av_format_context, 0, filename, 1);
|
||||
AVFrame *audio_frame = open_audio(audio_codec, audio_stream);
|
||||
|
||||
//av_dump_format(av_format_context, 0, filename, 1);
|
||||
|
||||
if (!(output_format->flags & AVFMT_NOFILE)) {
|
||||
int ret = avio_open(&av_format_context->pb, filename, AVIO_FLAG_WRITE);
|
||||
@ -635,6 +718,69 @@ int main(int argc, char **argv) {
|
||||
int window_width = xwa.width;
|
||||
int window_height = xwa.height;
|
||||
|
||||
SoundDevice sound_device;
|
||||
if(sound_device_get_by_name(&sound_device, "pulse", audio_stream->codec->channels, audio_stream->codec->frame_size) != 0) {
|
||||
fprintf(stderr, "failed to get 'pulse' sound device\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int audio_buffer_size = av_samples_get_buffer_size(NULL, audio_stream->codec->channels, audio_stream->codec->frame_size, audio_stream->codec->sample_fmt, 1);
|
||||
uint8_t *audio_frame_buf = (uint8_t *)av_malloc(audio_buffer_size);
|
||||
avcodec_fill_audio_frame(audio_frame, audio_stream->codec->channels, audio_stream->codec->sample_fmt, (const uint8_t*)audio_frame_buf, audio_buffer_size, 1);
|
||||
|
||||
AVPacket audio_packet;
|
||||
av_new_packet(&audio_packet, audio_buffer_size);
|
||||
|
||||
std::mutex write_output_mutex;
|
||||
|
||||
bool running = true;
|
||||
std::thread audio_thread([&running](AVFormatContext *av_format_context, AVStream *audio_stream, AVPacket *audio_packet, uint8_t *audio_frame_buf, SoundDevice *sound_device, AVFrame *audio_frame, std::mutex *write_output_mutex) {
|
||||
SwrContext *swr = swr_alloc();
|
||||
if(!swr) {
|
||||
fprintf(stderr, "Failed to create SwrContext\n");
|
||||
exit(1);
|
||||
}
|
||||
av_opt_set_int(swr, "in_channel_layout", audio_stream->codec->channel_layout, 0);
|
||||
av_opt_set_int(swr, "out_channel_layout", audio_stream->codec->channel_layout, 0);
|
||||
av_opt_set_int(swr, "in_sample_rate", audio_stream->codec->sample_rate, 0);
|
||||
av_opt_set_int(swr, "out_sample_rate", audio_stream->codec->sample_rate, 0);
|
||||
av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
|
||||
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
|
||||
swr_init(swr);
|
||||
|
||||
while(running) {
|
||||
void *sound_buffer;
|
||||
int sound_buffer_size = sound_device_read_next_chunk(sound_device, &sound_buffer);
|
||||
if(sound_buffer_size >= 0) {
|
||||
// TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
|
||||
swr_convert(swr, &audio_frame_buf, audio_frame->nb_samples, (const uint8_t**)&sound_buffer, sound_buffer_size);
|
||||
audio_frame->extended_data = &audio_frame_buf;
|
||||
// TODO: Fix this. Warning from ffmpeg:
|
||||
// Timestamps are unset in a packet for stream 1. This is deprecated and will stop working in the future. Fix your code to set the timestamps properly
|
||||
//audio_frame->pts=audio_frame_index*100;
|
||||
//++audio_frame_index;
|
||||
|
||||
int got_frame = 0;
|
||||
int ret = avcodec_encode_audio2(audio_stream->codec, audio_packet, audio_frame, &got_frame);
|
||||
if(ret < 0){
|
||||
printf("Failed to encode!\n");
|
||||
break;
|
||||
}
|
||||
if (got_frame==1){
|
||||
//printf("Succeed to encode 1 frame! \tsize:%5d\n",pkt.size);
|
||||
audio_packet->stream_index = audio_stream->index;
|
||||
std::lock_guard<std::mutex> lock(*write_output_mutex);
|
||||
ret = av_write_frame(av_format_context, audio_packet);
|
||||
av_free_packet(audio_packet);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "failed to read sound from device, error: %d\n", sound_buffer_size);
|
||||
}
|
||||
}
|
||||
|
||||
swr_free(&swr);
|
||||
}, av_format_context, audio_stream, &audio_packet, audio_frame_buf, &sound_device, audio_frame, &write_output_mutex);
|
||||
|
||||
XEvent e;
|
||||
while (!glfwWindowShouldClose(window)) {
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
@ -719,7 +865,7 @@ int main(int argc, char **argv) {
|
||||
"Error: cuGraphicsGLRegisterImage failed, error %s, texture "
|
||||
"id: %u\n",
|
||||
err_str, window_pixmap.target_texture_id);
|
||||
exit(1);
|
||||
break;
|
||||
}
|
||||
|
||||
res = cuGraphicsResourceSetMapFlags(
|
||||
@ -730,7 +876,7 @@ int main(int argc, char **argv) {
|
||||
av_frame_unref(frame);
|
||||
if (av_hwframe_get_buffer(video_stream->codec->hw_frames_ctx, frame, 0) < 0) {
|
||||
fprintf(stderr, "Error: av_hwframe_get_buffer failed\n");
|
||||
exit(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -741,7 +887,7 @@ int main(int argc, char **argv) {
|
||||
frame_count += 1;
|
||||
if (avcodec_send_frame(video_stream->codec, frame) >= 0) {
|
||||
receive_frames(video_stream->codec, video_stream,
|
||||
av_format_context);
|
||||
av_format_context, write_output_mutex);
|
||||
} else {
|
||||
fprintf(stderr, "Error: avcodec_send_frame failed\n");
|
||||
}
|
||||
@ -752,6 +898,20 @@ int main(int argc, char **argv) {
|
||||
usleep(5000);
|
||||
}
|
||||
|
||||
running = false;
|
||||
audio_thread.join();
|
||||
|
||||
sound_device_close(&sound_device);
|
||||
|
||||
//Flush Encoder
|
||||
#if 0
|
||||
ret = flush_encoder(pFormatCtx,0);
|
||||
if (ret < 0) {
|
||||
printf("Flushing encoder failed\n");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (av_write_trailer(av_format_context) != 0) {
|
||||
fprintf(stderr, "Failed to write trailer\n");
|
||||
}
|
||||
|
@ -26,8 +26,8 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
|
||||
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
|
||||
snd_pcm_hw_params_set_channels(handle, params, num_channels);
|
||||
|
||||
// 44100 samples/second sampling rate (CD quality)
|
||||
unsigned int val = 44100;
|
||||
// 48000 samples/second sampling rate (DVD quality)
|
||||
unsigned int val = 48000;
|
||||
int dir;
|
||||
snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
|
||||
|
||||
@ -45,7 +45,7 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
|
||||
// Use a buffer large enough to hold one period
|
||||
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
|
||||
int buffer_size = frames * 2 * num_channels; // 2 bytes/sample, @num_channels channels
|
||||
char *buffer = (char*)malloc(buffer_size);
|
||||
void *buffer = malloc(buffer_size);
|
||||
if(!buffer) {
|
||||
fprintf(stderr, "failed to allocate buffer for audio\n");
|
||||
snd_pcm_close(handle);
|
||||
@ -61,18 +61,19 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
|
||||
|
||||
void sound_device_close(SoundDevice *device) {
|
||||
/* TODO: Is this also needed in @sound_device_get_by_name on failure? */
|
||||
snd_pcm_drain((snd_pcm_t*)device->handle);
|
||||
// TODO: This has been commented out since it causes the thread to block forever. Why?
|
||||
//snd_pcm_drain((snd_pcm_t*)device->handle);
|
||||
snd_pcm_close((snd_pcm_t*)device->handle);
|
||||
free(device->buffer);
|
||||
}
|
||||
|
||||
int sound_device_read_next_chunk(SoundDevice *device, char **buffer) {
|
||||
int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
|
||||
int rc = snd_pcm_readi((snd_pcm_t*)device->handle, device->buffer, device->frames);
|
||||
if (rc == -EPIPE) {
|
||||
/* overrun */
|
||||
fprintf(stderr, "overrun occured\n");
|
||||
snd_pcm_prepare((snd_pcm_t*)device->handle);
|
||||
return 0;
|
||||
return rc;
|
||||
} else if(rc < 0) {
|
||||
fprintf(stderr, "failed to read from sound device, reason: %s\n", snd_strerror(rc));
|
||||
return rc;
|
||||
@ -80,5 +81,9 @@ int sound_device_read_next_chunk(SoundDevice *device, char **buffer) {
|
||||
fprintf(stderr, "short read, read %d frames\n", rc);
|
||||
}
|
||||
*buffer = device->buffer;
|
||||
return 0;
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Returns the buffer_size value stored in @device. */
int sound_device_get_buffer_size(SoundDevice *device) {
    const int stored_size = device->buffer_size;
    return stored_size;
}
|
||||
|
Loading…
Reference in New Issue
Block a user