Add option to record a display or all displays. This requires NvFBC.

dec05eba committed 2021-07-23 17:33:37 +02:00
parent 7b39bd81c0
commit 4617e2c45a
5 changed files with 2586 additions and 192 deletions

README.md

@ -12,7 +12,8 @@ When recording a 4k game, fps drops from 30 to 7 when using OBS Studio, however
the fps remains at 30.
# Installation
gpu screen recorder can be built using [sibs](https://git.dec05eba.com/sibs) or if you are running Arch Linux, then you can find it on aur under the name gpu-screen-recorder-git (`yay -S gpu-screen-recorder-git`).
gpu screen recorder can be built using [sibs](https://git.dec05eba.com/sibs) or if you are running Arch Linux, then you can find it on aur under the name gpu-screen-recorder-git (`yay -S gpu-screen-recorder-git`).\
Recording displays requires a GPU with NvFBC support. Normally only Tesla and Quadro GPUs support this, but with https://github.com/keylase/nvidia-patch you can enable it on any GPU that supports NVENC as well (GPUs as old as the NVIDIA 600 series), provided you are not using outdated GPU drivers.
# How to use
Run `interactive.sh` or run gpu-screen-recorder directly, for example: `gpu-screen-recorder -w 0x1c00001 -c mp4 -f 60 -a bluez_sink.00_18_09_8A_07_93.a2dp_sink.monitor > test_video.mp4`\
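Recording a display instead of a window works by passing the display name from xrandr, or "screen" for all displays, to `-w`. An illustrative invocation of the option added in this commit (add `-a` for audio as in the example above): `gpu-screen-recorder -w screen -c mp4 -f 60 -o test_video.mp4`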

7
TODO Normal file

@ -0,0 +1,7 @@
Check for reparent.
Only add a window to the list if it is a topmost window.
Use the nvEncoder API directly? Maybe with that we could copy the window OpenGL texture directly to the GPU, which doesn't work right now for some reason.
Right now we are required to copy the OpenGL texture to another OpenGL texture first.
Load CUDA at runtime with dlopen (see the sketch below).
Track window damage and only update on damage. That is better for the output file size.
Remove the CUDA-to-CUDA copy when using NvFBC if possible. FFmpeg is getting in the way.
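The dlopen item above could mirror the pattern the new NvFBCLibrary.hpp (below) uses for libnvidia-fbc.so.1. A minimal sketch, assuming only that libcuda.so.1 exports cuInit; the loader function and typedef names here are illustrative and not part of this commit:

// Hypothetical sketch of loading CUDA at runtime instead of linking against it.
// Only cuInit is resolved here; a real loader would resolve every cu* function used.
#include <dlfcn.h>
#include <stdio.h>

typedef int CUresult_int;                        // CUresult is an enum in cuda.h; 0 == CUDA_SUCCESS
typedef CUresult_int (*cuInit_func)(unsigned int flags);

static void *cuda_library = nullptr;
static cuInit_func cu_init = nullptr;

static bool load_cuda_at_runtime() {
    dlerror(); // clear any previous error
    cuda_library = dlopen("libcuda.so.1", RTLD_NOW);
    if(!cuda_library) {
        fprintf(stderr, "Error: failed to load libcuda.so.1, error: %s\n", dlerror());
        return false;
    }
    cu_init = (cuInit_func)dlsym(cuda_library, "cuInit");
    if(!cu_init || cu_init(0) != 0) {
        fprintf(stderr, "Error: failed to resolve or initialize cuInit\n");
        dlclose(cuda_library);
        cuda_library = nullptr;
        return false;
    }
    return true;
}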

2006
include/NvFBC.h Normal file

File diff suppressed because it is too large

245
include/NvFBCLibrary.hpp Normal file

@ -0,0 +1,245 @@
#pragma once
#include "NvFBC.h"
#include <cuda.h>
#include <dlfcn.h>
#include <string.h>
#include <stdio.h>
class NvFBCLibrary {
public:
    ~NvFBCLibrary() {
        if(fbc_handle_created) {
            NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params;
            memset(&destroy_capture_params, 0, sizeof(destroy_capture_params));
            destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER;
            nv_fbc_function_list.nvFBCDestroyCaptureSession(nv_fbc_handle, &destroy_capture_params);

            NVFBC_DESTROY_HANDLE_PARAMS destroy_params;
            memset(&destroy_params, 0, sizeof(destroy_params));
            destroy_params.dwVersion = NVFBC_DESTROY_HANDLE_PARAMS_VER;
            nv_fbc_function_list.nvFBCDestroyHandle(nv_fbc_handle, &destroy_params);
        }

        if(library)
            dlclose(library);
    }

    bool load() {
        if(library)
            return true;

        dlerror(); // clear
        void *lib = dlopen("libnvidia-fbc.so.1", RTLD_NOW);
        if(!lib) {
            fprintf(stderr, "Error: failed to load libnvidia-fbc.so.1, error: %s\n", dlerror());
            return false;
        }

        nv_fbc_create_instance = (PNVFBCCREATEINSTANCE)dlsym(lib, "NvFBCCreateInstance");
        if(!nv_fbc_create_instance) {
            fprintf(stderr, "Error: unable to resolve symbol 'NvFBCCreateInstance'\n");
            dlclose(lib);
            return false;
        }

        memset(&nv_fbc_function_list, 0, sizeof(nv_fbc_function_list));
        nv_fbc_function_list.dwVersion = NVFBC_VERSION;
        NVFBCSTATUS status = nv_fbc_create_instance(&nv_fbc_function_list);
        if(status != NVFBC_SUCCESS) {
            fprintf(stderr, "Error: failed to create NvFBC instance (status: %d)\n", status);
            dlclose(lib);
            return false;
        }

        library = lib;
        return true;
    }

    // If |display_to_capture| is "screen", then the entire x11 screen is captured (all displays)
    bool create(const char *display_to_capture, uint32_t fps, /*out*/ uint32_t *display_width, /*out*/ uint32_t *display_height, uint32_t x = 0, uint32_t y = 0, uint32_t width = 0, uint32_t height = 0) {
        if(!library || !display_to_capture || !display_width || !display_height || fbc_handle_created)
            return false;

        const bool capture_region = (x > 0 && y > 0 && width > 0 && height > 0);

        NVFBCSTATUS status;
        NVFBC_TRACKING_TYPE tracking_type;
        bool capture_session_created = false;
        uint32_t output_id = 0;
        fbc_handle_created = false;

        NVFBC_CREATE_HANDLE_PARAMS create_params;
        memset(&create_params, 0, sizeof(create_params));
        create_params.dwVersion = NVFBC_CREATE_HANDLE_PARAMS_VER;

        status = nv_fbc_function_list.nvFBCCreateHandle(&nv_fbc_handle, &create_params);
        if(status != NVFBC_SUCCESS) {
            fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
            return false;
        }
        fbc_handle_created = true;

        NVFBC_GET_STATUS_PARAMS status_params;
        memset(&status_params, 0, sizeof(status_params));
        status_params.dwVersion = NVFBC_GET_STATUS_PARAMS_VER;

        status = nv_fbc_function_list.nvFBCGetStatus(nv_fbc_handle, &status_params);
        if(status != NVFBC_SUCCESS) {
            fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
            goto error_cleanup;
        }

        if(status_params.bCanCreateNow == NVFBC_FALSE) {
            fprintf(stderr, "Error: it's not possible to create a capture session on this system\n");
            goto error_cleanup;
        }

        tracking_type = strcmp(display_to_capture, "screen") == 0 ? NVFBC_TRACKING_SCREEN : NVFBC_TRACKING_OUTPUT;
        if(tracking_type == NVFBC_TRACKING_OUTPUT) {
            if(!status_params.bXRandRAvailable) {
                fprintf(stderr, "Error: the xrandr extension is not available\n");
                goto error_cleanup;
            }

            if(status_params.bInModeset) {
                fprintf(stderr, "Error: the x server is in modeset, unable to record\n");
                goto error_cleanup;
            }

            output_id = get_output_id_from_display_name(status_params.outputs, status_params.dwOutputNum, display_to_capture, display_width, display_height);
            if(output_id == 0) {
                fprintf(stderr, "Error: display '%s' not found\n", display_to_capture);
                goto error_cleanup;
            }
        } else {
            *display_width = status_params.screenSize.w;
            *display_height = status_params.screenSize.h;
        }

        NVFBC_CREATE_CAPTURE_SESSION_PARAMS create_capture_params;
        memset(&create_capture_params, 0, sizeof(create_capture_params));
        create_capture_params.dwVersion = NVFBC_CREATE_CAPTURE_SESSION_PARAMS_VER;
        create_capture_params.eCaptureType = NVFBC_CAPTURE_SHARED_CUDA;
        create_capture_params.bWithCursor = NVFBC_TRUE; // This will need to be disabled when using bAllowDirectCapture
        if(capture_region) {
            create_capture_params.captureBox = { x, y, width, height };
            *display_width = width;
            *display_height = height;
        }
        create_capture_params.eTrackingType = tracking_type;
        //create_capture_params.dwSamplingRateMs = 1000 / fps;
        if(tracking_type == NVFBC_TRACKING_OUTPUT)
            create_capture_params.dwOutputId = output_id;

        // TODO: Use create_capture_params.bAllowDirectCapture and create_capture_params.bPushModel
        status = nv_fbc_function_list.nvFBCCreateCaptureSession(nv_fbc_handle, &create_capture_params);
        if(status != NVFBC_SUCCESS) {
            fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
            goto error_cleanup;
        }
        capture_session_created = true;

        NVFBC_TOCUDA_SETUP_PARAMS setup_params;
        memset(&setup_params, 0, sizeof(setup_params));
        setup_params.dwVersion = NVFBC_TOCUDA_SETUP_PARAMS_VER;
        setup_params.eBufferFormat = NVFBC_BUFFER_FORMAT_BGRA;

        status = nv_fbc_function_list.nvFBCToCudaSetUp(nv_fbc_handle, &setup_params);
        if(status != NVFBC_SUCCESS) {
            fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
            goto error_cleanup;
        }

        return true;

        error_cleanup:
        if(fbc_handle_created) {
            if(capture_session_created) {
                NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params;
                memset(&destroy_capture_params, 0, sizeof(destroy_capture_params));
                destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER;
                nv_fbc_function_list.nvFBCDestroyCaptureSession(nv_fbc_handle, &destroy_capture_params);
            }

            NVFBC_DESTROY_HANDLE_PARAMS destroy_params;
            memset(&destroy_params, 0, sizeof(destroy_params));
            destroy_params.dwVersion = NVFBC_DESTROY_HANDLE_PARAMS_VER;
            nv_fbc_function_list.nvFBCDestroyHandle(nv_fbc_handle, &destroy_params);
            fbc_handle_created = false;
        }
        output_id = 0;
        return false;
    }

    bool capture(/*out*/ CUdeviceptr *cu_device_ptr, uint32_t *byte_size) {
        if(!library || !fbc_handle_created || !cu_device_ptr || !byte_size)
            return false;

        NVFBCSTATUS status;
        NVFBC_FRAME_GRAB_INFO frame_info;
        memset(&frame_info, 0, sizeof(frame_info));

        NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab_params;
        memset(&grab_params, 0, sizeof(grab_params));
        grab_params.dwVersion = NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER;
        grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT;
        grab_params.pFrameGrabInfo = &frame_info;
        grab_params.pCUDADeviceBuffer = cu_device_ptr;

        status = nv_fbc_function_list.nvFBCToCudaGrabFrame(nv_fbc_handle, &grab_params);
        if(status != NVFBC_SUCCESS) {
            fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
            return false;
        }

        *byte_size = frame_info.dwByteSize;
        // TODO: Check bIsNewFrame
        // TODO: Check dwWidth and dwHeight and update size in video output in ffmpeg. This can happen when xrandr is used to change monitor resolution
        return true;
    }
private:
    static char to_upper(char c) {
        if(c >= 'a' && c <= 'z')
            return c - 32;
        else
            return c;
    }

    static bool strcase_equals(const char *str1, const char *str2) {
        for(;;) {
            char c1 = to_upper(*str1);
            char c2 = to_upper(*str2);
            if(c1 != c2)
                return false;
            if(c1 == '\0' || c2 == '\0')
                return true;
            ++str1;
            ++str2;
        }
    }

    // Returns 0 on failure
    uint32_t get_output_id_from_display_name(NVFBC_RANDR_OUTPUT_INFO *outputs, uint32_t num_outputs, const char *display_name, uint32_t *display_width, uint32_t *display_height) {
        if(!outputs)
            return 0;

        for(uint32_t i = 0; i < num_outputs; ++i) {
            if(strcase_equals(outputs[i].name, display_name)) {
                *display_width = outputs[i].trackedBox.w;
                *display_height = outputs[i].trackedBox.h;
                return outputs[i].dwId;
            }
        }

        return 0;
    }
private:
    void *library = nullptr;
    PNVFBCCREATEINSTANCE nv_fbc_create_instance = nullptr;
    NVFBC_API_FUNCTION_LIST nv_fbc_function_list;
    NVFBC_SESSION_HANDLE nv_fbc_handle;
    bool fbc_handle_created = false;
};
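For orientation, a minimal usage sketch of the class above. It assumes a CUDA context has already been created with cuInit/cuCtxCreate, as the recorder's main() does in the diff below before calling create(), since the capture session uses NVFBC_CAPTURE_SHARED_CUDA; the function name record_screen_example is illustrative.

// Illustrative usage of NvFBCLibrary: load the library, create a capture session
// for the whole X screen, then grab one frame into a CUDA device pointer.
#include "NvFBCLibrary.hpp"
#include <stdint.h>

int record_screen_example() {
    NvFBCLibrary nv_fbc;
    uint32_t width = 0, height = 0;

    if(!nv_fbc.load())
        return 1;
    // "screen" records all displays; an xrandr output name records a single display
    if(!nv_fbc.create("screen", 60, &width, &height))
        return 1;

    CUdeviceptr cu_frame = 0;
    uint32_t byte_size = 0;
    if(nv_fbc.capture(&cu_frame, &byte_size)) {
        // cu_frame now points to a BGRA frame (width x height, byte_size bytes) in gpu memory;
        // the recorder copies it into the encoder's AVFrame with cuMemcpyDtoD (see the main() diff below)
    }
    return 0; // the NvFBCLibrary destructor tears down the capture session and handle
}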


@ -52,6 +52,8 @@ extern "C" {
#include <libavutil/hwcontext.h>
}
#include "../include/NvFBCLibrary.hpp"
#include <deque>
//#include <CL/cl.h>
@ -397,12 +399,13 @@ static AVStream *add_audio_stream(AVFormatContext *av_format_context, AVCodec **
static AVStream *add_video_stream(AVFormatContext *av_format_context, AVCodec **codec,
VideoQuality video_quality,
const WindowPixmap &window_pixmap,
int texture_width, int texture_height,
int fps) {
//*codec = avcodec_find_encoder(codec_id);
*codec = avcodec_find_encoder_by_name("h264_nvenc");
bool using_hevc = true;
*codec = avcodec_find_encoder_by_name("hevc_nvenc");
if (!*codec) {
*codec = avcodec_find_encoder_by_name("nvenc_h264");
*codec = avcodec_find_encoder_by_name("nvenc_hevc");
}
if (!*codec) {
fprintf(
@ -425,8 +428,8 @@ static AVStream *add_video_stream(AVFormatContext *av_format_context, AVCodec **
assert((*codec)->type == AVMEDIA_TYPE_VIDEO);
codec_context->codec_id = (*codec)->id;
fprintf(stderr, "codec id: %d\n", (*codec)->id);
codec_context->width = window_pixmap.texture_width & ~1;
codec_context->height = window_pixmap.texture_height & ~1;
codec_context->width = texture_width & ~1;
codec_context->height = texture_height & ~1;
codec_context->bit_rate = 7500000 + (codec_context->width * codec_context->height) / 2;
// Timebase: This is the fundamental unit of time (in seconds) in terms
// of which frame timestamps are represented. For fixed-fps content,
@ -439,7 +442,7 @@ static AVStream *add_video_stream(AVFormatContext *av_format_context, AVCodec **
codec_context->sample_aspect_ratio.num = 0;
codec_context->sample_aspect_ratio.den = 0;
codec_context->gop_size = fps * 2;
codec_context->max_b_frames = 2;
codec_context->max_b_frames = using_hevc ? 0 : 2;
codec_context->pix_fmt = AV_PIX_FMT_CUDA;
codec_context->color_range = AVCOL_RANGE_JPEG;
switch(video_quality) {
@ -577,24 +580,26 @@ static void open_video(AVCodec *codec, AVStream *stream,
exit(1);
}
CUresult res;
CUcontext old_ctx;
res = cuCtxPopCurrent(&old_ctx);
res = cuCtxPushCurrent(*cuda_context);
res = cuGraphicsGLRegisterImage(
cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D,
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
// cuGraphicsUnregisterResource(*cuda_graphics_resource);
if (res != CUDA_SUCCESS) {
const char *err_str;
cuGetErrorString(res, &err_str);
fprintf(stderr,
"Error: cuGraphicsGLRegisterImage failed, error %s, texture "
"id: %u\n",
err_str, window_pixmap.target_texture_id);
exit(1);
if(window_pixmap.target_texture_id != 0) {
CUresult res;
CUcontext old_ctx;
res = cuCtxPopCurrent(&old_ctx);
res = cuCtxPushCurrent(*cuda_context);
res = cuGraphicsGLRegisterImage(
cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D,
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
// cuGraphicsUnregisterResource(*cuda_graphics_resource);
if (res != CUDA_SUCCESS) {
const char *err_str;
cuGetErrorString(res, &err_str);
fprintf(stderr,
"Error: cuGraphicsGLRegisterImage failed, error %s, texture "
"id: %u\n",
err_str, window_pixmap.target_texture_id);
exit(1);
}
res = cuCtxPopCurrent(&old_ctx);
}
res = cuCtxPopCurrent(&old_ctx);
}
static void close_video(AVStream *video_stream, AVFrame *frame) {
@ -605,9 +610,10 @@ static void close_video(AVStream *video_stream, AVFrame *frame) {
static void usage() {
fprintf(stderr, "usage: gpu-screen-recorder -w <window_id> -c <container_format> -f <fps> [-a <audio_input>] [-q <quality>] [-r <replay_buffer_size_sec>] [-o <output_file>]\n");
fprintf(stderr, "OPTIONS:\n");
fprintf(stderr, " -w Window to record.\n");
fprintf(stderr, " -w Window to record or a display or \"screen\". The display is the display name in xrandr and if \"screen\" is selected then all displays are recorded and they are recorded in h265 (aka hevc). Recording a display requires a gpu with NvFBC support.\n");
//fprintf(stderr, " -s The screen region to capture in format WxH+X+Y. This is only applicable when -w is a display or \"screen\". Optional, the entire window/display/screen is recorded by default.\n");
fprintf(stderr, " -c Container format for output file, for example mp4, or flv.\n");
fprintf(stderr, " -f Framerate to record at.\n");
fprintf(stderr, " -f Framerate to record at. Clamped to [1,500].\n");
fprintf(stderr, " -a Audio device to record from (pulse audio device). Optional, disabled by default.\n");
fprintf(stderr, " -q Video quality. Should either be 'medium', 'high' or 'ultra'. Optional, set to 'medium' be default.\n");
fprintf(stderr, " -r Replay buffer size in seconds. If this is set, then only the last seconds as set by this option will be stored"
@ -628,17 +634,39 @@ struct Arg {
bool optional;
};
static bool is_hex_num(char c) {
return (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9');
}
static bool contains_non_hex_number(const char *str) {
size_t len = strlen(str);
if(len >= 2 && memcmp(str, "0x", 2) == 0) {
str += 2;
len -= 2;
}
for(size_t i = 0; i < len; ++i) {
char c = str[i];
if(c == '\0')
return false;
if(!is_hex_num(c))
return true;
}
return false;
}
int main(int argc, char **argv) {
signal(SIGINT, int_handler);
std::map<std::string, Arg> args = {
{ "-w", Arg { nullptr, false } },
//{ "-s", Arg { nullptr, true } },
{ "-c", Arg { nullptr, false } },
{ "-f", Arg { nullptr, false } },
{ "-a", Arg { nullptr, true } },
{ "-q", Arg { nullptr, true } },
{ "-o", Arg { nullptr, true } },
{ "-r", Arg { nullptr, true} }
{ "-r", Arg { nullptr, true } }
};
for(int i = 1; i < argc - 1; i += 2) {
@ -657,13 +685,30 @@ int main(int argc, char **argv) {
}
}
Window src_window_id = strtol(args["-w"].value, nullptr, 0);
uint32_t region_x = 0;
uint32_t region_y = 0;
uint32_t region_width = 0;
uint32_t region_height = 0;
/*
TODO: Fix this. Doesn't work for some reason
const char *screen_region = args["-s"].value;
if(screen_region) {
if(sscanf(screen_region, "%ux%u+%u+%u", &region_x, &region_y, &region_width, &region_height) != 4) {
fprintf(stderr, "Invalid value for -s '%s', expected a value in format WxH+X+Y\n", screen_region);
return 1;
}
}
*/
const char *container_format = args["-c"].value;
int fps = atoi(args["-f"].value);
if(fps <= 0 || fps > 255) {
fprintf(stderr, "invalid fps argument: %s\n", args["-f"].value);
if(fps == 0) {
fprintf(stderr, "Invalid fps argument: %s\n", args["-f"].value);
return 1;
}
if(fps > 500)
fps = 500;
const char *quality_str = args["-q"].value;
if(!quality_str)
@ -681,12 +726,6 @@ int main(int argc, char **argv) {
usage();
}
const char *filename = args["-o"].value;
if(!filename)
filename = "/dev/stdout";
const double target_fps = 1.0 / (double)fps;
int replay_buffer_size_secs = -1;
const char *replay_buffer_size_secs_str = args["-r"].value;
if(replay_buffer_size_secs_str) {
@ -698,70 +737,133 @@ int main(int argc, char **argv) {
replay_buffer_size_secs += 5; // Add a few seconds to account for lost packets because of non-keyframe packets being skipped
}
Display *dpy = XOpenDisplay(nullptr);
if (!dpy) {
fprintf(stderr, "Error: Failed to open display\n");
CUresult res;
res = cuInit(0);
if(res != CUDA_SUCCESS) {
fprintf(stderr, "Error: cuInit failed (result: %d)\n", res);
return {};
}
CUdevice cu_dev;
res = cuDeviceGet(&cu_dev, 0);
if(res != CUDA_SUCCESS) {
fprintf(stderr, "Unable to get CUDA device (result: %d)\n", res);
return 1;
}
bool has_name_pixmap = x11_supports_composite_named_window_pixmap(dpy);
if (!has_name_pixmap) {
fprintf(stderr, "Error: XCompositeNameWindowPixmap is not supported by "
"your X11 server\n");
CUcontext cu_ctx;
res = cuCtxCreate_v2(&cu_ctx, CU_CTX_SCHED_AUTO, cu_dev);
if(res != CUDA_SUCCESS) {
fprintf(stderr, "Unable to create CUDA context (result: %d)\n", res);
return 1;
}
XWindowAttributes attr;
if (!XGetWindowAttributes(dpy, src_window_id, &attr)) {
fprintf(stderr, "Error: Invalid window id: %lu\n", src_window_id);
return 1;
uint32_t window_width = 0;
uint32_t window_height = 0;
NvFBCLibrary nv_fbc_library;
const char *window_str = args["-w"].value;
Window src_window_id = None;
if(contains_non_hex_number(window_str)) {
if(!nv_fbc_library.load())
return 1;
if(!nv_fbc_library.create(window_str, fps, &window_width, &window_height, region_x, region_y, region_width, region_height))
return 1;
} else {
src_window_id = strtol(window_str, nullptr, 0);
if(src_window_id == None && errno == EINVAL) {
fprintf(stderr, "Invalid window number %s\n", window_str);
usage();
}
}
XCompositeRedirectWindow(dpy, src_window_id, CompositeRedirectAutomatic);
const char *filename = args["-o"].value;
if(!filename)
filename = "/dev/stdout";
// glXMakeContextCurrent(Display *dpy, GLXDrawable draw, GLXDrawable read,
// GLXContext ctx)
if (!glfwInit()) {
fprintf(stderr, "Error: Failed to initialize glfw\n");
return 1;
}
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow *window = glfwCreateWindow(1, 1, "gpu-screen-recorder", nullptr, nullptr);
if (!window) {
fprintf(stderr, "Error: Failed to create glfw window\n");
glfwTerminate();
return 1;
}
glfwMakeContextCurrent(window);
glfwSwapInterval(0);
glfwHideWindow(window);
//#if defined(DEBUG)
XSetErrorHandler(x11_error_handler);
XSetIOErrorHandler(x11_io_error_handler);
//#endif
glewExperimental = GL_TRUE;
GLenum nGlewError = glewInit();
if (nGlewError != GLEW_OK) {
fprintf(stderr, "%s - Error initializing GLEW! %s\n", __FUNCTION__,
glewGetErrorString(nGlewError));
return 1;
}
glGetError(); // to clear the error caused deep in GLEW
const double target_fps = 1.0 / (double)fps;
WindowPixmap window_pixmap;
if (!recreate_window_pixmap(dpy, src_window_id, window_pixmap)) {
fprintf(stderr, "Error: Failed to create glx pixmap for window: %lu\n",
src_window_id);
return 1;
Display *dpy = nullptr;
GLFWwindow *window = nullptr;
if(src_window_id) {
dpy = XOpenDisplay(nullptr);
if (!dpy) {
fprintf(stderr, "Error: Failed to open display\n");
return 1;
}
bool has_name_pixmap = x11_supports_composite_named_window_pixmap(dpy);
if (!has_name_pixmap) {
fprintf(stderr, "Error: XCompositeNameWindowPixmap is not supported by "
"your X11 server\n");
return 1;
}
XWindowAttributes attr;
if (!XGetWindowAttributes(dpy, src_window_id, &attr)) {
fprintf(stderr, "Error: Invalid window id: %lu\n", src_window_id);
return 1;
}
XCompositeRedirectWindow(dpy, src_window_id, CompositeRedirectAutomatic);
// glXMakeContextCurrent(Display *dpy, GLXDrawable draw, GLXDrawable read,
// GLXContext ctx)
if (!glfwInit()) {
fprintf(stderr, "Error: Failed to initialize glfw\n");
return 1;
}
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
window = glfwCreateWindow(1, 1, "gpu-screen-recorder", nullptr, nullptr);
if (!window) {
fprintf(stderr, "Error: Failed to create glfw window\n");
glfwTerminate();
return 1;
}
glfwMakeContextCurrent(window);
glfwSwapInterval(0);
glfwHideWindow(window);
//#if defined(DEBUG)
XSetErrorHandler(x11_error_handler);
XSetIOErrorHandler(x11_io_error_handler);
//#endif
glewExperimental = GL_TRUE;
GLenum nGlewError = glewInit();
if (nGlewError != GLEW_OK) {
fprintf(stderr, "%s - Error initializing GLEW! %s\n", __FUNCTION__,
glewGetErrorString(nGlewError));
return 1;
}
glGetError(); // to clear the error caused deep in GLEW
if (!recreate_window_pixmap(dpy, src_window_id, window_pixmap)) {
fprintf(stderr, "Error: Failed to create glx pixmap for window: %lu\n",
src_window_id);
return 1;
}
} else {
window_pixmap.texture_id = 0;
window_pixmap.target_texture_id = 0;
window_pixmap.texture_width = window_width;
window_pixmap.texture_height = window_height;
if (!glfwInit()) {
fprintf(stderr, "Error: Failed to initialize glfw\n");
return 1;
}
}
// Video start
@ -780,8 +882,7 @@ int main(int argc, char **argv) {
AVCodec *video_codec;
AVStream *video_stream =
add_video_stream(av_format_context, &video_codec, quality,
window_pixmap, fps);
add_video_stream(av_format_context, &video_codec, quality, window_pixmap.texture_width, window_pixmap.texture_height, fps);
if (!video_stream) {
fprintf(stderr, "Error: Failed to create video stream\n");
return 1;
@ -795,11 +896,6 @@ int main(int argc, char **argv) {
return 1;
}
if (cuInit(0) < 0) {
fprintf(stderr, "Error: cuInit failed\n");
return {};
}
AVBufferRef *device_ctx;
CUgraphicsResource cuda_graphics_resource;
open_video(video_codec, video_stream, window_pixmap, &device_ctx,
@ -841,7 +937,8 @@ int main(int argc, char **argv) {
// av_frame_free(&rgb_frame);
// avcodec_close(av_codec_context);
XSelectInput(dpy, src_window_id, StructureNotifyMask);
if(dpy)
XSelectInput(dpy, src_window_id, StructureNotifyMask);
/*
int damage_event;
@ -857,20 +954,21 @@ int main(int argc, char **argv) {
int frame_count = 0;
CUresult res;
CUcontext old_ctx;
res = cuCtxPopCurrent(&old_ctx);
res = cuCtxPushCurrent(*cuda_context);
// Get texture
res = cuGraphicsResourceSetMapFlags(
cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
res = cuGraphicsMapResources(1, &cuda_graphics_resource, 0);
// Map texture to cuda array
CUarray mapped_array;
res = cuGraphicsSubResourceGetMappedArray(&mapped_array,
cuda_graphics_resource, 0, 0);
if(src_window_id) {
res = cuCtxPopCurrent(&old_ctx);
res = cuCtxPushCurrent(*cuda_context);
// Get texture
res = cuGraphicsResourceSetMapFlags(
cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
res = cuGraphicsMapResources(1, &cuda_graphics_resource, 0);
// Map texture to cuda array
res = cuGraphicsSubResourceGetMappedArray(&mapped_array,
cuda_graphics_resource, 0, 0);
}
// Release texture
// res = cuGraphicsUnmapResources(1, &cuda_graphics_resource, 0);
@ -896,11 +994,12 @@ int main(int argc, char **argv) {
exit(1);
}
XWindowAttributes xwa;
XGetWindowAttributes(dpy, src_window_id, &xwa);
int window_width = xwa.width;
int window_height = xwa.height;
if(dpy) {
XWindowAttributes xwa;
XGetWindowAttributes(dpy, src_window_id, &xwa);
window_width = xwa.width;
window_height = xwa.height;
}
int original_window_width = window_width;
int original_window_height = window_height;
@ -999,67 +1098,70 @@ int main(int argc, char **argv) {
while (running) {
double frame_start = glfwGetTime();
glfwPollEvents();
glClear(GL_COLOR_BUFFER_BIT);
if (XCheckTypedWindowEvent(dpy, src_window_id, ConfigureNotify, &e) && e.xconfigure.window == src_window_id) {
// Window resize
if(e.xconfigure.width != window_width || e.xconfigure.height != window_height) {
window_width = e.xconfigure.width;
window_height = e.xconfigure.height;
window_resize_timer = glfwGetTime();
window_resized = true;
}
}
if(window)
glClear(GL_COLOR_BUFFER_BIT);
redraw = true;
const double window_resize_timeout = 1.0; // 1 second
if(window_resized && glfwGetTime() - window_resize_timer >= window_resize_timeout) {
window_resized = false;
fprintf(stderr, "Resize window!\n");
recreate_window_pixmap(dpy, src_window_id, window_pixmap);
// Resolution must be a multiple of two
//video_stream->codec->width = window_pixmap.texture_width & ~1;
//video_stream->codec->height = window_pixmap.texture_height & ~1;
cuGraphicsUnregisterResource(cuda_graphics_resource);
res = cuGraphicsGLRegisterImage(
&cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D,
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
if (res != CUDA_SUCCESS) {
const char *err_str;
cuGetErrorString(res, &err_str);
fprintf(stderr,
"Error: cuGraphicsGLRegisterImage failed, error %s, texture "
"id: %u\n",
err_str, window_pixmap.target_texture_id);
running = false;
break;
if(src_window_id) {
if (XCheckTypedWindowEvent(dpy, src_window_id, ConfigureNotify, &e) && e.xconfigure.window == src_window_id) {
// Window resize
if(e.xconfigure.width != window_width || e.xconfigure.height != window_height) {
window_width = e.xconfigure.width;
window_height = e.xconfigure.height;
window_resize_timer = glfwGetTime();
window_resized = true;
}
}
res = cuGraphicsResourceSetMapFlags(
cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
res = cuGraphicsMapResources(1, &cuda_graphics_resource, 0);
res = cuGraphicsSubResourceGetMappedArray(&mapped_array, cuda_graphics_resource, 0, 0);
const double window_resize_timeout = 1.0; // 1 second
if(window_resized && glfwGetTime() - window_resize_timer >= window_resize_timeout) {
window_resized = false;
fprintf(stderr, "Resize window!\n");
recreate_window_pixmap(dpy, src_window_id, window_pixmap);
// Resolution must be a multiple of two
//video_stream->codec->width = window_pixmap.texture_width & ~1;
//video_stream->codec->height = window_pixmap.texture_height & ~1;
av_frame_unref(frame);
if (av_hwframe_get_buffer(video_stream->codec->hw_frames_ctx, frame, 0) < 0) {
fprintf(stderr, "Error: av_hwframe_get_buffer failed\n");
running = false;
break;
cuGraphicsUnregisterResource(cuda_graphics_resource);
res = cuGraphicsGLRegisterImage(
&cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D,
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
if (res != CUDA_SUCCESS) {
const char *err_str;
cuGetErrorString(res, &err_str);
fprintf(stderr,
"Error: cuGraphicsGLRegisterImage failed, error %s, texture "
"id: %u\n",
err_str, window_pixmap.target_texture_id);
running = false;
break;
}
res = cuGraphicsResourceSetMapFlags(
cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
res = cuGraphicsMapResources(1, &cuda_graphics_resource, 0);
res = cuGraphicsSubResourceGetMappedArray(&mapped_array, cuda_graphics_resource, 0, 0);
av_frame_unref(frame);
if (av_hwframe_get_buffer(video_stream->codec->hw_frames_ctx, frame, 0) < 0) {
fprintf(stderr, "Error: av_hwframe_get_buffer failed\n");
running = false;
break;
}
frame->pts = frame_count;
if(window_width < original_window_width)
frame->width = window_pixmap.texture_width & ~1;
else
frame->width = original_window_width;
if(window_height < original_window_height)
frame->height = window_pixmap.texture_height & ~1;
else
frame->height = original_window_height;
}
frame->pts = frame_count;
if(window_width < original_window_width)
frame->width = window_pixmap.texture_width & ~1;
else
frame->width = original_window_width;
if(window_height < original_window_height)
frame->height = window_pixmap.texture_height & ~1;
else
frame->height = original_window_height;
}
++fps_counter;
@ -1078,33 +1180,64 @@ int main(int argc, char **argv) {
if (frame_time_overflow >= 0.0) {
frame_timer_start = time_now - frame_time_overflow;
bool frame_captured = true;
if(redraw) {
redraw = false;
// TODO: Use a framebuffer instead. glCopyImageSubData requires
// opengl 4.2
glCopyImageSubData(
window_pixmap.texture_id, GL_TEXTURE_2D, 0, 0, 0, 0,
window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0,
window_pixmap.texture_width, window_pixmap.texture_height, 1);
glfwSwapBuffers(window);
// int err = glGetError();
// fprintf(stderr, "error: %d\n", err);
if(src_window_id) {
// TODO: Use a framebuffer instead. glCopyImageSubData requires
// opengl 4.2
glCopyImageSubData(
window_pixmap.texture_id, GL_TEXTURE_2D, 0, 0, 0, 0,
window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0,
window_pixmap.texture_width, window_pixmap.texture_height, 1);
glfwSwapBuffers(window);
// int err = glGetError();
// fprintf(stderr, "error: %d\n", err);
CUDA_MEMCPY2D memcpy_struct;
memcpy_struct.srcXInBytes = 0;
memcpy_struct.srcY = 0;
memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
CUDA_MEMCPY2D memcpy_struct;
memcpy_struct.srcXInBytes = 0;
memcpy_struct.srcY = 0;
memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
memcpy_struct.dstXInBytes = 0;
memcpy_struct.dstY = 0;
memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
memcpy_struct.dstXInBytes = 0;
memcpy_struct.dstY = 0;
memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
memcpy_struct.srcArray = mapped_array;
memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
memcpy_struct.dstPitch = frame->linesize[0];
memcpy_struct.WidthInBytes = frame->width * 4;
memcpy_struct.Height = frame->height;
cuMemcpy2D(&memcpy_struct);
memcpy_struct.srcArray = mapped_array;
memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
memcpy_struct.dstPitch = frame->linesize[0];
memcpy_struct.WidthInBytes = frame->width * 4;
memcpy_struct.Height = frame->height;
cuMemcpy2D(&memcpy_struct);
frame_captured = true;
} else {
uint32_t byte_size;
CUdeviceptr src_cu_device_ptr;
frame_captured = nv_fbc_library.capture(&src_cu_device_ptr, &byte_size);
if(frame_captured) {
// TODO: Is it possible to bypass this copy?
/*
CUDA_MEMCPY2D memcpy_struct;
memcpy_struct.srcXInBytes = 0;
memcpy_struct.srcY = 0;
memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
memcpy_struct.dstXInBytes = 0;
memcpy_struct.dstY = 0;
memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
memcpy_struct.srcDevice = src_cu_device_ptr;
memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
memcpy_struct.dstPitch = frame->linesize[0];
memcpy_struct.WidthInBytes = frame->width * 4;
memcpy_struct.Height = frame->height;
cuMemcpy2D(&memcpy_struct);
*/
cuMemcpyDtoD((CUdeviceptr)frame->data[0], src_cu_device_ptr, byte_size);
//frame->data[0] = (uint8_t*)src_cu_device_ptr;
}
}
// res = cuCtxPopCurrent(&old_ctx);
}
@ -1196,6 +1329,8 @@ int main(int argc, char **argv) {
// avformat_free_context(av_format_context);
// cleanup_window_pixmap(dpy, window_pixmap);
XCompositeUnredirectWindow(dpy, src_window_id, CompositeRedirectAutomatic);
XCloseDisplay(dpy);
if(dpy) {
XCompositeUnredirectWindow(dpy, src_window_id, CompositeRedirectAutomatic);
XCloseDisplay(dpy);
}
}