Remove unnecessary cuda memcpy when using nvfbc
parent c43fa5e4ee
commit 6a01677e23
.gitignore (vendored): 3 changed lines

@@ -7,6 +7,5 @@ tests/compile_commands.json
 .clangd/
 .cache/
 
-main.o
-sound.o
+*.o
 gpu-screen-recorder
TODO: 6 changed lines

@@ -1,11 +1,7 @@
 Check for reparent.
 Only add window to list if its the window is a topmost window.
-Use nvEncoder api directly? maybe with this we could copy the window opengl texture directly to the gpu which doesn't work right now for some reason.
-Right now we are required to copy the opengl texture to another opengl texture first.
-nvEncRegisterResource allows registering an opengl texture directly with NV_ENC_INPUT_RESOURCE_OPENGL_TEX and using that directly in the encoding.
 Load cuda at runtime with dlopen.
 Track window damages and only update then. That is better for output file size.
-Remove cuda to cuda copy when using nvFBC if possible. ffmpeg is getting in the way.
 Getting the texture of a window when using a compositor is an nvidia specific limitation. When gpu-screen-recorder supports other gpus then this can be ignored.
 Remove dependency on glfw (and glew?).
-Quickly changing workspace and back while recording under i3 breaks the screen recorder. The resize is triggered and it fails to recreate texture (fail to get texture size, texture id probably == 0).
+Quickly changing workspace and back while recording under i3 breaks the screen recorder. i3 probably unmaps windows in other workspaces.
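The remaining "Load cuda at runtime with dlopen" item is about dropping the link-time dependency on libcuda. A minimal sketch of that approach; the Cuda struct and load_cuda helper are hypothetical names, only libcuda.so.1 and the driver entry points (cuInit, cuDeviceGetCount) are real, and cuda.h is still needed at build time for the types:

#include <dlfcn.h>
#include <cstdio>
#include <cuda.h>

// Hypothetical holder for the few driver-API functions the recorder needs.
struct Cuda {
    void *lib = nullptr;
    CUresult (*cuInit)(unsigned int flags) = nullptr;
    CUresult (*cuDeviceGetCount)(int *count) = nullptr;
};

// Resolve the symbols at runtime instead of linking against libcuda directly.
static bool load_cuda(Cuda *cuda) {
    cuda->lib = dlopen("libcuda.so.1", RTLD_NOW);
    if(!cuda->lib) {
        fprintf(stderr, "Error: failed to load libcuda.so.1\n");
        return false;
    }
    cuda->cuInit = (CUresult (*)(unsigned int))dlsym(cuda->lib, "cuInit");
    cuda->cuDeviceGetCount = (CUresult (*)(int*))dlsym(cuda->lib, "cuDeviceGetCount");
    return cuda->cuInit && cuda->cuDeviceGetCount;
}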
src/main.cpp: 101 changed lines

@@ -310,23 +310,6 @@ static bool recreate_window_pixmap(Display *dpy, Window window_id,
     return pixmap.texture_id != 0 && pixmap.target_texture_id != 0;
 }
 
-std::vector<std::string> get_hardware_acceleration_device_names() {
-    int iGpu = 0;
-    int nGpu = 0;
-    cuDeviceGetCount(&nGpu);
-    if (iGpu < 0 || iGpu >= nGpu) {
-        fprintf(stderr, "Error: failed...\n");
-        return {};
-    }
-
-    CUdevice cuDevice = 0;
-    cuDeviceGet(&cuDevice, iGpu);
-    char deviceName[80];
-    cuDeviceGetName(deviceName, sizeof(deviceName), cuDevice);
-    fprintf(stderr, "device name: %s\n", deviceName);
-    return {deviceName};
-}
-
 // |stream| is only required for non-replay mode
 static void receive_frames(AVCodecContext *av_codec_context, int stream_index, AVStream *stream, AVFrame *frame,
                            AVFormatContext *av_format_context,
@@ -438,7 +421,6 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
 
     assert(codec->type == AVMEDIA_TYPE_VIDEO);
     codec_context->codec_id = codec->id;
-    fprintf(stderr, "codec id: %d\n", codec->id);
     codec_context->width = record_width & ~1;
     codec_context->height = record_height & ~1;
     codec_context->bit_rate = 7500000 + (codec_context->width * codec_context->height) / 2;
@@ -464,6 +446,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
             //av_opt_set(codec_context->priv_data, "preset", "slow", 0);
             //av_opt_set(codec_context->priv_data, "profile", "high", 0);
             //codec_context->profile = FF_PROFILE_H264_HIGH;
+            av_opt_set(codec_context->priv_data, "preset", "p4", 0);
             break;
         case VideoQuality::HIGH:
             codec_context->qmin = 12;
@@ -471,6 +454,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
             //av_opt_set(codec_context->priv_data, "preset", "slow", 0);
             //av_opt_set(codec_context->priv_data, "profile", "high", 0);
             //codec_context->profile = FF_PROFILE_H264_HIGH;
+            av_opt_set(codec_context->priv_data, "preset", "p6", 0);
             break;
         case VideoQuality::ULTRA:
             codec_context->bit_rate = 10000000 + (codec_context->width * codec_context->height) / 2;
@@ -479,6 +463,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
             //av_opt_set(codec_context->priv_data, "preset", "veryslow", 0);
             //av_opt_set(codec_context->priv_data, "profile", "high", 0);
             //codec_context->profile = FF_PROFILE_H264_HIGH;
+            av_opt_set(codec_context->priv_data, "preset", "p7", 0);
             break;
     }
     if (codec_context->codec_id == AV_CODEC_ID_MPEG1VIDEO)
@@ -486,6 +471,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
 
     // stream->time_base = codec_context->time_base;
     // codec_context->ticks_per_frame = 30;
+    av_opt_set(codec_context->priv_data, "tune", "hq", 0);
 
     // Some formats want stream headers to be seperate
     if (av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
|
|||||||
|
|
||||||
static void open_video(AVCodecContext *codec_context,
|
static void open_video(AVCodecContext *codec_context,
|
||||||
WindowPixmap &window_pixmap, AVBufferRef **device_ctx,
|
WindowPixmap &window_pixmap, AVBufferRef **device_ctx,
|
||||||
CUgraphicsResource *cuda_graphics_resource) {
|
CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
std::vector<std::string> hardware_accelerated_devices =
|
*device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
|
||||||
get_hardware_acceleration_device_names();
|
if(!*device_ctx) {
|
||||||
if (hardware_accelerated_devices.empty()) {
|
fprintf(stderr, "Error: Failed to create hardware device context\n");
|
||||||
fprintf(
|
|
||||||
stderr,
|
|
||||||
"Error: No hardware accelerated device was found on your system\n");
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (av_hwdevice_ctx_create(device_ctx, AV_HWDEVICE_TYPE_CUDA,
|
AVHWDeviceContext *hw_device_context = (AVHWDeviceContext *)(*device_ctx)->data;
|
||||||
hardware_accelerated_devices[0].c_str(), NULL,
|
AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext *)hw_device_context->hwctx;
|
||||||
0) < 0) {
|
cuda_device_context->cuda_ctx = cuda_context;
|
||||||
fprintf(stderr,
|
if(av_hwdevice_ctx_init(*device_ctx) < 0) {
|
||||||
"Error: Failed to create hardware device context for gpu: %s\n",
|
fprintf(stderr, "Error: Failed to create hardware device context\n");
|
||||||
hardware_accelerated_devices[0].c_str());
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
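This hunk is the core change in open_video: instead of letting av_hwdevice_ctx_create() create and own its own CUDA context from a device name, the context created in main() with cuCtxCreate_v2 is handed to FFmpeg directly. A sketch of that pattern on its own, using the same FFmpeg calls; only the wrap_cuda_context helper name is illustrative:

extern "C" {
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_cuda.h>
}
#include <cuda.h>

static AVBufferRef* wrap_cuda_context(CUcontext cuda_context) {
    AVBufferRef *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
    if(!device_ctx)
        return nullptr;

    AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data;
    AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx;
    cuda_device_context->cuda_ctx = cuda_context; // reuse the context created in main()

    if(av_hwdevice_ctx_init(device_ctx) < 0) {
        av_buffer_unref(&device_ctx);
        return nullptr;
    }
    return device_ctx;
}

Keeping NvFBC, the OpenGL interop and the encoder on one shared CUcontext is presumably what makes the later zero-copy change safe: a CUdeviceptr captured by NvFBC stays valid in the context the encoder uses.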
@@ -576,21 +558,11 @@ static void open_video(AVCodecContext *codec_context,
         exit(1);
     }
 
-    AVHWDeviceContext *hw_device_context =
-        (AVHWDeviceContext *)(*device_ctx)->data;
-    AVCUDADeviceContext *cuda_device_context =
-        (AVCUDADeviceContext *)hw_device_context->hwctx;
-    CUcontext *cuda_context = &(cuda_device_context->cuda_ctx);
-    if (!cuda_context) {
-        fprintf(stderr, "Error: No cuda context\n");
-        exit(1);
-    }
-
     if(window_pixmap.target_texture_id != 0) {
         CUresult res;
         CUcontext old_ctx;
         res = cuCtxPopCurrent(&old_ctx);
-        res = cuCtxPushCurrent(*cuda_context);
+        res = cuCtxPushCurrent(cuda_context);
         res = cuGraphicsGLRegisterImage(
             cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D,
             CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
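For the window-capture path (the if(window_pixmap.target_texture_id != 0) branch above), the registered OpenGL texture is later mapped and read back as a CUDA array. A minimal sketch of that register/map/get-array sequence; cuGraphicsGLRegisterImage and cuGraphicsResourceSetMapFlags appear in this diff, cuGraphicsMapResources and cuGraphicsSubResourceGetMappedArray are the standard follow-up driver calls, and the map_gl_texture helper with its error-free flow is illustrative:

#include <cuda.h>
#include <cudaGL.h>
#include <GL/gl.h>

static CUarray map_gl_texture(CUgraphicsResource *resource, unsigned int gl_texture_id) {
    cuGraphicsGLRegisterImage(resource, gl_texture_id, GL_TEXTURE_2D,
                              CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
    cuGraphicsResourceSetMapFlags(*resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
    cuGraphicsMapResources(1, resource, 0);

    // Sub-resource 0, mip level 0 of the registered texture
    CUarray mapped_array = nullptr;
    cuGraphicsSubResourceGetMappedArray(&mapped_array, *resource, 0, 0);
    return mapped_array;
}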
@@ -930,21 +902,34 @@ int main(int argc, char **argv) {
 
     res = cuInit(0);
     if(res != CUDA_SUCCESS) {
-        fprintf(stderr, "Error: cuInit failed (result: %d)\n", res);
-        return {};
+        const char *err_str;
+        cuGetErrorString(res, &err_str);
+        fprintf(stderr, "Error: cuInit failed, error %s (result: %d)\n", err_str, res);
+        return 1;
+    }
+
+    int nGpu = 0;
+    cuDeviceGetCount(&nGpu);
+    if (nGpu <= 0) {
+        fprintf(stderr, "Error: no cuda supported devices found\n");
+        return 1;
     }
 
     CUdevice cu_dev;
     res = cuDeviceGet(&cu_dev, 0);
     if(res != CUDA_SUCCESS) {
-        fprintf(stderr, "Unable to get CUDA device (result: %d)\n", res);
+        const char *err_str;
+        cuGetErrorString(res, &err_str);
+        fprintf(stderr, "Error: unable to get CUDA device, error: %s (result: %d)\n", err_str, res);
         return 1;
     }
 
     CUcontext cu_ctx;
     res = cuCtxCreate_v2(&cu_ctx, CU_CTX_SCHED_AUTO, cu_dev);
     if(res != CUDA_SUCCESS) {
-        fprintf(stderr, "Unable to create CUDA context (result: %d)\n", res);
+        const char *err_str;
+        cuGetErrorString(res, &err_str);
+        fprintf(stderr, "Error: unable to create CUDA context, error: %s (result: %d)\n", err_str, res);
         return 1;
     }
 
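The three error paths above repeat the same cuGetErrorString plus fprintf pattern. A small helper is one way to keep that in a single place; check_cu is not part of the commit, just a possible refactor built on the same driver call:

#include <cuda.h>
#include <cstdio>

// Print a readable CUDA driver error and return whether the call succeeded.
static bool check_cu(CUresult res, const char *description) {
    if(res == CUDA_SUCCESS)
        return true;
    const char *err_str = "unknown";
    cuGetErrorString(res, &err_str);
    fprintf(stderr, "Error: %s, error: %s (result: %d)\n", description, err_str, res);
    return false;
}

// Usage in main() could then look like:
//   if(!check_cu(cuInit(0), "cuInit failed"))
//       return 1;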
@@ -1124,7 +1109,7 @@ int main(int argc, char **argv) {
 
     AVBufferRef *device_ctx;
     CUgraphicsResource cuda_graphics_resource;
-    open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource);
+    open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx);
     if(video_stream)
         avcodec_parameters_from_context(video_stream->codecpar, video_codec_context);
 
@@ -1161,16 +1146,6 @@ int main(int argc, char **argv) {
         }
     }
 
-    AVHWDeviceContext *hw_device_context =
-        (AVHWDeviceContext *)device_ctx->data;
-    AVCUDADeviceContext *cuda_device_context =
-        (AVCUDADeviceContext *)hw_device_context->hwctx;
-    CUcontext *cuda_context = &(cuda_device_context->cuda_ctx);
-    if (!cuda_context) {
-        fprintf(stderr, "Error: No cuda context\n");
-        exit(1);
-    }
-
     // av_frame_free(&rgb_frame);
     // avcodec_close(av_codec_context);
 
@@ -1195,7 +1170,7 @@ int main(int argc, char **argv) {
         CUarray mapped_array;
         if(src_window_id) {
             res = cuCtxPopCurrent(&old_ctx);
-            res = cuCtxPushCurrent(*cuda_context);
+            res = cuCtxPushCurrent(cu_ctx);
 
             // Get texture
             res = cuGraphicsResourceSetMapFlags(
@@ -1431,6 +1406,8 @@ int main(int argc, char **argv) {
             // int err = glGetError();
             // fprintf(stderr, "error: %d\n", err);
 
+            // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id
+
             CUDA_MEMCPY2D memcpy_struct;
             memcpy_struct.srcXInBytes = 0;
             memcpy_struct.srcY = 0;
@@ -1449,11 +1426,11 @@ int main(int argc, char **argv) {
 
             frame_captured = true;
         } else {
-            uint32_t byte_size;
-            CUdeviceptr src_cu_device_ptr;
+            // TODO: Check when src_cu_device_ptr changes and re-register resource
+            uint32_t byte_size = 0;
+            CUdeviceptr src_cu_device_ptr = 0;
             frame_captured = nv_fbc_library.capture(&src_cu_device_ptr, &byte_size);
-            if(frame_captured)
-                cuMemcpyDtoD((CUdeviceptr)frame->data[0], src_cu_device_ptr, byte_size);
+            frame->data[0] = (uint8_t*)src_cu_device_ptr;
         }
         // res = cuCtxPopCurrent(&old_ctx);
     }
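This last hunk is the fix named in the commit message: nv_fbc_library.capture() already returns a CUdeviceptr in the shared CUDA context, so the commit points frame->data[0] straight at the captured buffer and drops the device-to-device cuMemcpyDtoD. The window-capture branch keeps its copy because a mapped OpenGL texture is exposed as a CUarray rather than linear device memory. For reference, a sketch of that remaining array-to-device 2D copy; the copy_array_to_frame helper and the size parameters are illustrative, while the CUDA_MEMCPY2D field names are the real driver-API struct members:

#include <cuda.h>
#include <cstring>

static CUresult copy_array_to_frame(CUarray src, CUdeviceptr dst,
                                    size_t width_bytes, size_t height) {
    CUDA_MEMCPY2D memcpy_struct;
    memset(&memcpy_struct, 0, sizeof(memcpy_struct)); // zero offsets and unused fields
    memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY;
    memcpy_struct.srcArray = src;                     // mapped OpenGL texture
    memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    memcpy_struct.dstDevice = dst;                    // frame->data[0] of the hardware AVFrame
    memcpy_struct.dstPitch = width_bytes;
    memcpy_struct.WidthInBytes = width_bytes;
    memcpy_struct.Height = height;
    return cuMemcpy2D(&memcpy_struct);
}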