Patch summary: rename GSR_DESTINATION_COLOR_RGB to GSR_DESTINATION_COLOR_BGR (the RGB shader becomes a BGR shader that swizzles .bgra), and make kms_cuda bind the captured DRM plane to an input texture, draw it through gsr_color_conversion into a target texture, and register that target texture with CUDA via cuGraphicsGLRegisterImage.
The kms_cuda "failed to get kms" log message keeps its gsr_capture_kms_cuda_capture prefix (it is left as an unchanged context line).

diff --git a/include/color_conversion.h b/include/color_conversion.h index def710d..738cba5 100644 --- a/include/color_conversion.h +++ b/include/color_conversion.h @@ -9,7 +9,7 @@ typedef enum { } gsr_source_color; typedef enum { - GSR_DESTINATION_COLOR_RGB, + GSR_DESTINATION_COLOR_BGR, GSR_DESTINATION_COLOR_NV12 /* YUV420, BT709, limited */ } gsr_destination_color; diff --git a/src/capture/kms_cuda.c b/src/capture/kms_cuda.c index e7f7ea5..d93d603 100644 --- a/src/capture/kms_cuda.c +++ b/src/capture/kms_cuda.c @@ -1,6 +1,7 @@ #include "../../include/capture/kms_cuda.h" #include "../../kms/client/kms_client.h" #include "../../include/utils.h" +#include "../../include/color_conversion.h" #include "../../include/cuda.h" #include #include @@ -11,6 +12,13 @@ #include #include +/* + TODO: Use dummy pool for cuda buffer so we can create our own cuda buffers from pixel buffer objects + and copy the input textures to the pixel buffer objects. Use sw_format NV12 as well. Then this is + similar to kms_vaapi. This allows us to remove one extra texture and texture copy. 
+*/ +/* TODO: Support cursor plane capture when nvidia supports cursor plane */ + #define MAX_CONNECTOR_IDS 32 typedef struct { @@ -39,6 +47,10 @@ typedef struct { CUgraphicsResource cuda_graphics_resource; CUarray mapped_array; + + unsigned int input_texture; + unsigned int target_texture; + gsr_color_conversion color_conversion; } gsr_capture_kms_cuda; static int max_int(int a, int b) { @@ -183,15 +195,51 @@ static int gsr_capture_kms_cuda_start(gsr_capture *cap, AVCodecContext *video_co return 0; } -static uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { - return (d << 24) | (c << 16) | (b << 8) | a; +static unsigned int gl_create_texture(gsr_capture_kms_cuda *cap_kms, int width, int height) { + unsigned int texture_id = 0; + cap_kms->params.egl->glGenTextures(1, &texture_id); + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, texture_id); + cap_kms->params.egl->glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + return texture_id; +} + +static bool cuda_register_opengl_texture(gsr_capture_kms_cuda *cap_kms) { + CUresult res; + CUcontext old_ctx; + res = cap_kms->cuda.cuCtxPushCurrent_v2(cap_kms->cuda.cu_ctx); + // TODO: Use cuGraphicsEGLRegisterImage instead with the window egl image (don't use window_texture). 
+ // That removes the need for an extra texture and texture copy + res = cap_kms->cuda.cuGraphicsGLRegisterImage( + &cap_kms->cuda_graphics_resource, cap_kms->target_texture, GL_TEXTURE_2D, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); + if (res != CUDA_SUCCESS) { + const char *err_str = "unknown"; + cap_kms->cuda.cuGetErrorString(res, &err_str); + fprintf(stderr, "gsr error: cuda_register_opengl_texture: cuGraphicsGLRegisterImage failed, error: %s, texture " "id: %u\n", err_str, cap_kms->target_texture); + res = cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + res = cap_kms->cuda.cuGraphicsResourceSetMapFlags(cap_kms->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); + res = cap_kms->cuda.cuGraphicsMapResources(1, &cap_kms->cuda_graphics_resource, 0); + + res = cap_kms->cuda.cuGraphicsSubResourceGetMappedArray(&cap_kms->mapped_array, cap_kms->cuda_graphics_resource, 0, 0); + res = cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); + return true; } static void gsr_capture_kms_cuda_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { gsr_capture_kms_cuda *cap_kms = cap->priv; // TODO: - //cap_kms->params.egl->glClear(GL_COLOR_BUFFER_BIT); + cap_kms->params.egl->glClear(GL_COLOR_BUFFER_BIT); if(!cap_kms->created_hw_frame) { cap_kms->created_hw_frame = true; @@ -219,6 +267,43 @@ static void gsr_capture_kms_cuda_tick(gsr_capture *cap, AVCodecContext *video_co cap_kms->stop_is_error = true; return; } + + cap_kms->params.egl->glGenTextures(1, &cap_kms->input_texture); + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, cap_kms->input_texture); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + cap_kms->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + 
cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + cap_kms->target_texture = gl_create_texture(cap_kms, video_codec_context->width, video_codec_context->height); + if(cap_kms->target_texture == 0) { + fprintf(stderr, "gsr error: gsr_capture_kms_cuda_tick: failed to create opengl texture\n"); + cap_kms->should_stop = true; + cap_kms->stop_is_error = true; + return; + } + + if(!cuda_register_opengl_texture(cap_kms)) { + cap_kms->should_stop = true; + cap_kms->stop_is_error = true; + return; + } + + gsr_color_conversion_params color_conversion_params = {0}; + color_conversion_params.egl = cap_kms->params.egl; + color_conversion_params.source_color = GSR_SOURCE_COLOR_RGB; + color_conversion_params.destination_color = GSR_DESTINATION_COLOR_BGR; + + color_conversion_params.destination_textures[0] = cap_kms->target_texture; + color_conversion_params.num_destination_textures = 1; + + if(gsr_color_conversion_init(&cap_kms->color_conversion, &color_conversion_params) != 0) { + fprintf(stderr, "gsr error: gsr_capture_kms_cuda_tick: failed to create color conversion\n"); + cap_kms->should_stop = true; + cap_kms->stop_is_error = true; + return; + } } } @@ -277,25 +362,6 @@ static gsr_kms_response_fd* find_largest_drm(gsr_kms_response *kms_response) { return largest_drm; } -static bool gsr_capture_kms_register_egl_image_in_cuda(gsr_capture_kms_cuda *cap_kms, EGLImage image) { - CUcontext old_ctx; - CUresult res = cap_kms->cuda.cuCtxPushCurrent_v2(cap_kms->cuda.cu_ctx); - res = cap_kms->cuda.cuGraphicsEGLRegisterImage(&cap_kms->cuda_graphics_resource, image, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); - if(res != CUDA_SUCCESS) { - const char *err_str = "unknown"; - cap_kms->cuda.cuGetErrorString(res, &err_str); - fprintf(stderr, "gsr error: cuda_register_egl_image: cuGraphicsEGLRegisterImage failed, error: %s (%d), egl image %p\n", - err_str, res, image); - res = cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); - return false; - } - - res = 
cap_kms->cuda.cuGraphicsResourceSetMapFlags(cap_kms->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); - res = cap_kms->cuda.cuGraphicsSubResourceGetMappedArray(&cap_kms->mapped_array, cap_kms->cuda_graphics_resource, 0, 0); - res = cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); - return true; -} - static void gsr_capture_kms_unload_cuda_graphics(gsr_capture_kms_cuda *cap_kms) { if(cap_kms->cuda.cu_ctx) { CUcontext old_ctx; @@ -311,6 +377,14 @@ static void gsr_capture_kms_unload_cuda_graphics(gsr_capture_kms_cuda *cap_kms) } } +static gsr_kms_response_fd* find_cursor_drm(gsr_kms_response *kms_response) { + for(int i = 0; i < kms_response->num_fds; ++i) { + if(kms_response->fds[i].is_cursor) + return &kms_response->fds[i]; + } + return NULL; +} + static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { (void)frame; gsr_capture_kms_cuda *cap_kms = cap->priv; @@ -323,6 +397,8 @@ static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { cap_kms->kms_response.num_fds = 0; gsr_kms_response_fd *drm_fd = NULL; + gsr_kms_response_fd *cursor_drm_fd = NULL; + bool capture_is_combined_plane = false; if(cap_kms->using_wayland_capture) { gsr_egl_update(cap_kms->params.egl); cap_kms->wayland_kms_data.fd = cap_kms->params.egl->fd; @@ -340,13 +416,16 @@ static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { cap_kms->wayland_kms_data.src_w = cap_kms->wayland_kms_data.width; cap_kms->wayland_kms_data.src_h = cap_kms->wayland_kms_data.height; + cap_kms->capture_pos.x = cap_kms->wayland_kms_data.x; + cap_kms->capture_pos.y = cap_kms->wayland_kms_data.y; + if(cap_kms->wayland_kms_data.fd <= 0) return -1; drm_fd = &cap_kms->wayland_kms_data; } else { if(gsr_kms_client_get_kms(&cap_kms->kms_client, &cap_kms->kms_response) != 0) { fprintf(stderr, "gsr error: 
gsr_capture_kms_cuda_capture: failed to get kms, error: %d (%s)\n", cap_kms->kms_response.result, cap_kms->kms_response.err_msg); return -1; } @@ -365,22 +444,26 @@ static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { break; } + // Will never happen on wayland unless the target monitor has been disconnected if(!drm_fd) { drm_fd = find_first_combined_drm(&cap_kms->kms_response); if(!drm_fd) drm_fd = find_largest_drm(&cap_kms->kms_response); + capture_is_combined_plane = true; } - } - // TODO: Use capture pos and capture size. Right now they are not used here and doesn't really need to be used on wayland - // and kms_cuda is only used on wayland right now so maybe it can be ignored. + cursor_drm_fd = find_cursor_drm(&cap_kms->kms_response); + } if(!drm_fd) return -1; + if(!capture_is_combined_plane && cursor_drm_fd && cursor_drm_fd->connector_id != drm_fd->connector_id) + cursor_drm_fd = NULL; + const intptr_t img_attr[] = { //EGL_IMAGE_PRESERVED_KHR, EGL_TRUE, - EGL_LINUX_DRM_FOURCC_EXT, fourcc('A', 'R', '2', '4'),//cap_kms->params.egl->pixel_format, ARGB8888 + EGL_LINUX_DRM_FOURCC_EXT, drm_fd->pixel_format,//cap_kms->params.egl->pixel_format, ARGB8888 EGL_WIDTH, drm_fd->width,//cap_kms->params.egl->width, EGL_HEIGHT, drm_fd->height,//cap_kms->params.egl->height, EGL_DMA_BUF_PLANE0_FD_EXT, drm_fd->fd,//cap_kms->params.egl->fd, @@ -391,17 +474,22 @@ static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { EGL_NONE }; - while(cap_kms->params.egl->glGetError()) {} - while(cap_kms->params.egl->eglGetError() != EGL_SUCCESS){} EGLImage image = cap_kms->params.egl->eglCreateImage(cap_kms->params.egl->egl_display, 0, EGL_LINUX_DMA_BUF_EXT, NULL, img_attr); - if(cap_kms->params.egl->glGetError() != 0 || cap_kms->params.egl->eglGetError() != EGL_SUCCESS) { - fprintf(stderr, "egl error!\n"); - } - - gsr_capture_kms_register_egl_image_in_cuda(cap_kms, image); + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, cap_kms->input_texture); 
+ cap_kms->params.egl->glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image); cap_kms->params.egl->eglDestroyImage(cap_kms->params.egl->egl_display, image); + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, 0); - //cap_kms->params.egl->eglSwapBuffers(cap_kms->params.egl->egl_display, cap_kms->params.egl->egl_surface); + vec2i capture_pos = cap_kms->capture_pos; + if(!capture_is_combined_plane) + capture_pos = (vec2i){drm_fd->x, drm_fd->y}; + + gsr_color_conversion_draw(&cap_kms->color_conversion, cap_kms->input_texture, + (vec2i){0, 0}, cap_kms->capture_size, + capture_pos, cap_kms->capture_size, + 0.0f); + + cap_kms->params.egl->eglSwapBuffers(cap_kms->params.egl->egl_display, cap_kms->params.egl->egl_surface); frame->linesize[0] = frame->width * 4; @@ -422,8 +510,6 @@ static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { memcpy_struct.Height = frame->height; cap_kms->cuda.cuMemcpy2D_v2(&memcpy_struct); - gsr_capture_kms_unload_cuda_graphics(cap_kms); - return 0; } @@ -444,8 +530,22 @@ static void gsr_capture_kms_cuda_capture_end(gsr_capture *cap, AVFrame *frame) { static void gsr_capture_kms_cuda_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { gsr_capture_kms_cuda *cap_kms = cap->priv; + gsr_color_conversion_deinit(&cap_kms->color_conversion); + gsr_capture_kms_unload_cuda_graphics(cap_kms); + if(cap_kms->params.egl->egl_context) { + if(cap_kms->input_texture) { + cap_kms->params.egl->glDeleteTextures(1, &cap_kms->input_texture); + cap_kms->input_texture = 0; + } + + if(cap_kms->target_texture) { + cap_kms->params.egl->glDeleteTextures(1, &cap_kms->target_texture); + cap_kms->target_texture = 0; + } + } + for(int i = 0; i < cap_kms->kms_response.num_fds; ++i) { if(cap_kms->kms_response.fds[i].fd > 0) close(cap_kms->kms_response.fds[i].fd); diff --git a/src/color_conversion.c b/src/color_conversion.c index e44b80a..821ae52 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -24,7 +24,7 @@ static float abs_f(float 
v) { " 0.0620, 0.4392, -0.0403, 0.0,\n" \ " 0.0625, 0.5000, 0.5000, 1.0);" -static int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, int *rotation_uniform) { +static int load_shader_bgr(gsr_shader *shader, gsr_egl *egl, int *rotation_uniform) { char vertex_shader[2048]; snprintf(vertex_shader, sizeof(vertex_shader), "#version 300 es \n" @@ -45,10 +45,9 @@ static int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, int *rotation_unifo "in vec2 texcoords_out; \n" "uniform sampler2D tex1; \n" "out vec4 FragColor; \n" - RGB_TO_YUV "void main() \n" "{ \n" - " FragColor = texture(tex1, texcoords_out); \n" + " FragColor = texture(tex1, texcoords_out).bgra; \n" "} \n"; if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) @@ -137,6 +136,7 @@ static unsigned int load_shader_uv(gsr_shader *shader, gsr_egl *egl, int *rotati } static int load_framebuffers(gsr_color_conversion *self) { + /* TODO: Only generate the necessary amount of framebuffers (self->params.num_destination_textures) */ const unsigned int draw_buffer = GL_COLOR_ATTACHMENT0; self->params.egl->glGenFramebuffers(MAX_FRAMEBUFFERS, self->framebuffers); @@ -192,21 +192,21 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver self->params = *params; switch(params->destination_color) { - case GSR_DESTINATION_COLOR_RGB: { + case GSR_DESTINATION_COLOR_BGR: { if(self->params.num_destination_textures != 1) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 1 destination texture for destination color RGB, got %d destination texture(s)\n", self->params.num_destination_textures); + fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 1 destination texture for destination color BGR, got %d destination texture(s)\n", self->params.num_destination_textures); return -1; } - if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->rotation_uniforms[0]) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load rgb shader\n"); 
+ if(load_shader_bgr(&self->shaders[0], self->params.egl, &self->rotation_uniforms[0]) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load bgr shader\n"); goto err; } break; } case GSR_DESTINATION_COLOR_NV12: { if(self->params.num_destination_textures != 2) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 2 destination textures for destination color RGB, got %d destination texture(s)\n", self->params.num_destination_textures); + fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 2 destination textures for destination color NV12, got %d destination texture(s)\n", self->params.num_destination_textures); return -1; }