From 2f67083915ccb16727d00d49917a520d0962fb8c Mon Sep 17 00:00:00 2001 From: Steam Deck User Date: Sat, 1 Apr 2023 15:14:36 +0200 Subject: [PATCH] Use vaapi to do rgb->yuv420p color conversion Enable window capture for amd/intel. Properly check if h264/hevc is supported on amd/intel before using codec. --- README.md | 2 +- TODO | 8 +- build.sh | 5 +- include/egl.h | 35 +-- include/vaapi.h | 168 ----------- project.conf | 3 +- src/capture/nvfbc.c | 15 +- src/capture/xcomposite_cuda.c | 9 +- src/capture/xcomposite_drm.c | 553 +++++++++++----------------------- src/egl.c | 30 +- src/main.cpp | 141 +++++---- src/vaapi.c | 41 --- 12 files changed, 290 insertions(+), 720 deletions(-) delete mode 100644 include/vaapi.h delete mode 100644 src/vaapi.c diff --git a/README.md b/README.md index 6f6568b..149d633 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ You can also install gpu screen recorder ([the gtk gui version](https://git.dec0 ## Intel `libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, libva, libva-intel-driver`. ## NVIDIA -`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode)`. Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option. +`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode), libva`. Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option. 
# How to use Run `scripts/interactive.sh` or run gpu-screen-recorder directly, for example: `gpu-screen-recorder -w $(xdotool selectwindow) -c mp4 -f 60 -a "$(pactl get-default-sink).monitor" -o test_video.mp4` then stop the screen recorder with Ctrl+C, which will also save the recording. You can change -w to -w screen if you want to record all monitors or if you want to record a specific monitor then you can use -w monitor-name, for example -w HDMI-0 (use xrandr command to find the name of your monitor. The name can also be found in your desktop environments display settings).\ diff --git a/TODO b/TODO index 99d12c5..11af2cd 100644 --- a/TODO +++ b/TODO @@ -33,4 +33,10 @@ The video output will be black if if the system is suspended on nvidia and NVreg NVreg_RegistryDwords. Restore nvfbc screen recording on monitor reconfiguration. -Window capture doesn't work properly in _control_ game after going from pause menu to in-game (and back to pause menu). There might be some x11 event we need to catch. Same for vr-video-player. \ No newline at end of file +Window capture doesn't work properly in _control_ game after going from pause menu to in-game (and back to pause menu). There might be some x11 event we need to catch. Same for vr-video-player. + +Fix constant framerate not working properly on amd/intel because capture framerate gets locked to the same framerate as game framerate, which doesn't work well when you need to encode multiple duplicate frames. We can skip multiple encode if we duplicate frame once and then use that same frame data as the difference between frames will be exactly the same, but hevc complains about that. Is there a way to make hevc shut up? + +JPEG color range on amd seems to produce too bright video with h264 but not hevc, why? + +Support recording screen/monitor on amd/intel. 
\ No newline at end of file diff --git a/build.sh b/build.sh index 4265d51..5a967bd 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/bin/sh -e #libdrm -dependencies="libavcodec libavformat libavutil x11 xcomposite xrandr libpulse libswresample libavfilter" +dependencies="libavcodec libavformat libavutil x11 xcomposite xrandr libpulse libswresample libavfilter libva" includes="$(pkg-config --cflags $dependencies)" libs="$(pkg-config --libs $dependencies) -ldl -pthread -lm" opts="-O2 -g0 -DNDEBUG" @@ -13,10 +13,9 @@ gcc -c src/egl.c $opts $includes gcc -c src/cuda.c $opts $includes gcc -c src/xnvctrl.c $opts $includes gcc -c src/overclock.c $opts $includes -gcc -c src/vaapi.c $opts $includes gcc -c src/window_texture.c $opts $includes gcc -c src/time.c $opts $includes g++ -c src/sound.cpp $opts $includes g++ -c src/main.cpp $opts $includes -g++ -o gpu-screen-recorder -O2 capture.o nvfbc.o egl.o cuda.o xnvctrl.o overclock.o vaapi.o window_texture.o time.o xcomposite_cuda.o xcomposite_drm.o sound.o main.o -s $libs +g++ -o gpu-screen-recorder -O2 capture.o nvfbc.o egl.o cuda.o xnvctrl.o overclock.o window_texture.o time.o xcomposite_cuda.o xcomposite_drm.o sound.o main.o -s $libs echo "Successfully built gpu-screen-recorder" diff --git a/include/egl.h b/include/egl.h index 7f3321f..1021fc4 100644 --- a/include/egl.h +++ b/include/egl.h @@ -39,6 +39,10 @@ typedef void (*__eglMustCastToProperFunctionPointerType)(void); #define EGL_OPENGL_ES2_BIT 0x0004 #define EGL_NONE 0x3038 #define EGL_CONTEXT_CLIENT_VERSION 0x3098 +#define EGL_BACK_BUFFER 0x3084 +#define EGL_GL_TEXTURE_2D 0x30B1 +#define EGL_TRUE 1 +#define EGL_IMAGE_PRESERVED_KHR 0x30D2 #define GL_TEXTURE_2D 0x0DE1 #define GL_RGB 0x1907 @@ -87,6 +91,8 @@ typedef struct { unsigned int (*eglSwapBuffers)(EGLDisplay dpy, EGLSurface surface); __eglMustCastToProperFunctionPointerType (*eglGetProcAddress)(const char *procname); + unsigned int (*eglExportDMABUFImageQueryMESA)(EGLDisplay dpy, EGLImageKHR image, int 
*fourcc, int *num_planes, uint64_t *modifiers); + unsigned int (*eglExportDMABUFImageMESA)(EGLDisplay dpy, EGLImageKHR image, int *fds, int32_t *strides, int32_t *offsets); void (*glEGLImageTargetTexture2DOES)(unsigned int target, GLeglImageOES image); unsigned int (*glGetError)(void); @@ -101,35 +107,6 @@ typedef struct { void (*glTexImage2D)(unsigned int target, int level, int internalFormat, int width, int height, int border, unsigned int format, unsigned int type, const void *pixels); void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth); void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data); - void (*glGenFramebuffers)(int n, unsigned int *framebuffers); - void (*glBindFramebuffer)(unsigned int target, unsigned int framebuffer); - void (*glViewport)(int x, int y, int width, int height); - void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level); - void (*glDrawBuffers)(int n, const unsigned int *bufs); - unsigned int (*glCheckFramebufferStatus)(unsigned int target); - void (*glBindBuffer)(unsigned int target, unsigned int buffer); - void (*glGenBuffers)(int n, unsigned int *buffers); - void (*glBufferData)(unsigned int target, khronos_ssize_t size, const void *data, unsigned int usage); - void (*glGenVertexArrays)(int n, unsigned int *arrays); - void (*glBindVertexArray)(unsigned int array); - - unsigned int (*glCreateProgram)(void); - unsigned int (*glCreateShader)(unsigned int type); - void (*glAttachShader)(unsigned int program, unsigned int shader); - void (*glBindAttribLocation)(unsigned int program, unsigned int index, const char *name); - void (*glCompileShader)(unsigned int shader); - void (*glLinkProgram)(unsigned 
int program); - void (*glShaderSource)(unsigned int shader, int count, const char *const*string, const int *length); - void (*glUseProgram)(unsigned int program); - void (*glGetProgramInfoLog)(unsigned int program, int bufSize, int *length, char *infoLog); - void (*glGetShaderiv)(unsigned int shader, unsigned int pname, int *params); - void (*glGetShaderInfoLog)(unsigned int shader, int bufSize, int *length, char *infoLog); - void (*glDeleteProgram)(unsigned int program); - void (*glDeleteShader)(unsigned int shader); - void (*glGetProgramiv)(unsigned int program, unsigned int pname, int *params); - void (*glVertexAttribPointer)(unsigned int index, int size, unsigned int type, unsigned char normalized, int stride, const void *pointer); - void (*glEnableVertexAttribArray)(unsigned int index); - void (*glDrawArrays)(unsigned int mode, int first, int count ); } gsr_egl; bool gsr_egl_load(gsr_egl *self, Display *dpy); diff --git a/include/vaapi.h b/include/vaapi.h deleted file mode 100644 index 56acb13..0000000 --- a/include/vaapi.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef GSR_VAAPI_H -#define GSR_VAAPI_H - -#include -#include - -// To prevent hwcontext_vaapi.h from including va.h.. An ugly hack -#define _VA_H_ - -// These definitions are copied from va.h, which is licensed under MIT - -typedef void* VADisplay; -typedef int VAStatus; - -typedef unsigned int VAGenericID; -typedef VAGenericID VAConfigID; -typedef VAGenericID VAContextID; -typedef VAGenericID VASurfaceID; - -#define VA_STATUS_SUCCESS 0x00000000 - -typedef struct { - /** Pixel format fourcc of the whole surface (VA_FOURCC_*). */ - uint32_t fourcc; - /** Width of the surface in pixels. */ - uint32_t width; - /** Height of the surface in pixels. */ - uint32_t height; - /** Number of distinct DRM objects making up the surface. */ - uint32_t num_objects; - /** Description of each object. */ - struct { - /** DRM PRIME file descriptor for this object. 
*/ - int fd; - /** Total size of this object (may include regions which are - * not part of the surface). */ - uint32_t size; - /** Format modifier applied to this object. */ - uint64_t drm_format_modifier; - } objects[4]; - /** Number of layers making up the surface. */ - uint32_t num_layers; - /** Description of each layer in the surface. */ - struct { - /** DRM format fourcc of this layer (DRM_FOURCC_*). */ - uint32_t drm_format; - /** Number of planes in this layer. */ - uint32_t num_planes; - /** Index in the objects array of the object containing each - * plane. */ - uint32_t object_index[4]; - /** Offset within the object of each plane. */ - uint32_t offset[4]; - /** Pitch of each plane. */ - uint32_t pitch[4]; - } layers[4]; -} VADRMPRIMESurfaceDescriptor; - -#define VA_INVALID_ID 0xffffffff -#define VA_INVALID_SURFACE VA_INVALID_ID - -/** \brief Generic value types. */ -typedef enum { - VAGenericValueTypeInteger = 1, /**< 32-bit signed integer. */ - VAGenericValueTypeFloat, /**< 32-bit floating-point value. */ - VAGenericValueTypePointer, /**< Generic pointer type */ - VAGenericValueTypeFunc /**< Pointer to function */ -} VAGenericValueType; - -/** \brief Generic function type. */ -typedef void (*VAGenericFunc)(void); - -/** \brief Generic value. */ -typedef struct _VAGenericValue { - /** \brief Value type. See #VAGenericValueType. */ - VAGenericValueType type; - /** \brief Value holder. */ - union { - /** \brief 32-bit signed integer. */ - int32_t i; - /** \brief 32-bit float. */ - float f; - /** \brief Generic pointer. */ - void *p; - /** \brief Pointer to function. */ - VAGenericFunc fn; - } value; -} VAGenericValue; - -/** @name Surface attribute flags */ -/**@{*/ -/** \brief Surface attribute is not supported. */ -#define VA_SURFACE_ATTRIB_NOT_SUPPORTED 0x00000000 -/** \brief Surface attribute can be got through vaQuerySurfaceAttributes(). 
*/ -#define VA_SURFACE_ATTRIB_GETTABLE 0x00000001 -/** \brief Surface attribute can be set through vaCreateSurfaces(). */ -#define VA_SURFACE_ATTRIB_SETTABLE 0x00000002 -/**@}*/ - -/** \brief Surface attribute types. */ -typedef enum { - VASurfaceAttribNone = 0, - /** - * \brief Pixel format as a FOURCC (int, read/write). - * - * When vaQuerySurfaceAttributes() is called, the driver will return one - * PixelFormat attribute per supported pixel format. - * - * When provided as an input to vaCreateSurfaces(), the driver will - * allocate a surface with the provided pixel format. - */ - VASurfaceAttribPixelFormat, - /** \brief Minimal width in pixels (int, read-only). */ - VASurfaceAttribMinWidth, - /** \brief Maximal width in pixels (int, read-only). */ - VASurfaceAttribMaxWidth, - /** \brief Minimal height in pixels (int, read-only). */ - VASurfaceAttribMinHeight, - /** \brief Maximal height in pixels (int, read-only). */ - VASurfaceAttribMaxHeight, - /** \brief Surface memory type expressed in bit fields (int, read/write). */ - VASurfaceAttribMemoryType, - /** \brief External buffer descriptor (pointer, write). - * - * Refer to the documentation for the memory type being created to - * determine what descriptor structure to pass here. If not otherwise - * stated, the common VASurfaceAttribExternalBuffers should be used. - */ - VASurfaceAttribExternalBufferDescriptor, - /** \brief Surface usage hint, gives the driver a hint of intended usage - * to optimize allocation (e.g. tiling) (int, read/write). */ - VASurfaceAttribUsageHint, - /** \brief List of possible DRM format modifiers (pointer, write). - * - * The value must be a pointer to a VADRMFormatModifierList. This can only - * be used when allocating a new buffer, it's invalid to use this attribute - * when importing an existing buffer. - */ - VASurfaceAttribDRMFormatModifiers, - /** \brief Number of surface attributes. */ - VASurfaceAttribCount -} VASurfaceAttribType; - -/** \brief Surface attribute. 
*/ -typedef struct _VASurfaceAttrib { - /** \brief Type. */ - VASurfaceAttribType type; - /** \brief Flags. See "Surface attribute flags". */ - uint32_t flags; - /** \brief Value. See "Surface attribute types" for the expected types. */ - VAGenericValue value; -} VASurfaceAttrib; - -#define VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 0x40000000 -#define VA_EXPORT_SURFACE_READ_WRITE 0x0003 -#define VA_EXPORT_SURFACE_SEPARATE_LAYERS 0x0004 - -typedef struct { - void *library; - - VAStatus (*vaExportSurfaceHandle)(VADisplay dpy, VASurfaceID surface_id, uint32_t mem_type, uint32_t flags, void *descriptor); - VAStatus (*vaSyncSurface)(VADisplay dpy, VASurfaceID render_target); -} gsr_vaapi; - -bool gsr_vaapi_load(gsr_vaapi *self); -void gsr_vaapi_unload(gsr_vaapi *self); - -#endif /* GSR_VAAPI_H */ diff --git a/project.conf b/project.conf index 8fa9493..5b9358f 100644 --- a/project.conf +++ b/project.conf @@ -13,4 +13,5 @@ xcomposite = ">=0.2" xrandr = ">=1" libpulse = ">=13" libswresample = ">=3" -libavfilter = ">=5" \ No newline at end of file +libavfilter = ">=5" +libva = ">=1" \ No newline at end of file diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index d538b5b..a7bb16a 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -177,8 +177,8 @@ static bool ffmpeg_create_cuda_contexts(gsr_capture_nvfbc *cap_nvfbc, AVCodecCon return false; } - video_codec_context->hw_device_ctx = device_ctx; - video_codec_context->hw_frames_ctx = frame_context; + video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx); + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); return true; } @@ -350,9 +350,9 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec if(video_codec_context->hw_device_ctx) av_buffer_unref(&video_codec_context->hw_device_ctx); - // Not needed because the above call to unref device ctx also frees this? 
- //if(video_codec_context->hw_frames_ctx) - // av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(video_codec_context->hw_frames_ctx) + av_buffer_unref(&video_codec_context->hw_frames_ctx); + gsr_cuda_unload(&cap_nvfbc->cuda); return -1; } @@ -438,9 +438,8 @@ static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_co gsr_capture_nvfbc_destroy_session(cap); if(video_codec_context->hw_device_ctx) av_buffer_unref(&video_codec_context->hw_device_ctx); - // Not needed because the above call to unref device ctx also frees this? - //if(video_codec_context->hw_frames_ctx) - // av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(video_codec_context->hw_frames_ctx) + av_buffer_unref(&video_codec_context->hw_frames_ctx); if(cap_nvfbc) { gsr_cuda_unload(&cap_nvfbc->cuda); dlclose(cap_nvfbc->library); diff --git a/src/capture/xcomposite_cuda.c b/src/capture/xcomposite_cuda.c index a812c12..1ea9923 100644 --- a/src/capture/xcomposite_cuda.c +++ b/src/capture/xcomposite_cuda.c @@ -131,8 +131,8 @@ static bool cuda_create_codec_context(gsr_capture_xcomposite_cuda *cap_xcomp, AV return false; } - video_codec_context->hw_device_ctx = device_ctx; - video_codec_context->hw_frames_ctx = frame_context; + video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx); + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); return true; } @@ -252,9 +252,8 @@ static void gsr_capture_xcomposite_cuda_stop(gsr_capture *cap, AVCodecContext *v if(video_codec_context->hw_device_ctx) av_buffer_unref(&video_codec_context->hw_device_ctx); - // Not needed because the above call to unref device ctx also frees this? 
- //if(video_codec_context->hw_frames_ctx) - // av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(video_codec_context->hw_frames_ctx) + av_buffer_unref(&video_codec_context->hw_frames_ctx); if(cap_xcomp->cuda.cu_ctx) { CUcontext old_ctx; diff --git a/src/capture/xcomposite_drm.c b/src/capture/xcomposite_drm.c index fbd96fc..47cbfcc 100644 --- a/src/capture/xcomposite_drm.c +++ b/src/capture/xcomposite_drm.c @@ -1,15 +1,16 @@ #include "../../include/capture/xcomposite_drm.h" #include "../../include/egl.h" -#include "../../include/vaapi.h" #include "../../include/window_texture.h" #include "../../include/time.h" #include #include #include +#include #include #include #include #include +//#include #include /* TODO: Proper error checks and cleanups */ @@ -27,18 +28,20 @@ typedef struct { WindowTexture window_texture; gsr_egl egl; - gsr_vaapi vaapi; + + int fourcc; + int num_planes; + uint64_t modifiers; + int dmabuf_fd; + int32_t pitch; + int32_t offset; unsigned int target_textures[2]; - unsigned int framebuffer_y; - unsigned int framebuffer_uv; - unsigned int vao; - - unsigned int shader_y; - unsigned int shader_uv; - VADisplay va_dpy; + VAConfigID config_id; + VAContextID context_id; + VASurfaceID input_surface; } gsr_capture_xcomposite_drm; static int max_int(int a, int b) { @@ -47,7 +50,7 @@ static int max_int(int a, int b) { static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCodecContext *video_codec_context) { AVBufferRef *device_ctx; - if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_VAAPI, "/dev/dri/card0", NULL, 0) < 0) { + if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_VAAPI, "/dev/dri/renderD128", NULL, 0) < 0) { fprintf(stderr, "Error: Failed to create hardware device context\n"); return false; } @@ -63,7 +66,7 @@ static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCo (AVHWFramesContext *)frame_context->data; hw_frame_context->width = video_codec_context->width; 
hw_frame_context->height = video_codec_context->height; - hw_frame_context->sw_format = AV_PIX_FMT_NV12; + hw_frame_context->sw_format = AV_PIX_FMT_NV12;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_YUV420P;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_NV12; hw_frame_context->format = video_codec_context->pix_fmt; hw_frame_context->device_ref = device_ctx; hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; @@ -74,227 +77,20 @@ static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCo cap_xcomp->va_dpy = vactx->display; if (av_hwframe_ctx_init(frame_context) < 0) { - fprintf(stderr, "Error: Failed to initialize hardware frame context (note: ffmpeg version needs to be > 4.0)\n"); + fprintf(stderr, "Error: Failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); av_buffer_unref(&device_ctx); //av_buffer_unref(&frame_context); return false; } - video_codec_context->hw_device_ctx = device_ctx; // TODO: av_buffer_ref? and in more places - video_codec_context->hw_frames_ctx = frame_context; + video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx); + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); return true; } -#define GL_COMPILE_STATUS 0x8B81 -#define GL_INFO_LOG_LENGTH 0x8B84 - -unsigned int esLoadShader ( gsr_capture_xcomposite_drm *cap_xcomp, unsigned int type, const char *shaderSrc ) { - unsigned int shader; - int compiled; - - // Create the shader object - shader = cap_xcomp->egl.glCreateShader ( type ); - - if ( shader == 0 ) - return 0; - - // Load the shader source - cap_xcomp->egl.glShaderSource ( shader, 1, &shaderSrc, NULL ); - - // Compile the shader - cap_xcomp->egl.glCompileShader ( shader ); - - // Check the compile status - cap_xcomp->egl.glGetShaderiv ( shader, GL_COMPILE_STATUS, &compiled ); - - if ( !compiled ) - { - int infoLen = 0; - - cap_xcomp->egl.glGetShaderiv ( shader, GL_INFO_LOG_LENGTH, &infoLen ); - - if ( infoLen > 1 ) - { - char* infoLog = malloc (sizeof(char) 
* infoLen ); - - cap_xcomp->egl.glGetShaderInfoLog ( shader, infoLen, NULL, infoLog ); - fprintf (stderr, "Error compiling shader:\n%s\n", infoLog ); - - free ( infoLog ); - } - - cap_xcomp->egl.glDeleteShader ( shader ); - return 0; - } - - return shader; - -} - -#define GL_FRAGMENT_SHADER 0x8B30 -#define GL_VERTEX_SHADER 0x8B31 -#define GL_COMPILE_STATUS 0x8B81 -#define GL_LINK_STATUS 0x8B82 - - -// -/// -/// \brief Load a vertex and fragment shader, create a program object, link program. -// Errors output to log. -/// \param vertShaderSrc Vertex shader source code -/// \param fragShaderSrc Fragment shader source code -/// \return A new program object linked with the vertex/fragment shader pair, 0 on failure -// -unsigned int esLoadProgram ( gsr_capture_xcomposite_drm *cap_xcomp, const char *vertShaderSrc, const char *fragShaderSrc ) -{ - unsigned int vertexShader; - unsigned int fragmentShader; - unsigned int programObject; - int linked; - - // Load the vertex/fragment shaders - vertexShader = esLoadShader ( cap_xcomp, GL_VERTEX_SHADER, vertShaderSrc ); - if ( vertexShader == 0 ) - return 0; - - fragmentShader = esLoadShader ( cap_xcomp, GL_FRAGMENT_SHADER, fragShaderSrc ); - if ( fragmentShader == 0 ) - { - cap_xcomp->egl.glDeleteShader( vertexShader ); - return 0; - } - - // Create the program object - programObject = cap_xcomp->egl.glCreateProgram ( ); - - if ( programObject == 0 ) - return 0; - - cap_xcomp->egl.glAttachShader ( programObject, vertexShader ); - cap_xcomp->egl.glAttachShader ( programObject, fragmentShader ); - - // Link the program - cap_xcomp->egl.glLinkProgram ( programObject ); - - // Check the link status - cap_xcomp->egl.glGetProgramiv ( programObject, GL_LINK_STATUS, &linked ); - - if ( !linked ) - { - int infoLen = 0; - - cap_xcomp->egl.glGetProgramiv ( programObject, GL_INFO_LOG_LENGTH, &infoLen ); - - if ( infoLen > 1 ) - { - char* infoLog = malloc (sizeof(char) * infoLen ); - - cap_xcomp->egl.glGetProgramInfoLog ( programObject, 
infoLen, NULL, infoLog ); - fprintf (stderr, "Error linking program:\n%s\n", infoLog ); - - free ( infoLog ); - } - - cap_xcomp->egl.glDeleteProgram ( programObject ); - return 0; - } - - // Free up no longer needed shader resources - cap_xcomp->egl.glDeleteShader ( vertexShader ); - cap_xcomp->egl.glDeleteShader ( fragmentShader ); - - return programObject; -} - -#define RGB_TO_YUV "const mat4 RGBtoYUV = mat4(0.257, 0.439, -0.148, 0.0,\n" \ - " 0.504, -0.368, -0.291, 0.0,\n" \ - " 0.098, -0.071, 0.439, 0.0,\n" \ - " 0.0625, 0.500, 0.500, 1.0);" - -static unsigned int LoadShadersY(gsr_capture_xcomposite_drm *cap_xcomp) { - char vShaderStr[] = - "#version 300 es \n" - "in vec2 pos; \n" - "in vec2 texcoords; \n" - "out vec2 texcoords_out; \n" - "void main() \n" - "{ \n" - " texcoords_out = texcoords; \n" - " gl_Position = vec4(pos.x, pos.y, 0.0, 1.0); \n" - "} \n"; - - char fShaderStr[] = - "#version 300 es \n" - "precision mediump float; \n" - "in vec2 texcoords_out; \n" - "uniform sampler2D tex1; \n" - "out vec4 FragColor; \n" - RGB_TO_YUV - "void main() \n" - "{ \n" - " FragColor.x = (RGBtoYUV * vec4(texture(tex1, texcoords_out).rgb, 1.0)).x; \n" - "} \n"; - - unsigned int shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr); - if (shader_program == 0) { - fprintf(stderr, "failed to create shader!\n"); - return 0; - } - - cap_xcomp->egl.glBindAttribLocation(shader_program, 0, "pos"); - cap_xcomp->egl.glBindAttribLocation(shader_program, 1, "texcoords"); - return shader_program; -} - -static unsigned int LoadShadersUV(gsr_capture_xcomposite_drm *cap_xcomp) { - char vShaderStr[] = - "#version 300 es \n" - "in vec2 pos; \n" - "in vec2 texcoords; \n" - "out vec2 texcoords_out; \n" - "void main() \n" - "{ \n" - " texcoords_out = texcoords; \n" - " gl_Position = vec4(pos.x, pos.y, 0.0, 1.0); \n" - "} \n"; - - char fShaderStr[] = - "#version 300 es \n" - "precision mediump float; \n" - "in vec2 texcoords_out; \n" - "uniform sampler2D tex1; \n" - "out vec4 
FragColor; \n" - RGB_TO_YUV - "void main() \n" - "{ \n" - " FragColor.xy = (RGBtoYUV * vec4(texture(tex1, texcoords_out*2.0).rgb, 1.0)).zy; \n" - "} \n"; - - unsigned int shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr); - if (shader_program == 0) { - fprintf(stderr, "failed to create shader!\n"); - return 0; - } - - cap_xcomp->egl.glBindAttribLocation(shader_program, 0, "pos"); - cap_xcomp->egl.glBindAttribLocation(shader_program, 1, "texcoords"); - return shader_program; -} - -#define GL_FLOAT 0x1406 -#define GL_FALSE 0 -#define GL_TRUE 1 -#define GL_TRIANGLES 0x0004 #define DRM_FORMAT_MOD_INVALID 72057594037927935 -#define EGL_TRUE 1 -#define EGL_IMAGE_PRESERVED_KHR 0x30D2 -#define EGL_NATIVE_PIXMAP_KHR 0x30B0 - -static uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { - return (d << 24) | (c << 16) | (b << 8) | a; -} - static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *video_codec_context) { gsr_capture_xcomposite_drm *cap_xcomp = cap->priv; @@ -317,15 +113,51 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi return -1; } - if(!gsr_vaapi_load(&cap_xcomp->vaapi)) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed to load vaapi\n"); + if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: could not find eglExportDMABUFImageQueryMESA\n"); + gsr_egl_unload(&cap_xcomp->egl); + return -1; + } + + if(!cap_xcomp->egl.eglExportDMABUFImageMESA) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: could not find eglExportDMABUFImageMESA\n"); gsr_egl_unload(&cap_xcomp->egl); return -1; } /* Disable vsync */ cap_xcomp->egl.eglSwapInterval(cap_xcomp->egl.egl_display, 0); +#if 0 + // TODO: Fallback to composite window + if(window_texture_init(&cap_xcomp->window_texture, cap_xcomp->dpy, cap_xcomp->params.window, &cap_xcomp->gl) != 0) { + fprintf(stderr, "gsr error: 
gsr_capture_xcomposite_start: failed get window texture for window %ld\n", cap_xcomp->params.window); + gsr_egl_unload(&cap_xcomp->egl); + return -1; + } + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + cap_xcomp->texture_size.x = 0; + cap_xcomp->texture_size.y = 0; + cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &cap_xcomp->texture_size.x); + cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y); + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); + + cap_xcomp->texture_size.x = max_int(2, cap_xcomp->texture_size.x & ~1); + cap_xcomp->texture_size.y = max_int(2, cap_xcomp->texture_size.y & ~1); + + cap_xcomp->target_texture_id = gl_create_texture(cap_xcomp, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y); + if(cap_xcomp->target_texture_id == 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to create opengl texture\n"); + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + video_codec_context->width = cap_xcomp->texture_size.x; + video_codec_context->height = cap_xcomp->texture_size.y; + + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + return 0; +#else // TODO: Fallback to composite window if(window_texture_init(&cap_xcomp->window_texture, cap_xcomp->dpy, cap_xcomp->params.window, &cap_xcomp->egl) != 0) { fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed get window texture for window %ld\n", cap_xcomp->params.window); @@ -346,13 +178,47 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi video_codec_context->width = cap_xcomp->texture_size.x; video_codec_context->height = cap_xcomp->texture_size.y; + { + const intptr_t pixmap_attrs[] = { + EGL_IMAGE_PRESERVED_KHR, EGL_TRUE, + EGL_NONE, + }; + + EGLImage img = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_context, 
EGL_GL_TEXTURE_2D, (EGLClientBuffer)(uint64_t)window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), pixmap_attrs); + if(!img) { + fprintf(stderr, "eglCreateImage failed\n"); + return -1; + } + + if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA(cap_xcomp->egl.egl_display, img, &cap_xcomp->fourcc, &cap_xcomp->num_planes, &cap_xcomp->modifiers)) { + fprintf(stderr, "eglExportDMABUFImageQueryMESA failed\n"); + return -1; + } + + if(cap_xcomp->num_planes != 1) { + // TODO: FAIL! + fprintf(stderr, "Blablalba\n"); + return -1; + } + + if(!cap_xcomp->egl.eglExportDMABUFImageMESA(cap_xcomp->egl.egl_display, img, &cap_xcomp->dmabuf_fd, &cap_xcomp->pitch, &cap_xcomp->offset)) { + fprintf(stderr, "eglExportDMABUFImageMESA failed\n"); + return -1; + } + + fprintf(stderr, "texture: %u, dmabuf: %d, pitch: %d, offset: %d\n", window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), cap_xcomp->dmabuf_fd, cap_xcomp->pitch, cap_xcomp->offset); + fprintf(stderr, "fourcc: %d, num planes: %d, modifiers: %zu\n", cap_xcomp->fourcc, cap_xcomp->num_planes, cap_xcomp->modifiers); + } + if(!drm_create_codec_context(cap_xcomp, video_codec_context)) { fprintf(stderr, "failed to create hw codec context\n"); gsr_egl_unload(&cap_xcomp->egl); return -1; } + //fprintf(stderr, "sneed: %u\n", cap_xcomp->FramebufferName); return 0; +#endif } static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { @@ -372,11 +238,7 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi (*frame)->format = video_codec_context->pix_fmt; (*frame)->width = video_codec_context->width; (*frame)->height = video_codec_context->height; - (*frame)->color_range = video_codec_context->color_range; - (*frame)->color_primaries = video_codec_context->color_primaries; - (*frame)->color_trc = video_codec_context->color_trc; - (*frame)->colorspace = video_codec_context->colorspace; - (*frame)->chroma_location = 
video_codec_context->chroma_sample_location; + (*frame)->color_range = AVCOL_RANGE_JPEG; int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0); if(res < 0) { @@ -384,143 +246,63 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi return; } - VADRMPRIMESurfaceDescriptor prime; + fprintf(stderr, "fourcc: %u\n", cap_xcomp->fourcc); + fprintf(stderr, "va surface id: %u\n", (VASurfaceID)(uintptr_t)(*frame)->data[3]); - VASurfaceID surface_id = (uintptr_t)(*frame)->data[3]; - VAStatus va_status = cap_xcomp->vaapi.vaExportSurfaceHandle(cap_xcomp->va_dpy, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_READ_WRITE | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &prime); // TODO: Composed layers + int xx = 0, yy = 0; + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xx); + cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &yy); + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); + + uintptr_t dmabuf = cap_xcomp->dmabuf_fd; + + VASurfaceAttribExternalBuffers buf = {0}; + buf.pixel_format = VA_FOURCC_BGRX; // TODO: VA_FOURCC_XRGB? + buf.width = xx; + buf.height = yy; + buf.data_size = yy * cap_xcomp->pitch; + buf.num_planes = 1; + buf.pitches[0] = cap_xcomp->pitch; + buf.offsets[0] = cap_xcomp->offset; + buf.buffers = &dmabuf; + buf.num_buffers = 1; + buf.flags = 0; + buf.private_data = 0; + + #define VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME 0x20000000 + + VASurfaceAttrib attribs[2] = {0}; + attribs[0].type = VASurfaceAttribMemoryType; + attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; + attribs[0].value.type = VAGenericValueTypeInteger; + attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // TODO: prime1 instead? 
+ attribs[1].type = VASurfaceAttribExternalBufferDescriptor; + attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; + attribs[1].value.type = VAGenericValueTypePointer; + attribs[1].value.value.p = &buf; + + VAStatus va_status = vaCreateSurfaces(cap_xcomp->va_dpy, VA_RT_FORMAT_RGB32, xx, yy, &cap_xcomp->input_surface, 1, attribs, 2); if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "vaExportSurfaceHandle failed\n"); + fprintf(stderr, "failed to create surface: %d\n", va_status); + abort(); return; } - cap_xcomp->vaapi.vaSyncSurface(cap_xcomp->va_dpy, surface_id); - fprintf(stderr, "fourcc: %u, width: %u, height: %u\n", prime.fourcc, prime.width, prime.height); - for(int i = 0; i < prime.num_layers; ++i) { - fprintf(stderr, " drm format: %u, num planes: %u\n", prime.layers[i].drm_format, prime.layers[i].num_planes); - for(int j = 0; j < prime.layers[i].num_planes; ++j) { - const uint32_t object_index = prime.layers[i].object_index[j]; - fprintf(stderr, " object index: %u, offset: %u, pitch: %u, fd: %d, size: %u, drm format mod: %lu\n", object_index, prime.layers[i].offset[j], prime.layers[i].pitch[j], prime.objects[object_index].fd, prime.objects[object_index].size, prime.objects[object_index].drm_format_modifier); - } + //vaBeginPicture(cap_xcomp->va_dpy, ) + + va_status = vaCreateConfig(cap_xcomp->va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &cap_xcomp->config_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaCreateConfig failed: %d\n", va_status); + abort(); + return; } - #define EGL_LINUX_DRM_FOURCC_EXT 0x3271 - #define EGL_WIDTH 0x3057 - #define EGL_HEIGHT 0x3056 - #define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 - #define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 - #define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 - #define EGL_LINUX_DMA_BUF_EXT 0x3270 - - #define GL_TEXTURE0 0x84C0 - #define GL_COLOR_ATTACHMENT1 0x8CE1 - - #define FOURCC_NV12 842094158 - - if(prime.fourcc == FOURCC_NV12) { // This happens on AMD - while(cap_xcomp->egl.glGetError()) {} - 
while(cap_xcomp->egl.eglGetError() != EGL_SUCCESS){} - - EGLImage images[2]; - cap_xcomp->egl.glGenTextures(2, cap_xcomp->target_textures); - assert(cap_xcomp->egl.glGetError() == 0); - for(int i = 0; i < 2; ++i) { - const uint32_t formats[2] = { fourcc('R', '8', ' ', ' '), fourcc('G', 'R', '8', '8') }; - const int layer = i; - const int plane = 0; - - const intptr_t img_attr[] = { - EGL_LINUX_DRM_FOURCC_EXT, formats[i], - EGL_WIDTH, prime.width / (1 + i), // half size - EGL_HEIGHT, prime.height / (1 + i), // for chroma - EGL_DMA_BUF_PLANE0_FD_EXT, prime.objects[prime.layers[layer].object_index[plane]].fd, - EGL_DMA_BUF_PLANE0_OFFSET_EXT, prime.layers[layer].offset[plane], - EGL_DMA_BUF_PLANE0_PITCH_EXT, prime.layers[layer].pitch[plane], - EGL_NONE - }; - images[i] = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, 0, EGL_LINUX_DMA_BUF_EXT, NULL, img_attr); // TODO: Cleanup at the end of this for loop - assert(images[i]); - assert(cap_xcomp->egl.eglGetError() == EGL_SUCCESS); - - //cap_xcomp->egl.glActiveTexture(GL_TEXTURE0 + i); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, cap_xcomp->target_textures[i]); - assert(cap_xcomp->egl.glGetError() == 0); - - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - assert(cap_xcomp->egl.glGetError() == 0); - - cap_xcomp->egl.glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, images[i]); - assert(cap_xcomp->egl.glGetError() == 0); - assert(cap_xcomp->egl.eglGetError() == EGL_SUCCESS); - } - //cap_xcomp->egl.glActiveTexture(GL_TEXTURE0); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); - - - - cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->framebuffer_y); - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_y); - - 
cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_textures[0], 0); - - // Set the list of draw buffers. - unsigned int DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; - cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers - - if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - fprintf(stderr, "Failed to setup framebuffer\n"); - return; - } - - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0); - - cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->framebuffer_uv); - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_uv); - - cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_textures[1], 0); - - // Set the list of draw buffers. - cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers - - if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - fprintf(stderr, "Failed to setup framebuffer\n"); - return; - } - - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0); - - - cap_xcomp->shader_y = LoadShadersY(cap_xcomp); - cap_xcomp->shader_uv = LoadShadersUV(cap_xcomp); - - float vVertices[] = { - -1.0f, 1.0f, 0.0f, 1.0f, - -1.0f, -1.0f, 0.0f, 0.0f, - 1.0f, -1.0f, 1.0f, 0.0f, - - -1.0f, 1.0f, 0.0f, 1.0f, - 1.0f, -1.0f, 1.0f, 0.0f, - 1.0f, 1.0f, 1.0f, 1.0f - }; - - unsigned int quadVBO; - cap_xcomp->egl.glGenVertexArrays(1, &cap_xcomp->vao); - cap_xcomp->egl.glGenBuffers(1, &quadVBO); - cap_xcomp->egl.glBindVertexArray(cap_xcomp->vao); - cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, quadVBO); - cap_xcomp->egl.glBufferData(GL_ARRAY_BUFFER, sizeof(vVertices), &vVertices, GL_STATIC_DRAW); - - cap_xcomp->egl.glEnableVertexAttribArray(0); - cap_xcomp->egl.glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); - - cap_xcomp->egl.glEnableVertexAttribArray(1); - cap_xcomp->egl.glVertexAttribPointer(1, 2, 
GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); - - cap_xcomp->egl.glBindVertexArray(0); - } else { - fprintf(stderr, "unexpected fourcc: %u, expected nv12\n", prime.fourcc); + VASurfaceID target_surface_id = (uintptr_t)(*frame)->data[3]; + va_status = vaCreateContext(cap_xcomp->va_dpy, cap_xcomp->config_id, xx, yy, VA_PROGRESSIVE, &target_surface_id, 1, &cap_xcomp->context_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaCreateContext failed: %d\n", va_status); + abort(); return; } @@ -542,34 +324,47 @@ static bool gsr_capture_xcomposite_drm_should_stop(gsr_capture *cap, bool *err) static int gsr_capture_xcomposite_drm_capture(gsr_capture *cap, AVFrame *frame) { gsr_capture_xcomposite_drm *cap_xcomp = cap->priv; - vec2i source_size = cap_xcomp->texture_size; - cap_xcomp->egl.glBindVertexArray(cap_xcomp->vao); - cap_xcomp->egl.glViewport(0, 0, source_size.x, source_size.y); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + VASurfaceID target_surface_id = (uintptr_t)frame->data[3]; - { - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_y); - //cap_xcomp->egl.glClear(GL_COLOR_BUFFER_BIT); - - cap_xcomp->egl.glUseProgram(cap_xcomp->shader_y); - cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6); + VAStatus va_status = vaBeginPicture(cap_xcomp->va_dpy, cap_xcomp->context_id, target_surface_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaBeginPicture failed: %d\n", va_status); + abort(); + return 1; } - { - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_uv); - //cap_xcomp->egl.glClear(GL_COLOR_BUFFER_BIT); + VAProcPipelineParameterBuffer params = {0}; + params.surface = cap_xcomp->input_surface; + params.surface_region = NULL; + params.output_background_color = 0xFF000000; + params.filter_flags = VA_FRAME_PICTURE; + // TODO: Colors - cap_xcomp->egl.glUseProgram(cap_xcomp->shader_uv); - 
cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6); + VABufferID buffer_id = 0; + va_status = vaCreateBuffer(cap_xcomp->va_dpy, cap_xcomp->context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaCreateBuffer failed: %d\n", va_status); + return 1; } - cap_xcomp->egl.glBindVertexArray(0); - cap_xcomp->egl.glUseProgram(0); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0); + va_status = vaRenderPicture(cap_xcomp->va_dpy, cap_xcomp->context_id, &buffer_id, 1); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaRenderPicture failed: %d\n", va_status); + return 1; + } - cap_xcomp->egl.eglSwapBuffers(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_surface); + va_status = vaEndPicture(cap_xcomp->va_dpy, cap_xcomp->context_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaEndPicture failed: %d\n", va_status); + return 1; + } + + // TODO: Needed?
+ //vaSyncSurface(cap_xcomp->va_dpy, target_surface_id); + + // TODO: Remove + //cap_xcomp->egl.eglSwapBuffers(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_surface); return 0; } diff --git a/src/egl.c b/src/egl.c index 14ee8d9..b991ff4 100644 --- a/src/egl.c +++ b/src/egl.c @@ -110,6 +110,8 @@ static bool gsr_egl_load_egl(gsr_egl *self, void *library) { } static bool gsr_egl_proc_load_egl(gsr_egl *self) { + self->eglExportDMABUFImageQueryMESA = self->eglGetProcAddress("eglExportDMABUFImageQueryMESA"); + self->eglExportDMABUFImageMESA = self->eglGetProcAddress("eglExportDMABUFImageMESA"); self->glEGLImageTargetTexture2DOES = self->eglGetProcAddress("glEGLImageTargetTexture2DOES"); if(!self->glEGLImageTargetTexture2DOES) { @@ -134,34 +136,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glTexImage2D, "glTexImage2D" }, { (void**)&self->glCopyImageSubData, "glCopyImageSubData" }, { (void**)&self->glClearTexImage, "glClearTexImage" }, - { (void**)&self->glGenFramebuffers, "glGenFramebuffers" }, - { (void**)&self->glBindFramebuffer, "glBindFramebuffer" }, - { (void**)&self->glViewport, "glViewport" }, - { (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" }, - { (void**)&self->glDrawBuffers, "glDrawBuffers" }, - { (void**)&self->glCheckFramebufferStatus, "glCheckFramebufferStatus" }, - { (void**)&self->glBindBuffer, "glBindBuffer" }, - { (void**)&self->glGenBuffers, "glGenBuffers" }, - { (void**)&self->glBufferData, "glBufferData" }, - { (void**)&self->glGenVertexArrays, "glGenVertexArrays" }, - { (void**)&self->glBindVertexArray, "glBindVertexArray" }, - { (void**)&self->glCreateProgram, "glCreateProgram" }, - { (void**)&self->glCreateShader, "glCreateShader" }, - { (void**)&self->glAttachShader, "glAttachShader" }, - { (void**)&self->glBindAttribLocation, "glBindAttribLocation" }, - { (void**)&self->glCompileShader, "glCompileShader" }, - { (void**)&self->glLinkProgram, "glLinkProgram" }, - { 
(void**)&self->glShaderSource, "glShaderSource" }, - { (void**)&self->glUseProgram, "glUseProgram" }, - { (void**)&self->glGetProgramInfoLog, "glGetProgramInfoLog" }, - { (void**)&self->glGetShaderiv, "glGetShaderiv" }, - { (void**)&self->glGetShaderInfoLog, "glGetShaderInfoLog" }, - { (void**)&self->glDeleteProgram, "glDeleteProgram" }, - { (void**)&self->glDeleteShader, "glDeleteShader" }, - { (void**)&self->glGetProgramiv, "glGetProgramiv" }, - { (void**)&self->glVertexAttribPointer, "glVertexAttribPointer" }, - { (void**)&self->glEnableVertexAttribArray, "glEnableVertexAttribArray" }, - { (void**)&self->glDrawArrays, "glDrawArrays" }, { NULL, NULL } }; diff --git a/src/main.cpp b/src/main.cpp index 7a268d6..54e59e9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -446,20 +446,65 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt, return codec_context; } +static bool vaapi_create_codec_context(AVCodecContext *video_codec_context) { + AVBufferRef *device_ctx; + if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_VAAPI, "/dev/dri/renderD128", NULL, 0) < 0) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); + if(!frame_context) { + fprintf(stderr, "Error: Failed to create hwframe context\n"); + av_buffer_unref(&device_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = + (AVHWFramesContext *)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = AV_PIX_FMT_NV12; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ref = device_ctx; + hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + + hw_frame_context->initial_pool_size = 1; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "Error: Failed to initialize hardware frame context " + 
"(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&device_ctx); + //av_buffer_unref(&frame_context); + return false; + } + + video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx); + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); + return true; +} + static bool check_if_codec_valid_for_hardware(const AVCodec *codec, gpu_vendor vendor) { - // TODO: For now we assume that amd and intel always support h264 and hevc, but we default to h264 - if(vendor != GPU_VENDOR_NVIDIA) - return true; + // Do not use AV_PIX_FMT_CUDA because we dont want to do full check with hardware context + AVCodecContext *codec_context = create_video_codec_context(vendor == GPU_VENDOR_NVIDIA ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_VAAPI, VideoQuality::VERY_HIGH, 60, codec, false, vendor, FramerateMode::CONSTANT); + if(!codec_context) + return false; + + codec_context->width = 32; + codec_context->height = 32; + + if(!vaapi_create_codec_context(codec_context)) { + avcodec_free_context(&codec_context); + return false; + } bool success = false; - // Do not use AV_PIX_FMT_CUDA because we dont want to do full check with hardware context - AVCodecContext *codec_context = create_video_codec_context(AV_PIX_FMT_YUV420P, VideoQuality::VERY_HIGH, 60, codec, false, vendor, FramerateMode::CONSTANT); - codec_context->width = 1920; - codec_context->height = 1080; - if(codec_context) { - success = avcodec_open2(codec_context, codec_context->codec, NULL) == 0; - avcodec_free_context(&codec_context); - } + success = avcodec_open2(codec_context, codec_context->codec, NULL) == 0; + if(codec_context->hw_device_ctx) + av_buffer_unref(&codec_context->hw_device_ctx); + if(codec_context->hw_frames_ctx) + av_buffer_unref(&codec_context->hw_frames_ctx); + avcodec_free_context(&codec_context); return success; } @@ -468,6 +513,9 @@ static const AVCodec* find_h264_encoder(gpu_vendor vendor) { if(!codec) codec = avcodec_find_encoder_by_name(vendor == GPU_VENDOR_NVIDIA ? 
"nvenc_h264" : "vaapi_h264"); + if(!codec) + return nullptr; + static bool checked = false; static bool checked_success = true; if(!checked) { @@ -478,7 +526,6 @@ static const AVCodec* find_h264_encoder(gpu_vendor vendor) { return checked_success ? codec : nullptr; } -// TODO: Disable under intel/amd? static const AVCodec* find_h265_encoder(gpu_vendor vendor) { const AVCodec *codec = avcodec_find_encoder_by_name(vendor == GPU_VENDOR_NVIDIA ? "hevc_nvenc" : "hevc_vaapi"); @@ -620,16 +667,16 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality } else { switch(video_quality) { case VideoQuality::MEDIUM: - av_dict_set_int(&options, "qp", 40, 0); + av_dict_set_int(&options, "qp", 37, 0); break; case VideoQuality::HIGH: - av_dict_set_int(&options, "qp", 35, 0); + av_dict_set_int(&options, "qp", 32, 0); break; case VideoQuality::VERY_HIGH: - av_dict_set_int(&options, "qp", 30, 0); + av_dict_set_int(&options, "qp", 27, 0); break; case VideoQuality::ULTRA: - av_dict_set_int(&options, "qp", 24, 0); + av_dict_set_int(&options, "qp", 21, 0); break; } @@ -639,7 +686,7 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality if(codec_context->codec_id == AV_CODEC_ID_H264) { av_dict_set(&options, "profile", "high", 0); - av_dict_set_int(&options, "quality", 32, 0); + av_dict_set_int(&options, "quality", 4, 0); } else { av_dict_set(&options, "profile", "main", 0); } @@ -1228,8 +1275,6 @@ int main(int argc, char **argv) { usage(); } - FramerateMode framerate_mode = FramerateMode::CONSTANT; - const Arg &audio_input_arg = args["-a"]; const std::vector audio_inputs = get_pulseaudio_inputs(); std::vector requested_audio_inputs; @@ -1323,12 +1368,14 @@ int main(int argc, char **argv) { very_old_gpu = true; } - // TODO: Remove once gpu screen recorder supports amd and intel properly - if(gpu_inf.vendor != GPU_VENDOR_NVIDIA) { - fprintf(stderr, "Error: gpu-screen-recorder does currently only support nvidia gpus\n"); - return 2; + 
if(gpu_inf.vendor != GPU_VENDOR_NVIDIA && overclock) { + fprintf(stderr, "Info: overclock option has no effect on amd/intel, ignoring option...\n"); } + // TODO: Fix constant framerate not working properly on amd/intel because capture framerate gets locked to the same framerate as + // game framerate, which doesn't work well when you need to encode multiple duplicate frames. + const FramerateMode framerate_mode = gpu_inf.vendor == GPU_VENDOR_NVIDIA ? FramerateMode::CONSTANT : FramerateMode::VARIABLE; + const char *screen_region = args["-s"].value(); const char *window_str = args["-w"].value(); @@ -1517,41 +1564,23 @@ int main(int argc, char **argv) { const double target_fps = 1.0 / (double)fps; if(strcmp(video_codec_to_use, "auto") == 0) { - if(gpu_inf.vendor == GPU_VENDOR_NVIDIA) { - const AVCodec *h265_codec = find_h265_encoder(gpu_inf.vendor); + const AVCodec *h265_codec = find_h265_encoder(gpu_inf.vendor); - // h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k. - // Another important info is that when recording at a higher fps than.. 60? h265 has very bad performance. For example when recording at 144 fps the fps drops to 1 - // while with h264 the fps doesn't drop. - if(!h265_codec) { - fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n"); - video_codec_to_use = "h264"; - video_codec = VideoCodec::H264; - } else if(fps > 60) { - fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); - video_codec_to_use = "h264"; - video_codec = VideoCodec::H264; - } else { - fprintf(stderr, "Info: using h265 encoder because a codec was not specified\n"); - video_codec_to_use = "h265"; - video_codec = VideoCodec::H265; - } + // h265 generally allows recording at a higher resolution than h264 on nvidia cards. 
On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k. + // Another important info is that when recording at a higher fps than.. 60? h265 has very bad performance. For example when recording at 144 fps the fps drops to 1 + // while with h264 the fps doesn't drop. + if(!h265_codec) { + fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n"); + video_codec_to_use = "h264"; + video_codec = VideoCodec::H264; + } else if(fps > 60) { + fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); + video_codec_to_use = "h264"; + video_codec = VideoCodec::H264; } else { - const AVCodec *h264_codec = find_h264_encoder(gpu_inf.vendor); - - if(!h264_codec) { - fprintf(stderr, "Info: using h265 encoder because a codec was not specified and your gpu does not support h264\n"); - video_codec_to_use = "h265"; - video_codec = VideoCodec::H265; - //} else if(fps > 60) { - // fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); - // video_codec_to_use = "h264"; - // video_codec = VideoCodec::H264; - } else { - fprintf(stderr, "Info: using h264 encoder because a codec was not specified\n"); - video_codec_to_use = "h264"; - video_codec = VideoCodec::H264; - } + fprintf(stderr, "Info: using h265 encoder because a codec was not specified\n"); + video_codec_to_use = "h265"; + video_codec = VideoCodec::H265; } } diff --git a/src/vaapi.c b/src/vaapi.c deleted file mode 100644 index 93ef797..0000000 --- a/src/vaapi.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "../include/vaapi.h" -#include "../include/library_loader.h" -#include - -bool gsr_vaapi_load(gsr_vaapi *self) { - memset(self, 0, sizeof(gsr_vaapi)); - - dlerror(); /* clear */ - void *lib = dlopen("libva.so.2", RTLD_LAZY); - if(!lib) { - fprintf(stderr, "gsr error: gsr_vaapi_load failed: failed to load libva.so, error: %s\n", dlerror()); - return false; - } - - 
dlsym_assign required_dlsym[] = { - { (void**)&self->vaExportSurfaceHandle, "vaExportSurfaceHandle" }, - { (void**)&self->vaSyncSurface, "vaSyncSurface" }, - - { NULL, NULL } - }; - - if(!dlsym_load_list(lib, required_dlsym)) { - fprintf(stderr, "gsr error: gsr_vaapi_load failed: missing required symbols in libva.so\n"); - goto fail; - } - - self->library = lib; - return true; - - fail: - dlclose(lib); - memset(self, 0, sizeof(gsr_vaapi)); - return false; -} - -void gsr_vaapi_unload(gsr_vaapi *self) { - if(self->library) { - dlclose(self->library); - memset(self, 0, sizeof(gsr_vaapi)); - } -}