Use vaapi to do rgb->yuv420p color conversion

Enable window capture for amd/intel.
Properly check if h264/hevc is supported on amd/intel before using
This commit is contained in:
Steam Deck User 2023-04-01 15:14:36 +02:00
parent 162eac8c2f
commit 2f67083915
12 changed files with 290 additions and 720 deletions

View File

@ -38,7 +38,7 @@ You can also install gpu screen recorder ([the gtk gui version](https://git.dec0
## Intel
`libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, libva, libva-intel-driver`.
`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode)`. Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option.
`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode), libva`. Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option.
# How to use
Run `scripts/` or run gpu-screen-recorder directly, for example: `gpu-screen-recorder -w $(xdotool selectwindow) -c mp4 -f 60 -a "$(pactl get-default-sink).monitor" -o test_video.mp4`, then stop the screen recorder with Ctrl+C, which will also save the recording. You can change -w to -w screen if you want to record all monitors, or, to record a specific monitor, use -w monitor-name, for example -w HDMI-0 (use the xrandr command to find the name of your monitor; the name can also be found in your desktop environment's display settings).\

View File

@ -33,4 +33,10 @@ The video output will be black if the system is suspended on nvidia and NVreg
Restore nvfbc screen recording on monitor reconfiguration.
Window capture doesn't work properly in _control_ game after going from pause menu to in-game (and back to pause menu). There might be some x11 event we need to catch. Same for vr-video-player.
Window capture doesn't work properly in _control_ game after going from pause menu to in-game (and back to pause menu). There might be some x11 event we need to catch. Same for vr-video-player.
Fix constant framerate not working properly on amd/intel: the capture framerate gets locked to the game's framerate, which doesn't work well when multiple duplicate frames need to be encoded. We could skip the repeated encodes by duplicating the frame once and then reusing that same frame data (the difference between frames will be exactly the same), but hevc complains about that. Is there a way to make hevc stop complaining?
JPEG color range on amd seems to produce too bright video with h264 but not hevc, why?
Support recording screen/monitor on amd/intel.

View File

@ -1,7 +1,7 @@
#!/bin/sh -e
dependencies="libavcodec libavformat libavutil x11 xcomposite xrandr libpulse libswresample libavfilter"
dependencies="libavcodec libavformat libavutil x11 xcomposite xrandr libpulse libswresample libavfilter libva"
includes="$(pkg-config --cflags $dependencies)"
libs="$(pkg-config --libs $dependencies) -ldl -pthread -lm"
opts="-O2 -g0 -DNDEBUG"
@ -13,10 +13,9 @@ gcc -c src/egl.c $opts $includes
gcc -c src/cuda.c $opts $includes
gcc -c src/xnvctrl.c $opts $includes
gcc -c src/overclock.c $opts $includes
gcc -c src/vaapi.c $opts $includes
gcc -c src/window_texture.c $opts $includes
gcc -c src/time.c $opts $includes
g++ -c src/sound.cpp $opts $includes
g++ -c src/main.cpp $opts $includes
g++ -o gpu-screen-recorder -O2 capture.o nvfbc.o egl.o cuda.o xnvctrl.o overclock.o vaapi.o window_texture.o time.o xcomposite_cuda.o xcomposite_drm.o sound.o main.o -s $libs
g++ -o gpu-screen-recorder -O2 capture.o nvfbc.o egl.o cuda.o xnvctrl.o overclock.o window_texture.o time.o xcomposite_cuda.o xcomposite_drm.o sound.o main.o -s $libs
echo "Successfully built gpu-screen-recorder"

View File

@ -39,6 +39,10 @@ typedef void (*__eglMustCastToProperFunctionPointerType)(void);
#define EGL_OPENGL_ES2_BIT 0x0004
#define EGL_NONE 0x3038
#define EGL_BACK_BUFFER 0x3084
#define EGL_GL_TEXTURE_2D 0x30B1
#define EGL_TRUE 1
#define GL_TEXTURE_2D 0x0DE1
#define GL_RGB 0x1907
@ -87,6 +91,8 @@ typedef struct {
unsigned int (*eglSwapBuffers)(EGLDisplay dpy, EGLSurface surface);
__eglMustCastToProperFunctionPointerType (*eglGetProcAddress)(const char *procname);
unsigned int (*eglExportDMABUFImageQueryMESA)(EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, uint64_t *modifiers);
unsigned int (*eglExportDMABUFImageMESA)(EGLDisplay dpy, EGLImageKHR image, int *fds, int32_t *strides, int32_t *offsets);
void (*glEGLImageTargetTexture2DOES)(unsigned int target, GLeglImageOES image);
unsigned int (*glGetError)(void);
@ -101,35 +107,6 @@ typedef struct {
void (*glTexImage2D)(unsigned int target, int level, int internalFormat, int width, int height, int border, unsigned int format, unsigned int type, const void *pixels);
void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth);
void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data);
void (*glGenFramebuffers)(int n, unsigned int *framebuffers);
void (*glBindFramebuffer)(unsigned int target, unsigned int framebuffer);
void (*glViewport)(int x, int y, int width, int height);
void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level);
void (*glDrawBuffers)(int n, const unsigned int *bufs);
unsigned int (*glCheckFramebufferStatus)(unsigned int target);
void (*glBindBuffer)(unsigned int target, unsigned int buffer);
void (*glGenBuffers)(int n, unsigned int *buffers);
void (*glBufferData)(unsigned int target, khronos_ssize_t size, const void *data, unsigned int usage);
void (*glGenVertexArrays)(int n, unsigned int *arrays);
void (*glBindVertexArray)(unsigned int array);
unsigned int (*glCreateProgram)(void);
unsigned int (*glCreateShader)(unsigned int type);
void (*glAttachShader)(unsigned int program, unsigned int shader);
void (*glBindAttribLocation)(unsigned int program, unsigned int index, const char *name);
void (*glCompileShader)(unsigned int shader);
void (*glLinkProgram)(unsigned int program);
void (*glShaderSource)(unsigned int shader, int count, const char *const*string, const int *length);
void (*glUseProgram)(unsigned int program);
void (*glGetProgramInfoLog)(unsigned int program, int bufSize, int *length, char *infoLog);
void (*glGetShaderiv)(unsigned int shader, unsigned int pname, int *params);
void (*glGetShaderInfoLog)(unsigned int shader, int bufSize, int *length, char *infoLog);
void (*glDeleteProgram)(unsigned int program);
void (*glDeleteShader)(unsigned int shader);
void (*glGetProgramiv)(unsigned int program, unsigned int pname, int *params);
void (*glVertexAttribPointer)(unsigned int index, int size, unsigned int type, unsigned char normalized, int stride, const void *pointer);
void (*glEnableVertexAttribArray)(unsigned int index);
void (*glDrawArrays)(unsigned int mode, int first, int count );
} gsr_egl;
bool gsr_egl_load(gsr_egl *self, Display *dpy);

View File

@ -1,168 +0,0 @@
#ifndef GSR_VAAPI_H
#define GSR_VAAPI_H
#include <stdint.h>
#include <stdbool.h>
// To prevent hwcontext_vaapi.h from including va.h.. An ugly hack
#define _VA_H_
// These definitions are copied from va.h, which is licensed under MIT
typedef void* VADisplay;
typedef int VAStatus;
typedef unsigned int VAGenericID;
typedef VAGenericID VAConfigID;
typedef VAGenericID VAContextID;
typedef VAGenericID VASurfaceID;
#define VA_STATUS_SUCCESS 0x00000000
/* Describes one surface as a set of DRM objects (each with a PRIME file
 * descriptor) plus the layers and planes that are stored inside those objects.
 * This definition is copied from va.h (see the note near the top of this
 * header), so presumably the field order and sizes must stay in sync with
 * libva — do not reorder or resize fields without checking va.h.
 * NOTE(review): presumably this is the structure the untyped `descriptor`
 * argument of vaExportSurfaceHandle points at — confirm against libva docs. */
typedef struct {
/** Pixel format fourcc of the whole surface (VA_FOURCC_*). */
uint32_t fourcc;
/** Width of the surface in pixels. */
uint32_t width;
/** Height of the surface in pixels. */
uint32_t height;
/** Number of distinct DRM objects making up the surface. */
uint32_t num_objects;
/** Description of each object. The array has room for 4 entries. */
struct {
/** DRM PRIME file descriptor for this object. */
int fd;
/** Total size of this object (may include regions which are
* not part of the surface). */
uint32_t size;
/** Format modifier applied to this object. */
uint64_t drm_format_modifier;
} objects[4];
/** Number of layers making up the surface. */
uint32_t num_layers;
/** Description of each layer in the surface. The array has room for 4 entries. */
struct {
/** DRM format fourcc of this layer (DRM_FOURCC_*). */
uint32_t drm_format;
/** Number of planes in this layer. */
uint32_t num_planes;
/** Index in the objects array of the object containing each
* plane. */
uint32_t object_index[4];
/** Offset within the object of each plane. */
uint32_t offset[4];
/** Pitch of each plane. */
uint32_t pitch[4];
} layers[4];
} VADRMPRIMESurfaceDescriptor;
#define VA_INVALID_ID 0xffffffff
/** \brief Generic value types.
 * Names the kind of payload a VAGenericValue carries. Enumerators start at 1,
 * so 0 is not a named type. */
typedef enum {
VAGenericValueTypeInteger = 1, /**< 32-bit signed integer. */
VAGenericValueTypeFloat, /**< 32-bit floating-point value. */
VAGenericValueTypePointer, /**< Generic pointer type */
VAGenericValueTypeFunc /**< Pointer to function */
} VAGenericValueType;
/** \brief Generic function type.
 * Takes no arguments and returns nothing; used as the function-pointer member
 * of VAGenericValue. */
typedef void (*VAGenericFunc)(void);
/** \brief Generic value.
 * A type tag followed by a union with one member per VAGenericValueType
 * enumerator. Presumably `type` tells the reader which union member is
 * valid — confirm against va.h. */
typedef struct _VAGenericValue {
/** \brief Value type. See #VAGenericValueType. */
VAGenericValueType type;
/** \brief Value holder. */
union {
/** \brief 32-bit signed integer. */
int32_t i;
/** \brief 32-bit float. */
float f;
/** \brief Generic pointer. */
void *p;
/** \brief Pointer to function. */
VAGenericFunc fn;
} value;
} VAGenericValue;
/** @name Surface attribute flags */
/** \brief Surface attribute is not supported. */
/** \brief Surface attribute can be got through vaQuerySurfaceAttributes(). */
/** \brief Surface attribute can be set through vaCreateSurfaces(). */
/** \brief Surface attribute types. */
typedef enum {
VASurfaceAttribNone = 0,
* \brief Pixel format as a FOURCC (int, read/write).
* When vaQuerySurfaceAttributes() is called, the driver will return one
* PixelFormat attribute per supported pixel format.
* When provided as an input to vaCreateSurfaces(), the driver will
* allocate a surface with the provided pixel format.
/** \brief Minimal width in pixels (int, read-only). */
/** \brief Maximal width in pixels (int, read-only). */
/** \brief Minimal height in pixels (int, read-only). */
/** \brief Maximal height in pixels (int, read-only). */
/** \brief Surface memory type expressed in bit fields (int, read/write). */
/** \brief External buffer descriptor (pointer, write).
* Refer to the documentation for the memory type being created to
* determine what descriptor structure to pass here. If not otherwise
* stated, the common VASurfaceAttribExternalBuffers should be used.
/** \brief Surface usage hint, gives the driver a hint of intended usage
* to optimize allocation (e.g. tiling) (int, read/write). */
/** \brief List of possible DRM format modifiers (pointer, write).
* The value must be a pointer to a VADRMFormatModifierList. This can only
* be used when allocating a new buffer, it's invalid to use this attribute
* when importing an existing buffer.
/** \brief Number of surface attributes. */
} VASurfaceAttribType;
/** \brief Surface attribute.
 * One (type, flags, value) triple. `value` is a VAGenericValue, so the payload
 * may be an integer, float, pointer or function pointer depending on the
 * attribute type. */
typedef struct _VASurfaceAttrib {
/** \brief Type. */
VASurfaceAttribType type;
/** \brief Flags. See "Surface attribute flags". */
uint32_t flags;
/** \brief Value. See "Surface attribute types" for the expected types. */
VAGenericValue value;
} VASurfaceAttrib;
/* Holds a library handle together with pointers to the two VA entry points
 * this header exposes.
 * NOTE(review): `library` is presumably the handle the function pointers are
 * resolved from (e.g. via dlopen/dlsym) — the loader implementation is not
 * visible here; confirm in the corresponding .c file. */
typedef struct {
void *library;
/* `descriptor` is an untyped output pointer; presumably it is expected to
 * point at a VADRMPRIMESurfaceDescriptor — confirm against libva docs. */
VAStatus (*vaExportSurfaceHandle)(VADisplay dpy, VASurfaceID surface_id, uint32_t mem_type, uint32_t flags, void *descriptor);
VAStatus (*vaSyncSurface)(VADisplay dpy, VASurfaceID render_target);
} gsr_vaapi;
/* Initializes *self. Returns a bool; presumably true on success and false on
 * failure — the implementation is not visible here. */
bool gsr_vaapi_load(gsr_vaapi *self);
/* Counterpart to gsr_vaapi_load; presumably releases whatever load acquired
 * (implementation not visible here). */
void gsr_vaapi_unload(gsr_vaapi *self);
#endif /* GSR_VAAPI_H */

View File

@ -13,4 +13,5 @@ xcomposite = ">=0.2"
xrandr = ">=1"
libpulse = ">=13"
libswresample = ">=3"
libavfilter = ">=5"
libavfilter = ">=5"
libva = ">=1"

View File

@ -177,8 +177,8 @@ static bool ffmpeg_create_cuda_contexts(gsr_capture_nvfbc *cap_nvfbc, AVCodecCon
return false;
video_codec_context->hw_device_ctx = device_ctx;
video_codec_context->hw_frames_ctx = frame_context;
video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx);
video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context);
return true;
@ -350,9 +350,9 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
// Not needed because the above call to unref device ctx also frees this?
// av_buffer_unref(&video_codec_context->hw_frames_ctx);
return -1;
@ -438,9 +438,8 @@ static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_co
// Not needed because the above call to unref device ctx also frees this?
// av_buffer_unref(&video_codec_context->hw_frames_ctx);
if(cap_nvfbc) {

View File

@ -131,8 +131,8 @@ static bool cuda_create_codec_context(gsr_capture_xcomposite_cuda *cap_xcomp, AV
return false;
video_codec_context->hw_device_ctx = device_ctx;
video_codec_context->hw_frames_ctx = frame_context;
video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx);
video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context);
return true;
@ -252,9 +252,8 @@ static void gsr_capture_xcomposite_cuda_stop(gsr_capture *cap, AVCodecContext *v
// Not needed because the above call to unref device ctx also frees this?
// av_buffer_unref(&video_codec_context->hw_frames_ctx);
if(cap_xcomp->cuda.cu_ctx) {
CUcontext old_ctx;

View File

@ -1,15 +1,16 @@
#include "../../include/capture/xcomposite_drm.h"
#include "../../include/egl.h"
#include "../../include/vaapi.h"
#include "../../include/window_texture.h"
#include "../../include/time.h"
#include <stdlib.h>
#include <stdio.h>
#include <X11/Xlib.h>
#include <X11/extensions/Xcomposite.h>
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_vaapi.h>
#include <libavutil/frame.h>
#include <libavcodec/avcodec.h>
//#include <drm_fourcc.h>
#include <assert.h>
/* TODO: Proper error checks and cleanups */
@ -27,18 +28,20 @@ typedef struct {
WindowTexture window_texture;
gsr_egl egl;
gsr_vaapi vaapi;
int fourcc;
int num_planes;
uint64_t modifiers;
int dmabuf_fd;
int32_t pitch;
int32_t offset;
unsigned int target_textures[2];
unsigned int framebuffer_y;
unsigned int framebuffer_uv;
unsigned int vao;
unsigned int shader_y;
unsigned int shader_uv;
VADisplay va_dpy;
VAConfigID config_id;
VAContextID context_id;
VASurfaceID input_surface;
} gsr_capture_xcomposite_drm;
static int max_int(int a, int b) {
@ -47,7 +50,7 @@ static int max_int(int a, int b) {
static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCodecContext *video_codec_context) {
AVBufferRef *device_ctx;
if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_VAAPI, "/dev/dri/card0", NULL, 0) < 0) {
if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_VAAPI, "/dev/dri/renderD128", NULL, 0) < 0) {
fprintf(stderr, "Error: Failed to create hardware device context\n");
return false;
@ -63,7 +66,7 @@ static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCo
(AVHWFramesContext *)frame_context->data;
hw_frame_context->width = video_codec_context->width;
hw_frame_context->height = video_codec_context->height;
hw_frame_context->sw_format = AV_PIX_FMT_NV12;
hw_frame_context->sw_format = AV_PIX_FMT_NV12;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_YUV420P;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_NV12;
hw_frame_context->format = video_codec_context->pix_fmt;
hw_frame_context->device_ref = device_ctx;
hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data;
@ -74,227 +77,20 @@ static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCo
cap_xcomp->va_dpy = vactx->display;
if (av_hwframe_ctx_init(frame_context) < 0) {
fprintf(stderr, "Error: Failed to initialize hardware frame context (note: ffmpeg version needs to be > 4.0)\n");
fprintf(stderr, "Error: Failed to initialize hardware frame context "
"(note: ffmpeg version needs to be > 4.0)\n");
return false;
video_codec_context->hw_device_ctx = device_ctx; // TODO: av_buffer_ref? and in more places
video_codec_context->hw_frames_ctx = frame_context;
video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx);
video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context);
return true;
#define GL_COMPILE_STATUS 0x8B81
#define GL_INFO_LOG_LENGTH 0x8B84
/* Creates and compiles one shader object of the given `type` from the source
 * string `shaderSrc`, using the GL function pointers in cap_xcomp->egl.
 * Returns the shader object name on success. Returns 0 if glCreateShader
 * yields 0 or if compilation fails; on a compile failure the info log (when
 * its reported length is greater than 1) is printed to stderr and the shader
 * object is deleted before returning. */
unsigned int esLoadShader ( gsr_capture_xcomposite_drm *cap_xcomp, unsigned int type, const char *shaderSrc ) {
unsigned int shader;
int compiled;
// Create the shader object
shader = cap_xcomp->egl.glCreateShader ( type );
if ( shader == 0 )
return 0;
// Load the shader source
// (a single string; NULL length array, so the string is expected to be null-terminated)
cap_xcomp->egl.glShaderSource ( shader, 1, &shaderSrc, NULL );
// Compile the shader
cap_xcomp->egl.glCompileShader ( shader );
// Check the compile status
cap_xcomp->egl.glGetShaderiv ( shader, GL_COMPILE_STATUS, &compiled );
if ( !compiled )
int infoLen = 0;
cap_xcomp->egl.glGetShaderiv ( shader, GL_INFO_LOG_LENGTH, &infoLen );
if ( infoLen > 1 )
// NOTE(review): the malloc result is passed on without a NULL check.
char* infoLog = malloc (sizeof(char) * infoLen );
cap_xcomp->egl.glGetShaderInfoLog ( shader, infoLen, NULL, infoLog );
fprintf (stderr, "Error compiling shader:\n%s\n", infoLog );
free ( infoLog );
// Failed shader objects are not handed back to the caller.
cap_xcomp->egl.glDeleteShader ( shader );
return 0;
return shader;
#define GL_VERTEX_SHADER 0x8B31
#define GL_COMPILE_STATUS 0x8B81
#define GL_LINK_STATUS 0x8B82
/// \brief Load a vertex and fragment shader, create a program object, link program.
// Errors output to log (stderr).
/// \param cap_xcomp Capture state whose `egl` member supplies the GL function pointers
/// \param vertShaderSrc Vertex shader source code
/// \param fragShaderSrc Fragment shader source code
/// \return A new program object linked with the vertex/fragment shader pair, 0 on failure
// Both shaders are compiled through esLoadShader. After a successful link the
// two shader objects are deleted and only the program object is returned.
unsigned int esLoadProgram ( gsr_capture_xcomposite_drm *cap_xcomp, const char *vertShaderSrc, const char *fragShaderSrc )
unsigned int vertexShader;
unsigned int fragmentShader;
unsigned int programObject;
int linked;
// Load the vertex/fragment shaders
vertexShader = esLoadShader ( cap_xcomp, GL_VERTEX_SHADER, vertShaderSrc );
if ( vertexShader == 0 )
return 0;
fragmentShader = esLoadShader ( cap_xcomp, GL_FRAGMENT_SHADER, fragShaderSrc );
if ( fragmentShader == 0 )
// The already-compiled vertex shader is released before bailing out.
cap_xcomp->egl.glDeleteShader( vertexShader );
return 0;
// Create the program object
programObject = cap_xcomp->egl.glCreateProgram ( );
// NOTE(review): no glDeleteShader call is visible on this failure path, so
// vertexShader and fragmentShader appear to be left undeleted here.
if ( programObject == 0 )
return 0;
cap_xcomp->egl.glAttachShader ( programObject, vertexShader );
cap_xcomp->egl.glAttachShader ( programObject, fragmentShader );
// Link the program
cap_xcomp->egl.glLinkProgram ( programObject );
// Check the link status
cap_xcomp->egl.glGetProgramiv ( programObject, GL_LINK_STATUS, &linked );
if ( !linked )
int infoLen = 0;
cap_xcomp->egl.glGetProgramiv ( programObject, GL_INFO_LOG_LENGTH, &infoLen );
if ( infoLen > 1 )
// NOTE(review): the malloc result is passed on without a NULL check.
char* infoLog = malloc (sizeof(char) * infoLen );
cap_xcomp->egl.glGetProgramInfoLog ( programObject, infoLen, NULL, infoLog );
fprintf (stderr, "Error linking program:\n%s\n", infoLog );
free ( infoLog );
// NOTE(review): only the program is deleted on link failure; no
// glDeleteShader call for the two shader objects is visible on this path.
cap_xcomp->egl.glDeleteProgram ( programObject );
return 0;
// Free up no longer needed shader resources
cap_xcomp->egl.glDeleteShader ( vertexShader );
cap_xcomp->egl.glDeleteShader ( fragmentShader );
return programObject;
#define RGB_TO_YUV "const mat4 RGBtoYUV = mat4(0.257, 0.439, -0.148, 0.0,\n" \
" 0.504, -0.368, -0.291, 0.0,\n" \
" 0.098, -0.071, 0.439, 0.0,\n" \
" 0.0625, 0.500, 0.500, 1.0);"
/* Builds the shader program that writes a single component (FragColor.x): the
 * .x component of RGBtoYUV applied to the sampled RGB texel of `tex1`.
 * The vertex shader passes `texcoords` through unchanged and emits `pos` as a
 * clip-space position with z = 0 and w = 1.
 * Returns the program object from esLoadProgram, or 0 (after logging to
 * stderr) when program creation fails.
 * NOTE(review): the fragment shader text references RGBtoYUV, but no
 * definition of it is visible inside the source string below — confirm that
 * the RGB_TO_YUV macro is meant to be concatenated into fShaderStr. */
static unsigned int LoadShadersY(gsr_capture_xcomposite_drm *cap_xcomp) {
char vShaderStr[] =
"#version 300 es \n"
"in vec2 pos; \n"
"in vec2 texcoords; \n"
"out vec2 texcoords_out; \n"
"void main() \n"
"{ \n"
" texcoords_out = texcoords; \n"
" gl_Position = vec4(pos.x, pos.y, 0.0, 1.0); \n"
"} \n";
char fShaderStr[] =
"#version 300 es \n"
"precision mediump float; \n"
"in vec2 texcoords_out; \n"
"uniform sampler2D tex1; \n"
"out vec4 FragColor; \n"
"void main() \n"
"{ \n"
" FragColor.x = (RGBtoYUV * vec4(texture(tex1, texcoords_out).rgb, 1.0)).x; \n"
"} \n";
unsigned int shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr);
if (shader_program == 0) {
fprintf(stderr, "failed to create shader!\n");
return 0;
// NOTE(review): esLoadProgram has already linked the program at this point.
// Per the GL documentation, attribute bindings take effect at link time, so
// these two calls likely have no effect without a re-link — confirm.
cap_xcomp->egl.glBindAttribLocation(shader_program, 0, "pos");
cap_xcomp->egl.glBindAttribLocation(shader_program, 1, "texcoords");
return shader_program;
/* Builds the shader program that writes two components (FragColor.xy): the
 * .zy components of RGBtoYUV applied to the RGB texel of `tex1`, sampled at
 * texcoords_out scaled by 2.0.
 * The vertex shader is the same pass-through used by LoadShadersY.
 * Returns the program object from esLoadProgram, or 0 (after logging to
 * stderr) when program creation fails.
 * NOTE(review): the fragment shader text references RGBtoYUV, but no
 * definition of it is visible inside the source string below — confirm that
 * the RGB_TO_YUV macro is meant to be concatenated into fShaderStr. */
static unsigned int LoadShadersUV(gsr_capture_xcomposite_drm *cap_xcomp) {
char vShaderStr[] =
"#version 300 es \n"
"in vec2 pos; \n"
"in vec2 texcoords; \n"
"out vec2 texcoords_out; \n"
"void main() \n"
"{ \n"
" texcoords_out = texcoords; \n"
" gl_Position = vec4(pos.x, pos.y, 0.0, 1.0); \n"
"} \n";
char fShaderStr[] =
"#version 300 es \n"
"precision mediump float; \n"
"in vec2 texcoords_out; \n"
"uniform sampler2D tex1; \n"
"out vec4 FragColor; \n"
"void main() \n"
"{ \n"
" FragColor.xy = (RGBtoYUV * vec4(texture(tex1, texcoords_out*2.0).rgb, 1.0)).zy; \n"
"} \n";
unsigned int shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr);
if (shader_program == 0) {
fprintf(stderr, "failed to create shader!\n");
return 0;
// NOTE(review): esLoadProgram has already linked the program at this point.
// Per the GL documentation, attribute bindings take effect at link time, so
// these two calls likely have no effect without a re-link — confirm.
cap_xcomp->egl.glBindAttribLocation(shader_program, 0, "pos");
cap_xcomp->egl.glBindAttribLocation(shader_program, 1, "texcoords");
return shader_program;
#define GL_FLOAT 0x1406
#define GL_FALSE 0
#define GL_TRUE 1
#define GL_TRIANGLES 0x0004
#define DRM_FORMAT_MOD_INVALID 72057594037927935
#define EGL_TRUE 1
/* Packs four values into one 32-bit code: `a` lands in bits 0-7, `b` in bits
 * 8-15, `c` in bits 16-23 and `d` in bits 24-31. The arguments are not
 * masked, so each is expected to fit in 8 bits (e.g. character literals such
 * as 'R', '8', ' ', ' '). */
static uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
return (d << 24) | (c << 16) | (b << 8) | a;
static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *video_codec_context) {
gsr_capture_xcomposite_drm *cap_xcomp = cap->priv;
@ -317,15 +113,51 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi
return -1;
if(!gsr_vaapi_load(&cap_xcomp->vaapi)) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed to load vaapi\n");
if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: could not find eglExportDMABUFImageQueryMESA\n");
return -1;
if(!cap_xcomp->egl.eglExportDMABUFImageMESA) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: could not find eglExportDMABUFImageMESA\n");
return -1;
/* Disable vsync */
cap_xcomp->egl.eglSwapInterval(cap_xcomp->egl.egl_display, 0);
#if 0
// TODO: Fallback to composite window
if(window_texture_init(&cap_xcomp->window_texture, cap_xcomp->dpy, cap_xcomp->params.window, &cap_xcomp->gl) != 0) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed get window texture for window %ld\n", cap_xcomp->params.window);
return -1;
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture));
cap_xcomp->texture_size.x = 0;
cap_xcomp->texture_size.y = 0;
cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &cap_xcomp->texture_size.x);
cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y);
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0);
cap_xcomp->texture_size.x = max_int(2, cap_xcomp->texture_size.x & ~1);
cap_xcomp->texture_size.y = max_int(2, cap_xcomp->texture_size.y & ~1);
cap_xcomp->target_texture_id = gl_create_texture(cap_xcomp, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y);
if(cap_xcomp->target_texture_id == 0) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to create opengl texture\n");
gsr_capture_xcomposite_stop(cap, video_codec_context);
return -1;
video_codec_context->width = cap_xcomp->texture_size.x;
video_codec_context->height = cap_xcomp->texture_size.y;
cap_xcomp->window_resize_timer = clock_get_monotonic_seconds();
return 0;
// TODO: Fallback to composite window
if(window_texture_init(&cap_xcomp->window_texture, cap_xcomp->dpy, cap_xcomp->params.window, &cap_xcomp->egl) != 0) {
fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed get window texture for window %ld\n", cap_xcomp->params.window);
@ -346,13 +178,47 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi
video_codec_context->width = cap_xcomp->texture_size.x;
video_codec_context->height = cap_xcomp->texture_size.y;
const intptr_t pixmap_attrs[] = {
EGLImage img = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_context, EGL_GL_TEXTURE_2D, (EGLClientBuffer)(uint64_t)window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), pixmap_attrs);
if(!img) {
fprintf(stderr, "eglCreateImage failed\n");
return -1;
if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA(cap_xcomp->egl.egl_display, img, &cap_xcomp->fourcc, &cap_xcomp->num_planes, &cap_xcomp->modifiers)) {
fprintf(stderr, "eglExportDMABUFImageQueryMESA failed\n");
return -1;
if(cap_xcomp->num_planes != 1) {
fprintf(stderr, "Blablalba\n");
return -1;
if(!cap_xcomp->egl.eglExportDMABUFImageMESA(cap_xcomp->egl.egl_display, img, &cap_xcomp->dmabuf_fd, &cap_xcomp->pitch, &cap_xcomp->offset)) {
fprintf(stderr, "eglExportDMABUFImageMESA failed\n");
return -1;
fprintf(stderr, "texture: %u, dmabuf: %d, pitch: %d, offset: %d\n", window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), cap_xcomp->dmabuf_fd, cap_xcomp->pitch, cap_xcomp->offset);
fprintf(stderr, "fourcc: %d, num planes: %d, modifiers: %zu\n", cap_xcomp->fourcc, cap_xcomp->num_planes, cap_xcomp->modifiers);
if(!drm_create_codec_context(cap_xcomp, video_codec_context)) {
fprintf(stderr, "failed to create hw codec context\n");
return -1;
//fprintf(stderr, "sneed: %u\n", cap_xcomp->FramebufferName);
return 0;
static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) {
@ -372,11 +238,7 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi
(*frame)->format = video_codec_context->pix_fmt;
(*frame)->width = video_codec_context->width;
(*frame)->height = video_codec_context->height;
(*frame)->color_range = video_codec_context->color_range;
(*frame)->color_primaries = video_codec_context->color_primaries;
(*frame)->color_trc = video_codec_context->color_trc;
(*frame)->colorspace = video_codec_context->colorspace;
(*frame)->chroma_location = video_codec_context->chroma_sample_location;
(*frame)->color_range = AVCOL_RANGE_JPEG;
int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0);
if(res < 0) {
@ -384,143 +246,63 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi
VADRMPRIMESurfaceDescriptor prime;
fprintf(stderr, "fourcc: %u\n", cap_xcomp->fourcc);
fprintf(stderr, "va surface id: %u\n", (VASurfaceID)(uintptr_t)(*frame)->data[3]);
VASurfaceID surface_id = (uintptr_t)(*frame)->data[3];
VAStatus va_status = cap_xcomp->vaapi.vaExportSurfaceHandle(cap_xcomp->va_dpy, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_READ_WRITE | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &prime); // TODO: Composed layers
int xx = 0, yy = 0;
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture));
cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xx);
cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &yy);
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0);
uintptr_t dmabuf = cap_xcomp->dmabuf_fd;
VASurfaceAttribExternalBuffers buf = {0};
buf.pixel_format = VA_FOURCC_BGRX; // TODO: VA_FOURCC_XRGB?
buf.width = xx;
buf.height = yy;
buf.data_size = yy * cap_xcomp->pitch;
buf.num_planes = 1;
buf.pitches[0] = cap_xcomp->pitch;
buf.offsets[0] = cap_xcomp->offset;
buf.buffers = &dmabuf;
buf.num_buffers = 1;
buf.flags = 0;
buf.private_data = 0;
VASurfaceAttrib attribs[2] = {0};
attribs[0].type = VASurfaceAttribMemoryType;
attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
attribs[0].value.type = VAGenericValueTypeInteger;
attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // TODO: prime1 instead?
attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
attribs[1].value.type = VAGenericValueTypePointer;
attribs[1].value.value.p = &buf;
VAStatus va_status = vaCreateSurfaces(cap_xcomp->va_dpy, VA_RT_FORMAT_RGB32, xx, yy, &cap_xcomp->input_surface, 1, attribs, 2);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaExportSurfaceHandle failed\n");
fprintf(stderr, "failed to create surface: %d\n", va_status);
cap_xcomp->vaapi.vaSyncSurface(cap_xcomp->va_dpy, surface_id);
fprintf(stderr, "fourcc: %u, width: %u, height: %u\n", prime.fourcc, prime.width, prime.height);
for(int i = 0; i < prime.num_layers; ++i) {
fprintf(stderr, " drm format: %u, num planes: %u\n", prime.layers[i].drm_format, prime.layers[i].num_planes);
for(int j = 0; j < prime.layers[i].num_planes; ++j) {
const uint32_t object_index = prime.layers[i].object_index[j];
fprintf(stderr, " object index: %u, offset: %u, pitch: %u, fd: %d, size: %u, drm format mod: %lu\n", object_index, prime.layers[i].offset[j], prime.layers[i].pitch[j], prime.objects[object_index].fd, prime.objects[object_index].size, prime.objects[object_index].drm_format_modifier);
//vaBeginPicture(cap_xcomp->va_dpy, )
va_status = vaCreateConfig(cap_xcomp->va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &cap_xcomp->config_id);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaCreateConfig failed: %d\n", va_status);
#define EGL_WIDTH 0x3057
#define EGL_HEIGHT 0x3056
#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272
#define EGL_LINUX_DMA_BUF_EXT 0x3270
#define GL_TEXTURE0 0x84C0
#define FOURCC_NV12 842094158
if(prime.fourcc == FOURCC_NV12) { // This happens on AMD
while(cap_xcomp->egl.glGetError()) {}
while(cap_xcomp->egl.eglGetError() != EGL_SUCCESS){}
EGLImage images[2];
cap_xcomp->egl.glGenTextures(2, cap_xcomp->target_textures);
assert(cap_xcomp->egl.glGetError() == 0);
for(int i = 0; i < 2; ++i) {
const uint32_t formats[2] = { fourcc('R', '8', ' ', ' '), fourcc('G', 'R', '8', '8') };
const int layer = i;
const int plane = 0;
const intptr_t img_attr[] = {
EGL_WIDTH, prime.width / (1 + i), // half size
EGL_HEIGHT, prime.height / (1 + i), // for chroma
EGL_DMA_BUF_PLANE0_FD_EXT, prime.objects[prime.layers[layer].object_index[plane]].fd,
EGL_DMA_BUF_PLANE0_OFFSET_EXT, prime.layers[layer].offset[plane],
EGL_DMA_BUF_PLANE0_PITCH_EXT, prime.layers[layer].pitch[plane],
images[i] = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, 0, EGL_LINUX_DMA_BUF_EXT, NULL, img_attr); // TODO: Cleanup at the end of this for loop
assert(cap_xcomp->egl.eglGetError() == EGL_SUCCESS);
//cap_xcomp->egl.glActiveTexture(GL_TEXTURE0 + i);
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, cap_xcomp->target_textures[i]);
assert(cap_xcomp->egl.glGetError() == 0);
cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
assert(cap_xcomp->egl.glGetError() == 0);
cap_xcomp->egl.glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, images[i]);
assert(cap_xcomp->egl.glGetError() == 0);
assert(cap_xcomp->egl.eglGetError() == EGL_SUCCESS);
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0);
cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->framebuffer_y);
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_y);
cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_textures[0], 0);
// Set the list of draw buffers.
unsigned int DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers
if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
fprintf(stderr, "Failed to setup framebuffer\n");
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0);
cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->framebuffer_uv);
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_uv);
cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_textures[1], 0);
// Set the list of draw buffers.
cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers
if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
fprintf(stderr, "Failed to setup framebuffer\n");
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0);
cap_xcomp->shader_y = LoadShadersY(cap_xcomp);
cap_xcomp->shader_uv = LoadShadersUV(cap_xcomp);
float vVertices[] = {
-1.0f, 1.0f, 0.0f, 1.0f,
-1.0f, -1.0f, 0.0f, 0.0f,
1.0f, -1.0f, 1.0f, 0.0f,
-1.0f, 1.0f, 0.0f, 1.0f,
1.0f, -1.0f, 1.0f, 0.0f,
1.0f, 1.0f, 1.0f, 1.0f
unsigned int quadVBO;
cap_xcomp->egl.glGenVertexArrays(1, &cap_xcomp->vao);
cap_xcomp->egl.glGenBuffers(1, &quadVBO);
cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
cap_xcomp->egl.glBufferData(GL_ARRAY_BUFFER, sizeof(vVertices), &vVertices, GL_STATIC_DRAW);
cap_xcomp->egl.glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0);
cap_xcomp->egl.glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float)));
} else {
fprintf(stderr, "unexpected fourcc: %u, expected nv12\n", prime.fourcc);
VASurfaceID target_surface_id = (uintptr_t)(*frame)->data[3];
va_status = vaCreateContext(cap_xcomp->va_dpy, cap_xcomp->config_id, xx, yy, VA_PROGRESSIVE, &target_surface_id, 1, &cap_xcomp->context_id);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaCreateContext failed: %d\n", va_status);
@ -542,34 +324,47 @@ static bool gsr_capture_xcomposite_drm_should_stop(gsr_capture *cap, bool *err)
static int gsr_capture_xcomposite_drm_capture(gsr_capture *cap, AVFrame *frame) {
gsr_capture_xcomposite_drm *cap_xcomp = cap->priv;
vec2i source_size = cap_xcomp->texture_size;
cap_xcomp->egl.glViewport(0, 0, source_size.x, source_size.y);
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture));
VASurfaceID target_surface_id = (uintptr_t)frame->data[3];
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_y);
cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6);
VAStatus va_status = vaBeginPicture(cap_xcomp->va_dpy, cap_xcomp->context_id, target_surface_id);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaBeginPicture failed: %d\n", va_status);
return 1;
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->framebuffer_uv);
VAProcPipelineParameterBuffer params = {0};
params.surface = cap_xcomp->input_surface;
params.surface_region = NULL;
params.output_background_color = 0xFF000000;
params.filter_flags = VA_FRAME_PICTURE;
// TODO: Colors
cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6);
VABufferID buffer_id = 0;
va_status = vaCreateBuffer(cap_xcomp->va_dpy, cap_xcomp->context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaCreateBuffer failed: %d\n", va_status);
return 1;
cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0);
cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0);
va_status = vaRenderPicture(cap_xcomp->va_dpy, cap_xcomp->context_id, &buffer_id, 1);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaRenderPicture failed: %d\n", va_status);
return 1;
cap_xcomp->egl.eglSwapBuffers(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_surface);
va_status = vaEndPicture(cap_xcomp->va_dpy, cap_xcomp->context_id);
if(va_status != VA_STATUS_SUCCESS) {
fprintf(stderr, "vaEndPicture failed: %d\n", va_status);
return 1;
// TODO: Needed?
//vaSyncSurface(cap_xcomp->va_dpy, target_surface_id);
// TODO: Remove
//cap_xcomp->egl.eglSwapBuffers(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_surface);
return 0;

View File

@ -110,6 +110,8 @@ static bool gsr_egl_load_egl(gsr_egl *self, void *library) {
static bool gsr_egl_proc_load_egl(gsr_egl *self) {
self->eglExportDMABUFImageQueryMESA = self->eglGetProcAddress("eglExportDMABUFImageQueryMESA");
self->eglExportDMABUFImageMESA = self->eglGetProcAddress("eglExportDMABUFImageMESA");
self->glEGLImageTargetTexture2DOES = self->eglGetProcAddress("glEGLImageTargetTexture2DOES");
if(!self->glEGLImageTargetTexture2DOES) {
@ -134,34 +136,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glTexImage2D, "glTexImage2D" },
{ (void**)&self->glCopyImageSubData, "glCopyImageSubData" },
{ (void**)&self->glClearTexImage, "glClearTexImage" },
{ (void**)&self->glGenFramebuffers, "glGenFramebuffers" },
{ (void**)&self->glBindFramebuffer, "glBindFramebuffer" },
{ (void**)&self->glViewport, "glViewport" },
{ (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" },
{ (void**)&self->glDrawBuffers, "glDrawBuffers" },
{ (void**)&self->glCheckFramebufferStatus, "glCheckFramebufferStatus" },
{ (void**)&self->glBindBuffer, "glBindBuffer" },
{ (void**)&self->glGenBuffers, "glGenBuffers" },
{ (void**)&self->glBufferData, "glBufferData" },
{ (void**)&self->glGenVertexArrays, "glGenVertexArrays" },
{ (void**)&self->glBindVertexArray, "glBindVertexArray" },
{ (void**)&self->glCreateProgram, "glCreateProgram" },
{ (void**)&self->glCreateShader, "glCreateShader" },
{ (void**)&self->glAttachShader, "glAttachShader" },
{ (void**)&self->glBindAttribLocation, "glBindAttribLocation" },
{ (void**)&self->glCompileShader, "glCompileShader" },
{ (void**)&self->glLinkProgram, "glLinkProgram" },
{ (void**)&self->glShaderSource, "glShaderSource" },
{ (void**)&self->glUseProgram, "glUseProgram" },
{ (void**)&self->glGetProgramInfoLog, "glGetProgramInfoLog" },
{ (void**)&self->glGetShaderiv, "glGetShaderiv" },
{ (void**)&self->glGetShaderInfoLog, "glGetShaderInfoLog" },
{ (void**)&self->glDeleteProgram, "glDeleteProgram" },
{ (void**)&self->glDeleteShader, "glDeleteShader" },
{ (void**)&self->glGetProgramiv, "glGetProgramiv" },
{ (void**)&self->glVertexAttribPointer, "glVertexAttribPointer" },
{ (void**)&self->glEnableVertexAttribArray, "glEnableVertexAttribArray" },
{ (void**)&self->glDrawArrays, "glDrawArrays" },

View File

@ -446,20 +446,65 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt,
return codec_context;
// Attaches a VAAPI hardware device context and a hardware frames context to
// |video_codec_context|.
//
// The frames context takes its width, height and (hardware) pixel format from
// |video_codec_context|, so those fields must be set before calling this.
// Returns true on success. On failure an error is printed to stderr and false
// is returned.
//
// NOTE(review): the DRM render node path is hard-coded below; presumably this
// picks the first GPU only - confirm behaviour on multi-GPU systems.
// NOTE(review): no av_buffer_unref call is visible in this function, neither on
// the failure paths nor after the av_buffer_ref calls at the end. Confirm
// against the full file whether |device_ctx| / |frame_context| references leak.
static bool vaapi_create_codec_context(AVCodecContext *video_codec_context) {
AVBufferRef *device_ctx;
// Open the VAAPI device through the DRM render node.
if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_VAAPI, "/dev/dri/renderD128", NULL, 0) < 0) {
fprintf(stderr, "Error: Failed to create hardware device context\n");
return false;
// Allocate a frames context tied to the device created above.
AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx);
if(!frame_context) {
fprintf(stderr, "Error: Failed to create hwframe context\n");
return false;
AVHWFramesContext *hw_frame_context =
(AVHWFramesContext *)frame_context->data;
// Frame dimensions mirror the encoder's configured size.
hw_frame_context->width = video_codec_context->width;
hw_frame_context->height = video_codec_context->height;
// sw_format is the underlying software layout (NV12); format is the
// pixel format already chosen for the codec context.
hw_frame_context->sw_format = AV_PIX_FMT_NV12;
hw_frame_context->format = video_codec_context->pix_fmt;
hw_frame_context->device_ref = device_ctx;
hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data;
// Request a pool with a single initial frame.
hw_frame_context->initial_pool_size = 1;
if (av_hwframe_ctx_init(frame_context) < 0) {
fprintf(stderr, "Error: Failed to initialize hardware frame context "
"(note: ffmpeg version needs to be > 4.0)\n");
return false;
// Hand the codec context its own new references to both contexts.
video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx);
video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context);
return true;
static bool check_if_codec_valid_for_hardware(const AVCodec *codec, gpu_vendor vendor) {
// TODO: For now we assume that amd and intel always support h264 and hevc, but we default to h264
if(vendor != GPU_VENDOR_NVIDIA)
return true;
// Do not use AV_PIX_FMT_CUDA because we don't want to do a full check with a hardware context
AVCodecContext *codec_context = create_video_codec_context(vendor == GPU_VENDOR_NVIDIA ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_VAAPI, VideoQuality::VERY_HIGH, 60, codec, false, vendor, FramerateMode::CONSTANT);
return false;
codec_context->width = 32;
codec_context->height = 32;
if(!vaapi_create_codec_context(codec_context)) {
return false;
bool success = false;
// Do not use AV_PIX_FMT_CUDA because we don't want to do a full check with a hardware context
AVCodecContext *codec_context = create_video_codec_context(AV_PIX_FMT_YUV420P, VideoQuality::VERY_HIGH, 60, codec, false, vendor, FramerateMode::CONSTANT);
codec_context->width = 1920;
codec_context->height = 1080;
if(codec_context) {
success = avcodec_open2(codec_context, codec_context->codec, NULL) == 0;
success = avcodec_open2(codec_context, codec_context->codec, NULL) == 0;
return success;
@ -468,6 +513,9 @@ static const AVCodec* find_h264_encoder(gpu_vendor vendor) {
codec = avcodec_find_encoder_by_name(vendor == GPU_VENDOR_NVIDIA ? "nvenc_h264" : "vaapi_h264");
return nullptr;
static bool checked = false;
static bool checked_success = true;
if(!checked) {
@ -478,7 +526,6 @@ static const AVCodec* find_h264_encoder(gpu_vendor vendor) {
return checked_success ? codec : nullptr;
// TODO: Disable under intel/amd?
static const AVCodec* find_h265_encoder(gpu_vendor vendor) {
const AVCodec *codec = avcodec_find_encoder_by_name(vendor == GPU_VENDOR_NVIDIA ? "hevc_nvenc" : "hevc_vaapi");
@ -620,16 +667,16 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality
} else {
switch(video_quality) {
case VideoQuality::MEDIUM:
av_dict_set_int(&options, "qp", 40, 0);
av_dict_set_int(&options, "qp", 37, 0);
case VideoQuality::HIGH:
av_dict_set_int(&options, "qp", 35, 0);
av_dict_set_int(&options, "qp", 32, 0);
case VideoQuality::VERY_HIGH:
av_dict_set_int(&options, "qp", 30, 0);
av_dict_set_int(&options, "qp", 27, 0);
case VideoQuality::ULTRA:
av_dict_set_int(&options, "qp", 24, 0);
av_dict_set_int(&options, "qp", 21, 0);
@ -639,7 +686,7 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality
if(codec_context->codec_id == AV_CODEC_ID_H264) {
av_dict_set(&options, "profile", "high", 0);
av_dict_set_int(&options, "quality", 32, 0);
av_dict_set_int(&options, "quality", 4, 0);
} else {
av_dict_set(&options, "profile", "main", 0);
@ -1228,8 +1275,6 @@ int main(int argc, char **argv) {
FramerateMode framerate_mode = FramerateMode::CONSTANT;
const Arg &audio_input_arg = args["-a"];
const std::vector<AudioInput> audio_inputs = get_pulseaudio_inputs();
std::vector<MergedAudioInputs> requested_audio_inputs;
@ -1323,12 +1368,14 @@ int main(int argc, char **argv) {
very_old_gpu = true;
// TODO: Remove once gpu screen recorder supports amd and intel properly
if(gpu_inf.vendor != GPU_VENDOR_NVIDIA) {
fprintf(stderr, "Error: gpu-screen-recorder does currently only support nvidia gpus\n");
return 2;
if(gpu_inf.vendor != GPU_VENDOR_NVIDIA && overclock) {
fprintf(stderr, "Info: overclock option has no effect on amd/intel, ignoring option...\n");
// TODO: Fix constant framerate not working properly on amd/intel because capture framerate gets locked to the same framerate as
// game framerate, which doesn't work well when you need to encode multiple duplicate frames.
const FramerateMode framerate_mode = gpu_inf.vendor == GPU_VENDOR_NVIDIA ? FramerateMode::CONSTANT : FramerateMode::VARIABLE;
const char *screen_region = args["-s"].value();
const char *window_str = args["-w"].value();
@ -1517,41 +1564,23 @@ int main(int argc, char **argv) {
const double target_fps = 1.0 / (double)fps;
if(strcmp(video_codec_to_use, "auto") == 0) {
if(gpu_inf.vendor == GPU_VENDOR_NVIDIA) {
const AVCodec *h265_codec = find_h265_encoder(gpu_inf.vendor);
const AVCodec *h265_codec = find_h265_encoder(gpu_inf.vendor);
// h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k.
// Another important note: when recording at a higher fps than roughly 60 (exact threshold unknown), h265 has very bad performance. For example, when recording at 144 fps the fps drops to 1
// while with h264 the fps doesn't drop.
if(!h265_codec) {
fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n");
video_codec_to_use = "h264";
video_codec = VideoCodec::H264;
} else if(fps > 60) {
fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n");
video_codec_to_use = "h264";
video_codec = VideoCodec::H264;
} else {
fprintf(stderr, "Info: using h265 encoder because a codec was not specified\n");
video_codec_to_use = "h265";
video_codec = VideoCodec::H265;
// h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k.
// Another important note: when recording at a higher fps than roughly 60 (exact threshold unknown), h265 has very bad performance. For example, when recording at 144 fps the fps drops to 1
// while with h264 the fps doesn't drop.
if(!h265_codec) {
fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n");
video_codec_to_use = "h264";
video_codec = VideoCodec::H264;
} else if(fps > 60) {
fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n");
video_codec_to_use = "h264";
video_codec = VideoCodec::H264;
} else {
const AVCodec *h264_codec = find_h264_encoder(gpu_inf.vendor);
if(!h264_codec) {
fprintf(stderr, "Info: using h265 encoder because a codec was not specified and your gpu does not support h264\n");
video_codec_to_use = "h265";
video_codec = VideoCodec::H265;
//} else if(fps > 60) {
// fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n");
// video_codec_to_use = "h264";
// video_codec = VideoCodec::H264;
} else {
fprintf(stderr, "Info: using h264 encoder because a codec was not specified\n");
video_codec_to_use = "h264";
video_codec = VideoCodec::H264;
fprintf(stderr, "Info: using h265 encoder because a codec was not specified\n");
video_codec_to_use = "h265";
video_codec = VideoCodec::H265;

View File

@ -1,41 +0,0 @@
#include "../include/vaapi.h"
#include "../include/library_loader.h"
#include <string.h>
/*
    Loads a shared library with dlopen and resolves the VA-API entry points
    listed in |required_dlsym| into |self|.

    |self| is zeroed first. On success the library handle is stored in
    self->library and true is returned. If the library cannot be opened or a
    required symbol is missing, an error is printed to stderr and false is
    returned.

    NOTE(review): the library name passed to dlopen (and the name at the end of
    the "missing required symbols" message) is empty in this view - presumably
    lost in extraction; confirm against the original file.
    NOTE(review): the target of "goto fail" is not visible here; the trailing
    memset + return false presumably sit under a "fail:" label. It is also not
    visible whether the handle is dlclose'd on that path - verify.
*/
bool gsr_vaapi_load(gsr_vaapi *self) {
memset(self, 0, sizeof(gsr_vaapi));
dlerror(); /* clear any previously recorded dl error */
void *lib = dlopen("", RTLD_LAZY);
if(!lib) {
fprintf(stderr, "gsr error: gsr_vaapi_load failed: failed to load, error: %s\n", dlerror());
return false;
/* Symbols that must all be present for the load to succeed */
dlsym_assign required_dlsym[] = {
{ (void**)&self->vaExportSurfaceHandle, "vaExportSurfaceHandle" },
{ (void**)&self->vaSyncSurface, "vaSyncSurface" },
if(!dlsym_load_list(lib, required_dlsym)) {
fprintf(stderr, "gsr error: gsr_vaapi_load failed: missing required symbols in\n");
goto fail;
self->library = lib;
return true;
/* Failure path: reset |self| so no partially resolved pointers remain */
memset(self, 0, sizeof(gsr_vaapi));
return false;
void gsr_vaapi_unload(gsr_vaapi *self) {
if(self->library) {
memset(self, 0, sizeof(gsr_vaapi));