diff --git a/.gitignore b/.gitignore index 8bedc1d..ed1d024 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ compile_commands.json tests/sibs-build/ tests/compile_commands.json +external/wlr-export-dmabuf-unstable-v1-client-protocol.h +external/wlr-export-dmabuf-unstable-v1-protocol.c + .clangd/ .cache/ .vscode/ diff --git a/README.md b/README.md index 5cfe126..729d8a3 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,13 @@ This screen recorder can be used for recording your desktop offline, for live st where only the last few seconds are saved. ## Note -This software works only on X11 (Wayland with Xwayland is NOT supported).\ +This software works with x11 and wayland, but when using wayland only monitors can be recorded and root access is needed.\ If you are using a variable refresh rate monitor then choose to record "screen-direct-force". This will allow variable refresh rate to work when recording fullscreen applications. Note that some applications such as mpv will not work in fullscreen mode. A fix is being developed for this.\ GPU Screen Recorder only supports h264 and hevc codecs at the moment which means that webm files are not supported. ### TEMPORARY ISSUES 1) screen-direct capture has been temporary disabled as it causes issues with stuttering. This might be a nvfbc bug. 2) Recording the monitor on steam deck might fail sometimes. This happens even when using ffmpeg directly. This might be a steam deck driver bug. Recording a single window doesn't have this issue. -3) Videos created on AMD/Intel are in variable framerate format. Use MPV to play such videos, otherwise you might experience stuttering in the video if you are using a buggy video player. +3) Videos created on AMD/Intel are in variable framerate format. Use MPV to play such videos, otherwise you might experience stuttering in the video if you are using a buggy video player. 
Try saving the video into a .mkv file instead when using AMD/Intel, as some software may have better support for .mkv files (such as kdenlive). ### AMD/Intel root permission When recording a window under AMD/Intel no special user permission is required, however when recording a monitor the program needs root permission (to access KMS). To make this safer, the part that needs root access has been moved to its own executable (to make it as small as possible) and a GUI sudo prompt is shown to run this executable as root. The executable is called "gsr-kms-server". @@ -41,11 +41,11 @@ If you install GPU Screen Recorder flatpak, which is the gtk gui version then yo # Dependencies ## AMD -`libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libxfixes, libpulse, libva, libva-mesa-driver, libdrm, libcap, polkit (for pkexec)`. +`libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libxfixes, libpulse, libva, libva-mesa-driver, libdrm, libcap, polkit (for pkexec), wayland-client, wayland-egl`. ## Intel -`libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libxfixes, libpulse, libva, libva-intel-driver, libdrm, libcap, polkit (for pkexec)`. +`libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libxfixes, libpulse, libva, libva-intel-driver, libdrm, libcap, polkit (for pkexec), wayland-client, wayland-egl`. ## NVIDIA -`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libxfixes, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode), libva, libdrm, libcap`. 
Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option. +`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libxfixes, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode), libva, libdrm, libcap, polkit (for pkexec, only on wayland), wayland-client, wayland-egl`. Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option. # How to use Run `gpu-screen-recorder --help` to see all options. diff --git a/TODO b/TODO index d37df12..247ee92 100644 --- a/TODO +++ b/TODO @@ -69,4 +69,15 @@ Use separate plane (which has offset and pitch) from combined plane instead of t Both twitch and youtube support variable bitrate but twitch recommends constant bitrate to reduce stream buffering/dropped frames when going from low motion to high motion: https://help.twitch.tv/s/article/broadcasting-guidelines?language=en_US. Info for youtube: https://support.google.com/youtube/answer/2853702?hl=en#zippy=%2Cvariable-bitrate-with-custom-stream-keys-in-live-control-room%2Ck-p-fps%2Cp-fps. -Limit fps recording with x damage. This is good when running replay mode 24/7 and being afk or when not much is happening on the screen. \ No newline at end of file +Limit fps recording with x damage. This is good when running replay mode 24/7 and being afk or when not much is happening on the screen. + +On nvidia, recording a monitor apparently causes some games to appear to stutter (without dropping fps), but not + when using direct screen capture. Observed in Deus Ex and Apex Legends. + +Test kms_cuda on hyprland and other wlroots based compositors to see if it works. + +Support "screen" (all monitors) capture on wayland. 
This should be done by getting all drm fds and multiple EGL_DMA_BUF_PLANEX_FD_EXT to create one egl image with all fds combined. + +Support pipewire screen capture? +Support screen rotation in nvidia wayland. +Support wlroots dmabuf screen recording, because it doesn't require root access unlike kms grab. \ No newline at end of file diff --git a/build.sh b/build.sh index f3e78ed..5648f6a 100755 --- a/build.sh +++ b/build.sh @@ -1,11 +1,19 @@ #!/bin/sh -e +script_dir=$(dirname "$0") +cd "$script_dir" + CC=${CC:-gcc} CXX=${CXX:-g++} opts="-O2 -g0 -DNDEBUG -Wall -Wextra" [ -n "$DEBUG" ] && opts="-O0 -g3 -Wall -Wextra"; +#build_wayland_protocol() { +# wayland-scanner private-code external/wlr-export-dmabuf-unstable-v1.xml external/wlr-export-dmabuf-unstable-v1-protocol.c +# wayland-scanner client-header external/wlr-export-dmabuf-unstable-v1.xml external/wlr-export-dmabuf-unstable-v1-client-protocol.h +#} + build_gsr_kms_server() { # TODO: -fcf-protection=full, not supported on arm extra_opts="-fstack-protector-all" @@ -17,7 +25,7 @@ build_gsr_kms_server() { } build_gsr() { - dependencies="libavcodec libavformat libavutil x11 xcomposite xrandr xfixes libpulse libswresample libavfilter libva libcap libdrm" + dependencies="libavcodec libavformat libavutil x11 xcomposite xrandr xfixes libpulse libswresample libavfilter libva libcap libdrm wayland-egl wayland-client" includes="$(pkg-config --cflags $dependencies)" libs="$(pkg-config --libs $dependencies) -ldl -pthread -lm" $CC -c src/capture/capture.c $opts $includes @@ -25,6 +33,7 @@ build_gsr() { $CC -c src/capture/xcomposite_cuda.c $opts $includes $CC -c src/capture/xcomposite_vaapi.c $opts $includes $CC -c src/capture/kms_vaapi.c $opts $includes + $CC -c src/capture/kms_cuda.c $opts $includes $CC -c kms/client/kms_client.c $opts $includes $CC -c src/egl.c $opts $includes $CC -c src/cuda.c $opts $includes @@ -36,11 +45,14 @@ build_gsr() { $CC -c src/cursor.c $opts $includes $CC -c src/utils.c $opts $includes $CC -c 
src/library_loader.c $opts $includes + $CC -c external/wlr-export-dmabuf-unstable-v1-protocol.c $opts $includes $CXX -c src/sound.cpp $opts $includes $CXX -c src/main.cpp $opts $includes - $CXX -o gpu-screen-recorder -O2 capture.o nvfbc.o kms_client.o egl.o cuda.o xnvctrl.o overclock.o window_texture.o shader.o color_conversion.o cursor.o utils.o library_loader.o xcomposite_cuda.o xcomposite_vaapi.o kms_vaapi.o sound.o main.o $libs $opts + $CXX -o gpu-screen-recorder -O2 capture.o nvfbc.o kms_client.o egl.o cuda.o xnvctrl.o overclock.o window_texture.o shader.o \ + color_conversion.o cursor.o utils.o library_loader.o xcomposite_cuda.o xcomposite_vaapi.o kms_vaapi.o kms_cuda.o wlr-export-dmabuf-unstable-v1-protocol.o sound.o main.o $libs $opts } +#build_wayland_protocol build_gsr_kms_server build_gsr echo "Successfully built gpu-screen-recorder" \ No newline at end of file diff --git a/external/wlr-export-dmabuf-unstable-v1.xml b/external/wlr-export-dmabuf-unstable-v1.xml new file mode 100644 index 0000000..2614065 --- /dev/null +++ b/external/wlr-export-dmabuf-unstable-v1.xml @@ -0,0 +1,203 @@ + + + + Copyright © 2018 Rostislav Pehlivanov + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice (including the next + paragraph) shall be included in all copies or substantial portions of the + Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + + + An interface to capture surfaces in an efficient way by exporting DMA-BUFs. + + Warning! The protocol described in this file is experimental and + backward incompatible changes may be made. Backward compatible changes + may be added together with the corresponding interface version bump. + Backward incompatible changes are done by bumping the version number in + the protocol and interface names and resetting the interface version. + Once the protocol is to be declared stable, the 'z' prefix and the + version number in the protocol and interface names are removed and the + interface version number is reset. + + + + + This object is a manager with which to start capturing from sources. + + + + + Capture the next frame of a an entire output. + + + + + + + + + All objects created by the manager will still remain valid, until their + appropriate destroy request has been called. + + + + + + + This object represents a single DMA-BUF frame. + + If the capture is successful, the compositor will first send a "frame" + event, followed by one or several "object". When the frame is available + for readout, the "ready" event is sent. + + If the capture failed, the "cancel" event is sent. This can happen anytime + before the "ready" event. + + Once either a "ready" or a "cancel" event is received, the client should + destroy the frame. Once an "object" event is received, the client is + responsible for closing the associated file descriptor. + + All frames are read-only and may not be written into or altered. + + + + + Special flags that should be respected by the client. + + + + + + + Main event supplying the client with information about the frame. 
If the + capture didn't fail, this event is always emitted first before any other + events. + + This event is followed by a number of "object" as specified by the + "num_objects" argument. + + + + + + + + + + + + + + + + Event which serves to supply the client with the file descriptors + containing the data for each object. + + After receiving this event, the client must always close the file + descriptor as soon as they're done with it and even if the frame fails. + + + + + + + + + + + + This event is sent as soon as the frame is presented, indicating it is + available for reading. This event includes the time at which + presentation happened at. + + The timestamp is expressed as tv_sec_hi, tv_sec_lo, tv_nsec triples, + each component being an unsigned 32-bit value. Whole seconds are in + tv_sec which is a 64-bit value combined from tv_sec_hi and tv_sec_lo, + and the additional fractional part in tv_nsec as nanoseconds. Hence, + for valid timestamps tv_nsec must be in [0, 999999999]. The seconds part + may have an arbitrary offset at start. + + After receiving this event, the client should destroy this object. + + + + + + + + + Indicates reason for cancelling the frame. + + + + + + + + + If the capture failed or if the frame is no longer valid after the + "frame" event has been emitted, this event will be used to inform the + client to scrap the frame. + + If the failure is temporary, the client may capture again the same + source. If the failure is permanent, any further attempts to capture the + same source will fail again. + + After receiving this event, the client should destroy this object. + + + + + + + Unreferences the frame. This request must be called as soon as its no + longer used. + + It can be called at any time by the client. The client will still have + to close any FDs it has been given. 
+ + + + \ No newline at end of file diff --git a/include/capture/kms_cuda.h b/include/capture/kms_cuda.h new file mode 100644 index 0000000..e34802e --- /dev/null +++ b/include/capture/kms_cuda.h @@ -0,0 +1,18 @@ +#ifndef GSR_CAPTURE_KMS_CUDA_H +#define GSR_CAPTURE_KMS_CUDA_H + +#include "../vec2.h" +#include "../utils.h" +#include "capture.h" + +typedef struct _XDisplay Display; + +typedef struct { + const char *display_to_capture; /* if this is "screen", then the entire x11 screen is captured (all displays). A copy is made of this */ + gsr_gpu_info gpu_inf; + const char *card_path; /* reference */ +} gsr_capture_kms_cuda_params; + +gsr_capture* gsr_capture_kms_cuda_create(const gsr_capture_kms_cuda_params *params); + +#endif /* GSR_CAPTURE_KMS_CUDA_H */ diff --git a/include/capture/kms_vaapi.h b/include/capture/kms_vaapi.h index 741d74b..e7bd652 100644 --- a/include/capture/kms_vaapi.h +++ b/include/capture/kms_vaapi.h @@ -4,7 +4,6 @@ #include "../vec2.h" #include "../utils.h" #include "capture.h" -#include typedef struct _XDisplay Display; @@ -12,6 +11,7 @@ typedef struct { const char *display_to_capture; /* if this is "screen", then the entire x11 screen is captured (all displays). 
A copy is made of this */ gsr_gpu_info gpu_inf; const char *card_path; /* reference */ + bool wayland; } gsr_capture_kms_vaapi_params; gsr_capture* gsr_capture_kms_vaapi_create(const gsr_capture_kms_vaapi_params *params); diff --git a/include/color_conversion.h b/include/color_conversion.h index 1fe488b..e60f4d0 100644 --- a/include/color_conversion.h +++ b/include/color_conversion.h @@ -9,6 +9,7 @@ typedef enum { } gsr_source_color; typedef enum { + GSR_DESTINATION_COLOR_RGB, GSR_DESTINATION_COLOR_NV12 } gsr_destination_color; diff --git a/include/cuda.h b/include/cuda.h index ab07429..41fe15b 100644 --- a/include/cuda.h +++ b/include/cuda.h @@ -91,7 +91,8 @@ typedef struct { CUresult (*cuMemsetD8_v2)(CUdeviceptr dstDevice, unsigned char uc, size_t N); CUresult (*cuMemcpy2D_v2)(const CUDA_MEMCPY2D *pCopy); - CUresult (*cuGraphicsGLRegisterImage)(CUgraphicsResource *pCudaResource, unsigned int image, unsigned int target, unsigned int Flags); + CUresult (*cuGraphicsGLRegisterImage)(CUgraphicsResource *pCudaResource, unsigned int image, unsigned int target, unsigned int flags); + CUresult (*cuGraphicsEGLRegisterImage)(CUgraphicsResource *pCudaResource, void *image, unsigned int flags); CUresult (*cuGraphicsResourceSetMapFlags)(CUgraphicsResource resource, unsigned int flags); CUresult (*cuGraphicsMapResources)(unsigned int count, CUgraphicsResource *resources, CUstream hStream); CUresult (*cuGraphicsUnmapResources)(unsigned int count, CUgraphicsResource *resources, CUstream hStream); diff --git a/include/egl.h b/include/egl.h index e410958..920e0f0 100644 --- a/include/egl.h +++ b/include/egl.h @@ -50,13 +50,24 @@ typedef void (*__eglMustCastToProperFunctionPointerType)(void); #define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 #define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 #define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT 0x3443 +#define EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT 0x3444 #define EGL_LINUX_DMA_BUF_EXT 0x3270 +#define EGL_OPENGL_API 
0x30A2 +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_RED_SIZE 0x3024 +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BLUE_SIZE 0x3022 +#define EGL_GREEN_SIZE 0x3023 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_PBUFFER_BIT 0x0001 #define GL_FLOAT 0x1406 #define GL_FALSE 0 #define GL_TRUE 1 #define GL_TRIANGLES 0x0004 #define GL_TEXTURE_2D 0x0DE1 +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 // TODO: Use this where applicable #define GL_RGB 0x1907 #define GL_RGBA 0x1908 #define GL_RGBA8 0x8058 @@ -97,11 +108,27 @@ typedef void (*FUNC_glEGLImageTargetTexture2DOES)(unsigned int target, GLeglImag typedef struct { void *egl_library; void *gl_library; - Display *dpy; + EGLDisplay egl_display; EGLSurface egl_surface; EGLContext egl_context; - Window window; + + Display *x11_dpy; + Window x11_window; + + void *wayland_dpy; + void *wayland_window; + void *wayland_registry; + void *wayland_surface; + void *wayland_compositor; + + int fd; + uint32_t width; + uint32_t height; + uint32_t pitch; + uint32_t offset; + uint32_t pixel_format; + uint64_t modifier; int32_t (*eglGetError)(void); EGLDisplay (*eglGetDisplay)(EGLNativeDisplayType display_id); @@ -117,6 +144,7 @@ typedef struct { unsigned int (*eglDestroyImage)(EGLDisplay dpy, EGLImage image); unsigned int (*eglSwapInterval)(EGLDisplay dpy, int32_t interval); unsigned int (*eglSwapBuffers)(EGLDisplay dpy, EGLSurface surface); + unsigned int (*eglBindAPI)(unsigned int api); __eglMustCastToProperFunctionPointerType (*eglGetProcAddress)(const char *procname); FUNC_eglExportDMABUFImageQueryMESA eglExportDMABUFImageQueryMESA; @@ -173,7 +201,10 @@ typedef struct { void (*glUniform1f)(int location, float v0); } gsr_egl; -bool gsr_egl_load(gsr_egl *self, Display *dpy); +bool gsr_egl_load(gsr_egl *self, Display *dpy, bool wayland); void gsr_egl_unload(gsr_egl *self); +void gsr_egl_update(gsr_egl *self); +void gsr_egl_cleanup_frame(gsr_egl *self); + #endif /* GSR_EGL_H */ diff --git a/include/utils.h b/include/utils.h index 7d76a2d..68d4e81 
100644 --- a/include/utils.h +++ b/include/utils.h @@ -3,6 +3,7 @@ #include "vec2.h" #include +#include #include typedef enum { @@ -17,10 +18,20 @@ typedef struct { } gsr_gpu_info; typedef struct { + const char *name; + int name_len; vec2i pos; vec2i size; + XRRCrtcInfo *crt_info; /* Only on x11 */ + uint32_t connector_id; /* Only on drm */ } gsr_monitor; +typedef enum { + GSR_CONNECTION_X11, + GSR_CONNECTION_WAYLAND, + GSR_CONNECTION_DRM +} gsr_connection_type; + typedef struct { const char *name; int name_len; @@ -30,11 +41,11 @@ typedef struct { double clock_get_monotonic_seconds(void); -typedef void (*active_monitor_callback)(const XRROutputInfo *output_info, const XRRCrtcInfo *crt_info, const XRRModeInfo *mode_info, void *userdata); -void for_each_active_monitor_output(Display *display, active_monitor_callback callback, void *userdata); -bool get_monitor_by_name(Display *display, const char *name, gsr_monitor *monitor); +typedef void (*active_monitor_callback)(const gsr_monitor *monitor, void *userdata); +void for_each_active_monitor_output(void *connection, gsr_connection_type connection_type, active_monitor_callback callback, void *userdata); +bool get_monitor_by_name(void *connection, gsr_connection_type connection_type, const char *name, gsr_monitor *monitor); -bool gl_get_gpu_info(Display *dpy, gsr_gpu_info *info); +bool gl_get_gpu_info(Display *dpy, gsr_gpu_info *info, bool wayland); /* |output| should be at least 128 bytes in size */ bool gsr_get_valid_card_path(char *output); diff --git a/kms/client/kms_client.c b/kms/client/kms_client.c index 587dda3..ff8a3d5 100644 --- a/kms/client/kms_client.c +++ b/kms/client/kms_client.c @@ -105,6 +105,40 @@ static void strncpy_safe(char *dst, const char *src, int len) { dst[min_len] = '\0'; } +static bool find_program_in_path(const char *program_name, char *filepath, int filepath_len) { + const char *path = getenv("PATH"); + if(!path) + return false; + + int program_name_len = strlen(program_name); + const char 
*end = path + strlen(path); + while(path != end) { + const char *part_end = strchr(path, ':'); + const char *next = part_end; + if(part_end) { + next = part_end + 1; + } else { + part_end = end; + next = end; + } + + int len = part_end - path; + if(len + 1 + program_name_len < filepath_len) { + memcpy(filepath, path, len); + filepath[len] = '/'; + memcpy(filepath + len + 1, program_name, program_name_len); + filepath[len + 1 + program_name_len] = '\0'; + + if(access(filepath, F_OK) == 0) + return true; + } + + path = next; + } + + return false; +} + int gsr_kms_client_init(gsr_kms_client *self, const char *card_path) { self->kms_server_pid = -1; self->socket_fd = -1; @@ -118,11 +152,12 @@ int gsr_kms_client_init(gsr_kms_client *self, const char *card_path) { return -1; } - // This doesn't work on nixos, but we dont want to use $PATH because we want to make this as safe as possible by running pkexec - // on a path that only root can modify. If we use "gsr-kms-server" instead then $PATH can be modified in ~/.bashrc for example - // which will overwrite the path to gsr-kms-server and the user can end up running a malicious program that pretends to be gsr-kms-server. - // If there is a safe way to do this on nixos, then please tell me; or use gpu-screen-recorder flatpak instead. 
- const char *server_filepath = "/usr/bin/gsr-kms-server"; + char server_filepath[PATH_MAX]; + if(!find_program_in_path("gsr-kms-server", server_filepath, sizeof(server_filepath))) { + fprintf(stderr, "gsr error: gsr_kms_client_init: gsr-kms-server is not installed\n"); + return -1; + } + bool has_perm = 0; const bool inside_flatpak = getenv("FLATPAK_ID") != NULL; if(!inside_flatpak) { diff --git a/kms/server/kms_server.c b/kms/server/kms_server.c index a15eb2b..dc24224 100644 --- a/kms/server/kms_server.c +++ b/kms/server/kms_server.c @@ -306,7 +306,7 @@ static void strncpy_safe(char *dst, const char *src, int len) { int main(int argc, char **argv) { if(argc != 3) { - fprintf(stderr, "usage: kms_server \n"); + fprintf(stderr, "usage: gsr-kms-server \n"); return 1; } diff --git a/kms/server/project.conf b/kms/server/project.conf index cf863c1..26a1947 100644 --- a/kms/server/project.conf +++ b/kms/server/project.conf @@ -4,5 +4,8 @@ type = "executable" version = "1.0.0" platforms = ["posix"] +[config] +error_on_warning = "true" + [dependencies] libdrm = ">=2" diff --git a/project.conf b/project.conf index 3871cad..731f7c0 100644 --- a/project.conf +++ b/project.conf @@ -1,11 +1,12 @@ [package] name = "gpu-screen-recorder" type = "executable" -version = "2.1.0" +version = "3.0.0" platforms = ["posix"] [config] ignore_dirs = ["kms/server"] +error_on_warning = "true" [dependencies] libavcodec = ">=58" @@ -20,4 +21,6 @@ libavfilter = ">=5" libva = ">=1" libcap = ">=2" xfixes = ">=2" -libdrm = ">=2" \ No newline at end of file +libdrm = ">=2" +wayland-egl = ">=15" +wayland-client = ">=1" diff --git a/src/capture/kms_cuda.c b/src/capture/kms_cuda.c new file mode 100644 index 0000000..7569cd2 --- /dev/null +++ b/src/capture/kms_cuda.c @@ -0,0 +1,470 @@ +#include "../../include/capture/kms_cuda.h" +#include "../../kms/client/kms_client.h" +#include "../../include/egl.h" +#include "../../include/utils.h" +#include "../../include/cuda.h" +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include + +#define MAX_CONNECTOR_IDS 32 + +typedef struct { + uint32_t connector_ids[MAX_CONNECTOR_IDS]; + int num_connector_ids; +} MonitorId; + +typedef struct { + gsr_capture_kms_cuda_params params; + XEvent xev; + + bool should_stop; + bool stop_is_error; + bool created_hw_frame; + + gsr_egl egl; + gsr_cuda cuda; + + gsr_kms_client kms_client; + gsr_kms_response kms_response; + + vec2i capture_pos; + vec2i capture_size; + MonitorId monitor_id; + + CUgraphicsResource cuda_graphics_resource; + CUarray mapped_array; +} gsr_capture_kms_cuda; + +static int max_int(int a, int b) { + return a > b ? a : b; +} + +static void gsr_capture_kms_cuda_stop(gsr_capture *cap, AVCodecContext *video_codec_context); + +static bool cuda_create_codec_context(gsr_capture_kms_cuda *cap_kms, AVCodecContext *video_codec_context) { + CUcontext old_ctx; + cap_kms->cuda.cuCtxPushCurrent_v2(cap_kms->cuda.cu_ctx); + + AVBufferRef *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if(!device_ctx) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data; + AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; + cuda_device_context->cuda_ctx = cap_kms->cuda.cu_ctx; + if(av_hwdevice_ctx_init(device_ctx) < 0) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + av_buffer_unref(&device_ctx); + cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); + if(!frame_context) { + fprintf(stderr, "Error: Failed to create hwframe context\n"); + av_buffer_unref(&device_ctx); + cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = + (AVHWFramesContext *)frame_context->data; + 
hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = AV_PIX_FMT_BGR0; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ref = device_ctx; + hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + + hw_frame_context->initial_pool_size = 1; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "Error: Failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&device_ctx); + //av_buffer_unref(&frame_context); + cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + video_codec_context->hw_device_ctx = av_buffer_ref(device_ctx); + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); + return true; +} + +// TODO: On monitor reconfiguration, find monitor x, y, width and height again. Do the same for nvfbc. + +typedef struct { + gsr_capture_kms_cuda *cap_kms; + const char *monitor_to_capture; + int monitor_to_capture_len; + int num_monitors; +} MonitorCallbackUserdata; + +static void monitor_callback(const gsr_monitor *monitor, void *userdata) { + MonitorCallbackUserdata *monitor_callback_userdata = userdata; + ++monitor_callback_userdata->num_monitors; + + if(monitor_callback_userdata->monitor_to_capture_len != monitor->name_len || memcmp(monitor_callback_userdata->monitor_to_capture, monitor->name, monitor->name_len) != 0) + return; + + const int connector_index = monitor_callback_userdata->cap_kms->monitor_id.num_connector_ids; + if(connector_index < MAX_CONNECTOR_IDS) { + monitor_callback_userdata->cap_kms->monitor_id.connector_ids[connector_index] = monitor->connector_id; + ++monitor_callback_userdata->cap_kms->monitor_id.num_connector_ids; + } + + if(connector_index == MAX_CONNECTOR_IDS) + fprintf(stderr, "gsr warning: reached max connector ids\n"); +} + +static int gsr_capture_kms_cuda_start(gsr_capture *cap, AVCodecContext 
*video_codec_context) { + gsr_capture_kms_cuda *cap_kms = cap->priv; + + if(gsr_kms_client_init(&cap_kms->kms_client, cap_kms->params.card_path) != 0) { + return -1; + } + cap_kms->monitor_id.num_connector_ids = 0; + MonitorCallbackUserdata monitor_callback_userdata = { + cap_kms, + cap_kms->params.display_to_capture, strlen(cap_kms->params.display_to_capture), + 0 + }; + for_each_active_monitor_output((void*)cap_kms->params.card_path, GSR_CONNECTION_DRM, monitor_callback, &monitor_callback_userdata); + + gsr_monitor monitor; + if(!get_monitor_by_name((void*)cap_kms->params.card_path, GSR_CONNECTION_DRM, cap_kms->params.display_to_capture, &monitor)) { + fprintf(stderr, "gsr error: gsr_capture_kms_cuda_start: failed to find monitor by name \"%s\"\n", cap_kms->params.display_to_capture); + gsr_capture_kms_cuda_stop(cap, video_codec_context); + return -1; + } + + cap_kms->capture_pos = monitor.pos; + cap_kms->capture_size = monitor.size; + + video_codec_context->width = max_int(2, cap_kms->capture_size.x & ~1); + video_codec_context->height = max_int(2, cap_kms->capture_size.y & ~1); + + if(!gsr_egl_load(&cap_kms->egl, NULL, true)) { + fprintf(stderr, "gsr error: gsr_capture_kms_cuda_start: failed to load opengl\n"); + gsr_capture_kms_cuda_stop(cap, video_codec_context); + return -1; + } + + /* Disable vsync */ + cap_kms->egl.eglSwapInterval(cap_kms->egl.egl_display, 0); + + // TODO: overclocking is not supported on wayland... 
+    if(!gsr_cuda_load(&cap_kms->cuda, NULL, false)) {
+        fprintf(stderr, "gsr error: gsr_capture_kms_cuda_start: failed to load cuda\n");
+        gsr_capture_kms_cuda_stop(cap, video_codec_context);
+        return -1;
+    }
+
+    if(!cuda_create_codec_context(cap_kms, video_codec_context)) {
+        gsr_capture_kms_cuda_stop(cap, video_codec_context);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Packs four character codes into one 32-bit value, with |a| in the least significant byte and |d| in the most significant byte */
+static uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+    return (d << 24) | (c << 16) | (b << 8) | a;
+}
+
+/* Same value as fourcc('N', 'V', '1', '2') */
+#define FOURCC_NV12 842094158
+
+/*
+    The |tick| callback of this capture backend.
+    Clears the color buffer and, on the first call only, replaces |*frame| with a newly allocated frame that copies
+    its format, size and color properties from |video_codec_context| and gets its buffer from
+    |video_codec_context->hw_frames_ctx|.
+    If the frame or its buffer can't be allocated then the capture is flagged to stop with an error
+    (reported through gsr_capture_kms_cuda_should_stop).
+*/
+static void gsr_capture_kms_cuda_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) {
+    gsr_capture_kms_cuda *cap_kms = cap->priv;
+
+    // TODO:
+    cap_kms->egl.glClear(GL_COLOR_BUFFER_BIT);
+
+    if(!cap_kms->created_hw_frame) {
+        cap_kms->created_hw_frame = true;
+
+        av_frame_free(frame);
+        *frame = av_frame_alloc();
+        /* The allocation result is in |*frame|. |frame| itself can't be NULL here, it was already dereferenced above */
+        if(!*frame) {
+            fprintf(stderr, "gsr error: gsr_capture_kms_cuda_tick: failed to allocate frame\n");
+            cap_kms->should_stop = true;
+            cap_kms->stop_is_error = true;
+            return;
+        }
+        (*frame)->format = video_codec_context->pix_fmt;
+        (*frame)->width = video_codec_context->width;
+        (*frame)->height = video_codec_context->height;
+        (*frame)->color_range = video_codec_context->color_range;
+        (*frame)->color_primaries = video_codec_context->color_primaries;
+        (*frame)->color_trc = video_codec_context->color_trc;
+        (*frame)->colorspace = video_codec_context->colorspace;
+        (*frame)->chroma_location = video_codec_context->chroma_sample_location;
+
+        if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) {
+            fprintf(stderr, "gsr error: gsr_capture_kms_cuda_tick: av_hwframe_get_buffer failed\n");
+            cap_kms->should_stop = true;
+            cap_kms->stop_is_error = true;
+            return;
+        }
+    }
+}
+
+static bool gsr_capture_kms_cuda_should_stop(gsr_capture *cap, bool *err) {
+    gsr_capture_kms_cuda *cap_kms = cap->priv;
+    if(cap_kms->should_stop) {
+        if(err)
+            *err = cap_kms->stop_is_error;
+        return true;
+    }
+
+    if(err)
+        *err = false;
+    return false;
+}
+
+/* Returns the first fd in |kms_response| that belongs to |connector_id|, or NULL if there is none */
+static gsr_kms_response_fd* find_drm_by_connector_id(gsr_kms_response *kms_response, uint32_t connector_id) {
+    for(int i = 0; i < kms_response->num_fds; ++i) {
+        if(kms_response->fds[i].connector_id == connector_id)
+            return &kms_response->fds[i];
+    }
+    return NULL;
+}
+
+/* Returns the first fd in |kms_response| that is flagged as a combined plane, or NULL if there is none */
+static gsr_kms_response_fd* find_first_combined_drm(gsr_kms_response *kms_response) {
+    for(int i = 0; i < kms_response->num_fds; ++i) {
+        if(kms_response->fds[i].is_combined_plane)
+            return &kms_response->fds[i];
+    }
+    return NULL;
+}
+
+/*
+    Returns the fd in |kms_response| with the largest width*height.
+    Returns the first fd if no fd has a non-zero size and NULL if |kms_response| has no fds.
+*/
+static gsr_kms_response_fd* find_largest_drm(gsr_kms_response *kms_response) {
+    if(kms_response->num_fds == 0)
+        return NULL;
+
+    int64_t largest_size = 0;
+    gsr_kms_response_fd *largest_drm = &kms_response->fds[0];
+    for(int i = 0; i < kms_response->num_fds; ++i) {
+        const int64_t size = (int64_t)kms_response->fds[i].width * (int64_t)kms_response->fds[i].height;
+        if(size > largest_size) {
+            largest_size = size;
+            largest_drm = &kms_response->fds[i];
+        }
+    }
+    return largest_drm;
+}
+
+/*
+    Registers |image| in cuda and stores the cuda array that backs it in |cap_kms->mapped_array|.
+    Returns false if the image can't be registered or if the array can't be retrieved. In both cases
+    |cap_kms->cuda_graphics_resource| is left as 0 and |cap_kms->mapped_array| should not be used.
+    On success the resource stays registered until gsr_capture_kms_unload_cuda_graphics is called.
+*/
+static bool gsr_capture_kms_register_egl_image_in_cuda(gsr_capture_kms_cuda *cap_kms, EGLImage image) {
+    CUcontext old_ctx;
+    cap_kms->cuda.cuCtxPushCurrent_v2(cap_kms->cuda.cu_ctx);
+
+    CUresult res = cap_kms->cuda.cuGraphicsEGLRegisterImage(&cap_kms->cuda_graphics_resource, image, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
+    if(res != CUDA_SUCCESS) {
+        const char *err_str = "unknown";
+        cap_kms->cuda.cuGetErrorString(res, &err_str);
+        fprintf(stderr, "gsr error: cuda_register_egl_image: cuGraphicsEGLRegisterImage failed, error %s (%d), egl image %p\n",
+            err_str, res, image);
+        cap_kms->cuda_graphics_resource = 0;
+        cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx);
+        return false;
+    }
+
+    /* Failure to set the map flags is not treated as fatal */
+    cap_kms->cuda.cuGraphicsResourceSetMapFlags(cap_kms->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
+
+    res = cap_kms->cuda.cuGraphicsSubResourceGetMappedArray(&cap_kms->mapped_array, cap_kms->cuda_graphics_resource, 0, 0);
+    if(res != CUDA_SUCCESS) {
+        const char *err_str = "unknown";
+        cap_kms->cuda.cuGetErrorString(res, &err_str);
+        fprintf(stderr, "gsr error: cuda_register_egl_image: cuGraphicsSubResourceGetMappedArray failed, error %s (%d)\n",
+            err_str, res);
+        /* Without the array there is nothing to copy from, so don't leave the resource registered */
+        cap_kms->cuda.cuGraphicsUnregisterResource(cap_kms->cuda_graphics_resource);
+        cap_kms->cuda_graphics_resource = 0;
+        cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx);
+        return false;
+    }
+
+    cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx);
+    return true;
+}
+
+/* Unmaps and unregisters the cuda graphics resource created by gsr_capture_kms_register_egl_image_in_cuda, if there is one */
+static void gsr_capture_kms_unload_cuda_graphics(gsr_capture_kms_cuda *cap_kms) {
+    if(cap_kms->cuda.cu_ctx) {
+        CUcontext old_ctx;
+        cap_kms->cuda.cuCtxPushCurrent_v2(cap_kms->cuda.cu_ctx);
+
+        if(cap_kms->cuda_graphics_resource) {
+            cap_kms->cuda.cuGraphicsUnmapResources(1, &cap_kms->cuda_graphics_resource, 0);
+            cap_kms->cuda.cuGraphicsUnregisterResource(cap_kms->cuda_graphics_resource);
+            cap_kms->cuda_graphics_resource = 0;
+        }
+
+        cap_kms->cuda.cuCtxPopCurrent_v2(&old_ctx);
+    }
+}
+
+/*
+    Captures one frame: requests the current drm fds from the kms server, picks the one to capture
+    (by connector id, then the first combined plane, then the largest one), imports it as an EGL image,
+    registers that image in cuda and copies its content into |frame->data[0]|.
+    Returns 0 on success and -1 if any step up to and including the cuda registration fails.
+    When -1 is returned after the kms response was received, the fds of that response are kept open
+    until the next call to this function or until the capture is stopped.
+*/
+static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) {
+    gsr_capture_kms_cuda *cap_kms = cap->priv;
+
+    for(int i = 0; i < cap_kms->kms_response.num_fds; ++i) {
+        if(cap_kms->kms_response.fds[i].fd > 0)
+            close(cap_kms->kms_response.fds[i].fd);
+        cap_kms->kms_response.fds[i].fd = 0;
+    }
+    cap_kms->kms_response.num_fds = 0;
+
+    if(gsr_kms_client_get_kms(&cap_kms->kms_client, &cap_kms->kms_response) != 0) {
+        fprintf(stderr, "gsr error: gsr_capture_kms_cuda_capture: failed to get kms, error: %d (%s)\n", cap_kms->kms_response.result, cap_kms->kms_response.err_msg);
+        return -1;
+    }
+
+    if(cap_kms->kms_response.num_fds == 0) {
+        static bool error_shown = false;
+        if(!error_shown) {
+            error_shown = true;
+            fprintf(stderr, "gsr error: no drm found, capture will fail\n");
+        }
+        return -1;
+    }
+
+    gsr_kms_response_fd *drm_fd = NULL;
+    for(int i = 0; i < cap_kms->monitor_id.num_connector_ids; ++i) {
+        drm_fd = find_drm_by_connector_id(&cap_kms->kms_response, cap_kms->monitor_id.connector_ids[i]);
+        if(drm_fd)
+            break;
+    }
+
+    if(!drm_fd) {
+        drm_fd = find_first_combined_drm(&cap_kms->kms_response);
+        if(!drm_fd)
+            drm_fd = find_largest_drm(&cap_kms->kms_response);
+    }
+
+    if(!drm_fd)
+        return -1;
+
+    //bool capture_is_combined_plane = drm_fd->is_combined_plane || ((int)drm_fd->width == cap_kms->screen_size.x && (int)drm_fd->height == cap_kms->screen_size.y);
+
+    //gsr_egl_cleanup_frame(&cap_kms->egl);
+    //gsr_egl_update(&cap_kms->egl);
+
+    const intptr_t img_attr[] = {
+        //EGL_IMAGE_PRESERVED_KHR, EGL_TRUE,
+        EGL_LINUX_DRM_FOURCC_EXT, fourcc('A', 'R', '2', '4'),//cap_kms->egl.pixel_format, ARGB8888
+        EGL_WIDTH, drm_fd->width,//cap_kms->egl.width,
+        EGL_HEIGHT, drm_fd->height,//cap_kms->egl.height,
+        EGL_DMA_BUF_PLANE0_FD_EXT, drm_fd->fd,//cap_kms->egl.fd,
+        EGL_DMA_BUF_PLANE0_OFFSET_EXT, drm_fd->offset,//cap_kms->egl.offset,
+        EGL_DMA_BUF_PLANE0_PITCH_EXT, drm_fd->pitch,//cap_kms->egl.pitch,
+        EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, drm_fd->modifier & 0xFFFFFFFFULL,//cap_kms->egl.modifier & 0xFFFFFFFFULL,
+        EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, drm_fd->modifier >> 32ULL,//cap_kms->egl.modifier >> 32ULL,
+        EGL_NONE
+    };
+
+    EGLImage image = cap_kms->egl.eglCreateImage(cap_kms->egl.egl_display, 0, EGL_LINUX_DMA_BUF_EXT, NULL, img_attr);
+    if(!image) {
+        fprintf(stderr, "gsr error: gsr_capture_kms_cuda_capture: eglCreateImage failed, error: %d\n", (int)cap_kms->egl.eglGetError());
+        return -1;
+    }
+
+    const bool image_registered = gsr_capture_kms_register_egl_image_in_cuda(cap_kms, image);
+    cap_kms->egl.eglDestroyImage(cap_kms->egl.egl_display, image);
+    /* Without a registered image |mapped_array| is stale, so there is nothing valid to copy into the frame */
+    if(!image_registered)
+        return -1;
+
+    cap_kms->egl.eglSwapBuffers(cap_kms->egl.egl_display, cap_kms->egl.egl_surface);
+
+    frame->linesize[0] = frame->width * 4;
+
+    CUDA_MEMCPY2D memcpy_struct;
+    memcpy_struct.srcXInBytes = 0;
+    memcpy_struct.srcY = 0;
+    memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY;
+
+    memcpy_struct.dstXInBytes = 0;
+    memcpy_struct.dstY = 0;
+    memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+
+    memcpy_struct.srcArray = cap_kms->mapped_array;
+    memcpy_struct.srcPitch = frame->linesize[0];
+    memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
+    memcpy_struct.dstPitch = frame->linesize[0];
+    memcpy_struct.WidthInBytes = frame->width * 4;
+    memcpy_struct.Height = frame->height;
+    cap_kms->cuda.cuMemcpy2D_v2(&memcpy_struct);
+
+    gsr_capture_kms_unload_cuda_graphics(cap_kms);
+
+    for(int i = 0; i < cap_kms->kms_response.num_fds; ++i) {
+        if(cap_kms->kms_response.fds[i].fd > 0)
+            close(cap_kms->kms_response.fds[i].fd);
+        cap_kms->kms_response.fds[i].fd = 0;
+    }
+
cap_kms->kms_response.num_fds = 0; + + return 0; +} + +static void gsr_capture_kms_cuda_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_kms_cuda *cap_kms = cap->priv; + + gsr_capture_kms_unload_cuda_graphics(cap_kms); + + for(int i = 0; i < cap_kms->kms_response.num_fds; ++i) { + if(cap_kms->kms_response.fds[i].fd > 0) + close(cap_kms->kms_response.fds[i].fd); + cap_kms->kms_response.fds[i].fd = 0; + } + cap_kms->kms_response.num_fds = 0; + + if(video_codec_context->hw_device_ctx) + av_buffer_unref(&video_codec_context->hw_device_ctx); + if(video_codec_context->hw_frames_ctx) + av_buffer_unref(&video_codec_context->hw_frames_ctx); + + gsr_cuda_unload(&cap_kms->cuda); + gsr_egl_unload(&cap_kms->egl); + gsr_kms_client_deinit(&cap_kms->kms_client); +} + +static void gsr_capture_kms_cuda_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + (void)video_codec_context; + gsr_capture_kms_cuda *cap_kms = cap->priv; + if(cap->priv) { + gsr_capture_kms_cuda_stop(cap, video_codec_context); + free((void*)cap_kms->params.display_to_capture); + cap_kms->params.display_to_capture = NULL; + free(cap->priv); + cap->priv = NULL; + } + free(cap); +} + +gsr_capture* gsr_capture_kms_cuda_create(const gsr_capture_kms_cuda_params *params) { + if(!params) { + fprintf(stderr, "gsr error: gsr_capture_kms_cuda_create params is NULL\n"); + return NULL; + } + + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); + if(!cap) + return NULL; + + gsr_capture_kms_cuda *cap_kms = calloc(1, sizeof(gsr_capture_kms_cuda)); + if(!cap_kms) { + free(cap); + return NULL; + } + + const char *display_to_capture = strdup(params->display_to_capture); + if(!display_to_capture) { + free(cap); + free(cap_kms); + return NULL; + } + + cap_kms->params = *params; + cap_kms->params.display_to_capture = display_to_capture; + + *cap = (gsr_capture) { + .start = gsr_capture_kms_cuda_start, + .tick = gsr_capture_kms_cuda_tick, + .should_stop = gsr_capture_kms_cuda_should_stop, + 
.capture = gsr_capture_kms_cuda_capture, + .destroy = gsr_capture_kms_cuda_destroy, + .priv = cap_kms + }; + + return cap; +} diff --git a/src/capture/kms_vaapi.c b/src/capture/kms_vaapi.c index aa115f3..908b537 100644 --- a/src/capture/kms_vaapi.c +++ b/src/capture/kms_vaapi.c @@ -134,21 +134,20 @@ static bool properties_has_atom(Atom *props, int nprop, Atom atom) { return false; } -static void monitor_callback(const XRROutputInfo *output_info, const XRRCrtcInfo *crt_info, const XRRModeInfo *mode_info, void *userdata) { - (void)mode_info; +static void monitor_callback(const gsr_monitor *monitor, void *userdata) { MonitorCallbackUserdata *monitor_callback_userdata = userdata; ++monitor_callback_userdata->num_monitors; if(strcmp(monitor_callback_userdata->monitor_to_capture, "screen") == 0) - monitor_callback_userdata->rotation = crt_info->rotation; + monitor_callback_userdata->rotation = monitor->crt_info->rotation; - if(monitor_callback_userdata->monitor_to_capture_len != output_info->nameLen || memcmp(monitor_callback_userdata->monitor_to_capture, output_info->name, output_info->nameLen) != 0) + if(monitor_callback_userdata->monitor_to_capture_len != monitor->name_len || memcmp(monitor_callback_userdata->monitor_to_capture, monitor->name, monitor->name_len) != 0) return; - monitor_callback_userdata->rotation = crt_info->rotation; - for(int i = 0; i < crt_info->noutput && monitor_callback_userdata->cap_kms->monitor_id.num_connector_ids < MAX_CONNECTOR_IDS; ++i) { + monitor_callback_userdata->rotation = monitor->crt_info->rotation; + for(int i = 0; i < monitor->crt_info->noutput && monitor_callback_userdata->cap_kms->monitor_id.num_connector_ids < MAX_CONNECTOR_IDS; ++i) { int nprop = 0; - Atom *props = XRRListOutputProperties(monitor_callback_userdata->cap_kms->dpy, crt_info->outputs[i], &nprop); + Atom *props = XRRListOutputProperties(monitor_callback_userdata->cap_kms->dpy, monitor->crt_info->outputs[i], &nprop); if(!props) continue; @@ -162,7 +161,7 @@ static 
void monitor_callback(const XRROutputInfo *output_info, const XRRCrtcInfo unsigned long bytes_after = 0; unsigned long nitems = 0; unsigned char *prop = NULL; - XRRGetOutputProperty(monitor_callback_userdata->cap_kms->dpy, crt_info->outputs[i], + XRRGetOutputProperty(monitor_callback_userdata->cap_kms->dpy, monitor->crt_info->outputs[i], monitor_callback_userdata->randr_connector_id_atom, 0, 128, false, false, AnyPropertyType, &type, &format, &nitems, &bytes_after, &prop); @@ -186,6 +185,9 @@ static int gsr_capture_kms_vaapi_start(gsr_capture *cap, AVCodecContext *video_c return -1; } + void *connection = cap_kms->params.wayland ? (void*)cap_kms->params.card_path : (void*)cap_kms->dpy; + const gsr_connection_type connection_type = cap_kms->params.wayland ? GSR_CONNECTION_DRM : GSR_CONNECTION_X11; + const Atom randr_connector_id_atom = XInternAtom(cap_kms->dpy, "CONNECTOR_ID", False); cap_kms->monitor_id.num_connector_ids = 0; MonitorCallbackUserdata monitor_callback_userdata = { @@ -194,7 +196,7 @@ static int gsr_capture_kms_vaapi_start(gsr_capture *cap, AVCodecContext *video_c 0, X11_ROT_0 }; - for_each_active_monitor_output(cap_kms->dpy, monitor_callback, &monitor_callback_userdata); + for_each_active_monitor_output(connection, connection_type, monitor_callback, &monitor_callback_userdata); cap_kms->screen_size.x = WidthOfScreen(DefaultScreenOfDisplay(cap_kms->dpy)); cap_kms->screen_size.y = HeightOfScreen(DefaultScreenOfDisplay(cap_kms->dpy)); @@ -205,7 +207,7 @@ static int gsr_capture_kms_vaapi_start(gsr_capture *cap, AVCodecContext *video_c monitor.pos.y = 0; monitor.size = cap_kms->screen_size; cap_kms->screen_capture = true; - } else if(!get_monitor_by_name(cap_kms->dpy, cap_kms->params.display_to_capture, &monitor)) { + } else if(!get_monitor_by_name(connection, connection_type, cap_kms->params.display_to_capture, &monitor)) { fprintf(stderr, "gsr error: gsr_capture_kms_vaapi_start: failed to find monitor by name \"%s\"\n", 
cap_kms->params.display_to_capture); gsr_capture_kms_vaapi_stop(cap, video_codec_context); return -1; @@ -225,7 +227,7 @@ static int gsr_capture_kms_vaapi_start(gsr_capture *cap, AVCodecContext *video_c cap_kms->capture_pos = monitor.pos; cap_kms->capture_size = monitor.size; - if(!gsr_egl_load(&cap_kms->egl, cap_kms->dpy)) { + if(!gsr_egl_load(&cap_kms->egl, cap_kms->dpy, cap_kms->params.wayland)) { fprintf(stderr, "gsr error: gsr_capture_kms_vaapi_start: failed to load opengl\n"); gsr_capture_kms_vaapi_stop(cap, video_codec_context); return -1; @@ -325,6 +327,7 @@ static void gsr_capture_kms_vaapi_tick(gsr_capture *cap, AVCodecContext *video_c const int plane = 0; const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + //const uint64_t modifier = cap_kms->prime.objects[cap_kms->prime.layers[layer].object_index[plane]].drm_format_modifier; const intptr_t img_attr[] = { EGL_LINUX_DRM_FOURCC_EXT, formats[i], @@ -333,6 +336,9 @@ static void gsr_capture_kms_vaapi_tick(gsr_capture *cap, AVCodecContext *video_c EGL_DMA_BUF_PLANE0_FD_EXT, cap_kms->prime.objects[cap_kms->prime.layers[layer].object_index[plane]].fd, EGL_DMA_BUF_PLANE0_OFFSET_EXT, cap_kms->prime.layers[layer].offset[plane], EGL_DMA_BUF_PLANE0_PITCH_EXT, cap_kms->prime.layers[layer].pitch[plane], + // TODO: + //EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, modifier & 0xFFFFFFFFULL, + //EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, modifier >> 32ULL, EGL_NONE }; @@ -355,6 +361,7 @@ static void gsr_capture_kms_vaapi_tick(gsr_capture *cap, AVCodecContext *video_c while(cap_kms->egl.eglGetError() != EGL_SUCCESS){} cap_kms->egl.glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image); if(cap_kms->egl.glGetError() != 0 || cap_kms->egl.eglGetError() != EGL_SUCCESS) { + // TODO: Get the error properly fprintf(stderr, "gsr error: gsr_capture_kms_vaapi_tick: failed to bind egl image to gl texture, error: %d\n", cap_kms->egl.eglGetError()); cap_kms->should_stop = true; cap_kms->stop_is_error = true; @@ -484,6 
+491,9 @@ static int gsr_capture_kms_vaapi_capture(gsr_capture *cap, AVFrame *frame) { } } + if(!drm_fd) + return -1; + bool capture_is_combined_plane = drm_fd->is_combined_plane || ((int)drm_fd->width == cap_kms->screen_size.x && (int)drm_fd->height == cap_kms->screen_size.y); // TODO: This causes a crash sometimes on steam deck, why? is it a driver bug? a vaapi pure version doesn't cause a crash. @@ -511,6 +521,9 @@ static int gsr_capture_kms_vaapi_capture(gsr_capture *cap, AVFrame *frame) { EGL_DMA_BUF_PLANE0_FD_EXT, drm_fd->fd, EGL_DMA_BUF_PLANE0_OFFSET_EXT, drm_fd->offset, EGL_DMA_BUF_PLANE0_PITCH_EXT, drm_fd->pitch, + // TODO: + //EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, drm_fd->modifier & 0xFFFFFFFFULL, + //EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, drm_fd->modifier >> 32ULL, EGL_NONE }; diff --git a/src/capture/xcomposite_cuda.c b/src/capture/xcomposite_cuda.c index d6fc10b..7cbc561 100644 --- a/src/capture/xcomposite_cuda.c +++ b/src/capture/xcomposite_cuda.c @@ -63,6 +63,8 @@ static bool cuda_register_opengl_texture(gsr_capture_xcomposite_cuda *cap_xcomp) CUresult res; CUcontext old_ctx; res = cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + // TODO: Use cuGraphicsEGLRegisterImage instead with the window egl image (dont use window_texture). 
+ // That removes the need for an extra texture and texture copy res = cap_xcomp->cuda.cuGraphicsGLRegisterImage( &cap_xcomp->cuda_graphics_resource, cap_xcomp->target_texture_id, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); @@ -123,6 +125,8 @@ static bool cuda_create_codec_context(gsr_capture_xcomposite_cuda *cap_xcomp, AV hw_frame_context->device_ref = device_ctx; hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + hw_frame_context->initial_pool_size = 1; + if (av_hwframe_ctx_init(frame_context) < 0) { fprintf(stderr, "Error: Failed to initialize hardware frame context " "(note: ffmpeg version needs to be > 4.0)\n"); @@ -184,7 +188,7 @@ static int gsr_capture_xcomposite_cuda_start(gsr_capture *cap, AVCodecContext *v XSelectInput(cap_xcomp->dpy, cap_xcomp->window, StructureNotifyMask | ExposureMask); - if(!gsr_egl_load(&cap_xcomp->egl, cap_xcomp->dpy)) { + if(!gsr_egl_load(&cap_xcomp->egl, cap_xcomp->dpy, false)) { fprintf(stderr, "gsr error: gsr_capture_xcomposite_cuda_start: failed to load opengl\n"); return -1; } @@ -244,6 +248,18 @@ static int gsr_capture_xcomposite_cuda_start(gsr_capture *cap, AVCodecContext *v static void gsr_capture_xcomposite_cuda_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { gsr_capture_xcomposite_cuda *cap_xcomp = cap->priv; + if(cap_xcomp->cuda.cu_ctx) { + CUcontext old_ctx; + cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + if(cap_xcomp->cuda_graphics_resource) { + cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resource); + } + + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + } + window_texture_deinit(&cap_xcomp->window_texture); if(cap_xcomp->target_texture_id) { @@ -256,14 +272,6 @@ static void gsr_capture_xcomposite_cuda_stop(gsr_capture *cap, AVCodecContext *v if(video_codec_context->hw_frames_ctx) av_buffer_unref(&video_codec_context->hw_frames_ctx); - 
if(cap_xcomp->cuda.cu_ctx) { - CUcontext old_ctx; - cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); - - cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resource, 0); - cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resource); - cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); - } gsr_cuda_unload(&cap_xcomp->cuda); gsr_egl_unload(&cap_xcomp->egl); diff --git a/src/capture/xcomposite_vaapi.c b/src/capture/xcomposite_vaapi.c index bb92f7f..99c4a50 100644 --- a/src/capture/xcomposite_vaapi.c +++ b/src/capture/xcomposite_vaapi.c @@ -147,7 +147,7 @@ static int gsr_capture_xcomposite_vaapi_start(gsr_capture *cap, AVCodecContext * // TODO: Get select and add these on top of it and then restore at the end. Also do the same in other xcomposite XSelectInput(cap_xcomp->dpy, cap_xcomp->params.window, StructureNotifyMask | ExposureMask); - if(!gsr_egl_load(&cap_xcomp->egl, cap_xcomp->dpy)) { + if(!gsr_egl_load(&cap_xcomp->egl, cap_xcomp->dpy, false)) { fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_start: failed to load opengl\n"); return -1; } @@ -373,6 +373,7 @@ static void gsr_capture_xcomposite_vaapi_tick(gsr_capture *cap, AVCodecContext * EGL_NONE, }; + // TODO: Use the window texture egl image directly instead of exporting it to opengl texture and then importing it to egl image again EGLImage img = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_context, EGL_GL_TEXTURE_2D, (EGLClientBuffer)(uint64_t)window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), pixmap_attrs); if(!img) { fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglCreateImage failed\n"); diff --git a/src/color_conversion.c b/src/color_conversion.c index 84500fd..5b61325 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -23,6 +23,42 @@ static float abs_f(float v) { " 0.098, -0.071, 0.439, 0.0,\n" \ " 0.0625, 0.500, 0.500, 1.0);" +static int load_shader_rgb(gsr_shader 
*shader, gsr_egl *egl, int *rotation_uniform) { + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform float rotation; \n" + ROTATE_Z + "void main() \n" + "{ \n" + " texcoords_out = texcoords; \n" + " gl_Position = vec4(pos.x, pos.y, 0.0, 1.0) * rotate_z(rotation); \n" + "} \n"); + + char fragment_shader[] = + "#version 300 es \n" + "precision mediump float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + RGB_TO_YUV + "void main() \n" + "{ \n" + " FragColor = texture(tex1, texcoords_out); \n" + "} \n"; + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) + return -1; + + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + *rotation_uniform = egl->glGetUniformLocation(shader->program_id, "rotation"); + return 0; +} + static int load_shader_y(gsr_shader *shader, gsr_egl *egl, int *rotation_uniform) { char vertex_shader[2048]; snprintf(vertex_shader, sizeof(vertex_shader), @@ -111,12 +147,14 @@ static int loader_framebuffers(gsr_color_conversion *self) { goto err; } - self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]); - self->params.egl->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, self->params.destination_textures[1], 0); - self->params.egl->glDrawBuffers(1, &draw_buffer); - if(self->params.egl->glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to create framebuffer for UV\n"); - goto err; + if(self->params.num_destination_textures > 1) { + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]); + self->params.egl->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, self->params.destination_textures[1], 0); + 
self->params.egl->glDrawBuffers(1, &draw_buffer); + if(self->params.egl->glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to create framebuffer for UV\n"); + goto err; + } } self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0); @@ -152,19 +190,36 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver self->params.egl = params->egl; self->params = *params; - if(self->params.num_destination_textures != 2) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 2 destination textures for destination color NV12, got %d destination texture(s)\n", self->params.num_destination_textures); - return -1; - } + switch(params->destination_color) { + case GSR_DESTINATION_COLOR_RGB: { + if(self->params.num_destination_textures != 1) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 1 destination texture for destination color RGB, got %d destination texture(s)\n", self->params.num_destination_textures); + return -1; + } - if(load_shader_y(&self->shaders[0], self->params.egl, &self->rotation_uniforms[0]) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); - goto err; - } + if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->rotation_uniforms[0]) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load rgb shader\n"); + goto err; + } + break; + } + case GSR_DESTINATION_COLOR_NV12: { + if(self->params.num_destination_textures != 2) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 2 destination textures for destination color RGB, got %d destination texture(s)\n", self->params.num_destination_textures); + return -1; + } - if(load_shader_uv(&self->shaders[1], self->params.egl, &self->rotation_uniforms[1]) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n"); - goto err; + if(load_shader_y(&self->shaders[0], 
self->params.egl, &self->rotation_uniforms[0]) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); + goto err; + } + + if(load_shader_uv(&self->shaders[1], self->params.egl, &self->rotation_uniforms[1]) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n"); + goto err; + } + break; + } } if(loader_framebuffers(self) != 0) @@ -274,7 +329,7 @@ int gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_i self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); } - { + if(self->params.num_destination_textures > 1) { self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]); //cap_xcomp->egl.glClear(GL_COLOR_BUFFER_BIT); diff --git a/src/cuda.c b/src/cuda.c index 32b447e..dcbbb92 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -3,6 +3,7 @@ #include #include #include +#include bool gsr_cuda_load(gsr_cuda *self, Display *display, bool do_overclock) { memset(self, 0, sizeof(gsr_cuda)); @@ -31,6 +32,7 @@ bool gsr_cuda_load(gsr_cuda *self, Display *display, bool do_overclock) { { (void**)&self->cuMemcpy2D_v2, "cuMemcpy2D_v2" }, { (void**)&self->cuGraphicsGLRegisterImage, "cuGraphicsGLRegisterImage" }, + { (void**)&self->cuGraphicsEGLRegisterImage, "cuGraphicsEGLRegisterImage" }, { (void**)&self->cuGraphicsResourceSetMapFlags, "cuGraphicsResourceSetMapFlags" }, { (void**)&self->cuGraphicsMapResources, "cuGraphicsMapResources" }, { (void**)&self->cuGraphicsUnmapResources, "cuGraphicsUnmapResources" }, @@ -80,6 +82,7 @@ bool gsr_cuda_load(gsr_cuda *self, Display *display, bool do_overclock) { } if(self->do_overclock) { + assert(display); if(gsr_overclock_load(&self->overclock, display)) gsr_overclock_start(&self->overclock); else diff --git a/src/egl.c b/src/egl.c index a0fcc23..bbdb031 100644 --- a/src/egl.c +++ b/src/egl.c @@ -3,35 +3,209 @@ #include #include #include +#include -static bool gsr_egl_create_window(gsr_egl *self) { +#include +#include +//#include 
"../external/wlr-export-dmabuf-unstable-v1-client-protocol.h" +#include + +#if 0 +static struct wl_compositor *compositor = NULL; +static struct wl_output *output = NULL; +static struct zwlr_export_dmabuf_manager_v1 *export_manager = NULL; +static struct zwlr_export_dmabuf_frame_v1 *current_frame = NULL; +//static struct wl_shell *shell = NULL; + +struct window { + EGLContext egl_context; + struct wl_surface *surface; + //struct wl_shell_surface *shell_surface; + struct wl_egl_window *egl_window; + EGLSurface egl_surface; +}; + +static void output_handle_geometry(void *data, struct wl_output *wl_output, + int32_t x, int32_t y, int32_t phys_width, int32_t phys_height, + int32_t subpixel, const char *make, const char *model, + int32_t transform) { + fprintf(stderr, "output geometry, make: %s, model: %s\n", make, model); +} + +static void output_handle_mode(void *data, struct wl_output *wl_output, + uint32_t flags, int32_t width, int32_t height, int32_t refresh) { + +} + +static void output_handle_done(void* data, struct wl_output *wl_output) { + /* Nothing to do */ +} + +static void output_handle_scale(void* data, struct wl_output *wl_output, + int32_t factor) { + /* Nothing to do */ +} + +static const struct wl_output_listener output_listener = { + .geometry = output_handle_geometry, + .mode = output_handle_mode, + .done = output_handle_done, + .scale = output_handle_scale, +}; +#endif + +static void registry_add_object (void *data, struct wl_registry *registry, uint32_t name, const char *interface, uint32_t version) { + (void)version; + struct wl_compositor **wayland_compositor = data; + if (strcmp(interface, "wl_compositor") == 0) { + if(*wayland_compositor) { + wl_compositor_destroy(*wayland_compositor); + *wayland_compositor = NULL; + } + *wayland_compositor = wl_registry_bind(registry, name, &wl_compositor_interface, 1); + }/* else if(strcmp(interface, wl_output_interface.name) == 0) { + fprintf(stderr, "wayland output, name: %u\n", name); + output = 
wl_registry_bind(registry, name, &wl_output_interface, 1); + wl_output_add_listener(output, &output_listener, NULL); + } else if(strcmp(interface, zwlr_export_dmabuf_manager_v1_interface.name) == 0) { + export_manager = wl_registry_bind(registry, name, &zwlr_export_dmabuf_manager_v1_interface, 1); + }*/ + //fprintf(stderr, "interface: %s\n", interface); +} + +static void registry_remove_object (void *data, struct wl_registry *registry, uint32_t name) { + (void)data; + (void)registry; + (void)name; +} + +static struct wl_registry_listener registry_listener = {&registry_add_object, &registry_remove_object}; + +#if 0 +static void register_cb(gsr_egl *egl); + +static void frame_start(void *data, struct zwlr_export_dmabuf_frame_v1 *frame, + uint32_t width, uint32_t height, uint32_t offset_x, uint32_t offset_y, + uint32_t buffer_flags, uint32_t flags, uint32_t format, + uint32_t mod_high, uint32_t mod_low, uint32_t num_objects) { + gsr_egl *egl = data; + //fprintf(stderr, "frame start, width: %u, height: %u, offset x: %u, offset y: %u, format: %u, num objects: %u\n", width, height, offset_x, offset_y, format, num_objects); + egl->width = width; + egl->height = height; + egl->pixel_format = format; + egl->modifier = ((uint64_t)mod_high << 32) | mod_low; + current_frame = frame; +} + +static void frame_object(void *data, struct zwlr_export_dmabuf_frame_v1 *frame, + uint32_t index, int32_t fd, uint32_t size, uint32_t offset, + uint32_t stride, uint32_t plane_index) { + // TODO: What if we get multiple objects? 
then we get multiple fd per frame + gsr_egl *egl = data; + //egl->fd = fd; + egl->pitch = stride; + egl->offset = offset; + //fprintf(stderr, "new frame, fd: %d, index: %u, size: %u, offset: %u, stride: %u, plane_index: %u\n", fd, index, size, offset, stride, plane_index); + close(fd); +} + + +static void frame_ready(void *data, struct zwlr_export_dmabuf_frame_v1 *frame, + uint32_t tv_sec_hi, uint32_t tv_sec_lo, uint32_t tv_nsec) { + register_cb(data); +} + +static void frame_cancel(void *data, struct zwlr_export_dmabuf_frame_v1 *frame, + uint32_t reason) { + register_cb(data); +} + + +static const struct zwlr_export_dmabuf_frame_v1_listener frame_listener = { + .frame = frame_start, + .object = frame_object, + .ready = frame_ready, + .cancel = frame_cancel, +}; + +static struct zwlr_export_dmabuf_frame_v1 *frame_callback = NULL; +static void register_cb(gsr_egl *egl) { + bool with_cursor = false; + frame_callback = zwlr_export_dmabuf_manager_v1_capture_output(export_manager, with_cursor, output); + zwlr_export_dmabuf_frame_v1_add_listener(frame_callback, &frame_listener, egl); +} +#endif + +// TODO: Create egl context without surface (in other words, x11/wayland agnostic, doesn't require x11/wayland dependency) +static bool gsr_egl_create_window(gsr_egl *self, bool wayland) { EGLConfig ecfg; int32_t num_config = 0; + EGLDisplay egl_display = NULL; EGLSurface egl_surface = NULL; EGLContext egl_context = NULL; - Window window = None; - - int32_t attr[] = { + + Window x11_window = None; + + struct wl_registry *wayland_registry = NULL; + struct wl_compositor *wayland_compositor = NULL; + struct wl_surface *wayland_surface = NULL; + void *wayland_dpy = NULL; + void *wayland_window = NULL; + + const int32_t attr[] = { EGL_BUFFER_SIZE, 24, - EGL_RENDERABLE_TYPE, - EGL_OPENGL_ES2_BIT, + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, EGL_NONE }; - int32_t ctxattr[] = { + const int32_t ctxattr[] = { EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE }; - window = XCreateWindow(self->dpy, 
DefaultRootWindow(self->dpy), 0, 0, 1, 1, 0, CopyFromParent, InputOutput, CopyFromParent, 0, NULL); + if(wayland) { + wayland_dpy = wl_display_connect(NULL); + if(!wayland_dpy) { + fprintf(stderr, "gsr error: gsr_egl_create_window failed: wl_display_connect failed\n"); + goto fail; + } - if(!window) { - fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to create gl window\n"); - goto fail; + wayland_registry = wl_display_get_registry(wayland_dpy); // TODO: Error checking + wl_registry_add_listener(wayland_registry, &registry_listener, &wayland_compositor); // TODO: Error checking + + // Fetch globals + wl_display_roundtrip(wayland_dpy); + + // fetch wl_output + wl_display_roundtrip(wayland_dpy); + + if(!wayland_compositor) { + fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to find compositor\n"); + goto fail; + } + + /*if(!output) { + fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to find output\n"); + goto fail; + } + + if(!export_manager) { + fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to find export manager\n"); + goto fail; + }*/ + } else { + x11_window = XCreateWindow(self->x11_dpy, DefaultRootWindow(self->x11_dpy), 0, 0, 16, 16, 0, CopyFromParent, InputOutput, CopyFromParent, 0, NULL); + + if(!x11_window) { + fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to create gl window\n"); + goto fail; + } } - egl_display = self->eglGetDisplay(self->dpy); + self->eglBindAPI(EGL_OPENGL_ES_API); + + egl_display = self->eglGetDisplay(wayland_dpy ? 
(EGLNativeDisplayType)wayland_dpy : (EGLNativeDisplayType)self->x11_dpy); if(!egl_display) { fprintf(stderr, "gsr error: gsr_egl_create_window failed: eglGetDisplay failed\n"); goto fail; @@ -47,18 +221,25 @@ static bool gsr_egl_create_window(gsr_egl *self) { goto fail; } - egl_surface = self->eglCreateWindowSurface(egl_display, ecfg, (EGLNativeWindowType)window, NULL); - if(!egl_surface) { - fprintf(stderr, "gsr error: gsr_egl_create_window failed: failed to create window surface\n"); - goto fail; - } - egl_context = self->eglCreateContext(egl_display, ecfg, NULL, ctxattr); if(!egl_context) { fprintf(stderr, "gsr error: gsr_egl_create_window failed: failed to create egl context\n"); goto fail; } + if(wayland) { + wayland_surface = wl_compositor_create_surface(wayland_compositor); + wayland_window = wl_egl_window_create(wayland_surface, 16, 16); + egl_surface = self->eglCreateWindowSurface(egl_display, ecfg, (EGLNativeWindowType)wayland_window, NULL); + } else { + egl_surface = self->eglCreateWindowSurface(egl_display, ecfg, (EGLNativeWindowType)x11_window, NULL); + } + + if(!egl_surface) { + fprintf(stderr, "gsr error: gsr_egl_create_window failed: failed to create window surface\n"); + goto fail; + } + if(!self->eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context)) { fprintf(stderr, "gsr error: gsr_egl_create_window failed: failed to make context current\n"); goto fail; @@ -67,7 +248,14 @@ static bool gsr_egl_create_window(gsr_egl *self) { self->egl_display = egl_display; self->egl_surface = egl_surface; self->egl_context = egl_context; - self->window = window; + + self->x11_window = x11_window; + + self->wayland_dpy = wayland_dpy; + self->wayland_window = wayland_window; + self->wayland_surface = wayland_surface; + self->wayland_compositor = wayland_compositor; + self->wayland_registry = wayland_registry; return true; fail: @@ -77,8 +265,18 @@ static bool gsr_egl_create_window(gsr_egl *self) { self->eglDestroySurface(egl_display, egl_surface); 
if(egl_display) self->eglTerminate(egl_display); - if(window) - XDestroyWindow(self->dpy, window); + if(x11_window) + XDestroyWindow(self->x11_dpy, x11_window); + if(wayland_window) + wl_egl_window_destroy(wayland_window); + if(wayland_surface) + wl_surface_destroy(wayland_surface); + if(wayland_compositor) + wl_compositor_destroy(wayland_compositor); + if(wayland_registry) + wl_registry_destroy(wayland_registry); + if(wayland_dpy) + wl_display_disconnect(wayland_dpy); return false; } @@ -98,6 +296,7 @@ static bool gsr_egl_load_egl(gsr_egl *self, void *library) { { (void**)&self->eglDestroyImage, "eglDestroyImage" }, { (void**)&self->eglSwapInterval, "eglSwapInterval" }, { (void**)&self->eglSwapBuffers, "eglSwapBuffers" }, + { (void**)&self->eglBindAPI, "eglBindAPI" }, { (void**)&self->eglGetProcAddress, "eglGetProcAddress" }, { NULL, NULL } @@ -186,9 +385,9 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { return true; } -bool gsr_egl_load(gsr_egl *self, Display *dpy) { +bool gsr_egl_load(gsr_egl *self, Display *dpy, bool wayland) { memset(self, 0, sizeof(gsr_egl)); - self->dpy = dpy; + self->x11_dpy = dpy; void *egl_lib = NULL; void *gl_lib = NULL; @@ -215,7 +414,7 @@ bool gsr_egl_load(gsr_egl *self, Display *dpy) { if(!gsr_egl_proc_load_egl(self)) goto fail; - if(!gsr_egl_create_window(self)) + if(!gsr_egl_create_window(self, wayland)) goto fail; self->glEnable(GL_BLEND); @@ -250,9 +449,34 @@ void gsr_egl_unload(gsr_egl *self) { self->egl_display = NULL; } - if(self->window) { - XDestroyWindow(self->dpy, self->window); - self->window = None; + if(self->x11_window) { + XDestroyWindow(self->x11_dpy, self->x11_window); + self->x11_window = None; + } + + if(self->wayland_window) { + wl_egl_window_destroy(self->wayland_window); + self->wayland_window = NULL; + } + + if(self->wayland_surface) { + wl_surface_destroy(self->wayland_surface); + self->wayland_surface = NULL; + } + + if(self->wayland_compositor) { + 
wl_compositor_destroy(self->wayland_compositor); + self->wayland_compositor = NULL; + } + + if(self->wayland_registry) { + wl_registry_destroy(self->wayland_registry); + self->wayland_registry = NULL; + } + + if(self->wayland_dpy) { + wl_display_disconnect(self->wayland_dpy); + self->wayland_dpy = NULL; } if(self->egl_library) { @@ -267,3 +491,25 @@ void gsr_egl_unload(gsr_egl *self) { memset(self, 0, sizeof(gsr_egl)); } + +void gsr_egl_update(gsr_egl *self) { + if(!self->wayland_dpy) + return; + + wl_display_dispatch(self->wayland_dpy); +} + +void gsr_egl_cleanup_frame(gsr_egl *self) { + if(!self->wayland_dpy) + return; + + if(self->fd > 0) { + close(self->fd); + self->fd = 0; + } + + /*if(current_frame) { + zwlr_export_dmabuf_frame_v1_destroy(current_frame); + current_frame = NULL; + }*/ +} diff --git a/src/main.cpp b/src/main.cpp index 29ed7c2..212aff5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,7 @@ extern "C" { #include "../include/capture/xcomposite_cuda.h" #include "../include/capture/xcomposite_vaapi.h" #include "../include/capture/kms_vaapi.h" +#include "../include/capture/kms_cuda.h" #include "../include/egl.h" #include "../include/utils.h" } @@ -45,10 +46,9 @@ static const int VIDEO_STREAM_INDEX = 0; static thread_local char av_error_buffer[AV_ERROR_MAX_STRING_SIZE]; -static void monitor_output_callback_print(const XRROutputInfo *output_info, const XRRCrtcInfo *crt_info, const XRRModeInfo *mode_info, void *userdata) { - (void)mode_info; +static void monitor_output_callback_print(const gsr_monitor *monitor, void *userdata) { (void)userdata; - fprintf(stderr, " \"%.*s\" (%dx%d+%d+%d)\n", output_info->nameLen, output_info->name, (int)crt_info->width, (int)crt_info->height, crt_info->x, crt_info->y); + fprintf(stderr, " \"%.*s\" (%dx%d+%d+%d)\n", monitor->name_len, monitor->name, monitor->size.x, monitor->size.y, monitor->pos.x, monitor->pos.y); } static char* av_error_to_string(int err) { @@ -1056,9 +1056,11 @@ static int 
init_filter_graph(AVCodecContext *audio_codec_context, AVFilterGraph return 0; } -static void xwayland_check_callback(const XRROutputInfo *output_info, const XRRCrtcInfo*, const XRRModeInfo*, void *userdata) { +static void xwayland_check_callback(const gsr_monitor *monitor, void *userdata) { bool *xwayland_found = (bool*)userdata; - if(output_info->nameLen >= 8 && strncmp(output_info->name, "XWAYLAND", 8) == 0) + if(monitor->name_len >= 8 && strncmp(monitor->name, "XWAYLAND", 8) == 0) + *xwayland_found = true; + else if(memmem(monitor->name, monitor->name_len, "X11", 3)) *xwayland_found = true; } @@ -1068,7 +1070,7 @@ static bool is_xwayland(Display *display) { return true; bool xwayland_found = false; - for_each_active_monitor_output(display, xwayland_check_callback, &xwayland_found); + for_each_active_monitor_output(display, GSR_CONNECTION_X11, xwayland_check_callback, &xwayland_found); return xwayland_found; } @@ -1285,23 +1287,22 @@ int main(int argc, char **argv) { replay_buffer_size_secs += 5; // Add a few seconds to account of lost packets because of non-keyframe packets skipped } + bool wayland = false; Display *dpy = XOpenDisplay(nullptr); if (!dpy) { - fprintf(stderr, "Error: Failed to open display. Make sure you are running x11\n"); - _exit(2); + wayland = true; + fprintf(stderr, "Warning: failed to connect to the X server. Assuming wayland is running without Xwayland\n"); } XSetErrorHandler(x11_error_handler); XSetIOErrorHandler(x11_io_error_handler); - if(is_xwayland(dpy)) { - fprintf(stderr, "Error: GPU Screen Recorder only works in a pure X11 session. 
Xwayland is not supported\n"); - _exit(2); - } + if(!wayland) + wayland = is_xwayland(dpy); gsr_gpu_info gpu_inf; bool very_old_gpu = false; - if(!gl_get_gpu_info(dpy, &gpu_inf)) + if(!gl_get_gpu_info(dpy, &gpu_inf, wayland)) _exit(2); if(gpu_inf.vendor == GSR_GPU_VENDOR_NVIDIA && gpu_inf.gpu_version != 0 && gpu_inf.gpu_version < 900) { @@ -1315,7 +1316,7 @@ int main(int argc, char **argv) { char card_path[128]; card_path[0] = '\0'; - if(gpu_inf.vendor != GSR_GPU_VENDOR_NVIDIA) { + if(wayland || gpu_inf.vendor != GSR_GPU_VENDOR_NVIDIA) { // TODO: Allow specifying another card, and in other places if(!gsr_get_valid_card_path(card_path)) { fprintf(stderr, "Error: no /dev/dri/cardX device found\n"); @@ -1353,6 +1354,11 @@ int main(int argc, char **argv) { gsr_capture *capture = nullptr; if(strcmp(window_str, "focused") == 0) { + if(wayland) { + fprintf(stderr, "Error: GPU Screen Recorder window capture only works in a pure X11 session. Xwayland is not supported. You can record a monitor instead on wayland\n"); + _exit(2); + } + if(!screen_region) { fprintf(stderr, "Error: option -s is required when using -w focused\n"); usage(); @@ -1370,44 +1376,69 @@ int main(int argc, char **argv) { follow_focused = true; } else if(contains_non_hex_number(window_str)) { - if(strcmp(window_str, "screen") != 0 && strcmp(window_str, "screen-direct") != 0 && strcmp(window_str, "screen-direct-force") != 0) { + // TODO: wayland, not only drm (if wlroots) + if(wayland) { gsr_monitor gmon; - if(!get_monitor_by_name(dpy, window_str, &gmon)) { + if(!get_monitor_by_name(card_path, GSR_CONNECTION_DRM, window_str, &gmon)) { fprintf(stderr, "gsr error: display \"%s\" not found, expected one of:\n", window_str); - fprintf(stderr, " \"screen\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); - fprintf(stderr, " \"screen-direct\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), 
XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); - fprintf(stderr, " \"screen-direct-force\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); - for_each_active_monitor_output(dpy, monitor_output_callback_print, NULL); + for_each_active_monitor_output(card_path, GSR_CONNECTION_DRM, monitor_output_callback_print, NULL); _exit(1); } + } else { + if(strcmp(window_str, "screen") != 0 && strcmp(window_str, "screen-direct") != 0 && strcmp(window_str, "screen-direct-force") != 0) { + gsr_monitor gmon; + if(!get_monitor_by_name(dpy, GSR_CONNECTION_X11, window_str, &gmon)) { + fprintf(stderr, "gsr error: display \"%s\" not found, expected one of:\n", window_str); + fprintf(stderr, " \"screen\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); + fprintf(stderr, " \"screen-direct\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); + fprintf(stderr, " \"screen-direct-force\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); + for_each_active_monitor_output(dpy, GSR_CONNECTION_X11, monitor_output_callback_print, NULL); + _exit(1); + } + } } if(gpu_inf.vendor == GSR_GPU_VENDOR_NVIDIA) { - const char *capture_target = window_str; - bool direct_capture = strcmp(window_str, "screen-direct") == 0; - if(direct_capture) { - capture_target = "screen"; - // TODO: Temporary disable direct capture because push model causes stuttering when it's direct capturing. This might be a nvfbc bug. This does not happen when using a compositor. - direct_capture = false; - fprintf(stderr, "Warning: screen-direct has temporary been disabled as it causes stuttering. This is likely a NvFBC bug. 
Falling back to \"screen\".\n"); - } + if(wayland) { + const char *capture_target = window_str; + if(strcmp(window_str, "screen-direct") == 0 || strcmp(window_str, "screen-direct-force") == 0) { + capture_target = "screen"; + } - if(strcmp(window_str, "screen-direct-force") == 0) { - direct_capture = true; - capture_target = "screen"; - } + gsr_capture_kms_cuda_params kms_params; + kms_params.display_to_capture = capture_target; + kms_params.gpu_inf = gpu_inf; + kms_params.card_path = card_path; + capture = gsr_capture_kms_cuda_create(&kms_params); + if(!capture) + _exit(1); + } else { + const char *capture_target = window_str; + bool direct_capture = strcmp(window_str, "screen-direct") == 0; + if(direct_capture) { + capture_target = "screen"; + // TODO: Temporary disable direct capture because push model causes stuttering when it's direct capturing. This might be a nvfbc bug. This does not happen when using a compositor. + direct_capture = false; + fprintf(stderr, "Warning: screen-direct has temporary been disabled as it causes stuttering. This is likely a NvFBC bug. 
Falling back to \"screen\".\n"); + } - gsr_capture_nvfbc_params nvfbc_params; - nvfbc_params.dpy = dpy; - nvfbc_params.display_to_capture = capture_target; - nvfbc_params.fps = fps; - nvfbc_params.pos = { 0, 0 }; - nvfbc_params.size = { 0, 0 }; - nvfbc_params.direct_capture = direct_capture; - nvfbc_params.overclock = overclock; - capture = gsr_capture_nvfbc_create(&nvfbc_params); - if(!capture) - _exit(1); + if(strcmp(window_str, "screen-direct-force") == 0) { + direct_capture = true; + capture_target = "screen"; + } + + gsr_capture_nvfbc_params nvfbc_params; + nvfbc_params.dpy = dpy; + nvfbc_params.display_to_capture = capture_target; + nvfbc_params.fps = fps; + nvfbc_params.pos = { 0, 0 }; + nvfbc_params.size = { 0, 0 }; + nvfbc_params.direct_capture = direct_capture; + nvfbc_params.overclock = overclock; + capture = gsr_capture_nvfbc_create(&nvfbc_params); + if(!capture) + _exit(1); + } } else { const char *capture_target = window_str; if(strcmp(window_str, "screen-direct") == 0 || strcmp(window_str, "screen-direct-force") == 0) { @@ -1418,11 +1449,17 @@ int main(int argc, char **argv) { kms_params.display_to_capture = capture_target; kms_params.gpu_inf = gpu_inf; kms_params.card_path = card_path; + kms_params.wayland = false;//wayland; capture = gsr_capture_kms_vaapi_create(&kms_params); if(!capture) _exit(1); } } else { + if(wayland) { + fprintf(stderr, "Error: GPU Screen Recorder window capture only works in a pure X11 session. Xwayland is not supported. 
You can record a monitor instead on wayland\n"); + _exit(2); + } + errno = 0; src_window_id = strtol(window_str, nullptr, 0); if(src_window_id == None || errno == EINVAL) { diff --git a/src/utils.c b/src/utils.c index 1187749..722fe06 100644 --- a/src/utils.c +++ b/src/utils.c @@ -24,19 +24,32 @@ static const XRRModeInfo* get_mode_info(const XRRScreenResources *sr, RRMode id) return NULL; } -void for_each_active_monitor_output(Display *display, active_monitor_callback callback, void *userdata) { +static void for_each_active_monitor_output_x11(Display *display, active_monitor_callback callback, void *userdata) { XRRScreenResources *screen_res = XRRGetScreenResources(display, DefaultRootWindow(display)); if(!screen_res) return; + char display_name[256]; for(int i = 0; i < screen_res->noutput; ++i) { XRROutputInfo *out_info = XRRGetOutputInfo(display, screen_res, screen_res->outputs[i]); if(out_info && out_info->crtc && out_info->connection == RR_Connected) { XRRCrtcInfo *crt_info = XRRGetCrtcInfo(display, screen_res, out_info->crtc); if(crt_info && crt_info->mode) { const XRRModeInfo *mode_info = get_mode_info(screen_res, crt_info->mode); - if(mode_info) - callback(out_info, crt_info, mode_info, userdata); + if(mode_info && out_info->nameLen < (int)sizeof(display_name)) { + memcpy(display_name, out_info->name, out_info->nameLen); + display_name[out_info->nameLen] = '\0'; + + gsr_monitor monitor = { + .name = display_name, + .name_len = out_info->nameLen, + .pos = { .x = crt_info->x, .y = crt_info->y }, + .size = { .x = (int)crt_info->width, .y = (int)crt_info->height }, + .crt_info = crt_info, + .connector_id = 0 // TODO: Get connector id + }; + callback(&monitor, userdata); + } } if(crt_info) XRRFreeCrtcInfo(crt_info); @@ -48,29 +61,137 @@ void for_each_active_monitor_output(Display *display, active_monitor_callback ca XRRFreeScreenResources(screen_res); } -static void get_monitor_by_name_callback(const XRROutputInfo *output_info, const XRRCrtcInfo *crt_info, const 
XRRModeInfo *mode_info, void *userdata) { - (void)mode_info; +typedef struct { + int type; + int count; +} drm_connector_type_count; + +#define CONNECTOR_TYPE_COUNTS 32 + +static drm_connector_type_count* drm_connector_types_get_index(drm_connector_type_count *type_counts, int *num_type_counts, int connector_type) { + for(int i = 0; i < *num_type_counts; ++i) { + if(type_counts[i].type == connector_type) + return &type_counts[i]; + } + + if(*num_type_counts == CONNECTOR_TYPE_COUNTS) + return NULL; + + const int index = *num_type_counts; + type_counts[index].type = connector_type; + type_counts[index].count = 0; + ++*num_type_counts; + return &type_counts[index]; +} + +static bool connector_get_property_by_name(int drmfd, drmModeConnectorPtr props, const char *name, uint64_t *result) { + for(int i = 0; i < props->count_props; ++i) { + drmModePropertyPtr prop = drmModeGetProperty(drmfd, props->props[i]); + if(prop) { + if(strcmp(name, prop->name) == 0) { + *result = props->prop_values[i]; + drmModeFreeProperty(prop); + return true; + } + drmModeFreeProperty(prop); + } + } + return false; +} + +static void for_each_active_monitor_output_drm(const char *drm_card_path, active_monitor_callback callback, void *userdata) { + int fd = open(drm_card_path, O_RDONLY); + if(fd == -1) + return; + + drmSetClientCap(fd, DRM_CLIENT_CAP_ATOMIC, 1); + + drm_connector_type_count type_counts[CONNECTOR_TYPE_COUNTS]; + int num_type_counts = 0; + + char display_name[256]; + drmModeResPtr resources = drmModeGetResources(fd); + if(resources) { + for(int i = 0; i < resources->count_connectors; ++i) { + drmModeConnectorPtr connector = drmModeGetConnectorCurrent(fd, resources->connectors[i]); + if(!connector) + continue; + + if(connector->connection != DRM_MODE_CONNECTED) { + drmModeFreeConnector(connector); + continue; + } + + drm_connector_type_count *connector_type = drm_connector_types_get_index(type_counts, &num_type_counts, connector->connector_type); + const char *connection_name = 
drmModeGetConnectorTypeName(connector->connector_type); + const int connection_name_len = strlen(connection_name); + if(connector_type) + ++connector_type->count; + + uint64_t crtc_id = 0; + connector_get_property_by_name(fd, connector, "CRTC_ID", &crtc_id); + + drmModeCrtcPtr crtc = drmModeGetCrtc(fd, crtc_id); + if(connector_type && crtc_id > 0 && crtc && connection_name_len + 5 < (int)sizeof(display_name)) { + const int display_name_len = snprintf(display_name, sizeof(display_name), "%s-%d", connection_name, connector_type->count); + gsr_monitor monitor = { + .name = display_name, + .name_len = display_name_len, + .pos = { .x = crtc->x, .y = crtc->y }, + .size = { .x = (int)crtc->width, .y = (int)crtc->height }, + .crt_info = NULL, + .connector_id = connector->connector_id + }; + callback(&monitor, userdata); + } + + if(crtc) + drmModeFreeCrtc(crtc); + + drmModeFreeConnector(connector); + } + drmModeFreeResources(resources); + } + + close(fd); +} + +void for_each_active_monitor_output(void *connection, gsr_connection_type connection_type, active_monitor_callback callback, void *userdata) { + switch(connection_type) { + case GSR_CONNECTION_X11: + for_each_active_monitor_output_x11(connection, callback, userdata); + break; + case GSR_CONNECTION_WAYLAND: + // TODO: use gsr_egl here (connection) + break; + case GSR_CONNECTION_DRM: + for_each_active_monitor_output_drm(connection, callback, userdata); + break; + } +} + +static void get_monitor_by_name_callback(const gsr_monitor *monitor, void *userdata) { get_monitor_by_name_userdata *data = (get_monitor_by_name_userdata*)userdata; - if(!data->found_monitor && data->name_len == output_info->nameLen && memcmp(data->name, output_info->name, data->name_len) == 0) { - data->monitor->pos = (vec2i){ .x = crt_info->x, .y = crt_info->y }; - data->monitor->size = (vec2i){ .x = (int)crt_info->width, .y = (int)crt_info->height }; + if(!data->found_monitor && strcmp(data->name, monitor->name) == 0) { + data->monitor->pos = 
monitor->pos; + data->monitor->size = monitor->size; data->found_monitor = true; } } -bool get_monitor_by_name(Display *display, const char *name, gsr_monitor *monitor) { +bool get_monitor_by_name(void *connection, gsr_connection_type connection_type, const char *name, gsr_monitor *monitor) { get_monitor_by_name_userdata userdata; userdata.name = name; userdata.name_len = strlen(name); userdata.monitor = monitor; userdata.found_monitor = false; - for_each_active_monitor_output(display, get_monitor_by_name_callback, &userdata); + for_each_active_monitor_output(connection, connection_type, get_monitor_by_name_callback, &userdata); return userdata.found_monitor; } -bool gl_get_gpu_info(Display *dpy, gsr_gpu_info *info) { +bool gl_get_gpu_info(Display *dpy, gsr_gpu_info *info, bool wayland) { gsr_egl gl; - if(!gsr_egl_load(&gl, dpy)) { + if(!gsr_egl_load(&gl, dpy, wayland)) { fprintf(stderr, "gsr error: failed to load opengl\n"); return false; } @@ -87,6 +208,12 @@ bool gl_get_gpu_info(Display *dpy, gsr_gpu_info *info) { goto end; } + if(gl_renderer && strstr((const char*)gl_renderer, "llvmpipe")) { + fprintf(stderr, "gsr error: your opengl environment is not properly setup. It's using llvmpipe (cpu fallback) for opengl instead of your graphics card\n"); + supported = false; + goto end; + } + if(strstr((const char*)gl_vendor, "AMD")) info->vendor = GSR_GPU_VENDOR_AMD; else if(strstr((const char*)gl_vendor, "Intel"))