From 9d185f309151a198e2f51e613e9a9dfaff5f6d6d Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 11 Oct 2022 18:57:09 +0200 Subject: [PATCH] Use p4 preset on older gpus (kepler family) because of performance with p7 --- build.sh | 1 + include/GlLibrary.hpp | 14 ++++- include/NvFBCLibrary.hpp | 2 +- project.conf | 3 -- src/main.cpp | 110 ++++++++++++++++++++++++++++----------- 5 files changed, 94 insertions(+), 36 deletions(-) diff --git a/build.sh b/build.sh index 62bb59a..8b92d92 100755 --- a/build.sh +++ b/build.sh @@ -6,3 +6,4 @@ libs="$(pkg-config --libs $dependencies) -ldl -pthread -lm" g++ -c src/sound.cpp -O2 -g0 -DNDEBUG $includes g++ -c src/main.cpp -O2 -g0 -DNDEBUG $includes g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs +echo "Successfully built gpu-screen-recorder" \ No newline at end of file diff --git a/include/GlLibrary.hpp b/include/GlLibrary.hpp index 1934db5..1337ef3 100644 --- a/include/GlLibrary.hpp +++ b/include/GlLibrary.hpp @@ -26,6 +26,8 @@ typedef struct __GLXFBConfigRec *GLXFBConfig; #define GL_TEXTURE_HEIGHT 0x1001 #define GL_NEAREST 0x2600 +#define GL_RENDERER 0x1F01 + #define GLX_BUFFER_SIZE 2 #define GLX_DOUBLEBUFFER 5 #define GLX_RED_SIZE 8 @@ -71,6 +73,7 @@ struct GlLibrary { void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data); unsigned int (*glGetError)(void); + const unsigned char* (*glGetString)(unsigned int name); void (*glClear)(unsigned int mask); void (*glGenTextures)(int n, unsigned int *textures); void (*glDeleteTextures)(int n, const unsigned int *texture); @@ -81,8 +84,7 @@ struct GlLibrary { void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth); ~GlLibrary() { - if(library) - dlclose(library); + unload(); } bool load() { @@ -120,6 +122,7 @@ struct GlLibrary { { (void**)&glXSwapBuffers, "glXSwapBuffers" }, { (void**)&glGetError, "glGetError" }, + { (void**)&glGetString, "glGetString" }, { (void**)&glClear, "glClear" }, { (void**)&glGenTextures, "glGenTextures" }, { (void**)&glDeleteTextures, "glDeleteTextures" }, @@ -141,6 +144,13 @@ struct GlLibrary { return false; } } + + void unload() { + if(library) { + dlclose(library); + library = nullptr; + } + } private: void *library = nullptr; }; diff --git a/include/NvFBCLibrary.hpp b/include/NvFBCLibrary.hpp index 0849882..19b9bcc 100644 --- a/include/NvFBCLibrary.hpp +++ b/include/NvFBCLibrary.hpp @@ -192,7 +192,7 @@ public: NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab_params; memset(&grab_params, 0, sizeof(grab_params)); grab_params.dwVersion = NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER; - grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT; + grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT | NVFBC_TOCUDA_GRAB_FLAGS_FORCE_REFRESH; grab_params.pFrameGrabInfo = &frame_info; grab_params.pCUDADeviceBuffer = cu_device_ptr; diff --git a/project.conf b/project.conf index 9f32d5f..8ac9d98 100644 --- a/project.conf +++ b/project.conf @@ -4,9 +4,6 @@ type = "executable" version = "1.2.0" platforms = ["posix"] -[config] -include_dirs = ["external"] - [dependencies] libavcodec = ">=58" libavformat = ">=58" diff --git a/src/main.cpp b/src/main.cpp index 3d62c1e..316a5f2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -596,20 +596,24 @@ static AVCodecContext* create_audio_codec_context(AVFormatContext *av_format_con return codec_context; } +static const AVCodec* find_h264_encoder() { + const AVCodec *codec = avcodec_find_encoder_by_name("h264_nvenc"); + if(!codec) + codec = avcodec_find_encoder_by_name("nvenc_h264"); + return codec; +} + +static const AVCodec* find_h265_encoder() { + const AVCodec *codec = avcodec_find_encoder_by_name("hevc_nvenc"); + if(!codec) + codec = avcodec_find_encoder_by_name("nvenc_hevc"); + return codec; +} + static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_context, VideoQuality video_quality, int record_width, int record_height, - int fps, VideoCodec video_codec, bool is_livestream) { - const AVCodec *codec = avcodec_find_encoder_by_name(video_codec == VideoCodec::H265 ? "hevc_nvenc" : "h264_nvenc"); - if (!codec) { - codec = avcodec_find_encoder_by_name(video_codec == VideoCodec::H265 ? "nvenc_hevc" : "nvenc_h264"); - } - if (!codec) { - fprintf( - stderr, - "Error: Could not find %s encoder\n", video_codec == VideoCodec::H265 ? "hevc" : "h264"); - exit(1); - } + int fps, const AVCodec *codec, bool is_livestream) { AVCodecContext *codec_context = avcodec_alloc_context3(codec); @@ -642,7 +646,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con codec_context->max_b_frames = 0; codec_context->pix_fmt = AV_PIX_FMT_CUDA; codec_context->color_range = AVCOL_RANGE_JPEG; - if(video_codec == VideoCodec::H265) + if(codec->id == AV_CODEC_ID_HEVC) codec_context->codec_tag = MKTAG('h', 'v', 'c', '1'); switch(video_quality) { case VideoQuality::MEDIUM: @@ -743,7 +747,7 @@ static AVBufferRef* dummy_hw_frame_init(size_t size) { static void open_video(AVCodecContext *codec_context, WindowPixmap &window_pixmap, AVBufferRef **device_ctx, - CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context, bool use_nvfbc, VideoQuality video_quality, bool is_livestream) { + CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context, bool use_nvfbc, VideoQuality video_quality, bool is_livestream, bool very_old_gpu) { int ret; *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); @@ -810,7 +814,15 @@ static void open_video(AVCodecContext *codec_context, // //av_dict_set(&options, "preset", "llhq", 0); //} - av_dict_set(&options, "preset", "p7", 0); + // Fuck nvidia and ffmpeg, I want to use a good preset for the gpu but all gpus prefer different + // presets. Nvidia and ffmpeg used to support "hq" preset that chose the best preset for the gpu + // with pretty good performance but you now have to choose p1-p7, which are gpu agnostic and on + // older gpus p5-p7 slow the gpu down to a crawl... + // "hq" is now just an alias for p7 in ffmpeg :( + if(very_old_gpu) + av_dict_set(&options, "preset", "p4", 0); + else + av_dict_set(&options, "preset", "p7", 0); av_dict_set(&options, "tune", "hq", 0); av_dict_set(&options, "rc", "constqp", 0); @@ -1340,21 +1352,18 @@ int main(int argc, char **argv) { const double target_fps = 1.0 / (double)fps; + Display *dpy = XOpenDisplay(nullptr); + if (!dpy) { + fprintf(stderr, "Error: Failed to open display\n"); + return 1; + } + + XSetErrorHandler(x11_error_handler); + XSetIOErrorHandler(x11_io_error_handler); + WindowPixmap window_pixmap; - Display *dpy = nullptr; Window window = None; if(src_window_id) { - dpy = XOpenDisplay(nullptr); - if (!dpy) { - fprintf(stderr, "Error: Failed to open display\n"); - return 1; - } - - //#if defined(DEBUG) - XSetErrorHandler(x11_error_handler); - XSetIOErrorHandler(x11_io_error_handler); - //#endif - bool has_name_pixmap = x11_supports_composite_named_window_pixmap(dpy); if (!has_name_pixmap) { fprintf(stderr, "Error: XCompositeNameWindowPixmap is not supported by " @@ -1401,11 +1410,37 @@ int main(int argc, char **argv) { window_pixmap.texture_height = window_height; } + bool very_old_gpu = false; + bool gl_loaded = window; + if(!gl_loaded) { + if(!gl.load()) { + fprintf(stderr, "Error: Failed to load opengl\n"); + return 1; + } + } + + const unsigned char *gl_renderer = gl.glGetString(GL_RENDERER); + if(gl_renderer) { + int gpu_num = 1000; + sscanf((const char*)gl_renderer, "%*s %*s %*s %d", &gpu_num); + if(gpu_num < 900) { + fprintf(stderr, "Info: your gpu appears to be very old (older than maxwell architecture). Switching to lower preset\n"); + very_old_gpu = true; + } + } + + if(!gl_loaded) + gl.unload(); + if(strcmp(codec_to_use, "auto") == 0) { + const AVCodec *h265_codec = find_h265_encoder(); + // h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k. // Another important info is that when recording at a higher fps than.. 60? h265 has very bad performance. For example when recording at 144 fps the fps drops to 1 // while with h264 the fps doesn't drop. - if(fps > 60) { + if(!h265_codec) { + fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n"); + } else if(fps > 60) { fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); codec_to_use = "h264"; video_codec = VideoCodec::H264; @@ -1416,6 +1451,21 @@ int main(int argc, char **argv) { } } + const AVCodec *video_codec_f = nullptr; + switch(video_codec) { + case VideoCodec::H264: + video_codec_f = find_h264_encoder(); + break; + case VideoCodec::H265: + video_codec_f = find_h265_encoder(); + break; + } + + if(!video_codec_f) { + fprintf(stderr, "Error: your gpu does not support '%s' video codec\n", video_codec == VideoCodec::H264 ? "h264" : "h265"); + exit(2); + } + // Video start AVFormatContext *av_format_context; // The output format is automatically guessed by the file extension @@ -1448,13 +1498,13 @@ int main(int argc, char **argv) { AVStream *video_stream = nullptr; std::vector audio_tracks; - AVCodecContext *video_codec_context = create_video_codec_context(av_format_context, quality, record_width, record_height, fps, video_codec, is_livestream); + AVCodecContext *video_codec_context = create_video_codec_context(av_format_context, quality, record_width, record_height, fps, video_codec_f, is_livestream); if(replay_buffer_size_secs == -1) video_stream = create_stream(av_format_context, video_codec_context); AVBufferRef *device_ctx; CUgraphicsResource cuda_graphics_resource; - open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx, !src_window_id, quality, is_livestream); + open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx, !src_window_id, quality, is_livestream, very_old_gpu); if(video_stream) avcodec_parameters_from_context(video_stream->codecpar, video_codec_context); @@ -1498,7 +1548,7 @@ int main(int argc, char **argv) { // av_frame_free(&rgb_frame); // avcodec_close(av_codec_context); - if(dpy) + if(src_window_id) XSelectInput(dpy, src_window_id, StructureNotifyMask | ExposureMask); /*