Use p4 preset on older gpus (kepler family) because of performance with p7
dec05eba 2022-10-11 18:57:09 +02:00
parent 1ca4dcfbdb
commit 9d185f3091
5 changed files with 94 additions and 36 deletions


@@ -6,3 +6,4 @@ libs="$(pkg-config --libs $dependencies) -ldl -pthread -lm"
g++ -c src/sound.cpp -O2 -g0 -DNDEBUG $includes
g++ -c src/main.cpp -O2 -g0 -DNDEBUG $includes
g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs
echo "Successfully built gpu-screen-recorder"


@@ -26,6 +26,8 @@ typedef struct __GLXFBConfigRec *GLXFBConfig;
#define GL_TEXTURE_HEIGHT 0x1001
#define GL_NEAREST 0x2600
+#define GL_RENDERER 0x1F01
#define GLX_BUFFER_SIZE 2
#define GLX_DOUBLEBUFFER 5
#define GLX_RED_SIZE 8
@@ -71,6 +73,7 @@ struct GlLibrary {
void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data);
unsigned int (*glGetError)(void);
+const unsigned char* (*glGetString)(unsigned int name);
void (*glClear)(unsigned int mask);
void (*glGenTextures)(int n, unsigned int *textures);
void (*glDeleteTextures)(int n, const unsigned int *texture);
@@ -81,8 +84,7 @@ struct GlLibrary {
void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth);
~GlLibrary() {
-if(library)
-dlclose(library);
+unload();
}
bool load() {
@@ -120,6 +122,7 @@ struct GlLibrary {
{ (void**)&glXSwapBuffers, "glXSwapBuffers" },
{ (void**)&glGetError, "glGetError" },
{ (void**)&glGetString, "glGetString" },
{ (void**)&glClear, "glClear" },
{ (void**)&glGenTextures, "glGenTextures" },
{ (void**)&glDeleteTextures, "glDeleteTextures" },
@@ -141,6 +144,13 @@ struct GlLibrary {
return false;
}
}
+void unload() {
+if(library) {
+dlclose(library);
+library = nullptr;
+}
+}
private:
void *library = nullptr;
};
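
The point of splitting unload() out of the destructor is that libGL can now be opened briefly and released again. A minimal usage sketch, assuming the GlLibrary struct above; it mirrors the main() change later in this commit:

GlLibrary gl;
if(gl.load()) {
    // Query the driver just to identify the gpu, then release libGL.
    const unsigned char *renderer = gl.glGetString(GL_RENDERER);
    // ... inspect renderer ...
    gl.unload(); // sets library to nullptr, so the destructor won't dlclose twice
}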


@@ -192,7 +192,7 @@ public:
NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab_params;
memset(&grab_params, 0, sizeof(grab_params));
grab_params.dwVersion = NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER;
-grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT;
+grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT | NVFBC_TOCUDA_GRAB_FLAGS_FORCE_REFRESH;
grab_params.pFrameGrabInfo = &frame_info;
grab_params.pCUDADeviceBuffer = cu_device_ptr;


@@ -4,9 +4,6 @@ type = "executable"
version = "1.2.0"
platforms = ["posix"]
-[config]
-include_dirs = ["external"]
[dependencies]
libavcodec = ">=58"
libavformat = ">=58"


@@ -596,20 +596,24 @@ static AVCodecContext* create_audio_codec_context(AVFormatContext *av_format_context,
return codec_context;
}
+static const AVCodec* find_h264_encoder() {
+const AVCodec *codec = avcodec_find_encoder_by_name("h264_nvenc");
+if(!codec)
+codec = avcodec_find_encoder_by_name("nvenc_h264");
+return codec;
+}
+static const AVCodec* find_h265_encoder() {
+const AVCodec *codec = avcodec_find_encoder_by_name("hevc_nvenc");
+if(!codec)
+codec = avcodec_find_encoder_by_name("nvenc_hevc");
+return codec;
+}
static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_context,
VideoQuality video_quality,
int record_width, int record_height,
-int fps, VideoCodec video_codec, bool is_livestream) {
-const AVCodec *codec = avcodec_find_encoder_by_name(video_codec == VideoCodec::H265 ? "hevc_nvenc" : "h264_nvenc");
-if (!codec) {
-codec = avcodec_find_encoder_by_name(video_codec == VideoCodec::H265 ? "nvenc_hevc" : "nvenc_h264");
-}
-if (!codec) {
-fprintf(
-stderr,
-"Error: Could not find %s encoder\n", video_codec == VideoCodec::H265 ? "hevc" : "h264");
-exit(1);
-}
+int fps, const AVCodec *codec, bool is_livestream) {
AVCodecContext *codec_context = avcodec_alloc_context3(codec);
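
Both helpers try the current encoder name first and fall back to an older name used by some ffmpeg builds. A self-contained sketch to check which names a given ffmpeg build actually ships (assumes ffmpeg 4.0+, where registering codecs manually is no longer needed):

#include <cstdio>
extern "C" {
#include <libavcodec/avcodec.h>
}

int main() {
    // Same lookup the find_*_encoder helpers above perform, for all four names.
    for(const char *name : {"h264_nvenc", "nvenc_h264", "hevc_nvenc", "nvenc_hevc"}) {
        const AVCodec *codec = avcodec_find_encoder_by_name(name);
        printf("%-12s -> %s\n", name, codec ? "available" : "not found");
    }
    return 0;
}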
@@ -642,7 +646,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
codec_context->max_b_frames = 0;
codec_context->pix_fmt = AV_PIX_FMT_CUDA;
codec_context->color_range = AVCOL_RANGE_JPEG;
-if(video_codec == VideoCodec::H265)
+if(codec->id == AV_CODEC_ID_HEVC)
codec_context->codec_tag = MKTAG('h', 'v', 'c', '1');
switch(video_quality) {
case VideoQuality::MEDIUM:
@@ -743,7 +747,7 @@ static AVBufferRef* dummy_hw_frame_init(size_t size) {
static void open_video(AVCodecContext *codec_context,
WindowPixmap &window_pixmap, AVBufferRef **device_ctx,
-CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context, bool use_nvfbc, VideoQuality video_quality, bool is_livestream) {
+CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context, bool use_nvfbc, VideoQuality video_quality, bool is_livestream, bool very_old_gpu) {
int ret;
*device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
@@ -810,7 +814,15 @@ static void open_video(AVCodecContext *codec_context,
// //av_dict_set(&options, "preset", "llhq", 0);
//}
av_dict_set(&options, "preset", "p7", 0);
// Fuck nvidia and ffmpeg, I want to use a good preset for the gpu but all gpus prefer different
// presets. Nvidia and ffmpeg used to support "hq" preset that chose the best preset for the gpu
// with pretty good performance but you now have to choose p1-p7, which are gpu agnostic and on
// older gpus p5-p7 slow the gpu down to a crawl...
// "hq" is now just an alias for p7 in ffmpeg :(
if(very_old_gpu)
av_dict_set(&options, "preset", "p4", 0);
else
av_dict_set(&options, "preset", "p7", 0);
av_dict_set(&options, "tune", "hq", 0);
av_dict_set(&options, "rc", "constqp", 0);
@@ -1340,21 +1352,18 @@ int main(int argc, char **argv) {
const double target_fps = 1.0 / (double)fps;
-Display *dpy = XOpenDisplay(nullptr);
-if (!dpy) {
-fprintf(stderr, "Error: Failed to open display\n");
-return 1;
-}
-XSetErrorHandler(x11_error_handler);
-XSetIOErrorHandler(x11_io_error_handler);
WindowPixmap window_pixmap;
+Display *dpy = nullptr;
+Window window = None;
+if(src_window_id) {
+dpy = XOpenDisplay(nullptr);
+if (!dpy) {
+fprintf(stderr, "Error: Failed to open display\n");
+return 1;
+}
+//#if defined(DEBUG)
+XSetErrorHandler(x11_error_handler);
+XSetIOErrorHandler(x11_io_error_handler);
+//#endif
bool has_name_pixmap = x11_supports_composite_named_window_pixmap(dpy);
if (!has_name_pixmap) {
fprintf(stderr, "Error: XCompositeNameWindowPixmap is not supported by "
@@ -1401,11 +1410,37 @@ int main(int argc, char **argv) {
window_pixmap.texture_height = window_height;
}
+bool very_old_gpu = false;
+bool gl_loaded = window;
+if(!gl_loaded) {
+if(!gl.load()) {
+fprintf(stderr, "Error: Failed to load opengl\n");
+return 1;
+}
+}
+const unsigned char *gl_renderer = gl.glGetString(GL_RENDERER);
+if(gl_renderer) {
+int gpu_num = 1000;
+sscanf((const char*)gl_renderer, "%*s %*s %*s %d", &gpu_num);
+if(gpu_num < 900) {
+fprintf(stderr, "Info: your gpu appears to be very old (older than maxwell architecture). Switching to lower preset\n");
+very_old_gpu = true;
+}
+}
+if(!gl_loaded)
+gl.unload();
if(strcmp(codec_to_use, "auto") == 0) {
+const AVCodec *h265_codec = find_h265_encoder();
// h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k.
// Another important info is that when recording at a higher fps than.. 60? h265 has very bad performance. For example when recording at 144 fps the fps drops to 1
// while with h264 the fps doesn't drop.
-if(fps > 60) {
+if(!h265_codec) {
+fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n");
+} else if(fps > 60) {
fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n");
codec_to_use = "h264";
video_codec = VideoCodec::H264;
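
The renderer check in the hunk above leans on sscanf: "%*s %*s %*s %d" skips three whitespace-separated tokens and reads the fourth as an integer. A standalone sketch with a made-up renderer string (real strings vary by driver, so the parse can fail and leave gpu_num at its default):

#include <cstdio>

int main() {
    // Example only, not taken from the diff. Tokens: "NVIDIA" "GeForce" "GTX" "760/PCIe/SSE2".
    const char *renderer = "NVIDIA GeForce GTX 760/PCIe/SSE2";
    int gpu_num = 1000; // default: treat an unparsable string as a modern gpu
    if(sscanf(renderer, "%*s %*s %*s %d", &gpu_num) != 1)
        fprintf(stderr, "could not parse a model number\n");
    // 760 < 900: pre-Maxwell (Kepler 600/700 series), so the p4 preset is chosen.
    printf("%d -> %s\n", gpu_num, gpu_num < 900 ? "p4" : "p7");
    return 0;
}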
@@ -1416,6 +1451,21 @@ }
}
}
+const AVCodec *video_codec_f = nullptr;
+switch(video_codec) {
+case VideoCodec::H264:
+video_codec_f = find_h264_encoder();
+break;
+case VideoCodec::H265:
+video_codec_f = find_h265_encoder();
+break;
+}
+if(!video_codec_f) {
+fprintf(stderr, "Error: your gpu does not support '%s' video codec\n", video_codec == VideoCodec::H264 ? "h264" : "h265");
+exit(2);
+}
// Video start
AVFormatContext *av_format_context;
// The output format is automatically guessed by the file extension
@@ -1448,13 +1498,13 @@ int main(int argc, char **argv) {
AVStream *video_stream = nullptr;
std::vector<AudioTrack> audio_tracks;
-AVCodecContext *video_codec_context = create_video_codec_context(av_format_context, quality, record_width, record_height, fps, video_codec, is_livestream);
+AVCodecContext *video_codec_context = create_video_codec_context(av_format_context, quality, record_width, record_height, fps, video_codec_f, is_livestream);
if(replay_buffer_size_secs == -1)
video_stream = create_stream(av_format_context, video_codec_context);
AVBufferRef *device_ctx;
CUgraphicsResource cuda_graphics_resource;
-open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx, !src_window_id, quality, is_livestream);
+open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx, !src_window_id, quality, is_livestream, very_old_gpu);
if(video_stream)
avcodec_parameters_from_context(video_stream->codecpar, video_codec_context);
@@ -1498,7 +1548,7 @@ int main(int argc, char **argv) {
// av_frame_free(&rgb_frame);
// avcodec_close(av_codec_context);
-if(dpy)
+if(src_window_id)
XSelectInput(dpy, src_window_id, StructureNotifyMask | ExposureMask);
/*