Add overclocking option -oc to workaround a NVIDIA driver bug (forcefully set to p2 state when using cuda)

2023-03-17 17:17:14 +01:00
parent 8cbdb596eb
commit 93cb6593a6
20 changed files with 441 additions and 63 deletions
--- a/include/capture/nvfbc.h
+++ b/include/capture/nvfbc.h
@@ -12,7 +12,8 @@ typedef struct {
    int fps;
    vec2i pos;
    vec2i size;
-    bool direct_capture; /* temporary disabled */
+    bool direct_capture;
+    bool overclock;
 } gsr_capture_nvfbc_params;

 gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params);
--- a/include/capture/xcomposite_cuda.h
+++ b/include/capture/xcomposite_cuda.h
@@ -11,6 +11,7 @@ typedef struct {
    Window window;
    bool follow_focused; /* If this is set then |window| is ignored */
    vec2i region_size; /* This is currently only used with |follow_focused| */
+    bool overclock;
 } gsr_capture_xcomposite_cuda_params;

 gsr_capture* gsr_capture_xcomposite_cuda_create(const gsr_capture_xcomposite_cuda_params *params);
--- a/include/cuda.h
+++ b/include/cuda.h
@@ -1,12 +1,15 @@
 #ifndef GSR_CUDA_H
 #define GSR_CUDA_H

+#include "overclock.h"
 #include <stddef.h>
 #include <stdbool.h>

 // To prevent hwcontext_cuda.h from including cuda.h
 #define CUDA_VERSION 11070

+#define CU_CTX_SCHED_AUTO 0
+
 #if defined(_WIN64) || defined(__LP64__)
 typedef unsigned long long CUdeviceptr_v2;
 #else
@@ -68,11 +71,12 @@ typedef struct CUDA_MEMCPY2D_st {
 } CUDA_MEMCPY2D_v2;
 typedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D;

-#define CU_CTX_SCHED_AUTO 0
-
 typedef struct CUgraphicsResource_st *CUgraphicsResource;

 typedef struct {
+    gsr_overclock overclock;
+    bool do_overclock;
+
    void *library;
    CUcontext cu_ctx;

@@ -95,7 +99,7 @@ typedef struct {
    CUresult (*cuGraphicsSubResourceGetMappedArray)(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
 } gsr_cuda;

-bool gsr_cuda_load(gsr_cuda *self);
+bool gsr_cuda_load(gsr_cuda *self, Display *display, bool overclock);
 void gsr_cuda_unload(gsr_cuda *self);

 #endif /* GSR_CUDA_H */
--- a/include/overclock.h
+++ b/include/overclock.h
@@ -0,0 +1,17 @@
+#ifndef GSR_OVERCLOCK_H
+#define GSR_OVERCLOCK_H
+
+#include "xnvctrl.h"
+
+typedef struct {
+    gsr_xnvctrl xnvctrl;
+    int num_performance_levels;
+} gsr_overclock;
+
+bool gsr_overclock_load(gsr_overclock *self, Display *display);
+void gsr_overclock_unload(gsr_overclock *self);
+
+bool gsr_overclock_start(gsr_overclock *self);
+void gsr_overclock_stop(gsr_overclock *self);
+
+#endif /* GSR_OVERCLOCK_H */
--- a/include/xnvctrl.h
+++ b/include/xnvctrl.h
@@ -0,0 +1,43 @@
+#ifndef GSR_XNVCTRL_H
+#define GSR_XNVCTRL_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET                            410
+#define NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS     425
+
+#define NV_CTRL_TARGET_TYPE_GPU                                         1
+
+#define NV_CTRL_STRING_PERFORMANCE_MODES                                29
+
+typedef struct _XDisplay Display;
+
+typedef struct {
+    int type;
+    union {
+        struct {
+            int64_t min;
+            int64_t max;
+        } range;
+        struct {
+            unsigned int ints;
+        } bits;
+    } u;
+    unsigned int permissions;
+} NVCTRLAttributeValidValuesRec;
+
+typedef struct {
+    Display *display;
+    void *library;
+    
+    int (*XNVCTRLQueryExtension)(Display *dpy, int *event_basep, int *error_basep);
+    int (*XNVCTRLSetTargetAttributeAndGetStatus)(Display *dpy, int target_type, int target_id, unsigned int display_mask, unsigned int attribute, int value);
+    int (*XNVCTRLQueryValidTargetAttributeValues)(Display *dpy, int target_type, int target_id, unsigned int display_mask, unsigned int attribute, NVCTRLAttributeValidValuesRec *values);
+    int (*XNVCTRLQueryTargetStringAttribute)(Display *dpy, int target_type, int target_id, unsigned int display_mask, unsigned int attribute, char **ptr);
+} gsr_xnvctrl;
+
+bool gsr_xnvctrl_load(gsr_xnvctrl *self, Display *display);
+void gsr_xnvctrl_unload(gsr_xnvctrl *self);
+
+#endif /* GSR_XNVCTRL_H */