From d92ecea69eab0db4e1c3a0e73fed10923374c3a8 Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Sun, 26 Mar 2023 19:04:43 +0200
Subject: [PATCH] Remove yuv444 for now

---
 README.md                     |  2 +-
 TODO                          |  8 +++++++-
 src/capture/nvfbc.c           |  8 +++++++-
 src/capture/xcomposite_cuda.c | 11 +++++++++--
 src/capture/xcomposite_drm.c  |  1 +
 src/main.cpp                  | 22 +++++++++++++++-------
 6 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 9277bff..4180bd9 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ You can also install gpu screen recorder ([the gtk gui version](https://git.dec0
 ## AMD/Intel
 `libglvnd (which provides libgl and libegl), mesa, ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, libva`.
 ## NVIDIA
-`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, libnvidia-compute libnvidia-encode`. Additionally, you need to have `libnvidia-fbc1` installed when using nvfbc and `libxnvctrl0` when using the `-oc` option.
+`libglvnd (which provides libgl and libegl), ffmpeg (libavcodec, libavformat, libavutil, libswresample, libavfilter), libx11, libxcomposite, libxrandr, libpulse, cuda (libnvidia-compute), nvenc (libnvidia-encode)`. Additionally, you need to have `nvfbc (libnvidia-fbc1)` installed when using nvfbc and `xnvctrl (libxnvctrl0)` when using the `-oc` option.
 
 # How to use
 Run `scripts/interactive.sh` or run gpu-screen-recorder directly, for example: `gpu-screen-recorder -w $(xdotool selectwindow) -c mp4 -f 60 -a "$(pactl get-default-sink).monitor" -o test_video.mp4` then stop the screen recorder with Ctrl+C, which will also save the recording. You can change -w to -w screen if you want to record all monitors or if you want to record a specific monitor then you can use -w monitor-name, for example -w HDMI-0 (use xrandr command to find the name of your monitor. The name can also be found in your desktop environments display settings).\
diff --git a/TODO b/TODO
index 3471acd..12de328 100644
--- a/TODO
+++ b/TODO
@@ -21,4 +21,10 @@ Reverse engineer nvapi so we can disable "force p2 state" on linux too (nvapi pr
 Support yuv444p on amd/intel.
 fix yuv444 for hevc.
 Do not allow streaming if yuv444.
-Support 10 bit output because of better gradients. May even be smaller file size.
\ No newline at end of file
+Re-enable yuv444.
+Support 10 bit output because of better gradients. May even be smaller file size. Better supported on hevc (not supported at all on h264 on my gpu).
+Add nvidia/(amd/intel) specific install scripts for ubuntu. The user should run install_ubuntu.sh, which should run a different dependency install script depending on whether /proc/driver/nvidia/version exists. But what about switchable graphics setups?
+Test different combinations of switchable graphics: intel hybrid mode (running on intel, but with the possibility to run specific applications with prime-run) and running pure intel. Detect switchable graphics.
+
+https://web.archive.org/web/20210306020203/https://forums.developer.nvidia.com/t/performance-power-management-problem-on-shared-vgpu/161986
+https://djdallmann.github.io/GamingPCSetup/CONTENT/RESEARCH/FINDINGS/registrykeys_displayadapter_class_4d36e968-e325-11ce-bfc1-08002be10318.txt
diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c
index 68732f9..c1c998b 100644
--- a/src/capture/nvfbc.c
+++ b/src/capture/nvfbc.c
@@ -160,7 +160,7 @@ static bool ffmpeg_create_cuda_contexts(gsr_capture_nvfbc *cap_nvfbc, AVCodecCon
     AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data;
     hw_frame_context->width = video_codec_context->width;
     hw_frame_context->height = video_codec_context->height;
-    hw_frame_context->sw_format = AV_PIX_FMT_0RGB32;
+    hw_frame_context->sw_format = AV_PIX_FMT_BGR0;
     hw_frame_context->format = video_codec_context->pix_fmt;
     hw_frame_context->device_ref = device_ctx;
     hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data;
@@ -381,6 +381,7 @@ static void gsr_capture_nvfbc_tick(gsr_capture *cap, AVCodecContext *video_codec
         (*frame)->color_primaries = video_codec_context->color_primaries;
         (*frame)->color_trc = video_codec_context->color_trc;
         (*frame)->colorspace = video_codec_context->colorspace;
+        (*frame)->chroma_location = video_codec_context->chroma_sample_location;
     }
 }
 
@@ -414,7 +415,12 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) {
     */
 
     frame->data[0] = (uint8_t*)cu_device_ptr;
+    //frame->data[1] = (uint8_t*)cu_device_ptr;
+    //frame->data[2] = (uint8_t*)cu_device_ptr;
     frame->linesize[0] = frame->width * 4;
+    // TODO: Use these when outputting yuv444 by changing nvfbc color to YUV444P and sw_format to YUV444P
+    //frame->linesize[1] = frame->width * 1;
+    //frame->linesize[2] = frame->width * 1;
     return 0;
 }
 
diff --git a/src/capture/xcomposite_cuda.c b/src/capture/xcomposite_cuda.c
index bf8a053..1ce80f5 100644
--- a/src/capture/xcomposite_cuda.c
+++ b/src/capture/xcomposite_cuda.c
@@ -117,7 +117,7 @@ static bool cuda_create_codec_context(gsr_capture_xcomposite_cuda *cap_xcomp, AV
         (AVHWFramesContext *)frame_context->data;
     hw_frame_context->width = video_codec_context->width;
     hw_frame_context->height = video_codec_context->height;
-    hw_frame_context->sw_format = AV_PIX_FMT_0RGB32;
+    hw_frame_context->sw_format = AV_PIX_FMT_BGR0;
     hw_frame_context->format = video_codec_context->pix_fmt;
     hw_frame_context->device_ref = device_ctx;
     hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data;
@@ -387,6 +387,7 @@ static void gsr_capture_xcomposite_cuda_tick(gsr_capture *cap, AVCodecContext *v
         (*frame)->color_primaries = video_codec_context->color_primaries;
         (*frame)->color_trc = video_codec_context->color_trc;
         (*frame)->colorspace = video_codec_context->colorspace;
+        (*frame)->chroma_location = video_codec_context->chroma_sample_location;
 
         if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) {
             fprintf(stderr, "gsr error: gsr_capture_xcomposite_cuda_tick: av_hwframe_get_buffer failed\n");
@@ -439,6 +440,9 @@ static int gsr_capture_xcomposite_cuda_capture(gsr_capture *cap, AVFrame *frame)
     cap_xcomp->egl.eglSwapBuffers(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_surface);
 
     frame->linesize[0] = frame->width * 4;
+    //frame->linesize[0] = frame->width * 1;
+    //frame->linesize[1] = frame->width * 1;
+    //frame->linesize[2] = frame->width * 1;
 
     CUDA_MEMCPY2D memcpy_struct;
     memcpy_struct.srcXInBytes = 0;
@@ -452,10 +456,13 @@ static int gsr_capture_xcomposite_cuda_capture(gsr_capture *cap, AVFrame *frame)
     memcpy_struct.srcArray = cap_xcomp->mapped_array;
     memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
     memcpy_struct.dstPitch = frame->linesize[0];
-    memcpy_struct.WidthInBytes = frame->width * 4;
+    memcpy_struct.WidthInBytes = frame->width * 4;//frame->width * 1;
     memcpy_struct.Height = frame->height;
     cap_xcomp->cuda.cuMemcpy2D_v2(&memcpy_struct);
 
+    //frame->data[1] = frame->data[0];
+    //frame->data[2] = frame->data[0];
+
     return 0;
 }
 
diff --git a/src/capture/xcomposite_drm.c b/src/capture/xcomposite_drm.c
index a4810a8..fbd96fc 100644
--- a/src/capture/xcomposite_drm.c
+++ b/src/capture/xcomposite_drm.c
@@ -376,6 +376,7 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi
         (*frame)->color_primaries = video_codec_context->color_primaries;
         (*frame)->color_trc = video_codec_context->color_trc;
         (*frame)->colorspace = video_codec_context->colorspace;
+        (*frame)->chroma_location = video_codec_context->chroma_sample_location;
 
         int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0);
         if(res < 0) {
diff --git a/src/main.cpp b/src/main.cpp
index 2a555cd..80f29a4 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -370,10 +370,11 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt,
     }
     codec_context->max_b_frames = 0;
     codec_context->pix_fmt = pix_fmt;
-    //codec_context->color_range = AVCOL_RANGE_JPEG;
-    //codec_context->color_primaries = AVCOL_PRI_BT709;
-    //codec_context->color_trc = AVCOL_TRC_BT709;
-    //codec_context->colorspace = AVCOL_SPC_BT709;
+    codec_context->color_range = AVCOL_RANGE_JPEG;
+    codec_context->color_primaries = AVCOL_PRI_BT709;
+    codec_context->color_trc = AVCOL_TRC_BT709;
+    codec_context->colorspace = AVCOL_SPC_BT709;
+    //codec_context->chroma_sample_location = AVCHROMA_LOC_CENTER;
     if(codec->id == AV_CODEC_ID_HEVC)
         codec_context->codec_tag = MKTAG('h', 'v', 'c', '1');
     switch(video_quality) {
@@ -422,6 +423,7 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt,
     #endif
 
     av_opt_set_int(codec_context->priv_data, "b_ref_mode", 0, 0);
+    //av_opt_set_int(codec_context->priv_data, "cbr", true, 0);
 
     if(vendor != GPU_VENDOR_NVIDIA) {
         // TODO: More options, better options
@@ -606,6 +608,9 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality
                     av_dict_set(&options, "profile", "high444p", 0);
                     break;
             }
+        } else {
+            //av_dict_set(&options, "profile", "main10", 0);
+            //av_dict_set(&options, "pix_fmt", "yuv420p16le", 0);
         }
     } else {
         switch(video_quality) {
@@ -649,7 +654,7 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality
 }
 
 static void usage() {
-    fprintf(stderr, "usage: gpu-screen-recorder -w <window_id|monitor|focused> [-c <container_format>] [-s WxH] -f <fps> [-a <audio_input>...] [-q <quality>] [-r <replay_buffer_size_sec>] [-k h264|h265] [-ac aac|opus|flac] [-oc yes|no] [-pixfmt yuv420|yuv444] [-o <output_file>]\n");
+    fprintf(stderr, "usage: gpu-screen-recorder -w <window_id|monitor|focused> [-c <container_format>] [-s WxH] -f <fps> [-a <audio_input>...] [-q <quality>] [-r <replay_buffer_size_sec>] [-k h264|h265] [-ac aac|opus|flac] [-oc yes|no] [-o <output_file>]\n");
     fprintf(stderr, "OPTIONS:\n");
     fprintf(stderr, "  -w       Window to record, a display, \"screen\", \"screen-direct\", \"screen-direct-force\" or \"focused\". The display is the display (monitor) name in xrandr and if \"screen\" or \"screen-direct\" is selected then all displays are recorded. If this is \"focused\" then the currently focused window is recorded. When recording the focused window then the -s option has to be used as well.\n"
         "        \"screen-direct\"/\"screen-direct-force\" skips one texture copy for fullscreen applications so it may lead to better performance and it works with VRR monitors when recording fullscreen application but may break some applications, such as mpv in fullscreen mode. Direct mode doesn't capture cursor either. \"screen-direct-force\" is not recommended unless you use a VRR monitor because there might be driver issues that cause the video to stutter or record a black screen.\n");
@@ -664,7 +669,7 @@ static void usage() {
     fprintf(stderr, "  -k       Video codec to use. Should be either 'auto', 'h264' or 'h265'. Defaults to 'auto' which defaults to 'h265' on nvidia unless recording at a higher resolution than 3840x2160. On AMD/Intel this defaults to 'auto' which defaults to 'h264'. Forcefully set to 'h264' if -c is 'flv'.\n");
     fprintf(stderr, "  -ac      Audio codec to use. Should be either 'aac', 'opus' or 'flac'. Defaults to 'opus' for .mp4/.mkv files, otherwise defaults to 'aac'. 'opus' and 'flac' is only supported by .mp4/.mkv files. 'opus' is recommended for best performance and smallest audio size.\n");
     fprintf(stderr, "  -oc      Overclock memory transfer rate to the maximum performance level. This only applies to NVIDIA and exists to overcome a bug in NVIDIA driver where performance level is dropped when you record a game. Only needed if you are recording a game that is bottlenecked by GPU. Works only if your have \"Coolbits\" set to \"12\" in NVIDIA X settings, see README for more information. Note! use at your own risk! Optional, disabled by default\n");
-    fprintf(stderr, "  -pixfmt  The pixel format to use for the output video. yuv420 is the most common format and is best supported, but the color is compressed, so colors can look washed outandr certain colors of text can look bad. Use yuv444 for no color compression, but the video may not work everywhere and it may not work with hardware video decoding. Optional, defaults to yuv420\n");
+   // fprintf(stderr, "  -pixfmt  The pixel format to use for the output video. yuv420 is the most common format and is best supported, but the color is compressed, so colors can look washed out and certain colors of text can look bad. Use yuv444 for no color compression, but the video may not work everywhere and it may not work with hardware video decoding. Optional, defaults to yuv420\n");
     fprintf(stderr, "  -o       The output file path. If omitted then the encoded data is sent to stdout. Required in replay mode (when using -r). In replay mode this has to be an existing directory instead of a file.\n");
     fprintf(stderr, "NOTES:\n");
     fprintf(stderr, "  Send signal SIGINT (Ctrl+C) to gpu-screen-recorder to stop and save the recording (when not using replay mode).\n");
@@ -1646,6 +1651,7 @@ int main(int argc, char **argv) {
     if(replay_buffer_size_secs == -1) {
         AVDictionary *options = nullptr;
         av_dict_set(&options, "strict", "experimental", 0);
+        //av_dict_set_int(&av_format_context->metadata, "video_full_range_flag", 1, 0);
 
         int ret = avformat_write_header(av_format_context, &options);
         if (ret < 0) {
@@ -1674,6 +1680,7 @@ int main(int argc, char **argv) {
     frame->color_primaries = video_codec_context->color_primaries;
     frame->color_trc = video_codec_context->color_trc;
     frame->colorspace = video_codec_context->colorspace;
+    frame->chroma_location = video_codec_context->chroma_sample_location;
 
     std::mutex write_output_mutex;
     std::mutex audio_filter_mutex;
@@ -1854,6 +1861,7 @@ int main(int argc, char **argv) {
             // TODO: Check if duplicate frame can be saved just by writing it with a different pts instead of sending it again
             for(int i = 0; i < num_frames; ++i) {
                 video_frame->pts = video_pts_counter + i;
+
                 int ret = avcodec_send_frame(video_codec_context, video_frame);
                 if(ret == 0) {
                     // TODO: Move to separate thread because this could write to network (for example when livestreaming)
@@ -1914,6 +1922,7 @@ int main(int argc, char **argv) {
 
         double frame_time_overflow = frame_timer_elapsed - target_fps;
         if (frame_time_overflow >= 0.0) {
+            frame_time_overflow = std::min(frame_time_overflow, target_fps);
             frame_timer_start = time_now - frame_time_overflow;
             gsr_capture_capture(capture, frame);
             std::lock_guard<std::mutex> lock(video_frame_mutex);
@@ -1963,7 +1972,6 @@ int main(int argc, char **argv) {
         video_frame_cv.notify_one();
     }
     video_send_encode_thread.join();
-    //video_packet_save_thread.join();
 
     if(latest_video_frame) {
         av_frame_free(&latest_video_frame);