Fix replay video/audio desync, fix dummy audio when dropping audio input, and give each audio stream a new name so it can be replaced with PipeWire graphs

This commit is contained in:
dec05eba
2022-09-20 03:39:15 +02:00
parent 9f2ddf3802
commit 919890b7b2
6 changed files with 258 additions and 189 deletions

View File

@@ -62,7 +62,6 @@ extern "C" {
#include <deque>
#include <future>
#include <condition_variable>
//#include <CL/cl.h>
@@ -434,7 +433,7 @@ static void receive_frames(AVCodecContext *av_codec_context, int stream_index, A
} else {
av_packet_rescale_ts(&av_packet, av_codec_context->time_base, stream->time_base);
av_packet.stream_index = stream->index;
int ret = av_interleaved_write_frame(av_format_context, &av_packet);
int ret = av_write_frame(av_format_context, &av_packet);
if(ret < 0) {
fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", av_packet.stream_index, av_error_to_string(ret), ret);
}
@@ -810,33 +809,51 @@ static std::future<void> save_replay_thread;
static std::vector<AVPacket> save_replay_packets;
static std::string save_replay_output_filepath;
static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format) {
static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector<AudioTrack> &audio_tracks, const std::deque<AVPacket> &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format, std::mutex &write_output_mutex) {
if(save_replay_thread.valid())
return;
size_t start_index = (size_t)-1;
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
start_index = i;
break;
int64_t video_pts_offset = 0;
int64_t audio_pts_offset = 0;
{
std::lock_guard<std::mutex> lock(write_output_mutex);
start_index = (size_t)-1;
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) {
start_index = i;
break;
}
}
if(start_index == (size_t)-1)
return;
if(frames_erased) {
video_pts_offset = frame_data_queue[start_index].pts;
// Find the next audio packet to use as audio pts offset
for(size_t i = start_index; i < frame_data_queue.size(); ++i) {
const AVPacket &av_packet = frame_data_queue[i];
if(av_packet.stream_index != video_stream_index) {
audio_pts_offset = av_packet.pts;
break;
}
}
} else {
start_index = 0;
}
save_replay_packets.resize(frame_data_queue.size());
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
}
}
if(start_index == (size_t)-1)
return;
int64_t pts_offset = 0;
if(frames_erased)
pts_offset = frame_data_queue[start_index].pts;
save_replay_packets.resize(frame_data_queue.size());
for(size_t i = 0; i < frame_data_queue.size(); ++i) {
av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]);
}
save_replay_output_filepath = output_dir + "/Replay_" + get_date_str() + "." + container_format;
save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, pts_offset, video_codec_context, &audio_tracks]() mutable {
save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, video_pts_offset, audio_pts_offset, video_codec_context, &audio_tracks]() mutable {
AVFormatContext *av_format_context;
// The output format is selected by name from container_format (no filename is passed, so nothing is guessed from a file extension)
avformat_alloc_output_context2(&av_format_context, nullptr, container_format.c_str(), nullptr);
@@ -874,18 +891,22 @@ static void save_replay_async(AVCodecContext *video_codec_context, int video_str
AVStream *stream = video_stream;
AVCodecContext *codec_context = video_codec_context;
if(av_packet.stream_index != video_stream_index) {
if(av_packet.stream_index == video_stream_index) {
av_packet.pts -= video_pts_offset;
av_packet.dts -= video_pts_offset;
} else {
AudioTrack *audio_track = stream_index_to_audio_track_map[av_packet.stream_index];
stream = audio_track->stream;
codec_context = audio_track->codec_context;
av_packet.pts -= audio_pts_offset;
av_packet.dts -= audio_pts_offset;
}
av_packet.stream_index = stream->index;
av_packet.pts -= pts_offset;
av_packet.dts -= pts_offset;
av_packet_rescale_ts(&av_packet, codec_context->time_base, stream->time_base);
int ret = av_interleaved_write_frame(av_format_context, &av_packet);
int ret = av_write_frame(av_format_context, &av_packet);
if(ret < 0)
fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", stream->index, av_error_to_string(ret), ret);
}
@@ -1426,52 +1447,11 @@ int main(int argc, char **argv) {
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
swr_init(swr);
std::deque<uint8_t*> buffered_audio;
std::mutex buffered_audio_mutex;
std::condition_variable buffered_audio_cv;
bool got_first_batch = false;
// TODO: Make the sound device read async instead of using a thread
std::thread sound_read_thread([&](){
while(running) {
void *sound_buffer;
int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
if(sound_buffer_size >= 0) {
uint8_t *data = (uint8_t*)malloc(audio_track.sound_device.buffer_size);
if(data) {
memcpy(data, sound_buffer, audio_track.sound_device.buffer_size);
std::unique_lock<std::mutex> lock(buffered_audio_mutex);
buffered_audio.push_back(data);
buffered_audio_cv.notify_one();
}
}
}
});
while(running) {
uint8_t *audio_buffer;
bool free_audio;
{
// TODO: Not a good solution to lack of audio as it causes dropped frames, but it's better than complete audio desync.
// The first packet is delayed for some reason...
std::unique_lock<std::mutex> lock(buffered_audio_mutex);
if(got_first_batch)
buffered_audio_cv.wait(lock, [&]{ return !running || !buffered_audio.empty(); });
else
buffered_audio_cv.wait_for(lock, std::chrono::milliseconds(21), [&]{ return !running || !buffered_audio.empty(); });
if(!running)
break;
if(buffered_audio.empty()) {
audio_buffer = empty_audio;
free_audio = false;
} else {
audio_buffer = buffered_audio.front();
buffered_audio.pop_front();
free_audio = true;
got_first_batch = true;
}
}
void *sound_buffer;
int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
if(sound_buffer_size < 0)
sound_buffer = empty_audio;
int ret = av_frame_make_writable(audio_track.frame);
if (ret < 0) {
@@ -1480,7 +1460,7 @@ int main(int argc, char **argv) {
}
// TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&audio_buffer, audio_track.sound_device.frames);
swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames);
audio_track.frame->pts = (clock_get_monotonic_seconds() - start_time_pts) * AV_TIME_BASE;
ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame);
@@ -1489,17 +1469,9 @@ int main(int argc, char **argv) {
} else {
fprintf(stderr, "Failed to encode audio!\n");
}
if(free_audio)
free(audio_buffer);
}
sound_read_thread.join();
while(!buffered_audio.empty()) {
free(buffered_audio.front());
buffered_audio.pop_front();
}
sound_device_close(&audio_track.sound_device);
swr_free(&swr);
}, av_format_context, &write_output_mutex);
}
@@ -1749,7 +1721,7 @@ int main(int argc, char **argv) {
if(save_replay == 1 && !save_replay_thread.valid() && replay_buffer_size_secs != -1) {
save_replay = 0;
save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format);
save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format, write_output_mutex);
}
// av_frame_free(&frame);
@@ -1767,7 +1739,6 @@ int main(int argc, char **argv) {
for(AudioTrack &audio_track : audio_tracks) {
audio_track.thread.join();
sound_device_close(&audio_track.sound_device);
}
if (replay_buffer_size_secs == -1 && av_write_trailer(av_format_context) != 0) {