From 9f3b9efd8bd5508ffd069cbd0c228857ee11e1e5 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Thu, 14 Aug 2025 12:44:10 +0300 Subject: [PATCH 01/12] ip/ffmpeg: more precise seeking av_seek_frame() and avformat_seek_file() seek to nearest "keyframe". For codecs like, for example, ape this means that seeking will be very off (5 seconds or more). So what we do is: 1. seek to nearest "keyframe" before the desired time, 2. discard some frames to approach the desired time. --- ip/ffmpeg.c | 154 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 60 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index 21b9a01f4..ecbf00582 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -44,6 +44,8 @@ struct ffmpeg_input { AVPacket pkt; int curr_pkt_size; uint8_t *curr_pkt_buf; + int64_t seek_ts; + int64_t prev_frame_end; int stream_index; unsigned long curr_size; @@ -76,6 +78,8 @@ static struct ffmpeg_input *ffmpeg_input_create(void) return NULL; } input->curr_pkt_size = 0; + input->seek_ts = -1; + input->prev_frame_end = -1; input->curr_pkt_buf = input->pkt.data; return input; } @@ -314,10 +318,7 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext #else AVFrame *frame = avcodec_alloc_frame(); #endif - int got_frame; while (1) { - int len; - if (input->curr_pkt_size <= 0) { #if LIBAVCODEC_VERSION_MAJOR >= 56 av_packet_unref(&input->pkt); @@ -333,78 +334,108 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext #endif return 0; } - if (input->pkt.stream_index == input->stream_index) { - input->curr_pkt_size = input->pkt.size; - input->curr_pkt_buf = input->pkt.data; - input->curr_size += input->pkt.size; - input->curr_duration += input->pkt.duration; - } - continue; - } - { - AVPacket avpkt; - av_new_packet(&avpkt, input->curr_pkt_size); - memcpy(avpkt.data, input->curr_pkt_buf, input->curr_pkt_size); + if (input->pkt.stream_index != input->stream_index) + continue; + input->curr_pkt_size = input->pkt.size; + input->curr_pkt_buf = input->pkt.data; + input->curr_size += input->pkt.size; + input->curr_duration += input->pkt.duration; + #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) - int send_result = avcodec_send_packet(cc, &avpkt); - if (send_result != 0) { - if (send_result != AVERROR(EAGAIN)) { - d_print("avcodec_send_packet() returned %d\n", send_result); - char errstr[AV_ERROR_MAX_STRING_SIZE]; - if (!av_strerror(send_result, errstr, AV_ERROR_MAX_STRING_SIZE )) - { - d_print("av_strerror(): %s\n", errstr); - } else { - d_print("av_strerror(): Description for error cannot be found\n"); - } - av_packet_unref(&avpkt); - return -IP_ERROR_INTERNAL; + int send_result = avcodec_send_packet(cc, &input->pkt); + if (send_result != 0 && send_result != AVERROR(EAGAIN)) { + d_print("avcodec_send_packet() returned %d\n", send_result); + char errstr[AV_ERROR_MAX_STRING_SIZE]; + if (!av_strerror(send_result, errstr, AV_ERROR_MAX_STRING_SIZE )) + { + d_print("av_strerror(): %s\n", errstr); + } else { + d_print("av_strerror(): Description for error cannot be found\n"); } - len = 0; - } else { - len = input->curr_pkt_size; + return -IP_ERROR_INTERNAL; } - - int recv_result = avcodec_receive_frame(cc, frame); - got_frame = (recv_result == 0) ? 1 : 0; -#else - len = avcodec_decode_audio4(cc, frame, &got_frame, &avpkt); -#endif -#if LIBAVCODEC_VERSION_MAJOR >= 56 - av_packet_unref(&avpkt); -#else - av_free_packet(&avpkt); #endif } + +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) + int recv_result = avcodec_receive_frame(cc, frame); + if (recv_result < 0) { + input->curr_pkt_size = 0; + continue; + } +#else + int got_frame; + int len = avcodec_decode_audio4(cc, frame, &got_frame, &input->pkt); if (len < 0) { /* this is often reached when seeking, not sure why */ input->curr_pkt_size = 0; continue; } - input->curr_pkt_size -= len; - input->curr_pkt_buf += len; - if (got_frame) { - int res = swr_convert(swr, - &output->buffer, - frame->nb_samples, - (const uint8_t **)frame->extended_data, - frame->nb_samples); - if (res < 0) - res = 0; - output->buffer_pos = output->buffer; + if (!got_frame) + continue; +#endif + + int64_t frame_ts = -1; + if (frame->pts) + frame_ts = frame->pts; + else if (frame->pkt_pts) + frame_ts = frame->pkt_pts; + else if (frame->pkt_dts) + frame_ts = frame->pkt_dts; + + const uint8_t **in = (const uint8_t **)frame->extended_data; + int in_count = frame->nb_samples; + if (input->seek_ts > 0 && (frame_ts >= 0 || input->prev_frame_end >= 0)) { + struct ffmpeg_private *priv = ip_data->private; + AVStream *st = priv->input_context->streams[priv->input->stream_index]; + if (frame_ts >= 0) + frame_ts = av_rescale_q(frame_ts, st->time_base, AV_TIME_BASE_Q); + else + frame_ts = input->prev_frame_end; + int64_t frame_dur = av_rescale(frame->nb_samples, AV_TIME_BASE, sf_get_rate(ip_data->sf)); + int64_t frame_end = frame_ts + frame_dur; + input->prev_frame_end = frame_end; + d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", input->seek_ts, frame_ts, frame_end); + if (frame_end <= input->seek_ts) + continue; + + /* skip part of this frame */ + int64_t skip_samples = av_rescale(input->seek_ts - frame_ts, sf_get_rate(ip_data->sf), AV_TIME_BASE); + in_count -= skip_samples; + if (av_sample_fmt_is_planar(frame->format)) { + for (int i = 0; i < cc->channels; i++) { + in[i] += skip_samples * sf_get_sample_size(ip_data->sf); + } + } else { + *in += skip_samples * cc->channels * sf_get_sample_size(ip_data->sf); + } + + input->seek_ts = -1; + input->prev_frame_end = -1; + } + + int res = swr_convert(swr, + &output->buffer, + frame->nb_samples, + in, + in_count); + if (res < 0) + res = 0; + + output->buffer_pos = output->buffer; #if LIBAVCODEC_VERSION_MAJOR >= 60 - output->buffer_used_len = res * cc->ch_layout.nb_channels * sf_get_sample_size(ip_data->sf); + output->buffer_used_len = res * cc->ch_layout.nb_channels * sf_get_sample_size(ip_data->sf); #else - output->buffer_used_len = res * cc->channels * sf_get_sample_size(ip_data->sf); + output->buffer_used_len = res * cc->channels * sf_get_sample_size(ip_data->sf); #endif + #if LIBAVCODEC_VERSION_MAJOR >= 56 - av_frame_free(&frame); + av_frame_free(&frame); #else - avcodec_free_frame(&frame); + avcodec_free_frame(&frame); #endif - return output->buffer_used_len; - } + return output->buffer_used_len; } /* This should never get here. */ return -IP_ERROR_INTERNAL; @@ -437,13 +468,16 @@ static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) AVStream *st = priv->input_context->streams[priv->input->stream_index]; int ret; - int64_t pts = av_rescale_q(offset * AV_TIME_BASE, AV_TIME_BASE_Q, st->time_base); + priv->input->seek_ts = offset * AV_TIME_BASE; + priv->input->prev_frame_end = -1; + int64_t ts = av_rescale(offset, st->time_base.den, st->time_base.num); avcodec_flush_buffers(priv->codec_context); /* Force reading a new packet in next ffmpeg_fill_buffer(). */ priv->input->curr_pkt_size = 0; - ret = av_seek_frame(priv->input_context, priv->input->stream_index, pts, 0); + ret = avformat_seek_file(priv->input_context, + priv->input->stream_index, 0, ts, ts, 0); if (ret < 0) { return -IP_ERROR_FUNCTION_NOT_SUPPORTED; From ec84fa7b4b4a72c19e2ab04eac864c99df6d2e4e Mon Sep 17 00:00:00 2001 From: ihy123 Date: Fri, 15 Aug 2025 21:42:19 +0300 Subject: [PATCH 02/12] ip/ffmpeg: skip samples only when needed --- ip/ffmpeg.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index ecbf00582..5f5a4f37b 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -393,22 +393,26 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext frame_ts = av_rescale_q(frame_ts, st->time_base, AV_TIME_BASE_Q); else frame_ts = input->prev_frame_end; - int64_t frame_dur = av_rescale(frame->nb_samples, AV_TIME_BASE, sf_get_rate(ip_data->sf)); - int64_t frame_end = frame_ts + frame_dur; - input->prev_frame_end = frame_end; - d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", input->seek_ts, frame_ts, frame_end); - if (frame_end <= input->seek_ts) - continue; - /* skip part of this frame */ - int64_t skip_samples = av_rescale(input->seek_ts - frame_ts, sf_get_rate(ip_data->sf), AV_TIME_BASE); - in_count -= skip_samples; - if (av_sample_fmt_is_planar(frame->format)) { - for (int i = 0; i < cc->channels; i++) { - in[i] += skip_samples * sf_get_sample_size(ip_data->sf); + if (frame_ts < input->seek_ts) { + int64_t frame_dur = av_rescale(frame->nb_samples, AV_TIME_BASE, sf_get_rate(ip_data->sf)); + int64_t frame_end = frame_ts + frame_dur; + input->prev_frame_end = frame_end; + d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", input->seek_ts, frame_ts, frame_end); + if (frame_end <= input->seek_ts) + continue; + + /* skip part of this frame */ + int64_t skip_samples = av_rescale(input->seek_ts - frame_ts, sf_get_rate(ip_data->sf), AV_TIME_BASE); + in_count -= skip_samples; + if (av_sample_fmt_is_planar(frame->format)) { + for (int i = 0; i < cc->channels; i++) { + in[i] += skip_samples * sf_get_sample_size(ip_data->sf); + } + } else { + *in += skip_samples * cc->channels * sf_get_sample_size(ip_data->sf); } - } else { - *in += skip_samples * cc->channels * sf_get_sample_size(ip_data->sf); + d_print("skipping %ld samples\n", skip_samples); } input->seek_ts = -1; From 70a8761fc1d30bfa302332d0807b89c3776d3f31 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sat, 16 Aug 2025 02:43:55 +0300 Subject: [PATCH 03/12] ip/ffmpeg: remove excessive version checks ffmpeg download page states that v4.0.6 has - libavutil 56.14.100 - libavcodec 58.18.100 - libavformat 58.12.100 (https://ffmpeg.org/olddownload.html) After removing all checks for versions lower than these, the plugin still compiles with v3.3.9 headers. After all, why be better with compatibility than developers themselves? --- ip/ffmpeg.c | 109 +++++++++++----------------------------------------- 1 file changed, 23 insertions(+), 86 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index 5f5a4f37b..f6a11f450 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -25,7 +25,6 @@ #include "../config/ffmpeg.h" #endif -#include #include #include #include @@ -43,7 +42,6 @@ struct ffmpeg_input { AVPacket pkt; int curr_pkt_size; - uint8_t *curr_pkt_buf; int64_t seek_ts; int64_t prev_frame_end; int stream_index; @@ -80,17 +78,12 @@ static struct ffmpeg_input *ffmpeg_input_create(void) input->curr_pkt_size = 0; input->seek_ts = -1; input->prev_frame_end = -1; - input->curr_pkt_buf = input->pkt.data; return input; } static void ffmpeg_input_free(struct ffmpeg_input *input) { -#if LIBAVCODEC_VERSION_MAJOR >= 56 av_packet_unref(&input->pkt); -#else - av_free_packet(&input->pkt); -#endif free(input); } @@ -132,7 +125,7 @@ static void ffmpeg_init(void) av_log_set_level(AV_LOG_QUIET); -#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 18, 100) +#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100) /* We could register decoders explicitly to save memory, but we have to * be careful about compatibility. */ av_register_all(); @@ -149,9 +142,7 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) AVCodec const *codec; AVCodecContext *cc = NULL; AVFormatContext *ic = NULL; -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) AVCodecParameters *cp = NULL; -#endif SwrContext *swr = NULL; ffmpeg_init(); @@ -171,20 +162,11 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) } for (i = 0; i < ic->nb_streams; i++) { - -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) cp = ic->streams[i]->codecpar; if (cp->codec_type == AVMEDIA_TYPE_AUDIO) { stream_index = i; break; } -#else - cc = ic->streams[i]->codec; - if (cc->codec_type == AVMEDIA_TYPE_AUDIO) { - stream_index = i; - break; - } -#endif } if (stream_index == -1) { @@ -193,13 +175,9 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) break; } -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) codec = avcodec_find_decoder(cp->codec_id); cc = avcodec_alloc_context3(codec); avcodec_parameters_to_context(cc, cp); -#else - codec = avcodec_find_decoder(cc->codec_id); -#endif if (!codec) { d_print("codec not found: %d, %s\n", cc->codec_id, avcodec_get_name(cc->codec_id)); err = -IP_ERROR_UNSUPPORTED_FILE_TYPE; @@ -217,9 +195,7 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) if (err < 0) { /* Clean up. cc is never opened at this point. (See above assumption.) */ -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) avcodec_free_context(&cc); -#endif avformat_close_input(&ic); return err; } @@ -231,9 +207,7 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) priv->input = ffmpeg_input_create(); if (priv->input == NULL) { avcodec_close(cc); -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) avcodec_free_context(&cc); -#endif avformat_close_input(&ic); free(priv); return -IP_ERROR_INTERNAL; @@ -244,7 +218,7 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) /* Prepare for resampling. */ out_sample_rate = min_u(cc->sample_rate, 384000); swr = swr_alloc(); -#if LIBAVCODEC_VERSION_MAJOR >= 60 +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) if (cc->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&cc->ch_layout, cc->ch_layout.nb_channels); av_opt_set_chlayout(swr, "in_chlayout", &cc->ch_layout, 0); @@ -259,7 +233,7 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) priv->swr = swr; ip_data->private = priv; -#if LIBAVCODEC_VERSION_MAJOR >= 60 +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) ip_data->sf = sf_rate(out_sample_rate) | sf_channels(cc->ch_layout.nb_channels); #else ip_data->sf = sf_rate(out_sample_rate) | sf_channels(cc->channels); @@ -281,10 +255,12 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) } swr_init(swr); ip_data->sf |= sf_host_endian(); -#if LIBAVCODEC_VERSION_MAJOR >= 60 - channel_map_init_waveex(cc->ch_layout.nb_channels, cc->ch_layout.u.mask, ip_data->channel_map); +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) + channel_map_init_waveex(cc->ch_layout.nb_channels, + cc->ch_layout.u.mask, ip_data->channel_map); #else - channel_map_init_waveex(cc->channels, cc->channel_layout, ip_data->channel_map); + channel_map_init_waveex(cc->channels, + cc->channel_layout, ip_data->channel_map); #endif return 0; } @@ -294,9 +270,7 @@ static int ffmpeg_close(struct input_plugin_data *ip_data) struct ffmpeg_private *priv = ip_data->private; avcodec_close(priv->codec_context); -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) avcodec_free_context(&priv->codec_context); -#endif avformat_close_input(&priv->input_context); swr_free(&priv->swr); ffmpeg_input_free(priv->input); @@ -310,39 +284,27 @@ static int ffmpeg_close(struct input_plugin_data *ip_data) * This returns the number of bytes added to the buffer. * It returns < 0 on error. 0 on EOF. */ -static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext *ic, AVCodecContext *cc, - struct ffmpeg_input *input, struct ffmpeg_output *output, SwrContext *swr) +static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, + AVFormatContext *ic, AVCodecContext *cc, + struct ffmpeg_input *input, struct ffmpeg_output *output, + SwrContext *swr) { -#if LIBAVCODEC_VERSION_MAJOR >= 56 AVFrame *frame = av_frame_alloc(); -#else - AVFrame *frame = avcodec_alloc_frame(); -#endif while (1) { if (input->curr_pkt_size <= 0) { -#if LIBAVCODEC_VERSION_MAJOR >= 56 av_packet_unref(&input->pkt); -#else - av_free_packet(&input->pkt); -#endif if (av_read_frame(ic, &input->pkt) < 0) { /* Force EOF once we can read no longer. */ -#if LIBAVCODEC_VERSION_MAJOR >= 56 av_frame_free(&frame); -#else - avcodec_free_frame(&frame); -#endif return 0; } if (input->pkt.stream_index != input->stream_index) continue; input->curr_pkt_size = input->pkt.size; - input->curr_pkt_buf = input->pkt.data; input->curr_size += input->pkt.size; input->curr_duration += input->pkt.duration; -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) int send_result = avcodec_send_packet(cc, &input->pkt); if (send_result != 0 && send_result != AVERROR(EAGAIN)) { d_print("avcodec_send_packet() returned %d\n", send_result); @@ -355,32 +317,17 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext } return -IP_ERROR_INTERNAL; } -#endif } -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 48, 101) int recv_result = avcodec_receive_frame(cc, frame); if (recv_result < 0) { input->curr_pkt_size = 0; continue; } -#else - int got_frame; - int len = avcodec_decode_audio4(cc, frame, &got_frame, &input->pkt); - if (len < 0) { - /* this is often reached when seeking, not sure why */ - input->curr_pkt_size = 0; - continue; - } - if (!got_frame) - continue; -#endif int64_t frame_ts = -1; if (frame->pts) frame_ts = frame->pts; - else if (frame->pkt_pts) - frame_ts = frame->pkt_pts; else if (frame->pkt_dts) frame_ts = frame->pkt_dts; @@ -395,7 +342,7 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext frame_ts = input->prev_frame_end; if (frame_ts < input->seek_ts) { - int64_t frame_dur = av_rescale(frame->nb_samples, AV_TIME_BASE, sf_get_rate(ip_data->sf)); + int64_t frame_dur = av_rescale(frame->nb_samples, AV_TIME_BASE, frame->sample_rate); int64_t frame_end = frame_ts + frame_dur; input->prev_frame_end = frame_end; d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", input->seek_ts, frame_ts, frame_end); @@ -403,14 +350,14 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext continue; /* skip part of this frame */ - int64_t skip_samples = av_rescale(input->seek_ts - frame_ts, sf_get_rate(ip_data->sf), AV_TIME_BASE); + int64_t skip_samples = av_rescale(input->seek_ts - frame_ts, frame->sample_rate, AV_TIME_BASE); in_count -= skip_samples; if (av_sample_fmt_is_planar(frame->format)) { - for (int i = 0; i < cc->channels; i++) { + for (int i = 0; i < sf_get_channels(ip_data->sf); i++) { in[i] += skip_samples * sf_get_sample_size(ip_data->sf); } } else { - *in += skip_samples * cc->channels * sf_get_sample_size(ip_data->sf); + *in += skip_samples * sf_get_frame_size(ip_data->sf); } d_print("skipping %ld samples\n", skip_samples); } @@ -428,17 +375,9 @@ static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, AVFormatContext res = 0; output->buffer_pos = output->buffer; -#if LIBAVCODEC_VERSION_MAJOR >= 60 - output->buffer_used_len = res * cc->ch_layout.nb_channels * sf_get_sample_size(ip_data->sf); -#else - output->buffer_used_len = res * cc->channels * sf_get_sample_size(ip_data->sf); -#endif + output->buffer_used_len = res * sf_get_frame_size(ip_data->sf); -#if LIBAVCODEC_VERSION_MAJOR >= 56 av_frame_free(&frame); -#else - avcodec_free_frame(&frame); -#endif return output->buffer_used_len; } /* This should never get here. */ @@ -453,11 +392,11 @@ static int ffmpeg_read(struct input_plugin_data *ip_data, char *buffer, int coun int out_size; if (output->buffer_used_len == 0) { - rc = ffmpeg_fill_buffer(ip_data, priv->input_context, priv->codec_context, + rc = ffmpeg_fill_buffer(ip_data, + priv->input_context, priv->codec_context, priv->input, priv->output, priv->swr); - if (rc <= 0) { + if (rc <= 0) return rc; - } } out_size = min_i(output->buffer_used_len, count); memcpy(buffer, output->buffer_pos, out_size); @@ -477,6 +416,7 @@ static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) int64_t ts = av_rescale(offset, st->time_base.den, st->time_base.num); avcodec_flush_buffers(priv->codec_context); + /* TODO: also flush swresample buffers */ /* Force reading a new packet in next ffmpeg_fill_buffer(). */ priv->input->curr_pkt_size = 0; @@ -501,7 +441,8 @@ static void ffmpeg_read_metadata(struct growing_keyvals *c, AVDictionary *metada } } -static int ffmpeg_read_comments(struct input_plugin_data *ip_data, struct keyval **comments) +static int ffmpeg_read_comments(struct input_plugin_data *ip_data, + struct keyval **comments) { struct ffmpeg_private *priv = ip_data->private; AVFormatContext *ic = priv->input_context; @@ -538,11 +479,7 @@ static long ffmpeg_current_bitrate(struct input_plugin_data *ip_data) AVStream *st = priv->input_context->streams[priv->input->stream_index]; long bitrate = -1; /* ape codec returns silly numbers */ -#if LIBAVCODEC_VERSION_MAJOR >= 55 if (priv->codec->id == AV_CODEC_ID_APE) -#else - if (priv->codec->id == CODEC_ID_APE) -#endif return -1; if (priv->input->curr_duration > 0) { double seconds = priv->input->curr_duration * av_q2d(st->time_base); From e1a2374a60a41987f95c7d892ebc1b150df7acb1 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 04:05:36 +0300 Subject: [PATCH 04/12] ip/ffmpeg: major refactor --- ip/ffmpeg.c | 643 +++++++++++++++++++++++++++------------------------- 1 file changed, 330 insertions(+), 313 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index f6a11f450..42f630ee7 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -35,84 +35,32 @@ #include #endif -#ifndef AVCODEC_MAX_AUDIO_FRAME_SIZE -#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 -#endif +struct ffmpeg_private { + AVCodecContext *codec_ctx; + AVFormatContext *format_ctx; + AVCodec const *codec; + SwrContext *swr; + int stream_index; -struct ffmpeg_input { - AVPacket pkt; - int curr_pkt_size; + AVPacket *pkt; + AVFrame *frame; int64_t seek_ts; int64_t prev_frame_end; - int stream_index; + /* A buffer to hold swr_convert()-ed samples */ + AVFrame *swr_frame; + int swr_frame_start; + + /* Bitrate estimation */ unsigned long curr_size; unsigned long curr_duration; }; -struct ffmpeg_output { - uint8_t *buffer; - uint8_t *buffer_malloc; - uint8_t *buffer_pos; /* current buffer position */ - int buffer_used_len; -}; - -struct ffmpeg_private { - AVCodecContext *codec_context; - AVFormatContext *input_context; - AVCodec const *codec; - SwrContext *swr; - - struct ffmpeg_input *input; - struct ffmpeg_output *output; -}; - -static struct ffmpeg_input *ffmpeg_input_create(void) -{ - struct ffmpeg_input *input = xnew(struct ffmpeg_input, 1); - - if (av_new_packet(&input->pkt, 0) != 0) { - free(input); - return NULL; - } - input->curr_pkt_size = 0; - input->seek_ts = -1; - input->prev_frame_end = -1; - return input; -} - -static void ffmpeg_input_free(struct ffmpeg_input *input) -{ - av_packet_unref(&input->pkt); - free(input); -} - -static struct ffmpeg_output *ffmpeg_output_create(void) -{ - struct ffmpeg_output *output = xnew(struct ffmpeg_output, 1); - - output->buffer_malloc = xnew(uint8_t, AVCODEC_MAX_AUDIO_FRAME_SIZE + 15); - output->buffer = output->buffer_malloc; - /* align to 16 bytes so avcodec can SSE/Altivec/etc */ - while ((intptr_t) output->buffer % 16) - output->buffer += 1; - output->buffer_pos = output->buffer; - output->buffer_used_len = 0; - return output; -} - -static void ffmpeg_output_free(struct ffmpeg_output *output) -{ - free(output->buffer_malloc); - output->buffer_malloc = NULL; - output->buffer = NULL; - free(output); -} - -static inline void ffmpeg_buffer_flush(struct ffmpeg_output *output) +static const char *ffmpeg_errmsg(int err) { - output->buffer_pos = output->buffer; - output->buffer_used_len = 0; + static char errstr[AV_ERROR_MAX_STRING_SIZE]; + av_strerror(err, errstr, AV_ERROR_MAX_STRING_SIZE); + return errstr; } static void ffmpeg_init(void) @@ -132,303 +80,372 @@ static void ffmpeg_init(void) #endif } -static int ffmpeg_open(struct input_plugin_data *ip_data) +static int ffmpeg_open_input(struct input_plugin_data *ip_data, + struct ffmpeg_private *priv) { - struct ffmpeg_private *priv; - int err = 0; - int i; - int stream_index = -1; - int out_sample_rate; - AVCodec const *codec; - AVCodecContext *cc = NULL; AVFormatContext *ic = NULL; + AVCodecContext *cc = NULL; AVCodecParameters *cp = NULL; - SwrContext *swr = NULL; - - ffmpeg_init(); + AVCodec const *codec = NULL; + int stream_index = -1; - err = avformat_open_input(&ic, ip_data->filename, NULL, NULL); - if (err < 0) { - d_print("av_open failed: %d\n", err); - return -IP_ERROR_FILE_FORMAT; + int err; + int res = avformat_open_input(&ic, ip_data->filename, NULL, NULL); + if (res < 0) { + err = -IP_ERROR_FILE_FORMAT; + goto err; } - do { - err = avformat_find_stream_info(ic, NULL); - if (err < 0) { - d_print("unable to find stream info: %d\n", err); - err = -IP_ERROR_FILE_FORMAT; - break; - } - - for (i = 0; i < ic->nb_streams; i++) { - cp = ic->streams[i]->codecpar; - if (cp->codec_type == AVMEDIA_TYPE_AUDIO) { - stream_index = i; - break; - } - } - - if (stream_index == -1) { - d_print("could not find audio stream\n"); - err = -IP_ERROR_FILE_FORMAT; - break; - } - - codec = avcodec_find_decoder(cp->codec_id); - cc = avcodec_alloc_context3(codec); - avcodec_parameters_to_context(cc, cp); - if (!codec) { - d_print("codec not found: %d, %s\n", cc->codec_id, avcodec_get_name(cc->codec_id)); - err = -IP_ERROR_UNSUPPORTED_FILE_TYPE; - break; - } + res = avformat_find_stream_info(ic, NULL); + if (res < 0) { + d_print("unable to find stream info\n"); + err = -IP_ERROR_FILE_FORMAT; + goto err; + } - if (avcodec_open2(cc, codec, NULL) < 0) { - d_print("could not open codec: %d, %s\n", cc->codec_id, avcodec_get_name(cc->codec_id)); - err = -IP_ERROR_UNSUPPORTED_FILE_TYPE; + for (int i = 0; i < ic->nb_streams; i++) { + cp = ic->streams[i]->codecpar; + if (cp->codec_type == AVMEDIA_TYPE_AUDIO) { + stream_index = i; break; } + } - /* We assume below that no more errors follow. */ - } while (0); + if (stream_index == -1) { + d_print("could not find audio stream\n"); + err = -IP_ERROR_FILE_FORMAT; + goto err_silent; + } - if (err < 0) { - /* Clean up. cc is never opened at this point. (See above assumption.) */ - avcodec_free_context(&cc); - avformat_close_input(&ic); - return err; + codec = avcodec_find_decoder(cp->codec_id); + if (!codec) { + d_print("codec (id: %d, name: %s) not found\n", + cc->codec_id, avcodec_get_name(cc->codec_id)); + err = -IP_ERROR_UNSUPPORTED_FILE_TYPE; + goto err_silent; + } + cc = avcodec_alloc_context3(codec); + avcodec_parameters_to_context(cc, cp); + + res = avcodec_open2(cc, codec, NULL); + if (res < 0) { + d_print("could not open codec (id: %d, name: %s)\n", + cc->codec_id, avcodec_get_name(cc->codec_id)); + err = -IP_ERROR_UNSUPPORTED_FILE_TYPE; + goto err; } - priv = xnew(struct ffmpeg_private, 1); - priv->codec_context = cc; - priv->input_context = ic; + priv->format_ctx = ic; + priv->codec_ctx = cc; priv->codec = codec; - priv->input = ffmpeg_input_create(); - if (priv->input == NULL) { - avcodec_close(cc); - avcodec_free_context(&cc); - avformat_close_input(&ic); - free(priv); - return -IP_ERROR_INTERNAL; + priv->stream_index = stream_index; + return 0; +err: + d_print("%s\n", ffmpeg_errmsg(res)); +err_silent: + avcodec_free_context(&cc); + avformat_close_input(&ic); + return err; +} + +static void ffmpeg_set_sf_and_swr_opts(SwrContext *swr, AVCodecContext *cc, + sample_format_t *sf_out, enum AVSampleFormat *out_sample_fmt) +{ + int out_sample_rate = min_u(cc->sample_rate, 384000); + sample_format_t sf = sf_rate(out_sample_rate) | sf_host_endian(); + av_opt_set_int(swr, "in_sample_rate", cc->sample_rate, 0); + av_opt_set_int(swr, "out_sample_rate", out_sample_rate, 0); + + *out_sample_fmt = cc->sample_fmt; + switch (*out_sample_fmt) { + case AV_SAMPLE_FMT_U8: + sf |= sf_bits(8) | sf_signed(0); + break; + case AV_SAMPLE_FMT_S32: + sf |= sf_bits(32) | sf_signed(1); + break; + default: + sf |= sf_bits(16) | sf_signed(1); + *out_sample_fmt = AV_SAMPLE_FMT_S16; } - priv->input->stream_index = stream_index; - priv->output = ffmpeg_output_create(); + av_opt_set_sample_fmt(swr, "in_sample_fmt", cc->sample_fmt, 0); + av_opt_set_sample_fmt(swr, "out_sample_fmt", *out_sample_fmt, 0); - /* Prepare for resampling. */ - out_sample_rate = min_u(cc->sample_rate, 384000); - swr = swr_alloc(); #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) + sf |= sf_channels(cc->ch_layout.nb_channels); + if (cc->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&cc->ch_layout, cc->ch_layout.nb_channels); - av_opt_set_chlayout(swr, "in_chlayout", &cc->ch_layout, 0); - av_opt_set_chlayout(swr, "out_chlayout", &cc->ch_layout, 0); + av_opt_set_chlayout(swr, "in_chlayout", &cc->ch_layout, 0); + av_opt_set_chlayout(swr, "out_chlayout", &cc->ch_layout, 0); #else - av_opt_set_int(swr, "in_channel_layout", av_get_default_channel_layout(cc->channels), 0); - av_opt_set_int(swr, "out_channel_layout", av_get_default_channel_layout(cc->channels), 0); + sf |= sf_channels(cc->channels); + + av_opt_set_int(swr, "in_channel_layout", + av_get_default_channel_layout(cc->channels), 0); + av_opt_set_int(swr, "out_channel_layout", + av_get_default_channel_layout(cc->channels), 0); #endif - av_opt_set_int(swr, "in_sample_rate", cc->sample_rate, 0); - av_opt_set_int(swr, "out_sample_rate", out_sample_rate, 0); - av_opt_set_sample_fmt(swr, "in_sample_fmt", cc->sample_fmt, 0); - priv->swr = swr; - ip_data->private = priv; + *sf_out = sf; +} + +static int ffmpeg_init_swr_frame(struct ffmpeg_private *priv, + sample_format_t sf, enum AVSampleFormat out_sample_fmt) +{ + AVCodecContext *cc = priv->codec_ctx; + AVFrame *frame = av_frame_alloc(); + #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) - ip_data->sf = sf_rate(out_sample_rate) | sf_channels(cc->ch_layout.nb_channels); + av_channel_layout_copy(&frame->ch_layout, &cc->ch_layout); #else - ip_data->sf = sf_rate(out_sample_rate) | sf_channels(cc->channels); + frame->channel_layout = av_get_default_channel_layout(cc->channels); #endif - switch (cc->sample_fmt) { - case AV_SAMPLE_FMT_U8: - ip_data->sf |= sf_bits(8) | sf_signed(0); - av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_U8, 0); - break; - case AV_SAMPLE_FMT_S32: - ip_data->sf |= sf_bits(32) | sf_signed(1); - av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S32, 0); - break; - /* AV_SAMPLE_FMT_S16 */ - default: - ip_data->sf |= sf_bits(16) | sf_signed(1); - av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); - break; + + frame->sample_rate = sf_get_rate(sf); + frame->format = out_sample_fmt; + + /* NOTE: 10 sec is probably too much, but the amount of space + * needed for swr_convert() is unpredictable */ + frame->nb_samples = 10 * sf_get_rate(sf); + int res = av_frame_get_buffer(frame, 0); + if (res < 0) { + d_print("av_frame_get_buffer(): %s\n", ffmpeg_errmsg(res)); + return -IP_ERROR_INTERNAL; } - swr_init(swr); - ip_data->sf |= sf_host_endian(); + frame->nb_samples = 0; + + priv->swr_frame = frame; + return 0; +} + +static void ffmpeg_free(struct ffmpeg_private *priv) +{ + avcodec_close(priv->codec_ctx); + avcodec_free_context(&priv->codec_ctx); + avformat_close_input(&priv->format_ctx); + + swr_free(&priv->swr); + + av_frame_free(&priv->frame); + av_packet_free(&priv->pkt); + av_frame_free(&priv->swr_frame); +} + +static int ffmpeg_open(struct input_plugin_data *ip_data) +{ + struct ffmpeg_private priv; + enum AVSampleFormat out_sample_fmt; + memset(&priv, 0, sizeof(struct ffmpeg_private)); + + ffmpeg_init(); + + int err = ffmpeg_open_input(ip_data, &priv); + if (err < 0) + return err; + + priv.pkt = av_packet_alloc(); + priv.frame = av_frame_alloc(); + priv.seek_ts = -1; + priv.prev_frame_end = -1; + + priv.swr = swr_alloc(); + ffmpeg_set_sf_and_swr_opts(priv.swr, priv.codec_ctx, + &ip_data->sf, &out_sample_fmt); + swr_init(priv.swr); + + err = ffmpeg_init_swr_frame(&priv, ip_data->sf, out_sample_fmt); + if (err < 0) { + ffmpeg_free(&priv); + return err; + } + #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) - channel_map_init_waveex(cc->ch_layout.nb_channels, - cc->ch_layout.u.mask, ip_data->channel_map); + channel_map_init_waveex(priv.codec_ctx->ch_layout.nb_channels, + priv.codec_ctx->ch_layout.u.mask, ip_data->channel_map); #else - channel_map_init_waveex(cc->channels, - cc->channel_layout, ip_data->channel_map); + channel_map_init_waveex(priv.codec_ctx->channels, + priv.codec_ctx->channel_layout, ip_data->channel_map); #endif + + ip_data->private = xnew(struct ffmpeg_private, 1); + memcpy(ip_data->private, &priv, sizeof(struct ffmpeg_private)); return 0; } static int ffmpeg_close(struct input_plugin_data *ip_data) { - struct ffmpeg_private *priv = ip_data->private; - - avcodec_close(priv->codec_context); - avcodec_free_context(&priv->codec_context); - avformat_close_input(&priv->input_context); - swr_free(&priv->swr); - ffmpeg_input_free(priv->input); - ffmpeg_output_free(priv->output); - free(priv); + ffmpeg_free(ip_data->private); + free(ip_data->private); ip_data->private = NULL; return 0; } /* - * This returns the number of bytes added to the buffer. - * It returns < 0 on error. 0 on EOF. + * return: + * 0 - retry + * >0 - ok */ -static int ffmpeg_fill_buffer(struct input_plugin_data *ip_data, - AVFormatContext *ic, AVCodecContext *cc, - struct ffmpeg_input *input, struct ffmpeg_output *output, - SwrContext *swr) +static int ffmpeg_seek_into_frame(struct ffmpeg_private *priv, int64_t frame_ts) { - AVFrame *frame = av_frame_alloc(); - while (1) { - if (input->curr_pkt_size <= 0) { - av_packet_unref(&input->pkt); - if (av_read_frame(ic, &input->pkt) < 0) { - /* Force EOF once we can read no longer. */ - av_frame_free(&frame); - return 0; - } - - if (input->pkt.stream_index != input->stream_index) - continue; - input->curr_pkt_size = input->pkt.size; - input->curr_size += input->pkt.size; - input->curr_duration += input->pkt.duration; - - int send_result = avcodec_send_packet(cc, &input->pkt); - if (send_result != 0 && send_result != AVERROR(EAGAIN)) { - d_print("avcodec_send_packet() returned %d\n", send_result); - char errstr[AV_ERROR_MAX_STRING_SIZE]; - if (!av_strerror(send_result, errstr, AV_ERROR_MAX_STRING_SIZE )) - { - d_print("av_strerror(): %s\n", errstr); - } else { - d_print("av_strerror(): Description for error cannot be found\n"); - } - return -IP_ERROR_INTERNAL; - } - } + if (frame_ts >= 0) { + AVStream *s = priv->format_ctx->streams[priv->stream_index]; + frame_ts = av_rescale_q(frame_ts, s->time_base, AV_TIME_BASE_Q); + } else { + frame_ts = priv->prev_frame_end; + } - int recv_result = avcodec_receive_frame(cc, frame); - if (recv_result < 0) { - input->curr_pkt_size = 0; - continue; - } + if (frame_ts >= priv->seek_ts) + return 1; - int64_t frame_ts = -1; - if (frame->pts) - frame_ts = frame->pts; - else if (frame->pkt_dts) - frame_ts = frame->pkt_dts; - - const uint8_t **in = (const uint8_t **)frame->extended_data; - int in_count = frame->nb_samples; - if (input->seek_ts > 0 && (frame_ts >= 0 || input->prev_frame_end >= 0)) { - struct ffmpeg_private *priv = ip_data->private; - AVStream *st = priv->input_context->streams[priv->input->stream_index]; - if (frame_ts >= 0) - frame_ts = av_rescale_q(frame_ts, st->time_base, AV_TIME_BASE_Q); - else - frame_ts = input->prev_frame_end; - - if (frame_ts < input->seek_ts) { - int64_t frame_dur = av_rescale(frame->nb_samples, AV_TIME_BASE, frame->sample_rate); - int64_t frame_end = frame_ts + frame_dur; - input->prev_frame_end = frame_end; - d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", input->seek_ts, frame_ts, frame_end); - if (frame_end <= input->seek_ts) - continue; - - /* skip part of this frame */ - int64_t skip_samples = av_rescale(input->seek_ts - frame_ts, frame->sample_rate, AV_TIME_BASE); - in_count -= skip_samples; - if (av_sample_fmt_is_planar(frame->format)) { - for (int i = 0; i < sf_get_channels(ip_data->sf); i++) { - in[i] += skip_samples * sf_get_sample_size(ip_data->sf); - } - } else { - *in += skip_samples * sf_get_frame_size(ip_data->sf); - } - d_print("skipping %ld samples\n", skip_samples); - } - - input->seek_ts = -1; - input->prev_frame_end = -1; - } + int64_t frame_dur = av_rescale(priv->frame->nb_samples, + AV_TIME_BASE, priv->frame->sample_rate); + int64_t frame_end = frame_ts + frame_dur; + priv->prev_frame_end = frame_end; + + d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", + priv->seek_ts, frame_ts, frame_end); + + if (frame_end <= priv->seek_ts) + return 0; + + int64_t skip_samples = av_rescale(priv->seek_ts - frame_ts, + priv->frame->sample_rate, AV_TIME_BASE); + priv->frame->nb_samples -= skip_samples; + + int bps = av_get_bytes_per_sample(priv->frame->format); +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) + int channels = priv->codec_ctx->ch_layout.nb_channels; +#else + int channels = priv->codec_ctx->channels; +#endif + + /* Just modify frame's data pointer because it's throw-away */ + if (av_sample_fmt_is_planar(priv->frame->format)) { + for (int i = 0; i < channels; i++) + priv->frame->extended_data[i] += skip_samples * bps; + } else { + priv->frame->extended_data[0] += skip_samples * channels * bps; + } + d_print("skipping %ld samples\n", skip_samples); + return 1; +} - int res = swr_convert(swr, - &output->buffer, - frame->nb_samples, - in, - in_count); +/* + * return: + * <0 - error + * 0 - retry + * >0 - ok + */ +static int ffmpeg_get_frame(struct ffmpeg_private *priv) +{ + int res = avcodec_receive_frame(priv->codec_ctx, priv->frame); + if (res == AVERROR(EAGAIN)) { + av_packet_unref(priv->pkt); + res = av_read_frame(priv->format_ctx, priv->pkt); if (res < 0) - res = 0; + return res; + + if (priv->pkt->stream_index != priv->stream_index) + return 0; - output->buffer_pos = output->buffer; - output->buffer_used_len = res * sf_get_frame_size(ip_data->sf); + priv->curr_size += priv->pkt->size; + priv->curr_duration += priv->pkt->duration; - av_frame_free(&frame); - return output->buffer_used_len; + res = avcodec_send_packet(priv->codec_ctx, priv->pkt); + if (res == AVERROR(EAGAIN)) + return 0; } - /* This should never get here. */ - return -IP_ERROR_INTERNAL; + if (res < 0) + return res; + + int64_t frame_ts = -1; + if (priv->frame->pts >= 0) + frame_ts = priv->frame->pts; + else if (priv->frame->pkt_dts >= 0) + frame_ts = priv->frame->pkt_dts; + + if (priv->seek_ts > 0 && (frame_ts >= 0 || priv->prev_frame_end >= 0)) { + if (ffmpeg_seek_into_frame(priv, frame_ts) == 0) + return 0; + priv->seek_ts = -1; + priv->prev_frame_end = -1; + } + return 1; +} + +static int ffmpeg_convert_frame(struct ffmpeg_private *priv) +{ + int res = swr_convert(priv->swr, + priv->swr_frame->extended_data, + /* TODO: proper buffer capacity */ + priv->frame->nb_samples, + (const uint8_t **)priv->frame->extended_data, + priv->frame->nb_samples); + if (res >= 0) { + priv->swr_frame->nb_samples = res; + priv->swr_frame_start = 0; + } + return res; } static int ffmpeg_read(struct input_plugin_data *ip_data, char *buffer, int count) { struct ffmpeg_private *priv = ip_data->private; - struct ffmpeg_output *output = priv->output; - int rc; - int out_size; - - if (output->buffer_used_len == 0) { - rc = ffmpeg_fill_buffer(ip_data, - priv->input_context, priv->codec_context, - priv->input, priv->output, priv->swr); - if (rc <= 0) - return rc; + int written = 0; + int res; + + count /= sf_get_frame_size(ip_data->sf); + + while (count) { + if (priv->swr_frame->nb_samples == 0) { + res = ffmpeg_get_frame(priv); + if (res == AVERROR_EOF) + break; + else if (res == 0) + continue; + else if (res < 0) + goto err; + + res = ffmpeg_convert_frame(priv); + if (res < 0) + goto err; + } + + int copy_frames = min_i(count, priv->swr_frame->nb_samples); + int copy_bytes = copy_frames * sf_get_frame_size(ip_data->sf); + void *dst = priv->swr_frame->extended_data[0] + priv->swr_frame_start; + memcpy(buffer + written, dst, copy_bytes); + + priv->swr_frame->nb_samples -= copy_frames; + priv->swr_frame_start += copy_bytes; + count -= copy_frames; + written += copy_bytes; } - out_size = min_i(output->buffer_used_len, count); - memcpy(buffer, output->buffer_pos, out_size); - output->buffer_used_len -= out_size; - output->buffer_pos += out_size; - return out_size; + return written; +err: + d_print("%s\n", ffmpeg_errmsg(res)); + return -IP_ERROR_INTERNAL; } static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) { struct ffmpeg_private *priv = ip_data->private; - AVStream *st = priv->input_context->streams[priv->input->stream_index]; - int ret; + AVStream *st = priv->format_ctx->streams[priv->stream_index]; - priv->input->seek_ts = offset * AV_TIME_BASE; - priv->input->prev_frame_end = -1; + priv->seek_ts = offset * AV_TIME_BASE; + priv->prev_frame_end = -1; int64_t ts = av_rescale(offset, st->time_base.den, st->time_base.num); - avcodec_flush_buffers(priv->codec_context); - /* TODO: also flush swresample buffers */ - /* Force reading a new packet in next ffmpeg_fill_buffer(). */ - priv->input->curr_pkt_size = 0; - - ret = avformat_seek_file(priv->input_context, - priv->input->stream_index, 0, ts, ts, 0); - - if (ret < 0) { + int ret = avformat_seek_file(priv->format_ctx, + priv->stream_index, 0, ts, ts, 0); + if (ret < 0) return -IP_ERROR_FUNCTION_NOT_SUPPORTED; - } else { - ffmpeg_buffer_flush(priv->output); - return 0; - } + + priv->swr_frame->nb_samples = 0; + avcodec_flush_buffers(priv->codec_ctx); + /* also flush swresample buffers? */ + return 0; } static void ffmpeg_read_metadata(struct growing_keyvals *c, AVDictionary *metadata) @@ -445,7 +462,7 @@ static int ffmpeg_read_comments(struct input_plugin_data *ip_data, struct keyval **comments) { struct ffmpeg_private *priv = ip_data->private; - AVFormatContext *ic = priv->input_context; + AVFormatContext *ic = priv->format_ctx; GROWING_KEYVALS(c); @@ -463,29 +480,29 @@ static int ffmpeg_read_comments(struct input_plugin_data *ip_data, static int ffmpeg_duration(struct input_plugin_data *ip_data) { struct ffmpeg_private *priv = ip_data->private; - return priv->input_context->duration / AV_TIME_BASE; + return priv->format_ctx->duration / AV_TIME_BASE; } static long ffmpeg_bitrate(struct input_plugin_data *ip_data) { struct ffmpeg_private *priv = ip_data->private; - long bitrate = priv->input_context->bit_rate; + long bitrate = priv->format_ctx->bit_rate; return bitrate ? bitrate : -IP_ERROR_FUNCTION_NOT_SUPPORTED; } static long ffmpeg_current_bitrate(struct input_plugin_data *ip_data) { struct ffmpeg_private *priv = ip_data->private; - AVStream *st = priv->input_context->streams[priv->input->stream_index]; + AVStream *st = priv->format_ctx->streams[priv->stream_index]; long bitrate = -1; /* ape codec returns silly numbers */ if (priv->codec->id == AV_CODEC_ID_APE) return -1; - if (priv->input->curr_duration > 0) { - double seconds = priv->input->curr_duration * av_q2d(st->time_base); - bitrate = (8 * priv->input->curr_size) / seconds; - priv->input->curr_size = 0; - priv->input->curr_duration = 0; + if (priv->curr_duration > 0) { + double seconds = priv->curr_duration * av_q2d(st->time_base); + bitrate = (8 * priv->curr_size) / seconds; + priv->curr_size = 0; + priv->curr_duration = 0; } return bitrate; } @@ -500,7 +517,7 @@ static char *ffmpeg_codec_profile(struct input_plugin_data *ip_data) { struct ffmpeg_private *priv = ip_data->private; const char *profile; - profile = av_get_profile_name(priv->codec, priv->codec_context->profile); + profile = av_get_profile_name(priv->codec, priv->codec_ctx->profile); return profile ? xstrdup(profile) : NULL; } From 59dca8686f52a4d4909c815d2af713539ad6fdc2 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 14:28:46 +0300 Subject: [PATCH 05/12] Validate sample format in ip_open() To prevent segfault in ip_setup() because channels=0, validate ip_data->sf after opening ip. --- input.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git input.c input.c index c20cb3f6a..f5c5b3c24 100644 --- input.c +++ input.c @@ -605,6 +605,16 @@ int ip_open(struct input_plugin *ip) ip_reset(ip, 1); return rc; } + + unsigned bits = sf_get_bits(ip->data.sf); + unsigned channels = sf_get_channels(ip->data.sf); + unsigned rate = sf_get_rate(ip->data.sf); + if (!bits || !channels || !rate) { + d_print("corrupt file: bits = %u, channels = %u, rate = %u\n", + bits, channels, rate); + return -IP_ERROR_FILE_FORMAT; + } + ip->open = 1; return 0; } From dcc3e425275a1c2e3e2a669b13374bb9b1a67e0d Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 14:53:52 +0300 Subject: [PATCH 06/12] ip/ffmpeg: flush swresample buffer when seeking --- ip/ffmpeg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index 42f630ee7..775e7de1d 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -444,7 +444,7 @@ static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) priv->swr_frame->nb_samples = 0; avcodec_flush_buffers(priv->codec_ctx); - /* also flush swresample buffers? */ + swr_convert(priv->swr, NULL, 0, NULL, 0); /* flush swr buffer */ return 0; } From 99a4e2f67857205e1e1cb4fb7a095819b298c0bc Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 15:02:34 +0300 Subject: [PATCH 07/12] ip/ffmpeg: remember swr_frame's capacity --- ip/ffmpeg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index 775e7de1d..c659c1330 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -49,6 +49,7 @@ struct ffmpeg_private { /* A buffer to hold swr_convert()-ed samples */ AVFrame *swr_frame; + int swr_frame_samples_cap; int swr_frame_start; /* Bitrate estimation */ @@ -213,6 +214,7 @@ static int ffmpeg_init_swr_frame(struct ffmpeg_private *priv, d_print("av_frame_get_buffer(): %s\n", ffmpeg_errmsg(res)); return -IP_ERROR_INTERNAL; } + priv->swr_frame_samples_cap = frame->nb_samples; frame->nb_samples = 0; priv->swr_frame = frame; @@ -378,8 +380,7 @@ static int ffmpeg_convert_frame(struct ffmpeg_private *priv) { int res = swr_convert(priv->swr, priv->swr_frame->extended_data, - /* TODO: proper buffer capacity */ - priv->frame->nb_samples, + priv->swr_frame_samples_cap, (const uint8_t **)priv->frame->extended_data, priv->frame->nb_samples); if (res >= 0) { From fabf6bcf4fd3f24dafedeefd7e9daf4215c7066a Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 15:54:19 +0300 Subject: [PATCH 08/12] ip/ffmpeg: reset swr_frame_start when seeking --- ip/ffmpeg.c | 1 + 1 file changed, 1 insertion(+) diff --git ip/ffmpeg.c ip/ffmpeg.c index c659c1330..71cc51116 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -444,6 +444,7 @@ static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) return -IP_ERROR_FUNCTION_NOT_SUPPORTED; priv->swr_frame->nb_samples = 0; + priv->swr_frame_start = 0; avcodec_flush_buffers(priv->codec_ctx); swr_convert(priv->swr, NULL, 0, NULL, 0); /* flush swr buffer */ return 0; From 265b893d78891362386fd406af308c5be73c36c2 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 17:27:20 +0300 Subject: [PATCH 09/12] ip/ffmpeg: better frame skipping logic --- ip/ffmpeg.c | 82 ++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index 71cc51116..af6ecfb8d 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -44,8 +44,8 @@ struct ffmpeg_private { AVPacket *pkt; AVFrame *frame; - int64_t seek_ts; - int64_t prev_frame_end; + double seek_ts; + int64_t skip_samples; /* A buffer to hold swr_convert()-ed samples */ AVFrame *swr_frame; @@ -249,7 +249,6 @@ static int ffmpeg_open(struct input_plugin_data *ip_data) priv.pkt = av_packet_alloc(); priv.frame = av_frame_alloc(); priv.seek_ts = -1; - priv.prev_frame_end = -1; priv.swr = swr_alloc(); ffmpeg_set_sf_and_swr_opts(priv.swr, priv.codec_ctx, @@ -283,37 +282,37 @@ static int ffmpeg_close(struct input_plugin_data *ip_data) return 0; } -/* - * return: - * 0 - retry - * >0 - ok - */ -static int ffmpeg_seek_into_frame(struct ffmpeg_private *priv, int64_t frame_ts) +static int64_t ffmpeg_calc_skip_samples(struct ffmpeg_private *priv) { - if (frame_ts >= 0) { - AVStream *s = priv->format_ctx->streams[priv->stream_index]; - frame_ts = av_rescale_q(frame_ts, s->time_base, AV_TIME_BASE_Q); + int64_t ts; + if (priv->frame->pts >= 0) { + ts = priv->frame->pts; + } else if (priv->frame->pkt_dts >= 0) { + ts = priv->frame->pkt_dts; } else { - frame_ts = priv->prev_frame_end; + d_print("AVFrame.pts and AVFrame.pkt_dts are unset\n"); + return -1; } - if (frame_ts >= priv->seek_ts) - return 1; - - int64_t frame_dur = av_rescale(priv->frame->nb_samples, - AV_TIME_BASE, priv->frame->sample_rate); - int64_t frame_end = frame_ts + frame_dur; - priv->prev_frame_end = frame_end; + AVStream *s = priv->format_ctx->streams[priv->stream_index]; + double frame_ts = ts * av_q2d(s->time_base); - d_print("seek_ts: %ld, frame_ts: %ld, frame_end: %ld\n", - priv->seek_ts, frame_ts, frame_end); + d_print("seek_ts: %.6fs, frame_ts: %.6fs\n", priv->seek_ts, frame_ts); - if (frame_end <= priv->seek_ts) + if (frame_ts >= priv->seek_ts) return 0; + return (priv->seek_ts - frame_ts) * priv->frame->sample_rate; +} - int64_t skip_samples = av_rescale(priv->seek_ts - frame_ts, - priv->frame->sample_rate, AV_TIME_BASE); - priv->frame->nb_samples -= skip_samples; +static void ffmpeg_skip_frame_part(struct ffmpeg_private *priv) +{ + if (priv->skip_samples >= priv->frame->nb_samples) { + d_print("skipping frame: %d samples\n", + priv->frame->nb_samples); + priv->skip_samples -= priv->frame->nb_samples; + priv->frame->nb_samples = 0; + return; + } int bps = av_get_bytes_per_sample(priv->frame->format); #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 24, 100) @@ -322,15 +321,17 @@ static int ffmpeg_seek_into_frame(struct ffmpeg_private *priv, int64_t frame_ts) int channels = priv->codec_ctx->channels; #endif + priv->frame->nb_samples -= priv->skip_samples; + /* Just modify frame's data pointer because it's throw-away */ if (av_sample_fmt_is_planar(priv->frame->format)) { for (int i = 0; i < channels; i++) - priv->frame->extended_data[i] += skip_samples * bps; + priv->frame->extended_data[i] += priv->skip_samples * bps; } else { - priv->frame->extended_data[0] += skip_samples * channels * bps; + priv->frame->extended_data[0] += priv->skip_samples * channels * bps; } - d_print("skipping %ld samples\n", skip_samples); - return 1; + d_print("skipping %ld samples\n", priv->skip_samples); + priv->skip_samples = 0; } /* @@ -361,17 +362,16 @@ static int ffmpeg_get_frame(struct ffmpeg_private *priv) if (res < 0) return res; - int64_t frame_ts = -1; - if (priv->frame->pts >= 0) - frame_ts = priv->frame->pts; - else if (priv->frame->pkt_dts >= 0) - frame_ts = priv->frame->pkt_dts; + if (priv->seek_ts > 0) { + priv->skip_samples = ffmpeg_calc_skip_samples(priv); + if (priv->skip_samples >= 0) + priv->seek_ts = -1; + } - if (priv->seek_ts > 0 && (frame_ts >= 0 || priv->prev_frame_end >= 0)) { - if (ffmpeg_seek_into_frame(priv, frame_ts) == 0) + if (priv->skip_samples > 0) { + ffmpeg_skip_frame_part(priv); + if (priv->frame->nb_samples == 0) return 0; - priv->seek_ts = -1; - priv->prev_frame_end = -1; } return 1; } @@ -434,8 +434,8 @@ static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) struct ffmpeg_private *priv = ip_data->private; AVStream *st = priv->format_ctx->streams[priv->stream_index]; - priv->seek_ts = offset * AV_TIME_BASE; - priv->prev_frame_end = -1; + priv->seek_ts = offset; + priv->skip_samples = 0; int64_t ts = av_rescale(offset, st->time_base.den, st->time_base.num); int ret = avformat_seek_file(priv->format_ctx, From d2c07d224c879de9a707d999423b8f814a23f765 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 17 Aug 2025 19:22:50 +0300 Subject: [PATCH 10/12] ip/ffmpeg: don't process empty frames --- ip/ffmpeg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index af6ecfb8d..dd9061aba 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -356,7 +356,7 @@ static int ffmpeg_get_frame(struct ffmpeg_private *priv) priv->curr_duration += priv->pkt->duration; res = avcodec_send_packet(priv->codec_ctx, priv->pkt); - if (res == AVERROR(EAGAIN)) + if (res == 0 || res == AVERROR(EAGAIN)) return 0; } if (res < 0) From 2ded02613f79bc228d0bb1569e52dba14c358f23 Mon Sep 17 00:00:00 2001 From: ihy123 Date: Mon, 18 Aug 2025 03:32:22 +0300 Subject: [PATCH 11/12] ip/ffmpeg: improve readability Previously ffmpeg_read()'s while loop was kinda leaking into ffmpeg_get_frame(), now it doesn't. --- ip/ffmpeg.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index dd9061aba..fc748951f 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -337,30 +337,32 @@ static void ffmpeg_skip_frame_part(struct ffmpeg_private *priv) /* * return: * <0 - error - * 0 - retry + * 0 - eof * >0 - ok */ static int ffmpeg_get_frame(struct ffmpeg_private *priv) { - int res = avcodec_receive_frame(priv->codec_ctx, priv->frame); + int res; +retry: + res = avcodec_receive_frame(priv->codec_ctx, priv->frame); if (res == AVERROR(EAGAIN)) { av_packet_unref(priv->pkt); res = av_read_frame(priv->format_ctx, priv->pkt); if (res < 0) - return res; + goto err; if (priv->pkt->stream_index != priv->stream_index) - return 0; + goto retry; priv->curr_size += priv->pkt->size; priv->curr_duration += priv->pkt->duration; res = avcodec_send_packet(priv->codec_ctx, priv->pkt); if (res == 0 || res == AVERROR(EAGAIN)) - return 0; + goto retry; } if (res < 0) - return res; + goto err; if (priv->seek_ts > 0) { priv->skip_samples = ffmpeg_calc_skip_samples(priv); @@ -371,9 +373,14 @@ static int ffmpeg_get_frame(struct ffmpeg_private *priv) if (priv->skip_samples > 0) { ffmpeg_skip_frame_part(priv); if (priv->frame->nb_samples == 0) - return 0; + goto retry; } return 1; +err: + if (res == AVERROR_EOF) + return 0; + d_print("%s\n", ffmpeg_errmsg(res)); + return -IP_ERROR_INTERNAL; } static int ffmpeg_convert_frame(struct ffmpeg_private *priv) @@ -386,8 +393,10 @@ static int ffmpeg_convert_frame(struct ffmpeg_private *priv) if (res >= 0) { priv->swr_frame->nb_samples = res; priv->swr_frame_start = 0; + return res; } - return res; + d_print("%s\n", ffmpeg_errmsg(res)); + return -IP_ERROR_INTERNAL; } static int ffmpeg_read(struct input_plugin_data *ip_data, char *buffer, int count) @@ -401,16 +410,14 @@ static int ffmpeg_read(struct input_plugin_data *ip_data, char *buffer, int coun while (count) { if (priv->swr_frame->nb_samples == 0) { res = ffmpeg_get_frame(priv); - if (res == AVERROR_EOF) + if (res == 0) break; - else if (res == 0) - continue; else if (res < 0) - goto err; + return res; res = ffmpeg_convert_frame(priv); if (res < 0) - goto err; + return res; } int copy_frames = min_i(count, priv->swr_frame->nb_samples); @@ -424,9 +431,6 @@ static int ffmpeg_read(struct input_plugin_data *ip_data, char *buffer, int coun written += copy_bytes; } return written; -err: - d_print("%s\n", ffmpeg_errmsg(res)); - return -IP_ERROR_INTERNAL; } static int ffmpeg_seek(struct input_plugin_data *ip_data, double offset) From c541b39903aa57612501ee65770ef30d0954d22e Mon Sep 17 00:00:00 2001 From: ihy123 Date: Sun, 24 Aug 2025 19:16:57 +0300 Subject: [PATCH 12/12] ip/ffmpeg: fix building for ffmpeg 8.0 avcodec_close() can be safely removed because avcodec_free_context() is its replacement since 2016. See ffmpeg commit 2ef6dab0a79 Builds with v3.3.9 v4.0.6 v6.1.3 v7.1.1 v8.0 --- ip/ffmpeg.c | 1 - 1 file changed, 1 deletion(-) diff --git ip/ffmpeg.c ip/ffmpeg.c index fc748951f..2cb07671c 100644 --- ip/ffmpeg.c +++ ip/ffmpeg.c @@ -223,7 +223,6 @@ static int ffmpeg_init_swr_frame(struct ffmpeg_private *priv, static void ffmpeg_free(struct ffmpeg_private *priv) { - avcodec_close(priv->codec_ctx); avcodec_free_context(&priv->codec_ctx); avformat_close_input(&priv->format_ctx);