FFmpeg5.0源码阅读—

FFmpeg5.0源码阅读——格式检测

摘要：在拿到一个新的媒体输入后，FFmpeg总是能够足够准确地判断其格式并进行相应的处理。本文描述FFmpeg如何通过格式检测来确认正在处理的媒体格式类型，并进行相应的处理。
关键字：FFmpeg,format,probe

在调用FFmpeg的API avformat_open_input之后，就能在对应的AVFormatContext中看到具体的媒体信息。为了获取媒体信息，首先需要打开流文件，然后读取文件头部的数据来检测媒体格式。

// Open the input for reading through the context's io_open callback;
// a negative return value is passed straight back to the caller.
if ((ret = s->io_open(s, &s->pb, filename, AVIO_FLAG_READ | s->avio_flags, options)) < 0)
    return ret;

// An input format is already set on the context, so probing is skipped.
if (s->iformat)
    return 0;
// Probe the input format from the opened I/O context; the result is stored
// in s->iformat and the probe size is taken from s->format_probesize.
return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                s, 0, s->format_probesize);

1 打开文件

流协议
为了打开文件首先需要确认输入的流协议。由于FFmpeg支持很多种协议，比如文件、RTMP、HTTP等，为了正确地读取数据首先需要确认输入的流是哪一种协议。对于输入文件，FFmpeg需要先确认具体是哪种流协议，即需要使用哪种protocol打开文件并读取数据。
FFmpeg中很多场景对于支持格式的检测，都是通过遍历当前支持的格式列表来选择最匹配的那一个作为最终匹配的结果。协议选择也是一样。如果外界调用API时指定了打开的协议的类型则不会进行检测，没有的话会按照下面的调用链进行匹配。

init_input->io_open_default->ffio_open_whitelist->ffurl_open_whitelist->ffurl_alloc->url_find_protocol

Protocol匹配的核心代码如下，通过遍历内部维护的全局静态协议表格，逐一比较每一个URLProtocol的name，最终选择能够匹配到的URLProtocol作为最终的流协议。一旦确认了流协议，后续就是调用对应的open和read函数指针打开和读文件。

/* Fetch the list of available protocols; give up if it cannot be obtained. */
protocols = ffurl_get_protocols(NULL, NULL);
if (!protocols)
    return NULL;
/* The list is NULL-terminated: walk it until the first matching entry. */
for (i = 0; protocols[i]; i++) {
        const URLProtocol *up = protocols[i];
    /* Exact match between the requested scheme and the protocol name. */
    if (!strcmp(proto_str, up->name)) {
        /* Only the list itself is released; the matched entry is returned. */
        av_freep(&protocols);
        return up;
    }
    /* Protocols flagged as nested-scheme are also matched against
     * proto_nested (computed outside this excerpt). */
    if (up->flags & URL_PROTOCOL_FLAG_NESTED_SCHEME &&
        !strcmp(proto_nested, up->name)) {
        av_freep(&protocols);
        return up;
    }
}

2 检测媒体格式

确认iformat
媒体格式探测其实就是确认使用哪种AVInputFormat解析（该成员是AVFormatContext->iformat字段）。如上面的代码所示，这里的媒体格式检测入口是av_probe_input_buffer2，它读取数据后调用av_probe_input_format2来完成检测，而该函数也只是转调了av_probe_input_format3而已。

av_probe_input_format3检测文件格式的方式就是遍历FFmpeg内部的iformat静态表格，选择匹配度最高的那个作为最终的格式。核心代码如下。

/* Iterate over every registered demuxer and keep the one with the best score. */
while ((fmt1 = av_demuxer_iterate(&i))) {
    /* Demuxers flagged as experimental are skipped. */
    if (fmt1->flags & AVFMT_EXPERIMENTAL)
        continue;
    /* Skip the demuxer when is_opened and its AVFMT_NOFILE flag are either
     * both set or both clear; "image2" is exempt from this check. */
    if (!is_opened == !(fmt1->flags & AVFMT_NOFILE) && strcmp(fmt1->name, "image2"))
        continue;
    score = 0;
    if (ffifmt(fmt1)->read_probe) {
        /* Content-based score from the demuxer's own probe callback. */
        score = ffifmt(fmt1)->read_probe(&lpd);
        if (score)
            av_log(NULL, AV_LOG_TRACE, "Probing %s score:%d size:%d\n", fmt1->name, score, lpd.buf_size);
        /* A matching file extension only raises the score to a floor value;
         * the floor depends on nodat, which is set outside this excerpt.
         * NOTE(review): confirm the exact meaning of each nodat value
         * against the full function. */
        if (fmt1->extensions && av_match_ext(lpd.filename, fmt1->extensions)) {
            switch (nodat) {
            case NO_ID3:
                score = FFMAX(score, 1);
                break;
            case ID3_GREATER_PROBE:
            case ID3_ALMOST_GREATER_PROBE:
                score = FFMAX(score, AVPROBE_SCORE_EXTENSION / 2 - 1);
                break;
            case ID3_GREATER_MAX_PROBE:
                score = FFMAX(score, AVPROBE_SCORE_EXTENSION);
                break;
            }
        }
    } else if (fmt1->extensions) {
        /* No probe callback: the extension is the only evidence available. */
        if (av_match_ext(lpd.filename, fmt1->extensions))
            score = AVPROBE_SCORE_EXTENSION;
    }
    /* A matching MIME type lifts the score to at least AVPROBE_SCORE_MIME. */
    if (av_match_name(lpd.mime_type, fmt1->mime_type)) {
        if (AVPROBE_SCORE_MIME > score) {
            av_log(NULL, AV_LOG_DEBUG, "Probing %s score:%d increased to %d due to MIME type\n", fmt1->name, score, AVPROBE_SCORE_MIME);
            score = AVPROBE_SCORE_MIME;
        }
    }
    /* Track the best candidate; a tie with the current best clears the
     * selection, so an ambiguous result yields no format. */
    if (score > score_max) {
        score_max = score;
        fmt       = fmt1;
    } else if (score == score_max)
        fmt = NULL;
}

从上面的流程中可以看到，FFmpeg媒体信息探测会给每个格式打一个分数，根据输入流和对应的格式计算当前的分数，分数越高则匹配度越高，最后选择分数最高的格式作为选中的格式；如果最高分出现并列，则fmt会被置为NULL，表示无法确定格式。而分数计算主要参考read_probe返回的分数值、后缀名、ID3元数据和MIME类型。
从代码中也能够看到read_probe给的分数相对比较优先，而输入路径的后缀名会作为兜底策略：对于没有read_probe的格式，后缀名匹配时得分为AVPROBE_SCORE_EXTENSION（50分）；对于有read_probe的格式，后缀名匹配只用于保证一个保底分数。这也是为什么一个不是媒体文件的文件在更改后缀名后却有可能检测成功的原因。那为什么需要保留后缀名检测呢？原因是并不是所有格式都能够通过文件内容判断其格式，比如相机的raw格式往往更改后缀名后就无法正常解析，这是因为raw格式只包含数据不包含任何其他多余的信息。

read_probe
FFmpeg内部read_probe其实就是调用对应AVInputFormat的read_probe函数，每个格式都有自己的实现。一般情况下分为两种，一种是文件开头有tag直接表明当前文件类型的，比如GIF等；另一种是需要读取部分文件内容来确认格式信息的，比如mov、mp4格式。

第一种比较简单直接读取文件内容然后匹配tag即可，比如gif的格式检测：

/**
 * Score how likely the probe buffer holds a GIF file.
 *
 * @param p probe data; only p->buf is read here
 * @return AVPROBE_SCORE_MAX when the buffer starts with one of the two GIF
 *         signatures and both 16-bit fields that follow it are non-zero,
 *         otherwise 0
 */
static int gif_probe(const AVProbeData *p)
{
    /* The first six bytes must equal either the GIF87a or the GIF89a magic. */
    const int has_signature = !memcmp(p->buf, gif87a_sig, 6) ||
                              !memcmp(p->buf, gif89a_sig, 6);
    if (!has_signature)
        return 0;

    /* Width and height follow the signature; a zero in either rejects the buffer. */
    if (AV_RL16(&p->buf[6]) == 0 || AV_RL16(&p->buf[8]) == 0)
        return 0;

    return AVPROBE_SCORE_MAX;
}

另一种需要读取文件内容，比如mp4格式检测需要读取其中的box来确认部分文件信息。

/**
 * Score how likely the probe buffer holds MOV/MP4-style data.
 *
 * The buffer is walked as a chain of records, each made of a size field
 * followed by a four-character tag. Known tags raise the score. If a 'moov'
 * tag was seen and the score is high, the data after it is additionally
 * searched for an 'hdlr' / 'mhlr' / 'MPEG' pattern, in which case a low score
 * is returned instead.
 *
 * @param p probe data; p->buf and p->buf_size are read
 * @return the accumulated score, or 5 when the MPEG handler pattern is found
 */
static int mov_probe(const AVProbeData *p)
{
    int64_t offset;
    uint32_t tag;
    int score = 0;
    int moov_offset = -1; /* position of the last 'moov' tag seen, -1 if none */

    /* check file header */
    offset = 0;
    for (;;) {
        int64_t size;
        int minsize = 8; /* smallest valid record: 4-byte size + 4-byte tag */
        /* ignore invalid offset */
        if ((offset + 8ULL) > (unsigned int)p->buf_size)
            break;
        size = AV_RB32(p->buf + offset);
        if (size == 1 && offset + 16 <= (unsigned int)p->buf_size) {
            /* a size of 1 means the real size is the 64-bit value at offset + 8 */
            size = AV_RB64(p->buf+offset + 8);
            minsize = 16;
        } else if (size == 0) {
            /* a size of 0 is treated as "extends to the end of the buffer" */
            size = p->buf_size - offset;
        }
        if (size < minsize) {
            /* implausible size: step forward 4 bytes and try again */
            offset += 4;
            continue;
        }
        tag = AV_RL32(p->buf + offset + 4);
        switch(tag) {
        /* check for obvious tags */
        case MKTAG('m','o','o','v'):
            moov_offset = offset + 4;
            /* intentional fall through: 'moov' is scored like the tags below */
        case MKTAG('m','d','a','t'):
        case MKTAG('p','n','o','t'): /* detect movs with preview pics like ew.mov and april.mov */
        case MKTAG('u','d','t','a'): /* Packet Video PVAuthor adds this and a lot of more junk */
        case MKTAG('f','t','y','p'):
            /* an 'ftyp' whose first payload word is 'jp2 ', 'jpx ' or 'jxl '
             * only guarantees a score of at least 5; every other case here
             * sets the maximum score outright */
            if (tag == MKTAG('f','t','y','p') &&
                       (   AV_RL32(p->buf + offset + 8) == MKTAG('j','p','2',' ')
                        || AV_RL32(p->buf + offset + 8) == MKTAG('j','p','x',' ')
                        || AV_RL32(p->buf + offset + 8) == MKTAG('j','x','l',' ')
                    )) {
                score = FFMAX(score, 5);
            } else {
                score = AVPROBE_SCORE_MAX;
            }
            break;
        /* those are more common words, so rate them a bit less */
        case MKTAG('e','d','i','w'): /* xdcam files have reverted first tags */
        case MKTAG('w','i','d','e'):
        case MKTAG('f','r','e','e'):
        case MKTAG('j','u','n','k'):
        case MKTAG('p','i','c','t'):
            score  = FFMAX(score, AVPROBE_SCORE_MAX - 5);
            break;
        case MKTAG(0x82,0x82,0x7f,0x7d):
            score  = FFMAX(score, AVPROBE_SCORE_EXTENSION - 5);
            break;
        case MKTAG('s','k','i','p'):
        case MKTAG('u','u','i','d'):
        case MKTAG('p','r','f','l'):
            /* if we only find those cause probedata is too small at least rate them */
            score  = FFMAX(score, AVPROBE_SCORE_EXTENSION);
            break;
        }
        /* stop rather than overflow offset when advancing to the next record */
        if (size > INT64_MAX - offset)
            break;
        offset += size;
    }
    if (score > AVPROBE_SCORE_MAX - 50 && moov_offset != -1) {
        /* moov atom in the header - we should make sure that this is not a
         * MOV-packed MPEG-PS */
        offset = moov_offset;

        /* the checks below read up to 16 bytes starting at offset */
        while (offset < (p->buf_size - 16)) { /* Sufficient space */
               /* We found an actual hdlr atom */
            if (AV_RL32(p->buf + offset     ) == MKTAG('h','d','l','r') &&
                AV_RL32(p->buf + offset +  8) == MKTAG('m','h','l','r') &&
                AV_RL32(p->buf + offset + 12) == MKTAG('M','P','E','G')) {
                av_log(NULL, AV_LOG_WARNING, "Found media data tag MPEG indicating this is a MOV-packed MPEG-PS.\n");
                /* We found a media handler reference atom describing an
                 * MPEG-PS-in-MOV, return a
                 * low score to force expanding the probe window until
                 * mpegps_probe finds what it needs */
                return 5;
            } else {
                /* Keep looking */
                offset += 2;
            }
        }
    }

    return score;
}