Windows Media Foundation H264 解码器出现意外的 U/V 平面偏移

发布于 2025-01-19 19:49:16 字数 5533 浏览 7 评论 0原文

在使用 IMFSourceReader 通过 Windows Media Foundation 解码 H264 视频时,我发现 U/V 平面数据中存在意外的 y 偏移。通过反复试验,我发现了一种似乎适用于我尝试过的所有视频源的调整,但我真的很想知道此数据布局是否符合预期或已记录。

具体来说,有时 IMFSample::GetTotalLength() 返回的总缓冲区大小大于 height * stride * 3/2。在这种情况下,U/V 平面从 Y 平面数据的末尾偏移额外数据的 2/3,如下所示:

https://stash.reaper.fm/44192/YV12.png

需要明确的是,意想不到的部分是位于 Y 平面数据下方、U 平面数据上方的 N 行像素。我认为所有其他偏移和填充都符合预期。此数据布局是预期的还是记录的?

应用调整前的图像:

https://stash.reaper.fm/44193/before.jpg

应用调整后的图像:

https://stash.reaper.fm/44194/after.jpg

视频:

https://stash.reaper.fm/44214/johnny.mp4

这是一个完整的程序,可以在调整和不调整的情况下绘制视频的第一帧。没有错误检查,YUV 到 RGB 的转换非常基本。

#include <windows.h>
#include <initguid.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>

#include "resource.h"

// Margin, in pixels, used when sizing the dialog and positioning the two bitmaps.
const int marg=16;

// Saturate an integer to the 0..255 range of one 8-bit colour channel.
unsigned char clamp(int v)
{
  if (v < 0)
  {
    return 0;
  }
  if (v > 255)
  {
    return 255;
  }
  return (unsigned char)v;
}

// Convert one Y/U/V sample triple into a 4-byte pixel written at rgb[0..3].
//
// 16 is subtracted from y and 128 from u and v, then fixed-point
// coefficients (scaled by 256, with +128 for rounding) are applied and each
// result is saturated with clamp(). Byte 0 receives the u-weighted term,
// byte 1 the mixed u/v term, byte 2 the v-weighted term, and byte 3 is set
// to zero.
void yuv_to_rgb(unsigned char *rgb, int y, int u, int v)
{
  // Luma contribution plus the rounding bias, shared by all three channels.
  const int luma = (y - 16)*298 + 128;
  const int cu = u - 128;
  const int cv = v - 128;

  rgb[0] = clamp((luma + cu*516) / 256);
  rgb[1] = clamp((luma - cu*100 - cv*208) / 256);
  rgb[2] = clamp((luma + cv*409) / 256);
  rgb[3] = 0;
}

// Dialog procedure for the demo window.
//
// WM_INITDIALOG: reads the first video sample of a hard-coded file through
//   IMFSourceReader as YV12 and converts it into two 32-bit DIB sections:
//   rawbmp (chroma read at the nominal plane offsets) and adjbmp (chroma read
//   with the extra "offs" displacement under investigation).
// WM_PAINT:   blits rawbmp in the upper half and adjbmp in the lower half.
// WM_DESTROY: deletes both bitmaps.
// WM_COMMAND: closes the dialog on IDCANCEL.
//
// Always returns 0. There is intentionally no error checking on the Media
// Foundation or GDI calls; this is a minimal reproduction program.
INT_PTR CALLBACK wndproc(HWND hwndDlg, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
  static HBITMAP rawbmp = NULL, adjbmp=NULL;
  static unsigned int srcw = 0, srch = 0;

  switch (uMsg)
  {
    case WM_INITDIALOG:
    {
      MFStartup(MF_VERSION);

      // Open the file and select only the first video stream.
      IMFSourceReader *reader = NULL;
      MFCreateSourceReaderFromURL(L"C:\\Users\\xxx\\Documents\\johnny.mp4", NULL, &reader);
      reader->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE);
      reader->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE);

      // Request YV12 output from the reader.
      IMFMediaType *fmt = NULL;
      MFCreateMediaType(&fmt);
      fmt->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
      fmt->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_YV12);
      reader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, NULL, fmt);
      fmt->Release();

      // Read back the negotiated type to learn the frame size.
      reader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, &fmt);
      MFGetAttributeSize(fmt, MF_MT_FRAME_SIZE, &srcw, &srch);
      fmt->Release();

      // Pull the first sample and flatten it into one buffer.
      IMFSample *sample = NULL;
      DWORD flags=0;
      INT64 readpos=0;
      IMFMediaBuffer *buffer = NULL;
      DWORD bufsz=0;
      reader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, NULL, &flags, &readpos, &sample);
      sample->GetTotalLength(&bufsz);
      sample->ConvertToContiguousBuffer(&buffer);
      sample->Release();
      reader->Release();

      // Two 32-bit DIB sections of the frame size. biHeight is positive, so
      // the DIBs are bottom-up.
      BITMAPINFO bi = {0};
      bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
      bi.bmiHeader.biWidth = srcw;
      bi.bmiHeader.biHeight = srch;
      bi.bmiHeader.biPlanes = 1;
      bi.bmiHeader.biBitCount = 32;
      bi.bmiHeader.biCompression = BI_RGB;
      unsigned char *raw = NULL, *adj=NULL;
      rawbmp = CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, (void**)&raw, NULL, 0);
      adjbmp = CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, (void**)&adj, NULL, 0);

      IMF2DBuffer *buffer2d = NULL;
      BYTE *bptr = NULL;
      LONG bstride = 0;
      buffer->QueryInterface(&buffer2d);
      buffer2d->Lock2D(&bptr, &bstride);

      // Extra displacement applied to both chroma planes in the adjusted
      // image: two thirds of the total sample length minus the nominal Y
      // plane size. It is zero when bufsz == srch*bstride*3/2.
      // NOTE(review): the surplus presumably comes from the decoder padding
      // the coded height -- confirm against the media type's aperture data.
      int offs=(bufsz*2/3-srch*bstride); // unexpected

      // Destination pointers start at the last DIB row and move backwards so
      // the first source row ends up at the top of the bottom-up bitmap.
      unsigned char *rawptr = raw + srcw*(srch-1)*4;
      unsigned char *adjptr = adj + srcw*(srch-1)*4;
      // Nominal plane positions: Y first, then V, then U a quarter of the
      // Y plane size further on.
      unsigned char *yptr = bptr;
      unsigned char *uptr = bptr + srch*bstride*5/4;
      unsigned char *vptr = bptr + srch*bstride;

      for (unsigned int y=0; y < srch; ++y)
      {
        for (unsigned int x=0; x < srcw; ++x)
        {
          yuv_to_rgb(rawptr+x*4, yptr[x], uptr[x/2], vptr[x/2]);
          yuv_to_rgb(adjptr+x*4, yptr[x], uptr[x/2+offs], vptr[x/2+offs]);
        }

        rawptr -= srcw*4;
        adjptr -= srcw*4;
        yptr += bstride;
        // Chroma rows are half the luma stride and advance every second row.
        if (y&1) uptr += bstride/2;
        if (y&1) vptr += bstride/2;
      }

      buffer2d->Unlock2D();
      buffer2d->Release();
      buffer->Release();

      // Every Media Foundation object has been released; balance MFStartup.
      MFShutdown();

      SetWindowPos(hwndDlg, NULL, 0, 0, srcw+2*marg, 2*(srch+2*marg), SWP_NOZORDER|SWP_NOMOVE|SWP_NOACTIVATE);
    }
    return 0;

    case WM_DESTROY:
    {
      DeleteObject(rawbmp);
      DeleteObject(adjbmp);
    }
    return 0;

    case WM_PAINT:
    {
      RECT r;
      GetClientRect(hwndDlg, &r);
      int h=r.bottom;

      PAINTSTRUCT ps;
      HDC dc = BeginPaint(hwndDlg, &ps);

      HDC srcdc = CreateCompatibleDC(dc);
      HGDIOBJ oldbmp = SelectObject(srcdc, rawbmp);
      BitBlt(dc, marg/2, marg/2, srcw, srch, srcdc, 0, 0, SRCCOPY);
      SelectObject(srcdc, adjbmp);
      BitBlt(dc, marg/2, (h+marg)/2, srcw, srch, srcdc, 0, 0, SRCCOPY);

      // A DC from CreateCompatibleDC must be destroyed with DeleteDC (not
      // ReleaseDC); put the original bitmap back first so neither DIB stays
      // selected into a DC.
      SelectObject(srcdc, oldbmp);
      DeleteDC(srcdc);

      EndPaint(hwndDlg, &ps);
    }
    return 0;

    case WM_COMMAND:
      if (LOWORD(wParam) == IDCANCEL)
      {
        EndDialog(hwndDlg, 0);
      }
    return 0;
  }

  return 0;
}

// Program entry point: runs the IDD_DIALOG dialog with wndproc as its dialog
// procedure and returns 0 once DialogBox returns.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
  HWND owner = GetDesktopWindow();
  LPCTSTR dlgres = MAKEINTRESOURCE(IDD_DIALOG);

  DialogBox(hInstance, dlgres, owner, wndproc);

  return 0;
}

While decoding H264 video via Windows Media Foundation using IMFSourceReader, I am seeing an unexpected y-offset in the U/V plane data. By trial and error, I have found an adjustment that seems to work on all the video sources I've tried, but I'd really like to know if this data layout is expected or documented.

Specifically, sometimes IMFSample::GetTotalLength() returns a total buffer size that is greater than height * stride * 3/2. In that case, the U/V planes are offset from the end of the Y plane data by 2/3 of the extra data, like so:

https://stash.reaper.fm/44192/YV12.png

To be clear, the unexpected part is the N rows of pixels below the Y plane data and above the U plane data. I think all of the other offsets and padding are as expected. Is this data layout expected or documented?

Image before applying adjustment:

https://stash.reaper.fm/44193/before.jpg

Image after applying adjustment:

https://stash.reaper.fm/44194/after.jpg

The video:

https://stash.reaper.fm/44214/johnny.mp4

Here is a complete program that draws the first frame of the video with and without the adjustment. There is no error checking and the YUV to RGB conversion is very basic.

#include <windows.h>
#include <initguid.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>

#include "resource.h"

// Margin, in pixels, used when sizing the dialog and positioning the two bitmaps.
const int marg=16;

// Saturate an integer to the 0..255 range of one 8-bit colour channel.
unsigned char clamp(int v)
{
  if (v < 0)
  {
    return 0;
  }
  if (v > 255)
  {
    return 255;
  }
  return (unsigned char)v;
}

// Convert one Y/U/V sample triple into a 4-byte pixel written at rgb[0..3].
//
// 16 is subtracted from y and 128 from u and v, then fixed-point
// coefficients (scaled by 256, with +128 for rounding) are applied and each
// result is saturated with clamp(). Byte 0 receives the u-weighted term,
// byte 1 the mixed u/v term, byte 2 the v-weighted term, and byte 3 is set
// to zero.
void yuv_to_rgb(unsigned char *rgb, int y, int u, int v)
{
  // Luma contribution plus the rounding bias, shared by all three channels.
  const int luma = (y - 16)*298 + 128;
  const int cu = u - 128;
  const int cv = v - 128;

  rgb[0] = clamp((luma + cu*516) / 256);
  rgb[1] = clamp((luma - cu*100 - cv*208) / 256);
  rgb[2] = clamp((luma + cv*409) / 256);
  rgb[3] = 0;
}

// Dialog procedure for the demo window.
//
// WM_INITDIALOG: reads the first video sample of a hard-coded file through
//   IMFSourceReader as YV12 and converts it into two 32-bit DIB sections:
//   rawbmp (chroma read at the nominal plane offsets) and adjbmp (chroma read
//   with the extra "offs" displacement under investigation).
// WM_PAINT:   blits rawbmp in the upper half and adjbmp in the lower half.
// WM_DESTROY: deletes both bitmaps.
// WM_COMMAND: closes the dialog on IDCANCEL.
//
// Always returns 0. There is intentionally no error checking on the Media
// Foundation or GDI calls; this is a minimal reproduction program.
INT_PTR CALLBACK wndproc(HWND hwndDlg, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
  static HBITMAP rawbmp = NULL, adjbmp=NULL;
  static unsigned int srcw = 0, srch = 0;

  switch (uMsg)
  {
    case WM_INITDIALOG:
    {
      MFStartup(MF_VERSION);

      // Open the file and select only the first video stream.
      IMFSourceReader *reader = NULL;
      MFCreateSourceReaderFromURL(L"C:\\Users\\xxx\\Documents\\johnny.mp4", NULL, &reader);
      reader->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE);
      reader->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE);

      // Request YV12 output from the reader.
      IMFMediaType *fmt = NULL;
      MFCreateMediaType(&fmt);
      fmt->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
      fmt->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_YV12);
      reader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, NULL, fmt);
      fmt->Release();

      // Read back the negotiated type to learn the frame size.
      reader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, &fmt);
      MFGetAttributeSize(fmt, MF_MT_FRAME_SIZE, &srcw, &srch);
      fmt->Release();

      // Pull the first sample and flatten it into one buffer.
      IMFSample *sample = NULL;
      DWORD flags=0;
      INT64 readpos=0;
      IMFMediaBuffer *buffer = NULL;
      DWORD bufsz=0;
      reader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, NULL, &flags, &readpos, &sample);
      sample->GetTotalLength(&bufsz);
      sample->ConvertToContiguousBuffer(&buffer);
      sample->Release();
      reader->Release();

      // Two 32-bit DIB sections of the frame size. biHeight is positive, so
      // the DIBs are bottom-up.
      BITMAPINFO bi = {0};
      bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
      bi.bmiHeader.biWidth = srcw;
      bi.bmiHeader.biHeight = srch;
      bi.bmiHeader.biPlanes = 1;
      bi.bmiHeader.biBitCount = 32;
      bi.bmiHeader.biCompression = BI_RGB;
      unsigned char *raw = NULL, *adj=NULL;
      rawbmp = CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, (void**)&raw, NULL, 0);
      adjbmp = CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, (void**)&adj, NULL, 0);

      IMF2DBuffer *buffer2d = NULL;
      BYTE *bptr = NULL;
      LONG bstride = 0;
      buffer->QueryInterface(&buffer2d);
      buffer2d->Lock2D(&bptr, &bstride);

      // Extra displacement applied to both chroma planes in the adjusted
      // image: two thirds of the total sample length minus the nominal Y
      // plane size. It is zero when bufsz == srch*bstride*3/2.
      // NOTE(review): the surplus presumably comes from the decoder padding
      // the coded height -- confirm against the media type's aperture data.
      int offs=(bufsz*2/3-srch*bstride); // unexpected

      // Destination pointers start at the last DIB row and move backwards so
      // the first source row ends up at the top of the bottom-up bitmap.
      unsigned char *rawptr = raw + srcw*(srch-1)*4;
      unsigned char *adjptr = adj + srcw*(srch-1)*4;
      // Nominal plane positions: Y first, then V, then U a quarter of the
      // Y plane size further on.
      unsigned char *yptr = bptr;
      unsigned char *uptr = bptr + srch*bstride*5/4;
      unsigned char *vptr = bptr + srch*bstride;

      for (unsigned int y=0; y < srch; ++y)
      {
        for (unsigned int x=0; x < srcw; ++x)
        {
          yuv_to_rgb(rawptr+x*4, yptr[x], uptr[x/2], vptr[x/2]);
          yuv_to_rgb(adjptr+x*4, yptr[x], uptr[x/2+offs], vptr[x/2+offs]);
        }

        rawptr -= srcw*4;
        adjptr -= srcw*4;
        yptr += bstride;
        // Chroma rows are half the luma stride and advance every second row.
        if (y&1) uptr += bstride/2;
        if (y&1) vptr += bstride/2;
      }

      buffer2d->Unlock2D();
      buffer2d->Release();
      buffer->Release();

      // Every Media Foundation object has been released; balance MFStartup.
      MFShutdown();

      SetWindowPos(hwndDlg, NULL, 0, 0, srcw+2*marg, 2*(srch+2*marg), SWP_NOZORDER|SWP_NOMOVE|SWP_NOACTIVATE);
    }
    return 0;

    case WM_DESTROY:
    {
      DeleteObject(rawbmp);
      DeleteObject(adjbmp);
    }
    return 0;

    case WM_PAINT:
    {
      RECT r;
      GetClientRect(hwndDlg, &r);
      int h=r.bottom;

      PAINTSTRUCT ps;
      HDC dc = BeginPaint(hwndDlg, &ps);

      HDC srcdc = CreateCompatibleDC(dc);
      HGDIOBJ oldbmp = SelectObject(srcdc, rawbmp);
      BitBlt(dc, marg/2, marg/2, srcw, srch, srcdc, 0, 0, SRCCOPY);
      SelectObject(srcdc, adjbmp);
      BitBlt(dc, marg/2, (h+marg)/2, srcw, srch, srcdc, 0, 0, SRCCOPY);

      // A DC from CreateCompatibleDC must be destroyed with DeleteDC (not
      // ReleaseDC); put the original bitmap back first so neither DIB stays
      // selected into a DC.
      SelectObject(srcdc, oldbmp);
      DeleteDC(srcdc);

      EndPaint(hwndDlg, &ps);
    }
    return 0;

    case WM_COMMAND:
      if (LOWORD(wParam) == IDCANCEL)
      {
        EndDialog(hwndDlg, 0);
      }
    return 0;
  }

  return 0;
}

// Program entry point: runs the IDD_DIALOG dialog with wndproc as its dialog
// procedure and returns 0 once DialogBox returns.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
  HWND owner = GetDesktopWindow();
  LPCTSTR dlgres = MAKEINTRESOURCE(IDD_DIALOG);

  DialogBox(hInstance, dlgres, owner, wndproc);

  return 0;
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文