如何获取 directshow 网络摄像头视频流的宽度和高度

发布于 2024-09-05 05:20:50 字数 3154 浏览 4 评论 0原文

我找到了一些代码，可以让我访问网络摄像头的原始像素数据。不过，我需要知道图像宽度、高度、像素格式，最好还有数据步长（间距、内存填充，或随你怎么称呼的东西），以防它不等于宽度×每像素字节数。

#include <windows.h>
#include <dshow.h>

#pragma comment(lib,"Strmiids.lib")

// DsHook(obj, slot, fn): patch entry `slot` of obj's COM vtable to point at
// `fn`, first saving the original function pointer into the global `fn`_
// (the `!c##_` guard makes this a run-once hook). VirtualProtect unprotects
// both the save location and the vtable slot; it uses a DWORD `no` that must
// exist in the caller's scope. NOTE(review): the size argument is 4 bytes,
// while an INT_PTR is 8 on x64 -- harmless in practice only because
// VirtualProtect operates on whole pages, but sizeof(INT_PTR) would be correct.
#define DsHook(a,b,c) if (!c##_) { INT_PTR* p=b+*(INT_PTR**)a;   VirtualProtect(&c##_,4,PAGE_EXECUTE_READWRITE,&no);\
                                          *(INT_PTR*)&c##_=*p;   VirtualProtect(p,    4,PAGE_EXECUTE_READWRITE,&no);   *p=(INT_PTR)c; }


// Here you get image video data in buf / len. Process it before calling Receive_ because the renderer deallocates it.
// Receive_ holds the renderer's original IMemInputPin::Receive vtable entry,
// saved by DsHook before the slot was patched; Receive below is the hook that
// replaces it.
HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ; 
// Hook body: capture the sample's raw buffer and its valid byte length, then
// forward to the original Receive so the renderer still consumes the sample.
HRESULT   __stdcall   Receive    ( void* inst, IMediaSample *smp ) {     
    // buf/len are the raw frame bytes; width/height/stride are NOT available
    // here -- that is exactly what the question asks about (see the
    // commented-out GetMediaType probe below).
    BYTE*     buf;    smp->GetPointer(&buf); DWORD len = smp->GetActualDataLength();
    //AM_MEDIA_TYPE* info;
    //smp->GetMediaType(&info);
    HRESULT   ret  =  Receive_   ( inst, smp );   
    return    ret; 
}

// Builds a DirectShow capture graph around the first video-input device,
// hooks the renderer's IMemInputPin::Receive so every frame passes through
// Receive() above, then pumps messages until WM_QUIT.
// Fixes over the original: every interface pointer is checked before use (the
// original dereferenced NULL when no webcam was attached), all COM interfaces
// are released, COM is uninitialized, and WinMain returns a value instead of
// falling off the end (undefined behavior).
int WINAPI WinMain(HINSTANCE inst,HINSTANCE prev,LPSTR cmd,int show){
    HRESULT hr = CoInitialize(0); MSG msg={0}; DWORD no;
    int rc = 1;  // exit code: 1 until the graph is up and running

    IGraphBuilder*  graph = 0; IMediaControl* ctrl = 0; ICreateDevEnum* devs = 0;
    IEnumMoniker*   cams  = 0; IMoniker*      mon  = 0; IBaseFilter*    cam  = 0;
    IEnumPins*      pins  = 0; IPin*          pin  = 0; IEnumFilters*   fil  = 0;
    IBaseFilter*    rnd   = 0; IMemInputPin*  mem  = 0;

    if (FAILED(hr)) return rc;

    hr = CoCreateInstance(CLSID_FilterGraph, 0, CLSCTX_INPROC, IID_IGraphBuilder, (void**)&graph);
    if (FAILED(hr)) goto cleanup;
    hr = graph->QueryInterface(IID_IMediaControl, (void**)&ctrl);
    if (FAILED(hr)) goto cleanup;

    hr = CoCreateInstance(CLSID_SystemDeviceEnum, 0, CLSCTX_INPROC, IID_ICreateDevEnum, (void**)&devs);
    if (FAILED(hr)) goto cleanup;
    // S_FALSE (empty category) leaves cams NULL -- treat it as "no webcam".
    hr = devs->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &cams, 0);
    if (hr != S_OK || !cams) goto cleanup;
    if (cams->Next(1, &mon, 0) != S_OK) goto cleanup;    // first capture device (webcam?)

    hr = mon->BindToObject(0, 0, IID_IBaseFilter, (void**)&cam);
    if (FAILED(hr)) goto cleanup;
    hr = graph->AddFilter(cam, L"Capture Source");       // add webcam to graph as source
    if (FAILED(hr)) goto cleanup;
    hr = cam->EnumPins(&pins);                           // need its output pin to autogenerate the rest
    if (FAILED(hr)) goto cleanup;
    if (pins->Next(1, &pin, 0) != S_OK) goto cleanup;
    hr = graph->Render(pin);                             // builds the whole chain, incl. MJPG decode on some webcams
    if (FAILED(hr)) goto cleanup;
    pin->Release();  pin  = 0;                           // re-used below for the renderer's input pin
    pins->Release(); pins = 0;

    hr = graph->EnumFilters(&fil);                       // newly added filters are enumerated first,
    if (FAILED(hr)) goto cleanup;
    if (fil->Next(1, &rnd, 0) != S_OK) goto cleanup;     // so the first one is the renderer
    hr = rnd->EnumPins(&pins);                           // frames are pumped into the renderer's input pin
    if (FAILED(hr)) goto cleanup;
    if (pins->Next(1, &pin, 0) != S_OK) goto cleanup;
    hr = pin->QueryInterface(IID_IMemInputPin, (void**)&mem);
    if (FAILED(hr)) goto cleanup;

    DsHook(mem, 6, Receive);  // vtable slot 6 = IMemInputPin::Receive -> redirect to our hook

    hr = ctrl->Run();
    if (FAILED(hr)) goto cleanup;
    rc = 0;

    while (GetMessage(&msg, 0, 0, 0)) {
        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }
    rc = (int)msg.wParam;

cleanup:
    if (mem)   mem->Release();
    if (pin)   pin->Release();
    if (pins)  pins->Release();
    if (rnd)   rnd->Release();
    if (fil)   fil->Release();
    if (cam)   cam->Release();
    if (mon)   mon->Release();
    if (cams)  cams->Release();
    if (devs)  devs->Release();
    if (ctrl)  { ctrl->Stop(); ctrl->Release(); }
    if (graph) graph->Release();
    CoUninitialize();
    return rc;
}

如果可以的话,奖励积分告诉我如何让这个东西不渲染窗口但仍然让我访问图像数据。

I found a bit of code that gets me access to the raw pixel data from my webcam. However I need to know the image width, height, pixel format and preferably the data stride(pitch, memory padding or whatever you want to call it) if its ever gonna be something other than the width * bytes per pixel

#include <windows.h>
#include <dshow.h>

#pragma comment(lib,"Strmiids.lib")

// DsHook(obj, slot, fn): patch entry `slot` of obj's COM vtable to point at
// `fn`, first saving the original function pointer into the global `fn`_
// (the `!c##_` guard makes this a run-once hook). VirtualProtect unprotects
// both the save location and the vtable slot; it uses a DWORD `no` that must
// exist in the caller's scope. NOTE(review): the size argument is 4 bytes,
// while an INT_PTR is 8 on x64 -- harmless in practice only because
// VirtualProtect operates on whole pages, but sizeof(INT_PTR) would be correct.
#define DsHook(a,b,c) if (!c##_) { INT_PTR* p=b+*(INT_PTR**)a;   VirtualProtect(&c##_,4,PAGE_EXECUTE_READWRITE,&no);\
                                          *(INT_PTR*)&c##_=*p;   VirtualProtect(p,    4,PAGE_EXECUTE_READWRITE,&no);   *p=(INT_PTR)c; }


// Here you get image video data in buf / len. Process it before calling Receive_ because the renderer deallocates it.
// Receive_ holds the renderer's original IMemInputPin::Receive vtable entry,
// saved by DsHook before the slot was patched; Receive below is the hook that
// replaces it.
HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ; 
// Hook body: capture the sample's raw buffer and its valid byte length, then
// forward to the original Receive so the renderer still consumes the sample.
HRESULT   __stdcall   Receive    ( void* inst, IMediaSample *smp ) {     
    // buf/len are the raw frame bytes; width/height/stride are NOT available
    // here -- that is exactly what the question asks about (see the
    // commented-out GetMediaType probe below).
    BYTE*     buf;    smp->GetPointer(&buf); DWORD len = smp->GetActualDataLength();
    //AM_MEDIA_TYPE* info;
    //smp->GetMediaType(&info);
    HRESULT   ret  =  Receive_   ( inst, smp );   
    return    ret; 
}

// Builds a DirectShow capture graph around the first video-input device,
// hooks the renderer's IMemInputPin::Receive so every frame passes through
// Receive() above, then pumps messages until WM_QUIT.
// Fixes over the original: every interface pointer is checked before use (the
// original dereferenced NULL when no webcam was attached), all COM interfaces
// are released, COM is uninitialized, and WinMain returns a value instead of
// falling off the end (undefined behavior).
int WINAPI WinMain(HINSTANCE inst,HINSTANCE prev,LPSTR cmd,int show){
    HRESULT hr = CoInitialize(0); MSG msg={0}; DWORD no;
    int rc = 1;  // exit code: 1 until the graph is up and running

    IGraphBuilder*  graph = 0; IMediaControl* ctrl = 0; ICreateDevEnum* devs = 0;
    IEnumMoniker*   cams  = 0; IMoniker*      mon  = 0; IBaseFilter*    cam  = 0;
    IEnumPins*      pins  = 0; IPin*          pin  = 0; IEnumFilters*   fil  = 0;
    IBaseFilter*    rnd   = 0; IMemInputPin*  mem  = 0;

    if (FAILED(hr)) return rc;

    hr = CoCreateInstance(CLSID_FilterGraph, 0, CLSCTX_INPROC, IID_IGraphBuilder, (void**)&graph);
    if (FAILED(hr)) goto cleanup;
    hr = graph->QueryInterface(IID_IMediaControl, (void**)&ctrl);
    if (FAILED(hr)) goto cleanup;

    hr = CoCreateInstance(CLSID_SystemDeviceEnum, 0, CLSCTX_INPROC, IID_ICreateDevEnum, (void**)&devs);
    if (FAILED(hr)) goto cleanup;
    // S_FALSE (empty category) leaves cams NULL -- treat it as "no webcam".
    hr = devs->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &cams, 0);
    if (hr != S_OK || !cams) goto cleanup;
    if (cams->Next(1, &mon, 0) != S_OK) goto cleanup;    // first capture device (webcam?)

    hr = mon->BindToObject(0, 0, IID_IBaseFilter, (void**)&cam);
    if (FAILED(hr)) goto cleanup;
    hr = graph->AddFilter(cam, L"Capture Source");       // add webcam to graph as source
    if (FAILED(hr)) goto cleanup;
    hr = cam->EnumPins(&pins);                           // need its output pin to autogenerate the rest
    if (FAILED(hr)) goto cleanup;
    if (pins->Next(1, &pin, 0) != S_OK) goto cleanup;
    hr = graph->Render(pin);                             // builds the whole chain, incl. MJPG decode on some webcams
    if (FAILED(hr)) goto cleanup;
    pin->Release();  pin  = 0;                           // re-used below for the renderer's input pin
    pins->Release(); pins = 0;

    hr = graph->EnumFilters(&fil);                       // newly added filters are enumerated first,
    if (FAILED(hr)) goto cleanup;
    if (fil->Next(1, &rnd, 0) != S_OK) goto cleanup;     // so the first one is the renderer
    hr = rnd->EnumPins(&pins);                           // frames are pumped into the renderer's input pin
    if (FAILED(hr)) goto cleanup;
    if (pins->Next(1, &pin, 0) != S_OK) goto cleanup;
    hr = pin->QueryInterface(IID_IMemInputPin, (void**)&mem);
    if (FAILED(hr)) goto cleanup;

    DsHook(mem, 6, Receive);  // vtable slot 6 = IMemInputPin::Receive -> redirect to our hook

    hr = ctrl->Run();
    if (FAILED(hr)) goto cleanup;
    rc = 0;

    while (GetMessage(&msg, 0, 0, 0)) {
        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }
    rc = (int)msg.wParam;

cleanup:
    if (mem)   mem->Release();
    if (pin)   pin->Release();
    if (pins)  pins->Release();
    if (rnd)   rnd->Release();
    if (fil)   fil->Release();
    if (cam)   cam->Release();
    if (mon)   mon->Release();
    if (cams)  cams->Release();
    if (devs)  devs->Release();
    if (ctrl)  { ctrl->Stop(); ctrl->Release(); }
    if (graph) graph->Release();
    CoUninitialize();
    return rc;
}

Bonus points if you can tell me how get this thing not to render a window but still get me access to the image data.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论（3）

猫弦 2024-09-12 05:20:50

那真的很难看。请不要这样做。相反,插入一个像样本采集器一样的直通过滤器(正如我回复您关于同一主题的其他帖子一样)。将样本采集器连接到空渲染器可以以干净、安全的方式获取这些位,而无需渲染图像。

要获得跨度,您需要通过 ISampleGrabber 或 IPin::ConnectionMediaType 获取媒体类型。格式块将是 VIDEOINFOHEADER 或 VIDEOINFOHEADER2(检查格式 GUID)。 bitmapinfo 标头 biWidth 和 biHeight 定义位图尺寸(以及步长)。如果 RECT 不为空,则定义位图中的相关图像区域。

接触完这篇文章后我现在必须洗手。

That's really ugly. Please don't do that. Insert a pass-through filter like the sample grabber instead (as I replied to your other post on the same topic). Connecting the sample grabber to the null renderer gets you the bits in a clean, safe way without rendering the image.

To get the stride, you need to get the media type, either through ISampleGrabber or IPin::ConnectionMediaType. The format block will be either a VIDEOINFOHEADER or a VIDEOINFOHEADER2 (check the format GUID). The bitmapinfo header biWidth and biHeight defines the bitmap dimensions (and hence stride). If the RECT is not empty, then that defines the relevant image area within the bitmap.

I'm going to have to wash my hands now after touching this post.

蓝眼睛不忧郁 2024-09-12 05:20:50

我为你感到难过。当接口被创建时,可能没有最好的程序员。

// Here you get image video data in buf / len. Process it before calling Receive_ because renderer dealocates it.

BITMAPINFOHEADER bmpInfo; // bitmap header of the current frame (biWidth = visible width after correction below)
int stride;               // data stride in pixels, taken from biWidth before the rcTarget correction

HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ;
// Hooked IMemInputPin::Receive: forwards the sample, then records its format.
// Fixes over the original answer: the media-type variable is consistently
// named `info` (the original dereferenced an undeclared `type`), the closing
// braces of the error/else branches are no longer swallowed by // comments
// (which made the snippet un-compilable), and DeleteMediaType runs only when
// GetMediaType actually produced a type -- it returns S_FALSE with a NULL
// pointer whenever the format is unchanged, the common case for every frame
// after the first, so the original freed an uninitialized pointer.
HRESULT   __stdcall   Receive    ( void* inst, IMediaSample *smp )
{
    BYTE* buf;
    smp->GetPointer(&buf);
    DWORD len = smp->GetActualDataLength();
    HRESULT ret = Receive_( inst, smp );

    AM_MEDIA_TYPE* info = NULL;
    HRESULT hr = smp->GetMediaType( &info );
    if ( hr == S_OK && info != NULL )
    {
        if ( info->formattype == FORMAT_VideoInfo )
        {
            const VIDEOINFOHEADER * vi = reinterpret_cast<VIDEOINFOHEADER*>( info->pbFormat );
            const BITMAPINFOHEADER & bmiHeader = vi->bmiHeader;
            //! biWidth carries the data stride (in pixels) of the buffer
            stride = bmiHeader.biWidth;

            bmpInfo = bmiHeader;
            int width = ( vi->rcTarget.right - vi->rcTarget.left );
            //! replace the stride by the actual visible width when rcTarget is set
            if ( width != 0 )
                bmpInfo.biWidth = width;
        }
        // else: unsupported format block (e.g. FORMAT_VideoInfo2) -- ignored
        // NOTE(review): DeleteMediaType comes from the DirectShow base classes,
        // not the plain SDK headers -- confirm it is available in this build.
        DeleteMediaType( info );
    }
    // hr == S_FALSE: media type unchanged since the last sample -- nothing to do.

    return ret;
}

I am sorry for you. When the interface was created there were probably not the best programmer to it.

// Here you get image video data in buf / len. Process it before calling Receive_ because renderer dealocates it.

BITMAPINFOHEADER bmpInfo; // bitmap header of the current frame (biWidth = visible width after correction below)
int stride;               // data stride in pixels, taken from biWidth before the rcTarget correction

HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ;
// Hooked IMemInputPin::Receive: forwards the sample, then records its format.
// Fixes over the original answer: the media-type variable is consistently
// named `info` (the original dereferenced an undeclared `type`), the closing
// braces of the error/else branches are no longer swallowed by // comments
// (which made the snippet un-compilable), and DeleteMediaType runs only when
// GetMediaType actually produced a type -- it returns S_FALSE with a NULL
// pointer whenever the format is unchanged, the common case for every frame
// after the first, so the original freed an uninitialized pointer.
HRESULT   __stdcall   Receive    ( void* inst, IMediaSample *smp )
{
    BYTE* buf;
    smp->GetPointer(&buf);
    DWORD len = smp->GetActualDataLength();
    HRESULT ret = Receive_( inst, smp );

    AM_MEDIA_TYPE* info = NULL;
    HRESULT hr = smp->GetMediaType( &info );
    if ( hr == S_OK && info != NULL )
    {
        if ( info->formattype == FORMAT_VideoInfo )
        {
            const VIDEOINFOHEADER * vi = reinterpret_cast<VIDEOINFOHEADER*>( info->pbFormat );
            const BITMAPINFOHEADER & bmiHeader = vi->bmiHeader;
            //! biWidth carries the data stride (in pixels) of the buffer
            stride = bmiHeader.biWidth;

            bmpInfo = bmiHeader;
            int width = ( vi->rcTarget.right - vi->rcTarget.left );
            //! replace the stride by the actual visible width when rcTarget is set
            if ( width != 0 )
                bmpInfo.biWidth = width;
        }
        // else: unsupported format block (e.g. FORMAT_VideoInfo2) -- ignored
        // NOTE(review): DeleteMediaType comes from the DirectShow base classes,
        // not the plain SDK headers -- confirm it is available in this build.
        DeleteMediaType( info );
    }
    // hr == S_FALSE: media type unchanged since the last sample -- nothing to do.

    return ret;
}
蓝海似她心 2024-09-12 05:20:50

以下是如何添加抑制渲染窗口的空渲染器。创建 IGraphBuilder* 之后直接添加

//create null renderer and add null renderer to graph
// NOTE(review): fragment, not a full program -- `graph` and `hr` come from the
// surrounding WinMain. Per the answer, insert this right after creating the
// IGraphBuilder* so that the graph uses this filter instead of opening a
// video window. m_pNULLRenderer should be Release()d during cleanup.
IBaseFilter *m_pNULLRenderer;  hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER, IID_IBaseFilter, (void **)&m_pNULLRenderer);
                               hr = graph->AddFilter(m_pNULLRenderer, L"Null Renderer");

dshook hack 是我所知道的唯一优雅的 directshow 代码。

根据我的经验,DirectShow API 是一场复杂的噩梦,即使是最简单的操作也需要数百行代码,并且需要调整整个编程范例才能访问您的网络摄像头。因此,如果这段代码适合您,就像它对我一样,请使用它并享受更少的代码行维护。

Here's how to add the Null Renderer that suppresses the rendering window. Add directly after creating the IGraphBuilder*

//create null renderer and add null renderer to graph
// NOTE(review): fragment, not a full program -- `graph` and `hr` come from the
// surrounding WinMain. Per the answer, insert this right after creating the
// IGraphBuilder* so that the graph uses this filter instead of opening a
// video window. m_pNULLRenderer should be Release()d during cleanup.
IBaseFilter *m_pNULLRenderer;  hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER, IID_IBaseFilter, (void **)&m_pNULLRenderer);
                               hr = graph->AddFilter(m_pNULLRenderer, L"Null Renderer");

That dshook hack is the only elegant directshow code of which I am aware.

In my experience, the DirectShow API is a convoluted nightmare, requiring hundreds of lines of code to do even the simplest operation, and adapting a whole programming paradigm in order to access your web camera. So if this code does the job for you, as it did for me, use it and enjoy fewer lines of code to maintain.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文