OpenCV 多线程 (Windows/.NET) 导致视频捕获延迟几秒
我有一个多线程 openCV 程序,它使用 4 个线程来执行以下操作:
线程 1-> 调用 cvQueryFrame()
,它从相机中一一抓取帧图像并将它们存储到 std::vector
inputBuffer
线程 2-> 对 inputBuffer[0]
执行阈值处理,将结果复制到另一个std::vector
调用 filterOutputBuffer
线程 3-> 执行光流算法/为 filterOutputBuffer 中的前两个元素绘制流场,将结果复制到另一个 std::vector
调用 ofOutputBuffer
线程 4-> 使用 cvShowImage(ofOutputBuffer[0])
显示图像
所以本质上我正在设想每个线程对相应输入向量/缓冲区的第一个元素执行任务,并将结果存储在相应输出向量的后面。有点像 3 个工厂工人在装配线上完成自己的工作,然后将最终结果扔进桶中供下一个人使用。
我为所有缓冲区设置了互斥体,并且程序可以正常工作,只是输出从实时摄像机流延迟了几秒钟。
我运行了同一个程序的非多线程版本(使用了一个巨大的 while(true) 循环),并且它是实时运行的,只有偶尔的卡顿。
为什么我的并发实现的性能延迟如此之大?
下面是线程函数:
void writeBuffer()
{
cout << "Thread " << GetCurrentThreadId() << ": Capturing frame from camera!" << endl;
CvCapture *capture = 0;
IplImage *frame = 0;
DWORD waitResult;
if (!(capture = cvCaptureFromCAM(0)))
cout << "Cannot initialize camera!" << endl;
//now start grabbing frames and storing into the vector inputBuffer
while (true)
{
//cout << "Thread " << GetCurrentThreadId() << ": Waiting for mutex to write to input buffer!..." << endl;
waitResult = WaitForSingleObject(hMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
frame = cvQueryFrame(capture); //store the image into frame
if(!frame)
{
cout << "Thread " << GetCurrentThreadId() << ": Error capturing frame from camera!" << endl;
}
//cout << "Thread " << GetCurrentThreadId() << ": Getting Frame..." << endl;
inputBuffer.push_back(*frame);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring mutex..." << endl;
}
if(!ReleaseMutex(hMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing mutex..." << endl;
}
//else cout << "Thread " << GetCurrentThreadId() << ": Done writing to input buffer, Mutex Released!" << endl;
//signal hDoneGettingFrame
PulseEvent(hDoneGettingFrame);
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
// Optical-flow stage of the pipeline.
//
// Loops forever: under fMutex it copies the two oldest entries of
// filterOutputBuffer into frame1/frame2 and erases only the oldest one (so the
// second entry is reused as frame1 on the next pass), runs the elided
// optical-flow step, then appends *frame1_3C to ofOutputBuffer under oMutex.
//
// NOTE(review): when fewer than two frames are queued, the loop releases
// fMutex and `continue`s straight back to WaitForSingleObject, i.e. it polls
// the buffer instead of blocking until work is available.
void opticalFlow()
{
...
DWORD waitResult;
//start taking frames from the vector filterOutputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from input buffer..." << endl;
while(true)
{
waitResult = WaitForSingleObject(fMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//take the first two frames (filterOutputBuffer[0] and [1]) and drop only the first
if(filterOutputBuffer.size() > 1)
{
frame1 = filterOutputBuffer[0];
frame2 = filterOutputBuffer[1];
filterOutputBuffer.erase(filterOutputBuffer.begin());
}
else
{
// Not enough frames yet: release the mutex here because `continue`
// skips the release that follows the switch.
if(!ReleaseMutex(fMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing filter mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Input Buffer empty!" << endl;
continue;
}
break;
default:
// Wait did not return WAIT_OBJECT_0; retry without touching the buffer.
// (The message says "input mutex" although the mutex waited on is fMutex.)
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring input mutex..." << endl;
continue;
}
// Release fMutex before the processing step so the filter thread can keep pushing.
if(!ReleaseMutex(fMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
}
...
//Do optical flow stuff
...
waitResult = WaitForSingleObject(oMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//cout << "Thread " << GetCurrentThreadId() << ": WRITING TO OUTPUT BUFFER..." << endl;
ofOutputBuffer.push_back(*frame1_3C);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring output mutex..." << endl;
}
// NOTE(review): this release also runs after the `default` branch above,
// i.e. even when the wait did not return WAIT_OBJECT_0.
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing output mutex..." << endl;
}
// Not reachable: the while(true) loop above has no break or return.
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
// Thresholding stage of the pipeline.
//
// Loops forever: under hMutex it copies the oldest entry of inputBuffer into
// `frame` and erases it, runs the elided thresholding step, then appends *out
// to filterOutputBuffer under fMutex.
//
// NOTE(review): when inputBuffer is empty, the loop releases hMutex and
// `continue`s straight back to WaitForSingleObject, i.e. it polls the buffer
// instead of blocking until a frame is available.
void filterImage()
{
DWORD waitResult;
...
//start grabbing frames from the vector inputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from input buffer..." << endl;
while(true)
{
waitResult = WaitForSingleObject(hMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//grab first frame and then release mutex
if(inputBuffer.size() > 0)
{
frame = inputBuffer[0];
inputBuffer.erase(inputBuffer.begin());
}
else
{
// Nothing queued: release the mutex here because `continue` skips the
// release that follows the switch.
if(!ReleaseMutex(hMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Input Buffer empty!" << endl;
continue;
}
break;
default:
// Wait did not return WAIT_OBJECT_0; retry without touching the buffer.
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring input mutex..." << endl;
continue;
}
// Release hMutex before the processing step so the capture thread can keep pushing.
if(!ReleaseMutex(hMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
}
...
//Thresholding Image Stuff
...
//cout << "Thread " << GetCurrentThreadId() << ": Waiting to write to output buffer..." << endl;
waitResult = WaitForSingleObject(fMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//cout << "Thread " << GetCurrentThreadId() << ": WRITING TO OUTPUT BUFFER..." << endl;
filterOutputBuffer.push_back(*out);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring filter mutex..." << endl;
}
// NOTE(review): this release also runs after the `default` branch above,
// i.e. even when the wait did not return WAIT_OBJECT_0.
if(!ReleaseMutex(fMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing filter mutex..." << endl;
}
}
void displayImage()
{
DWORD waitResult;
IplImage final;
int c;
cvNamedWindow("Image", CV_WINDOW_AUTOSIZE);
//start grabbing frames from the vector ouputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from output buffer..." << endl;
while (true)
{
waitResult = WaitForSingleObject(oMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
if(ofOutputBuffer.size() > 0)
{
//cout << "Thread " << GetCurrentThreadId() << ": Reading output buffer..." << endl;
final = ofOutputBuffer[0];
ofOutputBuffer.erase(ofOutputBuffer.begin());
}
else
{
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing output mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Output Buffer is empty!" << endl;
continue;
}
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring output mutex..." << endl;
continue;
}
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Done reading output buffer, mutex Released!" << endl;
//cout << "Thread " << GetCurrentThreadId() << ": Displaying Image..." << endl;
cvShowImage("Image", &final);
c = cvWaitKey(1);
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
这是主函数:
void main()
{
hMutex = CreateMutex(NULL, FALSE, NULL);
oMutex = CreateMutex(NULL, FALSE, NULL);
fMutex = CreateMutex(NULL, FALSE, NULL);
hDoneGettingFrame = CreateEvent(NULL, TRUE, FALSE, NULL);
hDoneReadingFrame = CreateEvent(NULL, TRUE, FALSE, NULL);
TName[0]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)writeBuffer, NULL, 0, &ThreadID);
TName[1]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)filterImage, NULL, 0, &ThreadID);
TName[2]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)opticalFlow, NULL, 0, &ThreadID);
TName[3]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)displayImage, NULL, 0, &ThreadID);
WaitForMultipleObjects(4, TName, TRUE, INFINITE);
CloseHandle(TName);
}
I have a multithreaded openCV program that uses 4 threads to do the following:
Thread 1->calls cvQueryFrame()
which grabs the frame images from the camera one by one and stores them into a std::vector
inputBuffer
Thread 2->performs thresholding on inputBuffer[0]
, copies result to another std::vector
called filterOutputBuffer
Thread 3->performs optical flow algorithm / draws flow field for the first two elements in filterOutputBuffer, copies result to another std::vector
called ofOutputBuffer
Thread 4->displays the image using cvShowImage(ofOutputBuffer[0])
So essentially I was envisioning each thread performing the task on the first element of the corresponding input vector/buffer and storing the result at the back of the corresponding output vector. Sort of like 3 factory workers doing their part on the assembly line, then throwing the end result into a bucket for the next guy.
I set up mutexes for all of the buffers and the program works, only the output is delayed several seconds from the live camera stream.
I ran a non-multithreaded version of the same program (that used one giant while(true) loop) and it ran in real-time with only the occasional stutter.
Why is my concurrent implementation delayed in performance so much?
Below are the thread functions:
void writeBuffer()
{
cout << "Thread " << GetCurrentThreadId() << ": Capturing frame from camera!" << endl;
CvCapture *capture = 0;
IplImage *frame = 0;
DWORD waitResult;
if (!(capture = cvCaptureFromCAM(0)))
cout << "Cannot initialize camera!" << endl;
//now start grabbing frames and storing into the vector inputBuffer
while (true)
{
//cout << "Thread " << GetCurrentThreadId() << ": Waiting for mutex to write to input buffer!..." << endl;
waitResult = WaitForSingleObject(hMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
frame = cvQueryFrame(capture); //store the image into frame
if(!frame)
{
cout << "Thread " << GetCurrentThreadId() << ": Error capturing frame from camera!" << endl;
}
//cout << "Thread " << GetCurrentThreadId() << ": Getting Frame..." << endl;
inputBuffer.push_back(*frame);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring mutex..." << endl;
}
if(!ReleaseMutex(hMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing mutex..." << endl;
}
//else cout << "Thread " << GetCurrentThreadId() << ": Done writing to input buffer, Mutex Released!" << endl;
//signal hDoneGettingFrame
PulseEvent(hDoneGettingFrame);
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
// Optical-flow stage of the pipeline.
//
// Loops forever: under fMutex it copies the two oldest entries of
// filterOutputBuffer into frame1/frame2 and erases only the oldest one (so the
// second entry is reused as frame1 on the next pass), runs the elided
// optical-flow step, then appends *frame1_3C to ofOutputBuffer under oMutex.
//
// NOTE(review): when fewer than two frames are queued, the loop releases
// fMutex and `continue`s straight back to WaitForSingleObject, i.e. it polls
// the buffer instead of blocking until work is available.
void opticalFlow()
{
...
DWORD waitResult;
//start taking frames from the vector filterOutputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from input buffer..." << endl;
while(true)
{
waitResult = WaitForSingleObject(fMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//take the first two frames (filterOutputBuffer[0] and [1]) and drop only the first
if(filterOutputBuffer.size() > 1)
{
frame1 = filterOutputBuffer[0];
frame2 = filterOutputBuffer[1];
filterOutputBuffer.erase(filterOutputBuffer.begin());
}
else
{
// Not enough frames yet: release the mutex here because `continue`
// skips the release that follows the switch.
if(!ReleaseMutex(fMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing filter mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Input Buffer empty!" << endl;
continue;
}
break;
default:
// Wait did not return WAIT_OBJECT_0; retry without touching the buffer.
// (The message says "input mutex" although the mutex waited on is fMutex.)
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring input mutex..." << endl;
continue;
}
// Release fMutex before the processing step so the filter thread can keep pushing.
if(!ReleaseMutex(fMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
}
...
//Do optical flow stuff
...
waitResult = WaitForSingleObject(oMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//cout << "Thread " << GetCurrentThreadId() << ": WRITING TO OUTPUT BUFFER..." << endl;
ofOutputBuffer.push_back(*frame1_3C);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring output mutex..." << endl;
}
// NOTE(review): this release also runs after the `default` branch above,
// i.e. even when the wait did not return WAIT_OBJECT_0.
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing output mutex..." << endl;
}
// Not reachable: the while(true) loop above has no break or return.
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
// Thresholding stage of the pipeline.
//
// Loops forever: under hMutex it copies the oldest entry of inputBuffer into
// `frame` and erases it, runs the elided thresholding step, then appends *out
// to filterOutputBuffer under fMutex.
//
// NOTE(review): when inputBuffer is empty, the loop releases hMutex and
// `continue`s straight back to WaitForSingleObject, i.e. it polls the buffer
// instead of blocking until a frame is available.
void filterImage()
{
DWORD waitResult;
...
//start grabbing frames from the vector inputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from input buffer..." << endl;
while(true)
{
waitResult = WaitForSingleObject(hMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//grab first frame and then release mutex
if(inputBuffer.size() > 0)
{
frame = inputBuffer[0];
inputBuffer.erase(inputBuffer.begin());
}
else
{
// Nothing queued: release the mutex here because `continue` skips the
// release that follows the switch.
if(!ReleaseMutex(hMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Input Buffer empty!" << endl;
continue;
}
break;
default:
// Wait did not return WAIT_OBJECT_0; retry without touching the buffer.
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring input mutex..." << endl;
continue;
}
// Release hMutex before the processing step so the capture thread can keep pushing.
if(!ReleaseMutex(hMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
}
...
//Thresholding Image Stuff
...
//cout << "Thread " << GetCurrentThreadId() << ": Waiting to write to output buffer..." << endl;
waitResult = WaitForSingleObject(fMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//cout << "Thread " << GetCurrentThreadId() << ": WRITING TO OUTPUT BUFFER..." << endl;
filterOutputBuffer.push_back(*out);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring filter mutex..." << endl;
}
// NOTE(review): this release also runs after the `default` branch above,
// i.e. even when the wait did not return WAIT_OBJECT_0.
if(!ReleaseMutex(fMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing filter mutex..." << endl;
}
}
void displayImage()
{
DWORD waitResult;
IplImage final;
int c;
cvNamedWindow("Image", CV_WINDOW_AUTOSIZE);
//start grabbing frames from the vector ouputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from output buffer..." << endl;
while (true)
{
waitResult = WaitForSingleObject(oMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
if(ofOutputBuffer.size() > 0)
{
//cout << "Thread " << GetCurrentThreadId() << ": Reading output buffer..." << endl;
final = ofOutputBuffer[0];
ofOutputBuffer.erase(ofOutputBuffer.begin());
}
else
{
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing output mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Output Buffer is empty!" << endl;
continue;
}
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring output mutex..." << endl;
continue;
}
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Done reading output buffer, mutex Released!" << endl;
//cout << "Thread " << GetCurrentThreadId() << ": Displaying Image..." << endl;
cvShowImage("Image", &final);
c = cvWaitKey(1);
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
Here is the main function:
void main()
{
hMutex = CreateMutex(NULL, FALSE, NULL);
oMutex = CreateMutex(NULL, FALSE, NULL);
fMutex = CreateMutex(NULL, FALSE, NULL);
hDoneGettingFrame = CreateEvent(NULL, TRUE, FALSE, NULL);
hDoneReadingFrame = CreateEvent(NULL, TRUE, FALSE, NULL);
TName[0]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)writeBuffer, NULL, 0, &ThreadID);
TName[1]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)filterImage, NULL, 0, &ThreadID);
TName[2]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)opticalFlow, NULL, 0, &ThreadID);
TName[3]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)displayImage, NULL, 0, &ThreadID);
WaitForMultipleObjects(4, TName, TRUE, INFINITE);
CloseHandle(TName);
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(4)
多线程应用程序在线程之间共享 CPU 时间。因此,当另一个线程想要处于运行状态时,存在上下文切换。线程之间的切换可能会增加 CPU 时间,从而导致应用程序变慢。
A multithreaded application shares CPU time between its threads, so a context switch occurs whenever another thread needs to run. Switching between threads may increase the CPU time consumed, which makes the application slower.
尝试使用 threadPool,它将最大限度地减少 CPU 在线程之间移动所消耗的时间。
Try using a thread pool; it will minimize the CPU time spent switching between threads.
好吧,首先,如果我稍微滚动一下您的第一个线程函数的循环:
换句话说,您的第一个线程在其循环的几乎整个执行期间都持有队列互斥体,从而使第二个线程无法取得任何进展。我猜其他所有线程的代码也是如此?
当在管道中的生产者-消费者队列中推送数据时,您应该只在最短的时间内保持队列锁定。对缓冲区对象进行处理,然后锁定队列,推送对象引用,然后立即解锁队列。然后发出信号量(或类似信号),以便下一个线程可以在可以时处理该对象。
不要让队列被锁定!互斥体不应该在那里让其他线程等待工作 - 它是为了保护队列免受多重访问。您需要一些其他信号来维护队列计数并使线程等待工作。无论您在网上看到多少其他示例,都不要为此使用事件 - 如果您必须滚动自己的生产者-消费者队列,请使用信号量。
更好 - 使用已经可以工作的 PC 队列类 - 查看 BlockingCollections 类。
Well, to start with, if I roll the loop of your first thread function around a bit:
To put it another way, your first thread holds onto the queue mutex for almost the entire run of the first thread loop, so preventing the second thread from getting anywhere. I'm guessing that all the other threads' code is the same?
When pushing data round producer-consumer queues in a pipeline, the idea is that you should keep the queue locked for the minimum time only. Do your processing on a buffer object, then lock the queue, push on the object reference and then immediately unlock the queue. Then signal a semaphore, (or the like), so that the next thread can process the object when it can.
Don't keep the queue locked! The mutex should not be there for the other thread to wait on for work - it's to protect the queue from multiple access. You need some other signaling to maintain the queue count and for the threads to wait on for work. Don't use an event for that, no matter how many other examples you've seen on the net - use a semaphore if you have to roll your own producer-consumer queue.
Better - use a P-C queue class that already works - look at the BlockingCollections classes.
信号量成功了!我没有使用单独的互斥体,而是创建了一个信号量并让所有线程通过它工作。
谢谢,现在运行得又快又流畅!
并且在线程中...
Semaphores did the trick! Instead of using separate mutexes, I just created a semaphore and let all the threads work through that.
Thanks, it's running fast and smooth now!
And in the threads...