C++ OpenMP广度首次搜索
我正在使用 C++ 实现广度优先搜索。 我的实现看起来像这样;
BFS.h
#pragma once
#include "Includes.h"
class BFS {
int _num_v;
int _num_l;
std::vector<std::vector<int>> _adj_list;
//std::vector<std::pair<int, int>>* _adj_list;
//std::list<int>* adj_list;
public:
std::vector<int> m_sbfs_results;
std::vector<int> m_pbfs_results;
std::vector<int> m_abfs_results;
BFS(int v, int l)
{
this->_num_v = v;
this->_num_l = l;
this->_adj_list.resize(_num_l);
this->m_sbfs_results.reserve(_num_v);
this->m_pbfs_results.reserve(_num_v);
this->m_abfs_results.reserve(_num_v);
//_adj_list = new std::vector<std::pair<int, int>>[_num_v];
//adj_list = new std::list<int>[num_v];
}
void add_edge(int v1, int v2/*, int w*/);
void sbfs(int start);
void pbfs(int start, int n_threads);
void abfs(int start, int n_threads);
void verify_bfs(std::vector<int> a, std::vector<int> b)
{
for (int i = 0; i < _num_v; i++)
{
if (a[i] != b[i])
{
std::cout << "a[" << i << "]=" << a[i] <<
" b[" << i << "]=" << b[i] << "\n";
}
}
}
};
BFS.cpp
#include "BFS.h"
#include <unordered_map>
void BFS::add_edge(int v, int w)
{
//std::cout << "v: " << v << " w: " << w << std::endl;
_adj_list.at(v).push_back(w);
_adj_list.at(w).push_back(v);
}
void BFS::sbfs(int start)
{
std::unordered_map<int, bool> visited(_num_l);
for (int i = 0; i < _num_l; i++)
visited[i] = false;
std::queue<int> queue;
visited[start] = true;
queue.push(start);
std::cout << "Sequential BFS starting from node " << start << "\n";
//std::vector<int>::iterator i;
while (!queue.empty())
{
start = queue.front();
//std::cout << start << " ";
m_sbfs_results.push_back(start);
queue.pop();
for (auto& node : _adj_list[start])
{
if (!visited[node])
{
visited[node] = true;
queue.push(node);
}
}
//for (i = _adj_list[start].begin();
// i != _adj_list[start].end();
// ++i)
//{
// if (!visited[*i])
// {
// queue.push(*i);
// visited[*i] = true;
// }
//}
}
}
void BFS::pbfs(int start, int n_threads)
{
omp_set_num_threads(n_threads);
std::unordered_map<int, bool> visited(_num_l);
#pragma omp for nowait
for (int i = 0; i < _num_l; i++)
visited[i] = false;
std::queue<int> local_queue; // T
std::queue<int> queue; // S
visited[start] = true;
queue.push(start);
std::cout << "Parallel BFS starting from node " << start << "\n";
/*
1. Each thread should store locally all the new verticies it discovers in T (queue)
2. Place them all in common array S (queue)
3. Go again!
*/
while (!queue.empty())
{
#pragma omp parallel firstprivate(local_queue)
{
#pragma omp critical
for (int i = 0; i < queue.size(); i++)
{
if (!queue.empty())
{
start = queue.front();
//std::cout << start << " ";
m_pbfs_results.push_back(start);
queue.pop();
}
}
#pragma omp barrier
#pragma omp for schedule(static) nowait
for (int i = 0; i < _adj_list[start].size(); i++)
{
int node = _adj_list[start][i];
if (!visited[node])
{
visited[node] = true;
local_queue.push(node);
}
}
#pragma omp barrier
#pragma omp critical
for (int i = 0; i < local_queue.size(); i++)
{
queue.push(local_queue.front());
local_queue.pop();
}
}
}
}
每次我尝试运行并行 BFS 时,放入 m_pbfs_results
中的数字量都是不同的。一次运行,向量中可能有 421 个项目,74、302,...顺序运行将始终在 4194299 个项目上具有相同的结果。 当单步执行代码时,数字看起来都很好。由于顶点太多,我无法真正遍历整个列表。我知道 Push_back 不是线程安全的,但这在这里并不重要,因为它位于关键部分内。
太长了;博士 为什么关键部分中的 *_results.push_back(start);
不能按预期工作?
I'm working on an implementation of breadth first search using C++.
My implementation looks something like;
BFS.h
#pragma once
#include "Includes.h"
class BFS {
int _num_v;
int _num_l;
std::vector<std::vector<int>> _adj_list;
//std::vector<std::pair<int, int>>* _adj_list;
//std::list<int>* adj_list;
public:
std::vector<int> m_sbfs_results;
std::vector<int> m_pbfs_results;
std::vector<int> m_abfs_results;
BFS(int v, int l)
{
this->_num_v = v;
this->_num_l = l;
this->_adj_list.resize(_num_l);
this->m_sbfs_results.reserve(_num_v);
this->m_pbfs_results.reserve(_num_v);
this->m_abfs_results.reserve(_num_v);
//_adj_list = new std::vector<std::pair<int, int>>[_num_v];
//adj_list = new std::list<int>[num_v];
}
void add_edge(int v1, int v2/*, int w*/);
void sbfs(int start);
void pbfs(int start, int n_threads);
void abfs(int start, int n_threads);
void verify_bfs(std::vector<int> a, std::vector<int> b)
{
for (int i = 0; i < _num_v; i++)
{
if (a[i] != b[i])
{
std::cout << "a[" << i << "]=" << a[i] <<
" b[" << i << "]=" << b[i] << "\n";
}
}
}
};
BFS.cpp
#include "BFS.h"
#include <unordered_map>
void BFS::add_edge(int v, int w)
{
//std::cout << "v: " << v << " w: " << w << std::endl;
_adj_list.at(v).push_back(w);
_adj_list.at(w).push_back(v);
}
void BFS::sbfs(int start)
{
std::unordered_map<int, bool> visited(_num_l);
for (int i = 0; i < _num_l; i++)
visited[i] = false;
std::queue<int> queue;
visited[start] = true;
queue.push(start);
std::cout << "Sequential BFS starting from node " << start << "\n";
//std::vector<int>::iterator i;
while (!queue.empty())
{
start = queue.front();
//std::cout << start << " ";
m_sbfs_results.push_back(start);
queue.pop();
for (auto& node : _adj_list[start])
{
if (!visited[node])
{
visited[node] = true;
queue.push(node);
}
}
//for (i = _adj_list[start].begin();
// i != _adj_list[start].end();
// ++i)
//{
// if (!visited[*i])
// {
// queue.push(*i);
// visited[*i] = true;
// }
//}
}
}
void BFS::pbfs(int start, int n_threads)
{
omp_set_num_threads(n_threads);
std::unordered_map<int, bool> visited(_num_l);
#pragma omp for nowait
for (int i = 0; i < _num_l; i++)
visited[i] = false;
std::queue<int> local_queue; // T
std::queue<int> queue; // S
visited[start] = true;
queue.push(start);
std::cout << "Parallel BFS starting from node " << start << "\n";
/*
1. Each thread should store locally all the new verticies it discovers in T (queue)
2. Place them all in common array S (queue)
3. Go again!
*/
while (!queue.empty())
{
#pragma omp parallel firstprivate(local_queue)
{
#pragma omp critical
for (int i = 0; i < queue.size(); i++)
{
if (!queue.empty())
{
start = queue.front();
//std::cout << start << " ";
m_pbfs_results.push_back(start);
queue.pop();
}
}
#pragma omp barrier
#pragma omp for schedule(static) nowait
for (int i = 0; i < _adj_list[start].size(); i++)
{
int node = _adj_list[start][i];
if (!visited[node])
{
visited[node] = true;
local_queue.push(node);
}
}
#pragma omp barrier
#pragma omp critical
for (int i = 0; i < local_queue.size(); i++)
{
queue.push(local_queue.front());
local_queue.pop();
}
}
}
}
Every time I try to run my parallelized BFS the amount of numbers that gets put into m_pbfs_results
are different. One run I could have 421 items in the vector, 74, 302, ... The sequential one will always have the same result at 4194299 items.
When stepping through the code the numbers seems to all look fine. I can't really step through the entire list as there are so many vertices. I know that push_back isn't thread safe, but that shouldn't matter here as it is within a critical section.
tl;dr
Why doesn't *_results.push_back(start);
within a critical section work as expected?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论