A simple MPI program

Posted 2024-11-29 16:49:52

I would appreciate it if somebody could tell me why this simple MPI send-and-receive code does not run on two processors when n = 40 (set at line 20), but works for n <= 30. In other words, once the message size goes beyond a certain threshold (which is not that large, roughly a 1-D array of size 8100), MPI deadlocks.

#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include "iostream"
#include "math.h"
using namespace std;

int main(int argc, char *argv[])
{
    int processor_count, processor_rank;
    double *buff_H, *buff_send_H;
    int N_pa_prim1, l, n, N_p0;
    MPI_Status status;

    MPI_Init (&argc, &argv);
    MPI_Comm_size (MPI_COMM_WORLD, &processor_count);
    MPI_Comm_rank (MPI_COMM_WORLD, &processor_rank);

    N_pa_prim1=14; l=7; n=40; N_p0=7;
    buff_H = new double [n*n*N_p0+1];          //Receive buffer allocation

    buff_send_H = new double [n*n*N_p0+1];     //Send buffer allocation

    for (int j = 0; j < n*n*N_p0+1; j++)
        buff_send_H[j] = 1e-8*rand();

    if (processor_rank == 0)
        MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);
    else if (processor_rank == 1)
        MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);

    MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
    cout << "Received successfully by " << processor_rank << endl;
    MPI_Finalize();
    return 0;
}


Comments (2)

谢绝鈎搭 2024-12-06 16:49:52

The deadlocking is correct behaviour; you have a deadlock in your code.

The MPI specification allows MPI_Send to behave like MPI_Ssend -- that is, to block. A blocking communication primitive does not return until the communication has "completed" in some sense, which (in the case of a blocking send) probably means the receive has started.

Your code looks like this:

If Processor 0:
   Send to processor 1

If Processor 1:
   Send to processor 0

Receive

That is, the receive does not start until the sends have completed. You are sending, but the sends never return, because no one is receiving! (The fact that this works for small messages is an implementation artifact: most MPI implementations use a so-called "eager protocol" for sufficiently small messages, but that cannot be counted on in general.)
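
One way to see this in isolation is to force synchronous sends. The following is a minimal sketch (the two-rank exchange and tag 163 are taken from the code above; everything else is illustrative) that uses MPI_Ssend, which always waits for the matching receive to be posted. It hangs even for a tiny message, which is exactly the eager-protocol point: with MPI_Ssend there is no eager path to hide the deadlock.

// Sketch: force synchronous sends so the eager protocol cannot mask the deadlock.
// Both ranks block inside MPI_Ssend and never reach their MPI_Recv, mirroring
// what the original code does once the message exceeds the eager-size limit.
#include <mpi.h>
#include <iostream>
using namespace std;

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int count = 10;                 // small enough that MPI_Send would normally succeed eagerly
    double sendbuf[count] = {0}, recvbuf[count];
    MPI_Status status;

    if (rank == 0 || rank == 1) {
        int other = 1 - rank;
        // MPI_Ssend does not return until the matching receive has been posted,
        // so neither rank ever reaches the MPI_Recv below.
        MPI_Ssend(sendbuf, count, MPI_DOUBLE, other, 163, MPI_COMM_WORLD);
        MPI_Recv(recvbuf, count, MPI_DOUBLE, other, 163, MPI_COMM_WORLD, &status);
        cout << "never reached by rank " << rank << endl;
    }

    MPI_Finalize();
    return 0;
}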

Note that there are other logic errors here, too: this program will also deadlock for more than two processors, because processes of rank >= 2 will wait for a message that never arrives.

You can fix your program by alternating the sends and receives by rank:

if (processor_rank == 0) {
    MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);  
    MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
} else if (processor_rank == 1) {
    MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
    MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);
}

or by using MPI_Sendrecv (which is a single blocking send-plus-receive, rather than a blocking send followed by a blocking receive):

int sendto;
if (processor_rank == 0)
    sendto = 1;
else if (processor_rank == 1)
    sendto = 0;

if (processor_rank == 0 || processor_rank == 1) {
    MPI_Sendrecv(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, sendto, 163,
                 buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163,
                 MPI_COMM_WORLD, &status);
}

or by using non-blocking sends and receives:

MPI_Request reqs[2];
MPI_Status  statuses[2];
if (processor_rank == 0) {
    MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[0]);
} else if (processor_rank == 1) {
    MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[0]);
}

if (processor_rank == 0 || processor_rank == 1)
    MPI_Irecv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[1]);

MPI_Waitall(2, reqs, statuses);

云之铃。 2024-12-06 16:49:52

Thank you, Jonathan, for your help. I have chosen the third solution here and written code similar to yours, except that I added "for" loops to send a number of messages. This time it does not deadlock; however, the processors keep receiving only the last message. (Since the messages are long, I have only printed their last elements to check consistency.)

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <math.h>
using namespace std;

int main(int argc, char *argv[])
{
int processor_count, processor_rank;

//Initialize MPI
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &processor_count);
MPI_Comm_rank (MPI_COMM_WORLD, &processor_rank);

double **buff_H, *buff_send_H;
int N_pa_prim1, l, n, N_p0, count, temp;
N_pa_prim1=5; l=7; n=50; N_p0=7;

MPI_Request reqs[N_pa_prim1];
MPI_Status  statuses[N_pa_prim1];
buff_H = new double *[N_pa_prim1];                 //Receive buffer allocation
for (int i = 0; i < N_pa_prim1; i++)
    buff_H[i] = new double [n*n*N_p0+1];  
buff_send_H = new double [n*n*N_p0+1];             //Send buffer allocation

if (processor_rank == 0) {
    for (int i = 0; i < N_pa_prim1; i++){
        for (int j = 0; j < n*n*N_p0+1; j++)
            buff_send_H[j] = 2.0325e-8*rand();

        cout << processor_rank << "\t" << buff_send_H[n*n*N_p0] << "\t" << "Send" << "\t" << endl;  
        MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[i]);
    }
}
else if (processor_rank == 1) {
    for (int i = 0; i < N_pa_prim1; i++){
        for (int j = 0; j < n*n*N_p0+1; j++)
            buff_send_H[j] = 3.5871e-8*rand();

        cout << processor_rank << "\t" << buff_send_H[n*n*N_p0] << "\t" << "Send" << "\t" << endl;  
        MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[i]);
    }
}

for (int i = 0; i < N_pa_prim1; i++)
    MPI_Irecv(buff_H[i], n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[N_pa_prim1+i]);

MPI_Waitall(2*N_pa_prim1, reqs, statuses);

for (int i = 0; i < N_pa_prim1; i++)
    cout << processor_rank << "\t" << buff_H[i][n*n*N_p0] << "\t" << "Receive" << endl;

MPI_Finalize();
return 0;

}
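
The same non-blocking rule is relevant here: MPI_Isend only starts a transfer, and the send buffer must not be modified until the corresponding request has completed, yet the loop above refills the single buff_send_H on every iteration before any request has been waited on (and reqs is declared with N_pa_prim1 entries while being indexed up to 2*N_pa_prim1 - 1). Below is a minimal sketch, assuming two ranks and the sizes used above, of a loop that keeps one buffer per outstanding message and sizes the request array for all sends and receives.

// Sketch (assumptions: exactly two ranks, sizes taken from the code above).
// Each message gets its own send and receive buffer, and nothing is reused
// or read until MPI_Waitall reports that every request has completed.
#include <mpi.h>
#include <cstdlib>
#include <vector>
using namespace std;

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int N_pa_prim1 = 5, n = 50, N_p0 = 7;
    const int len = n*n*N_p0 + 1;

    if (rank == 0 || rank == 1) {
        int other = 1 - rank;

        // One buffer per message: the send buffers must stay untouched until
        // their requests complete.
        vector< vector<double> > send_bufs(N_pa_prim1, vector<double>(len));
        vector< vector<double> > recv_bufs(N_pa_prim1, vector<double>(len));
        vector<MPI_Request> reqs(2*N_pa_prim1);   // room for all sends and receives

        for (int i = 0; i < N_pa_prim1; i++) {
            for (int j = 0; j < len; j++)
                send_bufs[i][j] = 1e-8*rand();
            MPI_Isend(send_bufs[i].data(), len, MPI_DOUBLE, other, 163,
                      MPI_COMM_WORLD, &reqs[i]);
            MPI_Irecv(recv_bufs[i].data(), len, MPI_DOUBLE, other, 163,
                      MPI_COMM_WORLD, &reqs[N_pa_prim1 + i]);
        }

        // Only after this call may the send buffers be reused and the
        // receive buffers be read.
        MPI_Waitall(2*N_pa_prim1, reqs.data(), MPI_STATUSES_IGNORE);
    }

    MPI_Finalize();
    return 0;
}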

