如何使用 Hockney 模型参数创建 MPI 性能模型？

发布于 2025-01-16 07:56:46 字数 4414 浏览 3 评论 0原文

据我了解，参数 α 和 β 可以在霍克尼模型中使用来表示点对点通信中的延迟和带宽，其中 m 表示消息大小。例如：

T(m) = α + β · m

我一直在尝试用这种方法对一些 OpenMPI 算法进行建模，但无法弄清下面这个 MPI_Scatter 算法该如何建模：

/**
 * Linear scatter: the root sends one block to every other rank, mixing
 * non-blocking and blocking sends.
 *
 * Non-root ranks post one blocking receive from the root and return.
 * The root walks ranks 0..size-1 in order. Its own block is copied locally
 * (skipped when rbuf is MPI_IN_PLACE); every other rank is sent its block of
 * scount elements of sdtype. When max_reqs > 1, a destination whose rank is a
 * multiple of max_reqs gets a blocking send and all other destinations get a
 * non-blocking isend. When max_reqs <= 1, every send is non-blocking. All
 * isends are completed by a single wait_all before the function returns.
 *
 * @param sbuf     Send buffer, read only by the root. The block for rank i
 *                 starts at byte offset i * scount * extent, where extent is
 *                 the value reported by ompi_datatype_type_extent(sdtype).
 * @param scount   Number of sdtype elements sent to each rank.
 * @param sdtype   Datatype of the elements in sbuf.
 * @param rbuf     Receive buffer. On the root, MPI_IN_PLACE suppresses the
 *                 local copy.
 * @param rcount   Number of rdtype elements received.
 * @param rdtype   Datatype of the elements in rbuf.
 * @param root     Rank that owns sbuf and performs the sends.
 * @param comm     Communicator; supplies this process's rank and the size.
 * @param module   Collective module; module->base_data is used to obtain the
 *                 request array.
 * @param max_reqs Spacing of blocking sends on the root. Values <= 1 are
 *                 treated as 0, meaning no blocking sends at all.
 *
 * @return MPI_SUCCESS on success. Otherwise the error code of the failing
 *         call, or OMPI_ERR_OUT_OF_RESOURCE when no request array could be
 *         obtained. If the error is MPI_ERR_IN_STATUS and a request array is
 *         held, it is replaced by the first per-request error that is neither
 *         MPI_SUCCESS nor MPI_ERR_PENDING, if one exists.
 */
int
ompi_coll_base_scatter_intra_linear_nb(const void *sbuf, int scount,
                                       struct ompi_datatype_t *sdtype,
                                       void *rbuf, int rcount,
                                       struct ompi_datatype_t *rdtype,
                                       int root,
                                       struct ompi_communicator_t *comm,
                                       mca_coll_base_module_t *module,
                                       int max_reqs)
{
    int i, rank, size, err, line, nreqs;
    ptrdiff_t incr;
    char *ptmp;
    ompi_request_t **reqs = NULL, **preq;

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If not root, receive data. */
    if (rank != root) {
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
                                MCA_COLL_BASE_TAG_SCATTER,
                                comm, MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            /* reqs is still NULL here, so err_hndl never reads the
             * not-yet-assigned nreqs. */
            line = __LINE__; goto err_hndl;
        }

        return MPI_SUCCESS;
    }

    /* Size the request array: one slot per non-blocking send. */
    if (max_reqs <= 1) {
        /* 0 is the sentinel for "every send is non-blocking"; see the
         * !max_reqs test in the send loop. */
        max_reqs = 0;
        nreqs = size - 1; /* no send for myself */
    } else {
        /* We use blocking MPI_Send (which does not need a request)
         * every max_reqs send operation (which is size/max_reqs at most),
         * therefore no need to allocate requests for these sends. */
        /* The loop below issues an isend only for i != rank with
         * i % max_reqs != 0, so this value is an upper bound on the number
         * of requests actually used. */
        nreqs = size - (size / max_reqs);
    }

    /* NOTE(review): with size == 1 and max_reqs <= 1, nreqs is 0. Whether
     * ompi_coll_base_comm_get_reqs returns NULL for a zero count (which this
     * code would report as OMPI_ERR_OUT_OF_RESOURCE) is not visible here;
     * confirm against its implementation. */
    reqs = ompi_coll_base_comm_get_reqs(module->base_data, nreqs);
    if (NULL == reqs) {
        err = OMPI_ERR_OUT_OF_RESOURCE;
        line = __LINE__; goto err_hndl;
    }

    err = ompi_datatype_type_extent(sdtype, &incr);
    if (OMPI_SUCCESS != err) {
        line = __LINE__; goto err_hndl;
    }
    /* incr becomes the byte stride between consecutive ranks' blocks. */
    incr *= scount;

    /* I am the root, loop sending data. */
    /* ptmp advances by one block per rank, including the root's own block;
     * preq points at the next unused request slot. */
    for (i = 0, ptmp = (char *)sbuf, preq = reqs; i < size; ++i, ptmp += incr) {
        /* simple optimization */
        if (i == rank) {
            /* Local copy instead of a self-send. With MPI_IN_PLACE nothing
             * is done and err keeps the value from the previous step. */
            if (MPI_IN_PLACE != rbuf) {
                err = ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
                                           rdtype);
            }
        } else {
            /* Non-blocking send unless max_reqs is non-zero and i is a
             * multiple of it, in which case a blocking send is used. */
            if (!max_reqs || (i % max_reqs)) {
                err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
                                         MCA_COLL_BASE_TAG_SCATTER,
                                         MCA_PML_BASE_SEND_STANDARD,
                                         comm, preq++));
            } else {
                err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
                                        MCA_COLL_BASE_TAG_SCATTER,
                                        MCA_PML_BASE_SEND_STANDARD,
                                        comm));
            }
        }
        if (MPI_SUCCESS != err) {
            line = __LINE__; goto err_hndl;
        }
    }

    /* Wait only for the requests actually issued (preq - reqs), which may be
     * fewer than nreqs. */
    err = ompi_request_wait_all(preq - reqs, reqs, MPI_STATUSES_IGNORE);
    if (MPI_SUCCESS != err) {
        line = __LINE__; goto err_hndl;
    }

    return MPI_SUCCESS;

err_hndl:
    /* Common failure exit. line holds the __LINE__ recorded just before the
     * jump, and err holds the failing call's return code. */
    if (NULL != reqs) {
        /* find a real error code */
        if (MPI_ERR_IN_STATUS == err) {
            /* Scan every slot and take the first status error that is
             * neither success nor pending. */
            for (i = 0; i < nreqs; i++) {
                if (MPI_REQUEST_NULL == reqs[i]) continue;
                if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
                if (reqs[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
                    err = reqs[i]->req_status.MPI_ERROR;
                    break;
                }
            }
        }
        ompi_coll_base_free_reqs(reqs, nreqs);
    }
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank));
    /* NOTE(review): presumably OPAL_OUTPUT can expand to nothing in some
     * builds, leaving line otherwise unused; confirm. */
    (void)line;  /* silence compiler warning */
    return err;
}

到目前为止，通过查看代码，我了解到该模型应该是

T(NP, m) = (NP − 1) · (α + m · β).

其中 NP 是进程数（Scatter 会向除根进程之外的所有进程发送数据）。

这没有考虑通过 MPI_Isend 发出的非阻塞发送（取决于代码片段中的条件）。我不确定如何仅用霍克尼模型同时描述非阻塞发送和阻塞发送。

任何帮助将非常感激，因为我读过的有关该主题的论文似乎都没有很好地解释该过程。

原文

I understand that the parameters α and β can be used in the Hockney model to represent latency and bandwidth in point-to-point communications, with m representing the message size. For example:

T(m) = α + β · m

I have been trying to model some OpenMPI algorithms using this technique and can't figure out this following algorithm for MPI_Scatter:

/**
 * Linear scatter: the root sends one block to every other rank, mixing
 * non-blocking and blocking sends.
 *
 * Non-root ranks post one blocking receive from the root and return.
 * The root walks ranks 0..size-1 in order. Its own block is copied locally
 * (skipped when rbuf is MPI_IN_PLACE); every other rank is sent its block of
 * scount elements of sdtype. When max_reqs > 1, a destination whose rank is a
 * multiple of max_reqs gets a blocking send and all other destinations get a
 * non-blocking isend. When max_reqs <= 1, every send is non-blocking. All
 * isends are completed by a single wait_all before the function returns.
 *
 * @param sbuf     Send buffer, read only by the root. The block for rank i
 *                 starts at byte offset i * scount * extent, where extent is
 *                 the value reported by ompi_datatype_type_extent(sdtype).
 * @param scount   Number of sdtype elements sent to each rank.
 * @param sdtype   Datatype of the elements in sbuf.
 * @param rbuf     Receive buffer. On the root, MPI_IN_PLACE suppresses the
 *                 local copy.
 * @param rcount   Number of rdtype elements received.
 * @param rdtype   Datatype of the elements in rbuf.
 * @param root     Rank that owns sbuf and performs the sends.
 * @param comm     Communicator; supplies this process's rank and the size.
 * @param module   Collective module; module->base_data is used to obtain the
 *                 request array.
 * @param max_reqs Spacing of blocking sends on the root. Values <= 1 are
 *                 treated as 0, meaning no blocking sends at all.
 *
 * @return MPI_SUCCESS on success. Otherwise the error code of the failing
 *         call, or OMPI_ERR_OUT_OF_RESOURCE when no request array could be
 *         obtained. If the error is MPI_ERR_IN_STATUS and a request array is
 *         held, it is replaced by the first per-request error that is neither
 *         MPI_SUCCESS nor MPI_ERR_PENDING, if one exists.
 */
int
ompi_coll_base_scatter_intra_linear_nb(const void *sbuf, int scount,
                                       struct ompi_datatype_t *sdtype,
                                       void *rbuf, int rcount,
                                       struct ompi_datatype_t *rdtype,
                                       int root,
                                       struct ompi_communicator_t *comm,
                                       mca_coll_base_module_t *module,
                                       int max_reqs)
{
    int i, rank, size, err, line, nreqs;
    ptrdiff_t incr;
    char *ptmp;
    ompi_request_t **reqs = NULL, **preq;

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If not root, receive data. */
    if (rank != root) {
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
                                MCA_COLL_BASE_TAG_SCATTER,
                                comm, MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            /* reqs is still NULL here, so err_hndl never reads the
             * not-yet-assigned nreqs. */
            line = __LINE__; goto err_hndl;
        }

        return MPI_SUCCESS;
    }

    /* Size the request array: one slot per non-blocking send. */
    if (max_reqs <= 1) {
        /* 0 is the sentinel for "every send is non-blocking"; see the
         * !max_reqs test in the send loop. */
        max_reqs = 0;
        nreqs = size - 1; /* no send for myself */
    } else {
        /* We use blocking MPI_Send (which does not need a request)
         * every max_reqs send operation (which is size/max_reqs at most),
         * therefore no need to allocate requests for these sends. */
        /* The loop below issues an isend only for i != rank with
         * i % max_reqs != 0, so this value is an upper bound on the number
         * of requests actually used. */
        nreqs = size - (size / max_reqs);
    }

    /* NOTE(review): with size == 1 and max_reqs <= 1, nreqs is 0. Whether
     * ompi_coll_base_comm_get_reqs returns NULL for a zero count (which this
     * code would report as OMPI_ERR_OUT_OF_RESOURCE) is not visible here;
     * confirm against its implementation. */
    reqs = ompi_coll_base_comm_get_reqs(module->base_data, nreqs);
    if (NULL == reqs) {
        err = OMPI_ERR_OUT_OF_RESOURCE;
        line = __LINE__; goto err_hndl;
    }

    err = ompi_datatype_type_extent(sdtype, &incr);
    if (OMPI_SUCCESS != err) {
        line = __LINE__; goto err_hndl;
    }
    /* incr becomes the byte stride between consecutive ranks' blocks. */
    incr *= scount;

    /* I am the root, loop sending data. */
    /* ptmp advances by one block per rank, including the root's own block;
     * preq points at the next unused request slot. */
    for (i = 0, ptmp = (char *)sbuf, preq = reqs; i < size; ++i, ptmp += incr) {
        /* simple optimization */
        if (i == rank) {
            /* Local copy instead of a self-send. With MPI_IN_PLACE nothing
             * is done and err keeps the value from the previous step. */
            if (MPI_IN_PLACE != rbuf) {
                err = ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
                                           rdtype);
            }
        } else {
            /* Non-blocking send unless max_reqs is non-zero and i is a
             * multiple of it, in which case a blocking send is used. */
            if (!max_reqs || (i % max_reqs)) {
                err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
                                         MCA_COLL_BASE_TAG_SCATTER,
                                         MCA_PML_BASE_SEND_STANDARD,
                                         comm, preq++));
            } else {
                err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
                                        MCA_COLL_BASE_TAG_SCATTER,
                                        MCA_PML_BASE_SEND_STANDARD,
                                        comm));
            }
        }
        if (MPI_SUCCESS != err) {
            line = __LINE__; goto err_hndl;
        }
    }

    /* Wait only for the requests actually issued (preq - reqs), which may be
     * fewer than nreqs. */
    err = ompi_request_wait_all(preq - reqs, reqs, MPI_STATUSES_IGNORE);
    if (MPI_SUCCESS != err) {
        line = __LINE__; goto err_hndl;
    }

    return MPI_SUCCESS;

err_hndl:
    /* Common failure exit. line holds the __LINE__ recorded just before the
     * jump, and err holds the failing call's return code. */
    if (NULL != reqs) {
        /* find a real error code */
        if (MPI_ERR_IN_STATUS == err) {
            /* Scan every slot and take the first status error that is
             * neither success nor pending. */
            for (i = 0; i < nreqs; i++) {
                if (MPI_REQUEST_NULL == reqs[i]) continue;
                if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
                if (reqs[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
                    err = reqs[i]->req_status.MPI_ERROR;
                    break;
                }
            }
        }
        ompi_coll_base_free_reqs(reqs, nreqs);
    }
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank));
    /* NOTE(review): presumably OPAL_OUTPUT can expand to nothing in some
     * builds, leaving line otherwise unused; confirm. */
    (void)line;  /* silence compiler warning */
    return err;
}

So far, from looking at the code, I understand that the model should be

T(NP, m) = (NP − 1) · (α + m · β).

where NP is the number of processes (Scatter sends to every process apart from the root).

This does not account for the non-blocking sends that are issued with MPI_Isend (under the condition found in the code snippet). I am unsure how to account for both the non-blocking and blocking sends using only the Hockney model.

Any help would be very much appreciated, as none of the papers that I have read on the subject seem to explain the process well.

分享到QQ

分享到微博