OpenMP 私有（private）类成员 std::vector

发布于 2025-02-12 18:58:26 字数 2708 浏览 2 评论 0原文

我有一个类成员 std::vector<double> chi_，我想在并行 for 循环中使用它，并让每个线程看到各自不同的 chi_ 副本。然而，即使使用了 private(chi_)，每个线程看到的显然仍是同一个 chi_：这不仅可以从输出的分析中看出，也可以从每个线程看到的 chi_ 的地址看出。

是否可以在类成员函数内部，对使用类成员变量的 for 循环应用 OpenMP 并行，使每个线程都拥有该变量的独立副本？

我使用以下脚本编译并运行代码：

#!/usr/bin/env bash
# Build main.cpp twice (serial and OpenMP), run both binaries, and split the
# OpenMP trace into one file per trace phase.

# The OpenMP binary is run with two threads.
export OMP_NUM_THREADS=2

# Abort on the first failing command.
set -e

compiler=clang++
base_flags=(-O3 --std=c++17)

# Serial build first, then the OpenMP build. -DOMP enables the
# omp_get_thread_num() call that main.cpp guards with #ifdef OMP.
"$compiler" "${base_flags[@]}" -o test_rho.exe main.cpp
"$compiler" "${base_flags[@]}" -DOMP -fopenmp -o test_rho_omp.exe main.cpp

# Run one binary, capture its output in <tag>.txt, and write the 'TID' trace
# lines, sorted numerically on field 6 and then field 3, to <tag>.tmp.
# NOTE(review): with the trace format printed by main.cpp, whitespace-separated
# fields 6 and 3 are the words "for" and "iChi" -- confirm these are the
# intended sort keys.
run_and_sort() {
    local exe=$1
    local tag=$2
    "./$exe" > "$tag.txt"
    grep 'TID' "$tag.txt" | sort -nk6,6 -nk3,3 > "$tag.tmp"
}

run_and_sort test_rho.exe ser
run_and_sort test_rho_omp.exe omp

# Split the OpenMP trace by phase and strip the phase label, so the
# read-back ("chk") lines can be compared with the write ("gen") lines.
for phase in chk gen; do
    grep "$phase" omp.tmp | sed "s/$phase//g" > "$phase.tmp"
done

main.cpp的内容如下：

// main.cpp
#include <iostream>
#include <vector>

#include <omp.h>
#include <math.h>
#include <stdlib.h>

// Wfn: evaluates a scalar rho(x) from a table of nChis_ values held in chi_.
class Wfn {
    public:
        // Size the chi_ table to nChis_ zero-initialised entries.
        Wfn() : chi_(nChis_) {}

        // Evaluate rho at a single point x (refills chi_ via gen_chi first).
        double rho(double x);

        // Evaluate rho at nrhos sample points; returns one value per point.
        std::vector<double> rhos(size_t nrhos);

    private:
        // Fill chi_ with the table values for the point x.
        void gen_chi(double x);

        // Number of table entries. Declared before chi_ so that it is already
        // initialised when chi_ is constructed from it.
        size_t nChis_ = 120;

        // Table of values for the most recent x; overwritten by gen_chi.
        std::vector<double> chi_;
};

// Fill the member table chi_ for the point x so that rho() can reuse the
// values instead of recomputing them inside its double loop.
//
// For every index i in [0, nChis_):
//     chi_[i] = exp(-x) * x^(i % 3) * x^(i % 4) * x^(i % 5)
//
// Each call also prints a trace: the address of chi_'s storage, one "(gen)"
// line per element as it is written, and one "(chk)" line per element read
// back afterwards. The thread id in the trace is 0 unless OMP is defined.
//
// This writes to the object's own chi_, so concurrent calls on the same Wfn
// object race with each other.
void Wfn::gen_chi(double x)
{
    int tid = 0;
#ifdef OMP
    tid = omp_get_thread_num();
#endif
    // %p requires a void* argument, so convert the double* explicitly.
    printf("TID[%i] sees &chi_[0] = %p\n", tid, static_cast<void*>(chi_.data()));

    // exp(-x) does not depend on i; evaluate it once.
    const double decay = exp(-x);
    for (size_t i = 0; i < nChis_; i++)
    {
        const double xl = pow(x, static_cast<double>(i % 3));
        const double xm = pow(x, static_cast<double>(i % 4));
        const double xn = pow(x, static_cast<double>(i % 5));
        chi_[i] = decay * xl * xm * xn;
        // size_t is printed with %zu; %lu is only correct on platforms where
        // size_t happens to be unsigned long.
        printf("TID[%i] (gen) iChi %3zu sees for %10.5f value %10.5e\n", tid, i, x, chi_[i]);
    }
    for (size_t i = 0; i < nChis_; i++)
        printf("TID[%i] (chk) iChi %3zu sees for %10.5f value %10.5e\n", tid, i, x, chi_[i]);
}

// Compute rho at the point x.
//
// Refreshes chi_ for x via gen_chi(), then sums x * chi_[i] * chi_[j] over
// every ordered pair (i, j) of table indices and returns the total.
double Wfn::rho(double x)
{
    gen_chi(x);

    double total = 0.0;
    for (size_t row = 0; row < nChis_; ++row)
    {
        // x * chi_[row] is the same for every col, so form it once.
        // Grouping is (x * chi_[row]) * chi_[col].
        const double scaled = x * chi_[row];
        for (size_t col = 0; col < nChis_; ++col)
            total += scaled * chi_[col];
    }
    return total;
}

// Evaluate rho on multiple values in parallel
std::vector<double> Wfn::rhos(size_t nrhos)
{
    std::vector<double> rhos(nrhos);
    #pragma omp parallel for private (chi_)
    for (size_t irho = 0; irho < nrhos; ++irho)
        rhos[irho] = rho((double)irho / 1000.0);
    return rhos;
}

// Program entry point: build a Wfn and evaluate rho on 1000 sample points.
// The returned values are not used further; the visible output is the trace
// printed while they are evaluated. Command-line arguments are ignored.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const size_t sampleCount = 1000;

    Wfn wfn;
    const std::vector<double> rhovs = wfn.rhos(sampleCount);
    (void)rhovs;

    return 0;
}

原文

I have a class member std::vector<double> chi_ which I would like to use in a parallel for loop where each thread sees a different version of chi_. However, even when using private(chi_) it's clear that each thread sees the same value of chi_, not just from analysis of output, but from the address of chi_ that each thread sees.

Is it possible to utilize OpenMP for a parallel for loop inside a class function on a class member variable such that each thread sees a unique version of that variable?

I compile and run my code using the following script:

#!/usr/bin/env bash
# Build main.cpp twice (serial and OpenMP), run both binaries, and split the
# OpenMP trace into one file per trace phase.

# The OpenMP binary is run with two threads.
export OMP_NUM_THREADS=2

# Abort on the first failing command.
set -e

compiler=clang++
base_flags=(-O3 --std=c++17)

# Serial build first, then the OpenMP build. -DOMP enables the
# omp_get_thread_num() call that main.cpp guards with #ifdef OMP.
"$compiler" "${base_flags[@]}" -o test_rho.exe main.cpp
"$compiler" "${base_flags[@]}" -DOMP -fopenmp -o test_rho_omp.exe main.cpp

# Run one binary, capture its output in <tag>.txt, and write the 'TID' trace
# lines, sorted numerically on field 6 and then field 3, to <tag>.tmp.
# NOTE(review): with the trace format printed by main.cpp, whitespace-separated
# fields 6 and 3 are the words "for" and "iChi" -- confirm these are the
# intended sort keys.
run_and_sort() {
    local exe=$1
    local tag=$2
    "./$exe" > "$tag.txt"
    grep 'TID' "$tag.txt" | sort -nk6,6 -nk3,3 > "$tag.tmp"
}

run_and_sort test_rho.exe ser
run_and_sort test_rho_omp.exe omp

# Split the OpenMP trace by phase and strip the phase label, so the
# read-back ("chk") lines can be compared with the write ("gen") lines.
for phase in chk gen; do
    grep "$phase" omp.tmp | sed "s/$phase//g" > "$phase.tmp"
done

and the contents of main.cpp are below:

// main.cpp
#include <iostream>
#include <vector>

#include <omp.h>
#include <math.h>
#include <stdlib.h>

// Wfn: evaluates a scalar rho(x) from a table of nChis_ values held in chi_.
class Wfn {
    public:
        // Size the chi_ table to nChis_ zero-initialised entries.
        Wfn() : chi_(nChis_) {}

        // Evaluate rho at a single point x (refills chi_ via gen_chi first).
        double rho(double x);

        // Evaluate rho at nrhos sample points; returns one value per point.
        std::vector<double> rhos(size_t nrhos);

    private:
        // Fill chi_ with the table values for the point x.
        void gen_chi(double x);

        // Number of table entries. Declared before chi_ so that it is already
        // initialised when chi_ is constructed from it.
        size_t nChis_ = 120;

        // Table of values for the most recent x; overwritten by gen_chi.
        std::vector<double> chi_;
};

// Fill the member table chi_ for the point x so that rho() can reuse the
// values instead of recomputing them inside its double loop.
//
// For every index i in [0, nChis_):
//     chi_[i] = exp(-x) * x^(i % 3) * x^(i % 4) * x^(i % 5)
//
// Each call also prints a trace: the address of chi_'s storage, one "(gen)"
// line per element as it is written, and one "(chk)" line per element read
// back afterwards. The thread id in the trace is 0 unless OMP is defined.
//
// This writes to the object's own chi_, so concurrent calls on the same Wfn
// object race with each other.
void Wfn::gen_chi(double x)
{
    int tid = 0;
#ifdef OMP
    tid = omp_get_thread_num();
#endif
    // %p requires a void* argument, so convert the double* explicitly.
    printf("TID[%i] sees &chi_[0] = %p\n", tid, static_cast<void*>(chi_.data()));

    // exp(-x) does not depend on i; evaluate it once.
    const double decay = exp(-x);
    for (size_t i = 0; i < nChis_; i++)
    {
        const double xl = pow(x, static_cast<double>(i % 3));
        const double xm = pow(x, static_cast<double>(i % 4));
        const double xn = pow(x, static_cast<double>(i % 5));
        chi_[i] = decay * xl * xm * xn;
        // size_t is printed with %zu; %lu is only correct on platforms where
        // size_t happens to be unsigned long.
        printf("TID[%i] (gen) iChi %3zu sees for %10.5f value %10.5e\n", tid, i, x, chi_[i]);
    }
    for (size_t i = 0; i < nChis_; i++)
        printf("TID[%i] (chk) iChi %3zu sees for %10.5f value %10.5e\n", tid, i, x, chi_[i]);
}

// Compute rho at the point x.
//
// Refreshes chi_ for x via gen_chi(), then sums x * chi_[i] * chi_[j] over
// every ordered pair (i, j) of table indices and returns the total.
double Wfn::rho(double x)
{
    gen_chi(x);

    double total = 0.0;
    for (size_t row = 0; row < nChis_; ++row)
    {
        // x * chi_[row] is the same for every col, so form it once.
        // Grouping is (x * chi_[row]) * chi_[col].
        const double scaled = x * chi_[row];
        for (size_t col = 0; col < nChis_; ++col)
            total += scaled * chi_[col];
    }
    return total;
}

// Evaluate rho on multiple values in parallel
std::vector<double> Wfn::rhos(size_t nrhos)
{
    std::vector<double> rhos(nrhos);
    #pragma omp parallel for private (chi_)
    for (size_t irho = 0; irho < nrhos; ++irho)
        rhos[irho] = rho((double)irho / 1000.0);
    return rhos;
}

// Program entry point: build a Wfn and evaluate rho on 1000 sample points.
// The returned values are not used further; the visible output is the trace
// printed while they are evaluated. Command-line arguments are ignored.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const size_t sampleCount = 1000;

    Wfn wfn;
    const std::vector<double> rhovs = wfn.rhos(sampleCount);
    (void)rhovs;

    return 0;
}

分享到QQ

分享到微博