OpenCL 中的矩阵求逆

发布于 2024-09-03 13:26:54 字数 285 浏览 2 评论 0原文

我正在尝试使用 OpenCL 加速一些计算，算法的一部分包括反转矩阵。是否有任何开源库或免费可用的代码来计算用 OpenCL 或 CUDA 编写的矩阵的 lu 分解（lapack dgetrf 和 dgetri）或一般求逆？该矩阵是实数且为方阵，但除此之外没有任何其他特殊属性。到目前为止，我只在 GPU 上找到了基本的 blas 矩阵向量运算实现。

矩阵相当小，只有大约 60-100 行和列，因此它可以在 cpu 上计算得更快，但它在算法中间使用，所以我必须将其传输到主机，计算逆矩阵，然后然后将结果传输回设备，然后将其用于更大的计算。

原文

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

薄荷港 2024-09-10 13:26:54

看看ViennaCL：http://viennacl.sourceforge.net/

回复收藏 0 原文

肤浅与狂妄 2024-09-10 13:26:54

我在 Open CL 中没有实现，但都有 "Numerical Recipes" Gil Strang 的“深入应用数学” 有精彩的解释，很容易理解代码。 “NR”有您可以改编的 C 代码。

计算倒数

这是不正确的。您不是用 LU 分解计算逆矩阵，而是分解矩阵。如果你想要逆，你必须用一系列单位向量进行前向反向替换。这是一个很小但很重要的区别。

回复收藏 0 原文

远昼 2024-09-10 13:26:54

我知道这有点晚了，但如果您尝试在这么小的矩阵（60-100 行）上进行任何矩阵计算，那么在 CPU 上而不是在 GPU 上计算会快得多，因为将信息从主存复制到 GPU 内存所需的时间。如果您想这样做，那么我建议您考虑使用并行语言，例如 OpenMP 或 MPI，因为这些语言可以让您并行化代码以加快 CPU 上的计算速度。

回复收藏 0 原文

嘿咻 2024-09-10 13:26:54

我使用 eigen lib 使用多线程在 CPU 上进行微积分高达 2k x 2k，因此现在比使用一个线程快 3.5-3.65 倍（取决于矩阵大小）。
我使用了 Intel Xeon 3.5Ghz E5-1620 v3 处理器和 16Gb 内存。（不幸的是，我删除了旧版本以添加精确值，但如果有一些优先级，我可以重写sw）

这是我用来比较的矩阵逆算法。（正如我所说，再次进行大量测试，结果是正确的）：

/*
Uses 2D arrays.
Main routines are:
init_2Dvector() that initializes any 2d vector (can be uchar, char, int, float or double)
multiply_2Dvector()
inverse()
*/

#include<iostream>
#include <vector>
#include <stdlib.h>
#include <time.h>

using namespace std;

/*
void print_2Dvector(vector<vector<double> >& vec)
{
    size_t xmax, ymax;
    ymax = vec.size();
    xmax = vec[0].size(); 

    int x, y;
    for (y = 0; y < ymax; y++)
    {
        for (x = 0; x < xmax; x++)
            cout << vec[y][x] << " \t";
        cout << endl;
    }
}*/

void print_2Dvector(vector<vector<double> >& vec,char *format="%lg \t")
{
    size_t xmax, ymax;
    ymax = vec.size();
    xmax = vec[0].size();

    int x, y;
    for (y = 0; y < ymax; y++)
    {
        {
            for (x = 0; x < xmax; x++)
                printf(format, vec[y][x]);
        }
        cout << endl;
    }
}

//Resizes to y_dim,x_dim any kind of 2d array:
template<typename T>
void init_2Dvector(vector<vector<T> >& vec, size_t y_dim, size_t x_dim)
{
    vec.resize(y_dim);
    for (size_t i = 0; i < vec.size(); i++)
        vec[i].resize(x_dim);
}
//Returns vec1*vec2. vec1 and 2 are not touch
vector< vector<double> > multiply_2Dvector(vector< vector<double> > vec1, vector< vector<double> > vec2)
{
    size_t xmax, ymax;
    ymax = vec1.size();   
    xmax = vec1[0].size();
    vector< vector<double> > vec3;
    if ((ymax != vec2[0].size()) || (xmax != vec2.size()))
    {
        cout << "ERROR on dim2_multiply() dimensions of vec2 not corresponding with vec1 ones" << endl; getchar(); return{};//returns a null
    }
    init_2Dvector(vec3, ymax, ymax);
    cout << "dimensions of vec3=" << vec3.size() << " x " << vec3[0].size() << endl;
    double xx;
    for (int y = 0; y < ymax; y++)
        for (int x = 0; x < ymax; x++)
        {
            xx = 0.0;
            for (int t = 0; t < xmax; t++)
                xx += vec1[y][t] * vec2[t][x];
            vec3[y][x] = xx;
        }
    return vec3;//ok
}

//returns inverse of x2, x2 is not modified
vector< vector<double> > inverse(vector< vector<double> > x)
{
    if (x.size() != x[0].size())
    {
        cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
    }

    size_t dim = x.size();
    int i, j, ord;
    vector< vector<double> > y(dim,vector<double>(dim));//output
    //init_2Dvector(y, dim, dim);
    //1. Unity array y: 
    for (i = 0; i < dim; i++)
    {
        y[i][i] = 1.0;
        for (j = i+1; j < dim; j++)
        {
            y[i][j]= y[j][i] = 0.0;
        }
    }

    double diagon, coef;
    double *ptrx, *ptry, *ptrx2, *ptry2;
    for (ord = 0; ord<dim; ord++)
    {
        //2 Hacemos diagonal de x =1
        int i2;
        if (fabs(x[ord][ord])<1e-15) //Si el elemento diagonal es 0 sumamos una columna que no sea 0 el elemento correspondiente
        {
            for (i2 = ord + 1; i2<dim; i2++)
            {
                if (fabs(x[i2][ord])>1e-15) break;
            }
            if (i2 >= dim)
                return{};//error, returns null
            for (i = 0; i<dim; i++)//sumo la linea que no es 0 el de la misma fila de ord
            {
                x[ord][i] += x[i2][i];
                y[ord][i] += y[i2][i];
            }
        }
        diagon = 1.0/x[ord][ord];
        ptry = &y[ord][0];
        ptrx = &x[ord][0];
        for (i = 0; i < dim; i++)
        {
            *ptry++ *= diagon;
            *ptrx++ *= diagon;
        }

        //Hacemos '0' la columna ord salvo elemento diagonal:
        for (i = 0; i<dim; i++)//Empezamos por primera fila
        {
            if (i == ord) continue;
            coef = x[i][ord];//elemento ha hacer 0 
            if (fabs(coef)<1e-15) continue; //si es cero se evita
            ptry = &y[i][0];
            ptry2 = &y[ord][0];
            ptrx = &x[i][0];
            ptrx2 = &x[ord][0];
            for (j = 0; j < dim; j++)
            {
                *ptry++ = *ptry - coef * (*ptry2++);//1ª matriz
                *ptrx++ = *ptrx - coef * (*ptrx2++);//2ª matriz
            }
        }
    }//end ord
    return y;
}


void test_5_inverse()
{
    vector< vector<double> > vec1 = {
        {0,-5,0,7,33,11,-1},
        {72,0,-11,7,9,33,5 },
        {-13,31,-5,15,29,30,24 },
        {-24,9,8,-23,31,-12,4 },
        {-3,-22,4,-24,-5,27,-10 },
        {-10,-21,-16,-32,-11,20,14 },
        {5,30,13,-32,29,-13,-13 }
    };
    vector< vector<double> > vec2;
    vec2 = inverse(vec1);
    vector< vector<double> > vec3;
    vec3 = multiply_2Dvector(vec1, vec2);

    cout << "initial array (must be unmodified):" << endl;
    print_2Dvector(vec1);

    cout << "Must be diagon array:" << endl;
    print_2Dvector(vec3," %8.3lf");
    cout << endl;
}


void test_6_inverse(int dim)
{
    vector< vector<double> > vec1(dim, vector<double>(dim));
    for (int i=0;i<dim;i++)
        for (int j = 0; j < dim; j++)
        {
            vec1[i][j] = (-1.0 + 2.0*rand() / RAND_MAX) * 10000;
        }

    vector< vector<double> > vec2;
    double ini, end;
    ini = (double)clock();
    vec2 = inverse(vec1);
    end = (double)clock();
    cout << "Time inverse =" << (end - ini) / CLOCKS_PER_SEC << endl;
    vector< vector<double> > vec3;
    vec3 = multiply_2Dvector(vec1, vec2);

    cout << "initial array (must be unmodified):" << endl;
    //print_2Dvector(vec1);

    cout << "Must be diagon array:" << endl;
    //print_2Dvector(vec3, " %8.3lf");
    cout << endl;
}

int main()
{
    vector< vector<double> > vec1;
    init_2Dvector(vec1, 10, 5);    //size_t ymax = vec1.size(),xmax = vec1[0].size();
    //test_2_dimension_vector();
    //test_one_dimension_vector();
    test_5_inverse();
    test_6_inverse(1300);
    cout << endl << "=== END ===" << endl; getchar(); 
    return 1;
}

I make calculus up to 2k x 2k at CPU using multithread using eigen lib, so it is now 3.5-3.65 times faster (depends of matrix size) than using one thread.
I used an Intel Xeon 3.5Ghz E5-1620 v3 processor and 16Gb ram. (Unfortunately I deleted the old version to add exact values, but if has some priority I could rewrite the sw)

This is my matrix inverse algorithm I used to compare with. (It is correct as I stated making a lot of tests again excel result):

/*
Uses 2D arrays.
Main routines are:
init_2Dvector() that initializes any 2d vector (can be uchar, char, int, float or double)
multiply_2Dvector()
inverse()
*/

#include<iostream>
#include <vector>
#include <stdlib.h>
#include <time.h>

using namespace std;

/*
void print_2Dvector(vector<vector<double> >& vec)
{
    size_t xmax, ymax;
    ymax = vec.size();
    xmax = vec[0].size(); 

    int x, y;
    for (y = 0; y < ymax; y++)
    {
        for (x = 0; x < xmax; x++)
            cout << vec[y][x] << " \t";
        cout << endl;
    }
}*/

void print_2Dvector(vector<vector<double> >& vec,char *format="%lg \t")
{
    size_t xmax, ymax;
    ymax = vec.size();
    xmax = vec[0].size();

    int x, y;
    for (y = 0; y < ymax; y++)
    {
        {
            for (x = 0; x < xmax; x++)
                printf(format, vec[y][x]);
        }
        cout << endl;
    }
}

//Resizes to y_dim,x_dim any kind of 2d array:
template<typename T>
void init_2Dvector(vector<vector<T> >& vec, size_t y_dim, size_t x_dim)
{
    vec.resize(y_dim);
    for (size_t i = 0; i < vec.size(); i++)
        vec[i].resize(x_dim);
}
//Returns vec1*vec2. vec1 and 2 are not touch
vector< vector<double> > multiply_2Dvector(vector< vector<double> > vec1, vector< vector<double> > vec2)
{
    size_t xmax, ymax;
    ymax = vec1.size();   
    xmax = vec1[0].size();
    vector< vector<double> > vec3;
    if ((ymax != vec2[0].size()) || (xmax != vec2.size()))
    {
        cout << "ERROR on dim2_multiply() dimensions of vec2 not corresponding with vec1 ones" << endl; getchar(); return{};//returns a null
    }
    init_2Dvector(vec3, ymax, ymax);
    cout << "dimensions of vec3=" << vec3.size() << " x " << vec3[0].size() << endl;
    double xx;
    for (int y = 0; y < ymax; y++)
        for (int x = 0; x < ymax; x++)
        {
            xx = 0.0;
            for (int t = 0; t < xmax; t++)
                xx += vec1[y][t] * vec2[t][x];
            vec3[y][x] = xx;
        }
    return vec3;//ok
}

//returns inverse of x2, x2 is not modified
vector< vector<double> > inverse(vector< vector<double> > x)
{
    if (x.size() != x[0].size())
    {
        cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
    }

    size_t dim = x.size();
    int i, j, ord;
    vector< vector<double> > y(dim,vector<double>(dim));//output
    //init_2Dvector(y, dim, dim);
    //1. Unity array y: 
    for (i = 0; i < dim; i++)
    {
        y[i][i] = 1.0;
        for (j = i+1; j < dim; j++)
        {
            y[i][j]= y[j][i] = 0.0;
        }
    }

    double diagon, coef;
    double *ptrx, *ptry, *ptrx2, *ptry2;
    for (ord = 0; ord<dim; ord++)
    {
        //2 Hacemos diagonal de x =1
        int i2;
        if (fabs(x[ord][ord])<1e-15) //Si el elemento diagonal es 0 sumamos una columna que no sea 0 el elemento correspondiente
        {
            for (i2 = ord + 1; i2<dim; i2++)
            {
                if (fabs(x[i2][ord])>1e-15) break;
            }
            if (i2 >= dim)
                return{};//error, returns null
            for (i = 0; i<dim; i++)//sumo la linea que no es 0 el de la misma fila de ord
            {
                x[ord][i] += x[i2][i];
                y[ord][i] += y[i2][i];
            }
        }
        diagon = 1.0/x[ord][ord];
        ptry = &y[ord][0];
        ptrx = &x[ord][0];
        for (i = 0; i < dim; i++)
        {
            *ptry++ *= diagon;
            *ptrx++ *= diagon;
        }

        //Hacemos '0' la columna ord salvo elemento diagonal:
        for (i = 0; i<dim; i++)//Empezamos por primera fila
        {
            if (i == ord) continue;
            coef = x[i][ord];//elemento ha hacer 0 
            if (fabs(coef)<1e-15) continue; //si es cero se evita
            ptry = &y[i][0];
            ptry2 = &y[ord][0];
            ptrx = &x[i][0];
            ptrx2 = &x[ord][0];
            for (j = 0; j < dim; j++)
            {
                *ptry++ = *ptry - coef * (*ptry2++);//1ª matriz
                *ptrx++ = *ptrx - coef * (*ptrx2++);//2ª matriz
            }
        }
    }//end ord
    return y;
}


void test_5_inverse()
{
    vector< vector<double> > vec1 = {
        {0,-5,0,7,33,11,-1},
        {72,0,-11,7,9,33,5 },
        {-13,31,-5,15,29,30,24 },
        {-24,9,8,-23,31,-12,4 },
        {-3,-22,4,-24,-5,27,-10 },
        {-10,-21,-16,-32,-11,20,14 },
        {5,30,13,-32,29,-13,-13 }
    };
    vector< vector<double> > vec2;
    vec2 = inverse(vec1);
    vector< vector<double> > vec3;
    vec3 = multiply_2Dvector(vec1, vec2);

    cout << "initial array (must be unmodified):" << endl;
    print_2Dvector(vec1);

    cout << "Must be diagon array:" << endl;
    print_2Dvector(vec3," %8.3lf");
    cout << endl;
}


void test_6_inverse(int dim)
{
    vector< vector<double> > vec1(dim, vector<double>(dim));
    for (int i=0;i<dim;i++)
        for (int j = 0; j < dim; j++)
        {
            vec1[i][j] = (-1.0 + 2.0*rand() / RAND_MAX) * 10000;
        }

    vector< vector<double> > vec2;
    double ini, end;
    ini = (double)clock();
    vec2 = inverse(vec1);
    end = (double)clock();
    cout << "Time inverse =" << (end - ini) / CLOCKS_PER_SEC << endl;
    vector< vector<double> > vec3;
    vec3 = multiply_2Dvector(vec1, vec2);

    cout << "initial array (must be unmodified):" << endl;
    //print_2Dvector(vec1);

    cout << "Must be diagon array:" << endl;
    //print_2Dvector(vec3, " %8.3lf");
    cout << endl;
}

int main()
{
    vector< vector<double> > vec1;
    init_2Dvector(vec1, 10, 5);    //size_t ymax = vec1.size(),xmax = vec1[0].size();
    //test_2_dimension_vector();
    //test_one_dimension_vector();
    test_5_inverse();
    test_6_inverse(1300);
    cout << endl << "=== END ===" << endl; getchar(); 
    return 1;
}

回复收藏 0 原文