非方阵乘法

发布于 2024-11-07 10:47:36 字数 1998 浏览 3 评论 0原文

此代码只对 a[4][4]、b[4][4]、result2[4][4] 这样的方阵有效，对 a[4][10]、b[10][10]、result2[4][10] 这样的非方阵则不起作用。

// Question snippet (declarations of the arrays, dimensions and loop
// counters are not shown): initializes the matrices, multiplies a*b once
// inside an OpenMP parallel region and once serially, and prints the
// clock() delta of each run.
//Init
// Fill a and b with pseudo-random values in 0..100.
for(r=0;r<numrowsa;r++){
    for(c=0;c<numcolsa;c++){
        a[r][c]=rand()%101;
    }
}

for(r=0;r<numrowsb;r++){
    for(c=0;c<numcolsb;c++){
        b[r][c]=rand()%101;
    }
}
// Zero both accumulators; the multiplications below use +=.
for(r=0;r<numrowsr;r++){
    for(c=0;c<numcolsr;c++){
        result[r][c]=0;
    }
}
for(r=0;r<numrowsr2;r++){
    for(c=0;c<numcolr2;c++){
        result2[r][c]=0;
    }
}
//end init
t1=clock();

//trying parallel calculation
// Each thread id 0..3 computes one hard-coded row of result, so this
// section only covers a result matrix with exactly 4 rows.
// NOTE(review): only j is listed as private. c is presumably declared
// outside the region and therefore shared, so several threads may
// increment the same c concurrently and index past numcolsr -- confirm
// against the declarations and consider private(c).
// NOTE(review): b is indexed as b[j][c] with j < numcolsa, so b must have
// at least numcolsa rows -- verify for the non-square case.
# pragma omp parallel num_threads(4) private(j)
{
    int thr = omp_get_thread_num();

    // Dispatch on the thread id: thread k accumulates row k of result.
    if(thr == 0)
        for(c=0;c<numcolsr;c++){
            for(j=0 ;j<numcolsa ; j++)
                result[0][c]+=a[0][j]*b[j][c];
        }
    else if (thr ==1 )
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[1][c]+=a[1][j]*b[j][c];
        }
    else if (thr ==2 )
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[2][c]+=a[2][j]*b[j][c];
        }
    else if (thr ==3)
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[3][c]+=a[3][j]*b[j][c];
        }
}
t2=clock();
// NOTE(review): clock() counts in CLOCKS_PER_SEC units; the "ms" label is
// only accurate where CLOCKS_PER_SEC is 1000 -- confirm for the platform.
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;

t1 = clock();
//trying serial calculation
// Reference triple loop: result2[r][c] accumulates a[r][i]*b[i][c] for
// i in [0, numcolsa).
for(r=0;r<numrowsr2;r++){
    for(c=0;c<numcolr2;c++){
        for(i=0 ; i<numcolsa ; i++)
            result2[r][c]+=a[r][i]*b[i][c];
    }
}
t2 = clock();

/*
for(r=0;r<numrows;r++){
    for(c=0;c<numcols;c++){
        cout <<result[r][c]<<" ";
        if(c == numcols-1)
            cout << endl;
    }
}
*/
// Elapsed clock() ticks of the serial run (same unit caveat as above).
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;

错误是

运行时检查失败：变量 'b' 周围的堆栈已损坏

有什么问题？

原文

This code works only for square matrices such as a[4][4], b[4][4], result2[4][4]; it does not work for non-square ones such as a[4][10], b[10][10], result2[4][10].

// Question snippet (declarations of the arrays, dimensions and loop
// counters are not shown): initializes the matrices, multiplies a*b once
// inside an OpenMP parallel region and once serially, and prints the
// clock() delta of each run.
//Init
// Fill a and b with pseudo-random values in 0..100.
for(r=0;r<numrowsa;r++){
    for(c=0;c<numcolsa;c++){
        a[r][c]=rand()%101;
    }
}

for(r=0;r<numrowsb;r++){
    for(c=0;c<numcolsb;c++){
        b[r][c]=rand()%101;
    }
}
// Zero both accumulators; the multiplications below use +=.
for(r=0;r<numrowsr;r++){
    for(c=0;c<numcolsr;c++){
        result[r][c]=0;
    }
}
for(r=0;r<numrowsr2;r++){
    for(c=0;c<numcolr2;c++){
        result2[r][c]=0;
    }
}
//end init
t1=clock();

//trying parallel calculation
// Each thread id 0..3 computes one hard-coded row of result, so this
// section only covers a result matrix with exactly 4 rows.
// NOTE(review): only j is listed as private. c is presumably declared
// outside the region and therefore shared, so several threads may
// increment the same c concurrently and index past numcolsr -- confirm
// against the declarations and consider private(c).
// NOTE(review): b is indexed as b[j][c] with j < numcolsa, so b must have
// at least numcolsa rows -- verify for the non-square case.
# pragma omp parallel num_threads(4) private(j)
{
    int thr = omp_get_thread_num();

    // Dispatch on the thread id: thread k accumulates row k of result.
    if(thr == 0)
        for(c=0;c<numcolsr;c++){
            for(j=0 ;j<numcolsa ; j++)
                result[0][c]+=a[0][j]*b[j][c];
        }
    else if (thr ==1 )
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[1][c]+=a[1][j]*b[j][c];
        }
    else if (thr ==2 )
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[2][c]+=a[2][j]*b[j][c];
        }
    else if (thr ==3)
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[3][c]+=a[3][j]*b[j][c];
        }
}
t2=clock();
// NOTE(review): clock() counts in CLOCKS_PER_SEC units; the "ms" label is
// only accurate where CLOCKS_PER_SEC is 1000 -- confirm for the platform.
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;

t1 = clock();
//trying serial calculation
// Reference triple loop: result2[r][c] accumulates a[r][i]*b[i][c] for
// i in [0, numcolsa).
for(r=0;r<numrowsr2;r++){
    for(c=0;c<numcolr2;c++){
        for(i=0 ; i<numcolsa ; i++)
            result2[r][c]+=a[r][i]*b[i][c];
    }
}
t2 = clock();

/*
for(r=0;r<numrows;r++){
    for(c=0;c<numcols;c++){
        cout <<result[r][c]<<" ";
        if(c == numcols-1)
            cout << endl;
    }
}
*/
// Elapsed clock() ticks of the serial run (same unit caveat as above).
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;

The error is

Runtime check failure: Stack around variable 'b' was corrupted

What's the problem?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

笑看君怀她人 2024-11-14 10:47:36

哎哟。

// Sketch with placeholders ("..." / "....") left as written: thread t
// handles rows t, t+nt, t+2*nt, ... of the M-row outer loop, and the loop
// counters i, j, k are declared inside the for statements.
// NOTE(review): nt is presumably the number of threads in the team, and the
// lines after the pragma are presumably meant to be wrapped in { } so they
// all belong to the parallel region -- confirm before reusing.
# pragma omp parallel
int t = omp_get_thread_num();
int nt = ...;
for(int i=t ; i<M ; i += nt) {
  for(int j=0 ; j<N ; ++j) {
    for(int k=0 ; k<K ; ++k) {
        .... 
    }
  }
}

ouch.

// Sketch with placeholders ("..." / "....") left as written: thread t
// handles rows t, t+nt, t+2*nt, ... of the M-row outer loop, and the loop
// counters i, j, k are declared inside the for statements.
// NOTE(review): nt is presumably the number of threads in the team, and the
// lines after the pragma are presumably meant to be wrapped in { } so they
// all belong to the parallel region -- confirm before reusing.
# pragma omp parallel
int t = omp_get_thread_num();
int nt = ...;
for(int i=t ; i<M ; i += nt) {
  for(int j=0 ; j<N ; ++j) {
    for(int k=0 ; k<K ; ++k) {
        .... 
    }
  }
}

回复收藏 0 原文

热血少△年 2024-11-14 10:47:36

您写道：

// Excerpt of the question's per-thread branches: each branch walks c over
// [0, numcolsr) and accumulates one fixed row of result, and every branch
// uses the same counter names c and j.
if(thr == 0)
    for(c=0;c<numcolsr;c++){
        for(j=0 ;j<numcolsa ; j++)
            result[0][c]+=a[0][j]*b[j][c];
    }
 else if (thr ==1 )
    for(c=0;c<numcolsr;c++){
        for(j=0 ; j<numcolsa ; j++)
            result[1][c]+=a[1][j]*b[j][c];
    } 
 // and so on

我不了解 omp 的细节，但我认为 c 和 j 会在线程之间共享，这样理解对吗？因为所有循环都使用同一个 c 和同一个 j。

在这种情况下，可能会发生各种竞争条件。例如，线程 1 可能恰好在线程 0 执行 result[0][c]+=a[0][j]*b[j][c] 之前执行了 c++，从而导致越界读/写。

You wrote:

// Excerpt of the question's per-thread branches: each branch walks c over
// [0, numcolsr) and accumulates one fixed row of result, and every branch
// uses the same counter names c and j.
if(thr == 0)
    for(c=0;c<numcolsr;c++){
        for(j=0 ;j<numcolsa ; j++)
            result[0][c]+=a[0][j]*b[j][c];
    }
 else if (thr ==1 )
    for(c=0;c<numcolsr;c++){
        for(j=0 ; j<numcolsa ; j++)
            result[1][c]+=a[1][j]*b[j][c];
    } 
 // and so on

I don't know the details of omp, but am I correct in thinking that c and j will be shared among the threads? Because all loops use the same c and the same j.

In that case, various race conditions could happen. E.g. thread 1 could execute c++ just before thread 0 would execute result[0][c]+=a[0][j]*b[j][c], with an out-of-bounds read/write as result.

回复收藏 0 原文

~没有更多了~