双免费或损坏(!prev)和Free():下一个尺寸无效(正常)
在用Mallocs和Frees实施一些矩阵操作时,我会遇到一些麻烦。
var声明:
double **a, **b, *c; //in
double **d; //out
a必须是nxk_max矩阵,b k_maxxn,k_max lenght和da nxn矩阵
malloc的ca向量:
N = atoi (argv[1]);
a = (double **) malloc (N*sizeof (double *));
b = (double **) malloc (K_MAX*sizeof (double *));
d = (double **) malloc (N*sizeof (double *));
c = (double *) malloc (K_MAX * sizeof (double));
ind = (int *) malloc (N * sizeof (int));
for (int i=0; i<N; i++){
a[i] = (double *) malloc (K_MAX*sizeof (double));
d[i] = (double *) malloc (N*sizeof (double));
}
for (int i = 0; i < K_MAX; i++){
b[i] = (double *) malloc (N * sizeof (double));
}
freees:
for (int x=0; x<N; x++){
free (a[x]);
free (d[x]);
}
for (int x= 0; x<K_MAX; x++){
free (b[x]);
}
free (a);
free (b);
free (d);
free (ind);
另外,我也不明白为什么我总是得到正确的结果。可能,这是一件愚蠢的事情,但我看不到。
完整代码:
/*
MEJORAS IMPLEMENTADAS:
-Blocking -> 5
-Loop unrolling -> 2
-Reordenación de procedimientos
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pmmintrin.h>
#define K_MAX 8
#define CLS 64
#define block_size CLS / sizeof (double)
double _random (double min, double max);
void start_counter();
double get_counter();
double mhz();
/* Initialize the cycle counter */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/* Set *hi and *lo to the high and low order bits of the cycle counter.
Implementation requires assembly code to use the rdtsc instruction. */
void access_counter(unsigned *hi, unsigned *lo)
{
asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */
: "=r" (*hi), "=r" (*lo) /* and move results to */
: /* No input */ /* the two outputs */
: "%edx", "%eax");
}
/* Record the current value of the cycle counter. */
void start_counter()
{
access_counter(&cyc_hi, &cyc_lo);
}
/* Return the number of cycles since the last call to start_counter. */
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
/* Get cycle counter */
access_counter(&ncyc_hi, &ncyc_lo);
/* Do double precision subtraction */
lo = ncyc_lo - cyc_lo;
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
result = (double) hi * (1 << 30) * 4 + lo;
if (result < 0) {
fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);
}
return result;
}
double mhz(int verbose, int sleeptime)
{
double rate;
start_counter();
sleep(sleeptime);
rate = get_counter() / (1e6*sleeptime);
if (verbose)
printf("\n Processor clock rate = %.1f MHz\n", rate);
return rate;
}
int main(int argc, char *argv[]){
double ck;
int N, *ind;
double **a, **b, *c; //Valores de entrada
double **d, f; //Variables de salida
FILE *fp;
if (argc < 2){
printf ("Faltan argumentos\n");
exit (1);
}
if (!(fp = fopen ("resultados.txt", "a"))){
printf ("No se pudo abrir archivo");
exit(1);
}
srand (1);
N = atoi (argv[1]);
a = (double **) malloc (N*sizeof (double *));
b = (double **) malloc (K_MAX*sizeof (double *));
d = (double **) malloc (N*sizeof (double *));
c = (double *) malloc (K_MAX * sizeof (double));
ind = (int *) malloc (N * sizeof (int));
for (int i=0; i<N; i++){
a[i] = (double *) malloc (K_MAX*sizeof (double));
d[i] = (double *) malloc (N*sizeof (double));
}
for (int i = 0; i < K_MAX; i++){
b[i] = (double *) malloc (N * sizeof (double));
}
for (int i=0; i<N; i++){
for (int j=0; j<K_MAX; j++){
a[i][j] = _random (-2.0, 2.0);
}
}
for (int i=0; i<K_MAX; i++){
for (int j=0; j<N; j++){
b[i][j] = _random (-2.0, 2.0);
}
c [i] = _random (-2.0, 2.0);
}
start_counter();
/* Poñer aquí o código a medir */
/* for (int i=0; i<N; i+=block_size){ //Blocking
for (int j=0; j<N; j+=block_size){
for (int ii = i; ii < i+block_size; ii++){
for (int jj = j; jj < j+block_size; jj++){
d[ii][jj] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
}
}
}
}
} */
int i = 0;
int j = 0;
for ( ; i<N; i+=block_size){ //Blocking + Loop unrolling
for ( ; j<N; j+=block_size){
for (int ii = i; ii < i+block_size; ii+=2){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
for (int jj = j; jj < j+block_size; jj+=2){
d[ii][jj] = 0.0;
d[ii+1][jj] = 0.0;
d[ii][jj+1] = 0.0;
d[ii+1][jj+1] = 0.0;
//Unrolling
d[ii][jj] += 2 * a[ii][0] * ( b[0][jj]- c[0]);
d[ii+1][jj] += 2 * a[ii+1][0] * ( b[0][jj]- c[0]);
d[ii][jj+1] += 2 * a[ii][0] * ( b[0][jj+1]- c[0]);
d[ii+1][jj+1] += 2 * a[ii+1][0] * ( b[0][jj+1]- c[0]);
d[ii][jj] += 2 * a[ii][1] * ( b[1][jj]- c[1]);
d[ii+1][jj] += 2 * a[ii+1][1] * ( b[1][jj]- c[1]);
d[ii][jj+1] += 2 * a[ii][1] * ( b[1][jj+1]- c[1]);
d[ii+1][jj+1] += 2 * a[ii+1][1] * ( b[1][jj+1]- c[1]);
d[ii][jj] += 2 * a[ii][2] * ( b[2][jj]- c[2]);
d[ii+1][jj] += 2 * a[ii+1][2] * ( b[2][jj]- c[2]);
d[ii][jj+1] += 2 * a[ii][2] * ( b[2][jj+1]- c[2]);
d[ii+1][jj+1] += 2 * a[ii+1][2] * ( b[2][jj+1]- c[2]);
d[ii][jj] += 2 * a[ii][3] * ( b[3][jj]- c[3]);
d[ii+1][jj] += 2 * a[ii+1][3] * ( b[3][jj]- c[3]);
d[ii][jj+1] += 2 * a[ii][3] * ( b[3][jj+1]- c[3]);
d[ii+1][jj+1] += 2 * a[ii+1][3] * ( b[3][jj+1]- c[3]);
d[ii][jj] += 2 * a[ii][4] * ( b[4][jj]- c[4]);
d[ii+1][jj] += 2 * a[ii+1][4] * ( b[4][jj]- c[4]);
d[ii][jj+1] += 2 * a[ii][4] * ( b[4][jj+1]- c[4]);
d[ii+1][jj+1] += 2 * a[ii+1][4] * ( b[4][jj+1]- c[4]);
d[ii][jj] += 2 * a[ii][5] * ( b[5][jj]- c[5]);
d[ii+1][jj] += 2 * a[ii+1][5] * ( b[5][jj]- c[5]);
d[ii][jj+1] += 2 * a[ii][5] * ( b[5][jj+1]- c[5]);
d[ii+1][jj+1] += 2 * a[ii+1][5] * ( b[5][jj+1]- c[5]);
d[ii][jj] += 2 * a[ii][6] * ( b[6][jj]- c[6]);
d[ii+1][jj] += 2 * a[ii+1][6] * ( b[6][jj]- c[6]);
d[ii][jj+1] += 2 * a[ii][6] * ( b[6][jj+1]- c[6]);
d[ii+1][jj+1] += 2 * a[ii+1][6] * ( b[6][jj+1]- c[6]);
d[ii][jj] += 2 * a[ii][7] * ( b[7][jj]- c[7]);
d[ii+1][jj] += 2 * a[ii+1][7] * ( b[7][jj]- c[7]);
d[ii][jj+1] += 2 * a[ii][7] * ( b[7][jj+1]- c[7]);
d[ii+1][jj+1] += 2 * a[ii+1][7] * ( b[7][jj+1]- c[7]);
}
}
}
}
for (; i<N; i++){
for (; j<N; j++){
d[i][j] = 0.0;
d[i][j] += 2 * a[i][0] * ( b[0][j]- c[0]);
d[i][j] += 2 * a[i][1] * ( b[1][j]- c[1]);
d[i][j] += 2 * a[i][2] * ( b[2][j]- c[2]);
d[i][j] += 2 * a[i][3] * ( b[3][j]- c[3]);
d[i][j] += 2 * a[i][4] * ( b[4][j]- c[4]);
d[i][j] += 2 * a[i][5] * ( b[5][j]- c[5]);
d[i][j] += 2 * a[i][6] * ( b[6][j]- c[6]);
d[i][j] += 2 * a[i][7] * ( b[7][j]- c[7]);
}
}
/* for (int i=0; i<N; i+=block_size){ //Blocking + Loop unrolling
for (int j=0; j<N; j+=block_size){
for (int ii = i; ii < i+block_size; ii+=5){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
for (int jj = j; jj < j+block_size; jj+=5){
d[ii][jj] = 0.0;
d[ii+1][jj] = 0.0;
d[ii+2][jj] = 0.0;
d[ii+3][jj] = 0.0;
d[ii+4][jj] = 0.0;
d[ii][jj+1] = 0.0;
d[ii][jj+2] = 0.0;
d[ii][jj+3] = 0.0;
d[ii][jj+4] = 0.0;
d[ii+1][jj+1] = 0.0;
d[ii+1][jj+2] = 0.0;
d[ii+1][jj+3] = 0.0;
d[ii+1][jj+4] = 0.0;
d[ii+2][jj+1] = 0.0;
d[ii+2][jj+2] = 0.0;
d[ii+2][jj+3] = 0.0;
d[ii+2][jj+4] = 0.0;
d[ii+3][jj+1] = 0.0;
d[ii+3][jj+2] = 0.0;
d[ii+3][jj+3] = 0.0;
d[ii+3][jj+4] = 0.0;
d[ii+4][jj+1] = 0.0;
d[ii+4][jj+2] = 0.0;
d[ii+4][jj+3] = 0.0;
d[ii+4][jj+4] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
d[ii+1][jj] += 2 * a[ii+1][k] * ( b[k][jj]- c[k]);
d[ii+2][jj] += 2 * a[ii+2][k] * ( b[k][jj]- c[k]);
d[ii][jj+1] += 2 * a[ii][k] * ( b[k][jj+1]- c[k]);
d[ii+1][jj+1] += 2 * a[ii+1][k] * ( b[k][jj+1]- c[k]);
d[ii+2][jj+1] += 2 * a[ii+2][k] * ( b[k][jj+1]- c[k]);
d[ii][jj+2] += 2 * a[ii][k] * ( b[k][jj+2]- c[k]);
d[ii+1][jj+2] += 2 * a[ii+1][k] * ( b[k][jj+2]- c[k]);
d[ii+2][jj+2] += 2 * a[ii+2][k] * ( b[k][jj+2]- c[k]);
d[ii][jj+3] += 2 * a[ii][k] * ( b[k][jj+3]- c[k]);
d[ii+1][jj+3] += 2 * a[ii+1][k] * ( b[k][jj+3]- c[k]);
d[ii+2][jj+3] += 2 * a[ii+2][k] * ( b[k][jj+3]- c[k]);
d[ii+3][jj] += 2 * a[ii+3][k] * ( b[k][jj]- c[k]);
d[ii+3][jj+1] += 2 * a[ii+3][k] * ( b[k][jj+1]- c[k]);
d[ii+3][jj+2] += 2 * a[ii+3][k] * ( b[k][jj+2]- c[k]);
d[ii+3][jj+3] += 2 * a[ii+3][k] * ( b[k][jj+3]- c[k]);
d[ii+4][jj] += 2 * a[ii+4][k] * ( b[k][jj]- c[k]);
d[ii+4][jj+1] += 2 * a[ii+4][k] * ( b[k][jj+1]- c[k]);
d[ii+4][jj+2] += 2 * a[ii+4][k] * ( b[k][jj+2]- c[k]);
d[ii+4][jj+3] += 2 * a[ii+4][k] * ( b[k][jj+3]- c[k]);
d[ii][jj+4] += 2 * a[ii][k] * ( b[k][jj+4]- c[k]);
d[ii+1][jj+4] += 2 * a[ii+1][k] * ( b[k][jj+4]- c[k]);
d[ii+2][jj+4] += 2 * a[ii+2][k] * ( b[k][jj+4]- c[k]);
d[ii+3][jj+4] += 2 * a[ii+3][k] * ( b[k][jj+4]- c[k]);
d[ii+4][jj+4] += 2 * a[ii+4][k] * ( b[k][jj+4]- c[k]);
}
}
}
}
} */
/* for (int i=0; i<N; i+=2){
for (int j=0; j<N; j+=2){
d[i][j] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
}
}
} */
/* for (int i=0; i<N; i+=5){
for (int j=0; j<N; j+=5){
d[i][j] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
d[i][j+2] += 2 * a[i][k] * ( b[k][j+2]- c[k]);
d[i+1][j+2] += 2 * a[i+1][k] * ( b[k][j+2]- c[k]);
d[i][j+3] += 2 * a[i][k] * ( b[k][j+3]- c[k]);
d[i+1][j+3] += 2 * a[i+1][k] * ( b[k][j+3]- c[k]);
d[i+2][j] += 2 * a[i+2][k] * ( b[k][j]- c[k]);
d[i+2][j+1] += 2 * a[i+2][k] * ( b[k][j+1]- c[k]);
d[i+2][j+2] += 2 * a[i+2][k] * ( b[k][j+2]- c[k]);
d[i+2][j+3] += 2 * a[i+2][k] * ( b[k][j+3]- c[k]);
d[i+3][j] += 2 * a[i+3][k] * ( b[k][j]- c[k]);
d[i+3][j+1] += 2 * a[i+3][k] * ( b[k][j+1]- c[k]);
d[i+3][j+2] += 2 * a[i+3][k] * ( b[k][j+2]- c[k]);
d[i+3][j+3] += 2 * a[i+3][k] * ( b[k][j+3]- c[k]);
d[i+4][j] += 2 * a[i+4][k] * ( b[k][j]- c[k]);
d[i+4][j+1] += 2 * a[i+4][k] * ( b[k][j+1]- c[k]);
d[i+4][j+2] += 2 * a[i+4][k] * ( b[k][j+2]- c[k]);
d[i+4][j+3] += 2 * a[i+4][k] * ( b[k][j+3]- c[k]);
d[i][j+4] += 2 * a[i][k] * ( b[k][j+4]- c[k]);
d[i+1][j+4] += 2 * a[i+1][k] * ( b[k][j+4]- c[k]);
d[i+2][j+4] += 2 * a[i+2][k] * ( b[k][j+4]- c[k]);
d[i+3][j+4] += 2 * a[i+3][k] * ( b[k][j+4]- c[k]);
d[i+4][j+4] += 2 * a[i+4][k] * ( b[k][j+4]- c[k]);
}
}
} */
f = 0.0;
for (int i=0; i<N; i++){
f+= d[ind[i]][ind[i]]/2;
}
/*Fin codigo a medir*/
ck=get_counter();
printf ("f=%lf\n", f);
fprintf (fp, "%lf, ", ck);
fclose (fp);
printf("\n Clocks=%1.10lf \n",ck);
for (int x=0; x<N; x++){
free (a[x]);
free (d[x]);
}
for (int x= 0; x<K_MAX; x++){
free (b[x]);
}
free (a);
free (b);
free (d);
free (c);
free (ind);
/* Esta rutina imprime a frecuencia de reloxo estimada coas rutinas start_counter/get_counter */
mhz(1,1);
return 0;
}
double _random (double min, double max){ //Funcion que genera un double aleatorio cuyo valor absoluto esta entre min y max
double r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
while (abs(r) < 1 || abs (r) >= 2)
r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
return r;
}
I am having some troubles when implementing some matrix operations with mallocs and frees.
Var declaration:
double **a, **b, *c; //in
double **d; //out
A must be a NxK_MAX matrix, B K_MAXxN, c a vector of K_MAX lenght and d a NxN matrix
Malloc:
N = atoi (argv[1]);
a = (double **) malloc (N*sizeof (double *));
b = (double **) malloc (K_MAX*sizeof (double *));
d = (double **) malloc (N*sizeof (double *));
c = (double *) malloc (K_MAX * sizeof (double));
ind = (int *) malloc (N * sizeof (int));
for (int i=0; i<N; i++){
a[i] = (double *) malloc (K_MAX*sizeof (double));
d[i] = (double *) malloc (N*sizeof (double));
}
for (int i = 0; i < K_MAX; i++){
b[i] = (double *) malloc (N * sizeof (double));
}
Frees:
for (int x=0; x<N; x++){
free (a[x]);
free (d[x]);
}
for (int x= 0; x<K_MAX; x++){
free (b[x]);
}
free (a);
free (b);
free (d);
free (ind);
Also, I don't understand why I am always getting the right results. Probably, its a silly thing but I don't see it.
Full code:
/*
MEJORAS IMPLEMENTADAS:
-Blocking -> 5
-Loop unrolling -> 2
-Reordenación de procedimientos
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pmmintrin.h>
#define K_MAX 8
#define CLS 64
#define block_size CLS / sizeof (double)
double _random (double min, double max);
void start_counter();
double get_counter();
double mhz();
/* Initialize the cycle counter */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/* Set *hi and *lo to the high and low order bits of the cycle counter.
Implementation requires assembly code to use the rdtsc instruction. */
void access_counter(unsigned *hi, unsigned *lo)
{
asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */
: "=r" (*hi), "=r" (*lo) /* and move results to */
: /* No input */ /* the two outputs */
: "%edx", "%eax");
}
/* Record the current value of the cycle counter. */
void start_counter()
{
access_counter(&cyc_hi, &cyc_lo);
}
/* Return the number of cycles since the last call to start_counter. */
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
/* Get cycle counter */
access_counter(&ncyc_hi, &ncyc_lo);
/* Do double precision subtraction */
lo = ncyc_lo - cyc_lo;
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
result = (double) hi * (1 << 30) * 4 + lo;
if (result < 0) {
fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);
}
return result;
}
double mhz(int verbose, int sleeptime)
{
double rate;
start_counter();
sleep(sleeptime);
rate = get_counter() / (1e6*sleeptime);
if (verbose)
printf("\n Processor clock rate = %.1f MHz\n", rate);
return rate;
}
int main(int argc, char *argv[]){
double ck;
int N, *ind;
double **a, **b, *c; //Valores de entrada
double **d, f; //Variables de salida
FILE *fp;
if (argc < 2){
printf ("Faltan argumentos\n");
exit (1);
}
if (!(fp = fopen ("resultados.txt", "a"))){
printf ("No se pudo abrir archivo");
exit(1);
}
srand (1);
N = atoi (argv[1]);
a = (double **) malloc (N*sizeof (double *));
b = (double **) malloc (K_MAX*sizeof (double *));
d = (double **) malloc (N*sizeof (double *));
c = (double *) malloc (K_MAX * sizeof (double));
ind = (int *) malloc (N * sizeof (int));
for (int i=0; i<N; i++){
a[i] = (double *) malloc (K_MAX*sizeof (double));
d[i] = (double *) malloc (N*sizeof (double));
}
for (int i = 0; i < K_MAX; i++){
b[i] = (double *) malloc (N * sizeof (double));
}
for (int i=0; i<N; i++){
for (int j=0; j<K_MAX; j++){
a[i][j] = _random (-2.0, 2.0);
}
}
for (int i=0; i<K_MAX; i++){
for (int j=0; j<N; j++){
b[i][j] = _random (-2.0, 2.0);
}
c [i] = _random (-2.0, 2.0);
}
start_counter();
/* Poñer aquí o código a medir */
/* for (int i=0; i<N; i+=block_size){ //Blocking
for (int j=0; j<N; j+=block_size){
for (int ii = i; ii < i+block_size; ii++){
for (int jj = j; jj < j+block_size; jj++){
d[ii][jj] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
}
}
}
}
} */
int i = 0;
int j = 0;
for ( ; i<N; i+=block_size){ //Blocking + Loop unrolling
for ( ; j<N; j+=block_size){
for (int ii = i; ii < i+block_size; ii+=2){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
for (int jj = j; jj < j+block_size; jj+=2){
d[ii][jj] = 0.0;
d[ii+1][jj] = 0.0;
d[ii][jj+1] = 0.0;
d[ii+1][jj+1] = 0.0;
//Unrolling
d[ii][jj] += 2 * a[ii][0] * ( b[0][jj]- c[0]);
d[ii+1][jj] += 2 * a[ii+1][0] * ( b[0][jj]- c[0]);
d[ii][jj+1] += 2 * a[ii][0] * ( b[0][jj+1]- c[0]);
d[ii+1][jj+1] += 2 * a[ii+1][0] * ( b[0][jj+1]- c[0]);
d[ii][jj] += 2 * a[ii][1] * ( b[1][jj]- c[1]);
d[ii+1][jj] += 2 * a[ii+1][1] * ( b[1][jj]- c[1]);
d[ii][jj+1] += 2 * a[ii][1] * ( b[1][jj+1]- c[1]);
d[ii+1][jj+1] += 2 * a[ii+1][1] * ( b[1][jj+1]- c[1]);
d[ii][jj] += 2 * a[ii][2] * ( b[2][jj]- c[2]);
d[ii+1][jj] += 2 * a[ii+1][2] * ( b[2][jj]- c[2]);
d[ii][jj+1] += 2 * a[ii][2] * ( b[2][jj+1]- c[2]);
d[ii+1][jj+1] += 2 * a[ii+1][2] * ( b[2][jj+1]- c[2]);
d[ii][jj] += 2 * a[ii][3] * ( b[3][jj]- c[3]);
d[ii+1][jj] += 2 * a[ii+1][3] * ( b[3][jj]- c[3]);
d[ii][jj+1] += 2 * a[ii][3] * ( b[3][jj+1]- c[3]);
d[ii+1][jj+1] += 2 * a[ii+1][3] * ( b[3][jj+1]- c[3]);
d[ii][jj] += 2 * a[ii][4] * ( b[4][jj]- c[4]);
d[ii+1][jj] += 2 * a[ii+1][4] * ( b[4][jj]- c[4]);
d[ii][jj+1] += 2 * a[ii][4] * ( b[4][jj+1]- c[4]);
d[ii+1][jj+1] += 2 * a[ii+1][4] * ( b[4][jj+1]- c[4]);
d[ii][jj] += 2 * a[ii][5] * ( b[5][jj]- c[5]);
d[ii+1][jj] += 2 * a[ii+1][5] * ( b[5][jj]- c[5]);
d[ii][jj+1] += 2 * a[ii][5] * ( b[5][jj+1]- c[5]);
d[ii+1][jj+1] += 2 * a[ii+1][5] * ( b[5][jj+1]- c[5]);
d[ii][jj] += 2 * a[ii][6] * ( b[6][jj]- c[6]);
d[ii+1][jj] += 2 * a[ii+1][6] * ( b[6][jj]- c[6]);
d[ii][jj+1] += 2 * a[ii][6] * ( b[6][jj+1]- c[6]);
d[ii+1][jj+1] += 2 * a[ii+1][6] * ( b[6][jj+1]- c[6]);
d[ii][jj] += 2 * a[ii][7] * ( b[7][jj]- c[7]);
d[ii+1][jj] += 2 * a[ii+1][7] * ( b[7][jj]- c[7]);
d[ii][jj+1] += 2 * a[ii][7] * ( b[7][jj+1]- c[7]);
d[ii+1][jj+1] += 2 * a[ii+1][7] * ( b[7][jj+1]- c[7]);
}
}
}
}
for (; i<N; i++){
for (; j<N; j++){
d[i][j] = 0.0;
d[i][j] += 2 * a[i][0] * ( b[0][j]- c[0]);
d[i][j] += 2 * a[i][1] * ( b[1][j]- c[1]);
d[i][j] += 2 * a[i][2] * ( b[2][j]- c[2]);
d[i][j] += 2 * a[i][3] * ( b[3][j]- c[3]);
d[i][j] += 2 * a[i][4] * ( b[4][j]- c[4]);
d[i][j] += 2 * a[i][5] * ( b[5][j]- c[5]);
d[i][j] += 2 * a[i][6] * ( b[6][j]- c[6]);
d[i][j] += 2 * a[i][7] * ( b[7][j]- c[7]);
}
}
/* for (int i=0; i<N; i+=block_size){ //Blocking + Loop unrolling
for (int j=0; j<N; j+=block_size){
for (int ii = i; ii < i+block_size; ii+=5){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
for (int jj = j; jj < j+block_size; jj+=5){
d[ii][jj] = 0.0;
d[ii+1][jj] = 0.0;
d[ii+2][jj] = 0.0;
d[ii+3][jj] = 0.0;
d[ii+4][jj] = 0.0;
d[ii][jj+1] = 0.0;
d[ii][jj+2] = 0.0;
d[ii][jj+3] = 0.0;
d[ii][jj+4] = 0.0;
d[ii+1][jj+1] = 0.0;
d[ii+1][jj+2] = 0.0;
d[ii+1][jj+3] = 0.0;
d[ii+1][jj+4] = 0.0;
d[ii+2][jj+1] = 0.0;
d[ii+2][jj+2] = 0.0;
d[ii+2][jj+3] = 0.0;
d[ii+2][jj+4] = 0.0;
d[ii+3][jj+1] = 0.0;
d[ii+3][jj+2] = 0.0;
d[ii+3][jj+3] = 0.0;
d[ii+3][jj+4] = 0.0;
d[ii+4][jj+1] = 0.0;
d[ii+4][jj+2] = 0.0;
d[ii+4][jj+3] = 0.0;
d[ii+4][jj+4] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
d[ii+1][jj] += 2 * a[ii+1][k] * ( b[k][jj]- c[k]);
d[ii+2][jj] += 2 * a[ii+2][k] * ( b[k][jj]- c[k]);
d[ii][jj+1] += 2 * a[ii][k] * ( b[k][jj+1]- c[k]);
d[ii+1][jj+1] += 2 * a[ii+1][k] * ( b[k][jj+1]- c[k]);
d[ii+2][jj+1] += 2 * a[ii+2][k] * ( b[k][jj+1]- c[k]);
d[ii][jj+2] += 2 * a[ii][k] * ( b[k][jj+2]- c[k]);
d[ii+1][jj+2] += 2 * a[ii+1][k] * ( b[k][jj+2]- c[k]);
d[ii+2][jj+2] += 2 * a[ii+2][k] * ( b[k][jj+2]- c[k]);
d[ii][jj+3] += 2 * a[ii][k] * ( b[k][jj+3]- c[k]);
d[ii+1][jj+3] += 2 * a[ii+1][k] * ( b[k][jj+3]- c[k]);
d[ii+2][jj+3] += 2 * a[ii+2][k] * ( b[k][jj+3]- c[k]);
d[ii+3][jj] += 2 * a[ii+3][k] * ( b[k][jj]- c[k]);
d[ii+3][jj+1] += 2 * a[ii+3][k] * ( b[k][jj+1]- c[k]);
d[ii+3][jj+2] += 2 * a[ii+3][k] * ( b[k][jj+2]- c[k]);
d[ii+3][jj+3] += 2 * a[ii+3][k] * ( b[k][jj+3]- c[k]);
d[ii+4][jj] += 2 * a[ii+4][k] * ( b[k][jj]- c[k]);
d[ii+4][jj+1] += 2 * a[ii+4][k] * ( b[k][jj+1]- c[k]);
d[ii+4][jj+2] += 2 * a[ii+4][k] * ( b[k][jj+2]- c[k]);
d[ii+4][jj+3] += 2 * a[ii+4][k] * ( b[k][jj+3]- c[k]);
d[ii][jj+4] += 2 * a[ii][k] * ( b[k][jj+4]- c[k]);
d[ii+1][jj+4] += 2 * a[ii+1][k] * ( b[k][jj+4]- c[k]);
d[ii+2][jj+4] += 2 * a[ii+2][k] * ( b[k][jj+4]- c[k]);
d[ii+3][jj+4] += 2 * a[ii+3][k] * ( b[k][jj+4]- c[k]);
d[ii+4][jj+4] += 2 * a[ii+4][k] * ( b[k][jj+4]- c[k]);
}
}
}
}
} */
/* for (int i=0; i<N; i+=2){
for (int j=0; j<N; j+=2){
d[i][j] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
}
}
} */
/* for (int i=0; i<N; i+=5){
for (int j=0; j<N; j+=5){
d[i][j] = 0.0;
for (int k = 0; k < K_MAX; k++){
d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
d[i][j+2] += 2 * a[i][k] * ( b[k][j+2]- c[k]);
d[i+1][j+2] += 2 * a[i+1][k] * ( b[k][j+2]- c[k]);
d[i][j+3] += 2 * a[i][k] * ( b[k][j+3]- c[k]);
d[i+1][j+3] += 2 * a[i+1][k] * ( b[k][j+3]- c[k]);
d[i+2][j] += 2 * a[i+2][k] * ( b[k][j]- c[k]);
d[i+2][j+1] += 2 * a[i+2][k] * ( b[k][j+1]- c[k]);
d[i+2][j+2] += 2 * a[i+2][k] * ( b[k][j+2]- c[k]);
d[i+2][j+3] += 2 * a[i+2][k] * ( b[k][j+3]- c[k]);
d[i+3][j] += 2 * a[i+3][k] * ( b[k][j]- c[k]);
d[i+3][j+1] += 2 * a[i+3][k] * ( b[k][j+1]- c[k]);
d[i+3][j+2] += 2 * a[i+3][k] * ( b[k][j+2]- c[k]);
d[i+3][j+3] += 2 * a[i+3][k] * ( b[k][j+3]- c[k]);
d[i+4][j] += 2 * a[i+4][k] * ( b[k][j]- c[k]);
d[i+4][j+1] += 2 * a[i+4][k] * ( b[k][j+1]- c[k]);
d[i+4][j+2] += 2 * a[i+4][k] * ( b[k][j+2]- c[k]);
d[i+4][j+3] += 2 * a[i+4][k] * ( b[k][j+3]- c[k]);
d[i][j+4] += 2 * a[i][k] * ( b[k][j+4]- c[k]);
d[i+1][j+4] += 2 * a[i+1][k] * ( b[k][j+4]- c[k]);
d[i+2][j+4] += 2 * a[i+2][k] * ( b[k][j+4]- c[k]);
d[i+3][j+4] += 2 * a[i+3][k] * ( b[k][j+4]- c[k]);
d[i+4][j+4] += 2 * a[i+4][k] * ( b[k][j+4]- c[k]);
}
}
} */
f = 0.0;
for (int i=0; i<N; i++){
f+= d[ind[i]][ind[i]]/2;
}
/*Fin codigo a medir*/
ck=get_counter();
printf ("f=%lf\n", f);
fprintf (fp, "%lf, ", ck);
fclose (fp);
printf("\n Clocks=%1.10lf \n",ck);
for (int x=0; x<N; x++){
free (a[x]);
free (d[x]);
}
for (int x= 0; x<K_MAX; x++){
free (b[x]);
}
free (a);
free (b);
free (d);
free (c);
free (ind);
/* Esta rutina imprime a frecuencia de reloxo estimada coas rutinas start_counter/get_counter */
mhz(1,1);
return 0;
}
double _random (double min, double max){ //Funcion que genera un double aleatorio cuyo valor absoluto esta entre min y max
double r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
while (abs(r) < 1 || abs (r) >= 2)
r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
return r;
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
valgrind显示您的代码在整个地方都在踩在
许多地方,我建议您在Valgrind下运行它
valgrind shows your code is stomping all over the place
the first few of many, I suggest you run it under valgrind yourself