双免费或损坏(!prev)和Free():下一个尺寸无效(正常)

发布于 2025-01-25 20:58:40 字数 14740 浏览 3 评论 0原文

在用Mallocs和Frees实施一些矩阵操作时,我会遇到一些麻烦。

var声明:

double **a, **b, *c; //in
double **d; //out

a必须是nxk_max矩阵,b k_maxxn,k_max lenght和da nxn矩阵

malloc的ca向量:

    N = atoi (argv[1]);
    a = (double **) malloc (N*sizeof (double *));
    b = (double **) malloc (K_MAX*sizeof (double *));
    d = (double **) malloc (N*sizeof (double *));
    c = (double *) malloc (K_MAX * sizeof (double));
    ind = (int *) malloc (N * sizeof (int));
    for (int i=0; i<N; i++){
        a[i] = (double *) malloc (K_MAX*sizeof (double));
        d[i] = (double *) malloc (N*sizeof (double));
    }
    for (int i = 0; i < K_MAX; i++){
        b[i] = (double *) malloc (N * sizeof (double));
    }

freees:

    for (int x=0; x<N; x++){
        free (a[x]);
        free (d[x]);
    }
    for (int x= 0; x<K_MAX; x++){
        free (b[x]);
    }

    free (a);
    free (b);
    free (d);
    free (ind);

另外,我也不明白为什么我总是得到正确的结果。可能,这是一件愚蠢的事情,但我看不到。

完整代码:

/*
MEJORAS IMPLEMENTADAS:
    -Blocking -> 5
    -Loop unrolling -> 2
    -Reordenación de procedimientos 
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pmmintrin.h>

#define K_MAX 8
#define CLS 64
#define block_size CLS / sizeof (double)

double _random (double min, double max);

void start_counter();
double get_counter();
double mhz();


/* Initialize the cycle counter */


 static unsigned cyc_hi = 0;
 static unsigned cyc_lo = 0;


 /* Set *hi and *lo to the high and low order bits of the cycle counter.
 Implementation requires assembly code to use the rdtsc instruction. */
 void access_counter(unsigned *hi, unsigned *lo)
 {
 asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */
 : "=r" (*hi), "=r" (*lo) /* and move results to */
 : /* No input */ /* the two outputs */
 : "%edx", "%eax");
 }

 /* Record the current value of the cycle counter. */
 void start_counter()
 {
 access_counter(&cyc_hi, &cyc_lo);
 }

 /* Return the number of cycles since the last call to start_counter. */
 double get_counter()
 {
 unsigned ncyc_hi, ncyc_lo;
 unsigned hi, lo, borrow;
 double result;

 /* Get cycle counter */
 access_counter(&ncyc_hi, &ncyc_lo);

 /* Do double precision subtraction */
 lo = ncyc_lo - cyc_lo; 
 borrow = lo > ncyc_lo;
 hi = ncyc_hi - cyc_hi - borrow;
 result = (double) hi * (1 << 30) * 4 + lo;
 if (result < 0) {
 fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);
 }
 return result;
 }

double mhz(int verbose, int sleeptime)
 {
 double rate;

 start_counter();
 sleep(sleeptime);
 rate = get_counter() / (1e6*sleeptime);
 if (verbose)
 printf("\n Processor clock rate = %.1f MHz\n", rate);
 return rate;
 }




int main(int argc, char *argv[]){
    double ck;

    int N, *ind;
    double **a, **b, *c; //Valores de entrada
    double **d, f; //Variables de salida
    FILE *fp;

    if (argc < 2){
        printf ("Faltan argumentos\n");
        exit (1);
    }

    if (!(fp = fopen ("resultados.txt", "a"))){
        printf ("No se pudo abrir archivo");
        exit(1);
    }

    srand (1);

    N = atoi (argv[1]);
    a = (double **) malloc (N*sizeof (double *));
    b = (double **) malloc (K_MAX*sizeof (double *));
    d = (double **) malloc (N*sizeof (double *));
    c = (double *) malloc (K_MAX * sizeof (double));
    ind = (int *) malloc (N * sizeof (int));
    for (int i=0; i<N; i++){
        a[i] = (double *) malloc (K_MAX*sizeof (double));
        d[i] = (double *) malloc (N*sizeof (double));
    }
    for (int i = 0; i < K_MAX; i++){
        b[i] = (double *) malloc (N * sizeof (double));
    }

    for (int i=0; i<N; i++){
        for (int j=0; j<K_MAX; j++){
            a[i][j] = _random (-2.0, 2.0);
        }
    }
    for (int i=0; i<K_MAX; i++){
        for (int j=0; j<N; j++){
            b[i][j] = _random (-2.0, 2.0);

        }
        c [i] = _random (-2.0, 2.0);
    }


    start_counter();

    /* Poñer aquí o código a medir */
     
/*     for (int i=0; i<N; i+=block_size){ //Blocking
        for (int j=0; j<N; j+=block_size){
            for (int ii = i; ii < i+block_size; ii++){
                for (int jj = j; jj < j+block_size; jj++){
                    d[ii][jj] = 0.0;
                    for (int k = 0; k < K_MAX; k++){
                        d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
                        
                    }
                }
            }
        }
    } */


    int i = 0;
    int j = 0;

    for ( ; i<N; i+=block_size){ //Blocking + Loop unrolling
        for ( ; j<N; j+=block_size){
            
            for (int ii = i; ii < i+block_size; ii+=2){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
                for (int jj = j; jj < j+block_size; jj+=2){
                    d[ii][jj] = 0.0;
                    d[ii+1][jj] = 0.0;
                    d[ii][jj+1] = 0.0;
                    d[ii+1][jj+1] = 0.0;

                    //Unrolling
                        d[ii][jj] += 2 * a[ii][0] * ( b[0][jj]- c[0]);
                        d[ii+1][jj] += 2 * a[ii+1][0] * ( b[0][jj]- c[0]);
                        d[ii][jj+1] += 2 * a[ii][0] * ( b[0][jj+1]- c[0]);
                        d[ii+1][jj+1] += 2 * a[ii+1][0] * ( b[0][jj+1]- c[0]);

                        d[ii][jj] += 2 * a[ii][1] * ( b[1][jj]- c[1]);
                        d[ii+1][jj] += 2 * a[ii+1][1] * ( b[1][jj]- c[1]);
                        d[ii][jj+1] += 2 * a[ii][1] * ( b[1][jj+1]- c[1]);
                        d[ii+1][jj+1] += 2 * a[ii+1][1] * ( b[1][jj+1]- c[1]);

                        d[ii][jj] += 2 * a[ii][2] * ( b[2][jj]- c[2]);
                        d[ii+1][jj] += 2 * a[ii+1][2] * ( b[2][jj]- c[2]);
                        d[ii][jj+1] += 2 * a[ii][2] * ( b[2][jj+1]- c[2]);
                        d[ii+1][jj+1] += 2 * a[ii+1][2] * ( b[2][jj+1]- c[2]);

                        d[ii][jj] += 2 * a[ii][3] * ( b[3][jj]- c[3]);
                        d[ii+1][jj] += 2 * a[ii+1][3] * ( b[3][jj]- c[3]);
                        d[ii][jj+1] += 2 * a[ii][3] * ( b[3][jj+1]- c[3]);
                        d[ii+1][jj+1] += 2 * a[ii+1][3] * ( b[3][jj+1]- c[3]);

                        d[ii][jj] += 2 * a[ii][4] * ( b[4][jj]- c[4]);
                        d[ii+1][jj] += 2 * a[ii+1][4] * ( b[4][jj]- c[4]);
                        d[ii][jj+1] += 2 * a[ii][4] * ( b[4][jj+1]- c[4]);
                        d[ii+1][jj+1] += 2 * a[ii+1][4] * ( b[4][jj+1]- c[4]);

                        d[ii][jj] += 2 * a[ii][5] * ( b[5][jj]- c[5]);
                        d[ii+1][jj] += 2 * a[ii+1][5] * ( b[5][jj]- c[5]);
                        d[ii][jj+1] += 2 * a[ii][5] * ( b[5][jj+1]- c[5]);
                        d[ii+1][jj+1] += 2 * a[ii+1][5] * ( b[5][jj+1]- c[5]);

                        d[ii][jj] += 2 * a[ii][6] * ( b[6][jj]- c[6]);
                        d[ii+1][jj] += 2 * a[ii+1][6] * ( b[6][jj]- c[6]);
                        d[ii][jj+1] += 2 * a[ii][6] * ( b[6][jj+1]- c[6]);
                        d[ii+1][jj+1] += 2 * a[ii+1][6] * ( b[6][jj+1]- c[6]);

                        d[ii][jj] += 2 * a[ii][7] * ( b[7][jj]- c[7]);
                        d[ii+1][jj] += 2 * a[ii+1][7] * ( b[7][jj]- c[7]);
                        d[ii][jj+1] += 2 * a[ii][7] * ( b[7][jj+1]- c[7]);
                        d[ii+1][jj+1] += 2 * a[ii+1][7] * ( b[7][jj+1]- c[7]);
                }
            }
        }
    }
    
    for (; i<N; i++){
        for (; j<N; j++){
            d[i][j] = 0.0;
            d[i][j] += 2 * a[i][0] * ( b[0][j]- c[0]);
            d[i][j] += 2 * a[i][1] * ( b[1][j]- c[1]);
            d[i][j] += 2 * a[i][2] * ( b[2][j]- c[2]);
            d[i][j] += 2 * a[i][3] * ( b[3][j]- c[3]);
            d[i][j] += 2 * a[i][4] * ( b[4][j]- c[4]);
            d[i][j] += 2 * a[i][5] * ( b[5][j]- c[5]);
            d[i][j] += 2 * a[i][6] * ( b[6][j]- c[6]);
            d[i][j] += 2 * a[i][7] * ( b[7][j]- c[7]);
        }
    }

    /* for (int i=0; i<N; i+=block_size){ //Blocking + Loop unrolling
        for (int j=0; j<N; j+=block_size){
            
            for (int ii = i; ii < i+block_size; ii+=5){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
                for (int jj = j; jj < j+block_size; jj+=5){
                    d[ii][jj] = 0.0;
                    d[ii+1][jj] = 0.0;
                    d[ii+2][jj] = 0.0;
                    d[ii+3][jj] = 0.0;
                    d[ii+4][jj] = 0.0;
                    d[ii][jj+1] = 0.0;
                    d[ii][jj+2] = 0.0;
                    d[ii][jj+3] = 0.0;
                    d[ii][jj+4] = 0.0;
                    d[ii+1][jj+1] = 0.0;
                    d[ii+1][jj+2] = 0.0;
                    d[ii+1][jj+3] = 0.0;
                    d[ii+1][jj+4] = 0.0;
                    d[ii+2][jj+1] = 0.0;
                    d[ii+2][jj+2] = 0.0;
                    d[ii+2][jj+3] = 0.0;
                    d[ii+2][jj+4] = 0.0;
                    d[ii+3][jj+1] = 0.0;
                    d[ii+3][jj+2] = 0.0;
                    d[ii+3][jj+3] = 0.0;
                    d[ii+3][jj+4] = 0.0;
                    d[ii+4][jj+1] = 0.0;
                    d[ii+4][jj+2] = 0.0;
                    d[ii+4][jj+3] = 0.0;
                    d[ii+4][jj+4] = 0.0;

                    for (int k = 0; k < K_MAX; k++){
                        d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
                        d[ii+1][jj] += 2 * a[ii+1][k] * ( b[k][jj]- c[k]);
                        d[ii+2][jj] += 2 * a[ii+2][k] * ( b[k][jj]- c[k]);
                        d[ii][jj+1] += 2 * a[ii][k] * ( b[k][jj+1]- c[k]);
                        d[ii+1][jj+1] += 2 * a[ii+1][k] * ( b[k][jj+1]- c[k]);
                        d[ii+2][jj+1] += 2 * a[ii+2][k] * ( b[k][jj+1]- c[k]);
                        d[ii][jj+2] += 2 * a[ii][k] * ( b[k][jj+2]- c[k]);
                        d[ii+1][jj+2] += 2 * a[ii+1][k] * ( b[k][jj+2]- c[k]);
                        d[ii+2][jj+2] += 2 * a[ii+2][k] * ( b[k][jj+2]- c[k]);
                        d[ii][jj+3] += 2 * a[ii][k] * ( b[k][jj+3]- c[k]);
                        d[ii+1][jj+3] += 2 * a[ii+1][k] * ( b[k][jj+3]- c[k]);
                        d[ii+2][jj+3] += 2 * a[ii+2][k] * ( b[k][jj+3]- c[k]);
                        d[ii+3][jj] += 2 * a[ii+3][k] * ( b[k][jj]- c[k]);
                        d[ii+3][jj+1] += 2 * a[ii+3][k] * ( b[k][jj+1]- c[k]);
                        d[ii+3][jj+2] += 2 * a[ii+3][k] * ( b[k][jj+2]- c[k]);
                        d[ii+3][jj+3] += 2 * a[ii+3][k] * ( b[k][jj+3]- c[k]);
                        d[ii+4][jj] += 2 * a[ii+4][k] * ( b[k][jj]- c[k]);
                        d[ii+4][jj+1] += 2 * a[ii+4][k] * ( b[k][jj+1]- c[k]);
                        d[ii+4][jj+2] += 2 * a[ii+4][k] * ( b[k][jj+2]- c[k]);
                        d[ii+4][jj+3] += 2 * a[ii+4][k] * ( b[k][jj+3]- c[k]);
                        d[ii][jj+4] += 2 * a[ii][k] * ( b[k][jj+4]- c[k]);
                        d[ii+1][jj+4] += 2 * a[ii+1][k] * ( b[k][jj+4]- c[k]);
                        d[ii+2][jj+4] += 2 * a[ii+2][k] * ( b[k][jj+4]- c[k]);
                        d[ii+3][jj+4] += 2 * a[ii+3][k] * ( b[k][jj+4]- c[k]);
                        d[ii+4][jj+4] += 2 * a[ii+4][k] * ( b[k][jj+4]- c[k]);
                    }
                }
            }
        }
    } */

/*     for (int i=0; i<N; i+=2){ 
        for (int j=0; j<N; j+=2){
            d[i][j] = 0.0;
            for (int k = 0; k < K_MAX; k++){
                d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
                d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
                d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
                d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
            }
        }
    } */


/*     for (int i=0; i<N; i+=5){ 
        for (int j=0; j<N; j+=5){
            d[i][j] = 0.0;
            for (int k = 0; k < K_MAX; k++){
                d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
                d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
                d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
                d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
                d[i][j+2] += 2 * a[i][k] * ( b[k][j+2]- c[k]);
                d[i+1][j+2] += 2 * a[i+1][k] * ( b[k][j+2]- c[k]);
                d[i][j+3] += 2 * a[i][k] * ( b[k][j+3]- c[k]);
                d[i+1][j+3] += 2 * a[i+1][k] * ( b[k][j+3]- c[k]);
                d[i+2][j] += 2 * a[i+2][k] * ( b[k][j]- c[k]);
                d[i+2][j+1] += 2 * a[i+2][k] * ( b[k][j+1]- c[k]);
                d[i+2][j+2] += 2 * a[i+2][k] * ( b[k][j+2]- c[k]);
                d[i+2][j+3] += 2 * a[i+2][k] * ( b[k][j+3]- c[k]);
                d[i+3][j] += 2 * a[i+3][k] * ( b[k][j]- c[k]);
                d[i+3][j+1] += 2 * a[i+3][k] * ( b[k][j+1]- c[k]);
                d[i+3][j+2] += 2 * a[i+3][k] * ( b[k][j+2]- c[k]);
                d[i+3][j+3] += 2 * a[i+3][k] * ( b[k][j+3]- c[k]);
                d[i+4][j] += 2 * a[i+4][k] * ( b[k][j]- c[k]);
                d[i+4][j+1] += 2 * a[i+4][k] * ( b[k][j+1]- c[k]);
                d[i+4][j+2] += 2 * a[i+4][k] * ( b[k][j+2]- c[k]);
                d[i+4][j+3] += 2 * a[i+4][k] * ( b[k][j+3]- c[k]);
                d[i][j+4] += 2 * a[i][k] * ( b[k][j+4]- c[k]);
                d[i+1][j+4] += 2 * a[i+1][k] * ( b[k][j+4]- c[k]);
                d[i+2][j+4] += 2 * a[i+2][k] * ( b[k][j+4]- c[k]);
                d[i+3][j+4] += 2 * a[i+3][k] * ( b[k][j+4]- c[k]);
                d[i+4][j+4] += 2 * a[i+4][k] * ( b[k][j+4]- c[k]);
            }
        }
    } */
    f = 0.0;
    for (int i=0; i<N; i++){
        f+= d[ind[i]][ind[i]]/2;
    }    

    /*Fin codigo a medir*/
    ck=get_counter();
    printf ("f=%lf\n", f);

    fprintf (fp, "%lf, ", ck);
    fclose (fp);
    printf("\n Clocks=%1.10lf \n",ck);
    

    for (int x=0; x<N; x++){
        free (a[x]);
        free (d[x]);
    }
    for (int x= 0; x<K_MAX; x++){
        free (b[x]);
    }

    free (a);
    free (b);
    free (d);
    free (c);

    free (ind);

    /* Esta rutina imprime a frecuencia de reloxo estimada coas rutinas start_counter/get_counter */
    mhz(1,1);

    return 0;
}

double _random (double min, double max){ //Funcion que genera un double aleatorio cuyo valor absoluto esta entre min y max
    double r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
    while (abs(r) < 1 || abs (r) >= 2)
        r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
    return r;
}

I am having some troubles when implementing some matrix operations with mallocs and frees.

Var declaration:

double **a, **b, *c; //in
double **d; //out

A must be a NxK_MAX matrix, B K_MAXxN, c a vector of K_MAX lenght and d a NxN matrix

Malloc:

    N = atoi (argv[1]);
    a = (double **) malloc (N*sizeof (double *));
    b = (double **) malloc (K_MAX*sizeof (double *));
    d = (double **) malloc (N*sizeof (double *));
    c = (double *) malloc (K_MAX * sizeof (double));
    ind = (int *) malloc (N * sizeof (int));
    for (int i=0; i<N; i++){
        a[i] = (double *) malloc (K_MAX*sizeof (double));
        d[i] = (double *) malloc (N*sizeof (double));
    }
    for (int i = 0; i < K_MAX; i++){
        b[i] = (double *) malloc (N * sizeof (double));
    }

Frees:

    for (int x=0; x<N; x++){
        free (a[x]);
        free (d[x]);
    }
    for (int x= 0; x<K_MAX; x++){
        free (b[x]);
    }

    free (a);
    free (b);
    free (d);
    free (ind);

Also, I don't understand why I am always getting the right results. Probably, its a silly thing but I don't see it.

Full code:

/*
MEJORAS IMPLEMENTADAS:
    -Blocking -> 5
    -Loop unrolling -> 2
    -Reordenación de procedimientos 
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pmmintrin.h>

#define K_MAX 8
#define CLS 64
#define block_size CLS / sizeof (double)

double _random (double min, double max);

void start_counter();
double get_counter();
double mhz();


/* Initialize the cycle counter */


 static unsigned cyc_hi = 0;
 static unsigned cyc_lo = 0;


 /* Set *hi and *lo to the high and low order bits of the cycle counter.
 Implementation requires assembly code to use the rdtsc instruction. */
 void access_counter(unsigned *hi, unsigned *lo)
 {
 asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */
 : "=r" (*hi), "=r" (*lo) /* and move results to */
 : /* No input */ /* the two outputs */
 : "%edx", "%eax");
 }

 /* Record the current value of the cycle counter. */
 void start_counter()
 {
 access_counter(&cyc_hi, &cyc_lo);
 }

 /* Return the number of cycles since the last call to start_counter. */
 double get_counter()
 {
 unsigned ncyc_hi, ncyc_lo;
 unsigned hi, lo, borrow;
 double result;

 /* Get cycle counter */
 access_counter(&ncyc_hi, &ncyc_lo);

 /* Do double precision subtraction */
 lo = ncyc_lo - cyc_lo; 
 borrow = lo > ncyc_lo;
 hi = ncyc_hi - cyc_hi - borrow;
 result = (double) hi * (1 << 30) * 4 + lo;
 if (result < 0) {
 fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);
 }
 return result;
 }

double mhz(int verbose, int sleeptime)
 {
 double rate;

 start_counter();
 sleep(sleeptime);
 rate = get_counter() / (1e6*sleeptime);
 if (verbose)
 printf("\n Processor clock rate = %.1f MHz\n", rate);
 return rate;
 }




int main(int argc, char *argv[]){
    double ck;

    int N, *ind;
    double **a, **b, *c; //Valores de entrada
    double **d, f; //Variables de salida
    FILE *fp;

    if (argc < 2){
        printf ("Faltan argumentos\n");
        exit (1);
    }

    if (!(fp = fopen ("resultados.txt", "a"))){
        printf ("No se pudo abrir archivo");
        exit(1);
    }

    srand (1);

    N = atoi (argv[1]);
    a = (double **) malloc (N*sizeof (double *));
    b = (double **) malloc (K_MAX*sizeof (double *));
    d = (double **) malloc (N*sizeof (double *));
    c = (double *) malloc (K_MAX * sizeof (double));
    ind = (int *) malloc (N * sizeof (int));
    for (int i=0; i<N; i++){
        a[i] = (double *) malloc (K_MAX*sizeof (double));
        d[i] = (double *) malloc (N*sizeof (double));
    }
    for (int i = 0; i < K_MAX; i++){
        b[i] = (double *) malloc (N * sizeof (double));
    }

    for (int i=0; i<N; i++){
        for (int j=0; j<K_MAX; j++){
            a[i][j] = _random (-2.0, 2.0);
        }
    }
    for (int i=0; i<K_MAX; i++){
        for (int j=0; j<N; j++){
            b[i][j] = _random (-2.0, 2.0);

        }
        c [i] = _random (-2.0, 2.0);
    }


    start_counter();

    /* Poñer aquí o código a medir */
     
/*     for (int i=0; i<N; i+=block_size){ //Blocking
        for (int j=0; j<N; j+=block_size){
            for (int ii = i; ii < i+block_size; ii++){
                for (int jj = j; jj < j+block_size; jj++){
                    d[ii][jj] = 0.0;
                    for (int k = 0; k < K_MAX; k++){
                        d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
                        
                    }
                }
            }
        }
    } */


    int i = 0;
    int j = 0;

    for ( ; i<N; i+=block_size){ //Blocking + Loop unrolling
        for ( ; j<N; j+=block_size){
            
            for (int ii = i; ii < i+block_size; ii+=2){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
                for (int jj = j; jj < j+block_size; jj+=2){
                    d[ii][jj] = 0.0;
                    d[ii+1][jj] = 0.0;
                    d[ii][jj+1] = 0.0;
                    d[ii+1][jj+1] = 0.0;

                    //Unrolling
                        d[ii][jj] += 2 * a[ii][0] * ( b[0][jj]- c[0]);
                        d[ii+1][jj] += 2 * a[ii+1][0] * ( b[0][jj]- c[0]);
                        d[ii][jj+1] += 2 * a[ii][0] * ( b[0][jj+1]- c[0]);
                        d[ii+1][jj+1] += 2 * a[ii+1][0] * ( b[0][jj+1]- c[0]);

                        d[ii][jj] += 2 * a[ii][1] * ( b[1][jj]- c[1]);
                        d[ii+1][jj] += 2 * a[ii+1][1] * ( b[1][jj]- c[1]);
                        d[ii][jj+1] += 2 * a[ii][1] * ( b[1][jj+1]- c[1]);
                        d[ii+1][jj+1] += 2 * a[ii+1][1] * ( b[1][jj+1]- c[1]);

                        d[ii][jj] += 2 * a[ii][2] * ( b[2][jj]- c[2]);
                        d[ii+1][jj] += 2 * a[ii+1][2] * ( b[2][jj]- c[2]);
                        d[ii][jj+1] += 2 * a[ii][2] * ( b[2][jj+1]- c[2]);
                        d[ii+1][jj+1] += 2 * a[ii+1][2] * ( b[2][jj+1]- c[2]);

                        d[ii][jj] += 2 * a[ii][3] * ( b[3][jj]- c[3]);
                        d[ii+1][jj] += 2 * a[ii+1][3] * ( b[3][jj]- c[3]);
                        d[ii][jj+1] += 2 * a[ii][3] * ( b[3][jj+1]- c[3]);
                        d[ii+1][jj+1] += 2 * a[ii+1][3] * ( b[3][jj+1]- c[3]);

                        d[ii][jj] += 2 * a[ii][4] * ( b[4][jj]- c[4]);
                        d[ii+1][jj] += 2 * a[ii+1][4] * ( b[4][jj]- c[4]);
                        d[ii][jj+1] += 2 * a[ii][4] * ( b[4][jj+1]- c[4]);
                        d[ii+1][jj+1] += 2 * a[ii+1][4] * ( b[4][jj+1]- c[4]);

                        d[ii][jj] += 2 * a[ii][5] * ( b[5][jj]- c[5]);
                        d[ii+1][jj] += 2 * a[ii+1][5] * ( b[5][jj]- c[5]);
                        d[ii][jj+1] += 2 * a[ii][5] * ( b[5][jj+1]- c[5]);
                        d[ii+1][jj+1] += 2 * a[ii+1][5] * ( b[5][jj+1]- c[5]);

                        d[ii][jj] += 2 * a[ii][6] * ( b[6][jj]- c[6]);
                        d[ii+1][jj] += 2 * a[ii+1][6] * ( b[6][jj]- c[6]);
                        d[ii][jj+1] += 2 * a[ii][6] * ( b[6][jj+1]- c[6]);
                        d[ii+1][jj+1] += 2 * a[ii+1][6] * ( b[6][jj+1]- c[6]);

                        d[ii][jj] += 2 * a[ii][7] * ( b[7][jj]- c[7]);
                        d[ii+1][jj] += 2 * a[ii+1][7] * ( b[7][jj]- c[7]);
                        d[ii][jj+1] += 2 * a[ii][7] * ( b[7][jj+1]- c[7]);
                        d[ii+1][jj+1] += 2 * a[ii+1][7] * ( b[7][jj+1]- c[7]);
                }
            }
        }
    }
    
    for (; i<N; i++){
        for (; j<N; j++){
            d[i][j] = 0.0;
            d[i][j] += 2 * a[i][0] * ( b[0][j]- c[0]);
            d[i][j] += 2 * a[i][1] * ( b[1][j]- c[1]);
            d[i][j] += 2 * a[i][2] * ( b[2][j]- c[2]);
            d[i][j] += 2 * a[i][3] * ( b[3][j]- c[3]);
            d[i][j] += 2 * a[i][4] * ( b[4][j]- c[4]);
            d[i][j] += 2 * a[i][5] * ( b[5][j]- c[5]);
            d[i][j] += 2 * a[i][6] * ( b[6][j]- c[6]);
            d[i][j] += 2 * a[i][7] * ( b[7][j]- c[7]);
        }
    }

    /* for (int i=0; i<N; i+=block_size){ //Blocking + Loop unrolling
        for (int j=0; j<N; j+=block_size){
            
            for (int ii = i; ii < i+block_size; ii+=5){ //El numero de operaciones en cada desenrollo debe ser divisor del block_size
                for (int jj = j; jj < j+block_size; jj+=5){
                    d[ii][jj] = 0.0;
                    d[ii+1][jj] = 0.0;
                    d[ii+2][jj] = 0.0;
                    d[ii+3][jj] = 0.0;
                    d[ii+4][jj] = 0.0;
                    d[ii][jj+1] = 0.0;
                    d[ii][jj+2] = 0.0;
                    d[ii][jj+3] = 0.0;
                    d[ii][jj+4] = 0.0;
                    d[ii+1][jj+1] = 0.0;
                    d[ii+1][jj+2] = 0.0;
                    d[ii+1][jj+3] = 0.0;
                    d[ii+1][jj+4] = 0.0;
                    d[ii+2][jj+1] = 0.0;
                    d[ii+2][jj+2] = 0.0;
                    d[ii+2][jj+3] = 0.0;
                    d[ii+2][jj+4] = 0.0;
                    d[ii+3][jj+1] = 0.0;
                    d[ii+3][jj+2] = 0.0;
                    d[ii+3][jj+3] = 0.0;
                    d[ii+3][jj+4] = 0.0;
                    d[ii+4][jj+1] = 0.0;
                    d[ii+4][jj+2] = 0.0;
                    d[ii+4][jj+3] = 0.0;
                    d[ii+4][jj+4] = 0.0;

                    for (int k = 0; k < K_MAX; k++){
                        d[ii][jj] += 2 * a[ii][k] * ( b[k][jj]- c[k]);
                        d[ii+1][jj] += 2 * a[ii+1][k] * ( b[k][jj]- c[k]);
                        d[ii+2][jj] += 2 * a[ii+2][k] * ( b[k][jj]- c[k]);
                        d[ii][jj+1] += 2 * a[ii][k] * ( b[k][jj+1]- c[k]);
                        d[ii+1][jj+1] += 2 * a[ii+1][k] * ( b[k][jj+1]- c[k]);
                        d[ii+2][jj+1] += 2 * a[ii+2][k] * ( b[k][jj+1]- c[k]);
                        d[ii][jj+2] += 2 * a[ii][k] * ( b[k][jj+2]- c[k]);
                        d[ii+1][jj+2] += 2 * a[ii+1][k] * ( b[k][jj+2]- c[k]);
                        d[ii+2][jj+2] += 2 * a[ii+2][k] * ( b[k][jj+2]- c[k]);
                        d[ii][jj+3] += 2 * a[ii][k] * ( b[k][jj+3]- c[k]);
                        d[ii+1][jj+3] += 2 * a[ii+1][k] * ( b[k][jj+3]- c[k]);
                        d[ii+2][jj+3] += 2 * a[ii+2][k] * ( b[k][jj+3]- c[k]);
                        d[ii+3][jj] += 2 * a[ii+3][k] * ( b[k][jj]- c[k]);
                        d[ii+3][jj+1] += 2 * a[ii+3][k] * ( b[k][jj+1]- c[k]);
                        d[ii+3][jj+2] += 2 * a[ii+3][k] * ( b[k][jj+2]- c[k]);
                        d[ii+3][jj+3] += 2 * a[ii+3][k] * ( b[k][jj+3]- c[k]);
                        d[ii+4][jj] += 2 * a[ii+4][k] * ( b[k][jj]- c[k]);
                        d[ii+4][jj+1] += 2 * a[ii+4][k] * ( b[k][jj+1]- c[k]);
                        d[ii+4][jj+2] += 2 * a[ii+4][k] * ( b[k][jj+2]- c[k]);
                        d[ii+4][jj+3] += 2 * a[ii+4][k] * ( b[k][jj+3]- c[k]);
                        d[ii][jj+4] += 2 * a[ii][k] * ( b[k][jj+4]- c[k]);
                        d[ii+1][jj+4] += 2 * a[ii+1][k] * ( b[k][jj+4]- c[k]);
                        d[ii+2][jj+4] += 2 * a[ii+2][k] * ( b[k][jj+4]- c[k]);
                        d[ii+3][jj+4] += 2 * a[ii+3][k] * ( b[k][jj+4]- c[k]);
                        d[ii+4][jj+4] += 2 * a[ii+4][k] * ( b[k][jj+4]- c[k]);
                    }
                }
            }
        }
    } */

/*     for (int i=0; i<N; i+=2){ 
        for (int j=0; j<N; j+=2){
            d[i][j] = 0.0;
            for (int k = 0; k < K_MAX; k++){
                d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
                d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
                d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
                d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
            }
        }
    } */


/*     for (int i=0; i<N; i+=5){ 
        for (int j=0; j<N; j+=5){
            d[i][j] = 0.0;
            for (int k = 0; k < K_MAX; k++){
                d[i][j] += 2 * a[i][k] * ( b[k][j]- c[k]);
                d[i+1][j] += 2 * a[i+1][k] * ( b[k][j]- c[k]);
                d[i][j+1] += 2 * a[i][k] * ( b[k][j+1]- c[k]);
                d[i+1][j+1] += 2 * a[i+1][k] * ( b[k][j+1]- c[k]);
                d[i][j+2] += 2 * a[i][k] * ( b[k][j+2]- c[k]);
                d[i+1][j+2] += 2 * a[i+1][k] * ( b[k][j+2]- c[k]);
                d[i][j+3] += 2 * a[i][k] * ( b[k][j+3]- c[k]);
                d[i+1][j+3] += 2 * a[i+1][k] * ( b[k][j+3]- c[k]);
                d[i+2][j] += 2 * a[i+2][k] * ( b[k][j]- c[k]);
                d[i+2][j+1] += 2 * a[i+2][k] * ( b[k][j+1]- c[k]);
                d[i+2][j+2] += 2 * a[i+2][k] * ( b[k][j+2]- c[k]);
                d[i+2][j+3] += 2 * a[i+2][k] * ( b[k][j+3]- c[k]);
                d[i+3][j] += 2 * a[i+3][k] * ( b[k][j]- c[k]);
                d[i+3][j+1] += 2 * a[i+3][k] * ( b[k][j+1]- c[k]);
                d[i+3][j+2] += 2 * a[i+3][k] * ( b[k][j+2]- c[k]);
                d[i+3][j+3] += 2 * a[i+3][k] * ( b[k][j+3]- c[k]);
                d[i+4][j] += 2 * a[i+4][k] * ( b[k][j]- c[k]);
                d[i+4][j+1] += 2 * a[i+4][k] * ( b[k][j+1]- c[k]);
                d[i+4][j+2] += 2 * a[i+4][k] * ( b[k][j+2]- c[k]);
                d[i+4][j+3] += 2 * a[i+4][k] * ( b[k][j+3]- c[k]);
                d[i][j+4] += 2 * a[i][k] * ( b[k][j+4]- c[k]);
                d[i+1][j+4] += 2 * a[i+1][k] * ( b[k][j+4]- c[k]);
                d[i+2][j+4] += 2 * a[i+2][k] * ( b[k][j+4]- c[k]);
                d[i+3][j+4] += 2 * a[i+3][k] * ( b[k][j+4]- c[k]);
                d[i+4][j+4] += 2 * a[i+4][k] * ( b[k][j+4]- c[k]);
            }
        }
    } */
    f = 0.0;
    for (int i=0; i<N; i++){
        f+= d[ind[i]][ind[i]]/2;
    }    

    /*Fin codigo a medir*/
    ck=get_counter();
    printf ("f=%lf\n", f);

    fprintf (fp, "%lf, ", ck);
    fclose (fp);
    printf("\n Clocks=%1.10lf \n",ck);
    

    for (int x=0; x<N; x++){
        free (a[x]);
        free (d[x]);
    }
    for (int x= 0; x<K_MAX; x++){
        free (b[x]);
    }

    free (a);
    free (b);
    free (d);
    free (c);

    free (ind);

    /* Esta rutina imprime a frecuencia de reloxo estimada coas rutinas start_counter/get_counter */
    mhz(1,1);

    return 0;
}

double _random (double min, double max){ //Funcion que genera un double aleatorio cuyo valor absoluto esta entre min y max
    double r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
    while (abs(r) < 1 || abs (r) >= 2)
        r = min + ((double)rand()/((double)RAND_MAX /(max - min)));
    return r;
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

娇纵 2025-02-01 20:58:40

valgrind显示您的代码在整个地方都在踩在

==1520==
==1520== Invalid write of size 8
==1520==    at 0x1097EB: main (sh.cpp:158)
==1520==  Address 0x4a4a5c0 is 0 bytes after a block of size 80 alloc'd
==1520==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==1520==    by 0x1095E1: main (sh.cpp:111)
==1520==
==1520== Invalid write of size 8
==1520==    at 0x10981B: main (sh.cpp:159)
==1520==  Address 0x4a4a6d0 is 0 bytes after a block of size 80 alloc'd
==1520==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==1520==    by 0x1095E1: main (sh.cpp:111)
==1520==
==1520== Invalid write of size 8
==1520==    at 0x10984B: main (sh.cpp:160)
==1520==  Address 0x4a4a5c8 is 8 bytes after a block of size 80 alloc'd
==1520==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==1520==    by 0x1095E1: main (sh.cpp:111)

许多地方,我建议您在Valgrind下运行它

valgrind shows your code is stomping all over the place

==1520==
==1520== Invalid write of size 8
==1520==    at 0x1097EB: main (sh.cpp:158)
==1520==  Address 0x4a4a5c0 is 0 bytes after a block of size 80 alloc'd
==1520==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==1520==    by 0x1095E1: main (sh.cpp:111)
==1520==
==1520== Invalid write of size 8
==1520==    at 0x10981B: main (sh.cpp:159)
==1520==  Address 0x4a4a6d0 is 0 bytes after a block of size 80 alloc'd
==1520==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==1520==    by 0x1095E1: main (sh.cpp:111)
==1520==
==1520== Invalid write of size 8
==1520==    at 0x10984B: main (sh.cpp:160)
==1520==  Address 0x4a4a5c8 is 8 bytes after a block of size 80 alloc'd
==1520==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==1520==    by 0x1095E1: main (sh.cpp:111)

the first few of many, I suggest you run it under valgrind yourself

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文