By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
435,255 Members | 2,640 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 435,255 IT Pros & Developers. It's quick & easy.

Matrix Multiplication using multi-threads gets segmentation fault

P: 3
Hello, I'm trying to write a program that multiplies matrices of up to 1024 x 1024 using multi-threading. For example, I need to run a 1024 x 1024 matrix using 256, 64, 16 or 4 threads, or a 64 x 64 matrix using 16 or 4 threads. All the matrices are square. I thought I had coded my program correctly; however, I get a segmentation fault when I use a 720 x 720 matrix or larger. Here's the code:
Expand|Select|Wrap|Line Numbers
  1. #include <iostream>
  2. #include <stdio.h>
  3. #include <pthread.h>
  4.  
  5. using namespace std;
  6.  
  7.  
  8. const int   DIM = 720; //works up to 719, crashes at 720
  9. const int   num_of_thr = 4;
  10. int         matrix_A[DIM][DIM];
  11. int         matrix_B[DIM][DIM];
  12. int         c[DIM][DIM];
  13.  
  14. struct v
  15. {
  16.     int i;
  17.     int j;
  18. };
  19.  
  20. //worker thread
  21. void* matrix_multi(void* data)
  22. {    
  23.     for(int i = 0; i < DIM; i++)
  24.     {
  25.         for(int j = 0; j < DIM; j++)
  26.         {
  27.             c[i][j] = 0;
  28.             for(int k = 0; k < DIM; k++)
  29.             {
  30.                 c[i][j] += matrix_A[i][k] * matrix_B[k][j];
  31.             }
  32.         }
  33.     }
  34.     pthread_exit(0);
  35. }
  36.  
  37. int main()
  38. {
  39.  
  40.     pthread_t thr_id[DIM][DIM];
  41.     pthread_attr_t thr_attr;
  42.     pthread_attr_init(&thr_attr);
  43.  
  44.  
  45.  
  46.    //Filling the Matrices
  47.     for(int i = 0; i < DIM; i++)
  48.     {
  49.         for(int j = 0; j < DIM; j++)
  50.         {
  51.             matrix_A[i][j]= i + j;
  52.             matrix_B[i][j] = i + 3;
  53.         }
  54.     }
  55.  
  56.  
  57.     //create the threads
  58.     for(int i = 0; i < num_of_thr/2; i++)
  59.     {
  60.         for(int j = 0; j < num_of_thr/2; j++)
  61.         {
  62.             struct v *data = (struct v *) malloc(sizeof(struct v));
  63.             data->i = i;
  64.             data->j = j;
  65.             pthread_create(&thr_id[i][j],NULL,matrix_multi,  &data);
  66.         }
  67.     }
  68.  
  69.     //joining the threads
  70.     for(int i = 0; i < num_of_thr/2; i++)
  71.     {
  72.         for(int j = 0; j < num_of_thr/2; j++)
  73.         {
  74.         pthread_join(thr_id[i][j],NULL);
  75.         }
  76.     }
  77.  
  78.      return 0;
  79. }
  80.  
  81.  
  82.  
Any help would be appreciated, thanks in advance.
Sep 15 '10 #1

✓ answered by newb16

It seems that your compiler doesn't like huge variables in the data segment (matrix_A) and/or on the stack (thr_id). Allocate them on the heap. (By the way, why does thr_id need to be of size [DIM][DIM] when you only fill it up to num_of_thr/2 in each dimension?)

Share this Question
Share on Google+
4 Replies


ashitpro
Expert 100+
P: 542
I just ran this code on my CentOS machine (g++ 4.1.2).
It worked fine, with no segfault, even when I increased the number of threads and the dimensions.
Sep 15 '10 #2

100+
P: 687
It seems that your compiler doesn't like huge variables in the data segment (matrix_A) and/or on the stack (thr_id). Allocate them on the heap. (By the way, why does thr_id need to be of size [DIM][DIM] when you only fill it up to num_of_thr/2 in each dimension?)
Sep 15 '10 #3

P: 3
Thanks for the quick help. I've changed my code quite a bit since yesterday. However, I now get an "invalid conversion from `void*' to `__pthread_t**'" error on line 63 of my code. Here is the updated code:
Expand|Select|Wrap|Line Numbers
  1.  
  2. #include <pthread.h>
  3. #include <stdlib.h>
  4. #include <stdio.h>
  5.  
  6. #define SIZE 10            /* Size of matrices */
  7. int N;                /* number of threads */
  8.  
  9. int A[SIZE][SIZE], B[SIZE][SIZE], C[SIZE][SIZE];
  10.  
  11. void fill_matrix(int m[SIZE][SIZE])
  12. {
  13.   int i, j, n = 0;
  14.   for (i=0; i<SIZE; i++)
  15.     for (j=0; j<SIZE; j++)
  16.       m[i][j] = n++;
  17. }
  18.  
  19. void print_matrix(int m[SIZE][SIZE])
  20. {
  21.   int i, j = 0;
  22.   for (i=0; i<SIZE; i++) {
  23.     printf("\n\t| ");
  24.     for (j=0; j<SIZE; j++)
  25.       printf("%2d ", m[i][j]);
  26.     printf("|");
  27.   }
  28. }
  29.  
  30.  
  31. void* mmult (void* slice)
  32. {
  33.   int s = (int)slice;
  34.   int from = (s * SIZE)/N;    /* note that this 'slicing' works fine */
  35.   int to = ((s+1) * SIZE)/N;    /* even if SIZE is not divisible by N */
  36.   int i,j,k;
  37.  
  38.   printf("computing slice %d (from row %d to %d)\n", s, from, to-1);
  39.   for (i=from; i<to; i++)
  40.     for (j=0; j<SIZE; j++) {
  41.       C[i][j]=0;
  42.       for (k=0; k<SIZE; k++)
  43.     C[i][j] += A[i][k]*B[k][j];
  44.     }
  45.  
  46.   printf("finished slice %d\n", s);
  47.   return 0;
  48. }
  49.  
  50. int main(int argc, char *argv[])
  51. {
  52.   pthread_t *thread;
  53.   int i;
  54.  
  55.   if (argc!=2) {
  56.     printf("Usage: %s number_of_threads\n",argv[0]);
  57.     exit(-1);
  58.   }
  59.  
  60.   N=atoi(argv[1]);
  61.   fill_matrix(A);
  62.   fill_matrix(B);
  63.   thread = malloc(N*sizeof(pthread_t));
  64.  
  65.   for (i=1; i<N; i++) {
  66.     if (pthread_create (&thread[i], NULL, mmult, (void*)i) != 0 ) {
  67.       perror("Can't create thread");
  68.       exit(-1);
  69.     }
  70.   }
  71.  
  72.   /* master thread is thread 0 so: */
  73.   mmult(0);
  74.  
  75.   for (i=1; i<N; i++) pthread_join (thread[i], NULL);
  76.  
  77.   printf("\n\n");
  78.   print_matrix(A);
  79.   printf("\n\n\t       * \n");
  80.   print_matrix(B);
  81.   printf("\n\n\t       = \n");
  82.   print_matrix(C);
  83.   printf("\n\n");
  84.  
  85.   return 0;
  86.  
  87. }
  88.  
Any help would be appreciated, thanks in advance
Sep 16 '10 #4

ashitpro
Expert 100+
P: 542
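An explicit type cast should work (you are compiling as C++, which, unlike C, does not implicitly convert the `void*` returned by malloc to another pointer type):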

Expand|Select|Wrap|Line Numbers
  1. thread = (pthread_t *)malloc(N*sizeof(pthread_t));
  2.  
Sep 16 '10 #5

Post your reply

Sign in to post your reply or Sign up for a free account.