
MPI Matrix Mult - Strange error

I'm trying to write an MPI C program for a Debian cluster that multiplies matrices. I've written almost all of it, but I can't seem to shake a strange problem. Here is the code:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define MASTER 0          // Make code more readable with names
#define OTW 1             // "..."
#define ITM 2             // "..."

int size;

int main(int argc, char *argv[])
{
    int x, y, a, b, i, j, trash, reps, lowRows, extraRows, slaves, point,
        rows_to_send, rows_to_get, destination, source;
    size = atoi(argv[1]);

    int length, rank, nodes;

    x = MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nodes);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    slaves = nodes - 1;
    printf("nodes = %d slaves = %d", nodes, slaves);
    MPI_Status status;

    ///////////////// CREATE THE MATRICES DYNAMICALLY ////////////////////////

    int **outMatrix = (int **)malloc(size * sizeof(int *));
    if (outMatrix == NULL)
    {
        printf("Error! memory unavailable and not allocated!");
        exit(1);
    }
    for (a = 0; a < size; a++)
        outMatrix[a] = (int *)malloc(size * sizeof(int));

    int **matrix_1 = malloc(size * sizeof(int *));
    if (matrix_1 == NULL)
    {
        printf("Error! memory unavailable and not allocated!");
        exit(1);
    }
    for (a = 0; a < size; a++)
        matrix_1[a] = malloc(size * sizeof(int));

    int **matrix_2 = malloc(size * sizeof(int *));
    if (matrix_2 == NULL)
    {
        printf("Error! memory unavailable and not allocated!");
        exit(1);
    }
    for (a = 0; a < size; a++)
        matrix_2[a] = malloc(size * sizeof(int));

    //////////////////// FILL THE MATRICES WITH NUMBERS //////////////////////

    if (rank == MASTER)
    {
        insertToAll(matrix_1, 1);
        insertToAll(matrix_2, 1);
        insertToAll(outMatrix, 0);
        //print(matrix_1);
        //print(matrix_2);
        //print(outMatrix);

        //////////////////////// DISPATCH THE PIECES //////////////////////////

        extraRows = size % slaves;              // Find how many will be left over
        lowRows = (size - extraRows) / slaves;  // Find the minimum amount all workers will get
        point = 0;                              // Initialize where rows begin their dispatch

        printf("Number of worker tasks = %d\n", slaves);
        printf("My rank better be 0 and it is %d \n", rank);

        for (destination = 1; destination <= slaves; destination++)
        {
            if (destination <= extraRows)
            {
                rows_to_send = lowRows + 1;
            }
            else
            {
                rows_to_send = lowRows;
            }
            printf("sending %d rows to %d \n", rows_to_send, destination);

            int returnedValue;
            int test = 10;
            MPI_Send(&test, 1, MPI_INT, destination, OTW, MPI_COMM_WORLD);
            printf("sent the test, awaiting reply!");
            MPI_Recv(&returnedValue, 1, MPI_INT, destination, ITM, MPI_COMM_WORLD, &status);
            printf("The number sent back is %d", returnedValue);

            MPI_Send(&point, 1, MPI_INT, destination, OTW, MPI_COMM_WORLD);

            printf("sending the number of rows that will be sent.\n");

            MPI_Send(&rows_to_send, 1, MPI_INT, destination, OTW, MPI_COMM_WORLD);

            printf("sending the series of rows that will be multiplied.\n");
            printf(" point = %d \n rows_to_send = %d \n size = %d", point, rows_to_send, size);

            MPI_Send(&matrix_1[point][0], rows_to_send * size, MPI_INT, destination, OTW, MPI_COMM_WORLD);

            printf("sending the entire other matrix.\n");

            MPI_Send(&matrix_2, size * size, MPI_INT, destination, OTW, MPI_COMM_WORLD);
            printf("All things sent.");

            point = point + rows_to_send;
        }

        ////////////////////// RETRIEVE THE PIECES ////////////////////////////

        for (i = 1; i <= slaves; i++)
        {
            source = i;
            MPI_Recv(&point, 1, MPI_INT, source, ITM, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows_to_get, 1, MPI_INT, source, ITM, MPI_COMM_WORLD, &status);
            MPI_Recv(&outMatrix[point][0], rows_to_get * size, MPI_INT, source, ITM, MPI_COMM_WORLD, &status);
        }
    } // Close the Master Tasks

    ////////////////////// FINISH THE PROGRAM /////////////////////////////////

    if (rank > MASTER)       // if you are a worker
    {
        int temp;
        MPI_Recv(&temp, 1, MPI_INT, 0, OTW, MPI_COMM_WORLD, &status);
        temp = temp - 2;
        MPI_Send(&temp, 1, MPI_INT, 0, ITM, MPI_COMM_WORLD);

        MPI_Recv(&point, 1, MPI_INT, MASTER, OTW, MPI_COMM_WORLD, &status);
        MPI_Recv(&rows_to_get, 1, MPI_INT, MASTER, OTW, MPI_COMM_WORLD, &status);
        MPI_Recv(&matrix_1, rows_to_get * size, MPI_INT, MASTER, OTW, MPI_COMM_WORLD, &status);
        MPI_Recv(&matrix_2, size * size, MPI_INT, MASTER, OTW, MPI_COMM_WORLD, &status);

        for (a = 0; a < size; a++)
            for (b = 0; b < rows_to_get; b++)
            {
                outMatrix[b][a] = 0;
                for (j = 0; j < size; j++)
                    outMatrix[b][a] = outMatrix[b][a] + matrix_1[b][j] * matrix_2[j][a];
            }
        MPI_Send(&point, 1, MPI_INT, MASTER, ITM, MPI_COMM_WORLD);
        MPI_Send(&rows_to_get, 1, MPI_INT, MASTER, ITM, MPI_COMM_WORLD);
        MPI_Send(&outMatrix, rows_to_get * size, MPI_INT, MASTER, ITM, MPI_COMM_WORLD);
    }
    printf("i'm gettin out of here");
    MPI_Finalize();

    return 0;
}

///////////////////////////// HELPER METHODS /////////////////////////////////

int insertToAll(int A[size][size], int val)
{
    int row, column;
    for (row = 0; row < size; row++)
        for (column = 0; column < size; column++)
        {
            A[row][column] = val;
        }
}

int print(int matrix[size][size])
{
    int i, j;
    int x;

    for (i = 0; i < size; i++)
    {
        for (j = 0; j < size; j++)
        {
            x = matrix[j][i];
            printf("\t %i \t", x);
        }
        printf("\n");
    }
}

Long, I know. LAM claims that a process is dead when the master attempts to send the rows of the matrix out to the workers. The output is:

node00:~/HelloWorld$ mpirun n0-2 probWontWork 10
nodes = 3 slaves = 2Number of worker tasks = 2
My rank better be 0 and it is 0
sending 5 rows to 1
sent the test, awaiting reply!The number sent back is 8sending the number of rows that will be sent.
sending the series of rows that will be multiplied.
point = 0
rows_to_send = 5
size = 10
MPI_Send: process in local group is dead (rank 0, MPI_COMM_WORLD)
Rank (0, MPI_COMM_WORLD): Call stack within LAM:
Rank (0, MPI_COMM_WORLD): - MPI_Send()
Rank (0, MPI_COMM_WORLD): - main()
-----------------------------------------------------------------------------
One of the processes started by mpirun has exited with a nonzero exit
code. This typically indicates that the process finished in error.
If your process did not finish in error, be sure to include a "return
0" or "exit(0)" in your C code before exiting the application.

PID 9598 failed on node n1 (192.168.6.201) with exit status 1.
-----------------------------------------------------------------------------



I've been working on this for quite a while and have not gotten anywhere. Any thoughts?
Mar 1 '08 #1
2 Replies


It seems that if I statically allocate the 2D arrays, I'm fine. However, this limits me to the stack. Does anybody know how to overcome this problem?
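
A common workaround is to keep the matrices on the heap but make each one contiguous: allocate a single flat block of size*size ints and build a separate table of row pointers into it. Below is a minimal sketch of that idea; the helper names are illustrative, not from the posted program.

#include <stdlib.h>

/* Allocate an n x n matrix of ints as ONE contiguous block, plus a
   table of row pointers so m[i][j] indexing still works.  Because the
   data is a single block, &m[row][0] is followed in memory by the
   remaining rows, so several rows can be moved with one MPI_Send or
   MPI_Recv of rows*n MPI_INTs. */
int **alloc_matrix(int n)
{
    int *data = malloc((size_t)n * n * sizeof(int)); /* flat data block */
    int **m = malloc((size_t)n * sizeof(int *));     /* row pointer table */
    int i;
    if (data == NULL || m == NULL)
    {
        free(data);
        free(m);
        return NULL;
    }
    for (i = 0; i < n; i++)
        m[i] = data + (size_t)i * n;  /* row i starts at offset i*n */
    return m;
}

void free_matrix(int **m)
{
    if (m != NULL)
    {
        free(m[0]); /* the flat data block */
        free(m);    /* the pointer table */
    }
}

With matrices built this way, MPI_Send(&matrix_1[point][0], rows_to_send*size, MPI_INT, ...) describes memory the program actually owns, and the whole second matrix can be passed as &matrix_2[0][0] rather than &matrix_2, which is the address of the pointer variable itself, not of the matrix data.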
Mar 3 '08 #2

gpraghuram
It seems that if I statically allocate the 2D arrays, I'm fine. However, this limits me to the stack. Does anybody know how to overcome this problem?
One thing that struck me is that while allocating memory for matrix_1 and matrix_2 you are not typecasting the memory returned by malloc.
Try typecasting it and running again.
Otherwise the memory allocation looks OK.
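For example, the suggested cast would look like the line below. Note that in C the cast is optional and does not change the pointer malloc returns; it mainly matters if the file is compiled as C++.

matrix_1 = (int **)malloc(size * sizeof(int *));   /* explicit cast on malloc's result */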

Raghuram
Mar 3 '08 #3
