MPI dot product using point-to-point operations fails when the data is large
I have the code below to compute the dot product of two vectors of size `vectorsize`.
The code works fine until `vectorsize` reaches 10000; beyond that it gives unrelated results.
When I tried to debug the program, I saw that processor 0 (root) finishes its job before the other processors send their local results.
I got the same situation when I used MPI_Reduce() (code part 2).
If I use MPI_Scatter() before MPI_Reduce(), it is OK.
#include <stdio.h> #include <stdlib.h> #include "mpi.h" #define vectorsize 10000000 #define root 0 //[[## operation constructvectorpart() void constructvector(double * vector, int size, short vectorenu) { int = 0; if(vectorenu == 1) // i.e vector 1 { for(i = 0; < size; i++) { vector[i] = 0.1 + (i%20)*0.1; } } else if(vectorenu == 2) // i.e. vector 2 { for(i = 0 ; < size; i++) { vector[i] = 2-(i%20)*0.1; } } } //[[## operation dotproduct() double dotproduct(double* a, double* b, int length) { double result = 0; int = 0; (i = 0; i<length; i++) result += a[i] * b[i]; return result; } int main( argc, argv ) int argc; char **argv; { int processorid, numofprocessors; int partialvectorsize ; double t1, t2, localdotproduct, result; mpi_init( &argc, &argv ); mpi_comm_size( mpi_comm_world, &numofprocessors ); mpi_comm_rank( mpi_comm_world, &processorid ); if(processorid == 0) t1 = mpi_wtime(); // processors constitute own vector parts , // calculates corresponding partial dot products partialvectorsize = vectorsize/ numofprocessors; double *v1, *v2; v1 = (double*)(malloc((partialvectorsize) * sizeof(double))); v2 = (double*)(malloc((partialvectorsize) * sizeof(double))); constructvectorpart(v1,0,partialvectorsize,1); constructvectorpart(v2,0,partialvectorsize,2); localdotproduct = dotproduct(v1,v2, partialvectorsize); printf(" processor %d \n",processorid); //----------------- code part 1 --------------------------------------------- if( processorid != 0 ) // if not master { // send partial result master mpi_send( &localdotproduct, 1, mpi_double, 0,0, mpi_comm_world ); } else // master { // collect results result = localdotproduct; // own result int j; for( j=1; j<numofprocessors; ++j ) { mpi_recv( &localdotproduct, 1, mpi_double, j, 0, mpi_comm_world,mpi_status_ignore); result += localdotproduct; } t2 = mpi_wtime(); printf(" result = %f timeconsumed = %f \n",result, t2-t1); } //---------------------------------------------------------------------------- /* 
//--------------------- code part 2 ---------------- mpi_reduce(&localdotproduct, &result, 1, mpi_double, mpi_sum, 0,mpi_comm_world); if(processorid == 0) { t2 = mpi_wtime(); printf(" result = %f timeconsumed = %f \n",result, t2-t1); } //--------------------------------------------------- */ mpi_finalize(); free(v1); free(v2); return 0; }
Comments
Post a Comment