sequential.c
#include<stdio.h>
#include<stdlib.h>
#include<stddef.h>
#include<string.h>
#include<math.h>
#include<sys/time.h>
/* Random-number generator; its definition appears further down in this file. */
float ran2(long *);
/* Seed handed to ran2() by address. main() stores a fresh negative value
 * here before generating each series of samples. */
long iseed=-9;
/* Time sample filled in by the timing code through gettimeofday(). */
struct timeval tv;
/* Sample the wall clock into the file-scope `tv` and return it in seconds. */
static double wallclock_seconds(void)
{
    (void)gettimeofday(&tv, NULL);
    return tv.tv_sec + tv.tv_usec * 1.0e-6;
}

/*
 * Return the wall-clock time, in seconds, elapsed since the first call to
 * gettime().  The first call latches the reference instant and then takes
 * a second sample, so it returns a value at (or very near) zero.
 */
float gettime(){
    static int have_origin = 0;
    static double origin;
    double now;

    if (!have_origin) {
        origin = wallclock_seconds();
        have_origin = 1;
    }
    now = wallclock_seconds();
    return (float)(now - origin);
}
/*
 * Sequential reference program.
 *
 * Usage: prog <niter>
 *
 * Generates `niter` series of `maxnp` pseudo-random floats with ran2()
 * (series i is seeded with -(i+1284)), then sorts every sample into one of
 * `niter` equal-width bins covering [0,1) and prints the population and the
 * value range of each bin, followed by the elapsed wall-clock time.
 *
 * Returns 0 on success, 1 on bad arguments or allocation failure.
 */
int main(int argc, char **argv){
    const long maxnp = 100000000;        /* samples generated per series */
    const long bucketcap = 2 * maxnp;    /* capacity of each bin buffer  */
    long i, j, np, niter;
    float *val, **lval;
    long *nlval;
    float time1, time2;
    char *endp;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <niter>\n", argc > 0 ? argv[0] : "sequential");
        return 1;
    }
    niter = strtol(argv[1], &endp, 10);
    if (endp == argv[1] || *endp != '\0' || niter <= 0) {
        fprintf(stderr, "niter must be a positive integer, got '%s'\n", argv[1]);
        return 1;
    }
    /* Make sure maxnp*niter floats can be expressed as a byte count. */
    if ((size_t)niter > (size_t)-1 / (sizeof(float) * (size_t)maxnp)) {
        fprintf(stderr, "niter = %ld is too large\n", niter);
        return 1;
    }

    time1 = gettime();

    val = malloc(sizeof(float) * (size_t)maxnp * (size_t)niter);
    lval = calloc((size_t)niter, sizeof *lval);
    nlval = calloc((size_t)niter, sizeof *nlval);   /* bin counts start at 0 */
    if (val == NULL || lval == NULL || nlval == NULL) {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }
    for (i = 0; i < niter; i++) {
        lval[i] = malloc(sizeof(float) * (size_t)bucketcap);
        if (lval[i] == NULL) {
            fprintf(stderr, "Out of memory allocating bin %ld\n", i);
            return 1;
        }
    }

    /* Width of one bin; same arithmetic as before (long double, then float). */
    float step = (1.L-0.L)/(float)niter;

    /* Generate the samples, reseeding the generator for every series. */
    np = 0;
    for (i = 0; i < niter; i++) {
        iseed = -1*(i+1284L);
        for (j = 0; j < maxnp; j++) {
            val[np++] = ran2(&iseed);
        }
    }

    /* Distribute the samples over the bins. */
    for (i = 0; i < np; i++) {
        j = val[i]/step;
        /* Rounding in the float division can yield niter for values just
         * below 1; keep such samples in the last bin. */
        if (j >= niter) j = niter - 1;
        if (nlval[j] >= bucketcap) {
            fprintf(stderr, "Bin %ld overflowed its %ld-element buffer\n", j, bucketcap);
            return 1;
        }
        lval[j][nlval[j]++] = val[i];
    }

    /* Report each bin with its [lower, upper) range. */
    for (i = 0; i < niter; i++) {
        printf("p%d has %ld members ::: %g %g\n", (int)i, nlval[i], step*i, step*(i+1));
    }

    time2 = gettime();
    printf("Wallclock time = %g second\n", (time2-time1));

    for (i = 0; i < niter; i++) {
        free(lval[i]);
    }
    free(lval);
    free(nlval);
    free(val);
    return 0;
}
/* Constants private to ran2(); they are #undef'd again right after it. */
#define IM1 2147483563
#define IM2 2147483399
#define AM (1.0/IM1)
#define IMM1 (IM1-1)
#define IA1 40014
#define IA2 40692
#define IQ1 53668
#define IQ2 52774
#define IR1 12211
#define IR2 3791
#define NTAB 32
#define NDIV (1+IMM1/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

/*
 * Advance one integer recurrence: with k = x/q, compute
 * a*(x - k*q) - k*r and add m when the result is negative.
 */
static long ran2_step(long x, long a, long q, long r, long m)
{
    const long k = x / q;
    long next = a * (x - k * q) - k * r;

    if (next < 0)
        next += m;
    return next;
}

/*
 * Return the next pseudo-random float and update *idum in place.
 *
 * Calling with *idum <= 0 (re)initialises the generator: the seed is made
 * positive (0 becomes 1), copied into the second recurrence, and the
 * NTAB-entry table is refilled after 8 discarded warm-up steps.
 * The state is kept in function-level statics, so the sequence is shared
 * by all callers.  The returned value never exceeds RNMX.
 */
float ran2(long *idum)
{
    static long idum2 = 123456789;
    static long iy = 0;
    static long iv[NTAB];
    int slot;
    float sample;

    if (*idum <= 0) {
        *idum = (-(*idum) < 1) ? 1 : -(*idum);
        idum2 = *idum;
        /* Slots NTAB+7..NTAB are warm-ups; the rest fill the table top-down. */
        for (slot = NTAB + 7; slot >= 0; slot--) {
            *idum = ran2_step(*idum, IA1, IQ1, IR1, IM1);
            if (slot < NTAB)
                iv[slot] = *idum;
        }
        iy = iv[0];
    }

    /* Step both recurrences. */
    *idum = ran2_step(*idum, IA1, IQ1, IR1, IM1);
    idum2 = ran2_step(idum2, IA2, IQ2, IR2, IM2);

    /* The previous output selects the table slot that is consumed and refilled. */
    slot = iy / NDIV;
    iy = iv[slot] - idum2;
    iv[slot] = *idum;
    if (iy < 1)
        iy += IMM1;

    sample = AM * iy;
    if (sample > RNMX)
        return RNMX;
    return sample;
}
#undef IM1
#undef IM2
#undef AM
#undef IMM1
#undef IA1
#undef IA2
#undef IQ1
#undef IQ2
#undef IR1
#undef IR2
#undef NTAB
#undef NDIV
#undef EPS
#undef RNMX
/* (C) Copr. 1986-92 Numerical Recipes Software 71.+I0>+. */
answer.c
#include<stdio.h>
#include<stdlib.h>
#include<stddef.h>
#include<string.h>
#include<math.h>
#include<sys/time.h>
#include"mpi.h"
/* Random-number generator; its definition appears further down in this file. */
float ran2(long *);
/* Seed handed to ran2() by address. main() sets it to a negative,
 * rank-dependent value before generating the samples. */
long iseed;
/* Time sample filled in by the timing code through gettimeofday(). */
struct timeval tv;
/* Sample the wall clock into the file-scope `tv` and return it in seconds. */
static double wallclock_seconds(void)
{
    (void)gettimeofday(&tv, NULL);
    return tv.tv_sec + tv.tv_usec * 1.0e-6;
}

/*
 * Return the wall-clock time, in seconds, elapsed since the first call to
 * gettime().  The first call latches the reference instant and then takes
 * a second sample, so it returns a value at (or very near) zero.
 */
float gettime(){
    static int have_origin = 0;
    static double origin;
    double now;

    if (!have_origin) {
        origin = wallclock_seconds();
        have_origin = 1;
    }
    now = wallclock_seconds();
    return (float)(now - origin);
}
/*
 * SWAP(a, b, tmp): exchange the objects that pointers `a` and `b` refer to,
 * using the lvalue `tmp` (same type as *a) as scratch space.
 * Every argument is parenthesised so pointer expressions such as `p + 1`
 * expand correctly.  `a` and `b` are each evaluated twice, so they must not
 * have side effects.
 */
#define SWAP(a,b,tmp) do{\
(tmp) = *(a);\
*(a) = *(b);\
*(b) = (tmp);\
} while(0)
/*
 * domaindecomp - recursive bisection of a distributed float array.
 *
 * On entry every rank of `Comm` holds *mmem values in the malloc'ed array
 * *ibase.  The ranks are split into a lower and an upper half; each rank
 * keeps the values that belong to its own half of [valmin, valmax) and
 * swaps the rest with its partner rank in the other half.  The function then
 * recurses on each half (with a split communicator and the matching
 * sub-range) until a communicator contains a single rank.
 *
 * ibase  in/out: array owned by the caller; it is freed and replaced by a
 *                newly allocated array (received values first, kept values
 *                after) unless the communicator has only one rank.
 * mmem   in/out: number of values in *ibase.
 * valmin, valmax: value range covered by the ranks of `Comm`.
 * Comm   communicator to decompose; its size must be a power of two.
 *
 * Aborts the MPI job if memory cannot be allocated.
 * NOTE: element counts are passed to MPI as `int`, so a single exchange is
 * limited to INT_MAX values.
 */
void domaindecomp(float **ibase, size_t *mmem, float valmin, float valmax, MPI_Comm Comm){
    int myid, nid;
    MPI_Status status;

    MPI_Comm_size(Comm,&nid);
    if(nid == 1) return;
    MPI_Comm_rank(Comm,&myid);

    const size_t nmem = *mmem;
    float *base = *ibase;
    const float halfval = (valmax+valmin)*0.5;
    const int lower = (myid < nid/2);   /* this rank keeps values < halfval */

    /* Partition in place: values to keep end up in [base, right),
     * values to send in [right, base + nmem). */
    float *left = base;
    float *right = base + nmem;
    float swaptmp;
    while (left < right) {
        const int send = lower ? (*left >= halfval) : (*left < halfval);
        if (send) {
            right--;
            SWAP(left,right,swaptmp);
        }
        else left++;
    }
    const size_t nkeep = (size_t)(right - base);
    long nsend = (long)(nmem - nkeep);
    long nrecv = 0;

    /* Partner rank sits half a communicator away (same rank for send/recv). */
    const int nsubgroup = 2;
    const int subgroupsize = nid/nsubgroup;
    const int subgroupid = myid/subgroupsize;
    const int dest = (myid + subgroupsize + nid)%nid;
    const int src = (myid - subgroupsize + nid)%nid;

    MPI_Sendrecv(&nsend,1, MPI_LONG, dest,0,&nrecv, 1, MPI_LONG, src, 0, Comm,&status);

    /* Allocate the final array once: received values first, kept values after.
     * Request at least one element so a NULL result always means failure. */
    size_t nowmem = (size_t)nrecv + nkeep;
    float *rbase = malloc(sizeof(float) * (nowmem ? nowmem : 1));
    if (rbase == NULL) {
        fprintf(stderr, "domaindecomp: out of memory for %zu values\n", nowmem);
        MPI_Abort(Comm, 1);
        exit(1);   /* only reached if MPI_Abort returns */
    }

    MPI_Sendrecv(right, (int)nsend, MPI_FLOAT, dest,0, rbase, (int)nrecv, MPI_FLOAT, src, 0, Comm, &status);
    if (nkeep > 0) {
        memcpy(rbase + nrecv, base, nkeep*sizeof(float));
    }
    free(base);

    /* Recurse on this rank's half with the corresponding sub-range. */
    float newvalmin = (valmax-valmin)/nsubgroup * subgroupid + valmin;
    float newvalmax = (valmax-valmin)/nsubgroup * (subgroupid+1) + valmin;
    if(subgroupid==1) newvalmax = valmax;   /* avoid rounding at the top edge */

    MPI_Comm newcom;
    MPI_Comm_split(Comm, subgroupid, myid % subgroupsize, &newcom);
    domaindecomp(&rbase, &nowmem, newvalmin, newvalmax, newcom);
    MPI_Comm_free(&newcom);

    *mmem = nowmem;
    *ibase = rbase;
    return;
}
/*
 * Verify that every one of the `np` values in `val` lies in
 * [lvalmin, lvalmax).  On the first violation, report the offending value
 * and the bounds on stderr and terminate the process with status 99.
 */
void Check(float*val, size_t np, float lvalmin, float lvalmax){
    const float *cur = val;
    size_t left;

    for (left = np; left > 0; left--, cur++) {
        const float v = *cur;
        if (!(v < lvalmin) && !(v >= lvalmax))
            continue;   /* inside the range */
        fprintf(stderr,"Error in Check %g : %g %g\n",v,lvalmin,lvalmax);
        exit(99);
    }
}
/*
 * Parallel driver: every rank generates `maxnp` pseudo-random floats in
 * [0,1) (seeded from its rank), domaindecomp() redistributes them so that
 * rank r ends up with the values in [r/nid, (r+1)/nid), and Check() verifies
 * the result.  Rank 0 prints the elapsed wall-clock time.
 *
 * The number of ranks must be 2, 4, 8, 16, 32 or 64.
 * Returns 0 on success, 1 if the rank count is not supported.
 */
int main(int argc, char **argv){
    const size_t maxnp = 100000000;   /* samples generated per rank */
    size_t i, np;
    float *val, lvalmin, lvalmax;
    float time1, time2;
    int myid, nid;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &nid);
    if(nid != 2 && nid != 4 && nid != 8 && nid != 16 && nid != 32 && nid != 64){
        fprintf(stderr,"Error input number of parallel ranks\n");
        MPI_Finalize();
        return 1;   /* stop here: no MPI call is allowed after MPI_Finalize */
    }

    time1 = gettime();

    val = malloc(sizeof(float)*maxnp);
    if (val == NULL) {
        fprintf(stderr, "p%d: out of memory\n", myid);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    iseed = -1*(myid+1284L);
    for (i = 0; i < maxnp; i++) {
        val[i] = ran2(&iseed);
    }
    np = maxnp;

    float valmin = 0.L;
    float valmax = 1.L;
    domaindecomp(&val, &np, valmin, valmax, MPI_COMM_WORLD);
    printf("p%d has %zu members\n", myid, np);

    /* Wait for all ranks so the reported time covers the whole exchange. */
    MPI_Barrier(MPI_COMM_WORLD);
    time2 = gettime();
    if(myid==0) printf("Wallclock time = %g second\n", (time2-time1));

    lvalmin = 1./nid *(float)(myid);
    lvalmax = 1./nid *(float)(myid+1);
    Check(val,np,lvalmin,lvalmax);

    free(val);
    MPI_Finalize();
    return 0;
}
/* Constants private to ran2(); they are #undef'd again right after it. */
#define IM1 2147483563
#define IM2 2147483399
#define AM (1.0/IM1)
#define IMM1 (IM1-1)
#define IA1 40014
#define IA2 40692
#define IQ1 53668
#define IQ2 52774
#define IR1 12211
#define IR2 3791
#define NTAB 32
#define NDIV (1+IMM1/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

/*
 * Advance one integer recurrence: with k = x/q, compute
 * a*(x - k*q) - k*r and add m when the result is negative.
 */
static long ran2_step(long x, long a, long q, long r, long m)
{
    const long k = x / q;
    long next = a * (x - k * q) - k * r;

    if (next < 0)
        next += m;
    return next;
}

/*
 * Return the next pseudo-random float and update *idum in place.
 *
 * Calling with *idum <= 0 (re)initialises the generator: the seed is made
 * positive (0 becomes 1), copied into the second recurrence, and the
 * NTAB-entry table is refilled after 8 discarded warm-up steps.
 * The state is kept in function-level statics, so the sequence is shared
 * by all callers.  The returned value never exceeds RNMX.
 */
float ran2(long *idum)
{
    static long idum2 = 123456789;
    static long iy = 0;
    static long iv[NTAB];
    int slot;
    float sample;

    if (*idum <= 0) {
        *idum = (-(*idum) < 1) ? 1 : -(*idum);
        idum2 = *idum;
        /* Slots NTAB+7..NTAB are warm-ups; the rest fill the table top-down. */
        for (slot = NTAB + 7; slot >= 0; slot--) {
            *idum = ran2_step(*idum, IA1, IQ1, IR1, IM1);
            if (slot < NTAB)
                iv[slot] = *idum;
        }
        iy = iv[0];
    }

    /* Step both recurrences. */
    *idum = ran2_step(*idum, IA1, IQ1, IR1, IM1);
    idum2 = ran2_step(idum2, IA2, IQ2, IR2, IM2);

    /* The previous output selects the table slot that is consumed and refilled. */
    slot = iy / NDIV;
    iy = iv[slot] - idum2;
    iv[slot] = *idum;
    if (iy < 1)
        iy += IMM1;

    sample = AM * iy;
    if (sample > RNMX)
        return RNMX;
    return sample;
}
#undef IM1
#undef IM2
#undef AM
#undef IMM1
#undef IA1
#undef IA2
#undef IQ1
#undef IQ2
#undef IR1
#undef IR2
#undef NTAB
#undef NDIV
#undef EPS
#undef RNMX
/* (C) Copr. 1986-92 Numerical Recipes Software 71.+I0>+. */
myparallel.c
#include<limits.h>
#include<math.h>
#include<stddef.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<sys/time.h>
#include<mpi.h>
/* Random-number generator; its definition appears further down in this file. */
float ran2(long *);
/* Seed handed to ran2() by address. main() replaces this initial value with
 * a negative, rank-dependent one before generating the samples. */
long iseed=-9;
/* Time sample filled in by the timing code through gettimeofday(). */
struct timeval tv;
/* Sample the wall clock into the file-scope `tv` and return it in seconds. */
static double wallclock_seconds(void)
{
    (void)gettimeofday(&tv, NULL);
    return tv.tv_sec + tv.tv_usec * 1.0e-6;
}

/*
 * Return the wall-clock time, in seconds, elapsed since the first call to
 * gettime().  The first call latches the reference instant and then takes
 * a second sample, so it returns a value at (or very near) zero.
 */
float gettime(){
    static int have_origin = 0;
    static double origin;
    double now;

    if (!have_origin) {
        origin = wallclock_seconds();
        have_origin = 1;
    }
    now = wallclock_seconds();
    return (float)(now - origin);
}
/* Report a fatal error in domaindecomp() and abort the MPI job. */
static void decomp_fail(MPI_Comm comm, const char *what)
{
    fprintf(stderr, "domaindecomp: %s\n", what);
    MPI_Abort(comm, 1);
    exit(EXIT_FAILURE);   /* only reached if MPI_Abort returns */
}

/*
 * Rank that owns value `v` when [lo, lo + nranks*step) is cut into `nranks`
 * equal slices.  The result is clamped to [0, nranks-1] so that rounding in
 * the division (or an out-of-range/NaN value) can never index out of bounds.
 */
static int owner_rank(float v, float lo, float step, int nranks)
{
    const float q = (v - lo) / step;

    if (!(q >= 0.0f))
        return 0;
    if (q >= (float)nranks)
        return nranks - 1;
    return (int)q;
}

/*
 * domaindecomp - redistribute a distributed float array by value.
 *
 * [valmin, valmax) is cut into one equal-width slice per rank of `Comm`;
 * after the call rank r holds exactly the values of slice r.  The values are
 * bucketed by destination, the bucket sizes are exchanged with
 * MPI_Alltoall, and the data moves in a single MPI_Alltoallv.
 *
 * ibase  in/out: malloc'ed array owned by the caller.  It is resized with
 *                realloc() to fit the incoming values, so *ibase may change.
 * mmem   in/out: number of values in *ibase.
 * valmin, valmax: value range covered by the ranks of `Comm`.
 * Comm   communicator over which the data is redistributed.
 *
 * Aborts the MPI job on allocation failure or if a count exceeds INT_MAX
 * (MPI counts and displacements are `int`).
 */
void domaindecomp(float **ibase, size_t *mmem, float valmin, float valmax, MPI_Comm Comm){
    int myid, nid;
    MPI_Comm_rank(Comm, &myid);
    MPI_Comm_size(Comm, &nid);
    if (nid == 1) return;   /* a single rank already owns everything */

    const size_t nlocal = *mmem;
    float *val = *ibase;
    const float step = (valmax - valmin) / (float)nid;

    if (nlocal > (size_t)INT_MAX)
        decomp_fail(Comm, "too many local values for MPI int counts");

    /* One allocation for the five per-rank int tables (zero-initialised). */
    int *tbl = calloc((size_t)nid * 5, sizeof *tbl);
    float *sendbuf = malloc((nlocal ? nlocal : 1) * sizeof *sendbuf);
    if (tbl == NULL || sendbuf == NULL)
        decomp_fail(Comm, "out of memory");
    int *sendcnt = tbl;
    int *sdispl  = tbl + nid;
    int *recvcnt = tbl + 2 * nid;
    int *rdispl  = tbl + 3 * nid;
    int *cursor  = tbl + 4 * nid;

    /* Pass 1: how many values go to each rank (including ourselves). */
    for (size_t i = 0; i < nlocal; i++)
        sendcnt[owner_rank(val[i], valmin, step, nid)]++;

    int offset = 0;
    for (int r = 0; r < nid; r++) {
        sdispl[r] = offset;
        cursor[r] = offset;
        offset += sendcnt[r];
    }

    /* Pass 2: pack the values contiguously, grouped by destination rank. */
    for (size_t i = 0; i < nlocal; i++) {
        const int r = owner_rank(val[i], valmin, step, nid);
        sendbuf[cursor[r]++] = val[i];
    }

    /* Learn how much every rank will send us. */
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, Comm);

    size_t total = 0;
    for (int r = 0; r < nid; r++) {
        rdispl[r] = (int)total;
        total += (size_t)recvcnt[r];
        if (total > (size_t)INT_MAX)
            decomp_fail(Comm, "too many incoming values for MPI int displacements");
    }

    /* The old contents live in sendbuf now, so the caller's array can be
     * resized to exactly what will be received. */
    float *resized = realloc(val, (total ? total : 1) * sizeof *resized);
    if (resized == NULL)
        decomp_fail(Comm, "out of memory");
    val = resized;

    MPI_Alltoallv(sendbuf, sendcnt, sdispl, MPI_FLOAT,
                  val, recvcnt, rdispl, MPI_FLOAT, Comm);

    free(sendbuf);
    free(tbl);
    *ibase = val;
    *mmem = total;
}
/*
 * Verify that every one of the `np` values in `val` lies in
 * [lvalmin, lvalmax).  On the first violation, report the offending value
 * and the bounds on stderr, finalize MPI and terminate the process with
 * status 99.
 */
void Check(float*val, size_t np, float lvalmin, float lvalmax){
    const float *cur = val;
    size_t left;

    for (left = np; left > 0; left--, cur++) {
        const float v = *cur;
        if (!(v < lvalmin) && !(v >= lvalmax))
            continue;   /* inside the range */
        fprintf(stderr,"Error in Check %g : %g %g\n",v,lvalmin,lvalmax);
        MPI_Finalize();
        exit(99);
    }
}
/*
 * Parallel driver: every rank generates `maxnp` pseudo-random floats in
 * [0,1) (seeded from its rank), domaindecomp() redistributes them so that
 * rank r ends up with the values in [r/nid, (r+1)/nid), and Check() verifies
 * the result.  Rank 0 prints the elapsed wall-clock time.
 *
 * The number of ranks must be 2, 4, 8, 16, 32 or 64; otherwise an error is
 * printed and the program exits without doing any work.
 */
int main(int argc, char **argv){
    const size_t maxnp = 100000000;   /* samples generated per rank */
    size_t i;
    size_t np;                        /* size_t to match domaindecomp()'s size_t* */
    float *val, lvalmin, lvalmax;
    float time1, time2;
    int myid, nid;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &nid);
    if(nid != 2 && nid != 4 && nid != 8 && nid != 16 && nid != 32 && nid != 64){
        fprintf(stderr, "Error input number of parallel ranks\n");
        MPI_Finalize();
        return 0;
    }

    time1 = gettime();

    /* Twice the generated amount, leaving headroom for redistributed data. */
    val = malloc(sizeof(float)*maxnp*2);
    if (val == NULL) {
        fprintf(stderr, "p%d: out of memory\n", myid);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    iseed = -1*(myid+1284L);
    for (i = 0; i < maxnp; i++) {
        val[i] = ran2(&iseed);
    }
    np = maxnp;

    float valmin = 0.L;
    float valmax = 1.L;
    domaindecomp(&val, &np, valmin, valmax, MPI_COMM_WORLD);
    printf("p%d has %zu members\n", myid, np);

    /* Wait for all ranks so rank 0's time covers the whole redistribution. */
    MPI_Barrier(MPI_COMM_WORLD);
    time2 = gettime();
    if(myid==0) printf("Wallclock time = %g seconds\n", (time2-time1));

    lvalmin = 1./nid *(float)(myid);
    lvalmax = 1./nid *(float)(myid+1);
    Check(val,np,lvalmin,lvalmax);

    free(val);
    MPI_Finalize();
    return 0;
}
/* Constants private to ran2(); they are #undef'd again right after it. */
#define IM1 2147483563
#define IM2 2147483399
#define AM (1.0/IM1)
#define IMM1 (IM1-1)
#define IA1 40014
#define IA2 40692
#define IQ1 53668
#define IQ2 52774
#define IR1 12211
#define IR2 3791
#define NTAB 32
#define NDIV (1+IMM1/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

/*
 * Advance one integer recurrence: with k = x/q, compute
 * a*(x - k*q) - k*r and add m when the result is negative.
 */
static long ran2_step(long x, long a, long q, long r, long m)
{
    const long k = x / q;
    long next = a * (x - k * q) - k * r;

    if (next < 0)
        next += m;
    return next;
}

/*
 * Return the next pseudo-random float and update *idum in place.
 *
 * Calling with *idum <= 0 (re)initialises the generator: the seed is made
 * positive (0 becomes 1), copied into the second recurrence, and the
 * NTAB-entry table is refilled after 8 discarded warm-up steps.
 * The state is kept in function-level statics, so the sequence is shared
 * by all callers.  The returned value never exceeds RNMX.
 */
float ran2(long *idum)
{
    static long idum2 = 123456789;
    static long iy = 0;
    static long iv[NTAB];
    int slot;
    float sample;

    if (*idum <= 0) {
        *idum = (-(*idum) < 1) ? 1 : -(*idum);
        idum2 = *idum;
        /* Slots NTAB+7..NTAB are warm-ups; the rest fill the table top-down. */
        for (slot = NTAB + 7; slot >= 0; slot--) {
            *idum = ran2_step(*idum, IA1, IQ1, IR1, IM1);
            if (slot < NTAB)
                iv[slot] = *idum;
        }
        iy = iv[0];
    }

    /* Step both recurrences. */
    *idum = ran2_step(*idum, IA1, IQ1, IR1, IM1);
    idum2 = ran2_step(idum2, IA2, IQ2, IR2, IM2);

    /* The previous output selects the table slot that is consumed and refilled. */
    slot = iy / NDIV;
    iy = iv[slot] - idum2;
    iv[slot] = *idum;
    if (iy < 1)
        iy += IMM1;

    sample = AM * iy;
    if (sample > RNMX)
        return RNMX;
    return sample;
}
#undef IM1
#undef IM2
#undef AM
#undef IMM1
#undef IA1
#undef IA2
#undef IQ1
#undef IQ2
#undef IR1
#undef IR2
#undef NTAB
#undef NDIV
#undef EPS
#undef RNMX
/* (C) Copr. 1986-92 Numerical Recipes Software 71.+I0>+. */
'Parallel Programming' 카테고리의 다른 글
| MPI Function Dictionary (1) | 2016.10.04 |
|---|---|
| KSC 2015 2번 문제 및 답안 (0) | 2016.10.01 |
| KSC 2015 1번 문제 및 답안 (0) | 2016.10.01 |
| KSC 2014 3번 문제 및 답안 (0) | 2016.10.01 |
| KSC 2014 2번 문제 및 답안 (0) | 2016.10.01 |
댓글