본문 바로가기
Parallel Programming

KSC 2015 3번 문제 및 풀이

by suminhan 2016. 10. 1.

sequential.c

#include<stdio.h>
#include<stdlib.h>
#include<stddef.h>
#include<string.h>
#include<math.h>
#include<sys/time.h>

float ran2(long *);
long iseed=-9;

struct timeval tv;
float gettime(){
	static int startflag = 1;
	static double tsecs0, tsecs1;
	if(startflag) {
		(void ) gettimeofday(&tv, NULL);
		tsecs0 = tv.tv_sec + tv.tv_usec*1.0E-6;
		startflag = 0;
	}
	(void) gettimeofday(&tv, NULL);
	tsecs1 = tv.tv_sec + tv.tv_usec*1.0e-6;
	return (float) (tsecs1 - tsecs0);
}



int main(int argc, char **argv){
	long i, j;
	long np,niter,maxnp=100000000;
	float *val,**lval;

	niter = atoi(argv[1]);

	float time1, time2;
	time1 = gettime();


	val = (float*)malloc(sizeof(float)*maxnp*niter);
	lval = (float**)malloc(sizeof(float*)*niter);
	long nlval[niter];

	float step = (1.L-0.L)/(float)niter;

	for(i=0;i<niter;i++){
		lval[i] = (float*)malloc(sizeof(float)*maxnp*2);
		nlval[i] = 0;
	}
	np = 0;
	for(i=0;i<niter;i++){
		iseed = -1*(i+1284L);
		for(j=0;j<maxnp;j++){
			val[np++] = ran2(&iseed);
		}
	}
	for(i=0;i<np;i++){
		j = val[i]/step;
		*(lval[j]+nlval[j]) = val[i];
		nlval[j]++;
	}
	for(i=0;i<niter;i++){
		printf("p%d has %ld members ::: %g %g\n", (int)i,nlval[i], step*i, step*(+1));
	}
	time2 = gettime();
	printf("Wallclock time = %g second\n", (time2-time1));
}
#define IM1 2147483563
#define IM2 2147483399
#define AM (1.0/IM1)
#define IMM1 (IM1-1)
#define IA1 40014
#define IA2 40692
#define IQ1 53668
#define IQ2 52774
#define IR1 12211
#define IR2 3791
#define NTAB 32
#define NDIV (1+IMM1/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

float ran2(long *idum)
{
	int j;
	long k;
	static long idum2=123456789;
	static long iy=0;
	static long iv[NTAB];
	float temp;

	if (*idum <= 0) {
		if (-(*idum) < 1) *idum=1;
		else *idum = -(*idum);
		idum2=(*idum);
		for (j=NTAB+7;j>=0;j--) {
			k=(*idum)/IQ1;
			*idum=IA1*(*idum-k*IQ1)-k*IR1;
			if (*idum < 0) *idum += IM1;
			if (j < NTAB) {
				iv[j] = *idum;
			}
		}
		iy=iv[0];
	}
	k=(*idum)/IQ1;
	*idum=IA1*(*idum-k*IQ1)-k*IR1;
	if (*idum < 0) *idum += IM1;
	k=idum2/IQ2;
	idum2=IA2*(idum2-k*IQ2)-k*IR2;
	if (idum2 < 0) idum2 += IM2;
	j=iy/NDIV;
	iy=iv[j]-idum2;
	iv[j] = *idum;
	if (iy < 1) iy += IMM1;
	if ((temp=AM*iy) > RNMX) return RNMX;
	else return temp;
}
#undef IM1
#undef IM2
#undef AM
#undef IMM1
#undef IA1
#undef IA2
#undef IQ1
#undef IQ2
#undef IR1
#undef IR2
#undef NTAB
#undef NDIV
#undef EPS
#undef RNMX
/* (C) Copr. 1986-92 Numerical Recipes Software 71.+I0>+. */
answer.c
#include<stdio.h>
#include<stdlib.h>
#include<stddef.h>
#include<string.h>
#include<math.h>
#include<sys/time.h>
#include"mpi.h"

float ran2(long *);
long iseed;

struct timeval tv;
float gettime(){
	static int startflag = 1;
	static double tsecs0, tsecs1;
	if(startflag) {
		(void ) gettimeofday(&tv, NULL);
		tsecs0 = tv.tv_sec + tv.tv_usec*1.0E-6;
		startflag = 0;
	}
	(void) gettimeofday(&tv, NULL);
	tsecs1 = tv.tv_sec + tv.tv_usec*1.0e-6;
	return (float) (tsecs1 - tsecs0);
}


#define SWAP(a,b,tmp) do{\
	tmp = *a;\
	*a = *b;\
	*b = tmp;\
} while(0)

void domaindecomp(float **ibase, size_t *mmem, float valmin, float valmax, MPI_Comm Comm){
	int myid, nid;
	size_t nmem = *mmem;
	float halfval = (valmax+valmin)*0.5;

	MPI_Status status;

	MPI_Comm_size(Comm,&nid);
	if(nid == 1) return;


	MPI_Comm_rank(Comm,&myid);

	float *base = *ibase;
	float swaptmp;
	
	float *left, *right;
	left = base;
	right = base + nmem;
	/* bifurcation of data array */
	if(myid < nid/2) {
		for(;left<right;){
			if(*left >= halfval) {
				right --;
				SWAP(left,right,swaptmp);
			}
			else left ++;
		}
	}
	else {
		for(;left<right;){
			if(*left < halfval) {
				right --;
				SWAP(left,right,swaptmp);
			}
			else left ++;
		}
	}
	long nrecv,nsend = nmem-(right-base);
	int subgroupid, nsubgroup, subgroupsize;
	nsubgroup = 2; /* The total number of ranks should be power of two ! */
	subgroupsize = nid/nsubgroup;
	subgroupid = myid/subgroupsize;
	int dest = (myid + subgroupsize + nid)%nid;
	int src  = (myid - subgroupsize + nid)%nid;
	MPI_Sendrecv(&nsend,1, MPI_LONG, dest,0,&nrecv, 1, MPI_LONG, src, 0, Comm,&status);
	float *rbase = (float*)malloc(sizeof(float)*nrecv);

	MPI_Sendrecv(right, nsend, MPI_FLOAT, dest,0, rbase, nrecv, MPI_FLOAT, src, 0, Comm, &status);
	nmem = nmem - nsend;
	base = (float*)realloc(base, nmem*sizeof(float));

	size_t nowmem = nrecv + nmem;
	rbase = (float*) realloc(rbase, sizeof(float)*nowmem);
	memmove(rbase+nrecv, base, nmem*sizeof(float));
	free(base);

	MPI_Comm newcom;
	int key = myid % subgroupsize;
	float newvalmin, newvalmax;

	newvalmin = (valmax-valmin)/nsubgroup * subgroupid + valmin;
	newvalmax = (valmax-valmin)/nsubgroup * (subgroupid+1) + valmin;
	if(subgroupid==1) newvalmax = valmax;

	MPI_Comm_split(Comm,subgroupid, key, &newcom);
	domaindecomp(&rbase, &nowmem, newvalmin, newvalmax, newcom);
	MPI_Comm_free(&newcom);

	*mmem = nowmem;
	*ibase = rbase;
	return;

}

void Check(float*val, size_t np, float lvalmin, float lvalmax){
	size_t i;
	for(i=0;i<np;i++){
		if(val[i]<lvalmin || val[i]>=lvalmax){
			fprintf(stderr,"Error in Check %g : %g %g\n",val[i],lvalmin,lvalmax);
			exit(99);
		}
	}
}


int main(int argc, char **argv){
	long i, j;
	size_t np,maxnp=100000000;
	float *val,**lval,lvalmin,lvalmax;
	float time1, time2;

	int myid,nid;


	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
	MPI_Comm_size(MPI_COMM_WORLD, &nid);


	if(nid != 2 && nid != 4 && nid != 8 && nid != 16 && nid != 32 && nid != 64){
		fprintf(stderr,"Error input number of parallel ranks\n");
		MPI_Finalize();
	}
	time1 = gettime();
	val = (float*)malloc(sizeof(float)*maxnp);

	float step = (1.-0)/(float)nid;

	iseed = -1*(myid+1284L);
	for(i=0;i<maxnp;i++){
		val[i] = ran2(&iseed);
	}
	np = maxnp;


	float valmin = 0.L;
	float valmax = 1.L;
	domaindecomp(&val, &np, valmin, valmax, MPI_COMM_WORLD);


	printf("p%d has %ld members\n", myid,np);
	MPI_Barrier(MPI_COMM_WORLD);
	time2 = gettime();
	if(myid==0) printf("Wallclock time = %g second\n", (time2-time1));
	lvalmin = 1./nid *(float)(myid);
	lvalmax = 1./nid *(float)(myid+1);
	Check(val,np,lvalmin,lvalmax);
	MPI_Finalize();
}
#define IM1 2147483563
#define IM2 2147483399
#define AM (1.0/IM1)
#define IMM1 (IM1-1)
#define IA1 40014
#define IA2 40692
#define IQ1 53668
#define IQ2 52774
#define IR1 12211
#define IR2 3791
#define NTAB 32
#define NDIV (1+IMM1/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

float ran2(long *idum)
{
	int j;
	long k;
	static long idum2=123456789;
	static long iy=0;
	static long iv[NTAB];
	float temp;

	if (*idum <= 0) {
		if (-(*idum) < 1) *idum=1;
		else *idum = -(*idum);
		idum2=(*idum);
		for (j=NTAB+7;j>=0;j--) {
			k=(*idum)/IQ1;
			*idum=IA1*(*idum-k*IQ1)-k*IR1;
			if (*idum < 0) *idum += IM1;
			if (j < NTAB) {
				iv[j] = *idum;
			}
		}
		iy=iv[0];
	}
	k=(*idum)/IQ1;
	*idum=IA1*(*idum-k*IQ1)-k*IR1;
	if (*idum < 0) *idum += IM1;
	k=idum2/IQ2;
	idum2=IA2*(idum2-k*IQ2)-k*IR2;
	if (idum2 < 0) idum2 += IM2;
	j=iy/NDIV;
	iy=iv[j]-idum2;
	iv[j] = *idum;
	if (iy < 1) iy += IMM1;
	if ((temp=AM*iy) > RNMX) return RNMX;
	else return temp;
}
#undef IM1
#undef IM2
#undef AM
#undef IMM1
#undef IA1
#undef IA2
#undef IQ1
#undef IQ2
#undef IR1
#undef IR2
#undef NTAB
#undef NDIV
#undef EPS
#undef RNMX
/* (C) Copr. 1986-92 Numerical Recipes Software 71.+I0>+. */
myparallel.c
#include<stdio.h>
#include<stdlib.h>
#include<stddef.h>
#include<string.h>
#include<math.h>
#include<sys/time.h>
#include<mpi.h>

float ran2(long *);
long iseed=-9;

struct timeval tv;
float gettime(){
	static int startflag = 1;
	static double tsecs0, tsecs1;
	if(startflag) {
		(void ) gettimeofday(&tv, NULL);
		tsecs0 = tv.tv_sec + tv.tv_usec*1.0E-6;
		startflag = 0;
	}
	(void) gettimeofday(&tv, NULL);
	tsecs1 = tv.tv_sec + tv.tv_usec*1.0e-6;
	return (float) (tsecs1 - tsecs0);
}

void domaindecomp(float **ibase, size_t *mmem, float valmin, float valmax, MPI_Comm Comm){
	int myid,nid;
	MPI_Status stat;
	MPI_Comm_rank(Comm, &myid);
	MPI_Comm_size(Comm, &nid);
	int maxnp = *mmem;
	int i, j, k;
	float *val = *ibase;
	float **lval = (float**)malloc(sizeof(float*)*nid);
	long *nlval = (long*)malloc(sizeof(long)*nid);
	long cnt, recv=0;
	for(i=0;i<nid;i++){
		if(i!=myid){
			lval[i] = (float*)malloc(sizeof(float)*maxnp);
			nlval[i] = 0;
		}
	}
	float step = (1.-0)/(float)nid;
	for(i=0;i<maxnp;i++){
		j = val[i]/step;
		if(j==myid){
			val[recv++] = val[i];
		}
		else{
			*(lval[j]+nlval[j]) = val[i];
			nlval[j]++;
		}
	}

	for(i=0;i<myid;i++){
		MPI_Send(&nlval[i],1,MPI_LONG,i,10,MPI_COMM_WORLD);
		MPI_Send(lval[i],nlval[i],MPI_FLOAT,i,20,MPI_COMM_WORLD);
		//printf("sending %d -> %d\n", myid, i);
	}
	for(i=0;i<nid-1;i++){
		MPI_Recv(&cnt,1,MPI_LONG,MPI_ANY_SOURCE,10,MPI_COMM_WORLD,&stat);
		//printf("receiving %d -> %d\n", stat.MPI_SOURCE, myid);
		MPI_Recv(val+recv,cnt,MPI_FLOAT,stat.MPI_SOURCE,20,MPI_COMM_WORLD,&stat);
		recv+=cnt;
	}
	for(i=myid+1;i<nid;i++){
		MPI_Send(&nlval[i],1,MPI_LONG,i,10,MPI_COMM_WORLD);
		MPI_Send(lval[i],nlval[i],MPI_FLOAT,i,20,MPI_COMM_WORLD);
		//printf("sending %d -> %d\n", myid, i);
	}

	*mmem = recv;
	for(i=0;i<nid;i++){
		if(i!=myid)	free(lval[i]);
	}
	free(lval);
	free(nlval);
}


void Check(float*val, size_t np, float lvalmin, float lvalmax){
	size_t i;
	for(i=0;i<np;i++){
		if(val[i]<lvalmin || val[i]>=lvalmax){
			fprintf(stderr,"Error in Check %g : %g %g\n",val[i],lvalmin,lvalmax);
			MPI_Finalize();
			exit(99);
		}
	}
}

int main(int argc, char **argv){
	long i, j;
	long np,niter,maxnp=100000000;
	float *val,**lval, lvalmin, lvalmax;
	float time1, time2;
	int myid,nid;
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
	MPI_Comm_size(MPI_COMM_WORLD, &nid);
	if(nid != 2 & nid != 4 && nid != 8 && nid != 16 && nid != 32 && nid != 64){
		fprintf(stderr, "Error input number of parallel ranks\n");
		MPI_Finalize();
		return 0;
	}
	time1 = gettime();
	val = (float*)malloc(sizeof(float)*maxnp*2);
	float step = (1.-0)/(float)nid;
	iseed = -1*(myid+1284L);
	for(i=0;i<maxnp;i++){
		val[i] = ran2(&iseed);
	}
	np = maxnp;
	float valmin = 0.L;
	float valmax = 1.L;
	domaindecomp(&val, &np, valmin, valmax, MPI_COMM_WORLD);
	printf("p%d has %ld members\n", myid, np);
	time2 = gettime();
	if(myid==0) printf("Wallclock time = %g seconds\n", (time2-time1));
	lvalmin = 1./nid *(float)(myid);
	lvalmax = 1./nid *(float)(myid+1);
	Check(val,np,lvalmin,lvalmax);

	free(val);

	MPI_Finalize();
	return 0;
}

#define IM1 2147483563
#define IM2 2147483399
#define AM (1.0/IM1)
#define IMM1 (IM1-1)
#define IA1 40014
#define IA2 40692
#define IQ1 53668
#define IQ2 52774
#define IR1 12211
#define IR2 3791
#define NTAB 32
#define NDIV (1+IMM1/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)

float ran2(long *idum)
{
	int j;
	long k;
	static long idum2=123456789;
	static long iy=0;
	static long iv[NTAB];
	float temp;

	if (*idum <= 0) {
		if (-(*idum) < 1) *idum=1;
		else *idum = -(*idum);
		idum2=(*idum);
		for (j=NTAB+7;j>=0;j--) {
			k=(*idum)/IQ1;
			*idum=IA1*(*idum-k*IQ1)-k*IR1;
			if (*idum < 0) *idum += IM1;
			if (j < NTAB) {
				iv[j] = *idum;
			}
		}
		iy=iv[0];
	}
	k=(*idum)/IQ1;
	*idum=IA1*(*idum-k*IQ1)-k*IR1;
	if (*idum < 0) *idum += IM1;
	k=idum2/IQ2;
	idum2=IA2*(idum2-k*IQ2)-k*IR2;
	if (idum2 < 0) idum2 += IM2;
	j=iy/NDIV;
	iy=iv[j]-idum2;
	iv[j] = *idum;
	if (iy < 1) iy += IMM1;
	if ((temp=AM*iy) > RNMX) return RNMX;
	else return temp;
}
#undef IM1
#undef IM2
#undef AM
#undef IMM1
#undef IA1
#undef IA2
#undef IQ1
#undef IQ2
#undef IR1
#undef IR2
#undef NTAB
#undef NDIV
#undef EPS
#undef RNMX
/* (C) Copr. 1986-92 Numerical Recipes Software 71.+I0>+. */


'Parallel Programming' 카테고리의 다른 글

MPI Function Dictionary  (1) 2016.10.04
KSC 2015 2번 문제 및 답안  (0) 2016.10.01
KSC 2015 1번 문제 및 답안  (0) 2016.10.01
KSC 2014 3번 문제 및 답안  (0) 2016.10.01
KSC 2014 2번 문제 및 답안  (0) 2016.10.01

댓글