본문 바로가기
Parallel Programming

KSC 2015 2번 문제 및 답안

by suminhan 2016. 10. 1.

sequential.c

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#ifndef M_PI
    #define M_PI 3.14159265358979323846
#endif
/* int np = 30, ne = 512;  */
int np = 5,  ne = 10; 
                      // np: number of nodes in each element
					  // ne: number of elements
					  // caution: np 20, ne 256 will take approx 10 minutes on a serial job

void interface_flux(double *qq, double *fstar, double *ib, double speed);
void rhs(double *qq, double *rr, double *dv, double *df, double *ib, double speed);
void lagrange(double xx, double *pts, double *ll);
void lagrange_deriv(double xx, double *dl, double *pts);
void legendre(int n, double *x, int xsize, double *output);
double legendre_scalar(int n, double x);
double dlegendre_scalar(int n, double x);
double ddlegendre_scalar(int n, double x);
double dddlegendre_scalar(int n, double x);
void dlegendre_roots(int polyorder, double *output);
void gausslobatto_quadrature(int polyorder, double *roots, double *weights);
double dot_product(double *v, double *u, int n);
void save_field(double *xx, double *qq, int elem_num, double *roots, int eres);
void initialize(double *qq, double *xx, double xmax, double xmin, char *init_type);

int main(int argc, char **argv){

	double tend = 1E2, speed = 1.;
	// double tend = 1E-1, speed = 1.;
	char *init_type="mixed2";
	double *roots, *weights, *ll, *dl, xmin, xmax, 
		   deltax, jac, xr, xl, cfl, dt, rtime, min_dx;
	int ii, jj, kk, ee, idx, eres;
	long nstep;
	double *dx, *mesh; 
	double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv, 
		   *mf, *ib, *df, *fstar;

	// initialize 
	// fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne)
	// c 1d index structure array = [ee*np*np + jj*np + ii]
	roots   = (double*)malloc(np*   sizeof(double));
	weights = (double*)malloc(np*   sizeof(double));
	ll      = (double*)malloc(np*   sizeof(double));
	dl      = (double*)malloc(np*   sizeof(double));
	dx      = (double*)malloc(ne*   sizeof(double));
	mesh    = (double*)malloc((ne+1)*sizeof(double));

	smat	= (double*)malloc(np*np*sizeof(double));    // [jj np, ii np]
	xx		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	qq		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	qtemp	= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	k1		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	k2		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	k3		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	k4		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	minv_vec= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	mmat	= (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np]
	dv		= (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np]
	mf		= (double*)malloc(2*np*sizeof(double));     // [jj 2,  ii np]
	ib		= (double*)malloc(2*np*sizeof(double));     // [jj 2,  ii np]
	fstar	= (double*)malloc(2*ne*sizeof(double));     // [jj 2,  ii ne]
	df		= (double*)malloc(ne*2*np*sizeof(double));  // [ee ne, jj 2, ii np]

	for (ii=0; ii<np; ++ii){
		roots[ii] = 0;
		weights[ii] = 0;
		ll[ii] = 0;
		dl[ii] = 0;
	}
	for (ii=0; ii<ne; ++ii){
		dx[ii] = 0;
		mesh[ii] = 0;
	}
	mesh[ne] = 0;

	
	for (ii=0; ii<np*np; ++ii){
		smat[ii] = 0;
	}
	for (ii=0; ii<ne*np; ++ii){
		xx[ii]	= 0;		 	
		qq[ii]	= 0;		 	
		k1[ii]	= 0;		 	
		k2[ii]	= 0;		 	
		k3[ii]	= 0;		 	
		k4[ii]	= 0;		 	
		minv_vec[ii]	= 0; 
	}
	for (ii=0; ii<ne*np*np; ++ii){
		mmat[ii] = 0;    	
		dv[ii]	 = 0;
	}
	for (ii=0; ii<np*2; ++ii){
		mf[ii] = 0;
		ib[ii] = 0;
	}
	for (ii=0; ii<ne*2; ++ii){
		fstar[ii] = 0;
	}
	for (ii=0; ii<ne*2*np; ++ii){
		df[ii] = 0;
	}

	// mesh setup
	xmin = 0.;
	xmax = 10.;
	deltax = (xmax-xmin)/(double)ne;
	mesh[ne] = xmax;
	for(ee=0;ee<ne;++ee){
		mesh[ee] = xmin+ee*deltax;
	}
	
	// gauss lobatto quadrature point, weight setup
	gausslobatto_quadrature(np, roots, weights);

	// coordinates and element size
	min_dx = xmax - xmin; // initial guess
	for(ee=0;ee<ne;ee++){
		xl = mesh[ee];
		xr = mesh[ee+1];
		dx[ee] = xr-xl; // size of each element
		if(dx[ee] < min_dx){
			min_dx = dx[ee]; // finding minimum dx
		}
		for(ii=0;ii<np;ii++){
			idx = ee*np+ii;
			xx[idx] = xl + 0.5*(1+roots[ii])*dx[ee];
		}
	}

	// mass matrix
	for(ii=0;ii<ne*np*np;ii++){
		mmat[ii] = 0;
	}
	for(ee=0;ee<ne;ee++){
		jac = fabs(dx[ee])/2;
		for(kk=0;kk<np;kk++){
			lagrange(roots[kk], ll, roots);
			for(jj=0;jj<np;jj++){
				for(ii=0;ii<np;ii++){
					idx = ee*np*np+jj*np+ii;
					// mass matrix mmat[ne][np][np] in 1d index representation
					mmat[idx] += jac*weights[kk]*ll[ii]*ll[jj];
				}
			}
		}
	}

	// stiffness matrix
	for(ii=0;ii<np*np;ii++){
		smat[ii] = 0;
	}
	for(kk=0;kk<np;kk++){
		lagrange(roots[kk], ll, roots);
		lagrange_deriv(roots[kk], dl, roots);
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				idx = jj*np+ii;
				// stiffness matrix smat[np][np] in 1d index representation
				smat[idx] += weights[kk]*ll[jj]*dl[ii];
			}
		}
	}

	// face integration
	for(ii=0;ii<np*2;ii++){
		mf[ii] = 0;
	}
	lagrange(-1,mf,   roots); // mf[ii] for(ii=0, ii<np,ii++) represents element left face integration
	lagrange( 1,mf+np,roots); // mf[ii] for ii=np, ii<2*np, ii++) reresents element right face integration

	// boundary interpolation
	for(ii=0;ii<np*2;ii++){
		ib[ii] = 0;
	}
	lagrange(-1,ib,   roots); // element left edge interpolation
	lagrange( 1,ib+np,roots); // element right edge interpolation

	
	// divergence operators
	for(ii=0;ii<ne*np*np;ii++){
		dv[ii] = 0;
	}
	for(ii=0;ii<ne*np*2;ii++){
		dv[ii] = 0;
	}
	for(ee=0;ee<ne;ee++){
		for(jj=0;jj<np;jj++){
			// it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0
			// the inverse of mmat is just the inverse of the diagonal components
			// here, we are extracting the inverse diagonal components only
			minv_vec[ee*np+jj] = 1./mmat[ee*np*np+jj*np+jj];
		}
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				dv[ee*np*np+jj*np+ii] = minv_vec[ee*np+ii]*smat[jj*np+ii];
			}
		}
		for(jj=0;jj<2;jj++){
			for(ii=0;ii<np;ii++){
				df[ee*np*2+jj*np+ii]  = minv_vec[ee*np+ii]*mf[jj*np+ii];
			}
		}

	}
	
	// initialize qq field
	initialize(qq, xx, xmax, xmin, init_type);
	cfl = 1./(np*np);
	dt = cfl * min_dx / fabs(speed);
	rtime = 0.;
	nstep = 0;

	printf("Start Time Integration\n");

	// Runge-Kutta 4th order Time integration loop
	
	while(rtime < tend){
		dt = fmin(dt, tend-rtime);

		rhs(qq,	   k1, dv, df, ib, speed);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k1[ii];
		rhs(qtemp, k2, dv, df, ib, speed);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k2[ii];
		rhs(qtemp, k3, dv, df, ib, speed);
		
		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+dt*k3[ii];
		rhs(qtemp, k4, dv, df, ib, speed);

		for(ii=0;ii<ne*np;ii++)
			qq[ii] += 1./6.*dt*(k1[ii]+2*k2[ii]+2*k3[ii]+k4[ii]);

		rtime += dt;
		nstep += 1;
		if(nstep%10000 == 0) 
			printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime/tend*100);
	}

	// timeloop ends here;

	printf("Integration complete\n");

	if(ne > 200){
		eres = 2;
	}
	else if (ne > 60){
		eres = 3;
	}
	else if (ne > 30){
		eres = 6;
	}
	else {
		eres = 10;
	}

	// final report
	printf("-----------------------------------------------\n");
	printf("code type   : c serial\n");
	printf("Final time  : %13.5e\n", rtime);
	printf("CFL         : %13.5e\n", cfl);
	printf("DOF         : %13d\n", ne*np);
	printf("No. of Elem : %13d\n", ne);
	printf("Order       : %13d\n", np);
	printf("eres        : %13d\n", eres);
	printf("time steps  : %13ld\n", nstep);
	printf("-----------------------------------------------\n");

	save_field(xx, qq, ne, roots, eres);

	free(roots);   
	free(weights); 
	free(ll);      
	free(dl);      
	free(dx);      
	free(mesh);    
	free(smat);	
	free(xx);		
	free(qq);		
	free(qtemp);	
	free(k1);		
	free(k2);		
	free(k3);		
	free(k4);		
	free(minv_vec);
	free(mmat);	
	free(dv);		
	free(mf);		
	free(ib);		
	free(fstar);	
	free(df);		

	return 0;
}

////////////////////////////////////////////////////////////////////////////
// parallelization needed functions: interface_flux & rhs

void interface_flux(double *qq, double *fstar, double *ib, double speed){
	int ii, ee;
	double qb[2*ne];

	for(ii=0;ii<2*ne;ii++){
		fstar[ii] = 0;
	}
	// fstar[0:ne-1] stores numerical flux of the left edge on each elements
	// fstar[ne:2*ne-1] stores numerical flux of the right edge on each elements

	for(ii=0;ii<ne;ii++){
		qb[ii]   = dot_product(ib,    qq+ii*np, np); // left edge interpolated value of qq at element ii
		qb[ne+ii] = dot_product(ib+np, qq+ii*np, np); // right edge interpolated value of qq at element ii
	}
	
	ii = 0; // calculating numerical flux (fstar) with periodic boundary condition
	fstar[ii] = -((qb[ne+ne-1]+qb[ii])/2*speed + fabs(speed)*(qb[ne+ne-1]-qb[ii])/2);
	fstar[ne+ne-1] = -fstar[0];
	for(ii=1;ii<ne;ii++){
		fstar[ii] = -((qb[ne+ii-1]+qb[ii])/2*speed + fabs(speed)*(qb[ne+ii-1]-qb[ii])/2);
		fstar[ne+ii-1] = -fstar[ii];
	}
}

void rhs(double *qq, double *rr, double *dv, double *df, double *ib, double speed){
	int ii, jj, ee, idx;
	double fstar[2*ne], term;
	for(ii=0;ii<np*ne;ii++){
		rr[ii] = 0;
	}

	for(ee=0;ee<ne;ee++){
		for(ii=0;ii<np;ii++){
			for(jj=0; jj<np; jj++){
				rr[ee*np+ii] += dv[ee*np*np+jj*np+ii]*speed*qq[ee*np+jj];
			}
		}
	}


	interface_flux(qq, fstar, ib, speed);
	for(ee=0;ee<ne;ee++){
		for(ii=0; ii<np; ii++){
			for(jj=0;jj<2;jj++){
				rr[ee*np+ii] -= df[ee*(np*2)+jj*np+ii]*fstar[ee+jj*ne];
			}
		}
	}
}

answer.c
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <mpi.h>

#define IMIN(a, b) (((a) < (b)) ? (a) : (b))
#ifndef M_PI
    #define M_PI 3.14159265358979323846
#endif

int np = 30, tne = 512, ne; 
/* int np = 4, tne = 5, ne;  */
						   // np: number of nodes in each element
					       // tne: total number of elements
					       // ne: number of elements in each rank

void equal_load(int n1, int n2, int nprocs, int myid, int *istart, int *ifinish);
void interface_flux(double *qq, double *fstar, double *ib, double speed, int nprocs, int myrank);
void rhs(double *qq, double *rr, double *dv, double *df, double *ib, double speed, int nprocs, int myrank);
void lagrange(double xx, double *pts, double *ll);
void lagrange_deriv(double xx, double *dl, double *pts);
void legendre(int n, double *x, int xsize, double *output);
double legendre_scalar(int n, double x);
double dlegendre_scalar(int n, double x);
double ddlegendre_scalar(int n, double x);
double dddlegendre_scalar(int n, double x);
void dlegendre_roots(int polyorder, double *output);
void gausslobatto_quadrature(int polyorder, double *roots, double *weights);
double dot_product(double *v, double *u, int n);
void save_field(double *xx, double *qq, int elem_num, double *roots, int eres);
void initialize(double *qq, double *xx, double xmax, double xmin, char *init_type);
double lxmin, lxmax;

int main(int argc, char **argv){

	double tend = 0.5, speed = 1.;
	// double tend = 1E-1, speed = 1.;
	char *init_type="mixed2";
	double *roots, *weights, *ll, *dl, xmin, xmax, 
		   deltax, jac, xr, xl, cfl, dt, rtime, min_dx;
	int ii, jj, kk, ee, idx, eres;
	long nstep;
	double *dx, *mesh; 
	double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv, 
		   *mf, *ib, *df, *fstar, *gxx, *gqq, *buff1, *buff2;
	int istart, ifinish, myrank, nprocs;
	MPI_Request ireq1, ireq2, ireq3, ireq4;
	MPI_Status status;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
	MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

	equal_load(0, tne-1, nprocs, myrank, &istart, &ifinish);
	ne = ifinish - istart + 1;

	// initialize 
	// fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne)
	// c 1d index structure array = [ee*np*np + jj*np + ii]
	roots   = (double*)malloc(np*   sizeof(double));
	weights = (double*)malloc(np*   sizeof(double));
	ll      = (double*)malloc(np*   sizeof(double));
	dl      = (double*)malloc(np*   sizeof(double));
	dx      = (double*)malloc(ne*   sizeof(double));
	mesh    = (double*)malloc((ne+1)*sizeof(double));

	smat	= (double*)malloc(np*np*sizeof(double));    // [jj np, ii np]
	xx		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	qq		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	qtemp	= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	k1		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	k2		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	k3		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	k4		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	minv_vec= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	mmat	= (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np]
	dv		= (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np]
	mf		= (double*)malloc(2*np*sizeof(double));     // [jj 2,  ii np]
	ib		= (double*)malloc(2*np*sizeof(double));     // [jj 2,  ii np]
	fstar	= (double*)malloc(2*ne*sizeof(double));     // [jj 2,  ii ne]
	df		= (double*)malloc(ne*2*np*sizeof(double));  // [ee ne, jj 2, ii np]
	

	for (ii=0; ii<np; ++ii){
		roots[ii] = 0;
		weights[ii] = 0;
		ll[ii] = 0;
		dl[ii] = 0;
	}
	for (ii=0; ii<ne; ++ii){
		dx[ii] = 0;
		mesh[ii] = 0;
	}
	mesh[ne] = 0;

	for (ii=0; ii<np*np; ++ii){
		smat[ii] = 0;
	}
	for (ii=0; ii<ne*np; ++ii){
		xx[ii]	= 0;		 	
		qq[ii]	= 0;		 	
		qtemp[ii]	= 0;		 	
		k1[ii]	= 0;		 	
		k2[ii]	= 0;		 	
		k3[ii]	= 0;		 	
		k4[ii]	= 0;		 	
		minv_vec[ii]	= 0; 
	}
	for (ii=0; ii<ne*np*np; ++ii){
		mmat[ii] = 0;    	
		dv[ii]	 = 0;
	}
	for (ii=0; ii<np*2; ++ii){
		mf[ii] = 0;
		ib[ii] = 0;
	}
	for (ii=0; ii<ne*2; ++ii){
		fstar[ii] = 0;
	}
	for (ii=0; ii<ne*2*np; ++ii){
		df[ii] = 0;
	}

	// mesh setup
	xmin = 0.;
	xmax = 10.;
	deltax = (xmax-xmin)/(double)tne;
	lxmin = xmin + (istart)*deltax;
	lxmax = xmin + (ifinish+1)*deltax;
	mesh[ne] = lxmax;
	for(ee=0;ee<ne;++ee){
		mesh[ee] = lxmin+ee*deltax;
	}
	
	// gauss lobatto quadrature point, weight setup
	gausslobatto_quadrature(np, roots, weights);

	// coordinates and element size
	min_dx = xmax - xmin; // initial guess
	for(ee=0;ee<ne;ee++){
		xl = mesh[ee];
		xr = mesh[ee+1];
		dx[ee] = xr-xl; // size of each element
		if(dx[ee] < min_dx){
			min_dx = dx[ee]; // finding minimum dx
		}
		for(ii=0;ii<np;ii++){
			idx = ee*np+ii;
			xx[idx] = xl + 0.5*(1+roots[ii])*dx[ee];
		}
	}

	// mass matrix
	for(ii=0;ii<ne*np*np;ii++){
		mmat[ii] = 0;
	}
	for(ee=0;ee<ne;ee++){
		jac = fabs(dx[ee])/2;
		for(kk=0;kk<np;kk++){
			lagrange(roots[kk], ll, roots);
			for(jj=0;jj<np;jj++){
				for(ii=0;ii<np;ii++){
					idx = ee*np*np+jj*np+ii;
					// mass matrix mmat[ne][np][np] in 1d index representation
					mmat[idx] += jac*weights[kk]*ll[ii]*ll[jj];
				}
			}
		}
	}

	// stiffness matrix
	for(ii=0;ii<np*np;ii++){
		smat[ii] = 0;
	}
	for(kk=0;kk<np;kk++){
		lagrange(roots[kk], ll, roots);
		lagrange_deriv(roots[kk], dl, roots);
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				idx = jj*np+ii;
				// stiffness matrix smat[np][np] in 1d index representation
				smat[idx] += weights[kk]*ll[jj]*dl[ii];
			}
		}
	}

	// face integration
	for(ii=0;ii<np*2;ii++){
		mf[ii] = 0;
	}
	lagrange(-1,mf,   roots); // mf[ii] for(ii=0, ii<np,ii++) represents element left face integration
	lagrange( 1,mf+np,roots); // mf[ii] for ii=np, ii<2*np, ii++) reresents element right face integration

	
	// boundary interpolation
	for(ii=0;ii<np*2;ii++){
		ib[ii] = 0;
	}
	lagrange(-1,ib,   roots); // element left edge interpolation
	lagrange( 1,ib+np,roots); // element right edge interpolation

	
	// divergence operators
	for(ii=0;ii<ne*np*np;ii++){
		dv[ii] = 0;
	}
	for(ii=0;ii<ne*np*2;ii++){
		dv[ii] = 0;
	}
	for(ee=0;ee<ne;ee++){
		for(jj=0;jj<np;jj++){
			// it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0
			// the inverse of mmat is just the inverse of the diagonal components
			// here, we are extracting the inverse diagonal components only
			minv_vec[ee*np+jj] = 1./mmat[ee*np*np+jj*np+jj];
		}
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				dv[ee*np*np+jj*np+ii] = minv_vec[ee*np+ii]*smat[jj*np+ii];
			}
		}
		for(jj=0;jj<2;jj++){
			for(ii=0;ii<np;ii++){
				df[ee*np*2+jj*np+ii]  = minv_vec[ee*np+ii]*mf[jj*np+ii];
			}
		}

	}
	
	// initialize qq field
	initialize(qq, xx, xmax, xmin, init_type);
	cfl = 1./(np*np);
	dt = cfl * min_dx / fabs(speed);
	rtime = 0.;
	nstep = 0;


	if(myrank == 0){
		printf("Start Time Integration\n");
	}

	// Runge-Kutta 4th order Time integration loop
	
	while(rtime < tend){
		dt = fmin(dt, tend-rtime);

		rhs(qq,	   k1, dv, df, ib, speed, nprocs, myrank);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k1[ii];
		rhs(qtemp, k2, dv, df, ib, speed, nprocs, myrank);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k2[ii];
		rhs(qtemp, k3, dv, df, ib, speed, nprocs, myrank);
		
		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+dt*k3[ii];
		rhs(qtemp, k4, dv, df, ib, speed, nprocs, myrank);

		for(ii=0;ii<ne*np;ii++)
			qq[ii] += 1./6.*dt*(k1[ii]+2*k2[ii]+2*k3[ii]+k4[ii]);

		rtime += dt;
		nstep += 1;
		if(myrank == 0 && (nstep%10000 == 0))
			printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime/tend*100);
	}

	// timeloop ends here;

	if(myrank != 0){
		MPI_Isend(xx, np*ne, MPI_DOUBLE, 0, 10, MPI_COMM_WORLD, &ireq1);
		MPI_Isend(qq, np*ne, MPI_DOUBLE, 0, 20, MPI_COMM_WORLD, &ireq2);
		MPI_Isend(&istart, 1, MPI_INT, 0, 30, MPI_COMM_WORLD, &ireq3);
		MPI_Isend(&ifinish, 1, MPI_INT, 0, 40, MPI_COMM_WORLD, &ireq4);
		MPI_Wait(&ireq3, &status);
		MPI_Wait(&ireq4, &status);
		MPI_Wait(&ireq1, &status);
		MPI_Wait(&ireq2, &status);
	}

	if(myrank == 0){

		printf("Integration complete\n");

		if(tne > 200){
			eres = 2;
		}
		else if (tne > 60){
			eres = 3;
		}
		else if (tne > 30){
			eres = 6;
		}
		else {
			eres = 10;
		}

		// final report
		printf("-----------------------------------------------\n");
		printf("code type   : c parallel\n");
		printf("Final time  : %13.5e\n", rtime);
		printf("CFL         : %13.5e\n", cfl);
		printf("DOF         : %13d\n", tne*np);
		printf("No. of Elem : %13d\n", tne);
		printf("Order       : %13d\n", np);
		printf("eres        : %13d\n", eres);
		printf("time steps  : %13ld\n", nstep);
		printf("-----------------------------------------------\n");

		gxx		= (double*)malloc(tne*np*sizeof(double));  
		gqq		= (double*)malloc(tne*np*sizeof(double));  
		buff1	= (double*)malloc((tne/nprocs+1)*np*sizeof(double));  
		buff2	= (double*)malloc((tne/nprocs+1)*np*sizeof(double));  

		for(ii=0;ii<ne*np;ii++){
			gxx[ii] = xx[ii];
			gqq[ii] = qq[ii];
		}
		kk = ne*np;
		for(ii=1;ii<nprocs;ii++){
			
			MPI_Irecv(&istart, 1, MPI_INT, ii, 30, MPI_COMM_WORLD, &ireq3);
			MPI_Irecv(&ifinish, 1, MPI_INT, ii, 40, MPI_COMM_WORLD, &ireq4);

			MPI_Wait(&ireq3, &status);
			MPI_Wait(&ireq4, &status);

			ne = ifinish-istart+1;

			MPI_Irecv(buff1, np*ne, MPI_DOUBLE, ii, 10, MPI_COMM_WORLD, &ireq1);
			MPI_Irecv(buff2, np*ne, MPI_DOUBLE, ii, 20, MPI_COMM_WORLD, &ireq2);

			MPI_Wait(&ireq1, &status);
			MPI_Wait(&ireq2, &status);

			for(jj=0; jj<ne*np;jj++){
				gxx[kk+jj] = buff1[jj];
				gqq[kk+jj] = buff2[jj];
			}
			kk += ne*np;
		}
		save_field(gxx, gqq, tne, roots, eres);
		free(gxx);
		free(gqq);
		free(buff1);
		free(buff2);

	}

	free(roots);   
	free(weights); 
	free(ll);      
	free(dl);      
	free(dx);      
	free(mesh);    
	free(smat);	
	free(xx);		
	free(qq);		
	free(qtemp);	
	free(k1);		
	free(k2);		
	free(k3);		
	free(k4);		
	free(minv_vec);
	free(mmat);	
	free(dv);		
	free(mf);		
	free(ib);		
	free(fstar);	
	free(df);		

	
	MPI_Finalize();
	return 0;

}

////////////////////////////////////////////////////////////////////////////
// parallelization needed functions: interface_flux & rhs

void equal_load(int n1, int n2, int nprocs, int myid, int *istart, int *ifinish){
	int iw1, iw2;
	iw1 = (n2-n1+1)/nprocs;
	iw2 = (n2-n1+1)%nprocs;
	*istart = myid*iw1+n1+IMIN(myid,iw2);
	*ifinish= *istart+iw1-1;
	if(iw2>myid) *ifinish+=1;
	if(n2 < *istart) *ifinish = *istart-1;
}

void interface_flux(double *qq, double *fstar, double *ib, double speed, int nprocs, int myrank){
	int ii, ee;
	double qb[2*ne], qb_start;
	MPI_Status status;
	MPI_Request ireq1, ireq2, ireq3, ireq4;

	for(ii=0;ii<2*ne;ii++){
		fstar[ii] = 0;
	}
	
	for(ii=0;ii<ne;ii++){
		qb[ii]   = dot_product(ib,    qq+ii*np, np); // left edge interpolated value of qq at element ii
		qb[ne+ii] = dot_product(ib+np, qq+ii*np, np); // right edge interpolated value of qq at element ii
	}

	// fstar[0:ne-1] stores numerical flux of the left edge on each elements
	// fstar[ne:2*ne-1] stores numerical flux of the right edge on each elements
	if(myrank == 0){
		MPI_Isend(qb+2*ne-1,    1, MPI_DOUBLE, myrank+1, 80, MPI_COMM_WORLD, &ireq1);
		MPI_Irecv(&qb_start,    1, MPI_DOUBLE, nprocs-1, 80, MPI_COMM_WORLD, &ireq2);
	}
	else if (myrank == nprocs-1){
		MPI_Isend(qb+2*ne-1,    1, MPI_DOUBLE, 0,        80, MPI_COMM_WORLD, &ireq1);
		MPI_Irecv(&qb_start,    1, MPI_DOUBLE, myrank-1, 80, MPI_COMM_WORLD, &ireq2);
	}
	else{
		MPI_Isend(qb+2*ne-1,    1, MPI_DOUBLE, myrank+1, 80, MPI_COMM_WORLD, &ireq1);
		MPI_Irecv(&qb_start,    1, MPI_DOUBLE, myrank-1, 80, MPI_COMM_WORLD, &ireq2);
	}

	MPI_Wait(&ireq1, &status);
	MPI_Wait(&ireq2, &status);

	ee = 0; // calculating numerical flux (fstar) with periodic boundary condition
	fstar[ee] = -((qb_start+qb[ee])/2.*speed + fabs(speed)*(qb_start-qb[ee])/2.);

	if(myrank == 0){
		MPI_Isend(fstar,        1, MPI_DOUBLE, nprocs-1, 90, MPI_COMM_WORLD, &ireq3);
	}
	else{
		MPI_Isend(fstar,        1, MPI_DOUBLE, myrank-1, 90, MPI_COMM_WORLD, &ireq3);
	}

	if(myrank == nprocs-1){
		MPI_Irecv(fstar+2*ne-1, 1, MPI_DOUBLE, 0,        90, MPI_COMM_WORLD, &ireq4);
	}
	else{
		MPI_Irecv(fstar+2*ne-1, 1, MPI_DOUBLE, myrank+1, 90, MPI_COMM_WORLD, &ireq4);
	}

	for(ee=1;ee<ne;ee++){
		fstar[ee] = -((qb[ne+ee-1]+qb[ee])/2.*speed + fabs(speed)*(qb[ne+ee-1]-qb[ee])/2.);
		fstar[ne+ee-1] = -fstar[ee];
	}

	MPI_Wait(&ireq3, &status);
	MPI_Wait(&ireq4, &status);

	fstar[2*ne-1] *= -1.;
}

void rhs(double *qq, double *rr, double *dv, double *df, double *ib, 
		double speed, int nprocs, int myrank){
	int ii, jj, ee;
	double fstar[2*ne];

	for(ii=0;ii<np*ne;ii++){
		rr[ii] = 0;
	}

	for(ee=0;ee<ne;ee++){
		for(ii=0;ii<np;ii++){
			for(jj=0; jj<np; jj++){
				rr[ee*np+ii] += dv[ee*np*np+jj*np+ii]*speed*qq[ee*np+jj];
			}
		}
	}
	interface_flux(qq, fstar, ib, speed, nprocs, myrank);
	for(ee=0;ee<ne;ee++){
		for(ii=0; ii<np; ii++){
			for(jj=0;jj<2;jj++){
				rr[ee*np+ii] -= df[ee*(np*2)+jj*np+ii]*fstar[ee+jj*ne];
			}
		}
	}
}

myparallel.c
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <mpi.h>
#ifndef M_PI
    #define M_PI 3.14159265358979323846
#endif
/* int np = 30, ne = 512;  */
int np = 5,  ne, tne = 10; 
                      // np: number of nodes in each element
					  // ne: number of elements
					  // caution: np 20, ne 256 will take approx 10 minutes on a serial job

void interface_flux(double *qq, double *fstar, double *ib, double speed, int nprocs, int myrank);
void rhs(double *qq, double *rr, double *dv, double *df, double *ib, double speed, int nprocs, int myrank);
void lagrange(double xx, double *pts, double *ll);
void lagrange_deriv(double xx, double *dl, double *pts);
void legendre(int n, double *x, int xsize, double *output);
double legendre_scalar(int n, double x);
double dlegendre_scalar(int n, double x);
double ddlegendre_scalar(int n, double x);
double dddlegendre_scalar(int n, double x);
void dlegendre_roots(int polyorder, double *output);
void gausslobatto_quadrature(int polyorder, double *roots, double *weights);
double dot_product(double *v, double *u, int n);
void save_field(double *xx, double *qq, int elem_num, double *roots, int eres);
void initialize(double *qq, double *xx, double xmax, double xmin, char *init_type);

int main(int argc, char **argv){

	double tend = 1E2, speed = 1.;
	// double tend = 1E-1, speed = 1.;
	char *init_type="mixed2";
	double *roots, *weights, *ll, *dl, xmin, xmax, 
		   deltax, jac, xr, xl, cfl, dt, rtime, min_dx;
	int ii, jj, kk, ee, idx, eres;
	long nstep;
	double *dx, *mesh; 
	double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv, 
		   *mf, *ib, *df, *fstar;

	// newly added variables for parallelization
	int nprocs, myrank, q, r, esta;
	int *ircnt, *idisp;
	double *xxtot, *qqtot;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
	q = tne/nprocs;
	r = tne%nprocs;
	ne = myrank<r?(q+1):q;
	esta = myrank<r?(q+1)*myrank:(q*myrank+r);

	// initialize 
	// fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne)
	// c 1d index structure array = [ee*np*np + jj*np + ii]
	roots   = (double*)malloc(np*   sizeof(double));
	weights = (double*)malloc(np*   sizeof(double));
	ll      = (double*)malloc(np*   sizeof(double));
	dl      = (double*)malloc(np*   sizeof(double));
	dx      = (double*)malloc(ne*   sizeof(double));
	mesh    = (double*)malloc((ne+1)*sizeof(double));

	smat	= (double*)malloc(np*np*sizeof(double));    // [jj np, ii np]
	xx		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	qq		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	qtemp	= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	k1		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]	
	k2		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	k3		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	k4		= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	minv_vec= (double*)malloc(ne*np*sizeof(double));    // [ee ne, ii np]
	mmat	= (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np]
	dv		= (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np]
	mf		= (double*)malloc(2*np*sizeof(double));     // [jj 2,  ii np]
	ib		= (double*)malloc(2*np*sizeof(double));     // [jj 2,  ii np]
	fstar	= (double*)malloc(2*ne*sizeof(double));     // [jj 2,  ii ne]
	df		= (double*)malloc(ne*2*np*sizeof(double));  // [ee ne, jj 2, ii np]

	if(!myrank) {
		xxtot	= (double*)malloc(tne*np*sizeof(double));    // [ee ne, ii np]
		qqtot	= (double*)malloc(tne*np*sizeof(double));    // [ee ne, ii np]	
		ircnt	= (int*)malloc(nprocs*sizeof(int));
		idisp	= (int*)malloc(nprocs*sizeof(int));
		idisp[0] = 0;
		for(ii=0; ii<nprocs; ii++){
			ircnt[ii] = (myrank<r?(q+1):q)*np;
			if(ii>0) idisp[ii] = idisp[ii-1] + ircnt[ii-1];
			printf("ircnt[%d] = %d, idisp[%d] = %d\n", ii, ircnt[ii], ii, idisp[ii]);
		}
	}


	for (ii=0; ii<np; ++ii){
		roots[ii] = 0;
		weights[ii] = 0;
		ll[ii] = 0;
		dl[ii] = 0;
	}
	for (ii=0; ii<ne; ++ii){
		dx[ii] = 0;
		mesh[ii] = 0;
	}
	mesh[ne] = 0;

	
	for (ii=0; ii<np*np; ++ii){
		smat[ii] = 0;
	}
	for (ii=0; ii<ne*np; ++ii){
		xx[ii]	= 0;		 	
		qq[ii]	= 0;		 	
		k1[ii]	= 0;		 	
		k2[ii]	= 0;		 	
		k3[ii]	= 0;		 	
		k4[ii]	= 0;		 	
		minv_vec[ii]	= 0; 
	}
	for (ii=0; ii<ne*np*np; ++ii){
		mmat[ii] = 0;    	
		dv[ii]	 = 0;
	}
	for (ii=0; ii<np*2; ++ii){
		mf[ii] = 0;
		ib[ii] = 0;
	}
	for (ii=0; ii<ne*2; ++ii){
		fstar[ii] = 0;
	}
	for (ii=0; ii<ne*2*np; ++ii){
		df[ii] = 0;
	}

	// mesh setup
	xmin = 0.;
	xmax = 10.;
	deltax = (xmax-xmin)/(double)tne;
	//mesh[ne] = xmax;
	for(ee=0;ee<ne+1;++ee){
		mesh[ee] = xmin+(ee+esta)*deltax;
	}
	if(myrank == nprocs-1) mesh[ne] = xmax;

	
	// gauss lobatto quadrature point, weight setup
	gausslobatto_quadrature(np, roots, weights);

	// coordinates and element size
	min_dx = xmax - xmin; // initial guess
	for(ee=0;ee<ne;ee++){
		xl = mesh[ee];
		xr = mesh[ee+1];
		dx[ee] = xr-xl; // size of each element
		if(dx[ee] < min_dx){
			min_dx = dx[ee]; // finding minimum dx
		}
		for(ii=0;ii<np;ii++){
			idx = ee*np+ii;
			xx[idx] = xl + 0.5*(1+roots[ii])*dx[ee];
		}
	}

	// mass matrix
	for(ii=0;ii<ne*np*np;ii++){
		mmat[ii] = 0;
	}
	for(ee=0;ee<ne;ee++){
		jac = fabs(dx[ee])/2;
		for(kk=0;kk<np;kk++){
			lagrange(roots[kk], ll, roots);
			for(jj=0;jj<np;jj++){
				for(ii=0;ii<np;ii++){
					idx = ee*np*np+jj*np+ii;
					// mass matrix mmat[ne][np][np] in 1d index representation
					mmat[idx] += jac*weights[kk]*ll[ii]*ll[jj];
				}
			}
		}
	}

	// stiffness matrix
	for(ii=0;ii<np*np;ii++){
		smat[ii] = 0;
	}
	for(kk=0;kk<np;kk++){
		lagrange(roots[kk], ll, roots);
		lagrange_deriv(roots[kk], dl, roots);
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				idx = jj*np+ii;
				// stiffness matrix smat[np][np] in 1d index representation
				smat[idx] += weights[kk]*ll[jj]*dl[ii];
			}
		}
	}

	// face integration
	for(ii=0;ii<np*2;ii++){
		mf[ii] = 0;
	}
	lagrange(-1,mf,   roots); // mf[ii] for(ii=0, ii<np,ii++) represents element left face integration
	lagrange( 1,mf+np,roots); // mf[ii] for ii=np, ii<2*np, ii++) reresents element right face integration

	// boundary interpolation
	for(ii=0;ii<np*2;ii++){
		ib[ii] = 0;
	}
	lagrange(-1,ib,   roots); // element left edge interpolation
	lagrange( 1,ib+np,roots); // element right edge interpolation

	
	// divergence operators
	for(ii=0;ii<ne*np*np;ii++){
		dv[ii] = 0;
	}
	for(ii=0;ii<ne*np*2;ii++){
		dv[ii] = 0;
	}
	for(ee=0;ee<ne;ee++){
		for(jj=0;jj<np;jj++){
			// it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0
			// the inverse of mmat is just the inverse of the diagonal components
			// here, we are extracting the inverse diagonal components only
			minv_vec[ee*np+jj] = 1./mmat[ee*np*np+jj*np+jj];
		}
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				dv[ee*np*np+jj*np+ii] = minv_vec[ee*np+ii]*smat[jj*np+ii];
			}
		}
		for(jj=0;jj<2;jj++){
			for(ii=0;ii<np;ii++){
				df[ee*np*2+jj*np+ii]  = minv_vec[ee*np+ii]*mf[jj*np+ii];
			}
		}

	}
	
	// initialize qq field
	initialize(qq, xx, xmax, xmin, init_type);
	cfl = 1./(np*np);
	dt = cfl * min_dx / fabs(speed);
	rtime = 0.;
	nstep = 0;

	if(!myrank)
		printf("Start Time Integration\n");

	// Runge-Kutta 4th order Time integration loop
	
	while(rtime < tend){
		dt = fmin(dt, tend-rtime);

		rhs(qq,	   k1, dv, df, ib, speed, nprocs, myrank);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k1[ii];
		rhs(qtemp, k2, dv, df, ib, speed, nprocs, myrank);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k2[ii];
		rhs(qtemp, k3, dv, df, ib, speed, nprocs, myrank);
		
		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+dt*k3[ii];
		rhs(qtemp, k4, dv, df, ib, speed, nprocs, myrank);

		for(ii=0;ii<ne*np;ii++)
			qq[ii] += 1./6.*dt*(k1[ii]+2*k2[ii]+2*k3[ii]+k4[ii]);

		rtime += dt;
		nstep += 1;
		if((myrank==0) && nstep%10000 == 0) 
			printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime/tend*100);
	}

	printf("end..?\n");
	// timeloop ends here;
	//printf("ne*np = %d, ircnt[0] = %d, idisp[0] = %d\n", ne*np, ircnt[0], idisp[0]);
	MPI_Gatherv(xx, ne*np, MPI_DOUBLE, xxtot, ircnt, idisp, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Gatherv(qq, ne*np, MPI_DOUBLE, qqtot, ircnt, idisp, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	printf("end..?\n");
	if(!myrank){
		printf("Integration complete\n");

		if(ne > 200){
			eres = 2;
		}
		else if (ne > 60){
			eres = 3;
		}
		else if (ne > 30){
			eres = 6;
		}
		else {
			eres = 10;
		}

		// final report
		printf("-----------------------------------------------\n");
		printf("code type   : c parallel\n");
		printf("Final time  : %13.5e\n", rtime);
		printf("CFL         : %13.5e\n", cfl);
		printf("DOF         : %13d\n", tne*np);
		printf("No. of Elem : %13d\n", tne);
		printf("Order       : %13d\n", np);
		printf("eres        : %13d\n", eres);
		printf("time steps  : %13ld\n", nstep);
		printf("-----------------------------------------------\n");

		save_field(xxtot, qqtot, tne, roots, eres);

		free(idisp);
		free(ircnt);
		free(xxtot);
		free(qqtot);
	}
	free(roots);   
	free(weights); 
	free(ll);      
	free(dl);      
	free(dx);      
	free(mesh);    
	free(smat);	
	free(xx);		
	free(qq);		
	free(qtemp);	
	free(k1);		
	free(k2);		
	free(k3);		
	free(k4);		
	free(minv_vec);
	free(mmat);	
	free(dv);		
	free(mf);		
	free(ib);		
	free(fstar);	
	free(df);		

	MPI_Finalize();
	return 0;
}

////////////////////////////////////////////////////////////////////////////
// parallelization needed functions: interface_flux & rhs

void interface_flux(double *qq, double *fstar, double *ib, double speed, int nprocs, int myrank){
	int ii, ee;
	double qb[2*ne];

	for(ii=0;ii<2*ne;ii++){
		fstar[ii] = 0;
	}
	// fstar[0:ne-1] stores numerical flux of the left edge on each elements
	// fstar[ne:2*ne-1] stores numerical flux of the right edge on each elements

	for(ii=0;ii<ne;ii++){
		qb[ii]   = dot_product(ib,    qq+ii*np, np); // left edge interpolated value of qq at element ii
		qb[ne+ii] = dot_product(ib+np, qq+ii*np, np); // right edge interpolated value of qq at element ii
	}
	
	ii = 0; // calculating numerical flux (fstar) with periodic boundary condition
	double qbf, fsta;
	double aspeed = fabs(speed);
	int prev = (myrank - 1 + nprocs)%nprocs;
	int next = (myrank + 1)%nprocs;
	MPI_Request req[4];
	MPI_Status stat[4];
	MPI_Isend(&qb[ne+ne-1], 1, MPI_DOUBLE, next, 123, MPI_COMM_WORLD, &req[0]);
	MPI_Irecv(&qbf, 1, MPI_DOUBLE, prev, 123, MPI_COMM_WORLD, &req[1]);
	MPI_Waitall(2, req, stat);
	fstar[0] = -((qbf+qb[0])/2*speed + fabs(speed)*(qbf-qb[0])/2);
	MPI_Isend(&fstar[0], 1, MPI_DOUBLE, prev, 321, MPI_COMM_WORLD, &req[2]);
	MPI_Irecv(&fsta, 1, MPI_DOUBLE, next, 321, MPI_COMM_WORLD, &req[3]);
	MPI_Waitall(2, req+2, stat+2);
	fstar[ne+ne-1] = -fsta;

	for(ii=1;ii<ne;ii++){
		fstar[ii] = -((qb[ne+ii-1]+qb[ii])/2*speed + fabs(speed)*(qb[ne+ii-1]-qb[ii])/2);
		fstar[ne+ii-1] = -fstar[ii];
	}

	/*
	fstar[0] = -((qb[ne+ne-1]+qb[0])/2*speed + fabs(speed)*(qb[ne+ne-1]-qb[0])/2);
	fstar[ne+ne-1] = -fstar[0];
	for(ii=1;ii<ne;ii++){
		fstar[ii] = -((qb[ne+ii-1]+qb[ii])/2*speed + fabs(speed)*(qb[ne+ii-1]-qb[ii])/2);
		fstar[ne+ii-1] = -fstar[ii];
	}
	*/
}

void rhs(double *qq, double *rr, double *dv, double *df, double *ib, double speed, int nprocs, int myrank){
	int ii, jj, ee, idx;
	double fstar[2*ne], term;
	for(ii=0;ii<np*ne;ii++){
		rr[ii] = 0;
	}

	for(ee=0;ee<ne;ee++){
		for(ii=0;ii<np;ii++){
			for(jj=0; jj<np; jj++){
				rr[ee*np+ii] += dv[ee*np*np+jj*np+ii]*speed*qq[ee*np+jj];
			}
		}
	}

	interface_flux(qq, fstar, ib, speed, nprocs, myrank);

	for(ee=0;ee<ne;ee++){
		for(ii=0; ii<np; ii++){
			for(jj=0;jj<2;jj++){
				rr[ee*np+ii] -= df[ee*(np*2)+jj*np+ii]*fstar[ee+jj*ne];
			}
		}
	}
}




'Parallel Programming' 카테고리의 다른 글

MPI Function Dictionary  (1) 2016.10.04
KSC 2015 3번 문제 및 풀이  (0) 2016.10.01
KSC 2015 1번 문제 및 답안  (0) 2016.10.01
KSC 2014 3번 문제 및 답안  (0) 2016.10.01
KSC 2014 2번 문제 및 답안  (0) 2016.10.01

댓글