Révision 282

Pi/C/OpenACC/Pi_OpenACC.c (revision 282)
1 1
//
2 2
// Estimation of Pi using Monte Carlo exploration process
3 3
// CeCILL v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// Exploit OpenACC on Nvidia GPU
5
// module load 
6
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm 
4
// Tested on GCC-10
5
// gcc -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35" Pi_OpenACC.c -lm
7 6
//
8 7

  
9 8
#include <math.h>
......
139 138
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
140 139

  
141 140
  inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
142
  
143
#if _OPENACC
144
  acc_init(acc_device_nvidia);
145
#endif
146 141

  
147 142
  gettimeofday(&tv1, &tz);
148
#pragma omp parallel for shared(ParallelRate,inside)
149
#pragma acc kernels loop
150
  for (int i=0 ; i<ParallelRate; i++) {
151
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
152
  }
153 143

  
144
#pragma acc kernels copy(inside[0:ParallelRate])
145
  {
146
    #pragma acc independant
147
    for (int i=0 ; i<ParallelRate; i++) {
148
      inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
149
    }
150
  }
154 151
  gettimeofday(&tv2, &tz);
155 152
  
156 153
  for (int i=0 ; i<ParallelRate; i++) {
......
163 160
			  (tv2.tv_usec-tv1.tv_usec))/1000000;
164 161

  
165 162
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
166
  
167
  printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);
168 163

  
164
  printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",insides,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops));
165

  
169 166
  free(inside);
170 167
  return(insides);
171 168
}
......
200 197

  
201 198
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
202 199

  
203
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
204
   
200
  printf("Pi estimation %f\n\n",total,(4.*(float)insides/total));
201

  
202

  
205 203
}
Pi/C/OpenACC/Makefile (revision 282)
6 6
COMPUTING=INT32 INT64 FP32 FP64
7 7
MARSAGLIA=SHR3 CONG MWC KISS
8 8

  
9
CC=pgcc
9
CC=gcc
10 10
CFLAGS=-O3 
11
LIBRARY=
12
ACCFLAGS=-acc -ta=tesla:cc60 -Minfo=accel
11
LIBRARY=-lm
12
ACCFLAGS=-fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35"
13 13

  
14 14
all: $(SOURCE)
15 15

  

Formats disponibles : Unified diff