// Pi_SyCL.cpp -- repository path: root / Pi / C / SyCL / Pi_SyCL.cpp, revision 309

// To create and activate a complete SYCL environment on Debian Bookworm:
// Create...
// export DPCPP_HOME=$PWD/sycl_workspace
// mv $DPCPP_HOME ${DPCPP_HOME}-$(date "+%Y%m%d-%H%M")
// mkdir $DPCPP_HOME
// cd $DPCPP_HOME
// git clone https://github.com/intel/llvm -b sycl
// python3 $DPCPP_HOME/llvm/buildbot/configure.py --cuda
// python3 $DPCPP_HOME/llvm/buildbot/compile.py
// python3 $DPCPP_HOME/llvm/buildbot/check.py
// Use...
// export PATH=$DPCPP_HOME/llvm/build/bin:$PATH
// export LD_LIBRARY_PATH=$DPCPP_HOME/llvm/build/lib:$LD_LIBRARY_PATH
// clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda -DLONG -DTIME -DTFP32 -DTMWC Pi_SyCL.cpp -o Pi_SyCL_FP32_MWC -lm
// ./Pi_SyCL_FP32_MWC 1000000000 1024

#include <math.h>
#include <sys/time.h>

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <sycl/sycl.hpp>

// Marsaglia RNG very simple implementation.
//
// Each generator is a macro that updates state variables which the caller
// must declare under these exact names:
//   MWC  uses `unsigned int z` and `unsigned int w`
//   SHR3 uses `unsigned int jsr`
//   CONG uses `unsigned int jcong`
//   KISS uses all four and combines the three generators above.
// Every expansion advances the state and evaluates to the next draw
// (the arithmetic assumes a 32-bit unsigned int).
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three shift/xor steps are separated by the comma operator so that each
// one is sequenced before the next. The former nested-assignment spelling
// modified and read jsr in unsequenced operands of '^', which is undefined
// behaviour in C++.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating point variants: the integer draw scaled by roughly 2^-32.
// NOTE: these are not parenthesised. In an expression such as
// `(double)MWCfp` the cast therefore applies to the integer draw and the
// multiplication is carried out in double; wrapping the macros in
// parentheses would change that arithmetic.
#define MWCfp MWC*2.328306435454494e-10f
#define KISSfp KISS*2.328306435454494e-10f
#define SHR3fp SHR3*2.328306435454494e-10f
#define CONGfp CONG*2.328306435454494e-10f

// Default number of iterations when none is given on the command line.
#define ITERATIONS 1000000000

// Default number of parallel work-items when none is given on the command line.
#define PARALLELRATE 1024

// Counter type used for iteration and hit counts.
// Compile with -DLONG to count with long long instead of int.
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif

// Fallbacks so the file still builds when no selection macro is given on the
// compiler command line: without them neither the generator state nor the
// coordinates x and y would be declared below.
#if !defined(TCONG) && !defined(TSHR3) && !defined(TMWC) && !defined(TKISS)
#define TMWC
#endif
#if !defined(TINT32) && !defined(TINT64) && !defined(TFP32) && !defined(TFP64)
#define TFP32
#endif

// Monte Carlo hit counter for one worker.
//
// Draws `iterations` points (x,y) from the generator selected at compile time
// (TCONG, TSHR3, TMWC or TKISS) in the representation selected at compile
// time (TINT32, TINT64, TFP32 or TFP64), and counts the points for which
// x*x+y*y is strictly below THEONE.
//
//   iterations : number of points to draw
//   seed_w     : initial value of the `jsr` and `w` state variables
//   seed_z     : initial value of the `jcong` and `z` state variables
//
// Returns the number of points counted as inside.
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
{
   // Generator state. The RNG macros refer to these variables by name.
#if defined TCONG
   unsigned int jcong=seed_z;
#elif defined TSHR3
   unsigned int jsr=seed_w;
#elif defined TMWC
   unsigned int z=seed_z;
   unsigned int w=seed_w;
#elif defined TKISS
   unsigned int jcong=seed_z;
   unsigned int jsr=seed_w;
   unsigned int z=seed_z;
   unsigned int w=seed_w;
#endif

   LENGTH total=0;

   for (LENGTH i=0;i<iterations;i++) {

      // x is always drawn before y, so the sequence of draws is fixed.
#if defined TINT32
      // Draws are reduced to 15 bits, so x*x+y*y stays within 32 bits.
      #define THEONE 1073741824
   #if defined TCONG
      unsigned int x=CONG>>17 ;
      unsigned int y=CONG>>17 ;
   #elif defined TSHR3
      unsigned int x=SHR3>>17 ;
      unsigned int y=SHR3>>17 ;
   #elif defined TMWC
      unsigned int x=MWC>>17 ;
      unsigned int y=MWC>>17 ;
   #elif defined TKISS
      unsigned int x=KISS>>17 ;
      unsigned int y=KISS>>17 ;
   #endif
#elif defined TINT64
      // Draws are reduced to 31 bits, so x*x+y*y stays within 64 bits.
      // unsigned long long is used so the arithmetic is 64-bit on every ABI.
      #define THEONE 4611686018427387904ULL
   #if defined TCONG
      unsigned long long x=(unsigned long long)(CONG>>1) ;
      unsigned long long y=(unsigned long long)(CONG>>1) ;
   #elif defined TSHR3
      unsigned long long x=(unsigned long long)(SHR3>>1) ;
      unsigned long long y=(unsigned long long)(SHR3>>1) ;
   #elif defined TMWC
      unsigned long long x=(unsigned long long)(MWC>>1) ;
      unsigned long long y=(unsigned long long)(MWC>>1) ;
   #elif defined TKISS
      unsigned long long x=(unsigned long long)(KISS>>1) ;
      unsigned long long y=(unsigned long long)(KISS>>1) ;
   #endif
#elif defined TFP32
      #define THEONE (float)1.0f
   #if defined TCONG
      float x=CONGfp ;
      float y=CONGfp ;
   #elif defined TSHR3
      float x=SHR3fp ;
      float y=SHR3fp ;
   #elif defined TMWC
      float x=MWCfp ;
      float y=MWCfp ;
   #elif defined TKISS
      float x=KISSfp ;
      float y=KISSfp ;
   #endif
#elif defined TFP64
      #define THEONE (double)1.0f
      // The cast binds to the integer draw inside the *fp macro, so the
      // scaling multiplication is evaluated in double.
   #if defined TCONG
      double x=(double)CONGfp ;
      double y=(double)CONGfp ;
   #elif defined TSHR3
      double x=(double)SHR3fp ;
      double y=(double)SHR3fp ;
   #elif defined TMWC
      double x=(double)MWCfp ;
      double y=(double)MWCfp ;
   #elif defined TKISS
      double x=(double)KISSfp ;
      double y=(double)KISSfp ;
   #endif
#endif

      // Strict comparison: a point exactly at THEONE is not counted.
      if ((x*x+y*y) < THEONE) {
         total++;
      }
   }

   return(total);
}

// Runs the Monte Carlo count on ParallelRate SYCL work-items and reports timing.
//
// The requested number of iterations is divided between the work-items,
// rounding up, so ParallelRate*IterationsEach may exceed `iterations`.
// Work-item i calls MainLoopGlobal with the seeds seed_w+i and seed_z+i.
// The per-work-item counts, the elapsed time and the iteration rate are
// printed on standard output.
//
//   iterations   : total number of iterations requested
//   seed_w       : base seed forwarded to MainLoopGlobal
//   seed_z       : base seed forwarded to MainLoopGlobal
//   ParallelRate : number of work-items; 0 yields 0 without running anything
//
// Returns the sum of the counts returned by all work-items.
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate)
{
  // Nothing can run on zero work-items, and the split below would divide by zero.
  if (ParallelRate == 0) {
    return 0;
  }

  // Ceiling division, done in long long so that an int LENGTH is not
  // silently converted to unsigned arithmetic.
  const long long rate=ParallelRate;
  const long long each=((iterations%rate)==0)?iterations/rate:iterations/rate+1;
  const LENGTH IterationsEach=(LENGTH)each;

  LENGTH insides=0;
  struct timeval tv1,tv2;

  gettimeofday(&tv1, nullptr);

  // The buffer owns its storage: there is no separate host array that could be
  // read behind the runtime's back, and nothing to free by hand.
  sycl::buffer<LENGTH> insideBuf{sycl::range<1>{ParallelRate}};

  // Creating SYCL queue
  sycl::queue Queue;

  Queue.submit([&](sycl::handler &h) {
    // The kernel only writes results, so the initial contents need not be
    // transferred to the device.
    sycl::accessor Ainside{insideBuf, h, sycl::write_only, sycl::no_init};

    // Executing kernel
    h.parallel_for(sycl::range<1>{ParallelRate},[=](sycl::item<1> item) {
      const size_t index=item.get_id(0);
      const unsigned int offset=(unsigned int)index;
      Ainside[index]=MainLoopGlobal(IterationsEach,seed_w+offset,seed_z+offset);
    });
  });

  // Getting read only access to the buffer on the host.
  // Implicit barrier waiting for queue to complete the work.
  sycl::host_accessor HostAccessor{insideBuf, sycl::read_only};

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    insides+=HostAccessor[i];
  }

  gettimeofday(&tv2, nullptr);

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for ParallelRate %u\n",(long long)HostAccessor[i],i);
  }
  printf("\n");

  const double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
                                (tv2.tv_usec-tv1.tv_usec))/1000000;

  // Multiply in double: the integer product can exceed the range of the
  // operand types.
  const double itops=((double)ParallelRate*(double)IterationsEach)/elapsed;

  printf("ParallelRate %u\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",ParallelRate,elapsed,itops,log10(itops));

  return(insides);
}

using namespace std;
187 309 equemene
int main(int argc, char *argv[]) {
188 309 equemene
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
189 309 equemene
190 309 equemene
  LENGTH iterations=ITERATIONS,insides=0;
191 309 equemene
192 309 equemene
  if (argc > 1) {
193 309 equemene
    iterations=(LENGTH)atoll(argv[1]);
194 309 equemene
    if (argc > 2) {
195 309 equemene
      ParallelRate=atoi(argv[2]);
196 309 equemene
    }
197 309 equemene
  }
198 309 equemene
  else {
199 309 equemene
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
200 309 equemene
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
201 309 equemene
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
202 309 equemene
  }
203 309 equemene
204 309 equemene
  printf ("\n\tInformation about architecture:\n\n");
205 309 equemene
206 309 equemene
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
207 309 equemene
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
208 309 equemene
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
209 309 equemene
210 309 equemene
  printf ("\tMax int = %u\n", INT_MAX);
211 309 equemene
  printf ("\tMax long = %ld\n", LONG_MAX);
212 309 equemene
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
213 309 equemene
214 309 equemene
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
215 309 equemene
216 309 equemene
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
217 309 equemene
218 309 equemene
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
219 309 equemene
220 309 equemene
}