Révision 309

Pi/C/SyCL/Makefile (revision 309)
1
# Compile all versions of Pi_SyCL for NVIDIA GPUs (CUDA backend)
2
# Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
3

  
4
# Single source file from which every variant is compiled.
SOURCE := Pi_SyCL.cpp

# Arithmetic flavours and Marsaglia generators; each is passed to the
# compiler as -DT<name>, and every (flavour, generator) pair is built.
COMPUTING := INT32 INT64 FP32 FP64
MARSAGLIA := SHR3 CONG MWC KISS

# SYCL-enabled clang++ targeting the NVIDIA PTX backend.
CC := clang++
CFLAGS := -fsycl -fsycl-targets=nvptx64-nvidia-cuda
LIBRARY := -lm
12

  
13
# Build one executable per (computing type, generator) pair from $(SOURCE).
# Each binary is named <source stem>_<TYPE>_<RNG> and is compiled with
# -DT<TYPE> -DT<RNG> -DLONG -DTIME.
# The commands are chained in a single shell line; `|| exit 1` stops at the
# first failed compilation so that a later successful one cannot mask it.
.PHONY: all
all: $(SOURCE)
	$(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(CC) $(CFLAGS) -DT$(TVAR) -DT$(TRND) -DLONG -DTIME -o $(<:.cpp=)_$(TVAR)_$(TRND) $< $(LIBRARY) || exit 1; ) )
16

  
17
.PHONY: clean check mrproper

# Remove editor backup files (*~) found below the current directory.
clean:
	find . -name "*~" -exec rm {} \;

# Remove every executable produced by `all` (one per source stem, computing
# type and generator), then the editor backup files.
mrproper:
	rm -rf $(foreach SRC,$(SOURCE:.cpp=),$(foreach TVAR,$(COMPUTING),$(addprefix $(SRC)_$(TVAR)_,$(MARSAGLIA))))
	find . -name "*~" -exec rm {} \;
26

  
Pi/C/SyCL/Pi_SyCL.cpp (revision 309)
1
// To create and activate a complete SYCL environment on Debian Bookworm
2
// Create...
3
// export DPCPP_HOME=$PWD/sycl_workspace
4
// mv $DPCPP_HOME ${DPCPP_HOME}-$(date "+%Y%m%d-%H%M")
5
// mkdir $DPCPP_HOME
6
// cd $DPCPP_HOME
7
// git clone https://github.com/intel/llvm -b sycl
8
// python3 $DPCPP_HOME/llvm/buildbot/configure.py --cuda
9
// python3 $DPCPP_HOME/llvm/buildbot/compile.py
10
// python3 $DPCPP_HOME/llvm/buildbot/check.py
11
// Use...
12
// export PATH=$DPCPP_HOME/llvm/build/bin:$PATH
13
// export LD_LIBRARY_PATH=$DPCPP_HOME/llvm/build/lib:$LD_LIBRARY_PATH
14
// clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda -DTFP32 -DTMWC -DLONG -DTIME Pi_SyCL.cpp -o Pi_SyCL_FP32_MWC -lm
15
// ./Pi_SyCL_FP32_MWC 1000000000 1024
16

  
17
#include <iostream>
18
#include <sycl/sycl.hpp>
19
#include <math.h>
20
#include <sys/time.h>
21

  
22
// Marsaglia RNG very simple implementation
23
// Each macro below is an expression that both updates and yields generator
// state. The state variables (z, w, jsr, jcong) are not parameters: they must
// be in scope where the macro is expanded (MainLoopGlobal declares them as
// unsigned int).
// znew: updates z, then yields the new z shifted left by 16 bits.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
24
// wnew: updates w, then yields the low 16 bits of the new w.
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
25
// MWC: sum of the two halves above; advances both z and w.
#define MWC   (znew+wnew)
26
// SHR3: three chained xor-shift steps on jsr (left 17, right 13, left 5).
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
27
// CONG: jcong <- 69069*jcong + 1234567.
#define CONG  (jcong=69069*jcong+1234567)
28
// KISS: (MWC xor CONG) + SHR3; advances z, w, jcong and jsr.
#define KISS  ((MWC^CONG)+SHR3)
29

  
30
// Floating-point draws: the integer draw multiplied by the float constant
// 2.328306435454494e-10f (approximately 1/2^32).
// The expansions are not wrapped in parentheses. As a consequence, in an
// expression such as `(double)CONGfp` the cast binds to the integer draw only
// and the multiplication is then carried out in double; adding parentheses
// here would change that result.
#define MWCfp MWC*2.328306435454494e-10f
31
#define KISSfp KISS*2.328306435454494e-10f
32
#define SHR3fp SHR3*2.328306435454494e-10f
33
#define CONGfp CONG*2.328306435454494e-10f
34

  
35
// Default number of iterations, used by main() when no argument is given.
#define ITERATIONS 1000000000
36

  
37
// Default number of parallel work items, used by main() when not overridden.
#define PARALLELRATE 1024
38

  
39
// LENGTH is the integer type used for iteration and hit counters:
// long long when compiled with -DLONG, int otherwise.
#ifdef LONG
40
#define LENGTH long long
41
#else
42
#define LENGTH int
43
#endif
44

  
45
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
46
{
47
#if defined TCONG
48
   unsigned int jcong=seed_z;
49
#elif defined TSHR3
50
   unsigned int jsr=seed_w;
51
#elif defined TMWC
52
   unsigned int z=seed_z;
53
   unsigned int w=seed_w;
54
#elif defined TKISS
55
   unsigned int jcong=seed_z;
56
   unsigned int jsr=seed_w;
57
   unsigned int z=seed_z;
58
   unsigned int w=seed_w;
59
#endif
60

  
61
   LENGTH total=0,i;
62
   unsigned long inside;
63
     
64
   for (i=0;i<iterations;i++) {
65

  
66
#if defined TINT32
67
    #define THEONE 1073741824
68
    #if defined TCONG
69
        unsigned int x=CONG>>17 ;
70
        unsigned int y=CONG>>17 ;
71
    #elif defined TSHR3
72
        unsigned int x=SHR3>>17 ;
73
        unsigned int y=SHR3>>17 ;
74
    #elif defined TMWC
75
        unsigned int x=MWC>>17 ;
76
        unsigned int y=MWC>>17 ;
77
    #elif defined TKISS
78
        unsigned int x=KISS>>17 ;
79
        unsigned int y=KISS>>17 ;
80
    #endif
81
#elif defined TINT64
82
    #define THEONE 4611686018427387904
83
    #if defined TCONG
84
        unsigned long x=(unsigned long)(CONG>>1) ;
85
        unsigned long y=(unsigned long)(CONG>>1) ;
86
    #elif defined TSHR3
87
        unsigned long x=(unsigned long)(SHR3>>1) ;
88
        unsigned long y=(unsigned long)(SHR3>>1) ;
89
    #elif defined TMWC
90
        unsigned long x=(unsigned long)(MWC>>1) ;
91
        unsigned long y=(unsigned long)(MWC>>1) ;
92
    #elif defined TKISS
93
        unsigned long x=(unsigned long)(KISS>>1) ;
94
        unsigned long y=(unsigned long)(KISS>>1) ;
95
    #endif
96
#elif defined TFP32
97
#define THEONE (float)1.0f
98
    #if defined TCONG
99
        float x=CONGfp ;
100
        float y=CONGfp ;
101
    #elif defined TSHR3
102
        float x=SHR3fp ;
103
        float y=SHR3fp ;
104
    #elif defined TMWC
105
        float x=MWCfp ;
106
        float y=MWCfp ;
107
    #elif defined TKISS
108
      float x=KISSfp ;
109
      float y=KISSfp ;
110
    #endif
111
#elif defined TFP64
112
#define THEONE (double)1.0f
113
    #if defined TCONG
114
        double x=(double)CONGfp ;
115
        double y=(double)CONGfp ;
116
    #elif defined TSHR3
117
        double x=(double)SHR3fp ;
118
        double y=(double)SHR3fp ;
119
    #elif defined TMWC
120
        double x=(double)MWCfp ;
121
        double y=(double)MWCfp ;
122
    #elif defined TKISS
123
        double x=(double)KISSfp ;
124
        double y=(double)KISSfp ;
125
    #endif
126
#endif
127

  
128
      inside=((x*x+y*y) < THEONE) ? 1:0;
129
      total+=inside;
130
   }
131

  
132
   return(total);
133
}
134

  
135
// Distributes the Monte Carlo run over ParallelRate SYCL work items.
// Every work item runs MainLoopGlobal for IterationsEach iterations (the
// iteration count divided by ParallelRate, rounded up) with seeds offset by
// its index. Per-item hit counts and timing figures are printed to stdout.
//   iterations   : total number of iterations requested
//   seed_w/z     : base seeds; work item i uses seed_w+i and seed_z+i
//   ParallelRate : number of work items (must be at least 1)
// Returns the sum of the hit counts of all work items, or 0 when
// ParallelRate is 0. Terminates the program if the result array cannot be
// allocated.
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate)
{
  // A zero ParallelRate would divide by zero below and launch no work at all.
  if (ParallelRate==0) {
    std::cerr << "splitter: ParallelRate must be at least 1" << std::endl;
    return(0);
  }

  LENGTH insides=0;
  struct timeval tv1,tv2;
  // Round up so that ParallelRate*IterationsEach covers all requested iterations.
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

  LENGTH *inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
  if (inside==NULL) {
    std::cerr << "splitter: unable to allocate the result array" << std::endl;
    exit(1);
  }

  gettimeofday(&tv1, NULL);

  {
    // The buffer owns inside[] for as long as it lives: the host array must
    // not be read or freed until the buffer has been destroyed.
    sycl::buffer<LENGTH> insideBuf(&inside[0],ParallelRate);

    // Creating SYCL queue
    sycl::queue Queue;

    Queue.submit([&](auto &h) {
      // Every element is overwritten by the kernel, so the (uninitialised)
      // host content does not need to be transferred to the device.
      sycl::accessor Ainside{insideBuf, h, sycl::write_only, sycl::no_init};

      // Executing kernel
      h.parallel_for(ParallelRate,[=](auto i) {
        Ainside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
      });
    });
  } // Buffer destruction waits for the kernel and copies the results back into inside[].

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    insides+=inside[i];
  }

  gettimeofday(&tv2, NULL);

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for ParallelRate %u\n",(long long)inside[i],i);
  }
  printf("\n");

  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
  			  (tv2.tv_usec-tv1.tv_usec))/1000000;

  // Multiply in double: the integer product can overflow when LENGTH is int.
  double itops=((double)ParallelRate*(double)IterationsEach)/elapsed;

  printf("ParallelRate %u\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",ParallelRate,elapsed,itops,log10(itops));

  free(inside);

  return(insides);
}
185

  
186
using namespace std;
187
int main(int argc, char *argv[]) {
188
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
189

  
190
  LENGTH iterations=ITERATIONS,insides=0;
191
  
192
  if (argc > 1) {
193
    iterations=(LENGTH)atoll(argv[1]);
194
    if (argc > 2) {
195
      ParallelRate=atoi(argv[2]);
196
    }
197
  }
198
  else {
199
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
200
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
201
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
202
  }
203

  
204
  printf ("\n\tInformation about architecture:\n\n");
205

  
206
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
207
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
208
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
209

  
210
  printf ("\tMax int = %u\n", INT_MAX);
211
  printf ("\tMax long = %ld\n", LONG_MAX);
212
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
213

  
214
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
215

  
216
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
217

  
218
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
219
  
220
}

Formats disponibles : Unified diff