Statistiques
| Révision :

root / Pi / C / Kokkos / Pi_Kokkos.cpp @ 196

Historique | Voir | Annoter | Télécharger (7,75 ko)

1
/*
2
//@HEADER
3
// ************************************************************************
4
// 
5
//                        Kokkos v. 2.0
6
//              Copyright (2014) Sandia Corporation
7
// 
8
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9
// the U.S. Government retains certain rights in this software.
10
// 
11
// Redistribution and use in source and binary forms, with or without
12
// modification, are permitted provided that the following conditions are
13
// met:
14
//
15
// 1. Redistributions of source code must retain the above copyright
16
// notice, this list of conditions and the following disclaimer.
17
//
18
// 2. Redistributions in binary form must reproduce the above copyright
19
// notice, this list of conditions and the following disclaimer in the
20
// documentation and/or other materials provided with the distribution.
21
//
22
// 3. Neither the name of the Corporation nor the names of the
23
// contributors may be used to endorse or promote products derived from
24
// this software without specific prior written permission.
25
//
26
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
//
38
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39
// 
40
// ************************************************************************
41
//@HEADER
42
*/
43

    
44
#include <Kokkos_Core.hpp>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <typeinfo>
#include <math.h>
#include <sys/time.h>
49

    
50
// Marsaglia's simple random number generators, written as macros.
// Each macro advances generator state held in variables that must be in
// scope where the macro is expanded:
//   MWC  uses z and w
//   SHR3 uses jsr
//   CONG uses jcong
//   KISS uses all four
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are joined with the comma operator so that each
// update of jsr is sequenced before the next step reads it. Nesting the
// assignments inside a single ^ expression would leave those accesses
// unsequenced, which is undefined behavior.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating-point variants: the integer draw scaled by approximately 2^-32.
// NOTE: these bodies are intentionally not parenthesized. The TFP64 call
// sites write `(double)XXXfp`, which expands to `(double)XXX * scale`, i.e.
// the cast applies to the integer draw before the multiplication. Wrapping
// the bodies in parentheses would change that arithmetic.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f

// Default total number of iterations.
#define ITERATIONS 1000000000

// Default number of parallel work items.
#define PARALLELRATE 1024

// Integer type used for iteration and hit counters: `long long` when LONG is
// defined at compile time, `int` otherwise.
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif
72

    
73
// On Kokkos, vector, matrix and + are "View"s
74
typedef Kokkos::View<LENGTH*> view;
75

    
76
struct splitter {
77

    
78
  view Inside;
79
  unsigned int seed_w;
80
  unsigned int seed_z;
81
  LENGTH iterations;
82
  
83
  splitter(view Inside_,LENGTH iterations,unsigned int seed_w_,unsigned int seed_z_) :
84
    Inside (Inside_),iterations (iterations),seed_w (seed_w_),seed_z (seed_z_)
85
  {}
86
  
87
  KOKKOS_INLINE_FUNCTION
88
  void operator() (int i) const {
89

    
90
    // MainLoopGlobal totally copied inside operator()
91
#if defined TCONG
92
   unsigned int jcong=seed_z+i;
93
#elif defined TSHR3
94
   unsigned int jsr=seed_w+i;
95
#elif defined TMWC
96
   unsigned int z=seed_z+i;
97
   unsigned int w=seed_w-i;
98
#elif defined TKISS
99
   unsigned int jcong=seed_z+i;
100
   unsigned int jsr=seed_w-i;
101
   unsigned int z=seed_z+i;
102
   unsigned int w=seed_w-i;
103
#endif
104

    
105
   LENGTH total=0;
106
   
107
   for (LENGTH i=0;i<iterations;i++) {
108
#if defined TINT32
109
    #define THEONE 1073741824
110
    #if defined TCONG
111
        unsigned int x=CONG>>17 ;
112
        unsigned int y=CONG>>17 ;
113
    #elif defined TSHR3
114
        unsigned int x=SHR3>>17 ;
115
        unsigned int y=SHR3>>17 ;
116
    #elif defined TMWC
117
        unsigned int x=MWC>>17 ;
118
        unsigned int y=MWC>>17 ;
119
    #elif defined TKISS
120
        unsigned int x=KISS>>17 ;
121
        unsigned int y=KISS>>17 ;
122
    #endif
123
#elif defined TINT64
124
    #define THEONE 4611686018427387904
125
    #if defined TCONG
126
        unsigned long x=(unsigned long)(CONG>>1) ;
127
        unsigned long y=(unsigned long)(CONG>>1) ;
128
    #elif defined TSHR3
129
        unsigned long x=(unsigned long)(SHR3>>1) ;
130
        unsigned long y=(unsigned long)(SHR3>>1) ;
131
    #elif defined TMWC
132
        unsigned long x=(unsigned long)(MWC>>1) ;
133
        unsigned long y=(unsigned long)(MWC>>1) ;
134
    #elif defined TKISS
135
        unsigned long x=(unsigned long)(KISS>>1) ;
136
        unsigned long y=(unsigned long)(KISS>>1) ;
137
    #endif
138
#elif defined TFP32
139
    #define THEONE 1.0f
140
    #if defined TCONG
141
        float x=CONGfp ;
142
        float y=CONGfp ;
143
    #elif defined TSHR3
144
        float x=SHR3fp ;
145
        float y=SHR3fp ;
146
    #elif defined TMWC
147
        float x=MWCfp ;
148
        float y=MWCfp ;
149
    #elif defined TKISS
150
      float x=KISSfp ;
151
      float y=KISSfp ;
152
    #endif
153
#elif defined TFP64
154
    #define THEONE 1.0f
155
    #if defined TCONG
156
        double x=(double)CONGfp ;
157
        double y=(double)CONGfp ;
158
    #elif defined TSHR3
159
        double x=(double)SHR3fp ;
160
        double y=(double)SHR3fp ;
161
    #elif defined TMWC
162
        double x=(double)MWCfp ;
163
        double y=(double)MWCfp ;
164
    #elif defined TKISS
165
        double x=(double)KISSfp ;
166
        double y=(double)KISSfp ;
167
    #endif
168
#endif
169

    
170
        unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
171
        total+=inside;
172
   }
173
    Inside(i)=total;
174
  }
175
};
176

    
177
struct print {
178

    
179
  view Inside;
180

    
181
  print(view Inside_) :
182
    Inside (Inside_)
183
  {}
184
  
185
  KOKKOS_INLINE_FUNCTION
186
  void operator() (const int i) const {
187
    printf ("Inside of %i = %lld\n", i,Inside(i));
188
  }
189
};
190

    
191
// Reduction functor that reads the View given to its constructor.
192
struct ReduceFunctor {
193
  view Inside;
194

    
195
  ReduceFunctor (view Inside_) : Inside (Inside_) {}
196

    
197
  typedef LENGTH value_type;
198

    
199
  KOKKOS_INLINE_FUNCTION
200
  void operator() (int i, LENGTH &lsum) const {
201
    lsum += Inside(i);
202
  }
203
};
204

    
205
int main (int argc, char* argv[]) {
206

    
207
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
208
  LENGTH iterations=ITERATIONS,insides=0;
209
  struct timeval tv1,tv2;
210
  struct timezone tz;
211

    
212
  if (argc > 1) {
213
    iterations=(LENGTH)atoll(argv[1]);
214
    ParallelRate=atoi(argv[2]);
215
  }
216
  else {
217
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
218
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
219
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
220
  }
221

    
222
  printf ("\n\tInformation about architecture:\n\n");
223

    
224
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
225
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
226
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
227

    
228
  printf ("\tMax int = %u\n", INT_MAX);
229
  printf ("\tMax long = %ld\n", LONG_MAX);
230
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
231
  
232
  Kokkos::initialize (argc, argv);
233

    
234
  printf ("Pi Dart Dash on Kokkos execution space %s\n",
235
          typeid (Kokkos::DefaultExecutionSpace).name ());
236

    
237
  view Inside("Inside",ParallelRate);
238
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
239

    
240
  gettimeofday(&tv1, &tz);
241

    
242

    
243
  // Core of Kokkos : parallel_for & parallel_reduce
244
  Kokkos::parallel_for (ParallelRate,splitter(Inside,IterationsEach,seed_w,seed_z));
245
  //  Kokkos::parallel_for (ParallelRate,print(Inside));
246
  Kokkos::parallel_reduce (ParallelRate, ReduceFunctor (Inside), insides);
247

    
248
  gettimeofday(&tv2, &tz);
249
        
250
  Kokkos::finalize ();
251

    
252
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
253
                          (tv2.tv_usec-tv1.tv_usec))/1000000;  
254

    
255
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
256
  
257
  printf("\n");
258

    
259
  printf("Inside/Total %lld %lld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nPi estimation %f\n\n",(long long)insides,(long long)ParallelRate*IterationsEach,ParallelRate,elapsed,itops,(4.*(float)insides/((float)(ParallelRate)*(float)(IterationsEach))));
260
  
261
}
262