Statistiques
| Révision :

root / Pi / C / Kokkos / Pi_Kokkos.cpp @ 308

Historique | Voir | Annoter | Télécharger (7,75 ko)

1 188 equemene
/*
2 188 equemene
//@HEADER
3 188 equemene
// ************************************************************************
4 188 equemene
//
5 188 equemene
//                        Kokkos v. 2.0
6 188 equemene
//              Copyright (2014) Sandia Corporation
7 188 equemene
//
8 188 equemene
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 188 equemene
// the U.S. Government retains certain rights in this software.
10 188 equemene
//
11 188 equemene
// Redistribution and use in source and binary forms, with or without
12 188 equemene
// modification, are permitted provided that the following conditions are
13 188 equemene
// met:
14 188 equemene
//
15 188 equemene
// 1. Redistributions of source code must retain the above copyright
16 188 equemene
// notice, this list of conditions and the following disclaimer.
17 188 equemene
//
18 188 equemene
// 2. Redistributions in binary form must reproduce the above copyright
19 188 equemene
// notice, this list of conditions and the following disclaimer in the
20 188 equemene
// documentation and/or other materials provided with the distribution.
21 188 equemene
//
22 188 equemene
// 3. Neither the name of the Corporation nor the names of the
23 188 equemene
// contributors may be used to endorse or promote products derived from
24 188 equemene
// this software without specific prior written permission.
25 188 equemene
//
26 188 equemene
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 188 equemene
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 188 equemene
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 188 equemene
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 188 equemene
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 188 equemene
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 188 equemene
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 188 equemene
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 188 equemene
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 188 equemene
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 188 equemene
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 188 equemene
//
38 188 equemene
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39 188 equemene
//
40 188 equemene
// ************************************************************************
41 188 equemene
//@HEADER
42 188 equemene
*/
43 188 equemene
44 188 equemene
#include <Kokkos_Core.hpp>
45 188 equemene
#include <cstdio>
46 188 equemene
#include <typeinfo>
47 188 equemene
#include <math.h>
48 188 equemene
#include <sys/time.h>
49 188 equemene
50 188 equemene
// Marsaglia RNG very simple implementation
51 188 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
52 188 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
53 188 equemene
#define MWC   (znew+wnew)
54 188 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
55 188 equemene
#define CONG  (jcong=69069*jcong+1234567)
56 188 equemene
#define KISS  ((MWC^CONG)+SHR3)
57 188 equemene
58 188 equemene
#define MWCfp MWC * 2.328306435454494e-10f
59 188 equemene
#define KISSfp KISS * 2.328306435454494e-10f
60 188 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
61 188 equemene
#define CONGfp CONG * 2.328306435454494e-10f
62 188 equemene
63 188 equemene
#define ITERATIONS 1000000000
64 188 equemene
65 188 equemene
#define PARALLELRATE 1024
66 188 equemene
67 188 equemene
#ifdef LONG
68 188 equemene
#define LENGTH long long
69 188 equemene
#else
70 188 equemene
#define LENGTH int
71 188 equemene
#endif
72 188 equemene
73 188 equemene
// On Kokkos, vector, matrix and + are "View"s
74 188 equemene
typedef Kokkos::View<LENGTH*> view;
75 188 equemene
76 188 equemene
struct splitter {
77 188 equemene
78 188 equemene
  view Inside;
79 188 equemene
  unsigned int seed_w;
80 188 equemene
  unsigned int seed_z;
81 188 equemene
  LENGTH iterations;
82 188 equemene
83 188 equemene
  splitter(view Inside_,LENGTH iterations,unsigned int seed_w_,unsigned int seed_z_) :
84 188 equemene
    Inside (Inside_),iterations (iterations),seed_w (seed_w_),seed_z (seed_z_)
85 188 equemene
  {}
86 188 equemene
87 188 equemene
  KOKKOS_INLINE_FUNCTION
88 188 equemene
  void operator() (int i) const {
89 188 equemene
90 188 equemene
    // MainLoopGlobal totally copied inside operator()
91 188 equemene
#if defined TCONG
92 188 equemene
   unsigned int jcong=seed_z+i;
93 188 equemene
#elif defined TSHR3
94 188 equemene
   unsigned int jsr=seed_w+i;
95 188 equemene
#elif defined TMWC
96 188 equemene
   unsigned int z=seed_z+i;
97 188 equemene
   unsigned int w=seed_w-i;
98 188 equemene
#elif defined TKISS
99 188 equemene
   unsigned int jcong=seed_z+i;
100 188 equemene
   unsigned int jsr=seed_w-i;
101 188 equemene
   unsigned int z=seed_z+i;
102 188 equemene
   unsigned int w=seed_w-i;
103 188 equemene
#endif
104 188 equemene
105 188 equemene
   LENGTH total=0;
106 188 equemene
107 188 equemene
   for (LENGTH i=0;i<iterations;i++) {
108 188 equemene
#if defined TINT32
109 188 equemene
    #define THEONE 1073741824
110 188 equemene
    #if defined TCONG
111 188 equemene
        unsigned int x=CONG>>17 ;
112 188 equemene
        unsigned int y=CONG>>17 ;
113 188 equemene
    #elif defined TSHR3
114 188 equemene
        unsigned int x=SHR3>>17 ;
115 188 equemene
        unsigned int y=SHR3>>17 ;
116 188 equemene
    #elif defined TMWC
117 188 equemene
        unsigned int x=MWC>>17 ;
118 188 equemene
        unsigned int y=MWC>>17 ;
119 188 equemene
    #elif defined TKISS
120 188 equemene
        unsigned int x=KISS>>17 ;
121 188 equemene
        unsigned int y=KISS>>17 ;
122 188 equemene
    #endif
123 188 equemene
#elif defined TINT64
124 188 equemene
    #define THEONE 4611686018427387904
125 188 equemene
    #if defined TCONG
126 188 equemene
        unsigned long x=(unsigned long)(CONG>>1) ;
127 188 equemene
        unsigned long y=(unsigned long)(CONG>>1) ;
128 188 equemene
    #elif defined TSHR3
129 188 equemene
        unsigned long x=(unsigned long)(SHR3>>1) ;
130 188 equemene
        unsigned long y=(unsigned long)(SHR3>>1) ;
131 188 equemene
    #elif defined TMWC
132 188 equemene
        unsigned long x=(unsigned long)(MWC>>1) ;
133 188 equemene
        unsigned long y=(unsigned long)(MWC>>1) ;
134 188 equemene
    #elif defined TKISS
135 188 equemene
        unsigned long x=(unsigned long)(KISS>>1) ;
136 188 equemene
        unsigned long y=(unsigned long)(KISS>>1) ;
137 188 equemene
    #endif
138 188 equemene
#elif defined TFP32
139 188 equemene
    #define THEONE 1.0f
140 188 equemene
    #if defined TCONG
141 188 equemene
        float x=CONGfp ;
142 188 equemene
        float y=CONGfp ;
143 188 equemene
    #elif defined TSHR3
144 188 equemene
        float x=SHR3fp ;
145 188 equemene
        float y=SHR3fp ;
146 188 equemene
    #elif defined TMWC
147 188 equemene
        float x=MWCfp ;
148 188 equemene
        float y=MWCfp ;
149 188 equemene
    #elif defined TKISS
150 188 equemene
      float x=KISSfp ;
151 188 equemene
      float y=KISSfp ;
152 188 equemene
    #endif
153 188 equemene
#elif defined TFP64
154 188 equemene
    #define THEONE 1.0f
155 188 equemene
    #if defined TCONG
156 188 equemene
        double x=(double)CONGfp ;
157 188 equemene
        double y=(double)CONGfp ;
158 188 equemene
    #elif defined TSHR3
159 188 equemene
        double x=(double)SHR3fp ;
160 188 equemene
        double y=(double)SHR3fp ;
161 188 equemene
    #elif defined TMWC
162 188 equemene
        double x=(double)MWCfp ;
163 188 equemene
        double y=(double)MWCfp ;
164 188 equemene
    #elif defined TKISS
165 188 equemene
        double x=(double)KISSfp ;
166 188 equemene
        double y=(double)KISSfp ;
167 188 equemene
    #endif
168 188 equemene
#endif
169 188 equemene
170 188 equemene
        unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
171 188 equemene
        total+=inside;
172 188 equemene
   }
173 188 equemene
    Inside(i)=total;
174 188 equemene
  }
175 188 equemene
};
176 188 equemene
177 188 equemene
struct print {
178 188 equemene
179 188 equemene
  view Inside;
180 188 equemene
181 188 equemene
  print(view Inside_) :
182 188 equemene
    Inside (Inside_)
183 188 equemene
  {}
184 188 equemene
185 188 equemene
  KOKKOS_INLINE_FUNCTION
186 188 equemene
  void operator() (const int i) const {
187 188 equemene
    printf ("Inside of %i = %lld\n", i,Inside(i));
188 188 equemene
  }
189 188 equemene
};
190 188 equemene
191 188 equemene
// Reduction functor that reads the View given to its constructor.
192 188 equemene
struct ReduceFunctor {
193 188 equemene
  view Inside;
194 188 equemene
195 188 equemene
  ReduceFunctor (view Inside_) : Inside (Inside_) {}
196 188 equemene
197 188 equemene
  typedef LENGTH value_type;
198 188 equemene
199 188 equemene
  KOKKOS_INLINE_FUNCTION
200 188 equemene
  void operator() (int i, LENGTH &lsum) const {
201 188 equemene
    lsum += Inside(i);
202 188 equemene
  }
203 188 equemene
};
204 188 equemene
205 188 equemene
int main (int argc, char* argv[]) {
206 188 equemene
207 188 equemene
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
208 188 equemene
  LENGTH iterations=ITERATIONS,insides=0;
209 188 equemene
  struct timeval tv1,tv2;
210 188 equemene
  struct timezone tz;
211 188 equemene
212 188 equemene
  if (argc > 1) {
213 188 equemene
    iterations=(LENGTH)atoll(argv[1]);
214 188 equemene
    ParallelRate=atoi(argv[2]);
215 188 equemene
  }
216 188 equemene
  else {
217 188 equemene
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
218 188 equemene
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
219 188 equemene
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
220 188 equemene
  }
221 188 equemene
222 188 equemene
  printf ("\n\tInformation about architecture:\n\n");
223 188 equemene
224 188 equemene
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
225 188 equemene
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
226 188 equemene
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
227 188 equemene
228 188 equemene
  printf ("\tMax int = %u\n", INT_MAX);
229 188 equemene
  printf ("\tMax long = %ld\n", LONG_MAX);
230 188 equemene
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
231 188 equemene
232 188 equemene
  Kokkos::initialize (argc, argv);
233 188 equemene
234 188 equemene
  printf ("Pi Dart Dash on Kokkos execution space %s\n",
235 188 equemene
          typeid (Kokkos::DefaultExecutionSpace).name ());
236 188 equemene
237 188 equemene
  view Inside("Inside",ParallelRate);
238 188 equemene
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
239 188 equemene
240 188 equemene
  gettimeofday(&tv1, &tz);
241 188 equemene
242 188 equemene
243 188 equemene
  // Core of Kokkos : parallel_for & parallel_reduce
244 188 equemene
  Kokkos::parallel_for (ParallelRate,splitter(Inside,IterationsEach,seed_w,seed_z));
245 188 equemene
  //  Kokkos::parallel_for (ParallelRate,print(Inside));
246 188 equemene
  Kokkos::parallel_reduce (ParallelRate, ReduceFunctor (Inside), insides);
247 188 equemene
248 188 equemene
  gettimeofday(&tv2, &tz);
249 188 equemene
250 188 equemene
  Kokkos::finalize ();
251 188 equemene
252 188 equemene
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
253 188 equemene
                          (tv2.tv_usec-tv1.tv_usec))/1000000;
254 188 equemene
255 188 equemene
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
256 188 equemene
257 188 equemene
  printf("\n");
258 188 equemene
259 188 equemene
  printf("Inside/Total %lld %lld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nPi estimation %f\n\n",(long long)insides,(long long)ParallelRate*IterationsEach,ParallelRate,elapsed,itops,(4.*(float)insides/((float)(ParallelRate)*(float)(IterationsEach))));
260 188 equemene
261 188 equemene
}