root / Pi / C / Kokkos / Pi_Kokkos.cpp @ 284
Historique | Voir | Annoter | Télécharger (7,75 ko)
1 | 188 | equemene | /*
|
---|---|---|---|
2 | 188 | equemene | //@HEADER
|
3 | 188 | equemene | // ************************************************************************
|
4 | 188 | equemene | //
|
5 | 188 | equemene | // Kokkos v. 2.0
|
6 | 188 | equemene | // Copyright (2014) Sandia Corporation
|
7 | 188 | equemene | //
|
8 | 188 | equemene | // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
9 | 188 | equemene | // the U.S. Government retains certain rights in this software.
|
10 | 188 | equemene | //
|
11 | 188 | equemene | // Redistribution and use in source and binary forms, with or without
|
12 | 188 | equemene | // modification, are permitted provided that the following conditions are
|
13 | 188 | equemene | // met:
|
14 | 188 | equemene | //
|
15 | 188 | equemene | // 1. Redistributions of source code must retain the above copyright
|
16 | 188 | equemene | // notice, this list of conditions and the following disclaimer.
|
17 | 188 | equemene | //
|
18 | 188 | equemene | // 2. Redistributions in binary form must reproduce the above copyright
|
19 | 188 | equemene | // notice, this list of conditions and the following disclaimer in the
|
20 | 188 | equemene | // documentation and/or other materials provided with the distribution.
|
21 | 188 | equemene | //
|
22 | 188 | equemene | // 3. Neither the name of the Corporation nor the names of the
|
23 | 188 | equemene | // contributors may be used to endorse or promote products derived from
|
24 | 188 | equemene | // this software without specific prior written permission.
|
25 | 188 | equemene | //
|
26 | 188 | equemene | // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
27 | 188 | equemene | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
28 | 188 | equemene | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
29 | 188 | equemene | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
30 | 188 | equemene | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
31 | 188 | equemene | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
32 | 188 | equemene | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
33 | 188 | equemene | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
34 | 188 | equemene | // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
35 | 188 | equemene | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
36 | 188 | equemene | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
37 | 188 | equemene | //
|
38 | 188 | equemene | // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
39 | 188 | equemene | //
|
40 | 188 | equemene | // ************************************************************************
|
41 | 188 | equemene | //@HEADER
|
42 | 188 | equemene | */
|
43 | 188 | equemene | |
44 | 188 | equemene | #include <Kokkos_Core.hpp> |
45 | 188 | equemene | #include <cstdio> |
46 | 188 | equemene | #include <typeinfo> |
47 | 188 | equemene | #include <math.h> |
48 | 188 | equemene | #include <sys/time.h> |
49 | 188 | equemene | |
50 | 188 | equemene | // Marsaglia RNG very simple implementation
|
51 | 188 | equemene | #define znew ((z=36969*(z&65535)+(z>>16))<<16) |
52 | 188 | equemene | #define wnew ((w=18000*(w&65535)+(w>>16))&65535) |
53 | 188 | equemene | #define MWC (znew+wnew)
|
54 | 188 | equemene | #define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) |
55 | 188 | equemene | #define CONG (jcong=69069*jcong+1234567) |
56 | 188 | equemene | #define KISS ((MWC^CONG)+SHR3)
|
57 | 188 | equemene | |
58 | 188 | equemene | #define MWCfp MWC * 2.328306435454494e-10f |
59 | 188 | equemene | #define KISSfp KISS * 2.328306435454494e-10f |
60 | 188 | equemene | #define SHR3fp SHR3 * 2.328306435454494e-10f |
61 | 188 | equemene | #define CONGfp CONG * 2.328306435454494e-10f |
62 | 188 | equemene | |
63 | 188 | equemene | #define ITERATIONS 1000000000 |
64 | 188 | equemene | |
65 | 188 | equemene | #define PARALLELRATE 1024 |
66 | 188 | equemene | |
67 | 188 | equemene | #ifdef LONG
|
68 | 188 | equemene | #define LENGTH long long |
69 | 188 | equemene | #else
|
70 | 188 | equemene | #define LENGTH int |
71 | 188 | equemene | #endif
|
72 | 188 | equemene | |
73 | 188 | equemene | // On Kokkos, vector, matrix and + are "View"s
|
74 | 188 | equemene | typedef Kokkos::View<LENGTH*> view;
|
75 | 188 | equemene | |
76 | 188 | equemene | struct splitter {
|
77 | 188 | equemene | |
78 | 188 | equemene | view Inside; |
79 | 188 | equemene | unsigned int seed_w; |
80 | 188 | equemene | unsigned int seed_z; |
81 | 188 | equemene | LENGTH iterations; |
82 | 188 | equemene | |
83 | 188 | equemene | splitter(view Inside_,LENGTH iterations,unsigned int seed_w_,unsigned int seed_z_) : |
84 | 188 | equemene | Inside (Inside_),iterations (iterations),seed_w (seed_w_),seed_z (seed_z_) |
85 | 188 | equemene | {} |
86 | 188 | equemene | |
87 | 188 | equemene | KOKKOS_INLINE_FUNCTION |
88 | 188 | equemene | void operator() (int i) const { |
89 | 188 | equemene | |
90 | 188 | equemene | // MainLoopGlobal totally copied inside operator()
|
91 | 188 | equemene | #if defined TCONG
|
92 | 188 | equemene | unsigned int jcong=seed_z+i; |
93 | 188 | equemene | #elif defined TSHR3
|
94 | 188 | equemene | unsigned int jsr=seed_w+i; |
95 | 188 | equemene | #elif defined TMWC
|
96 | 188 | equemene | unsigned int z=seed_z+i; |
97 | 188 | equemene | unsigned int w=seed_w-i; |
98 | 188 | equemene | #elif defined TKISS
|
99 | 188 | equemene | unsigned int jcong=seed_z+i; |
100 | 188 | equemene | unsigned int jsr=seed_w-i; |
101 | 188 | equemene | unsigned int z=seed_z+i; |
102 | 188 | equemene | unsigned int w=seed_w-i; |
103 | 188 | equemene | #endif
|
104 | 188 | equemene | |
105 | 188 | equemene | LENGTH total=0;
|
106 | 188 | equemene | |
107 | 188 | equemene | for (LENGTH i=0;i<iterations;i++) { |
108 | 188 | equemene | #if defined TINT32
|
109 | 188 | equemene | #define THEONE 1073741824 |
110 | 188 | equemene | #if defined TCONG
|
111 | 188 | equemene | unsigned int x=CONG>>17 ; |
112 | 188 | equemene | unsigned int y=CONG>>17 ; |
113 | 188 | equemene | #elif defined TSHR3
|
114 | 188 | equemene | unsigned int x=SHR3>>17 ; |
115 | 188 | equemene | unsigned int y=SHR3>>17 ; |
116 | 188 | equemene | #elif defined TMWC
|
117 | 188 | equemene | unsigned int x=MWC>>17 ; |
118 | 188 | equemene | unsigned int y=MWC>>17 ; |
119 | 188 | equemene | #elif defined TKISS
|
120 | 188 | equemene | unsigned int x=KISS>>17 ; |
121 | 188 | equemene | unsigned int y=KISS>>17 ; |
122 | 188 | equemene | #endif
|
123 | 188 | equemene | #elif defined TINT64
|
124 | 188 | equemene | #define THEONE 4611686018427387904 |
125 | 188 | equemene | #if defined TCONG
|
126 | 188 | equemene | unsigned long x=(unsigned long)(CONG>>1) ; |
127 | 188 | equemene | unsigned long y=(unsigned long)(CONG>>1) ; |
128 | 188 | equemene | #elif defined TSHR3
|
129 | 188 | equemene | unsigned long x=(unsigned long)(SHR3>>1) ; |
130 | 188 | equemene | unsigned long y=(unsigned long)(SHR3>>1) ; |
131 | 188 | equemene | #elif defined TMWC
|
132 | 188 | equemene | unsigned long x=(unsigned long)(MWC>>1) ; |
133 | 188 | equemene | unsigned long y=(unsigned long)(MWC>>1) ; |
134 | 188 | equemene | #elif defined TKISS
|
135 | 188 | equemene | unsigned long x=(unsigned long)(KISS>>1) ; |
136 | 188 | equemene | unsigned long y=(unsigned long)(KISS>>1) ; |
137 | 188 | equemene | #endif
|
138 | 188 | equemene | #elif defined TFP32
|
139 | 188 | equemene | #define THEONE 1.0f |
140 | 188 | equemene | #if defined TCONG
|
141 | 188 | equemene | float x=CONGfp ;
|
142 | 188 | equemene | float y=CONGfp ;
|
143 | 188 | equemene | #elif defined TSHR3
|
144 | 188 | equemene | float x=SHR3fp ;
|
145 | 188 | equemene | float y=SHR3fp ;
|
146 | 188 | equemene | #elif defined TMWC
|
147 | 188 | equemene | float x=MWCfp ;
|
148 | 188 | equemene | float y=MWCfp ;
|
149 | 188 | equemene | #elif defined TKISS
|
150 | 188 | equemene | float x=KISSfp ;
|
151 | 188 | equemene | float y=KISSfp ;
|
152 | 188 | equemene | #endif
|
153 | 188 | equemene | #elif defined TFP64
|
154 | 188 | equemene | #define THEONE 1.0f |
155 | 188 | equemene | #if defined TCONG
|
156 | 188 | equemene | double x=(double)CONGfp ; |
157 | 188 | equemene | double y=(double)CONGfp ; |
158 | 188 | equemene | #elif defined TSHR3
|
159 | 188 | equemene | double x=(double)SHR3fp ; |
160 | 188 | equemene | double y=(double)SHR3fp ; |
161 | 188 | equemene | #elif defined TMWC
|
162 | 188 | equemene | double x=(double)MWCfp ; |
163 | 188 | equemene | double y=(double)MWCfp ; |
164 | 188 | equemene | #elif defined TKISS
|
165 | 188 | equemene | double x=(double)KISSfp ; |
166 | 188 | equemene | double y=(double)KISSfp ; |
167 | 188 | equemene | #endif
|
168 | 188 | equemene | #endif
|
169 | 188 | equemene | |
170 | 188 | equemene | unsigned long inside=((x*x+y*y) < THEONE) ? 1:0; |
171 | 188 | equemene | total+=inside; |
172 | 188 | equemene | } |
173 | 188 | equemene | Inside(i)=total; |
174 | 188 | equemene | } |
175 | 188 | equemene | }; |
176 | 188 | equemene | |
177 | 188 | equemene | struct print {
|
178 | 188 | equemene | |
179 | 188 | equemene | view Inside; |
180 | 188 | equemene | |
181 | 188 | equemene | print(view Inside_) : |
182 | 188 | equemene | Inside (Inside_) |
183 | 188 | equemene | {} |
184 | 188 | equemene | |
185 | 188 | equemene | KOKKOS_INLINE_FUNCTION |
186 | 188 | equemene | void operator() (const int i) const { |
187 | 188 | equemene | printf ("Inside of %i = %lld\n", i,Inside(i));
|
188 | 188 | equemene | } |
189 | 188 | equemene | }; |
190 | 188 | equemene | |
191 | 188 | equemene | // Reduction functor that reads the View given to its constructor.
|
192 | 188 | equemene | struct ReduceFunctor {
|
193 | 188 | equemene | view Inside; |
194 | 188 | equemene | |
195 | 188 | equemene | ReduceFunctor (view Inside_) : Inside (Inside_) {} |
196 | 188 | equemene | |
197 | 188 | equemene | typedef LENGTH value_type;
|
198 | 188 | equemene | |
199 | 188 | equemene | KOKKOS_INLINE_FUNCTION |
200 | 188 | equemene | void operator() (int i, LENGTH &lsum) const { |
201 | 188 | equemene | lsum += Inside(i); |
202 | 188 | equemene | } |
203 | 188 | equemene | }; |
204 | 188 | equemene | |
205 | 188 | equemene | int main (int argc, char* argv[]) { |
206 | 188 | equemene | |
207 | 188 | equemene | unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE; |
208 | 188 | equemene | LENGTH iterations=ITERATIONS,insides=0;
|
209 | 188 | equemene | struct timeval tv1,tv2;
|
210 | 188 | equemene | struct timezone tz;
|
211 | 188 | equemene | |
212 | 188 | equemene | if (argc > 1) { |
213 | 188 | equemene | iterations=(LENGTH)atoll(argv[1]);
|
214 | 188 | equemene | ParallelRate=atoi(argv[2]);
|
215 | 188 | equemene | } |
216 | 188 | equemene | else {
|
217 | 188 | equemene | printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
|
218 | 188 | equemene | printf("\t\t#1 : number of iterations (default 1 billion)\n");
|
219 | 188 | equemene | printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
|
220 | 188 | equemene | } |
221 | 188 | equemene | |
222 | 188 | equemene | printf ("\n\tInformation about architecture:\n\n");
|
223 | 188 | equemene | |
224 | 188 | equemene | printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int)); |
225 | 188 | equemene | printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long)); |
226 | 188 | equemene | printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long)); |
227 | 188 | equemene | |
228 | 188 | equemene | printf ("\tMax int = %u\n", INT_MAX);
|
229 | 188 | equemene | printf ("\tMax long = %ld\n", LONG_MAX);
|
230 | 188 | equemene | printf ("\tMax long long = %lld\n\n", LLONG_MAX);
|
231 | 188 | equemene | |
232 | 188 | equemene | Kokkos::initialize (argc, argv); |
233 | 188 | equemene | |
234 | 188 | equemene | printf ("Pi Dart Dash on Kokkos execution space %s\n",
|
235 | 188 | equemene | typeid (Kokkos::DefaultExecutionSpace).name ());
|
236 | 188 | equemene | |
237 | 188 | equemene | view Inside("Inside",ParallelRate);
|
238 | 188 | equemene | LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1; |
239 | 188 | equemene | |
240 | 188 | equemene | gettimeofday(&tv1, &tz); |
241 | 188 | equemene | |
242 | 188 | equemene | |
243 | 188 | equemene | // Core of Kokkos : parallel_for & parallel_reduce
|
244 | 188 | equemene | Kokkos::parallel_for (ParallelRate,splitter(Inside,IterationsEach,seed_w,seed_z)); |
245 | 188 | equemene | // Kokkos::parallel_for (ParallelRate,print(Inside));
|
246 | 188 | equemene | Kokkos::parallel_reduce (ParallelRate, ReduceFunctor (Inside), insides); |
247 | 188 | equemene | |
248 | 188 | equemene | gettimeofday(&tv2, &tz); |
249 | 188 | equemene | |
250 | 188 | equemene | Kokkos::finalize (); |
251 | 188 | equemene | |
252 | 188 | equemene | double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
253 | 188 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
254 | 188 | equemene | |
255 | 188 | equemene | double itops=(double)(ParallelRate*IterationsEach)/elapsed; |
256 | 188 | equemene | |
257 | 188 | equemene | printf("\n");
|
258 | 188 | equemene | |
259 | 188 | equemene | printf("Inside/Total %lld %lld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nPi estimation %f\n\n",(long long)insides,(long long)ParallelRate*IterationsEach,ParallelRate,elapsed,itops,(4.*(float)insides/((float)(ParallelRate)*(float)(IterationsEach)))); |
260 | 188 | equemene | |
261 | 188 | equemene | } |