Révision 188

Pi/C/OpenMP/Pi_OpenMP.c (revision 188)
129 129
  struct timeval tv1,tv2;
130 130
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
131 131

  
132
  inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
133
  
134
  gettimeofday(&tv1, NULL);
135
  
136
#pragma omp parallel for
137
  for (int i=0 ; i<ParallelRate; i++) {
138
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
132
#pragma omp parallel for  
133
  for (int i=0 ; i<process; i++) {
134
    inside[i]=MainLoopGlobal(iterations/process,seed_w+i,seed_z+i);
139 135
  }
140
  
141
  for (int i=0 ; i<ParallelRate; i++) {
136

  
137
  for (int i=0 ; i<process; i++) {
138
    printf("\tFound %lld for process %i\n",(long long)inside[i],i);
142 139
    insides+=inside[i];
143 140
  }
144
  
145
  gettimeofday(&tv2, NULL);
146
  
147
  for (int i=0 ; i<ParallelRate; i++) {
148
    printf("\tFound %lld for ParallelRate %i\n",(long long)inside[i],i);
149
  }
150 141
  printf("\n");
151 142

  
152 143
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
Pi/C/Hybrid/Makefile (revision 188)
2 2

  
3 3
MARSAGLIA=SHR3 CONG MWC KISS
4 4

  
5
SOURCE=Pi_Hybrid.c Pi_aHybrid.c
5
#SOURCE=Pi_Hybrid.c Pi_aHybrid.c
6
SOURCE=Pi_Hybrid.c
6 7

  
7 8
CC=mpicc.openmpi
8 9
CFLAGS=-Wall -Wno-sequence-point -O3 -std=c99 -fopenmp 
Pi/C/Kokkos/Pi_Kokkos.cpp (revision 188)
1
/*
2
//@HEADER
3
// ************************************************************************
4
// 
5
//                        Kokkos v. 2.0
6
//              Copyright (2014) Sandia Corporation
7
// 
8
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9
// the U.S. Government retains certain rights in this software.
10
// 
11
// Redistribution and use in source and binary forms, with or without
12
// modification, are permitted provided that the following conditions are
13
// met:
14
//
15
// 1. Redistributions of source code must retain the above copyright
16
// notice, this list of conditions and the following disclaimer.
17
//
18
// 2. Redistributions in binary form must reproduce the above copyright
19
// notice, this list of conditions and the following disclaimer in the
20
// documentation and/or other materials provided with the distribution.
21
//
22
// 3. Neither the name of the Corporation nor the names of the
23
// contributors may be used to endorse or promote products derived from
24
// this software without specific prior written permission.
25
//
26
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
//
38
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
39
// 
40
// ************************************************************************
41
//@HEADER
42
*/
43

  
44
#include <Kokkos_Core.hpp>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <typeinfo>
#include <math.h>
#include <sys/time.h>
49

  
50
// Very simple implementation of Marsaglia's random number generators (MWC, SHR3, CONG, KISS)
51
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
52
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
53
#define MWC   (znew+wnew)
54
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
55
#define CONG  (jcong=69069*jcong+1234567)
56
#define KISS  ((MWC^CONG)+SHR3)
57

  
58
#define MWCfp MWC * 2.328306435454494e-10f
59
#define KISSfp KISS * 2.328306435454494e-10f
60
#define SHR3fp SHR3 * 2.328306435454494e-10f
61
#define CONGfp CONG * 2.328306435454494e-10f
62

  
63
#define ITERATIONS 1000000000
64

  
65
#define PARALLELRATE 1024
66

  
67
#ifdef LONG
68
#define LENGTH long long
69
#else
70
#define LENGTH int
71
#endif
72

  
73
// In Kokkos, vectors, matrices and higher-rank arrays are all "View"s
74
typedef Kokkos::View<LENGTH*> view;
75

  
76
struct splitter {
77

  
78
  view Inside;
79
  unsigned int seed_w;
80
  unsigned int seed_z;
81
  LENGTH iterations;
82
  
83
  splitter(view Inside_,LENGTH iterations,unsigned int seed_w_,unsigned int seed_z_) :
84
    Inside (Inside_),iterations (iterations),seed_w (seed_w_),seed_z (seed_z_)
85
  {}
86
  
87
  KOKKOS_INLINE_FUNCTION
88
  void operator() (int i) const {
89

  
90
    // MainLoopGlobal totally copied inside operator()
91
#if defined TCONG
92
   unsigned int jcong=seed_z+i;
93
#elif defined TSHR3
94
   unsigned int jsr=seed_w+i;
95
#elif defined TMWC
96
   unsigned int z=seed_z+i;
97
   unsigned int w=seed_w-i;
98
#elif defined TKISS
99
   unsigned int jcong=seed_z+i;
100
   unsigned int jsr=seed_w-i;
101
   unsigned int z=seed_z+i;
102
   unsigned int w=seed_w-i;
103
#endif
104

  
105
   LENGTH total=0;
106
   
107
   for (LENGTH i=0;i<iterations;i++) {
108
#if defined TINT32
109
    #define THEONE 1073741824
110
    #if defined TCONG
111
        unsigned int x=CONG>>17 ;
112
        unsigned int y=CONG>>17 ;
113
    #elif defined TSHR3
114
        unsigned int x=SHR3>>17 ;
115
        unsigned int y=SHR3>>17 ;
116
    #elif defined TMWC
117
        unsigned int x=MWC>>17 ;
118
        unsigned int y=MWC>>17 ;
119
    #elif defined TKISS
120
        unsigned int x=KISS>>17 ;
121
        unsigned int y=KISS>>17 ;
122
    #endif
123
#elif defined TINT64
124
    #define THEONE 4611686018427387904
125
    #if defined TCONG
126
        unsigned long x=(unsigned long)(CONG>>1) ;
127
        unsigned long y=(unsigned long)(CONG>>1) ;
128
    #elif defined TSHR3
129
        unsigned long x=(unsigned long)(SHR3>>1) ;
130
        unsigned long y=(unsigned long)(SHR3>>1) ;
131
    #elif defined TMWC
132
        unsigned long x=(unsigned long)(MWC>>1) ;
133
        unsigned long y=(unsigned long)(MWC>>1) ;
134
    #elif defined TKISS
135
        unsigned long x=(unsigned long)(KISS>>1) ;
136
        unsigned long y=(unsigned long)(KISS>>1) ;
137
    #endif
138
#elif defined TFP32
139
    #define THEONE 1.0f
140
    #if defined TCONG
141
        float x=CONGfp ;
142
        float y=CONGfp ;
143
    #elif defined TSHR3
144
        float x=SHR3fp ;
145
        float y=SHR3fp ;
146
    #elif defined TMWC
147
        float x=MWCfp ;
148
        float y=MWCfp ;
149
    #elif defined TKISS
150
      float x=KISSfp ;
151
      float y=KISSfp ;
152
    #endif
153
#elif defined TFP64
154
    #define THEONE 1.0f
155
    #if defined TCONG
156
        double x=(double)CONGfp ;
157
        double y=(double)CONGfp ;
158
    #elif defined TSHR3
159
        double x=(double)SHR3fp ;
160
        double y=(double)SHR3fp ;
161
    #elif defined TMWC
162
        double x=(double)MWCfp ;
163
        double y=(double)MWCfp ;
164
    #elif defined TKISS
165
        double x=(double)KISSfp ;
166
        double y=(double)KISSfp ;
167
    #endif
168
#endif
169

  
170
	unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
171
	total+=inside;
172
   }
173
    Inside(i)=total;
174
  }
175
};
176

  
177
struct print {
178

  
179
  view Inside;
180

  
181
  print(view Inside_) :
182
    Inside (Inside_)
183
  {}
184
  
185
  KOKKOS_INLINE_FUNCTION
186
  void operator() (const int i) const {
187
    printf ("Inside of %i = %lld\n", i,Inside(i));
188
  }
189
};
190

  
191
// Reduction functor that reads the View given to its constructor.
192
struct ReduceFunctor {
193
  view Inside;
194

  
195
  ReduceFunctor (view Inside_) : Inside (Inside_) {}
196

  
197
  typedef LENGTH value_type;
198

  
199
  KOKKOS_INLINE_FUNCTION
200
  void operator() (int i, LENGTH &lsum) const {
201
    lsum += Inside(i);
202
  }
203
};
204

  
205
int main (int argc, char* argv[]) {
206

  
207
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
208
  LENGTH iterations=ITERATIONS,insides=0;
209
  struct timeval tv1,tv2;
210
  struct timezone tz;
211

  
212
  if (argc > 1) {
213
    iterations=(LENGTH)atoll(argv[1]);
214
    ParallelRate=atoi(argv[2]);
215
  }
216
  else {
217
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
218
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
219
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
220
  }
221

  
222
  printf ("\n\tInformation about architecture:\n\n");
223

  
224
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
225
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
226
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
227

  
228
  printf ("\tMax int = %u\n", INT_MAX);
229
  printf ("\tMax long = %ld\n", LONG_MAX);
230
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
231
  
232
  Kokkos::initialize (argc, argv);
233

  
234
  printf ("Pi Dart Dash on Kokkos execution space %s\n",
235
          typeid (Kokkos::DefaultExecutionSpace).name ());
236

  
237
  view Inside("Inside",ParallelRate);
238
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
239

  
240
  gettimeofday(&tv1, &tz);
241

  
242

  
243
  // Core of Kokkos : parallel_for & parallel_reduce
244
  Kokkos::parallel_for (ParallelRate,splitter(Inside,IterationsEach,seed_w,seed_z));
245
  //  Kokkos::parallel_for (ParallelRate,print(Inside));
246
  Kokkos::parallel_reduce (ParallelRate, ReduceFunctor (Inside), insides);
247

  
248
  gettimeofday(&tv2, &tz);
249
	
250
  Kokkos::finalize ();
251

  
252
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
253
			  (tv2.tv_usec-tv1.tv_usec))/1000000;  
254

  
255
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
256
  
257
  printf("\n");
258

  
259
  printf("Inside/Total %lld %lld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nPi estimation %f\n\n",(long long)insides,(long long)ParallelRate*IterationsEach,ParallelRate,elapsed,itops,(4.*(float)insides/((float)(ParallelRate)*(float)(IterationsEach))));
260
  
261
}
262

  
Pi/C/Kokkos/Makefile (revision 188)
1
# Builds one Pi_Kokkos executable per (COMPUTING, MARSAGLIA) pair against the
# Kokkos tree found in KOKKOS_PATH.
# KOKKOS_DEVICES selects the backend: a value containing "Cuda" builds
# *.cuda binaries with nvcc_wrapper, anything else builds *.host binaries
# with g++.
KOKKOS_PATH = ./kokkos-2.7.00
KOKKOS_SRC_PATH = ${KOKKOS_PATH}
# SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/07_Pi/*.cpp)
SRC = Pi_Kokkos.cpp
vpath %.cpp $(sort $(dir $(SRC)))

# Arithmetic types and random number generators to build. Each pair is
# passed to the compiler as -DT<type> -DT<generator>.
COMPUTING=INT32 INT64 FP32 FP64
MARSAGLIA=SHR3 CONG MWC KISS

# COMPUTING=FP32
# MARSAGLIA=MWC

default: build
	echo "Start Build"

# None of these targets produces a file carrying its own name.
.PHONY: default build test clean mrproper

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LDFLAGS = 
EXE=cuda
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LDFLAGS =  
EXE=host
endif

DEPFLAGS = -M

# One object file per (type, generator) pair: Pi_Kokkos_<type>_<generator>.o
OBJ = $(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(notdir $(SRC:.cpp=))_$(TVAR)_$(TRND).o ) )
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Runs every executable produced by the link step. No file named $(EXE)
# is ever created, so the binaries are listed explicitly.
test: $(EXE)
	set -e ; $(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA), ./$(SRC:.cpp=)_$(TVAR)_$(TRND).$(EXE) ; ) )

# Links one executable per object file. "set -e" stops at the first failing
# command instead of reporting only the status of the last one.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	@echo $(OBJ)
	set -e ; $(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA), $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(SRC:.cpp=)_$(TVAR)_$(TRND).o $(KOKKOS_LIBS) $(LIB)  -o $(SRC:.cpp=)_$(TVAR)_$(TRND).$(EXE) ; ) )

clean: kokkos-clean 
	rm -f *.o

mrproper: kokkos-clean 
	rm -f *.o *.cuda *.host

# Compilation rules

# Compiles every (type, generator) variant of the source in one recipe.
# "set -e" makes a failed compilation abort the build.
$(OBJ):$(SRC) $(KOKKOS_CPP_DEPENDS)
	set -e ; $(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(CXX) $(CFLAGS) -DT$(TVAR) -DT$(TRND) -DLONG -DTIME $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c -o $(<:.cpp=)_$(TVAR)_$(TRND).o $< ; ) )

# %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
# 	@echo $(OBJ2) 
# 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
60

  
Pi/C/Kokkos/README (revision 188)
1
# To compile it under Debian Stretch
2
# The current Kokkos version is broken; use the specific release 2.7.00 instead.
3

  
4
wget https://github.com/kokkos/kokkos/archive/2.7.00.tar.gz
5
tar xzf 2.7.00.tar.gz
6
sed -i 's/^host_compiler/host_compiler=\"clang++-3.8\"\n#host_compiler/g' kokkos-2.7.00/bin/nvcc_wrapper
7

  
8

  
9
# Load cuda environment for lib64 and include folders
10
. /usr/share/modules/init/bash
11
module load cuda/9.0
12

  
13
# Define the default compilers (must be installed)
14
export CC=/usr/bin/clang-3.8
15
export CXX=/usr/bin/clang++-3.8
16

  
17
# Enable the CUDA and OpenMP backends and hwloc support in Kokkos
18
export KOKKOS_ENABLE_CUDA=1
19
export KOKKOS_ENABLE_OPENMP=1
20
export KOKKOS_ENABLE_HWLOC=1
21

  
22
# To compile CUDA versions
23
export KOKKOS_DEVICES=Cuda
24
export KOKKOS_ARCH=Maxwell52
25

  
26
# To compile OpenMP versions instead (this export replaces the Cuda setting above; use only one of the two)
27
export KOKKOS_DEVICES=OpenMP
28

  

Formats disponibles : Unified diff