Révision 308

Pi/C/OpenMP/GPU/Makefile.amd (revision 308)
1
# Build every Pi_OpenMP variant with OpenMP offloading to an AMD GPU (GCC amdgcn back end)
# Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>

SOURCE=Pi_OpenMP.c
PROGRAM=$(SOURCE:.c=)

COMPUTING=INT32 INT64 FP32 FP64
MARSAGLIA=SHR3 CONG MWC KISS

# One executable per (arithmetic, generator) pair, e.g. Pi_OpenMP_FP32_MWC
TARGETS:=$(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(PROGRAM)_$(TVAR)_$(TRND)))

CC=gcc
# ISA of the GPU to generate code for; override with e.g. "make GPUARCH=gfx908"
GPUARCH=gfx906
# GCC names its AMD offload target amdgcn-amdhsa and selects the ISA with -march
# (-misa=sm_35 is an nvptx-only option)
CFLAGS=-Wall -O3 -std=c99 -foffload=amdgcn-amdhsa -foffload="-O3 -march=$(GPUARCH)" -fopenmp -g
LIBRARY=-lm -lgomp

.PHONY: all clean mrproper

all: $(TARGETS)

# The stem $* is <COMPUTING>_<MARSAGLIA>; each half becomes a -DT... selector.
# One rule per executable: a failed compile stops the build, unchanged
# executables are not rebuilt, and "make -j" can compile variants in parallel.
$(TARGETS): $(PROGRAM)_%: $(SOURCE)
	$(CC) $(CFLAGS) -DT$(word 1,$(subst _, ,$*)) -DT$(word 2,$(subst _, ,$*)) -DLONG -DTIME -o $@ $< $(LIBRARY)

# Remove editor backup files only
clean:
	find . -name "*~" -exec rm {} \;

# Remove the executables as well as the backup files
mrproper: clean
	rm -f $(TARGETS)
26

  
Pi/C/OpenMP/GPU/Makefile.nvidia (revision 308)
1
# Build every Pi_OpenMP variant with OpenMP offloading to an Nvidia GPU (GCC nvptx back end)
# Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>

SOURCE=Pi_OpenMP.c
PROGRAM=$(SOURCE:.c=)

COMPUTING=INT32 INT64 FP32 FP64
MARSAGLIA=SHR3 CONG MWC KISS

# One executable per (arithmetic, generator) pair, e.g. Pi_OpenMP_FP32_MWC
TARGETS:=$(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(PROGRAM)_$(TVAR)_$(TRND)))

CC=gcc
CFLAGS=-Wall -O3 -std=c99 -foffload=nvptx-none -foffload="-O3 -misa=sm_35" -fopenmp -g
LIBRARY=-lm -lgomp

.PHONY: all clean mrproper

all: $(TARGETS)

# The stem $* is <COMPUTING>_<MARSAGLIA>; each half becomes a -DT... selector.
# One rule per executable: a failed compile stops the build, unchanged
# executables are not rebuilt, and "make -j" can compile variants in parallel.
$(TARGETS): $(PROGRAM)_%: $(SOURCE)
	$(CC) $(CFLAGS) -DT$(word 1,$(subst _, ,$*)) -DT$(word 2,$(subst _, ,$*)) -DLONG -DTIME -o $@ $< $(LIBRARY)

# Remove editor backup files only
clean:
	find . -name "*~" -exec rm {} \;

# Remove the executables as well as the backup files
mrproper: clean
	rm -f $(TARGETS)
26

  
Pi/C/OpenMP/GPU/Pi_OpenMP.c (revision 308)
1
//
2
// Estimation of Pi using Monte Carlo exploration process
3
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// For Nvidia Devices
5
// gcc -Wall -O3 -std=c99 -foffload=nvptx-none -foffload="-O3 -misa=sm_35" -fopenmp -g -DTFP32 -DTMWC -DLONG -o Pi_OpenMP Pi_OpenMP.c -lm -lgomp
6
// For AMD Devices
7
// gcc -Wall -O3 -std=c99 -foffload=amdgcn-amdhsa -foffload="-O3 -march=gfx906" -fopenmp -g -DTFP32 -DTMWC -DLONG -o Pi_OpenMP Pi_OpenMP.c -lm -lgomp
8

  
9
#include <math.h>
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <omp.h>
13
#include <limits.h>
14
#include <sys/time.h>
15

  
16
// Marsaglia RNG very simple implementation
// Each macro advances generator state held in unsigned int variables that
// must be in scope where the macro is used: z and w for MWC, jsr for SHR3,
// jcong for CONG, and all four for KISS.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are separated by comma operators so that each
// update of jsr is sequenced before the next read of it. Nesting the three
// assignments in a single expression reads and writes jsr without any
// sequencing between them, which is undefined behaviour in C.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating point draws: the integer draw scaled by about 2^-32.
// Deliberately left unparenthesized: a cast written in front of one of these
// macros, e.g. (double)CONGfp, applies to the integer draw, not to the product.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f

// Default number of Monte Carlo draws
#define ITERATIONS 1000000000

// Default number of independent work items
#define PARALLELRATE 1024

// Counter type: long long when compiled with -DLONG, int otherwise
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif
38

  
39
// Draw `iterations` pseudo-random points (x,y) with non-negative coordinates
// and return how many of them lie strictly inside the quarter disc
// x*x+y*y < THEONE.
//
// Compile-time selectors (first match wins, in the order listed):
//   generator  : TCONG, TSHR3, TMWC, TKISS   (MWC when none is defined)
//   arithmetic : TINT32, TINT64, TFP32, TFP64 (FP32 when none is defined)
// The defaults make a plain compile without any -DT... flag work.
//
// seed_w seeds jsr and w, seed_z seeds jcong and z; a generator ignores the
// seed whose state variables it does not use.
//
// The function is called from inside an OpenMP target region, so it is
// explicitly compiled for the offload device as well as for the host.
#pragma omp declare target
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
{
   // Generator selection: declare exactly the state the chosen macro touches
#if defined TCONG
   unsigned int jcong=seed_z;
   #define PI_DRAW   CONG
   #define PI_DRAWFP CONGfp
#elif defined TSHR3
   unsigned int jsr=seed_w;
   #define PI_DRAW   SHR3
   #define PI_DRAWFP SHR3fp
#elif defined TMWC || !defined TKISS
   unsigned int z=seed_z;
   unsigned int w=seed_w;
   #define PI_DRAW   MWC
   #define PI_DRAWFP MWCfp
#else
   unsigned int jcong=seed_z;
   unsigned int jsr=seed_w;
   unsigned int z=seed_z;
   unsigned int w=seed_w;
   #define PI_DRAW   KISS
   #define PI_DRAWFP KISSfp
#endif

   // Arithmetic selection: coordinate type, one coordinate draw, squared radius
#if defined TINT32
   // 15 bit coordinates: squared radius 2^30, x*x+y*y stays below 2^31
   #define THEONE 1073741824
   #define PI_COORD_TYPE unsigned int
   #define PI_COORD (PI_DRAW>>17)
#elif defined TINT64
   // 31 bit coordinates: squared radius 2^62, x*x+y*y stays below 2^63.
   // unsigned long long is used because unsigned long may be only 32 bits wide.
   #define THEONE 4611686018427387904ULL
   #define PI_COORD_TYPE unsigned long long
   #define PI_COORD ((unsigned long long)(PI_DRAW>>1))
#elif defined TFP32 || !defined TFP64
   #define THEONE 1.0f
   #define PI_COORD_TYPE float
   #define PI_COORD (PI_DRAWFP)
#else
   // The cast binds to the integer draw, so the scaling is done in double
   #define THEONE 1.0
   #define PI_COORD_TYPE double
   #define PI_COORD ((double)PI_DRAWFP)
#endif

   LENGTH total=0;

   for (LENGTH i=0;i<iterations;i++) {
      // Two separate statements: x is always drawn before y
      PI_COORD_TYPE x=PI_COORD;
      PI_COORD_TYPE y=PI_COORD;

      if ((x*x+y*y) < THEONE) {
         total++;
      }
   }

   return(total);
}
#pragma omp end declare target

// Helper macros are private to MainLoopGlobal
#undef PI_DRAW
#undef PI_DRAWFP
#undef PI_COORD_TYPE
#undef PI_COORD
127

  
128
// Split the Monte Carlo run into ParallelRate independent work items, run
// them through an OpenMP target region, print the per-item counts and the
// timing figures, and return the total number of points found inside.
//
// iterations   : requested number of draws; each work item performs
//                ceil(iterations/ParallelRate) draws
// seed_w,seed_z: base seeds; work item i uses seed_w+i and seed_z+i
// ParallelRate : number of work items, must be greater than 0
//
// Returns 0 (after a message on stderr) when ParallelRate is 0; terminates
// the program when the counter array cannot be allocated.
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate)
{
  LENGTH *inside,insides=0;
  struct timeval tv1,tv2;

  // Zero work items cannot be scheduled, and the divisions below would trap
  if (ParallelRate==0) {
    fprintf(stderr,"splitter: ParallelRate must be greater than 0\n");
    return(0);
  }

  // Round up so that the work items together cover at least `iterations` draws
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

  inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
  if (inside==NULL) {
    fprintf(stderr,"splitter: cannot allocate %u counters\n",ParallelRate);
    exit(EXIT_FAILURE);
  }

  gettimeofday(&tv1, NULL);

  // Every element of inside[] is written in the region and none is read, so
  // the array only needs to be copied back from the device.
#pragma omp target teams distribute parallel for simd \
   map(from:inside[0:ParallelRate])
  for (unsigned int i=0 ; i<ParallelRate; i++) {
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
  }

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    insides+=inside[i];
  }

  gettimeofday(&tv2, NULL);

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for ParallelRate %u\n",(long long)inside[i],i);
  }
  printf("\n");

  // Seconds and microseconds are converted separately so that no integer
  // product can overflow
  double elapsed=(double)(tv2.tv_sec-tv1.tv_sec)+
                 (double)(tv2.tv_usec-tv1.tv_usec)/1000000.;

  // Multiply in double: ParallelRate*IterationsEach can exceed the integer
  // range when LENGTH is int
  double itops=(double)ParallelRate*(double)IterationsEach/elapsed;

  printf("ParallelRate %u\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",ParallelRate,elapsed,itops,log10(itops));

  free(inside);

  return(insides);
}
167

  
168

  
169
int main(int argc, char *argv[]) {
170

  
171
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
172
  LENGTH iterations=ITERATIONS,insides=0;
173
  
174
  if (argc > 1) {
175
    iterations=(LENGTH)atoll(argv[1]);
176
    if (argc > 2) {
177
      ParallelRate=atoi(argv[2]);
178
    }
179
  }
180
  else {
181
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
184
  }
185

  
186
  printf ("\n\tInformation about architecture:\n\n");
187

  
188
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191

  
192
  printf ("\tMax int = %u\n", INT_MAX);
193
  printf ("\tMax long = %ld\n", LONG_MAX);
194
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195

  
196
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197

  
198
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199

  
200
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
201
  
202
}
Pi/C/OpenMP/Makefile (revision 308)
7 7
MARSAGLIA=SHR3 CONG MWC KISS
8 8

  
9 9
CC=gcc
10
CFLAGS=-Wall -O3 -std=c99 -fopenmp -g
10
# CFLAGS=-Wall -O3 -std=c99 -fopenmp -g
11
CFLAGS=-Wall -O3 -std=c99 -foffload=nvptx-none -foffload="-O3 -misa=sm_35" -fopenmp -g
11 12
LIBRARY=-lm -lgomp
12 13

  
13 14
all: $(SOURCE)

Formats disponibles : Unified diff