#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

import mpi4py
from mpi4py import MPI

from PiXPU import *
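# The MetropolisCuda/MetropolisOpenCL launchers and the FitAndPrint helper
# used below come from this star import.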

if __name__=='__main__':

    # MPI Init
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Define number of nodes on which computing is performed (exclude 0)
    RankSize=comm.Get_size()
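
    # Rank 0 parses the options, sends one work unit per extra device and
    # aggregates the partial results; ranks 1 to RankSize-1 loop on
    # receive/compute/send until they get a 'BREAK' signal.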

    if rank == 0:

        # Set default values

        # Id of Device : 1 is for first find !
        Device=1
        # GPU style can be Cuda (Nvidia implementation) or OpenCL
        GpuStyle='OpenCL'
        # Iterations is integer
        Iterations=10000000
        # BlocksBegin is the first number of Blocks to explore
        BlocksBegin=1
        # BlocksEnd is the last number of Blocks to explore
        BlocksEnd=16
        # BlocksStep is the step of Blocks to explore
        BlocksStep=1
        # ThreadsBegin is the first number of Threads to explore
        ThreadsBegin=1
        # ThreadsEnd is the last number of Threads to explore
        ThreadsEnd=1
        # ThreadsStep is the step of Threads to explore
        ThreadsStep=1
        # Redo is the times to redo the test to improve metrology
        Redo=1
        # OutMetrology is method for duration estimation : False is GPU inside
        OutMetrology=False
        Metrology='InMetro'
        # Curves is True to print the curves
        Curves=False
        # Fit is True to fit the results
        Fit=False
        # Marsaglia RNG
        RNG='MWC'
        # Value type : INT32, INT64, FP32, FP64
        ValueType='FP32'
        # Inside based on If
        IfThen=False

        HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

        try:
            # Long option names are lowercased to match the tests below
            opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
        except getopt.GetoptError:
            print(HowToUse % sys.argv[0])
            sys.exit(2)

        # List of Devices
        Devices=[]
        Alu={}
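
        # Devices holds the device ids requested with -d; Alu maps each id
        # to the ALU type detected below ('GPU' under CUDA, '*PU' under OpenCL)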

        for opt, arg in opts:
            if opt == '-h':
                print(HowToUse % sys.argv[0])

                print("\nInformation about devices detected under OpenCL:")
                # For PyOpenCL import
                try:
                    import pyopencl as cl
                    Id=0
                    for platform in cl.get_platforms():
                        for device in platform.get_devices():
                            #deviceType=cl.device_type.to_string(device.type)
                            deviceType="xPU"
                            print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                            Id=Id+1

                    print()
                except:
                    print("Your platform does not seem to support OpenCL")

                print("\nInformation about devices detected under CUDA API:")
                # For PyCUDA import
                try:
                    import pycuda.driver as cuda
                    cuda.init()
                    for Id in range(cuda.Device.count()):
                        device=cuda.Device(Id)
                        print("Device #%i of type GPU : %s" % (Id,device.name()))
                    print()
                except:
                    print("Your platform does not seem to support CUDA")

                sys.exit()

            elif opt == '-c':
                Curves=True
            elif opt == '-k':
                IfThen=True
            elif opt in ("-d", "--device"):
                Devices.append(int(arg))
            elif opt in ("-g", "--gpustyle"):
                GpuStyle = arg
            elif opt in ("-m", "--marsaglia"):
                RNG = arg
            elif opt in ("-v", "--valuetype"):
                ValueType = arg
            elif opt in ("-i", "--iterations"):
                Iterations = numpy.uint64(arg)
            elif opt in ("-b", "--blocksbegin"):
                BlocksBegin = int(arg)
                BlocksEnd = BlocksBegin
            elif opt in ("-e", "--blocksend"):
                BlocksEnd = int(arg)
            elif opt in ("-s", "--blocksstep"):
                BlocksStep = int(arg)
            elif opt in ("-f", "--threadsfirst"):
                ThreadsBegin = int(arg)
                ThreadsEnd = ThreadsBegin
            elif opt in ("-l", "--threadslast"):
                ThreadsEnd = int(arg)
            elif opt in ("-t", "--threadsstep"):
                ThreadsStep = int(arg)
            elif opt in ("-r", "--redo"):
                Redo = int(arg)

        # Fall back on the default Device id when no -d option was given
        # (otherwise Devices[0] and the per-device split below would fail)
        if len(Devices)==0:
            Devices.append(Device)

        print("Devices Identification : %s" % Devices)
        print("GpuStyle used : %s" % GpuStyle)
        print("Iterations : %s" % Iterations)
        print("Number of Blocks on begin : %s" % BlocksBegin)
        print("Number of Blocks on end : %s" % BlocksEnd)
        print("Step on Blocks : %s" % BlocksStep)
        print("Number of Threads on begin : %s" % ThreadsBegin)
        print("Number of Threads on end : %s" % ThreadsEnd)
        print("Step on Threads : %s" % ThreadsStep)
        print("Number of redo : %s" % Redo)
        print("Metrology done out of XPU : %r" % OutMetrology)
        print("Type of Marsaglia RNG used : %s" % RNG)
        print("Type of variable : %s" % ValueType)

        if GpuStyle=='CUDA':
            try:
                # For PyCUDA import
                import pycuda.driver as cuda

                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                    if Id in Devices:
                        Alu[Id]='GPU'
            except ImportError:
                print("Platform does not seem to support CUDA")

        if GpuStyle=='OpenCL':
            try:
                # For PyOpenCL import
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="*PU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                        if Id in Devices:
                            # Set the Alu as detected Device Type
                            Alu[Id]=deviceType
                        Id=Id+1
            except ImportError:
                print("Platform does not seem to support OpenCL")

        print(Devices,Alu)

        BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
        ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

        ExploredJobs=numpy.array([]).astype(numpy.uint32)
        ExploredBlocks=numpy.array([]).astype(numpy.uint32)
        ExploredThreads=numpy.array([]).astype(numpy.uint32)
        avgD=numpy.array([]).astype(numpy.float32)
        medD=numpy.array([]).astype(numpy.float32)
        stdD=numpy.array([]).astype(numpy.float32)
        minD=numpy.array([]).astype(numpy.float32)
        maxD=numpy.array([]).astype(numpy.float32)
        avgR=numpy.array([]).astype(numpy.float32)
        medR=numpy.array([]).astype(numpy.float32)
        stdR=numpy.array([]).astype(numpy.float32)
        minR=numpy.array([]).astype(numpy.float32)
        maxR=numpy.array([]).astype(numpy.float32)

        # Each device gets a ceiling share of the requested iterations
        IterationsMPI=numpy.uint64(Iterations//len(Devices))
        if Iterations%len(Devices)!=0:
            IterationsMPI+=1
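
        # e.g. 10000000 iterations on 3 devices gives 3333334 each, so the
        # aggregated NewIterations may slightly exceed the requested total;
        # the Pi estimate below divides by NewIterations, not Iterations.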

        for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

            ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
            ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
            ExploredThreads=numpy.append(ExploredThreads,Threads)

            DurationItem=numpy.array([]).astype(numpy.float32)
            Duration=numpy.array([]).astype(numpy.float32)
            Rate=numpy.array([]).astype(numpy.float32)
            for i in range(Redo):
                time_start=time.time()

                r=1
                # Distribution of Devices over nodes
                InputCL={}
                InputCL['Iterations']=IterationsMPI
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['GpuStyle']=GpuStyle
                InputCL['IfThen']=IfThen

                for Device in Devices[1:]:
                    print("Send to device %i on rank %i" % (Device,r))
                    InputCL['Device']=Device
                    comm.send('CONTINUE',dest=r,tag=11)
                    comm.send(InputCL,dest=r,tag=11)
                    r+=1
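
                # Protocol: a 'CONTINUE' signal then the InputCL dict, both
                # on tag 11; each worker answers with one OutputCL dict. This
                # assumes at least len(Devices) MPI ranks were launched.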

                # Compute on rank 0
                print("Compute on rank 0")
                InputCL['Device']=Devices[0]

                if GpuStyle=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                        raise
                elif GpuStyle=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))
                        raise

                Inside=OutputCL['Inside']
                NewIterations=OutputCL['NewIterations']

                for slave in range(1,len(Devices)):
                    print("Get OutputCL from %i" % slave)
                    OutputCL=comm.recv(source=slave,tag=11)
                    print(OutputCL)
                    NewIterations+=OutputCL['NewIterations']
                    Inside+=OutputCL['Inside']
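
                # Monte Carlo estimator: Inside counts the draws that fell
                # within the quarter unit disc, so Inside/NewIterations ~ Pi/4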

                print("Pi estimation %.8f" % (4./NewIterations*Inside))

                Duration=numpy.append(Duration,time.time()-time_start)
                Rate=numpy.append(Rate,NewIterations/Duration[-1])
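
                # The timer wraps the whole distribute/compute/collect cycle,
                # so MPI overhead is included in Duration and Rate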

            avgD=numpy.append(avgD,numpy.average(Duration))
            medD=numpy.append(medD,numpy.median(Duration))
            stdD=numpy.append(stdD,numpy.std(Duration))
            minD=numpy.append(minD,numpy.min(Duration))
            maxD=numpy.append(maxD,numpy.max(Duration))
            avgR=numpy.append(avgR,numpy.average(Rate))
            medR=numpy.append(medR,numpy.median(Rate))
            stdR=numpy.append(stdR,numpy.std(Rate))
            minR=numpy.append(minR,numpy.min(Rate))
            maxR=numpy.append(maxR,numpy.max(Rate))

            print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

            numpy.savez("PiMPI_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
            ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
            numpy.savetxt("PiMPI_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

            if Fit:
                # Fit the median durations against the number of jobs
                FitAndPrint(ExploredJobs,medD,Curves)
        # Send MPI exit tag
        for slave in range(1,RankSize):
            comm.send('BREAK',dest=slave,tag=11)

    else:
        while True:
            Signal=comm.recv(source=0,tag=11)
            if Signal=='CONTINUE':
                # Receive information from Master
                InputCL=comm.recv(source=0,tag=11)
                print("Parameters retrieved for rank %s of %s on %s from master:" % (rank,RankSize,gethostname()))
                print("Input CL: %s" % InputCL)
                # Execute on slave

                if InputCL['GpuStyle']=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (InputCL['Blocks'],InputCL['Threads']))
                        raise
                elif InputCL['GpuStyle']=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (InputCL['Blocks'],InputCL['Threads']))
                        raise

                print("Output CL: %s" % OutputCL)
                # Send information to Master
                comm.send(OutputCL,dest=0,tag=11)
                print("Data sent to master")
            else:
                print('Exit signal from Master')
                break