#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

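# Illustrative launch (assumption: one MPI rank per selected device, so the
# number of ranks should match the number of -d options given):
#   mpirun -np 2 python3 PiXpuMPI.py -g OpenCL -d 0 -d 1 -i 100000000
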
# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

import mpi4py
from mpi4py import MPI

from PiXPU import *

if __name__=='__main__':

    # MPI Init
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # RankSize is the total number of MPI ranks: rank 0 is the master,
    # ranks 1..RankSize-1 host the extra computing devices
    RankSize=comm.Get_size()

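    # Protocol (as implemented below): rank 0 parses the options and, for each
    # (Blocks,Threads) couple, sends 'CONTINUE' followed by an InputCL dict to
    # each worker rank; workers answer with an OutputCL dict; a final 'BREAK'
    # releases them.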
    if rank == 0:

        # Set default values

        # Id of Device : 1 is for the first found!
        Device=1
        # GPU style can be CUDA (Nvidia implementation) or OpenCL
        GpuStyle='OpenCL'
        # Iterations is an integer
        Iterations=10000000
        # BlocksBegin is the first number of Blocks to explore
        BlocksBegin=1
        # BlocksEnd is the last number of Blocks to explore
        BlocksEnd=16
        # BlocksStep is the step between Blocks to explore
        BlocksStep=1
        # ThreadsBegin is the first number of Threads to explore
        ThreadsBegin=1
        # ThreadsEnd is the last number of Threads to explore
        ThreadsEnd=1
        # ThreadsStep is the step between Threads to explore
        ThreadsStep=1
        # Redo is the number of times to redo the test to improve metrology
        Redo=1
        # OutMetrology is the method for duration estimation : False is GPU inside
        OutMetrology=False
        Metrology='InMetro'
        # Curves is True to print the curves
        Curves=False
        # Fit is True to fit the curves
        Fit=False
        # Marsaglia RNG
        RNG='MWC'
        # Value type : INT32, INT64, FP32, FP64
        ValueType='FP32'
        # Inside based on If
        IfThen=False

        HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

        try:
            # Long option names are lowercase so they match the tests below
            opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
        except getopt.GetoptError:
            print(HowToUse % sys.argv[0])
            sys.exit(2)

        # List of Devices
        Devices=[]
        Alu={}

        for opt, arg in opts:
            if opt == '-h':
                print(HowToUse % sys.argv[0])

                print("\nInformation about devices detected under OpenCL:")
                # For PyOpenCL import
                try:
                    import pyopencl as cl
                    Id=0
                    for platform in cl.get_platforms():
                        for device in platform.get_devices():
                            #deviceType=cl.device_type.to_string(device.type)
                            deviceType="xPU"
                            print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                            Id=Id+1

                    print()
                except:
                    print("Your platform does not seem to support OpenCL")

                print("\nInformation about devices detected under CUDA API:")
                # For PyCUDA import
                try:
                    import pycuda.driver as cuda
                    cuda.init()
                    for Id in range(cuda.Device.count()):
                        device=cuda.Device(Id)
                        print("Device #%i of type GPU : %s" % (Id,device.name()))
                    print()
                except:
                    print("Your platform does not seem to support CUDA")

                sys.exit()

            elif opt == '-c':
                Curves=True
            elif opt == '-k':
                IfThen=True
            elif opt in ("-d", "--device"):
                Devices.append(int(arg))
            elif opt in ("-g", "--gpustyle"):
                GpuStyle = arg
            elif opt in ("-m", "--marsaglia"):
                RNG = arg
            elif opt in ("-v", "--valuetype"):
                ValueType = arg
            elif opt in ("-i", "--iterations"):
                Iterations = numpy.uint64(arg)
            elif opt in ("-b", "--blocksbegin"):
                BlocksBegin = int(arg)
            elif opt in ("-e", "--blocksend"):
                BlocksEnd = int(arg)
            elif opt in ("-s", "--blocksstep"):
                BlocksStep = int(arg)
            elif opt in ("-f", "--threadsfirst"):
                ThreadsBegin = int(arg)
            elif opt in ("-l", "--threadslast"):
                ThreadsEnd = int(arg)
            elif opt in ("-t", "--threadsstep"):
                ThreadsStep = int(arg)
            elif opt in ("-r", "--redo"):
                Redo = int(arg)

        print("Devices Identification : %s" % Devices)
        print("GpuStyle used : %s" % GpuStyle)
        print("Iterations : %s" % Iterations)
        print("Number of Blocks on begin : %s" % BlocksBegin)
        print("Number of Blocks on end : %s" % BlocksEnd)
        print("Step on Blocks : %s" % BlocksStep)
        print("Number of Threads on begin : %s" % ThreadsBegin)
        print("Number of Threads on end : %s" % ThreadsEnd)
        print("Step on Threads : %s" % ThreadsStep)
        print("Number of redo : %s" % Redo)
        print("Metrology done out of XPU : %r" % OutMetrology)
        print("Type of Marsaglia RNG used : %s" % RNG)
        print("Type of variable : %s" % ValueType)

        if GpuStyle=='CUDA':
            try:
                # For PyCUDA import
                import pycuda.driver as cuda

                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                    if Id in Devices:
                        Alu[Id]='GPU'
            except ImportError:
                print("Platform does not seem to support CUDA")

        if GpuStyle=='OpenCL':
            try:
                # For PyOpenCL import
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="*PU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.strip(),deviceType,device.name.strip()))

                        if Id in Devices:
                            # Set the Alu as detected Device Type
                            Alu[Id]=deviceType
                        Id=Id+1
            except ImportError:
                print("Platform does not seem to support OpenCL")

        print(Devices,Alu)

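        # The +Step upper bounds make BlocksEnd and ThreadsEnd inclusive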
        BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
        ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

        ExploredJobs=numpy.array([]).astype(numpy.uint32)
        ExploredBlocks=numpy.array([]).astype(numpy.uint32)
        ExploredThreads=numpy.array([]).astype(numpy.uint32)
        avgD=numpy.array([]).astype(numpy.float32)
        medD=numpy.array([]).astype(numpy.float32)
        stdD=numpy.array([]).astype(numpy.float32)
        minD=numpy.array([]).astype(numpy.float32)
        maxD=numpy.array([]).astype(numpy.float32)
        avgR=numpy.array([]).astype(numpy.float32)
        medR=numpy.array([]).astype(numpy.float32)
        stdR=numpy.array([]).astype(numpy.float32)
        minR=numpy.array([]).astype(numpy.float32)
        maxR=numpy.array([]).astype(numpy.float32)

        # Share iterations between devices; integer division (//) avoids the
        # float precision loss of / for large iteration counts, +1 rounds up
        IterationsMPI=numpy.uint64(Iterations//len(Devices))
        if Iterations%len(Devices)!=0:
            IterationsMPI+=1

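        # Scan every (Blocks,Threads) couple; each one is timed Redo times and
        # summarized into the statistics series below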
        for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

            ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
            ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
            ExploredThreads=numpy.append(ExploredThreads,Threads)

            DurationItem=numpy.array([]).astype(numpy.float32)
            Duration=numpy.array([]).astype(numpy.float32)
            Rate=numpy.array([]).astype(numpy.float32)
            for i in range(Redo):
                time_start=time.time()

                r=1
                # Distribution of Devices over nodes
                InputCL={}
                InputCL['Iterations']=IterationsMPI
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['GpuStyle']=GpuStyle
                InputCL['IfThen']=IfThen

                for Device in Devices[1:]:
                    print("Send to device %i on rank %i" % (Device,r))
                    InputCL['Device']=Device
                    comm.send('CONTINUE',dest=r,tag=11)
                    comm.send(InputCL,dest=r,tag=11)
                    r+=1

                # Compute on rank 0
                print("Compute on rank 0")
                InputCL['Device']=Devices[0]

                if GpuStyle=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                Inside=OutputCL['Inside']
                NewIterations=OutputCL['NewIterations']

                for slave in range(1,len(Devices)):
                    print("Get OutputCL from %i" % slave)
                    OutputCL=comm.recv(source=slave,tag=11)
                    print(OutputCL)
                    NewIterations+=OutputCL['NewIterations']
                    Inside+=OutputCL['Inside']

                print("Pi estimation %.8f" % (4./NewIterations*Inside))

                Duration=numpy.append(Duration,time.time()-time_start)
                Rate=numpy.append(Rate,NewIterations/Duration[-1])

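            # Aggregate the Redo timings of this couple into statistics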
            avgD=numpy.append(avgD,numpy.average(Duration))
            medD=numpy.append(medD,numpy.median(Duration))
            stdD=numpy.append(stdD,numpy.std(Duration))
            minD=numpy.append(minD,numpy.min(Duration))
            maxD=numpy.append(maxD,numpy.max(Duration))
            avgR=numpy.append(avgR,numpy.average(Rate))
            medR=numpy.append(medR,numpy.median(Rate))
            stdR=numpy.append(stdR,numpy.std(Rate))
            minR=numpy.append(minR,numpy.min(Rate))
            maxR=numpy.append(maxR,numpy.max(Rate))

            print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

            numpy.savez("PiMPI_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
            ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
            numpy.savetxt("PiMPI_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
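            # Note (illustrative): numpy.savez stores the stacked series under
            # its default key, so a later session can reload them with e.g.
            #   numpy.load("PiMPI_<...>.npz")['arr_0']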

            if Fit:
                # 'median' was undefined here; medD (the median durations) is
                # assumed to be the series FitAndPrint expects
                FitAndPrint(ExploredJobs,medD,Curves)
        # Send MPI exit tag
        for slave in range(1,RankSize):
            comm.send('BREAK',dest=slave,tag=11)

    else:
        while True:
            Signal=comm.recv(source=0,tag=11)
            if Signal=='CONTINUE':
                # Receive information from Master
                InputCL=comm.recv(source=0,tag=11)
                print("Parameters retrieved for rank %s of %s on %s from master:" % (rank,RankSize,gethostname()))
                print("Input CL: %s" % InputCL)
                # Execute on slave

                if InputCL['GpuStyle']=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (InputCL['Blocks'],InputCL['Threads']))
                elif InputCL['GpuStyle']=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (InputCL['Blocks'],InputCL['Threads']))

                print("Output CL: %s" % OutputCL)
                # Send information to Master
                comm.send(OutputCL,dest=0,tag=11)
                print("Data sent to master")
            else:
                print('Exit signal from Master')
                break