#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf
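# A minimal, self-contained sketch of the estimator this script distributes
# (illustrative only and not used below; the real kernels live in PiXPU and
# run through PyCUDA/PyOpenCL): draw N points uniformly in the unit square,
# count those falling inside the quarter disc, and estimate Pi as 4*Inside/N.
import random

def PiMonteCarloReference(N):
    # Hypothetical helper, for illustration only
    Inside=0
    for _ in range(N):
        x,y=random.random(),random.random()
        if x*x+y*y<=1.0:
            Inside+=1
    return 4.0*Inside/N
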
# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

import mpi4py
from mpi4py import MPI

from PiXPU import *

if __name__=='__main__':

    # MPI Init
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Define number of Nodes on which computing is performed (exclude 0)
    RankSize=comm.Get_size()

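    # Control protocol (as implemented below): rank 0 parses the options and,
    # for each explored (Blocks,Threads) couple, sends the string 'CONTINUE'
    # followed by an InputCL dictionary to each worker rank on tag 11; every
    # worker answers with an OutputCL dictionary; a final 'BREAK' releases
    # the workers from their receive loop.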
    if rank == 0:

        # Set default values

        # Id of Device : 1 is for the first found!
        Device=1
        # GPU style can be CUDA (Nvidia implementation) or OpenCL
        GpuStyle='OpenCL'
        # Iterations is an integer
        Iterations=10000000
        # BlocksBegin is the first number of Blocks to explore
        BlocksBegin=1
        # BlocksEnd is the last number of Blocks to explore
        BlocksEnd=16
        # BlocksStep is the step between Blocks to explore
        BlocksStep=1
        # ThreadsBegin is the first number of Threads to explore
        ThreadsBegin=1
        # ThreadsEnd is the last number of Threads to explore
        ThreadsEnd=1
        # ThreadsStep is the step between Threads to explore
        ThreadsStep=1
        # Redo is the number of times the test is redone to improve metrology
        Redo=1
        # OutMetrology is the method for duration estimation : False means timing inside the XPU
        OutMetrology=False
        Metrology='InMetro'
        # Curves is True to print the curves
        Curves=False
        # Fit is True to fit the performance curve
        Fit=False
        # Marsaglia RNG
        RNG='MWC'
        # Value type : INT32, INT64, FP32, FP64
        ValueType='FP32'

        HowToUse='%s -c (Print Curves) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

        try:
            opts, args = getopt.getopt(sys.argv[1:],"hcg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
        except getopt.GetoptError:
            print(HowToUse % sys.argv[0])
            sys.exit(2)

        # List of Devices
        Devices=[]
        Alu={}

        for opt, arg in opts:
            if opt == '-h':
                print(HowToUse % sys.argv[0])

                print("\nInformation about devices detected under OpenCL:")
                # For PyOpenCL import
                try:
                    import pyopencl as cl
                    Id=0
                    for platform in cl.get_platforms():
                        for device in platform.get_devices():
                            #deviceType=cl.device_type.to_string(device.type)
                            deviceType="xPU"
                            print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                            Id=Id+1

                    print()
                except:
                    print("Your platform does not seem to support OpenCL")

                print("\nInformation about devices detected under CUDA API:")
                # For PyCUDA import
                try:
                    import pycuda.driver as cuda
                    cuda.init()
                    for Id in range(cuda.Device.count()):
                        device=cuda.Device(Id)
                        print("Device #%i of type GPU : %s" % (Id,device.name()))
                    print()
                except:
                    print("Your platform does not seem to support CUDA")

                sys.exit()

            elif opt == '-c':
                Curves=True
            elif opt in ("-d", "--device"):
                Devices.append(int(arg))
            elif opt in ("-g", "--gpustyle"):
                GpuStyle = arg
            elif opt in ("-m", "--marsaglia"):
                RNG = arg
            elif opt in ("-v", "--valuetype"):
                ValueType = arg
            elif opt in ("-i", "--iterations"):
                Iterations = numpy.uint64(arg)
            elif opt in ("-b", "--blocksbegin"):
                BlocksBegin = int(arg)
            elif opt in ("-e", "--blocksend"):
                BlocksEnd = int(arg)
            elif opt in ("-s", "--blocksstep"):
                BlocksStep = int(arg)
            elif opt in ("-f", "--threadsfirst"):
                ThreadsBegin = int(arg)
            elif opt in ("-l", "--threadslast"):
                ThreadsEnd = int(arg)
            elif opt in ("-t", "--threadsstep"):
                ThreadsStep = int(arg)
            elif opt in ("-r", "--redo"):
                Redo = int(arg)

        # If no device was selected with -d, fall back on the default Device
        if len(Devices)==0:
            Devices.append(Device)

        print("Devices Identification : %s" % Devices)
        print("GpuStyle used : %s" % GpuStyle)
        print("Iterations : %s" % Iterations)
        print("Number of Blocks on begin : %s" % BlocksBegin)
        print("Number of Blocks on end : %s" % BlocksEnd)
        print("Step on Blocks : %s" % BlocksStep)
        print("Number of Threads on begin : %s" % ThreadsBegin)
        print("Number of Threads on end : %s" % ThreadsEnd)
        print("Step on Threads : %s" % ThreadsStep)
        print("Number of redo : %s" % Redo)
        print("Metrology done out of XPU : %r" % OutMetrology)
        print("Type of Marsaglia RNG used : %s" % RNG)
        print("Type of variable : %s" % ValueType)

        if GpuStyle=='CUDA':
            try:
                # For PyCUDA import
                import pycuda.driver as cuda

                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                    if Id in Devices:
                        Alu[Id]='GPU'
            except ImportError:
                print("Platform does not seem to support CUDA")

        if GpuStyle=='OpenCL':
            try:
                # For PyOpenCL import
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="*PU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.strip(),deviceType,device.name.strip()))

                        if Id in Devices:
                            # Set the Alu as detected Device Type
                            Alu[Id]=deviceType
                        Id=Id+1
            except ImportError:
                print("Platform does not seem to support OpenCL")

        print(Devices,Alu)

        BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
        ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)
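        # Note: adding the step to the stop value makes these ranges include
        # BlocksEnd and ThreadsEnd, e.g. begin=1, end=16, step=1 explores 1..16.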
        ExploredJobs=numpy.array([]).astype(numpy.uint32)
        ExploredBlocks=numpy.array([]).astype(numpy.uint32)
        ExploredThreads=numpy.array([]).astype(numpy.uint32)
        avgD=numpy.array([]).astype(numpy.float32)
        medD=numpy.array([]).astype(numpy.float32)
        stdD=numpy.array([]).astype(numpy.float32)
        minD=numpy.array([]).astype(numpy.float32)
        maxD=numpy.array([]).astype(numpy.float32)
        avgR=numpy.array([]).astype(numpy.float32)
        medR=numpy.array([]).astype(numpy.float32)
        stdR=numpy.array([]).astype(numpy.float32)
        minR=numpy.array([]).astype(numpy.float32)
        maxR=numpy.array([]).astype(numpy.float32)

        # Share the iterations between devices (ceiling division, so that
        # at least Iterations samples are drawn in total)
        IterationsMPI=numpy.uint64(Iterations//len(Devices))
        if Iterations%len(Devices)!=0:
            IterationsMPI+=1
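        # Example: Iterations=10000000 shared over 3 devices gives
        # IterationsMPI=3333334 per device (10000002 samples in total); the
        # estimate below divides by the real NewIterations count, so the
        # rounding does not bias the result.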
        for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

            ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
            ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
            ExploredThreads=numpy.append(ExploredThreads,Threads)

            DurationItem=numpy.array([]).astype(numpy.float32)
            Duration=numpy.array([]).astype(numpy.float32)
            Rate=numpy.array([]).astype(numpy.float32)
            for i in range(Redo):
                time_start=time.time()

                r=1
                # Distribution of Devices over nodes
                InputCL={}
                InputCL['Iterations']=IterationsMPI
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['GpuStyle']=GpuStyle

                for Device in Devices[1:]:
                    print("Send to device %i on rank %i" % (Device,r))
                    InputCL['Device']=Device
                    comm.send('CONTINUE',dest=r,tag=11)
                    comm.send(InputCL,dest=r,tag=11)
                    r+=1

                # Compute on rank 0
                print("Compute on rank 0")
                InputCL['Device']=Devices[0]
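                # At this point InputCL holds, e.g. (values illustrative):
                # {'Iterations': 3333334, 'Steps': 1, 'Blocks': 16, 'Threads': 1,
                #  'RNG': 'MWC', 'ValueType': 'FP32', 'GpuStyle': 'OpenCL',
                #  'Device': 0}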
                if GpuStyle=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                Inside=OutputCL['Inside']
                NewIterations=OutputCL['NewIterations']

                for slave in range(1,len(Devices)):
                    print("Get OutputCL from %i" % slave)
                    OutputCL=comm.recv(source=slave,tag=11)
                    print(OutputCL)
                    NewIterations+=OutputCL['NewIterations']
                    Inside+=OutputCL['Inside']

                print("Pi estimation %.8f" % (4./NewIterations*Inside))

                Duration=numpy.append(Duration,time.time()-time_start)
                Rate=numpy.append(Rate,NewIterations/Duration[-1])

            avgD=numpy.append(avgD,numpy.average(Duration))
            medD=numpy.append(medD,numpy.median(Duration))
            stdD=numpy.append(stdD,numpy.std(Duration))
            minD=numpy.append(minD,numpy.min(Duration))
            maxD=numpy.append(maxD,numpy.max(Duration))
            avgR=numpy.append(avgR,numpy.average(Rate))
            medR=numpy.append(medR,numpy.median(Rate))
            stdR=numpy.append(stdR,numpy.std(Rate))
            minR=numpy.append(minR,numpy.min(Rate))
            maxR=numpy.append(maxR,numpy.max(Rate))

            print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

            numpy.savez("PiMPI_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
            ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
            numpy.savetxt("PiMPI_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
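            # The saved results can be read back later, e.g. (sketch, with
            # <Output> standing for one of the file names built above):
            #   Results=numpy.loadtxt("<Output>")
            #   ExploredBlocks,medD=Results[:,0],Results[:,3]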
            if Fit:
                # medD holds the median durations gathered above
                FitAndPrint(ExploredJobs,medD,Curves)
        # Send MPI exit tag
        for slave in range(1,RankSize):
            comm.send('BREAK',dest=slave,tag=11)

    else:
        while True:
            Signal=comm.recv(source=0,tag=11)
            if Signal=='CONTINUE':
                # Receive information from Master
                InputCL=comm.recv(source=0,tag=11)
                print("Parameters retrieved for rank %s of %s on %s from master:" % (rank,RankSize,gethostname()))
                print("Input CL: %s" % InputCL)
                # Execute on slave

                if InputCL['GpuStyle']=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (InputCL['Blocks'],InputCL['Threads']))
                elif InputCL['GpuStyle']=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (InputCL['Blocks'],InputCL['Threads']))

                print("Output CL: %s" % OutputCL)
                # Send information to Master
                comm.send(OutputCL,dest=0,tag=11)
                print("Data sent to master")
            else:
                print('Exit signal from Master')
                break
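
# Typical invocation (illustrative; device ids and counts depend on the
# machine): launch one MPI rank per device passed with -d, e.g.
#
#   mpirun -np 3 python3 PiXpuMPI.py -g OpenCL -d 0 -d 1 -d 2 -i 100000000
#
# Rank 0 computes on the first device and drives the others; ranks 1 and 2
# each run the Metropolis kernel on the device they receive.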