Statistiques
| Révision :

root / Pi / XPU / PiHybrid.py @ 309

Historique | Voir | Annoter | Télécharger (14,13 ko)

1
#!/usr/bin/env python
2

    
3
#
4
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
5
#
6
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
7
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8
#
9
# Thanks to Andreas Klockner for PyCUDA:
10
# http://mathema.tician.de/software/pycuda
11
# Thanks to Andreas Klockner for PyOpenCL:
12
# http://mathema.tician.de/software/pyopencl
13
# 
14

    
15
# 2013-01-01 : problems with launch timeout
16
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
17
# Option "Interactive" "0" in /etc/X11/xorg.conf
18

    
19
# Common tools
20
import numpy
21
from numpy.random import randint as nprnd
22
import sys
23
import getopt
24
import time
25
import math
26
import itertools
27
from socket import gethostname
28

    
29
from threading import Thread
30

    
31
class threadWithReturn(Thread):
    """Thread whose join() hands back the value returned by its target.

    Construct it exactly like threading.Thread (target=, args=, kwargs=...).
    After start(), join() waits for completion as usual and then returns
    whatever the target callable returned, or None when no target was given
    or the thread has not finished within the join timeout.
    """

    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        # Filled in by run(); stays None until the target has returned
        self._return = None

    def run(self):
        """Call the target and keep its result for join()."""
        # Thread keeps the callable and its arguments in private attributes:
        # Python 2 name-mangles them (_Thread__target, ...) whereas Python 3
        # uses a single underscore (_target, ...). Pick whichever exists so
        # the class works on both interpreters.
        prefix = '_Thread__' if hasattr(self, '_Thread__target') else '_'
        target = getattr(self, prefix + 'target', None)
        if target is not None:
            args = getattr(self, prefix + 'args')
            kwargs = getattr(self, prefix + 'kwargs')
            self._return = target(*args, **kwargs)

    def join(self, *args, **kwargs):
        """Wait like Thread.join(), then return the target's result."""
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return
45

    
46
from PiXPU import *
47

    
48
# Tag shared by every MPI message exchanged between the master and the workers
MPI_TAG = 11


def _ListOpenCLDevices(Selected=()):
    """Print every OpenCL device and return the type of the selected ones.

    Devices are numbered from 1 in the order the platforms report them.
    Returns a dict {device id: device type string} restricted to the ids
    found in Selected. Raises ImportError when pyopencl is not installed.
    """
    import pyopencl as cl
    Alu = {}
    Id = 1
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            deviceType = cl.device_type.to_string(device.type)
            print("Device #%i from %s of type %s : %s" % (Id, platform.vendor.strip(), deviceType, device.name.strip()))
            if Id in Selected:
                # Set the Alu as detected Device Type
                Alu[Id] = deviceType
            Id += 1
    return Alu


def _ParseCommandLine(argv):
    """Turn the command line (program name first) into a settings dict.

    Exits with status 2 on an unknown option or an unsupported GPU style,
    and with status 0 after serving -h.
    """
    Settings = {
        # Id of Device used when no -d is given : 1 is for first find !
        'Device': 1,
        # Ids of the devices explicitly requested with -d
        'Devices': [],
        # GPU style can be CUDA (Nvidia implementation) or OpenCL
        'GpuStyle': 'OpenCL',
        # Iterations is integer
        'Iterations': 10000000,
        # First, last and step of the number of Blocks to explore
        'BlocksBegin': 1,
        'BlocksEnd': 16,
        'BlocksStep': 1,
        # First, last and step of the number of Threads to explore
        'ThreadsBegin': 1,
        'ThreadsEnd': 1,
        'ThreadsStep': 1,
        # Redo is the times to redo the test to improve metrology
        'Redo': 1,
        # OutMetrology is method for duration estimation : False is GPU inside
        'OutMetrology': False,
        'Metrology': 'InMetro',
        # Curves is True to print the curves
        'Curves': False,
        # Fit is True to fit the results (no command line switch enables it)
        'Fit': False,
        # Marsaglia RNG
        'RNG': 'MWC',
        # Value type : INT32, INT64, FP32, FP64
        'ValueType': 'FP32',
    }

    HowToUse = '%s -o (Out of Core Metrology) -c (Print Curves) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadssTep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        opts, args = getopt.getopt(argv[1:], "hocg:i:b:e:s:f:l:t:r:d:m:v:", ["gpustyle=", "iterations=", "blocksBegin=", "blocksEnd=", "blocksStep=", "threadsFirst=", "threadsLast=", "threadssTep=", "redo=", "device=", "marsaglia=", "valuetype="])
    except getopt.GetoptError:
        print(HowToUse % argv[0])
        sys.exit(2)

    # (accepted spellings, settings key, converter) for options taking a value
    Valued = (
        (("-g", "--gpustyle"), 'GpuStyle', str),
        (("-m", "--marsaglia"), 'RNG', str),
        (("-v", "--valuetype"), 'ValueType', str),
        (("-i", "--iterations"), 'Iterations', numpy.uint64),
        (("-b", "--blocksbegin"), 'BlocksBegin', int),
        (("-e", "--blocksend"), 'BlocksEnd', int),
        (("-s", "--blocksstep"), 'BlocksStep', int),
        (("-f", "--threadsfirst"), 'ThreadsBegin', int),
        (("-l", "--threadslast"), 'ThreadsEnd', int),
        (("-t", "--threadsstep"), 'ThreadsStep', int),
        (("-r", "--redo"), 'Redo', int),
    )
    Dispatch = {}
    for Spellings, Key, Convert in Valued:
        for Spelling in Spellings:
            Dispatch[Spelling] = (Key, Convert)

    for opt, arg in opts:
        # getopt reports long options with the mixed case they were declared
        # with ("--blocksBegin"); compare in lower case so they are honoured.
        Name = opt.lower()
        if Name == '-h':
            print(HowToUse % argv[0])
            print("\nInformations about devices detected under OpenCL:")
            try:
                _ListOpenCLDevices()
                print("")
            except ImportError:
                print("Your platform does not seem to support OpenCL")
            # Help never starts a computation, with or without OpenCL
            sys.exit()
        elif Name == '-o':
            Settings['OutMetrology'] = True
            Settings['Metrology'] = 'OutMetro'
        elif Name == '-c':
            Settings['Curves'] = True
        elif Name in ("-d", "--device"):
            Settings['Devices'].append(int(arg))
        elif Name in Dispatch:
            Key, Convert = Dispatch[Name]
            Settings[Key] = Convert(arg)

    if not Settings['Devices']:
        # No -d given: fall back on the default device instead of failing
        # later on Devices[0] or on a division by len(Devices)
        Settings['Devices'] = [Settings['Device']]

    if Settings['GpuStyle'] not in ('CUDA', 'OpenCL'):
        print(HowToUse % argv[0])
        sys.exit(2)

    return Settings


def _OpenCLJob(Iterations, Steps, Blocks, Threads, Device, RNG, ValueType):
    """Build the parameter dict handed to MetropolisOpenCL."""
    return {
        'Iterations': Iterations,
        'Steps': Steps,
        'Blocks': Blocks,
        'Threads': Threads,
        'Device': Device,
        'RNG': RNG,
        'ValueType': ValueType,
    }


def _MeasureOutOfCore(Settings, circle, Blocks, Threads):
    """Time Redo single-step runs with the wall clock of the master.

    The computation runs on the master rank itself (first device for
    OpenCL). Returns (Duration, Rate), two arrays with one entry per redo.
    """
    Duration = numpy.array([]).astype(numpy.float32)
    Rate = numpy.array([]).astype(numpy.float32)
    for i in range(Settings['Redo']):
        start = time.time()
        if Settings['GpuStyle'] == 'CUDA':
            Inside, NewIterations, DurationItem = MetropolisCuda(circle, Settings['Iterations'], 1, Blocks, Threads, Settings['RNG'], Settings['ValueType'])
        else:
            OutputCL = MetropolisOpenCL(_OpenCLJob(Settings['Iterations'], 1, Blocks, Threads, Settings['Devices'][0], Settings['RNG'], Settings['ValueType']))
            # The rate below needs the iteration count actually performed
            NewIterations = OutputCL['NewIterations']
        Duration = numpy.append(Duration, time.time() - start)
        Rate = numpy.append(Rate, NewIterations / Duration[-1])
    return Duration, Rate


def _MeasureOnWorkers(comm, Settings, Blocks, Threads):
    """Spread one configuration over the MPI workers, one device per rank.

    Device number k of Settings['Devices'] is served by rank k (counted
    from 1). Returns (Duration, Rate) aggregated over all workers. Raises
    RuntimeError when at least one worker reported a failure.
    """
    Devices = Settings['Devices']
    # Share the iterations between devices, rounding up so none is lost
    Total = int(Settings['Iterations'])
    IterationsMPI = numpy.uint64((Total + len(Devices) - 1) // len(Devices))

    for r, Device in enumerate(Devices, 1):
        InputCL = _OpenCLJob(IterationsMPI, Settings['Redo'], Blocks, Threads, Device, Settings['RNG'], Settings['ValueType'])
        print("Send to device %i on rank %i" % (Device, r))
        comm.send('CONTINUE', dest=r, tag=MPI_TAG)
        comm.send(InputCL, dest=r, tag=MPI_TAG)

    # Collect every reply before looking at errors so that no message is
    # left pending for the next configuration
    Outputs = []
    for slave in range(1, len(Devices) + 1):
        print("slave %i" % slave)
        OutputCL = comm.recv(source=slave, tag=MPI_TAG)
        print("OutputCL from rank %s %s" % (slave, OutputCL))
        Outputs.append(OutputCL)

    Failures = [OutputCL['Error'] for OutputCL in Outputs if 'Error' in OutputCL]
    if Failures:
        raise RuntimeError("; ".join(Failures))

    Inside = 0
    NewIterations = 0
    Duration = None
    for OutputCL in Outputs:
        NewIterations += OutputCL['NewIterations']
        Inside += OutputCL['Inside']
        # Workers run side by side, so the slowest one bounds the duration.
        # NOTE(review): assumes every worker returns a 'Duration' of the
        # same shape - confirm against MetropolisOpenCL. With one device
        # this is exactly that device's own figure.
        if Duration is None:
            Duration = OutputCL['Duration']
        else:
            Duration = numpy.maximum(Duration, OutputCL['Duration'])

    print("Pi estimation %.8f" % (4. / NewIterations * Inside))
    return Duration, NewIterations / Duration


def _Summarize(Values):
    """Return (average, median, standard deviation, min, max) of Values."""
    return (numpy.average(Values), numpy.median(Values), numpy.std(Values), numpy.min(Values), numpy.max(Values))


def _RunMaster(comm, RankSize, argv):
    """Rank 0: explore every (Blocks, Threads) pair and save the statistics.

    The caller is responsible for sending the final 'BREAK' to the workers.
    """
    Settings = _ParseCommandLine(argv)
    Devices = Settings['Devices']
    GpuStyle = Settings['GpuStyle']
    Iterations = Settings['Iterations']
    OutMetrology = Settings['OutMetrology']

    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % Settings['BlocksBegin'])
    print("Number of Blocks on end : %s" % Settings['BlocksEnd'])
    print("Step on Blocks : %s" % Settings['BlocksStep'])
    print("Number of Threads on begin : %s" % Settings['ThreadsBegin'])
    print("Number of Threads on end : %s" % Settings['ThreadsEnd'])
    print("Step on Threads : %s" % Settings['ThreadsStep'])
    print("Number of redo : %s" % Settings['Redo'])
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % Settings['RNG'])
    print("Type of variable : %s" % Settings['ValueType'])

    # Type of each requested device, as detected through OpenCL
    Alu = {}

    if GpuStyle == 'CUDA':
        try:
            # For PyCUDA import (autoinit creates the CUDA context)
            import pycuda.driver as cuda
            import pycuda.gpuarray as gpuarray
            import pycuda.autoinit
            from pycuda.compiler import SourceModule
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle == 'OpenCL':
        try:
            Alu = _ListOpenCLDevices(Devices)
        except ImportError:
            print("Platform does not seem to support OpenCL")

    print("%s %s" % (Devices, Alu))

    # Only the in-core OpenCL mode hands the work over to the worker ranks
    if GpuStyle == 'OpenCL' and not OutMetrology and len(Devices) > RankSize - 1:
        print("%i device(s) requested but only %i MPI worker rank(s) available" % (len(Devices), RankSize - 1))
        sys.exit(2)

    BlocksList = range(Settings['BlocksBegin'], Settings['BlocksEnd'] + Settings['BlocksStep'], Settings['BlocksStep'])
    ThreadsList = range(Settings['ThreadsBegin'], Settings['ThreadsEnd'] + Settings['ThreadsStep'], Settings['ThreadsStep'])

    # Same base name for the .npz archive and the text table; a device whose
    # type was not detected (CUDA, unknown id) is labelled 'Unknown'
    OutputName = "Pi_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (Settings['ValueType'], Settings['RNG'], Alu.get(Devices[0], 'Unknown'), GpuStyle, Settings['BlocksBegin'], Settings['BlocksEnd'], Settings['ThreadsBegin'], Settings['ThreadsEnd'], Iterations, Devices[0], Settings['Metrology'], gethostname())

    # Wording used by the failure message for each GPU style
    StyleLabel = 'Cuda' if GpuStyle == 'CUDA' else 'OpenCL'

    ExploredJobs = []
    ExploredBlocks = []
    ExploredThreads = []
    avgD, medD, stdD, minD, maxD = [], [], [], [], []
    avgR, medR, stdR, minR, maxR = [], [], [], [], []

    for Blocks, Threads in itertools.product(BlocksList, ThreadsList):

        circle = numpy.zeros(Blocks * Threads).astype(numpy.uint64)

        try:
            if OutMetrology:
                Duration, Rate = _MeasureOutOfCore(Settings, circle, Blocks, Threads)
            elif GpuStyle == 'CUDA':
                Inside, NewIterations, Duration = MetropolisCuda(circle, Iterations, Settings['Redo'], Blocks, Threads, Settings['RNG'], Settings['ValueType'])
                Rate = NewIterations / Duration
            else:
                Duration, Rate = _MeasureOnWorkers(comm, Settings, Blocks, Threads)
        except Exception as error:
            print("Problem with (%i,%i) // computations on %s" % (Blocks, Threads, StyleLabel))
            print(error)
            # Nothing was measured: skip this pair rather than recording
            # stale values from the previous one
            continue

        # Record the configuration only once it has been measured so that
        # every column keeps the same length
        ExploredJobs.append(Blocks * Threads)
        ExploredBlocks.append(Blocks)
        ExploredThreads.append(Threads)

        for Column, Value in zip((avgD, medD, stdD, minD, maxD), _Summarize(Duration)):
            Column.append(Value)
        for Column, Value in zip((avgR, medR, stdR, minR, maxR), _Summarize(Rate)):
            Column.append(Value)

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1], medD[-1], stdD[-1], minD[-1], maxD[-1], avgR[-1], medR[-1], stdR[-1], minR[-1], maxR[-1]))

        # Both files are rewritten after every pair, so partial results
        # survive an interrupted exploration
        ToSave = [ExploredBlocks, ExploredThreads, avgD, medD, stdD, minD, maxD, avgR, medR, stdR, minR, maxR]
        numpy.savez(OutputName, ToSave)
        numpy.savetxt(OutputName, numpy.transpose(ToSave), fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

        if Settings['Fit']:
            # The original passed an undefined name `median`; the median
            # durations are the matching series collected here
            FitAndPrint(numpy.array(ExploredJobs), numpy.array(medD), Settings['Curves'])


def _RunWorker(comm):
    """Ranks > 0: serve the jobs sent by the master until told to stop."""
    while True:
        Signal = comm.recv(source=0, tag=MPI_TAG)
        if Signal != 'CONTINUE':
            print('Exit signal from Master')
            break
        # Receive information from Master
        InputCL = comm.recv(source=0, tag=MPI_TAG)
        print("Parameters retreive from master %s " % InputCL)
        # Execute on slave; a failure is reported back instead of leaving
        # the master blocked in its receive
        try:
            OutputCL = MetropolisOpenCL(InputCL)
        except Exception as error:
            OutputCL = {'Error': "rank %i: %s" % (comm.Get_rank(), error)}
        print(OutputCL)
        # Send information to Master
        comm.send(OutputCL, dest=0, tag=MPI_TAG)
        print("Data sent to master")


if __name__=='__main__':

    # MPI Init
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Total number of MPI ranks: rank 0 is the master, the others compute
    RankSize = comm.Get_size()

    if rank == 0:
        try:
            _RunMaster(comm, RankSize, sys.argv)
        finally:
            # Send MPI exit tag, even when the master stops early (help,
            # bad option, error): otherwise the workers wait forever
            for slave in range(1, RankSize):
                comm.send('BREAK', dest=slave, tag=MPI_TAG)
    else:
        _RunWorker(comm)