#!/usr/bin/env python

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

from threading import Thread

class threadWithReturn(Thread):
    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)

        self._return = None

    def run(self):
        if self._Thread__target is not None:
            self._return = self._Thread__target(*self._Thread__args, **self._Thread__kwargs)

    def join(self, *args, **kwargs):
        super(threadWithReturn, self).join(*args, **kwargs)

        return self._return
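
# Illustrative use of threadWithReturn (not exercised in the MPI flow
# below): the target's return value comes back through join().
#   t=threadWithReturn(target=math.sqrt,args=(4.,))
#   t.start()
#   print t.join()  # 2.0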

# mpi4py provides the MPI communicator used below; it is imported here
# explicitly in case the star import of PiXPU does not already expose MPI.
from mpi4py import MPI

from PiXPU import *

if __name__=='__main__':

    # MPI Init
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Define number of nodes on which computing is performed (rank 0 excluded)
    RankSize=comm.Get_size()
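
    # Typical launch (illustrative): one master rank plus one slave rank per
    # device, e.g. for two OpenCL devices:
    #   mpirun -np 3 python PiHybrid.py -g OpenCL -d 1 -d 2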

    if rank == 0:

        # Set default values

        # Id of Device : 1 is the first one found !
        Device=1
        # GPU style can be Cuda (Nvidia implementation) or OpenCL
        GpuStyle='OpenCL'
        # Iterations is an integer
        Iterations=10000000
        # BlocksBegin is the first number of Blocks to explore
        BlocksBegin=1
        # BlocksEnd is the last number of Blocks to explore
        BlocksEnd=16
        # BlocksStep is the step of Blocks to explore
        BlocksStep=1
        # ThreadsBegin is the first number of Threads to explore
        ThreadsBegin=1
        # ThreadsEnd is the last number of Threads to explore
        ThreadsEnd=1
        # ThreadsStep is the step of Threads to explore
        ThreadsStep=1
        # Redo is the number of times the test is redone to improve metrology
        Redo=1
        # OutMetrology is the method for duration estimation : False is GPU inside
        OutMetrology=False
        Metrology='InMetro'
        # Curves is True to print the curves
        Curves=False
        # Fit is True to fit and print the curves
        Fit=False
        # Marsaglia RNG
        RNG='MWC'
        # Value type : INT32, INT64, FP32, FP64
        ValueType='FP32'

        HowToUse='%s -o (Out of Core Metrology) -c (Print Curves) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

        try:
            opts, args = getopt.getopt(sys.argv[1:],"hocg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
        except getopt.GetoptError:
            print HowToUse % sys.argv[0]
            sys.exit(2)

        # List of Devices
        Devices=[]
        Alu={}

        for opt, arg in opts:
            if opt == '-h':
                print HowToUse % sys.argv[0]

                print "\nInformation about devices detected under OpenCL:"
                # For PyOpenCL import
                try:
                    import pyopencl as cl
                    Id=1
                    for platform in cl.get_platforms():
                        for device in platform.get_devices():
                            deviceType=cl.device_type.to_string(device.type)
                            print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip())
                            Id=Id+1

                    print
                    sys.exit()
                except ImportError:
                    print "Your platform does not seem to support OpenCL"

            elif opt == '-o':
                OutMetrology=True
                Metrology='OutMetro'
            elif opt == '-c':
                Curves=True
            elif opt in ("-d", "--device"):
                Devices.append(int(arg))
            elif opt in ("-g", "--gpustyle"):
                GpuStyle = arg
            elif opt in ("-m", "--marsaglia"):
                RNG = arg
            elif opt in ("-v", "--valuetype"):
                ValueType = arg
            elif opt in ("-i", "--iterations"):
                Iterations = numpy.uint64(arg)
            elif opt in ("-b", "--blocksbegin"):
                BlocksBegin = int(arg)
            elif opt in ("-e", "--blocksend"):
                BlocksEnd = int(arg)
            elif opt in ("-s", "--blocksstep"):
                BlocksStep = int(arg)
            elif opt in ("-f", "--threadsfirst"):
                ThreadsBegin = int(arg)
            elif opt in ("-l", "--threadslast"):
                ThreadsEnd = int(arg)
            elif opt in ("-t", "--threadsstep"):
                ThreadsStep = int(arg)
            elif opt in ("-r", "--redo"):
                Redo = int(arg)

        print "Devices Identification : %s" % Devices
        print "GpuStyle used : %s" % GpuStyle
        print "Iterations : %s" % Iterations
        print "Number of Blocks on begin : %s" % BlocksBegin
        print "Number of Blocks on end : %s" % BlocksEnd
        print "Step on Blocks : %s" % BlocksStep
        print "Number of Threads on begin : %s" % ThreadsBegin
        print "Number of Threads on end : %s" % ThreadsEnd
        print "Step on Threads : %s" % ThreadsStep
        print "Number of redo : %s" % Redo
        print "Metrology done out of XPU : %r" % OutMetrology
        print "Type of Marsaglia RNG used : %s" % RNG
        print "Type of variable : %s" % ValueType

        if GpuStyle=='CUDA':
            try:
                # For PyCUDA import
                import pycuda.driver as cuda
                import pycuda.gpuarray as gpuarray
                import pycuda.autoinit
                from pycuda.compiler import SourceModule
            except ImportError:
                print "Platform does not seem to support CUDA"

        if GpuStyle=='OpenCL':
            try:
                # For PyOpenCL import
                import pyopencl as cl
                Id=1
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        deviceType=cl.device_type.to_string(device.type)
                        print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip())

                        if Id in Devices:
                            # Set the Alu as detected Device Type
                            Alu[Id]=deviceType
                        Id=Id+1
            except ImportError:
                print "Platform does not seem to support OpenCL"

        print Devices,Alu

        BlocksList=xrange(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
        ThreadsList=xrange(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)
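        # With the defaults this scans Blocks=1,2,...,16 for Threads=1,
        # i.e. 16 (Blocks,Threads) couples; both ends are inclusive.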

        ExploredJobs=numpy.array([]).astype(numpy.uint32)
        ExploredBlocks=numpy.array([]).astype(numpy.uint32)
        ExploredThreads=numpy.array([]).astype(numpy.uint32)
        avgD=numpy.array([]).astype(numpy.float32)
        medD=numpy.array([]).astype(numpy.float32)
        stdD=numpy.array([]).astype(numpy.float32)
        minD=numpy.array([]).astype(numpy.float32)
        maxD=numpy.array([]).astype(numpy.float32)
        avgR=numpy.array([]).astype(numpy.float32)
        medR=numpy.array([]).astype(numpy.float32)
        stdR=numpy.array([]).astype(numpy.float32)
        minR=numpy.array([]).astype(numpy.float32)
        maxR=numpy.array([]).astype(numpy.float32)

        for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

            # print Blocks,Threads
            circle=numpy.zeros(Blocks*Threads).astype(numpy.uint64)
            ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
            ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
            ExploredThreads=numpy.append(ExploredThreads,Threads)

            if OutMetrology:
                DurationItem=numpy.array([]).astype(numpy.float32)
                Duration=numpy.array([]).astype(numpy.float32)
                Rate=numpy.array([]).astype(numpy.float32)
                for i in range(Redo):
                    start=time.time()
                    if GpuStyle=='CUDA':
                        try:
                            Inside,NewIterations,DurationItem=MetropolisCuda(circle,Iterations,1,Blocks,Threads,RNG,ValueType)
                        except:
                            print "Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads)
                    elif GpuStyle=='OpenCL':
                        try:
                            MetroParamCL={}
                            MetroParamCL['Iterations']=Iterations
                            MetroParamCL['Steps']=1
                            MetroParamCL['Blocks']=Blocks
                            MetroParamCL['Threads']=Threads
                            MetroParamCL['Device']=Devices[0]
                            MetroParamCL['RNG']=RNG
                            MetroParamCL['ValueType']=ValueType

                            OutputCL=MetropolisOpenCL(MetroParamCL)
                            # Needed for the Rate estimate below
                            NewIterations=OutputCL['NewIterations']
                        except:
                            print "Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads)
                    Duration=numpy.append(Duration,time.time()-start)
                    Rate=numpy.append(Rate,NewIterations/Duration[-1])
            else:
                if GpuStyle=='CUDA':
                    try:
                        Inside,NewIterations,Duration=MetropolisCuda(circle,Iterations,Redo,Blocks,Threads,RNG,ValueType)
                    except:
                        print "Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads)
                elif GpuStyle=='OpenCL':
                    try:
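                        # Split the iterations across the devices, rounding up
                        # so the total is at least the requested amount.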
                        IterationsMPI=numpy.uint64(Iterations/len(Devices))
                        if Iterations%len(Devices)!=0:
                            IterationsMPI+=1

                        r=1
                        time_start=time.time()
                        for Device in Devices:
                            InputCL={}
                            InputCL['Iterations']=IterationsMPI
                            InputCL['Steps']=Redo
                            InputCL['Blocks']=Blocks
                            InputCL['Threads']=Threads
                            InputCL['Device']=Device
                            InputCL['RNG']=RNG
                            InputCL['ValueType']=ValueType
                            print "Send to device %i on rank %i" % (Device,r)
                            comm.send('CONTINUE',dest=r,tag=11)
                            comm.send(InputCL,dest=r,tag=11)
                            r+=1

                        Inside=0
                        NewIterations=0
                        for slave in xrange(1,len(Devices)+1):
                            print "slave %i" % slave
                            OutputCL=comm.recv(source=slave,tag=11)
                            print "OutputCL from rank %s %s" % (slave,OutputCL)
                            NewIterations+=OutputCL['NewIterations']
                            Inside+=OutputCL['Inside']

                        Duration=time.time()-time_start
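                        # Monte Carlo estimator: the fraction of points inside
                        # the quarter circle tends to Pi/4, hence the factor 4.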
                        print "Pi estimation %.8f" % (4./NewIterations*Inside)
                    except:
                        print "Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads)
            Duration=OutputCL['Duration']
            NewIterations=OutputCL['NewIterations']
            Rate=NewIterations/Duration

            avgD=numpy.append(avgD,numpy.average(Duration))
            medD=numpy.append(medD,numpy.median(Duration))
            stdD=numpy.append(stdD,numpy.std(Duration))
            minD=numpy.append(minD,numpy.min(Duration))
            maxD=numpy.append(maxD,numpy.max(Duration))
            avgR=numpy.append(avgR,numpy.average(Rate))
            medR=numpy.append(medR,numpy.median(Rate))
            stdR=numpy.append(stdR,numpy.std(Rate))
            minR=numpy.append(minR,numpy.min(Rate))
            maxR=numpy.append(maxR,numpy.max(Rate))

            print "%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1])

            numpy.savez("Pi_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
            ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
            numpy.savetxt("Pi_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
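            # The .npz archive stores the tuple under numpy's default key and
            # can be reloaded for later analysis (illustrative):
            #   Results=numpy.load("Pi_<...>.npz")['arr_0']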

            if Fit:
                # medD holds the median durations computed above
                FitAndPrint(ExploredJobs,medD,Curves)
        # Send MPI exit tag
        for slave in xrange(1,RankSize):
            comm.send('BREAK',dest=slave,tag=11)

    else:
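        # Slave ranks loop: a 'CONTINUE' order is followed by the computation
        # parameters; any other signal ('BREAK') ends the loop.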
        while True:
            Signal=comm.recv(source=0,tag=11)
            if Signal=='CONTINUE':
                # Receive information from Master
                InputCL=comm.recv(source=0,tag=11)
                print "Parameters retrieved from master %s " % InputCL
                # Execute on slave
                OutputCL=MetropolisOpenCL(InputCL)
                print OutputCL
                # Send information to Master
                comm.send(OutputCL,dest=0,tag=11)
                print "Data sent to master"
            else:
                print 'Exit signal from Master'
                break