#!/usr/bin/env python3
#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf
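
# Example invocation (a sketch; assumes an MPI launcher such as mpirun and
# one MPI rank per device to drive, rank 0 acting as master):
#   mpirun -np 2 python3 PiXpuMPI.py -g OpenCL -d 0 -d 1 -i 100000000
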
# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

import mpi4py
from mpi4py import MPI

from PiXPU import *

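# Every MPI rank executes this same script; the branch on rank below
# selects master or worker behaviour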
if __name__=='__main__':

    # MPI Init
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Define number of Nodes on which computing is performed (exclude 0)
    RankSize=comm.Get_size()

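    # Rank 0 parses the command line, dispatches work to the other ranks
    # and gathers their results; the other ranks run the worker loop below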
    if rank == 0:

        # Set default values

        # Id of Device : 1 is for first found!
        Device=1
        # GPU style can be Cuda (Nvidia implementation) or OpenCL
        GpuStyle='OpenCL'
        # Iterations is integer
        Iterations=10000000
        # BlocksBegin is first number of Blocks to explore
        BlocksBegin=1
        # BlocksEnd is last number of Blocks to explore
        BlocksEnd=16
        # BlocksStep is the step of Blocks to explore
        BlocksStep=1
        # ThreadsBegin is first number of Threads to explore
        ThreadsBegin=1
        # ThreadsEnd is last number of Threads to explore
        ThreadsEnd=1
        # ThreadsStep is the step of Threads to explore
        ThreadsStep=1
        # Redo is the number of times to redo the test to improve metrology
        Redo=1
        # OutMetrology is the method for duration estimation : False is GPU inside
        OutMetrology=False
        Metrology='InMetro'
        # Curves is True to print the curves
        Curves=False
        # Fit is True to fit the results
        Fit=False
        # Marsaglia RNG
        RNG='MWC'
        # Value type : INT32, INT64, FP32, FP64
        ValueType='FP32'

        HowToUse='%s -c (Print Curves) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

        try:
            opts, args = getopt.getopt(sys.argv[1:],"hcg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
        except getopt.GetoptError:
            print(HowToUse % sys.argv[0])
            sys.exit(2)

        # List of Devices
        Devices=[]
        Alu={}

        for opt, arg in opts:
            if opt == '-h':
                print(HowToUse % sys.argv[0])

                print("\nInformation about devices detected under OpenCL:")
                # For PyOpenCL import
                try:
                    import pyopencl as cl
                    Id=0
                    for platform in cl.get_platforms():
                        for device in platform.get_devices():
                            #deviceType=cl.device_type.to_string(device.type)
                            deviceType="xPU"
                            print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                            Id=Id+1

                    print()
                except:
                    print("Your platform does not seem to support OpenCL")

                print("\nInformation about devices detected under CUDA API:")
                # For PyCUDA import
                try:
                    import pycuda.driver as cuda
                    cuda.init()
                    for Id in range(cuda.Device.count()):
                        device=cuda.Device(Id)
                        print("Device #%i of type GPU : %s" % (Id,device.name()))
                    print()
                except:
                    print("Your platform does not seem to support CUDA")

                sys.exit()

            elif opt == '-c':
                Curves=True
            elif opt in ("-d", "--device"):
                Devices.append(int(arg))
            elif opt in ("-g", "--gpustyle"):
                GpuStyle = arg
            elif opt in ("-m", "--marsaglia"):
                RNG = arg
            elif opt in ("-v", "--valuetype"):
                ValueType = arg
            elif opt in ("-i", "--iterations"):
                Iterations = numpy.uint64(arg)
            elif opt in ("-b", "--blocksbegin"):
                BlocksBegin = int(arg)
            elif opt in ("-e", "--blocksend"):
                BlocksEnd = int(arg)
            elif opt in ("-s", "--blocksstep"):
                BlocksStep = int(arg)
            elif opt in ("-f", "--threadsfirst"):
                ThreadsBegin = int(arg)
            elif opt in ("-l", "--threadslast"):
                ThreadsEnd = int(arg)
            elif opt in ("-t", "--threadsstep"):
                ThreadsStep = int(arg)
            elif opt in ("-r", "--redo"):
                Redo = int(arg)

        print("Devices Identification : %s" % Devices)
153 127 equemene
        print("GpuStyle used : %s" % GpuStyle)
154 127 equemene
        print("Iterations : %s" % Iterations)
155 127 equemene
        print("Number of Blocks on begin : %s" % BlocksBegin)
156 127 equemene
        print("Number of Blocks on end : %s" % BlocksEnd)
157 127 equemene
        print("Step on Blocks : %s" % BlocksStep)
158 127 equemene
        print("Number of Threads on begin : %s" % ThreadsBegin)
159 127 equemene
        print("Number of Threads on end : %s" % ThreadsEnd)
160 127 equemene
        print("Step on Threads : %s" % ThreadsStep)
161 127 equemene
        print("Number of redo : %s" % Redo)
162 127 equemene
        print("Metrology done out of XPU : %r" % OutMetrology)
163 127 equemene
        print("Type of Marsaglia RNG used : %s" % RNG)
164 127 equemene
        print("Type of variable : %s" % ValueType)
165 107 equemene
166 107 equemene
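        # Detect the requested devices and record each one's type in Alu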
        if GpuStyle=='CUDA':
            try:
                # For PyCUDA import
                import pycuda.driver as cuda

                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                    if Id in Devices:
                        Alu[Id]='GPU'
            except ImportError:
                print("Platform does not seem to support CUDA")

        if GpuStyle=='OpenCL':
            try:
                # For PyOpenCL import
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="*PU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                        if Id in Devices:
                            # Set the Alu as detected Device Type
                            Alu[Id]=deviceType
                        Id=Id+1
            except ImportError:
                print("Platform does not seem to support OpenCL")

        print(Devices,Alu)

        BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
        ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

        ExploredJobs=numpy.array([]).astype(numpy.uint32)
        ExploredBlocks=numpy.array([]).astype(numpy.uint32)
        ExploredThreads=numpy.array([]).astype(numpy.uint32)
        avgD=numpy.array([]).astype(numpy.float32)
        medD=numpy.array([]).astype(numpy.float32)
        stdD=numpy.array([]).astype(numpy.float32)
        minD=numpy.array([]).astype(numpy.float32)
        maxD=numpy.array([]).astype(numpy.float32)
        avgR=numpy.array([]).astype(numpy.float32)
        medR=numpy.array([]).astype(numpy.float32)
        stdR=numpy.array([]).astype(numpy.float32)
        minR=numpy.array([]).astype(numpy.float32)
        maxR=numpy.array([]).astype(numpy.float32)

        # Share the iterations across devices, rounding up so at least
        # the requested total is computed
        IterationsMPI=numpy.uint64(Iterations//len(Devices))
        if Iterations%len(Devices)!=0:
            IterationsMPI+=1

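        # Sweep every (Blocks,Threads) pair of the explored ranges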
        for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

            ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
            ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
            ExploredThreads=numpy.append(ExploredThreads,Threads)

            DurationItem=numpy.array([]).astype(numpy.float32)
            Duration=numpy.array([]).astype(numpy.float32)
            Rate=numpy.array([]).astype(numpy.float32)
            for i in range(Redo):
                time_start=time.time()

                r=1
                # Distribution of Devices over nodes
                InputCL={}
                InputCL['Iterations']=IterationsMPI
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['GpuStyle']=GpuStyle

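                # Each worker rank first receives a control signal
                # ('CONTINUE' or 'BREAK'), then the input dictionary
                # describing the device it must drive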
                for Device in Devices[1:]:
                    print("Send to device %i on rank %i" % (Device,r))
                    InputCL['Device']=Device
                    comm.send('CONTINUE',dest=r,tag=11)
                    comm.send(InputCL,dest=r,tag=11)
                    r+=1

                # Compute on rank 0
                print("Compute on rank 0")
                InputCL['Device']=Devices[0]

                if GpuStyle=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                Inside=OutputCL['Inside']
                NewIterations=OutputCL['NewIterations']

                for slave in range(1,len(Devices)):
                    print("Get OutputCL from %i" % slave)
                    OutputCL=comm.recv(source=slave,tag=11)
                    print(OutputCL)
                    NewIterations+=OutputCL['NewIterations']
                    Inside+=OutputCL['Inside']

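                # Monte Carlo estimator: Inside hits out of NewIterations
                # draws in the unit square give Pi ~ 4*Inside/NewIterations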
                print("Pi estimation %.8f" % (4./NewIterations*Inside))
277 129 equemene
278 129 equemene
                Duration=numpy.append(Duration,time.time()-time_start)
279 129 equemene
                Rate=numpy.append(Rate,NewIterations/Duration[-1])
280 129 equemene
281 107 equemene
            avgD=numpy.append(avgD,numpy.average(Duration))
            medD=numpy.append(medD,numpy.median(Duration))
            stdD=numpy.append(stdD,numpy.std(Duration))
            minD=numpy.append(minD,numpy.min(Duration))
            maxD=numpy.append(maxD,numpy.max(Duration))
            avgR=numpy.append(avgR,numpy.average(Rate))
            medR=numpy.append(medR,numpy.median(Rate))
            stdR=numpy.append(stdR,numpy.std(Rate))
            minR=numpy.append(minR,numpy.min(Rate))
            maxR=numpy.append(maxR,numpy.max(Rate))

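            # Duration stats (avg/med/std/min/max, in seconds) then rate
            # stats (avg/med/std/min/max, in samples per second)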
            print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))
293 107 equemene
294 131 equemene
            numpy.savez("PiMPI_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
295 107 equemene
            ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
296 131 equemene
            numpy.savetxt("PiMPI_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
297 107 equemene

            if Fit:
                FitAndPrint(ExploredJobs,medD,Curves)

        # Send MPI exit tag
        for slave in range(1,RankSize):
            comm.send('BREAK',dest=slave,tag=11)

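    # Worker ranks: wait for orders from the master until the 'BREAK' signal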
    else:
        while True:
            Signal=comm.recv(source=0,tag=11)
            if Signal=='CONTINUE':
                # Receive information from Master
                InputCL=comm.recv(source=0,tag=11)
                print("Parameters retrieved for rank %s of %s on %s from master:" % (rank,RankSize,gethostname()))
                print("Input CL: %s" % InputCL)
                # Execute on slave

                if InputCL['GpuStyle']=='CUDA':
                    try:
                        OutputCL=MetropolisCuda(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (InputCL['Blocks'],InputCL['Threads']))
                elif InputCL['GpuStyle']=='OpenCL':
                    try:
                        OutputCL=MetropolisOpenCL(InputCL)
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (InputCL['Blocks'],InputCL['Threads']))

                print("Output CL: %s" % OutputCL)
                # Send information to Master
                comm.send(OutputCL,dest=0,tag=11)
                print("Data sent to master")
            else:
                print('Exit signal from Master')
                break