root / Pi / XPU / PiXpuThreads.py @ 190

#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

from threading import Thread

from PiXPU import *

class threadWithReturn(Thread):
    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return

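# A minimal usage sketch of threadWithReturn (illustration only, never called
# by the benchmark): join() hands back whatever the target callable returned,
# which plain threading.Thread.join() does not.
def _threadWithReturnDemo():
    t=threadWithReturn(target=lambda x: x*x, args=(4,))
    t.start()
    return t.join()   # -> 16
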
if __name__=='__main__':

    # Set default values

    # Id of Device : 1 is for the first one found!
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is an integer
    Iterations=10000000
    # BlocksBegin is the first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is the last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is the first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is the last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the number of times the test is repeated to improve metrology
    Redo=1
    # OutMetrology is the method for duration estimation : False is GPU inside
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit the results
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'
    # Inside based on If
    IfThen=False

    HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'
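
    # Example invocations (hypothetical device ids; use -h to list devices):
    #   ./PiXpuThreads.py -g OpenCL -d 0 -i 100000000 -b 1 -e 16 -r 5
    #   ./PiXpuThreads.py -g CUDA -d 0 -d 1 -f 32 -l 1024 -t 32 -m KISS -v FP64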

    # Long option names are lowercase so they match the handlers below
    try:
        opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt == '-k':
            IfThen=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    print("Devices Identification : %s" % Devices)
160 127 equemene
    print("GpuStyle used : %s" % GpuStyle)
161 127 equemene
    print("Iterations : %s" % Iterations)
162 127 equemene
    print("Number of Blocks on begin : %s" % BlocksBegin)
163 127 equemene
    print("Number of Blocks on end : %s" % BlocksEnd)
164 127 equemene
    print("Step on Blocks : %s" % BlocksStep)
165 127 equemene
    print("Number of Threads on begin : %s" % ThreadsBegin)
166 127 equemene
    print("Number of Threads on end : %s" % ThreadsEnd)
167 127 equemene
    print("Step on Threads : %s" % ThreadsStep)
168 127 equemene
    print("Number of redo : %s" % Redo)
169 127 equemene
    print("Metrology done out of XPU : %r" % OutMetrology)
170 127 equemene
    print("Type of Marsaglia RNG used : %s" % RNG)
171 127 equemene
    print("Type of variable : %s" % ValueType)
172 107 equemene
173 107 equemene
    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="*PU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    print(Devices,Alu)

    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)
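    # Note: the End+Step upper bound makes BlocksEnd/ThreadsEnd inclusive;
    # with the defaults this explores Blocks 1..16 for a single Thread.
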
    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        # Integer division ('//'): under Python 3, '/' would make this a float
        IterationsMP=Iterations//len(Devices)
        if Iterations%len(Devices)!=0:
            IterationsMP+=1
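        # e.g. Iterations=10 over 3 devices gives IterationsMP=4 per device
        # (ceiling split), so slightly more than Iterations may be computed.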

        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            for Device in Devices:
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['IfThen']=IfThen
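                # MetropolisCuda/MetropolisOpenCL (imported from PiXPU) take
                # this dict and return one holding 'NewIterations' and 'Inside'.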
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
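            # Monte Carlo estimator: Pi ~ 4*Inside/NewIterations, the fraction
            # of random points that fell inside the quarter unit circle.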
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

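        # Summary: avg/med/std/min/max Duration (s), then the same five
        # statistics for Rate (iterations per second, truncated by %i)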
        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

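        # Each pass rewrites two cumulative result files: a NumPy .npz archive
        # and a plain-text table with one row per explored (Blocks,Threads).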
        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

    if Fit:
        # NOTE: the original passed the undefined name 'median' to FitAndPrint;
        # medD (the median durations) is presumably the series intended here.
        FitAndPrint(ExploredJobs,medD,Curves)