root / Pi / XPU / PiXpuThreads.py @ 194

#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

from threading import Thread

from PiXPU import *

class threadWithReturn(Thread):
    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return

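# Usage sketch (hypothetical values): join() hands back whatever the
# target callable returned, e.g.
#   t=threadWithReturn(target=pow, args=(2,10))
#   t.start()
#   print(t.join())   # -> 1024
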
if __name__=='__main__':

    # Set default values

    # Id of Device : 1 selects the first device found!
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is an integer
    Iterations=10000000
    # BlocksBegin is the first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is the last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is the first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is the last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the number of times to redo the test to improve metrology
    Redo=1
    # OutMetrology selects how duration is estimated : False means timing inside the XPU
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit the measured results
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'
    # Inside test implemented with an if/then branch
    IfThen=False

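    # Principle of the benchmark: throw Iterations pseudo-random points
    # into the unit square; the fraction falling inside the quarter disc
    # tends to Pi/4, so Pi is estimated as 4*Inside/Iterations (see the
    # reduction at the end of the main loop).
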
    HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        # Long options are registered in lowercase so that getopt returns
        # exactly the strings tested in the elif chain below.
        opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

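    # Example invocation (hypothetical device id): sweep 1..16 blocks with
    # 32..128 threads in steps of 32 on OpenCL device 0:
    #   python3 PiXpuThreads.py -g OpenCL -d 0 -b 1 -e 16 -f 32 -l 128 -t 32
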
    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt == '-k':
            IfThen=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
            BlocksEnd = BlocksBegin
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
            ThreadsEnd = ThreadsBegin
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

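    # Guard: without at least one -d option the Devices list stays empty
    # and the per-device split of Iterations below would divide by zero;
    # device #0 is assumed here as a fallback.
    if len(Devices)==0:
        Devices.append(0)
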
    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % BlocksBegin)
    print("Number of Blocks on end : %s" % BlocksEnd)
    print("Step on Blocks : %s" % BlocksStep)
    print("Number of Threads on begin : %s" % ThreadsBegin)
    print("Number of Threads on end : %s" % ThreadsEnd)
    print("Step on Threads : %s" % ThreadsStep)
    print("Number of redo : %s" % Redo)
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % RNG)
    print("Type of variable : %s" % ValueType)

    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="*PU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

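    # Alu maps each selected device id to a coarse device-type label
    # ('GPU' under CUDA, '*PU' under OpenCL); it tags the output file
    # names below.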
    print(Devices,Alu)

    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

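    # Note: adding the step to the upper bound makes these ranges inclusive
    # of BlocksEnd/ThreadsEnd, e.g. range(1,16+1,1) explores blocks 1..16.
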
    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        # Split the iterations across the devices; integer division ('//')
        # is required under Python 3, and any remainder adds one iteration
        # per device so no work is lost.
        IterationsMP=Iterations//len(Devices)
        if Iterations%len(Devices)!=0:
            IterationsMP+=1

        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            for Device in Devices:
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['IfThen']=IfThen
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

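            # Collect the per-device partial results: join() returns the
            # output dictionary of MetropolisCuda/MetropolisOpenCL, whose
            # 'NewIterations' and 'Inside' counts are summed below.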
            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

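        # Write cumulative results after each (Blocks,Threads) case, as a
        # NumPy .npz archive and as a plain-text table (one row per case);
        # rewriting the same files every pass presumably keeps partial
        # results if a long sweep is interrupted.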
        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

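    # Post-processing sketch (hypothetical, shortened file name): the .npz
    # archive written above holds the 12 result rows as one stacked array
    # under the default key 'arr_0':
    #   import numpy
    #   data=numpy.load("PiThreads_FP32_MWC_..._InMetro_myhost.npz")["arr_0"]
    #   ExploredBlocks,avgD=data[0],data[2]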
    if Fit:
        # 'median' was undefined at this point; medD (the median durations
        # gathered above) is assumed to be the intended series.
        FitAndPrint(ExploredJobs,medD,Curves)