root / Pi / XPU / PiXpuThreads.py @ 247

#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

from threading import Thread

from PiXPU import *
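
# PiXPU is expected to provide the MetropolisCuda, MetropolisOpenCL and
# FitAndPrint routines used below.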

class threadWithReturn(Thread):
    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return
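
# Usage sketch (illustration only, not part of the benchmark): unlike a plain
# Thread, join() hands back the target's return value.
#   t=threadWithReturn(target=divmod, args=(7,2))
#   t.start()
#   print(t.join())   # -> (3, 1)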

if __name__=='__main__':

    # Set default values

    # Id of Device : 1 is for the first device found!
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is integer
    Iterations=10000000
    # BlocksBegin is the first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is the last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step between Blocks to explore
    BlocksStep=1
    # ThreadsBegin is the first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is the last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step between Threads to explore
    ThreadsStep=1
    # Redo is the number of times to redo the test to improve metrology
    Redo=1
    # OutMetrology is the method for duration estimation : False means measured inside the XPU
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit the results
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Seeds of the RNG
    Seeds=110271,101008
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'
    # Inside test based on an if/then branch
    IfThen=False

    HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        # Long option names are lowercase so they match the tests below
        opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt == '-k':
            IfThen=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
            BlocksEnd = BlocksBegin
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
            ThreadsEnd = ThreadsBegin
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % BlocksBegin)
    print("Number of Blocks on end : %s" % BlocksEnd)
    print("Step on Blocks : %s" % BlocksStep)
    print("Number of Threads on begin : %s" % ThreadsBegin)
    print("Number of Threads on end : %s" % ThreadsEnd)
    print("Step on Threads : %s" % ThreadsStep)
    print("Number of redo : %s" % Redo)
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % RNG)
    print("Type of variable : %s" % ValueType)

    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    print(Devices,Alu)

    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)
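    # The stop bound is shifted one step past End so the End value itself is
    # explored: e.g. Begin=1, End=16, Step=1 yields 1,2,...,16.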

    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        # Integer division: under Python 3, / would yield a float
        IterationsMP=Iterations//len(Devices)
        if Iterations%len(Devices)!=0:
            IterationsMP+=1
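        # The two lines above amount to a ceiling division, so no iteration is
        # lost when splitting the workload: e.g. 10 iterations over 3 devices
        # gives IterationsMP=4 on each device.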

        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            for Device in Devices:
                DeltaD=Device-min(Devices)+1
                DeltaS=(DeltaD-1)*524287
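                # DeltaD/DeltaS shift the seeds per device so each device
                # draws a distinct random stream; 524287=2**19-1 keeps the
                # offsets well apart.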
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['Seeds']=numpy.uint32(Seeds[0]*DeltaD+DeltaS),numpy.uint32(Seeds[1]*DeltaD+DeltaS)
                InputCL['ValueType']=ValueType
                InputCL['IfThen']=IfThen
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
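        # Results are stored twice: a .npz archive for numpy reloading and a
        # plain text table with one row per explored (Blocks,Threads) couple.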

    if Fit:
        # Fit the median durations against the number of parallel jobs
        FitAndPrint(ExploredJobs,medD,Curves)