root / Pi / XPU / PiXpuThreads.py @ 187

#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

from threading import Thread

from PiXPU import *

class threadWithReturn(Thread):
    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return
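
# Minimal usage sketch (illustrative, not part of the original code):
#   t = threadWithReturn(target=pow, args=(2, 10))
#   t.start()
#   result = t.join()   # -> 1024, the value returned by the target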

if __name__=='__main__':

    # Set default values

    # Id of Device : 1 is for first found!
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is integer
    Iterations=10000000
    # BlocksBegin is the first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is the last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is the first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is the last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the number of times to redo the test to improve metrology
    Redo=1
    # OutMetrology is the method for duration estimation : False is GPU inside
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit the results
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'

    HowToUse='%s -c (Print Curves) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hcg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)
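
    # Example invocation (illustrative values, not from the original source):
    #   python3 PiXpuThreads.py -g OpenCL -d 0 -i 100000000 \
    #       -b 1 -e 16 -s 1 -f 32 -l 1024 -t 32 -r 5 -m MWC -v FP32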

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except Exception:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    # Fall back on the default device when -d was never given, otherwise the
    # Iterations split below divides by zero
    if len(Devices)==0:
        Devices.append(Device)

    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % BlocksBegin)
    print("Number of Blocks on end : %s" % BlocksEnd)
    print("Step on Blocks : %s" % BlocksStep)
    print("Number of Threads on begin : %s" % ThreadsBegin)
    print("Number of Threads on end : %s" % ThreadsEnd)
    print("Step on Threads : %s" % ThreadsStep)
    print("Number of redo : %s" % Redo)
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % RNG)
    print("Type of variable : %s" % ValueType)

    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="*PU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

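    # Alu maps each selected device id to a coarse type label ('GPU', '*PU');
    # the label is reused below to tag the output file names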
    print(Devices,Alu)

    # One step is added to each end bound because range() excludes it, so
    # BlocksEnd and ThreadsEnd are themselves part of the explored grid
    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        # Ceiling division of the workload across devices: // keeps integer
        # semantics under Python 3, where / would silently produce a float
        IterationsMP=Iterations//len(Devices)
        if Iterations%len(Devices)!=0:
            IterationsMP+=1

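        # Worked example: Iterations=10 on 3 devices gives IterationsMP=4,
        # i.e. 12 draws in total; the Pi estimate below divides by the
        # NewIterations actually reported, so it remains consistent
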
        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            # Launch one thread per selected device, each wrapping the
            # Metropolis implementation of the chosen API
            for Device in Devices:
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
            # Quarter-disc Monte Carlo estimator: Pi ~ 4*Inside/NewIterations
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

    if Fit:
        # medD holds the median durations computed above
        FitAndPrint(ExploredJobs,medD,Curves)
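
# Post-processing sketch (assumption: reading back the .npz written above;
# savez appends '.npz' and stores the positional tuple as 'arr_0'):
#   data = numpy.load("PiThreads_<...>.npz")['arr_0']
#   blocks, threads, medianDurations = data[0], data[1], data[3]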