#!/usr/bin/env python3

#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klöckner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klöckner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
#

# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf
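
# Principle: draw pseudo-random points in the unit square; the fraction
# that falls inside the quarter circle of radius 1 tends to Pi/4, so Pi
# is estimated as 4*Inside/NewIterations (see the estimation print below).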

# Common tools
import numpy
from numpy.random import randint as nprnd
import sys
import getopt
import time
import math
import itertools
from socket import gethostname

from threading import Thread

from PiXPU import *

class threadWithReturn(Thread):
    """Thread whose join() returns the value returned by its target."""
    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return

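# Used below to run one Metropolis kernel launcher per device and get its
# output dictionary back through join(), e.g.:
#   MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
#   MyThread.start()
#   OutputCL=MyThread.join()
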
if __name__=='__main__':

    # Set default values

    # Id of Device : 1 is for first find !
    Device=1
    # GPU style can be CUDA (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is integer
    Iterations=10000000
    # BlocksBegin is the first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is the last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is the first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is the last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the number of times to redo the test to improve metrology
    Redo=1
    # OutMetrology is the method for duration estimation : False is GPU inside
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit and print the curves
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'
    # Inside based on If
    IfThen=False

    HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

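    # Example invocation (device ids are hypothetical; list yours with -h):
    #   ./PiXpuThreads.py -g OpenCL -d 0 -d 1 -i 100000000 -b 1 -e 16 -r 4
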
    try:
        opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under the CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except Exception:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt == '-k':
            IfThen=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
            BlocksEnd = BlocksBegin
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
            ThreadsEnd = ThreadsBegin
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    # Fall back on the default device when none was given on the command
    # line: Devices is used below, notably as a divisor.
    if len(Devices)==0:
        Devices.append(Device)

    print("Devices Identification : %s" % Devices)
162
    print("GpuStyle used : %s" % GpuStyle)
163
    print("Iterations : %s" % Iterations)
164
    print("Number of Blocks on begin : %s" % BlocksBegin)
165
    print("Number of Blocks on end : %s" % BlocksEnd)
166
    print("Step on Blocks : %s" % BlocksStep)
167
    print("Number of Threads on begin : %s" % ThreadsBegin)
168
    print("Number of Threads on end : %s" % ThreadsEnd)
169
    print("Step on Threads : %s" % ThreadsStep)
170
    print("Number of redo : %s" % Redo)
171
    print("Metrology done out of XPU : %r" % OutMetrology)
172
    print("Type of Marsaglia RNG used : %s" % RNG)
173
    print("Type of variable : %s" % ValueType)
174

    
175
    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="*PU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    print(Devices,Alu)

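    # range(Begin,End+Step,Step) makes each exploration list inclusive of
    # End whenever End-Begin is a multiple of Step.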
    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        # Integer (floor) division: under Python 3, / would yield a float
        IterationsMP=Iterations//len(Devices)
        if Iterations%len(Devices)!=0:
            IterationsMP+=1
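        # Each device runs ceil(Iterations/len(Devices)) iterations, so the
        # effective total may slightly exceed Iterations; the kernels report
        # the actual count back as 'NewIterations', which the Pi estimate
        # below divides by.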

        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            for Device in Devices:
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['IfThen']=IfThen
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on CUDA" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))
291
        
292
        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
293
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
294
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')
295

    
296
    if Fit:
297
        FitAndPrint(ExploredJobs,median,Curves)
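
    # A minimal sketch of reloading a saved run (assumes the .npz name
    # produced by savez above; the 12 rows follow the ToSave order):
    #   import numpy
    #   data=numpy.load("PiThreads_....npz")['arr_0']
    #   ExploredBlocks,ExploredThreads,medD=data[0],data[1],data[3]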