Statistiques
| Révision :

root / Pi / XPU / PiXpuThreads.py @ 282

Historique | Voir | Annoter | Télécharger (11,75 ko)

1
#!/usr/bin/env python3
2

    
3
#
4
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
5
#
6
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
7
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8
#
9
# Thanks to Andreas Klockner for PyCUDA:
10
# http://mathema.tician.de/software/pycuda
11
# Thanks to Andreas Klockner for PyOpenCL:
12
# http://mathema.tician.de/software/pyopencl
13
# 
14

    
15
# 2013-01-01 : problems with launch timeout
16
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
17
# Option "Interactive" "0" in /etc/X11/xorg.conf
18

    
19
# Common tools
20
import numpy
21
from numpy.random import randint as nprnd
22
import sys
23
import getopt
24
import time
25
import math
26
import itertools
27
from socket import gethostname
28

    
29
from threading import Thread
30

    
31
from PiXPU import *
32

    
33
class threadWithReturn(Thread):
    """Thread that keeps the value returned by its target.

    The constructor takes exactly the same arguments as
    ``threading.Thread``. ``join()`` additionally hands back whatever the
    target callable returned.
    """

    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        # Result of the target. Stays None until run() has stored a value,
        # i.e. also when no target was given or when the target raised.
        self._return = None

    def run(self):
        """Call the target (if any) and remember its return value."""
        try:
            if self._target is not None:
                self._return = self._target(*self._args, **self._kwargs)
        finally:
            # Same clean-up as threading.Thread.run(): drop the references
            # so a target or argument pointing back at this thread does not
            # keep a reference cycle alive.
            del self._target, self._args, self._kwargs

    def join(self, *args, **kwargs):
        """Wait for the thread like ``Thread.join`` and return the result.

        Returns the value stored by run(), or None if nothing was stored
        (no target, target raised, or the join timed out before the target
        finished).
        """
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return
45

    
46
if __name__=='__main__':
    # Command-line driver. It parses the options, lists the devices seen by
    # the selected API, then for every (Blocks,Threads) pair launches the
    # Pi Monte-Carlo estimation on all selected devices at once (one thread
    # per device), Redo times. Duration and rate statistics are printed and
    # saved to disk after each pair.

    # Set defaults values

    # Id of Device : 1 is for first find !
    # (only a placeholder here: the devices really used come from -d)
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is integer
    Iterations=10000000
    # BlocksBegin is the first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is the last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is the first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is the last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the times to redo the test to improve metrology
    Redo=1
    # OutMetrology is method for duration estimation : False is GPU inside
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to call FitAndPrint on the results (never set by an option)
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Seeds
    Seeds=110271,101008
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'
    # Inside based on If
    IfThen=False

    HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadssTep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksBegin=","blocksEnd=","blocksStep=","threadsFirst=","threadsLast=","threadssTep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices requested with -d
    Devices=[]
    # Device Id -> device type label, filled during detection below
    Alu={}

    # getopt reports long options with the exact spelling declared above,
    # so the comparisons below must use that same (camelCase) spelling.
    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except Exception:
                # Best effort: a missing module or a failing driver both
                # mean "no CUDA here", but Ctrl-C must still get through.
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt == '-k':
            IfThen=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksBegin"):
            # Setting the begin also resets the end: a lone -b explores a
            # single value.
            BlocksBegin = int(arg)
            BlocksEnd = BlocksBegin
        elif opt in ("-e", "--blocksEnd"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksStep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsFirst"):
            # Same convention as -b: a lone -f explores a single value.
            ThreadsBegin = int(arg)
            ThreadsEnd = ThreadsBegin
        elif opt in ("-l", "--threadsLast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadssTep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % BlocksBegin)
    print("Number of Blocks on end : %s" % BlocksEnd)
    print("Step on Blocks : %s" % BlocksStep)
    print("Number of Threads on begin : %s" % ThreadsBegin)
    print("Number of Threads on end : %s" % ThreadsEnd)
    print("Step on Threads : %s" % ThreadsStep)
    print("Number of redo : %s" % Redo)
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % RNG)
    print("Type of variable : %s" % ValueType)

    # Detect the devices of the selected API and record the requested ones
    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")
    elif GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")
    else:
        # Without this check no worker thread would be created below and
        # the script would stop on a NameError.
        print("Unknown GpuStyle %s : use CUDA or OpenCL" % GpuStyle)
        sys.exit(2)

    print(Devices,Alu)

    # Fail early with a clear message instead of a ZeroDivisionError (no
    # device) or a KeyError on Alu (device not detected) later on.
    if not Devices:
        print("No device selected : use -d <DeviceId>")
        print(HowToUse % sys.argv[0])
        sys.exit(2)
    Missing=[Id for Id in Devices if Id not in Alu]
    if Missing:
        print("Devices %s not detected under %s" % (Missing,GpuStyle))
        sys.exit(2)

    # Worker function of the selected API; only the needed name is looked
    # up (both presumably come from the PiXPU star import).
    if GpuStyle=='CUDA':
        Metropolis=MetropolisCuda
    else:
        Metropolis=MetropolisOpenCL

    # The stop bound is End+Step so that End itself is part of the range
    # when it lies on the step grid.
    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    # Per-configuration statistics on durations (D) and rates (R)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    # Share of iterations per device, rounded up. Integer arithmetic on
    # purpose: "/" is true division in Python 3 and would hand a fractional
    # iteration count to the workers.
    IterationsMP,Remainder=divmod(int(Iterations),len(Devices))
    if Remainder!=0:
        IterationsMP+=1

    # Common base name of the two result files (savez appends ".npz")
    BaseName="PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname())

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            for Device in Devices:
                # Rank of the device among the selected ones (1 for the
                # lowest Id), used to give each device its own seeds.
                DeltaD=Device-min(Devices)+1
                DeltaS=(DeltaD-1)*524287
                InputCL={
                    'Iterations':IterationsMP,
                    'Steps':1,
                    'Blocks':Blocks,
                    'Threads':Threads,
                    'Device':Device,
                    'RNG':RNG,
                    'Seeds':(numpy.uint32(Seeds[0]*DeltaD+DeltaS),numpy.uint32(Seeds[1]*DeltaD+DeltaS)),
                    'ValueType':ValueType,
                    'IfThen':IfThen,
                }
                MyThread=threadWithReturn(target=Metropolis, args=(InputCL,))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            # Wait for every device before looking at any result
            Outputs=[MyThread.join() for MyThread in MyThreads]
            Elapsed=time.time()-time_start

            # join() yields None when a worker did not return a result
            # (for instance because it raised): report it explicitly.
            if any(OutputCL is None for OutputCL in Outputs):
                print("Problem with (%i,%i) // computations on %s" % (Blocks,Threads,GpuStyle))
                sys.exit(1)

            NewIterations=0
            Inside=0
            for OutputCL in Outputs:
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']

            Duration=numpy.append(Duration,Elapsed)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])
            # Report the rate of this run only: Rate holds one entry per
            # redo, so converting the whole array fails once Redo>1.
            print("Itops %i\nLogItops %.2f " % (int(Rate[-1]),numpy.log10(Rate[-1])))
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

        # Results are rewritten after every configuration, so a partial
        # exploration is still on disk if a later run fails.
        numpy.savez(BaseName,(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        numpy.savetxt(BaseName,numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

    if Fit:
        # NOTE(review): the original passed `median`, a name that is not
        # defined anywhere in this file; medD (median durations) is
        # presumably what was meant - confirm against FitAndPrint.
        FitAndPrint(ExploredJobs,medD,Curves)