Statistiques
| Révision :

root / Pi / XPU / PiXpuThreads.py @ 190

Historique | Voir | Annoter | Télécharger (11,37 ko)

1
#!/usr/bin/env python3
2

    
3
#
4
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
5
#
6
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
7
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8
#
9
# Thanks to Andreas Klockner for PyCUDA:
10
# http://mathema.tician.de/software/pycuda
11
# Thanks to Andreas Klockner for PyOpenCL:
12
# http://mathema.tician.de/software/pyopencl
13
# 
14

    
15
# 2013-01-01 : problems with launch timeout
16
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
17
# Option "Interactive" "0" in /etc/X11/xorg.conf
18

    
19
# Common tools
20
import numpy
21
from numpy.random import randint as nprnd
22
import sys
23
import getopt
24
import time
25
import math
26
import itertools
27
from socket import gethostname
28

    
29
from threading import Thread
30

    
31
from PiXPU import *
32

    
33
class threadWithReturn(Thread):
    """A Thread whose join() hands back the target callable's return value.

    The stock threading.Thread discards whatever the target returns; this
    subclass captures it in run() and exposes it from join().
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Result slot, filled by run() once the target has completed.
        self._return = None

    def run(self):
        # NOTE: relies on Thread's private _target/_args/_kwargs attributes,
        # exactly as the base implementation of run() does.
        if self._target is None:
            return
        self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        # Wait as usual, then return the stored result to the caller.
        super().join(*args, **kwargs)
        return self._return
45

    
46
if __name__=='__main__':

    # Default values for the exploration of the (Blocks,Threads) space

    # Id of Device : 1 is for first find !
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is integer
    Iterations=10000000
    # BlocksBegin is first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the times to redo the test to improve metrology
    Redo=1
    # OutMetrology is method for duration estimation : False is GPU inside
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit and print the curves
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'
    # Inside based on If
    IfThen=False

    # BUGFIX: usage string said "<ThreadssTep>"
    HowToUse='%s -c (Print Curves) -k (Case On IfThen) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadsStep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        # BUGFIX: long option names are lowercased so they actually match the
        # handlers below (the original camelCase spellings never matched, so
        # every long option was silently rejected by getopt's own check only).
        opts, args = getopt.getopt(sys.argv[1:],"hckg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices selected on the command line
    Devices=[]
    # Alu maps a selected device Id to its detected device type string
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

                # BUGFIX: bare "print" is a no-op expression in Python 3
                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                # BUGFIX: bare "print" is a no-op expression in Python 3
                print()
            except Exception:
                # pycuda may be absent (ImportError) or fail to initialise
                # (RuntimeError) ; a bare except would also hide SystemExit.
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt == '-k':
            IfThen=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    # BUGFIX: without any -d option the script divided by len(Devices)==0
    # further down ; default to the first device.
    if len(Devices)==0:
        Devices=[0]

    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % BlocksBegin)
    print("Number of Blocks on end : %s" % BlocksEnd)
    print("Step on Blocks : %s" % BlocksStep)
    print("Number of Threads on begin : %s" % ThreadsBegin)
    print("Number of Threads on end : %s" % ThreadsEnd)
    print("Step on Threads : %s" % ThreadsStep)
    print("Number of redo : %s" % Redo)
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % RNG)
    print("Type of variable : %s" % ValueType)

    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="*PU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    print(Devices,Alu)

    # Inclusive exploration ranges of the (Blocks,Threads) space
    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

    # Per-(Blocks,Threads) accumulators : Duration (D) and Rate (R) statistics
    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        # BUGFIX: "/" is float division under Python 3 ; share the iterations
        # between devices with integer arithmetic (rounded up so the total is
        # at least Iterations).
        IterationsMP=int(Iterations)//len(Devices)
        if int(Iterations)%len(Devices)!=0:
            IterationsMP+=1

        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            # Launch one worker thread per selected device
            for Device in Devices:
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                InputCL['IfThen']=IfThen
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads))

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            # Gather the per-device results (threadWithReturn.join() returns
            # the dict produced by Metropolis*)
            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        # Consistency: ThreadsEnd formatted with %s to match the .npz filename
        # (identical rendering for an int, so existing filenames are unchanged)
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

    if Fit:
        # BUGFIX: "median" was an undefined name (guaranteed NameError here);
        # presumably the median Rate is the series to fit against the job
        # count -- TODO confirm against FitAndPrint in PiXPU
        FitAndPrint(ExploredJobs,medR,Curves)