Statistiques
| Révision :

root / Pi / XPU / PiXpuThreads.py @ 174

Historique | Voir | Annoter | Télécharger (11,22 ko)

1
#!/usr/bin/env python3
2

    
3
#
4
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
5
#
6
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
7
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8
#
9
# Thanks to Andreas Klockner for PyCUDA:
10
# http://mathema.tician.de/software/pycuda
11
# Thanks to Andreas Klockner for PyOpenCL:
12
# http://mathema.tician.de/software/pyopencl
13
# 
14

    
15
# 2013-01-01 : problems with launch timeout
16
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
17
# Option "Interactive" "0" in /etc/X11/xorg.conf
18

    
19
# Common tools
20
import numpy
21
from numpy.random import randint as nprnd
22
import sys
23
import getopt
24
import time
25
import math
26
import itertools
27
from socket import gethostname
28

    
29
from threading import Thread
30

    
31
from PiXPU import *
32

    
33
class threadWithReturn(Thread):
    """A Thread whose join() hands back the target's return value.

    The stock threading.Thread discards whatever its target returns;
    this subclass captures the result in run() and returns it from
    join(), so callers can fan work out and collect results.
    """

    def __init__(self, *args, **kwargs):
        super(threadWithReturn, self).__init__(*args, **kwargs)
        # Holds the target's result until join() is called.
        self._return = None

    def run(self):
        # NOTE: relies on Thread's private _target/_args/_kwargs
        # attributes, exactly as Thread.run() itself does.
        target = self._target
        if target is None:
            return
        self._return = target(*self._args, **self._kwargs)

    def join(self, *args, **kwargs):
        # Wait as usual, then expose the captured result
        # (None if the thread never ran a target or is still alive).
        super(threadWithReturn, self).join(*args, **kwargs)
        return self._return
45

    
46
if __name__=='__main__':

    # Pi-by-MonteCarlo parameter sweep: for each (Blocks,Threads) couple the
    # Iterations are split across the selected devices, each device being
    # driven by one Python thread; duration and rate statistics are printed
    # and saved (numpy .npz plus a plain-text table).

    # Set defaults values

    # Id of Device : 1 is for first find !
    Device=1
    # GPU style can be Cuda (Nvidia implementation) or OpenCL
    GpuStyle='OpenCL'
    # Iterations is integer
    Iterations=10000000
    # BlocksBegin is first number of Blocks to explore
    BlocksBegin=1
    # BlocksEnd is last number of Blocks to explore
    BlocksEnd=16
    # BlocksStep is the step of Blocks to explore
    BlocksStep=1
    # ThreadsBegin is first number of Threads to explore
    ThreadsBegin=1
    # ThreadsEnd is last number of Threads to explore
    ThreadsEnd=1
    # ThreadsStep is the step of Threads to explore
    ThreadsStep=1
    # Redo is the times to redo the test to improve metrology
    Redo=1
    # OutMetrology is method for duration estimation : False is GPU inside
    OutMetrology=False
    Metrology='InMetro'
    # Curves is True to print the curves
    Curves=False
    # Fit is True to fit the curves
    Fit=False
    # Marsaglia RNG
    RNG='MWC'
    # Value type : INT32, INT64, FP32, FP64
    ValueType='FP32'

    HowToUse='%s -c (Print Curves) -d <DeviceId> -g <CUDA/OpenCL> -i <Iterations> -b <BlocksBegin> -e <BlocksEnd> -s <BlocksStep> -f <ThreadsFirst> -l <ThreadsLast> -t <ThreadssTep> -r <RedoToImproveStats> -m <SHR3/CONG/MWC/KISS> -v <INT32/INT64/FP32/FP64>'

    try:
        # BUGFIX: the long option names are lowercased so that they match the
        # handler tuples below.  getopt long options are case sensitive, so
        # the original mixed-case spellings ("blocksBegin", "threadsFirst",
        # ...) were returned as-is and silently ignored by the elif chain.
        opts, args = getopt.getopt(sys.argv[1:],"hcg:i:b:e:s:f:l:t:r:d:m:v:",["gpustyle=","iterations=","blocksbegin=","blocksend=","blocksstep=","threadsfirst=","threadslast=","threadsstep=","redo=","device=","marsaglia=","valuetype="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    # Alu maps a selected device Id to its detected type ("GPU", "*PU", ...)
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1
                # BUGFIX: a bare "print" is a no-op expression in Python 3;
                # call it to actually emit the blank separator line.
                print()
            except ImportError:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except Exception:
                # Narrowed from a bare "except:" — any PyCUDA failure still
                # means "no CUDA", but SystemExit/KeyboardInterrupt propagate.
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt == '-c':
            Curves=True
        elif opt in ("-d", "--device"):
            Devices.append(int(arg))
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-m", "--marsaglia"):
            RNG = arg
        elif opt in ("-v", "--valuetype"):
            ValueType = arg
        elif opt in ("-i", "--iterations"):
            Iterations = numpy.uint64(arg)
        elif opt in ("-b", "--blocksbegin"):
            BlocksBegin = int(arg)
        elif opt in ("-e", "--blocksend"):
            BlocksEnd = int(arg)
        elif opt in ("-s", "--blocksstep"):
            BlocksStep = int(arg)
        elif opt in ("-f", "--threadsfirst"):
            ThreadsBegin = int(arg)
        elif opt in ("-l", "--threadslast"):
            ThreadsEnd = int(arg)
        elif opt in ("-t", "--threadsstep"):
            ThreadsStep = int(arg)
        elif opt in ("-r", "--redo"):
            Redo = int(arg)

    # ROBUSTNESS: without any -d option the original crashed later with a
    # ZeroDivisionError on len(Devices); fall back on the default Device.
    if not Devices:
        Devices.append(Device)

    print("Devices Identification : %s" % Devices)
    print("GpuStyle used : %s" % GpuStyle)
    print("Iterations : %s" % Iterations)
    print("Number of Blocks on begin : %s" % BlocksBegin)
    print("Number of Blocks on end : %s" % BlocksEnd)
    print("Step on Blocks : %s" % BlocksStep)
    print("Number of Threads on begin : %s" % ThreadsBegin)
    print("Number of Threads on end : %s" % ThreadsEnd)
    print("Step on Threads : %s" % ThreadsStep)
    print("Number of redo : %s" % Redo)
    print("Metrology done out of XPU : %r" % OutMetrology)
    print("Type of Marsaglia RNG used : %s" % RNG)
    print("Type of variable : %s" % ValueType)

    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="*PU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    print(Devices,Alu)

    BlocksList=range(BlocksBegin,BlocksEnd+BlocksStep,BlocksStep)
    ThreadsList=range(ThreadsBegin,ThreadsEnd+ThreadsStep,ThreadsStep)

    ExploredJobs=numpy.array([]).astype(numpy.uint32)
    ExploredBlocks=numpy.array([]).astype(numpy.uint32)
    ExploredThreads=numpy.array([]).astype(numpy.uint32)
    # Duration statistics (average/median/std/min/max), one entry per couple
    avgD=numpy.array([]).astype(numpy.float32)
    medD=numpy.array([]).astype(numpy.float32)
    stdD=numpy.array([]).astype(numpy.float32)
    minD=numpy.array([]).astype(numpy.float32)
    maxD=numpy.array([]).astype(numpy.float32)
    # Rate statistics (iterations per second), one entry per couple
    avgR=numpy.array([]).astype(numpy.float32)
    medR=numpy.array([]).astype(numpy.float32)
    stdR=numpy.array([]).astype(numpy.float32)
    minR=numpy.array([]).astype(numpy.float32)
    maxR=numpy.array([]).astype(numpy.float32)

    # BUGFIX: "/" is true division under Python 3 and produced a float
    # iteration count per device; split the work with integer arithmetic
    # (ceiling division so no iteration is lost).  Also hoisted out of the
    # sweep loop since it does not depend on Blocks/Threads.
    IterationsMP=int(Iterations)//len(Devices)
    if int(Iterations)%len(Devices)!=0:
        IterationsMP+=1

    for Blocks,Threads in itertools.product(BlocksList,ThreadsList):

        ExploredJobs=numpy.append(ExploredJobs,Blocks*Threads)
        ExploredBlocks=numpy.append(ExploredBlocks,Blocks)
        ExploredThreads=numpy.append(ExploredThreads,Threads)

        DurationItem=numpy.array([]).astype(numpy.float32)
        Duration=numpy.array([]).astype(numpy.float32)
        Rate=numpy.array([]).astype(numpy.float32)
        for i in range(Redo):
            MyThreads=[]
            time_start=time.time()

            # Launch one worker thread per selected device.
            for Device in Devices:
                InputCL={}
                InputCL['Iterations']=IterationsMP
                InputCL['Steps']=1
                InputCL['Blocks']=Blocks
                InputCL['Threads']=Threads
                InputCL['Device']=Device
                InputCL['RNG']=RNG
                InputCL['ValueType']=ValueType
                if GpuStyle=='CUDA':
                    try:
                        MyThread=threadWithReturn(target=MetropolisCuda, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on Cuda" % (Blocks,Threads))
                elif GpuStyle=='OpenCL':
                    try:
                        MyThread=threadWithReturn(target=MetropolisOpenCL, args=(InputCL,))
                    except Exception:
                        print("Problem with (%i,%i) // computations on OpenCL" % (Blocks,Threads)            )

                print("Start on #%i device..." % Device)
                MyThread.start()
                MyThreads.append(MyThread)

            # Collect the partial results from every device thread.
            NewIterations=0
            Inside=0
            for MyThread in MyThreads:
                OutputCL=MyThread.join()
                NewIterations+=OutputCL['NewIterations']
                Inside+=OutputCL['Inside']
            print("Pi estimation %.8f" % (4./NewIterations*Inside))

            Duration=numpy.append(Duration,time.time()-time_start)
            Rate=numpy.append(Rate,NewIterations/Duration[-1])

        avgD=numpy.append(avgD,numpy.average(Duration))
        medD=numpy.append(medD,numpy.median(Duration))
        stdD=numpy.append(stdD,numpy.std(Duration))
        minD=numpy.append(minD,numpy.min(Duration))
        maxD=numpy.append(maxD,numpy.max(Duration))
        avgR=numpy.append(avgR,numpy.average(Rate))
        medR=numpy.append(medR,numpy.median(Rate))
        stdR=numpy.append(stdR,numpy.std(Rate))
        minR=numpy.append(minR,numpy.min(Rate))
        maxR=numpy.append(maxR,numpy.max(Rate))

        print("%.2f %.2f %.2f %.2f %.2f %i %i %i %i %i" % (avgD[-1],medD[-1],stdD[-1],minD[-1],maxD[-1],avgR[-1],medR[-1],stdR[-1],minR[-1],maxR[-1]))

        # Save the (partial) results after every couple so an interrupted
        # sweep keeps what was already measured.
        numpy.savez("PiThreads_%s_%s_%s_%s_%s_%s_%s_%s_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),(ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR))
        ToSave=[ ExploredBlocks,ExploredThreads,avgD,medD,stdD,minD,maxD,avgR,medR,stdR,minR,maxR ]
        numpy.savetxt("PiThreads_%s_%s_%s_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (ValueType,RNG,Alu[Devices[0]],GpuStyle,BlocksBegin,BlocksEnd,ThreadsBegin,ThreadsEnd,Iterations,Devices[0],Metrology,gethostname()),numpy.transpose(ToSave),fmt='%i %i %e %e %e %e %e %i %i %i %i %i')

    if Fit:
        # BUGFIX: the original passed the undefined name "median" (a
        # NameError whenever -Fit- was enabled); the collected median
        # durations medD are the intended second argument.
        FitAndPrint(ExploredJobs,medD,Curves)