Révision 60 Splutter/GPU/SplutterGPU.py
SplutterGPU.py (revision 60) | ||
---|---|---|
134 | 134 |
|
135 | 135 |
__global__ void MainLoopHybrid(uint *s,uint size,ulong iterations,uint seed_w,uint seed_z) |
136 | 136 |
{ |
137 |
// uint z=rotl(seed_z,blockDim.x*blockIdx.x+threadIdx.x threadIdx.x); |
|
138 |
// uint w=rotr(seed_w,blockDim.x*blockIdx.x+threadIdx.x); |
|
139 |
|
|
140 |
// uint jsr=rotl(seed_z,blockDim.x*blockIdx.x+threadIdx.x); |
|
141 |
// uint jcong=rotr(seed_w,blockDim.x*blockIdx.x+threadIdx.x); |
|
142 |
|
|
143 | 137 |
uint z=seed_z; |
144 | 138 |
uint w=seed_w; |
145 | 139 |
|
... | ... | |
171 | 165 |
#define MWCfp MWC * 2.328306435454494e-10f |
172 | 166 |
#define KISSfp KISS * 2.328306435454494e-10f |
173 | 167 |
|
174 |
#define MAX 4294967296 |
|
168 |
#define MAX (ulong)4294967296
|
|
175 | 169 |
|
176 | 170 |
uint rotl(uint value, int shift) { |
177 | 171 |
return (value << shift) | (value >> (sizeof(value) * CHAR_BIT - shift)); |
... | ... | |
183 | 177 |
|
184 | 178 |
__kernel void MainLoopGlobal(__global uint *s,const uint space,const ulong iterations,const uint seed_w,const uint seed_z) |
185 | 179 |
{ |
186 |
__private const float id=(float)get_global_id(0); |
|
187 |
__private const float size=(float)get_global_size(0); |
|
188 |
__private const float block=space/size; |
|
180 |
//__private const float id=(float)get_global_id(0);
|
|
181 |
//__private const float size=(float)get_global_size(0);
|
|
182 |
//__private const float block=space/size;
|
|
189 | 183 |
|
190 |
__private uint z=seed_z; |
|
191 |
__private uint w=seed_w; |
|
184 |
__private const ulong id=(ulong)get_global_id(0); |
|
185 |
__private const ulong size=(ulong)get_global_size(0); |
|
186 |
__private const ulong block=(ulong)space/(ulong)size; |
|
187 |
|
|
188 |
__private uint z=seed_z-(uint)id; |
|
189 |
__private uint w=seed_w+(uint)id; |
|
192 | 190 |
|
193 | 191 |
__private uint jsr=seed_z; |
194 | 192 |
__private uint jcong=seed_w; |
... | ... | |
198 | 196 |
// Standard version does not work for several processes (some lost!) |
199 | 197 |
//__private uint position=(uint)(((ulong)space*(ulong)MWC)/(ulong)MAX); |
200 | 198 |
// Dense version |
201 |
__private uint position=(uint)( (ulong)space*((ulong)CONG+MAX*(ulong)id)/(ulong)size/(ulong)MAX ); |
|
199 |
//__private uint position=(uint)( ((ulong)CONG+MAX*(ulong)id)/(ulong)size*(ulong)space/(ulong)MAX ); |
|
200 |
__private size_t position=(size_t)( ((ulong)MWC+(ulong)MAX*(ulong)id)*(ulong)block/(ulong)MAX ); |
|
202 | 201 |
// Float version seems to be the best... |
203 | 202 |
//__private uint position=(uint)( block*(CONGfp+id) ); |
204 | 203 |
|
... | ... | |
399 | 398 |
|
400 | 399 |
MySplutter=numpy.zeros(steps) |
401 | 400 |
|
402 |
print 'toto ',2**(int)(numpy.log2(MemoryXPU/4)),(MemoryXPU/jobs/4)*jobs |
|
401 |
MaxWorks=2**(int)(numpy.log2(MemoryXPU/4))/8 |
|
402 |
print MaxWorks,2**(int)(numpy.log2(MemoryXPU/4)) |
|
403 | 403 |
|
404 | 404 |
for i in range(steps): |
405 | 405 |
|
... | ... | |
407 | 407 |
#Splutter=numpy.zeros(1024).astype(numpy.uint32) |
408 | 408 |
|
409 | 409 |
#Splutter=numpy.zeros(jobs).astype(numpy.uint32) |
410 |
Splutter=numpy.zeros(jobs*16).astype(numpy.uint32)
|
|
410 |
Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32)
|
|
411 | 411 |
|
412 | 412 |
print Splutter,len(Splutter) |
413 | 413 |
|
... | ... | |
731 | 731 |
print "Values seem to be wrong..." |
732 | 732 |
#THREADS*=2 |
733 | 733 |
if len(average)!=0: |
734 |
numpy.savez("Splutter_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
|
|
734 |
numpy.savez("Splutter_%s_%s_%s_%i_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
|
|
735 | 735 |
ToSave=[ ExploredJobs,average,median,stddev ] |
736 |
numpy.savetxt("Splutter_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),numpy.transpose(ToSave))
|
|
736 |
numpy.savetxt("Splutter_%s_%s_%s_%i_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),numpy.transpose(ToSave))
|
|
737 | 737 |
Jobs+=JobStep |
738 | 738 |
|
739 | 739 |
if Fit: |
Formats disponibles : Unified diff