Révision 62
Splutter/GPU/SplutterGPU.py (revision 62) | ||
---|---|---|
193 | 193 |
|
194 | 194 |
for (__private ulong i=0;i<iterations;i++) { |
195 | 195 |
|
196 |
// Standard version does not work for several processes (some lost!) |
|
197 |
//__private uint position=(uint)(((ulong)space*(ulong)MWC)/(ulong)MAX); |
|
196 |
// Standard version does not work for several processes (some lost!) memory inconsistent |
|
197 |
//__private size_t position=(size_t)(((ulong)space*(ulong)MWC)/(ulong)MAX); |
|
198 |
|
|
198 | 199 |
// Dense version |
199 |
//__private uint position=(uint)( ((ulong)CONG+MAX*(ulong)id)/(ulong)size*(ulong)space/(ulong)MAX ); |
|
200 |
__private size_t position=(size_t)( ((ulong)MWC+(ulong)MAX*(ulong)id)*(ulong)block/(ulong)MAX ); |
|
200 |
//__private size_t position=(size_t)( ((ulong)MWC+(ulong)id*(ulong)MAX)*(ulong)block/(ulong)MAX ); |
|
201 |
|
|
202 |
// Sparse version |
|
203 |
//__private size_t position=(size_t)( ((ulong)MWC+(ulong)id*(ulong)MAX)*(ulong)block/(ulong)MAX ); |
|
204 |
//__private size_t position=(size_t)( ((ulong)MWC*(block)+(ulong)id*(ulong)MAX)/(ulong)MAX ); |
|
205 |
// First |
|
206 |
//__private size_t position=(size_t)( (ulong)(0)*(ulong)size+(ulong)id ); |
|
207 |
// Last |
|
208 |
//__private size_t position=(size_t)( (ulong)(block-1)*(ulong)size+(ulong)id ); |
|
209 |
// General |
|
210 |
__private size_t position=(size_t)( (ulong)MWC*(ulong)(block)/(ulong)MAX*(ulong)size+(ulong)id ); |
|
211 |
|
|
201 | 212 |
// Float version seems to be the best... |
202 | 213 |
//__private uint position=(uint)( block*(CONGfp+id) ); |
203 | 214 |
|
... | ... | |
411 | 422 |
|
412 | 423 |
MaxWorks=2**(int)(numpy.log2(MinMemoryXPU/4)) |
413 | 424 |
print MaxWorks,2**(int)(numpy.log2(MemoryXPU)) |
414 |
|
|
415 | 425 |
|
416 |
Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32) |
|
426 |
#Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32) |
|
427 |
Splutter=numpy.zeros(jobs*16).astype(numpy.uint32) |
|
417 | 428 |
|
418 | 429 |
for i in range(steps): |
419 | 430 |
|
... | ... | |
422 | 433 |
|
423 | 434 |
#Splutter=numpy.zeros(jobs).astype(numpy.uint32) |
424 | 435 |
|
436 |
Splutter[:]=0 |
|
437 |
|
|
425 | 438 |
print Splutter,len(Splutter) |
426 | 439 |
|
427 |
Splutter[:]=0 |
|
428 |
|
|
429 | 440 |
SplutterCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=Splutter) |
430 | 441 |
|
431 | 442 |
if ParaStyle=='Blocks': |
... | ... | |
478 | 489 |
elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start) |
479 | 490 |
|
480 | 491 |
MyDuration[i]=elapsed |
481 |
#print Splutter,sum(Splutter) |
|
|
492 |
print Splutter,sum(Splutter) |
|
482 | 493 |
#MySplutter[i]=numpy.median(Splutter) |
483 | 494 |
#print numpy.mean(Splutter)*len(Splutter),MySplutter[i]*len(Splutter),numpy.std(Splutter) |
484 | 495 |
|
Formats disponibles : Unified diff