Révision 62 Splutter/GPU/SplutterGPU.py

SplutterGPU.py (revision 62)
193 193

  
194 194
   for (__private ulong i=0;i<iterations;i++) {
195 195

  
196
      // Standard version does not work for several processes (some lost!)
197
      //__private uint position=(uint)(((ulong)space*(ulong)MWC)/(ulong)MAX);
196
      // Standard version does not work for several processes (some lost!) memory inconsistent
197
      //__private size_t position=(size_t)(((ulong)space*(ulong)MWC)/(ulong)MAX);
198
      
198 199
      // Dense version
199
      //__private uint position=(uint)( ((ulong)CONG+MAX*(ulong)id)/(ulong)size*(ulong)space/(ulong)MAX );
200
      __private size_t position=(size_t)( ((ulong)MWC+(ulong)MAX*(ulong)id)*(ulong)block/(ulong)MAX );
200
      //__private size_t position=(size_t)( ((ulong)MWC+(ulong)id*(ulong)MAX)*(ulong)block/(ulong)MAX );
201

  
202
      // Sparse version
203
      //__private size_t position=(size_t)( ((ulong)MWC+(ulong)id*(ulong)MAX)*(ulong)block/(ulong)MAX );
204
      //__private size_t position=(size_t)( ((ulong)MWC*(block)+(ulong)id*(ulong)MAX)/(ulong)MAX );
205
      // First
206
      //__private size_t position=(size_t)( (ulong)(0)*(ulong)size+(ulong)id );
207
      // Last
208
      //__private size_t position=(size_t)( (ulong)(block-1)*(ulong)size+(ulong)id );
209
      // General
210
      __private size_t position=(size_t)( (ulong)MWC*(ulong)(block)/(ulong)MAX*(ulong)size+(ulong)id );
211

  
201 212
      // Float version seems to be the best...
202 213
      //__private uint position=(uint)( block*(CONGfp+id) );
203 214

  
......
411 422

  
412 423
  MaxWorks=2**(int)(numpy.log2(MinMemoryXPU/4))
413 424
  print MaxWorks,2**(int)(numpy.log2(MemoryXPU))
414

  
415 425
  
416
  Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32)
426
  #Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32)
427
  Splutter=numpy.zeros(jobs*16).astype(numpy.uint32)
417 428

  
418 429
  for i in range(steps):
419 430
		
......
422 433
 
423 434
    #Splutter=numpy.zeros(jobs).astype(numpy.uint32)
424 435

  
436
    Splutter[:]=0
437

  
425 438
    print Splutter,len(Splutter)
426 439

  
427
    Splutter[:]=0
428

  
429 440
    SplutterCL = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=Splutter)
430 441

  
431 442
    if ParaStyle=='Blocks':
......
478 489
    elapsed = 1e-9*(CLLaunch.profile.end - CLLaunch.profile.start)
479 490

  
480 491
    MyDuration[i]=elapsed
481
    #print Splutter,sum(Splutter)
492
    print Splutter,sum(Splutter)
482 493
    #MySplutter[i]=numpy.median(Splutter)
483 494
    #print numpy.mean(Splutter)*len(Splutter),MySplutter[i]*len(Splutter),numpy.std(Splutter)
484 495

  

Formats disponibles : Unified diff