Révision 60

Splutter/GPU/SplutterGPU.py (revision 60)
134 134

  
135 135
__global__ void MainLoopHybrid(uint *s,uint size,ulong iterations,uint seed_w,uint seed_z)
136 136
{
137
//   uint z=rotl(seed_z,blockDim.x*blockIdx.x+threadIdx.x threadIdx.x);
138
//   uint w=rotr(seed_w,blockDim.x*blockIdx.x+threadIdx.x);
139

  
140
//   uint jsr=rotl(seed_z,blockDim.x*blockIdx.x+threadIdx.x);
141
//   uint jcong=rotr(seed_w,blockDim.x*blockIdx.x+threadIdx.x);
142

  
143 137
   uint z=seed_z;
144 138
   uint w=seed_w;
145 139

  
......
171 165
#define MWCfp MWC * 2.328306435454494e-10f
172 166
#define KISSfp KISS * 2.328306435454494e-10f
173 167

  
174
#define MAX 4294967296
168
#define MAX (ulong)4294967296
175 169

  
176 170
uint rotl(uint value, int shift) {
177 171
    return (value << shift) | (value >> (sizeof(value) * CHAR_BIT - shift));
......
183 177

  
184 178
__kernel void MainLoopGlobal(__global uint *s,const uint space,const ulong iterations,const uint seed_w,const uint seed_z)
185 179
{
186
   __private const float id=(float)get_global_id(0);
187
   __private const float size=(float)get_global_size(0);
188
   __private const float block=space/size;
180
   //__private const float id=(float)get_global_id(0);
181
   //__private const float size=(float)get_global_size(0);
182
   //__private const float block=space/size;
189 183

  
190
   __private uint z=seed_z;
191
   __private uint w=seed_w;
184
   __private const ulong id=(ulong)get_global_id(0);
185
   __private const ulong size=(ulong)get_global_size(0);
186
   __private const ulong block=(ulong)space/(ulong)size;
187
   
188
   __private uint z=seed_z-(uint)id;
189
   __private uint w=seed_w+(uint)id;
192 190

  
193 191
   __private uint jsr=seed_z;
194 192
   __private uint jcong=seed_w;
......
198 196
      // Standard version does not work for several processes (some lost!)
199 197
      //__private uint position=(uint)(((ulong)space*(ulong)MWC)/(ulong)MAX);
200 198
      // Dense version
201
      __private uint position=(uint)( (ulong)space*((ulong)CONG+MAX*(ulong)id)/(ulong)size/(ulong)MAX );
199
      //__private uint position=(uint)( ((ulong)CONG+MAX*(ulong)id)/(ulong)size*(ulong)space/(ulong)MAX );
200
      __private size_t position=(size_t)( ((ulong)MWC+(ulong)MAX*(ulong)id)*(ulong)block/(ulong)MAX );
202 201
      // Float version seems to be the best...
203 202
      //__private uint position=(uint)( block*(CONGfp+id) );
204 203

  
......
399 398

  
400 399
  MySplutter=numpy.zeros(steps)
401 400

  
402
  print 'toto ',2**(int)(numpy.log2(MemoryXPU/4)),(MemoryXPU/jobs/4)*jobs
401
  MaxWorks=2**(int)(numpy.log2(MemoryXPU/4))/8
402
  print MaxWorks,2**(int)(numpy.log2(MemoryXPU/4))
403 403

  
404 404
  for i in range(steps):
405 405
		
......
407 407
    #Splutter=numpy.zeros(1024).astype(numpy.uint32)
408 408
 
409 409
    #Splutter=numpy.zeros(jobs).astype(numpy.uint32)
410
    Splutter=numpy.zeros(jobs*16).astype(numpy.uint32)
410
    Splutter=numpy.zeros((MaxWorks/jobs)*jobs).astype(numpy.uint32)
411 411

  
412 412
    print Splutter,len(Splutter)
413 413

  
......
731 731
      print "Values seem to be wrong..."
732 732
    #THREADS*=2
733 733
    if len(average)!=0:
734
      numpy.savez("Splutter_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
734
      numpy.savez("Splutter_%s_%s_%s_%i_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),(ExploredJobs,average,median,stddev))
735 735
      ToSave=[ ExploredJobs,average,median,stddev ]
736
      numpy.savetxt("Splutter_%s_%s_%s_%s_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),numpy.transpose(ToSave))
736
      numpy.savetxt("Splutter_%s_%s_%s_%i_%i_%.8i_Device%i_%s_%s" % (Alu,GpuStyle,ParaStyle,JobStart,JobEnd,Iterations,Device,Metrology,gethostname()),numpy.transpose(ToSave))
737 737
    Jobs+=JobStep
738 738

  
739 739
  if Fit:

Formats disponibles : Unified diff