Révision 310
ETSN/MySteps_6_SyCL.cpp (revision 310) | ||
---|---|---|
1 |
// To create and activate a complete SyCL environment on Debian Bookworm |
|
2 |
// Create... |
|
3 |
// export DPCPP_HOME=$PWD/sycl_workspace |
|
4 |
// mv $DPCPP_HOME ${DPCPP_HOME}-$(date "+%Y%m%d-%H%M") |
|
5 |
// mkdir $DPCPP_HOME |
|
6 |
// cd $DPCPP_HOME |
|
7 |
// git clone https://github.com/intel/llvm -b sycl |
|
8 |
// python3 $DPCPP_HOME/llvm/buildbot/configure.py --cuda |
|
9 |
// python3 $DPCPP_HOME/llvm/buildbot/compile.py |
|
10 |
// python3 $DPCPP_HOME/llvm/buildbot/check.py |
|
11 |
// Use... |
|
12 |
// export PATH=$DPCPP_HOME/llvm/build/bin:$PATH |
|
13 |
// export LD_LIBRARY_PATH=$DPCPP_HOME/llvm/build/lib:$LD_LIBRARY_PATH |
|
14 |
// clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda -DCHECK MySteps_sycl.cpp -o MySteps_sycl.exe -lm |
|
15 |
// ./MySteps_sycl.exe 1024 10 |
|
16 |
|
|
17 |
#include <iostream> |
|
18 |
#include <sycl/sycl.hpp> |
|
19 |
#include <math.h> |
|
20 |
#include <sys/time.h> |
|
21 |
|
|
22 |
// Floating-point type used for the vector elements and the arithmetic in this file. |
#define MYFLOAT float |
|
23 |
|
|
24 |
// Absolute tolerance used in main() when comparing the host and SYCL results. |
#define ERROR 1e-5 |
|
25 |
|
|
26 |
// Runs x through a chain of sixteen math calls in which every function is
// immediately followed by its inverse (cos/acos, sin/asin, tan/atan,
// cosh/acosh, sinh/asinh, tanh/atanh, exp/log, sqrt/pow(.,2)) and returns
// the final value converted to MYFLOAT.
// NOTE(review): presumably an artificial compute load for the benchmark
// rather than a meaningful formula -- confirm before "simplifying" it.
MYFLOAT MySillyFunction(MYFLOAT x)
{
  // The calls are applied innermost-first, in exactly the order a single
  // nested expression would evaluate them. `auto` keeps whatever type the
  // selected math overload returns, so no extra conversions are introduced.
  auto v = cos(x);
  v = acos(v);
  v = sin(v);
  v = asin(v);
  v = tan(v);
  v = atan(v);
  v = cosh(v);
  v = acosh(v);
  v = sinh(v);
  v = asinh(v);
  v = tanh(v);
  v = atanh(v);
  v = exp(v);
  v = log(v);
  v = sqrt(v);
  return pow(v, 2);
}
|
30 |
|
|
31 |
// Sequential host reference: for every index i in [0, size), applies
// MySillyFunction `calls` times to a[i] and to b[i] and stores the sum of
// the two results in res[i].
//
//   res   - output array, at least `size` elements
//   a, b  - input arrays, at least `size` elements each (not modified)
//   calls - number of times MySillyFunction is applied to each input value;
//           zero or negative leaves the inputs unchanged before the sum
//   size  - number of elements to process; zero or negative processes nothing
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  // Signed index on purpose: comparing an unsigned index against the signed
  // `size` would turn a negative size into a huge bound and run off the arrays.
  for (int i = 0; i < size; ++i)
  {
    MYFLOAT ai = a[i];
    MYFLOAT bi = b[i];

    for (int c = 0; c < calls; ++c)
    {
      ai = MySillyFunction(ai);
      bi = MySillyFunction(bi);
    }

    res[i] = ai + bi;
  }
}
|
47 |
|
|
48 |
// Makes every name from namespace std visible unqualified for the rest of the file. |
using namespace std; |
|
49 |
int main(int argc, char *argv[]) { |
|
50 |
MYFLOAT *a,*b,*res,*resacc; |
|
51 |
int size=1024; |
|
52 |
int calls=1; |
|
53 |
struct timeval tv1,tv2; |
|
54 |
|
|
55 |
if (argc > 1) { |
|
56 |
size=(int)atoll(argv[1]); |
|
57 |
calls=(int)atoll(argv[2]); |
|
58 |
} |
|
59 |
else { |
|
60 |
printf("\n\tMySteps : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
61 |
} |
|
62 |
|
|
63 |
std::cout << size << " " << calls << std::endl ; |
|
64 |
|
|
65 |
a=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
66 |
b=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
67 |
res=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
68 |
resacc=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
69 |
|
|
70 |
srand(110271); |
|
71 |
// Initialize the vectors |
|
72 |
for (size_t I = 0; I < size; ++I) { |
|
73 |
a[I]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
74 |
b[I]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
75 |
res[I]=0; |
|
76 |
} |
|
77 |
|
|
78 |
#ifdef CHECK |
|
79 |
gettimeofday(&tv1, NULL); |
|
80 |
MySillySum(res,a,b,calls,size); |
|
81 |
gettimeofday(&tv2, NULL); |
|
82 |
MYFLOAT elapsedNative=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
83 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
84 |
|
|
85 |
std::cout << "ElapsedNative: " << elapsedNative << std::endl; |
|
86 |
std::cout << "NativeRate: " << size/elapsedNative << std::endl; |
|
87 |
#endif |
|
88 |
|
|
89 |
gettimeofday(&tv1, NULL); |
|
90 |
|
|
91 |
sycl::buffer<MYFLOAT> aBuf(&a[0],size); |
|
92 |
sycl::buffer<MYFLOAT> bBuf(&b[0],size); |
|
93 |
sycl::buffer<MYFLOAT> resaccBuf(&resacc[0],size); |
|
94 |
|
|
95 |
// Creating SYCL queue |
|
96 |
sycl::queue Queue; |
|
97 |
|
|
98 |
Queue.submit([&](auto &h) { |
|
99 |
// Getting write only access to the buffer on a device. |
|
100 |
sycl::accessor Aa{aBuf, h, sycl::read_only}; |
|
101 |
sycl::accessor Ab{bBuf, h, sycl::read_only}; |
|
102 |
sycl::accessor Aresacc{resaccBuf, h}; |
|
103 |
|
|
104 |
// Executing kernel |
|
105 |
h.parallel_for(size,[=](auto i) { |
|
106 |
MYFLOAT Aai = Aa[i]; |
|
107 |
MYFLOAT Abi = Ab[i] ; |
|
108 |
for (size_t C = 0 ; C < calls ; C++) { |
|
109 |
Aai = MySillyFunction(Aai); |
|
110 |
Abi = MySillyFunction(Abi) ; |
|
111 |
} |
|
112 |
Aresacc[i] = Aai + Abi ; |
|
113 |
}); |
|
114 |
}); |
|
115 |
|
|
116 |
// Getting read only access to the buffer on the host. |
|
117 |
// Implicit barrier waiting for queue to complete the work. |
|
118 |
sycl::host_accessor HostAccessor{resaccBuf}; |
|
119 |
|
|
120 |
gettimeofday(&tv2, NULL); |
|
121 |
|
|
122 |
MYFLOAT elapsedSyCL=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
123 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
124 |
|
|
125 |
// Check the results |
|
126 |
#ifdef CHECK |
|
127 |
bool MismatchFound = false; |
|
128 |
for (size_t I = 0; I < size; ++I) { |
|
129 |
if ( fabs(resacc[I] - res[I]) > ERROR ) { |
|
130 |
std::cout << "Element: " << I << ", error: " << res[I]-resacc[I] |
|
131 |
<< std::endl; |
|
132 |
MismatchFound = true; |
|
133 |
} |
|
134 |
} |
|
135 |
|
|
136 |
if (!MismatchFound) { |
|
137 |
std::cout << "The results are correct!" << std::endl; |
|
138 |
} |
|
139 |
// return MismatchFound; |
|
140 |
#endif |
|
141 |
|
|
142 |
std::cout << "ElapsedSyCL: " << elapsedSyCL << ::std::endl; |
|
143 |
std::cout << "SyCLRate: " << size/elapsedSyCL << std::endl; |
|
144 |
|
|
145 |
free(a); |
|
146 |
free(b); |
|
147 |
free(res); |
|
148 |
free(resacc); |
|
149 |
|
|
150 |
} |
Formats disponibles : Unified diff