root / ETSN / MySteps_6_SyCL.cpp @ 310
Historique | Voir | Annoter | Télécharger (4,02 ko)
1 | 310 | equemene | // To create and activate a complete SYCL environment on Debian Bookworm
|
---|---|---|---|
2 | 310 | equemene | // Create...
|
3 | 310 | equemene | // export DPCPP_HOME=$PWD/sycl_workspace
|
4 | 310 | equemene | // mv $DPCPP_HOME ${DPCPP_HOME}-$(date "+%Y%m%d-%H%M")
|
5 | 310 | equemene | // mkdir $DPCPP_HOME
|
6 | 310 | equemene | // cd $DPCPP_HOME
|
7 | 310 | equemene | // git clone https://github.com/intel/llvm -b sycl
|
8 | 310 | equemene | // python3 $DPCPP_HOME/llvm/buildbot/configure.py --cuda
|
9 | 310 | equemene | // python3 $DPCPP_HOME/llvm/buildbot/compile.py
|
10 | 310 | equemene | // python3 $DPCPP_HOME/llvm/buildbot/check.py
|
11 | 310 | equemene | // Use...
|
12 | 310 | equemene | // export PATH=$DPCPP_HOME/llvm/build/bin:$PATH
|
13 | 310 | equemene | // export LD_LIBRARY_PATH=$DPCPP_HOME/llvm/build/lib:$LD_LIBRARY_PATH
|
14 | 310 | equemene | // clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda -DCHECK MySteps_sycl.cpp -o MySteps_sycl.exe -lm
|
15 | 310 | equemene | // ./MySteps_sycl.exe 1024 10
|
16 | 310 | equemene | |
17 | 310 | equemene | #include <iostream> |
18 | 310 | equemene | #include <sycl/sycl.hpp> |
19 | 310 | equemene | #include <math.h> |
20 | 310 | equemene | #include <sys/time.h> |
21 | 310 | equemene | |
22 | 310 | equemene | #define MYFLOAT float |
23 | 310 | equemene | |
24 | 310 | equemene | #define ERROR 1e-5 |
25 | 310 | equemene | |
26 | 310 | equemene | MYFLOAT MySillyFunction(MYFLOAT x) |
27 | 310 | equemene | { |
28 | 310 | equemene | return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); |
29 | 310 | equemene | } |
30 | 310 | equemene | |
31 | 310 | equemene | void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size) |
32 | 310 | equemene | { |
33 | 310 | equemene | for (uint i=0; i<size;i++) |
34 | 310 | equemene | { |
35 | 310 | equemene | MYFLOAT ai=a[i]; |
36 | 310 | equemene | MYFLOAT bi=b[i]; |
37 | 310 | equemene | |
38 | 310 | equemene | for (int c=0;c<calls;c++) |
39 | 310 | equemene | { |
40 | 310 | equemene | ai=MySillyFunction(ai); |
41 | 310 | equemene | bi=MySillyFunction(bi); |
42 | 310 | equemene | } |
43 | 310 | equemene | |
44 | 310 | equemene | res[i] = ai + bi; |
45 | 310 | equemene | } |
46 | 310 | equemene | } |
47 | 310 | equemene | |
48 | 310 | equemene | using namespace std; |
49 | 310 | equemene | int main(int argc, char *argv[]) { |
50 | 310 | equemene | MYFLOAT *a,*b,*res,*resacc; |
51 | 310 | equemene | int size=1024; |
52 | 310 | equemene | int calls=1; |
53 | 310 | equemene | struct timeval tv1,tv2;
|
54 | 310 | equemene | |
55 | 310 | equemene | if (argc > 1) { |
56 | 310 | equemene | size=(int)atoll(argv[1]); |
57 | 310 | equemene | calls=(int)atoll(argv[2]); |
58 | 310 | equemene | } |
59 | 310 | equemene | else {
|
60 | 310 | equemene | printf("\n\tMySteps : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
|
61 | 310 | equemene | } |
62 | 310 | equemene | |
63 | 310 | equemene | std::cout << size << " " << calls << std::endl ;
|
64 | 310 | equemene | |
65 | 310 | equemene | a=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
66 | 310 | equemene | b=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
67 | 310 | equemene | res=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
68 | 310 | equemene | resacc=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
69 | 310 | equemene | |
70 | 310 | equemene | srand(110271);
|
71 | 310 | equemene | // Initialize the vectors
|
72 | 310 | equemene | for (size_t I = 0; I < size; ++I) { |
73 | 310 | equemene | a[I]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
74 | 310 | equemene | b[I]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
75 | 310 | equemene | res[I]=0;
|
76 | 310 | equemene | } |
77 | 310 | equemene | |
78 | 310 | equemene | #ifdef CHECK
|
79 | 310 | equemene | gettimeofday(&tv1, NULL);
|
80 | 310 | equemene | MySillySum(res,a,b,calls,size); |
81 | 310 | equemene | gettimeofday(&tv2, NULL);
|
82 | 310 | equemene | MYFLOAT elapsedNative=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
83 | 310 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
84 | 310 | equemene | |
85 | 310 | equemene | std::cout << "ElapsedNative: " << elapsedNative << std::endl;
|
86 | 310 | equemene | std::cout << "NativeRate: " << size/elapsedNative << std::endl;
|
87 | 310 | equemene | #endif
|
88 | 310 | equemene | |
89 | 310 | equemene | gettimeofday(&tv1, NULL);
|
90 | 310 | equemene | |
91 | 310 | equemene | sycl::buffer<MYFLOAT> aBuf(&a[0],size);
|
92 | 310 | equemene | sycl::buffer<MYFLOAT> bBuf(&b[0],size);
|
93 | 310 | equemene | sycl::buffer<MYFLOAT> resaccBuf(&resacc[0],size);
|
94 | 310 | equemene | |
95 | 310 | equemene | // Creating SYCL queue
|
96 | 310 | equemene | sycl::queue Queue; |
97 | 310 | equemene | |
98 | 310 | equemene | Queue.submit([&](auto &h) {
|
99 | 310 | equemene | // Getting write only access to the buffer on a device.
|
100 | 310 | equemene | sycl::accessor Aa{aBuf, h, sycl::read_only}; |
101 | 310 | equemene | sycl::accessor Ab{bBuf, h, sycl::read_only}; |
102 | 310 | equemene | sycl::accessor Aresacc{resaccBuf, h}; |
103 | 310 | equemene | |
104 | 310 | equemene | // Executing kernel
|
105 | 310 | equemene | h.parallel_for(size,[=](auto i) {
|
106 | 310 | equemene | MYFLOAT Aai = Aa[i]; |
107 | 310 | equemene | MYFLOAT Abi = Ab[i] ; |
108 | 310 | equemene | for (size_t C = 0 ; C < calls ; C++) { |
109 | 310 | equemene | Aai = MySillyFunction(Aai); |
110 | 310 | equemene | Abi = MySillyFunction(Abi) ; |
111 | 310 | equemene | } |
112 | 310 | equemene | Aresacc[i] = Aai + Abi ; |
113 | 310 | equemene | }); |
114 | 310 | equemene | }); |
115 | 310 | equemene | |
116 | 310 | equemene | // Getting read only access to the buffer on the host.
|
117 | 310 | equemene | // Implicit barrier waiting for queue to complete the work.
|
118 | 310 | equemene | sycl::host_accessor HostAccessor{resaccBuf}; |
119 | 310 | equemene | |
120 | 310 | equemene | gettimeofday(&tv2, NULL);
|
121 | 310 | equemene | |
122 | 310 | equemene | MYFLOAT elapsedSyCL=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
123 | 310 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
124 | 310 | equemene | |
125 | 310 | equemene | // Check the results
|
126 | 310 | equemene | #ifdef CHECK
|
127 | 310 | equemene | bool MismatchFound = false; |
128 | 310 | equemene | for (size_t I = 0; I < size; ++I) { |
129 | 310 | equemene | if ( fabs(resacc[I] - res[I]) > ERROR ) {
|
130 | 310 | equemene | std::cout << "Element: " << I << ", error: " << res[I]-resacc[I] |
131 | 310 | equemene | << std::endl; |
132 | 310 | equemene | MismatchFound = true;
|
133 | 310 | equemene | } |
134 | 310 | equemene | } |
135 | 310 | equemene | |
136 | 310 | equemene | if (!MismatchFound) {
|
137 | 310 | equemene | std::cout << "The results are correct!" << std::endl;
|
138 | 310 | equemene | } |
139 | 310 | equemene | // return MismatchFound;
|
140 | 310 | equemene | #endif
|
141 | 310 | equemene | |
142 | 310 | equemene | std::cout << "ElapsedSyCL: " << elapsedSyCL << ::std::endl;
|
143 | 310 | equemene | std::cout << "SyCLRate: " << size/elapsedSyCL << std::endl;
|
144 | 310 | equemene | |
145 | 310 | equemene | free(a); |
146 | 310 | equemene | free(b); |
147 | 310 | equemene | free(res); |
148 | 310 | equemene | free(resacc); |
149 | 310 | equemene | |
150 | 310 | equemene | } |