// This program demonstrates how to create a simple vector-add // application using syclFlow and unified shared memory (USM). #include constexpr size_t N = 10000000; /*int main() { tf::Executor executor; tf::Taskflow taskflow; sycl::queue queue; int* data {nullptr}; // create an allocate task to allocate a shared memory tf::Task allocate = taskflow.emplace( [&](){ data = sycl::malloc_shared(N, queue); } ); // create a syclFlow task to add 2 to each element of the vector tf::Task syclFlow = taskflow.emplace_on([&](tf::syclFlow& sf){ tf::syclTask fill = sf.fill(data, 100, N); tf::syclTask plus = sf.parallel_for( sycl::range<1>(N), [=](sycl::id<1> id) { data[id] += 2; } ); fill.precede(plus); }, queue); // create a deallocate task that checks the result and frees the memory tf::Task deallocate = taskflow.emplace([&](){ for(size_t i=0; i data; sycl::queue Q{}; // Select any device for this queue std::cout << "Selected device is: " << Q.get_device().get_info() << "\n"; sycl::buffer A{ sycl::range<1>(size) }; sycl::buffer B{ sycl::range<1>(size) }; sycl::buffer C{ data }; Q.submit([&](sycl::handler& h) { auto acc = A.get_access(h); h.parallel_for(size, [=](auto& idx) { acc[idx] = 1000; }); }); Q.submit([&](sycl::handler& h) { auto acc = B.get_access(h); h.parallel_for(size, [=](auto& idx) { acc[idx] = 4000; }); }); Q.submit([&](sycl::handler& h) { auto Aacc = A.get_access(h); auto Bacc = B.get_access(h); auto Cacc = C.get_access(h); h.parallel_for(size , [=](auto&idx){ Cacc[idx] = Aacc[idx] + Bacc[idx]; }); }); sycl::accessor acc = B.get_access(); for(int i=0; i