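namespace tf {

/** @page cudaFlowScan Parallel Scan

%cudaFlow provides template methods to create parallel scan tasks
on a CUDA GPU.

@tableofcontents

@section CUDAParallelScanIncludeTheHeader Include the Header

You need to include the header file, `%taskflow/cuda/algorithm/scan.hpp`,
for creating a parallel-scan task.

@section cudaFlowScanARangeOfItems Scan a Range of Items

tf::cudaFlow::inclusive_scan computes an inclusive prefix sum operation using
the given binary operator over a range of elements specified by <tt>[first, last)</tt>.
The term "inclusive" means that the i-th input element is included in the i-th sum.
The following code computes the inclusive prefix sum over an input array and
stores the result in an output array.

@code{.cpp}
const size_t N = 1000000;
int* input  = tf::cuda_malloc_shared<int>(N);  // input  vector
int* output = tf::cuda_malloc_shared<int>(N);  // output vector

// initializes the data
for(size_t i=0; i<N; input[i++]=rand());

// creates a cudaFlow of one task to perform inclusive scan
tf::cudaFlow cf;
tf::cudaTask task = cf.inclusive_scan(
  input, input + N, output, [] __device__ (int a, int b) { return a + b; }
);
cf.offload();

// verifies the result
for(size_t i=1; i<N; i++) {
  assert(output[i] == output[i-1] + input[i]);
}
@endcode

On the other hand, tf::cudaFlow::exclusive_scan computes an exclusive prefix sum operation.
The term "exclusive" means that the i-th input element is @em NOT included in the i-th sum.

@code{.cpp}
// creates a cudaFlow of one task to perform exclusive scan
tf::cudaFlow cf;
tf::cudaTask task = cf.exclusive_scan(
  input, input + N, output, [] __device__ (int a, int b) { return a + b; }
);
cf.offload();

// verifies the result
for(size_t i=1; i<N; i++) {
  assert(output[i] == output[i-1] + input[i-1]);
}
@endcode

@section cudaFlowScanTransformedItems Scan a Range of Transformed Items

tf::cudaFlow::transform_inclusive_scan transforms each item in the range <tt>[first, last)</tt>
and computes an inclusive prefix sum over these transformed items.
The following code multiplies each item by 10 and then computes the inclusive prefix sum
over 1000000 transformed items.

@code{.cpp}
const size_t N = 1000000;
int* input  = tf::cuda_malloc_shared<int>(N);  // input  vector
int* output = tf::cuda_malloc_shared<int>(N);  // output vector

// initializes the data
for(size_t i=0; i<N; input[i++]=rand());

// creates a cudaFlow of one task to inclusively scan over transformed input
tf::cudaFlow cf;
tf::cudaTask task = cf.transform_inclusive_scan(
  input, input + N, output,
  [] __device__ (int a, int b) { return a + b; },  // binary scan operator
  [] __device__ (int a) { return a*10; }           // unary transform operator
);
cf.offload();

// verifies the result
for(size_t i=1; i<N; i++) {
  assert(output[i] == output[i-1] + input[i] * 10);
}
@endcode

Similarly, tf::cudaFlow::transform_exclusive_scan performs an exclusive prefix sum
over a range of transformed items.
The following code computes the exclusive prefix sum over 1000000 transformed items,
each multiplied by 10.

@code{.cpp}
const size_t N = 1000000;
int* input  = tf::cuda_malloc_shared<int>(N);  // input  vector
int* output = tf::cuda_malloc_shared<int>(N);  // output vector

// initializes the data
for(size_t i=0; i<N; input[i++]=rand());

// creates a cudaFlow of one task to exclusively scan over transformed input
tf::cudaFlow cf;
tf::cudaTask task = cf.transform_exclusive_scan(
  input, input + N, output,
  [] __device__ (int a, int b) { return a + b; },  // binary scan operator
  [] __device__ (int a) { return a*10; }           // unary transform operator
);
cf.offload();

// verifies the result
for(size_t i=1; i<N; i++) {
  assert(output[i] == output[i-1] + input[i-1] * 10);
}
@endcode

*/

}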