tf::cudaExecutionPolicy taskflow/cuda/cuda_execution_policy.hpp unsigned NT NT unsigned VT VT const unsigned const unsigned tf::cudaExecutionPolicy< NT, VT >::nt nt = NT static constant for getting the number of threads per block const unsigned const unsigned tf::cudaExecutionPolicy< NT, VT >::vt vt = VT static constant for getting the number of work units per thread const unsigned const unsigned tf::cudaExecutionPolicy< NT, VT >::nv nv = NT*VT static constant for getting the number of elements to process per block cudaStream_t cudaStream_t tf::cudaExecutionPolicy< NT, VT >::_stream _stream {0} tf::cudaExecutionPolicy< NT, VT >::cudaExecutionPolicy ()=default cudaExecutionPolicy constructs an execution policy object with the default stream tf::cudaExecutionPolicy< NT, VT >::cudaExecutionPolicy (cudaStream_t s) cudaExecutionPolicy cudaStream_t s constructs an execution policy object with the given stream cudaStream_t cudaStream_t tf::cudaExecutionPolicy< NT, VT >::stream () noexcept stream queries the associated stream void void tf::cudaExecutionPolicy< NT, VT >::stream (cudaStream_t stream) noexcept stream cudaStream_t stream assigns a stream unsigned static unsigned tf::cudaExecutionPolicy< NT, VT >::num_blocks (unsigned N) num_blocks unsigned N queries the number of blocks to accommodate N elements typename T unsigned unsigned tf::cudaExecutionPolicy< NT, VT >::reduce_bufsz (unsigned count) reduce_bufsz unsigned count queries the buffer size in bytes needed to call reduce kernels T value type count number of elements to reduce The function is used to allocate a buffer for calling tf::cuda_reduce, tf::cuda_uninitialized_reduce, tf::cuda_transform_reduce, and tf::cuda_uninitialized_transform_reduce. 
typename T unsigned unsigned tf::cudaExecutionPolicy< NT, VT >::min_element_bufsz (unsigned count) min_element_bufsz unsigned count queries the buffer size in bytes needed to call tf::cuda_min_element T value type count number of elements to search The function is used to decide the buffer size in bytes for calling tf::cuda_min_element. typename T unsigned unsigned tf::cudaExecutionPolicy< NT, VT >::max_element_bufsz (unsigned count) max_element_bufsz unsigned count queries the buffer size in bytes needed to call tf::cuda_max_element T value type count number of elements to search The function is used to decide the buffer size in bytes for calling tf::cuda_max_element. typename T unsigned unsigned tf::cudaExecutionPolicy< NT, VT >::scan_bufsz (unsigned count) scan_bufsz unsigned count queries the buffer size in bytes needed to call scan kernels T value type count number of elements to scan The function is used to allocate a buffer for calling tf::cuda_inclusive_scan, tf::cuda_exclusive_scan, tf::cuda_transform_inclusive_scan, and tf::cuda_transform_exclusive_scan. unsigned unsigned tf::cudaExecutionPolicy< NT, VT >::merge_bufsz (unsigned a_count, unsigned b_count) merge_bufsz unsigned a_count unsigned b_count queries the buffer size in bytes needed for CUDA merge algorithms a_count number of elements in the first vector to merge b_count number of elements in the second vector to merge The buffer size of the merge algorithm does not depend on the data type. The buffer is used only for storing temporary indices (of type unsigned) required during the merge process. The function is used to allocate a buffer for calling tf::cuda_merge and tf::cuda_merge_by_key. class to define execution policy for CUDA standard algorithms NT number of threads per block VT number of work units per thread Execution policy configures the kernel execution parameters in CUDA algorithms. 
The first template argument, NT, the number of threads per block, should always be a power of two. The second template argument, VT, the number of work units per thread, is recommended to be an odd number to avoid bank conflicts. For details, refer to Execution Policy. tf::cudaExecutionPolicy_stream tf::cudaExecutionPolicycudaExecutionPolicy tf::cudaExecutionPolicycudaExecutionPolicy tf::cudaExecutionPolicymax_element_bufsz tf::cudaExecutionPolicymerge_bufsz tf::cudaExecutionPolicymin_element_bufsz tf::cudaExecutionPolicynt tf::cudaExecutionPolicynum_blocks tf::cudaExecutionPolicynv tf::cudaExecutionPolicyreduce_bufsz tf::cudaExecutionPolicyscan_bufsz tf::cudaExecutionPolicystream tf::cudaExecutionPolicystream tf::cudaExecutionPolicyvt