namespace tf {

/** @page cudaFlowFind Parallel Find

%Taskflow provides standalone template methods for finding elements
in the given ranges using CUDA.

@tableofcontents

@section CUDAParallelFindIncludeTheHeader Include the Header

You need to include the header file, `%taskflow/cuda/algorithm/find.hpp`,
for creating a parallel-find task.

@section cudaFlowFindItems Find an Element in a Range

tf::cudaFlow::find_if finds the index of the first element in the range
<tt>[first, last)</tt> that satisfies the given criteria.
This is equivalent to the parallel execution of the following loop:

@code{.cpp}
unsigned idx = 0;
for(; first != last; ++first, ++idx) {
  if (p(*first)) {
    return idx;
  }
}
return idx;
@endcode

If no such element is found, the size of the range is returned.
The following code finds the index of the first element that is divisible by
@c 17 over a range of one million elements.

@code{.cpp}
const size_t N = 1000000;
auto vec = tf::cuda_malloc_shared<int>(N);           // vector
auto idx = tf::cuda_malloc_shared<unsigned>(1);      // index

// initializes the data
for(size_t i=0; i<N; vec[i++] = rand());

// finds the index of the first element that is a multiple of 17
tf::cudaFlow cudaflow;
cudaflow.find_if(
  vec, vec+N, idx, [] __device__ (auto v) { return v%17 == 0; }
);
cudaflow.offload();

// verifies the result
if(*idx != N) {
  assert(vec[*idx] % 17 == 0);
}

// deletes the memory
tf::cuda_free(vec);
tf::cuda_free(idx);
@endcode

@section cudaFlowFindMinItems Find the Minimum Element in a Range

tf::cudaFlow::min_element finds the index of the minimum element in the given
range <tt>[first, last)</tt> using the given comparison function object.
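This is equivalent to a parallel execution of the following loop:

@code{.cpp}
if(first == last) {
  return 0;
}
auto begin = first;     // remember the start; `first` is advanced by the loop
auto smallest = first;
for (++first; first != last; ++first) {
  if (op(*first, *smallest)) {
    smallest = first;
  }
}
return std::distance(begin, smallest);
@endcode

The following code finds the index of the minimum element in a range of one
million elements.

@code{.cpp}
const size_t N = 1000000;
auto vec = tf::cuda_malloc_shared<int>(N);           // vector
auto idx = tf::cuda_malloc_shared<unsigned>(1);      // index

// initializes the data
for(size_t i=0; i<N; vec[i++] = rand());

// finds the minimum element using the less comparator
tf::cudaFlow cf;
tf::cudaTask task = cf.min_element(
  vec, vec+N, idx, [] __device__ (auto a, auto b) { return a<b; }
);
cf.offload();

// verifies the result
assert(vec[*idx] == *std::min_element(vec, vec+N, std::less<int>{}));

// deletes the memory
tf::cuda_free(vec);
tf::cuda_free(idx);
@endcode

@section cudaFlowFindMaxItems Find the Maximum Element in a Range

Similar to tf::cudaFlow::min_element,
tf::cudaFlow::max_element finds the index of the maximum element in the given
range <tt>[first, last)</tt> using the given comparison function object.
This is equivalent to a parallel execution of the following loop:

@code{.cpp}
if(first == last) {
  return 0;
}
auto begin = first;     // remember the start; `first` is advanced by the loop
auto largest = first;
for (++first; first != last; ++first) {
  if (op(*largest, *first)) {
    largest = first;
  }
}
return std::distance(begin, largest);
@endcode

The following code finds the index of the maximum element in a range of one
million elements.

@code{.cpp}
const size_t N = 1000000;
auto vec = tf::cuda_malloc_shared<int>(N);           // vector
auto idx = tf::cuda_malloc_shared<unsigned>(1);      // index

// initializes the data
for(size_t i=0; i<N; vec[i++] = rand());

// finds the maximum element using the less comparator
tf::cudaFlow cf;
tf::cudaTask task = cf.max_element(
  vec, vec+N, idx, [] __device__ (auto a, auto b) { return a<b; }
);
cf.offload();

// verifies the result
assert(vec[*idx] == *std::max_element(vec, vec+N, std::less<int>{}));

// deletes the memory
tf::cuda_free(vec);
tf::cuda_free(idx);
@endcode

@section cudaFlowFindMiscellaneousItems Miscellaneous Items

Parallel find algorithms are also available in tf::cudaFlowCapturer
with the same API.

*/

}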