namespace tf {
/** @page cudaFlowFind Parallel Find
%Taskflow provides standalone template methods for finding elements in
the given ranges using CUDA.
@tableofcontents
@section CUDAParallelFindIncludeTheHeader Include the Header
You need to include the header file, `%taskflow/cuda/algorithm/find.hpp`,
for creating a parallel-find task.
@section cudaFlowFindItems Find an Element in a Range
tf::cudaFlow::find_if finds the index of the first element
in the range [first, last) that satisfies the given criteria.
This is equivalent to the parallel execution of the following loop:
@code{.cpp}
unsigned idx = 0;
for(; first != last; ++first, ++idx) {
if (p(*first)) {
return idx;
}
}
return idx;
@endcode
If no such element is found, the size of the range is returned.
The following code finds the index of the first element that is divisible
by @c 17 over a range of one million elements.
@code{.cpp}
const size_t N = 1000000;
auto vec = tf::cuda_malloc_shared<int>(N); // vector
auto idx = tf::cuda_malloc_shared<unsigned>(1); // index
// initializes the data
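for(size_t i=0; i<N; vec[i++] = rand());
// finds the index of the first element that is divisible by 17
tf::cudaFlow cf;
cf.find_if(vec, vec+N, idx, [] __device__ (int v) { return v % 17 == 0; });
cf.offload();
// deletes the memory
tf::cuda_free(vec);
tf::cuda_free(idx);
@endcode
@section cudaFlowFindMinItems Find the Minimum Element in a Range
tf::cudaFlow::min_element finds the index of the minimum element
in the given range [first, last) using the given comparison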
function object.
This is equivalent to a parallel execution of the following loop:
@code{.cpp}
if(first == last) {
return 0;
}
auto begin = first;
auto smallest = first;
for (++first; first != last; ++first) {
if (op(*first, *smallest)) {
smallest = first;
}
}
return std::distance(begin, smallest);
@endcode
The following code finds the index of the minimum element in a range
of one million elements.
@code{.cpp}
const size_t N = 1000000;
auto vec = tf::cuda_malloc_shared<int>(N); // vector
auto idx = tf::cuda_malloc_shared<unsigned>(1); // index
// initializes the data
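for(size_t i=0; i<N; vec[i++] = rand());
// finds the index of the minimum element using the less comparator
tf::cudaFlow cf;
cf.min_element(vec, vec+N, idx, tf::cuda_less<int>{});
cf.offload();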
// deletes the memory
tf::cuda_free(vec);
tf::cuda_free(idx);
@endcode
@section cudaFlowFindMaxItems Find the Maximum Element in a Range
Similar to tf::cudaFlow::min_element,
tf::cudaFlow::max_element finds the index of the maximum element
in the given range [first, last) using the given comparison
function object.
This is equivalent to a parallel execution of the following loop:
@code{.cpp}
if(first == last) {
return 0;
}
auto begin = first;
auto largest = first;
for (++first; first != last; ++first) {
if (op(*largest, *first)) {
largest = first;
}
}
return std::distance(begin, largest);
@endcode
The following code finds the index of the maximum element in a range
of one million elements.
@code{.cpp}
const size_t N = 1000000;
auto vec = tf::cuda_malloc_shared<int>(N); // vector
auto idx = tf::cuda_malloc_shared<unsigned>(1); // index
// initializes the data
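for(size_t i=0; i<N; vec[i++] = rand());
// finds the index of the maximum element using the less comparator
tf::cudaFlow cf;
cf.max_element(vec, vec+N, idx, tf::cuda_less<int>{});
cf.offload();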
// deletes the memory
tf::cuda_free(vec);
tf::cuda_free(idx);
@endcode
@section cudaFlowFindMiscellaneousItems Miscellaneous Items
Parallel find algorithms are also available in tf::cudaFlowCapturer
with the same API.
*/
}