mesytec-mnode/external/taskflow-3.8.0/docs/xml/classtf_1_1cudaExecutionPolicy.xml
2025-01-04 01:25:05 +01:00

418 lines
22 KiB
XML

<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<doxygen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="compound.xsd" version="1.9.1" xml:lang="en-US">
<compounddef id="classtf_1_1cudaExecutionPolicy" kind="class" language="C++" prot="public">
<compoundname>tf::cudaExecutionPolicy</compoundname>
<includes refid="cuda__execution__policy_8hpp" local="no">taskflow/cuda/cuda_execution_policy.hpp</includes>
<templateparamlist>
<param>
<type>unsigned</type>
<declname>NT</declname>
<defname>NT</defname>
</param>
<param>
<type>unsigned</type>
<declname>VT</declname>
<defname>VT</defname>
</param>
</templateparamlist>
<sectiondef kind="public-static-attrib">
<memberdef kind="variable" id="classtf_1_1cudaExecutionPolicy_1abb1050526f45873c967976a99e9a370d" prot="public" static="yes" mutable="no">
<type>const unsigned</type>
<definition>const unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::nt</definition>
<argsstring></argsstring>
<name>nt</name>
<initializer>= NT</initializer>
<briefdescription>
<para>static constant for getting the number of threads per block </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="36" column="25" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="36" bodyend="-1"/>
</memberdef>
<memberdef kind="variable" id="classtf_1_1cudaExecutionPolicy_1a9410f1b3a5cb9a3cc5e8d640bc7d3990" prot="public" static="yes" mutable="no">
<type>const unsigned</type>
<definition>const unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::vt</definition>
<argsstring></argsstring>
<name>vt</name>
<initializer>= VT</initializer>
<briefdescription>
<para>static constant for getting the number of work units per thread </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="39" column="25" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="39" bodyend="-1"/>
</memberdef>
<memberdef kind="variable" id="classtf_1_1cudaExecutionPolicy_1a92ac5a32147584738f32a720ea08e3f4" prot="public" static="yes" mutable="no">
<type>const unsigned</type>
<definition>const unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::nv</definition>
<argsstring></argsstring>
<name>nv</name>
<initializer>= NT*VT</initializer>
<briefdescription>
<para>static constant for getting the number of elements to process per block </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="42" column="25" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="42" bodyend="-1"/>
</memberdef>
</sectiondef>
<sectiondef kind="private-attrib">
<memberdef kind="variable" id="classtf_1_1cudaExecutionPolicy_1a1eaf3d45afdf53b4bf9927dd1bd7a02b" prot="private" static="no" mutable="no">
<type>cudaStream_t</type>
<definition>cudaStream_t tf::cudaExecutionPolicy&lt; NT, VT &gt;::_stream</definition>
<argsstring></argsstring>
<name>_stream</name>
<initializer>{0}</initializer>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="144" column="16" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="144" bodyend="-1"/>
</memberdef>
</sectiondef>
<sectiondef kind="public-func">
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1aea3b671f778bfb9eca5d7113636f63bf" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type></type>
<definition>tf::cudaExecutionPolicy&lt; NT, VT &gt;::cudaExecutionPolicy</definition>
<argsstring>()=default</argsstring>
<name>cudaExecutionPolicy</name>
<briefdescription>
<para>constructs an execution policy object with default stream </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="47" column="3"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1ac1c7784472394d4abcb6f6a2a80cc019" prot="public" static="no" const="no" explicit="yes" inline="yes" virt="non-virtual">
<type></type>
<definition>tf::cudaExecutionPolicy&lt; NT, VT &gt;::cudaExecutionPolicy</definition>
<argsstring>(cudaStream_t s)</argsstring>
<name>cudaExecutionPolicy</name>
<param>
<type>cudaStream_t</type>
<declname>s</declname>
</param>
<briefdescription>
<para>constructs an execution policy object with the given stream </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="52" column="12" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="52" bodyend="52"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1a5be1b273985800ab886665d28663c29b" prot="public" static="no" const="no" explicit="no" inline="yes" noexcept="yes" virt="non-virtual">
<type>cudaStream_t</type>
<definition>cudaStream_t tf::cudaExecutionPolicy&lt; NT, VT &gt;::stream</definition>
<argsstring>() noexcept</argsstring>
<name>stream</name>
<briefdescription>
<para>queries the associated stream </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="57" column="16" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="57" bodyend="57"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1a5f2a4d6b35af49403756ee2291264758" prot="public" static="no" const="no" explicit="no" inline="yes" noexcept="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaExecutionPolicy&lt; NT, VT &gt;::stream</definition>
<argsstring>(cudaStream_t stream) noexcept</argsstring>
<name>stream</name>
<param>
<type>cudaStream_t</type>
<declname>stream</declname>
</param>
<briefdescription>
<para>assigns a stream </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="62" column="8" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="62" bodyend="62"/>
</memberdef>
</sectiondef>
<sectiondef kind="public-static-func">
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1ab96c478964fcba935aa99efe91a64e5c" prot="public" static="yes" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>unsigned</type>
<definition>static unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::num_blocks</definition>
<argsstring>(unsigned N)</argsstring>
<name>num_blocks</name>
<param>
<type>unsigned</type>
<declname>N</declname>
</param>
<briefdescription>
<para>queries the number of blocks to accommodate N elements </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="67" column="19" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="67" bodyend="67"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1a446cee95bb839ee180052059e2ad7fd6" prot="public" static="yes" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename T</type>
</param>
</templateparamlist>
<type>unsigned</type>
<definition>unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::reduce_bufsz</definition>
<argsstring>(unsigned count)</argsstring>
<name>reduce_bufsz</name>
<param>
<type>unsigned</type>
<declname>count</declname>
</param>
<briefdescription>
<para>queries the buffer size in bytes needed to call reduce kernels </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>T</parametername>
</parameternamelist>
<parameterdescription>
<para>value type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>count</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements to reduce</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The function is used to allocate a buffer for calling <ref refid="namespacetf_1a8a872d2a0ac73a676713cb5be5aa688c" kindref="member">tf::cuda_reduce</ref>, <ref refid="namespacetf_1a492e8410db032a0273a99dd905486161" kindref="member">tf::cuda_uninitialized_reduce</ref>, <ref refid="namespacetf_1a4463d06240d608bc31d8b3546a851e4e" kindref="member">tf::cuda_transform_reduce</ref>, and <ref refid="namespacetf_1aa451668b7a0a3abf385cf2abebed8962" kindref="member">tf::cuda_uninitialized_transform_reduce</ref>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="85" column="19" bodyfile="taskflow/cuda/algorithm/reduce.hpp" bodystart="215" bodyend="223"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1abcafb001cd68c1135392f4bcda5a2a05" prot="public" static="yes" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename T</type>
</param>
</templateparamlist>
<type>unsigned</type>
<definition>unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::min_element_bufsz</definition>
<argsstring>(unsigned count)</argsstring>
<name>min_element_bufsz</name>
<param>
<type>unsigned</type>
<declname>count</declname>
</param>
<briefdescription>
<para>queries the buffer size in bytes needed to call <ref refid="namespacetf_1a572c13198191c46765264f8afabe2e9f" kindref="member">tf::cuda_min_element</ref> </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>T</parametername>
</parameternamelist>
<parameterdescription>
<para>value type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>count</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements to search</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The function is used to decide the buffer size in bytes for calling <ref refid="namespacetf_1a572c13198191c46765264f8afabe2e9f" kindref="member">tf::cuda_min_element</ref>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="98" column="19" bodyfile="taskflow/cuda/algorithm/find.hpp" bodystart="194" bodyend="196"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1a31fe75c4b0765df3035e12be49af88aa" prot="public" static="yes" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename T</type>
</param>
</templateparamlist>
<type>unsigned</type>
<definition>unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::max_element_bufsz</definition>
<argsstring>(unsigned count)</argsstring>
<name>max_element_bufsz</name>
<param>
<type>unsigned</type>
<declname>count</declname>
</param>
<briefdescription>
<para>queries the buffer size in bytes needed to call <ref refid="namespacetf_1a3fc577fd0a8f127770bcf68bc56c073e" kindref="member">tf::cuda_max_element</ref> </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>T</parametername>
</parameternamelist>
<parameterdescription>
<para>value type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>count</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements to search</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The function is used to decide the buffer size in bytes for calling <ref refid="namespacetf_1a3fc577fd0a8f127770bcf68bc56c073e" kindref="member">tf::cuda_max_element</ref>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="111" column="19" bodyfile="taskflow/cuda/algorithm/find.hpp" bodystart="246" bodyend="248"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1af25648b3269902b333cfcd58665005e8" prot="public" static="yes" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename T</type>
</param>
</templateparamlist>
<type>unsigned</type>
<definition>unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::scan_bufsz</definition>
<argsstring>(unsigned count)</argsstring>
<name>scan_bufsz</name>
<param>
<type>unsigned</type>
<declname>count</declname>
</param>
<briefdescription>
<para>queries the buffer size in bytes needed to call scan kernels </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>T</parametername>
</parameternamelist>
<parameterdescription>
<para>value type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>count</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements to scan</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The function is used to allocate a buffer for calling <ref refid="namespacetf_1a2e1b44c84a09e0a8495a611cb9a7ea40" kindref="member">tf::cuda_inclusive_scan</ref>, <ref refid="namespacetf_1aeb391c40120844318fd715b8c3a716bb" kindref="member">tf::cuda_exclusive_scan</ref>, <ref refid="namespacetf_1afa4aa760ddb6efbda1b9bab505ad5baf" kindref="member">tf::cuda_transform_inclusive_scan</ref>, and <ref refid="namespacetf_1a2e739895c1c73538967af060ca714366" kindref="member">tf::cuda_transform_exclusive_scan</ref>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="125" column="19" bodyfile="taskflow/cuda/algorithm/scan.hpp" bodystart="330" bodyend="337"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaExecutionPolicy_1a1febbe549d9cbe4502a5b66167ab9553" prot="public" static="yes" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>unsigned</type>
<definition>unsigned tf::cudaExecutionPolicy&lt; NT, VT &gt;::merge_bufsz</definition>
<argsstring>(unsigned a_count, unsigned b_count)</argsstring>
<name>merge_bufsz</name>
<param>
<type>unsigned</type>
<declname>a_count</declname>
</param>
<param>
<type>unsigned</type>
<declname>b_count</declname>
</param>
<briefdescription>
<para>queries the buffer size in bytes needed for CUDA merge algorithms </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>a_count</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements in the first vector to merge </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>b_count</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements in the second vector to merge</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The buffer size of the merge algorithm does not depend on the data type. The buffer is used only for storing temporary indices (of type <computeroutput>unsigned</computeroutput>) required during the merge process.</para>
<para>The function is used to allocate a buffer for calling <ref refid="namespacetf_1a37ec481149c2f01669353033d75ed72a" kindref="member">tf::cuda_merge</ref> and <ref refid="namespacetf_1aa84d4c68d2cbe9f6efc4a1eb1a115458" kindref="member">tf::cuda_merge_by_key</ref>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="140" column="26" bodyfile="taskflow/cuda/algorithm/merge.hpp" bodystart="452" bodyend="454"/>
</memberdef>
</sectiondef>
<briefdescription>
<para>class to define execution policy for CUDA standard algorithms </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>NT</parametername>
</parameternamelist>
<parameterdescription>
<para>number of threads per block </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>VT</parametername>
</parameternamelist>
<parameterdescription>
<para>number of work units per thread</para>
</parameterdescription>
</parameteritem>
</parameterlist>
An execution policy configures the kernel execution parameters of CUDA algorithms. The first template argument, <computeroutput>NT</computeroutput>, is the number of threads per block and should always be a power of two. The second template argument, <computeroutput>VT</computeroutput>, is the number of work units per thread; an odd number is recommended to avoid bank conflicts.</para>
<para>For details, please refer to <ref refid="CUDASTDExecutionPolicy" kindref="compound">Execution Policy</ref>. </para>
</detaileddescription>
<location file="taskflow/cuda/cuda_execution_policy.hpp" line="29" column="1" bodyfile="taskflow/cuda/cuda_execution_policy.hpp" bodystart="29" bodyend="145"/>
<listofallmembers>
<member refid="classtf_1_1cudaExecutionPolicy_1a1eaf3d45afdf53b4bf9927dd1bd7a02b" prot="private" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>_stream</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1aea3b671f778bfb9eca5d7113636f63bf" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>cudaExecutionPolicy</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1ac1c7784472394d4abcb6f6a2a80cc019" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>cudaExecutionPolicy</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a31fe75c4b0765df3035e12be49af88aa" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>max_element_bufsz</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a1febbe549d9cbe4502a5b66167ab9553" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>merge_bufsz</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1abcafb001cd68c1135392f4bcda5a2a05" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>min_element_bufsz</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1abb1050526f45873c967976a99e9a370d" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>nt</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1ab96c478964fcba935aa99efe91a64e5c" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>num_blocks</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a92ac5a32147584738f32a720ea08e3f4" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>nv</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a446cee95bb839ee180052059e2ad7fd6" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>reduce_bufsz</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1af25648b3269902b333cfcd58665005e8" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>scan_bufsz</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a5be1b273985800ab886665d28663c29b" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>stream</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a5f2a4d6b35af49403756ee2291264758" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>stream</name></member>
<member refid="classtf_1_1cudaExecutionPolicy_1a9410f1b3a5cb9a3cc5e8d640bc7d3990" prot="public" virt="non-virtual"><scope>tf::cudaExecutionPolicy</scope><name>vt</name></member>
</listofallmembers>
</compounddef>
</doxygen>