mesytec-mnode/external/taskflow-3.8.0/docs/xml/classtf_1_1cudaFlowCapturer.xml
2025-01-04 01:25:05 +01:00

1675 lines
79 KiB
XML

<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<doxygen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="compound.xsd" version="1.9.1" xml:lang="en-US">
<compounddef id="classtf_1_1cudaFlowCapturer" kind="class" language="C++" prot="public">
<compoundname>tf::cudaFlowCapturer</compoundname>
<includes refid="cuda__capturer_8hpp" local="no">taskflow/cuda/cuda_capturer.hpp</includes>
<innerclass refid="structtf_1_1cudaFlowCapturer_1_1External" prot="private">tf::cudaFlowCapturer::External</innerclass>
<innerclass refid="structtf_1_1cudaFlowCapturer_1_1Internal" prot="private">tf::cudaFlowCapturer::Internal</innerclass>
<sectiondef kind="private-type">
<memberdef kind="typedef" id="classtf_1_1cudaFlowCapturer_1a5f92f6ccad52aed18441d80bc186049f" prot="private" static="no">
<type>std::variant&lt; External, Internal &gt;</type>
<definition>using tf::cudaFlowCapturer::handle_t = std::variant&lt;External, Internal&gt;</definition>
<argsstring></argsstring>
<name>handle_t</name>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="71" column="3" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="71" bodyend="-1"/>
</memberdef>
<memberdef kind="typedef" id="classtf_1_1cudaFlowCapturer_1a9e390b0f7cb62729b1f04a5f37430ac8" prot="private" static="no">
<type>std::variant&lt; <ref refid="classtf_1_1cudaFlowRoundRobinOptimizer" kindref="compound">cudaFlowRoundRobinOptimizer</ref>, <ref refid="classtf_1_1cudaFlowSequentialOptimizer" kindref="compound">cudaFlowSequentialOptimizer</ref>, <ref refid="classtf_1_1cudaFlowLinearOptimizer" kindref="compound">cudaFlowLinearOptimizer</ref> &gt;</type>
<definition>using tf::cudaFlowCapturer::Optimizer = std::variant&lt; cudaFlowRoundRobinOptimizer, cudaFlowSequentialOptimizer, cudaFlowLinearOptimizer &gt;</definition>
<argsstring></argsstring>
<name>Optimizer</name>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="73" column="3" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="77" bodyend="-1"/>
</memberdef>
</sectiondef>
<sectiondef kind="friend">
<memberdef kind="friend" id="classtf_1_1cudaFlowCapturer_1a9f64f729511a922781a59663ff1c6250" prot="private" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type>class</type>
<definition>friend class cudaFlow</definition>
<argsstring></argsstring>
<name>cudaFlow</name>
<param>
<type><ref refid="classtf_1_1cudaFlow" kindref="compound">cudaFlow</ref></type>
</param>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="59" column="3" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="59" bodyend="-1"/>
</memberdef>
<memberdef kind="friend" id="classtf_1_1cudaFlowCapturer_1a763b2f90bc53f92d680a635fe28e858e" prot="private" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type>class</type>
<definition>friend class Executor</definition>
<argsstring></argsstring>
<name>Executor</name>
<param>
<type><ref refid="classtf_1_1Executor" kindref="compound">Executor</ref></type>
</param>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="60" column="3" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="60" bodyend="-1"/>
</memberdef>
</sectiondef>
<sectiondef kind="private-attrib">
<memberdef kind="variable" id="classtf_1_1cudaFlowCapturer_1aaaebe71b8297f4e14ba132a664401628" prot="private" static="no" mutable="no">
<type>cudaFlowGraph</type>
<definition>cudaFlowGraph tf::cudaFlowCapturer::_cfg</definition>
<argsstring></argsstring>
<name>_cfg</name>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="514" column="19" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="514" bodyend="-1"/>
</memberdef>
<memberdef kind="variable" id="classtf_1_1cudaFlowCapturer_1acdc0ba0a1d25ca9f3c0780a62b68508a" prot="private" static="no" mutable="no">
<type>Optimizer</type>
<definition>Optimizer tf::cudaFlowCapturer::_optimizer</definition>
<argsstring></argsstring>
<name>_optimizer</name>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="516" column="15" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="516" bodyend="-1"/>
</memberdef>
<memberdef kind="variable" id="classtf_1_1cudaFlowCapturer_1aadd53d42f612da940755d5ebc6fb00de" prot="private" static="no" mutable="no">
<type>cudaGraphExec</type>
<definition>cudaGraphExec tf::cudaFlowCapturer::_exe</definition>
<argsstring></argsstring>
<name>_exe</name>
<initializer>{nullptr}</initializer>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="518" column="19" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="518" bodyend="-1"/>
</memberdef>
</sectiondef>
<sectiondef kind="public-func">
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a0ddccd6faa338047921269bfe964b774" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type></type>
<definition>tf::cudaFlowCapturer::cudaFlowCapturer</definition>
<argsstring>()=default</argsstring>
<name>cudaFlowCapturer</name>
<briefdescription>
<para>constructs a standalone <ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">cudaFlowCapturer</ref> </para>
</briefdescription>
<detaileddescription>
<para>A standalone cudaFlow capturer does not go through any taskflow and can be run by the caller thread using <ref refid="classtf_1_1cudaFlowCapturer_1a952596fd7c46acee4c2459d8fe39da28" kindref="member">tf::cudaFlowCapturer::run</ref>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="87" column="5"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a8492d77263ab2a15cce21d4bfae5b331" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type></type>
<definition>tf::cudaFlowCapturer::~cudaFlowCapturer</definition>
<argsstring>()=default</argsstring>
<name>~cudaFlowCapturer</name>
<briefdescription>
<para>destructs the <ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">cudaFlowCapturer</ref> </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="92" column="5"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1abeca6931972344a97c862c1f8d3ab9bb" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type></type>
<definition>tf::cudaFlowCapturer::cudaFlowCapturer</definition>
<argsstring>(cudaFlowCapturer &amp;&amp;)=default</argsstring>
<name>cudaFlowCapturer</name>
<param>
<type><ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">cudaFlowCapturer</ref> &amp;&amp;</type>
</param>
<briefdescription>
<para>default move constructor </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="97" column="5"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a8e9d99a9bd07761156ab8445a07dbdec" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type><ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">cudaFlowCapturer</ref> &amp;</type>
<definition>cudaFlowCapturer&amp; tf::cudaFlowCapturer::operator=</definition>
<argsstring>(cudaFlowCapturer &amp;&amp;)=default</argsstring>
<name>operator=</name>
<param>
<type><ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">cudaFlowCapturer</ref> &amp;&amp;</type>
</param>
<briefdescription>
<para>default move assignment operator </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="102" column="22"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a3413a20a7c8229365e1ee9fb5af4af1e" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
<type>bool</type>
<definition>bool tf::cudaFlowCapturer::empty</definition>
<argsstring>() const</argsstring>
<name>empty</name>
<briefdescription>
<para>queries the emptiness of the graph </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="107" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="522" bodyend="524"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1aeb826786f1580bae1335d94ffbeb7e02" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
<type>size_t</type>
<definition>size_t tf::cudaFlowCapturer::num_tasks</definition>
<argsstring>() const</argsstring>
<name>num_tasks</name>
<briefdescription>
<para>queries the number of tasks </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="112" column="12" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="527" bodyend="529"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a06f1176b6a5590832f0e09a049f8a622" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::clear</definition>
<argsstring>()</argsstring>
<name>clear</name>
<briefdescription>
<para>clears this cudaFlow capturer </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="117" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="532" bodyend="535"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a90d1265bcc27647906bed6e6876c9aa7" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::dump</definition>
<argsstring>(std::ostream &amp;os) const</argsstring>
<name>dump</name>
<param>
<type><ref refid="cpp/io/basic_ostream" kindref="compound" external="/home/thuang295/Code/taskflow/doxygen/cppreference-doxygen-web.tag.xml">std::ostream</ref> &amp;</type>
<declname>os</declname>
</param>
<briefdescription>
<para>dumps the cudaFlow graph into a DOT format through an output stream </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="123" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="538" bodyend="540"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a979fe2a7bf2c361c050c0742108197c7" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::dump_native_graph</definition>
<argsstring>(std::ostream &amp;os) const</argsstring>
<name>dump_native_graph</name>
<param>
<type><ref refid="cpp/io/basic_ostream" kindref="compound" external="/home/thuang295/Code/taskflow/doxygen/cppreference-doxygen-web.tag.xml">std::ostream</ref> &amp;</type>
<declname>os</declname>
</param>
<briefdescription>
<para>dumps the native captured graph into a DOT format through an output stream </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="129" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="543" bodyend="545"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename C</type>
</param>
<param>
<type>std::enable_if_t&lt; std::is_invocable_r_v&lt; void, C, cudaStream_t &gt;, void &gt; *</type>
<defval>nullptr</defval>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::on</definition>
<argsstring>(C &amp;&amp;callable)</argsstring>
<name>on</name>
<param>
<type>C &amp;&amp;</type>
<declname>callable</declname>
</param>
<briefdescription>
<para>captures a sequence of CUDA operations from the given callable </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>C</parametername>
</parameternamelist>
<parameterdescription>
<para>callable type constructible with <computeroutput>std::function&lt;void(cudaStream_t)&gt;</computeroutput> </para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>callable</parametername>
</parameternamelist>
<parameterdescription>
<para>a callable to capture CUDA operations with the stream</para>
</parameterdescription>
</parameteritem>
</parameterlist>
This method applies a stream created by the flow to capture a sequence of CUDA operations defined in the callable. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="147" column="14" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="551" bodyend="556"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a5215d459df3a0d7bccac1a1f2ce9d1ee" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename C</type>
</param>
<param>
<type>std::enable_if_t&lt; std::is_invocable_r_v&lt; void, C, cudaStream_t &gt;, void &gt; *</type>
<defval>nullptr</defval>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::on</definition>
<argsstring>(cudaTask task, C &amp;&amp;callable)</argsstring>
<name>on</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>C &amp;&amp;</type>
<declname>callable</declname>
</param>
<briefdescription>
<para>updates a capture task to another sequence of CUDA operations </para>
</briefdescription>
<detaileddescription>
<para>The method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" kindref="member">cudaFlowCapturer::on</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="158" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="657" bodyend="667"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a593335760ea517cea597237137ef9333" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::noop</definition>
<argsstring>()</argsstring>
<name>noop</name>
<briefdescription>
<para>captures a no-operation task </para>
</briefdescription>
<detaileddescription>
<para><simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
</simplesect>
An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of <computeroutput>n</computeroutput> nodes with a barrier between them can be represented using an empty node and <computeroutput>2*n</computeroutput> dependency edges, rather than no empty node and <computeroutput>n^2</computeroutput> dependency edges. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="172" column="14" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="559" bodyend="561"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a168a968d7f5833700fcc14a210ad39bc" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::noop</definition>
<argsstring>(cudaTask task)</argsstring>
<name>noop</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<briefdescription>
<para>updates a task to a no-operation task </para>
</briefdescription>
<detaileddescription>
<para>The method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1a593335760ea517cea597237137ef9333" kindref="member">tf::cudaFlowCapturer::noop</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="180" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="564" bodyend="566"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1ae84d097cdae9e2e8ce108dea760483ed" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::memcpy</definition>
<argsstring>(void *dst, const void *src, size_t count)</argsstring>
<name>memcpy</name>
<param>
<type>void *</type>
<declname>dst</declname>
</param>
<param>
<type>const void *</type>
<declname>src</declname>
</param>
<param>
<type>size_t</type>
<declname>count</declname>
</param>
<briefdescription>
<para>copies data between host and device asynchronously through a stream </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>dst</parametername>
</parameternamelist>
<parameterdescription>
<para>destination memory address </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>src</parametername>
</parameternamelist>
<parameterdescription>
<para>source memory address </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>count</parametername>
</parameternamelist>
<parameterdescription>
<para>size in bytes to copy</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The method captures a <computeroutput>cudaMemcpyAsync</computeroutput> operation through an internal stream. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="192" column="14" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="569" bodyend="578"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a20db64e086bf8182b350eaf5d8807af9" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::memcpy</definition>
<argsstring>(cudaTask task, void *dst, const void *src, size_t count)</argsstring>
<name>memcpy</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>void *</type>
<declname>dst</declname>
</param>
<param>
<type>const void *</type>
<declname>src</declname>
</param>
<param>
<type>size_t</type>
<declname>count</declname>
</param>
<briefdescription>
<para>updates a capture task to a memcpy operation </para>
</briefdescription>
<detaileddescription>
<para>The method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1ae84d097cdae9e2e8ce108dea760483ed" kindref="member">cudaFlowCapturer::memcpy</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="200" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="670" bodyend="679"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1ab70f12050e78b588f5c23d874aa4e538" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename T</type>
</param>
<param>
<type>std::enable_if_t&lt;!std::is_same_v&lt; T, void &gt;, void &gt; *</type>
<defval>nullptr</defval>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::copy</definition>
<argsstring>(T *tgt, const T *src, size_t num)</argsstring>
<name>copy</name>
<param>
<type>T *</type>
<declname>tgt</declname>
</param>
<param>
<type>const T *</type>
<declname>src</declname>
</param>
<param>
<type>size_t</type>
<declname>num</declname>
</param>
<briefdescription>
<para>captures a copy task of typed data </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>T</parametername>
</parameternamelist>
<parameterdescription>
<para>element type (non-void)</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>tgt</parametername>
</parameternamelist>
<parameterdescription>
<para>pointer to the target memory block </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>src</parametername>
</parameternamelist>
<parameterdescription>
<para>pointer to the source memory block </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>num</parametername>
</parameternamelist>
<parameterdescription>
<para>number of elements to copy</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle</para>
</simplesect>
A copy task transfers <computeroutput>num*sizeof(T)</computeroutput> bytes of data from a source location to a target location. Direction can be arbitrary among CPUs and GPUs. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="219" column="14" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="582" bodyend="589"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a605f9dfd1363e10d08cbdab29f59a52e" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename T</type>
</param>
<param>
<type>std::enable_if_t&lt;!std::is_same_v&lt; T, void &gt;, void &gt; *</type>
<defval>nullptr</defval>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::copy</definition>
<argsstring>(cudaTask task, T *tgt, const T *src, size_t num)</argsstring>
<name>copy</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>T *</type>
<declname>tgt</declname>
</param>
<param>
<type>const T *</type>
<declname>src</declname>
</param>
<param>
<type>size_t</type>
<declname>num</declname>
</param>
<briefdescription>
<para>updates a capture task to a copy operation </para>
</briefdescription>
<detaileddescription>
<para>The method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1ab70f12050e78b588f5c23d874aa4e538" kindref="member">cudaFlowCapturer::copy</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="230" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="685" bodyend="694"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a0d38965b380f940bf6cfc6667a281052" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::memset</definition>
<argsstring>(void *ptr, int v, size_t n)</argsstring>
<name>memset</name>
<param>
<type>void *</type>
<declname>ptr</declname>
</param>
<param>
<type>int</type>
<declname>v</declname>
</param>
<param>
<type>size_t</type>
<declname>n</declname>
</param>
<briefdescription>
<para>initializes or sets GPU memory to the given value byte by byte </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>ptr</parametername>
</parameternamelist>
<parameterdescription>
<para>pointer to GPU memory </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>v</parametername>
</parameternamelist>
<parameterdescription>
<para>value to set for each byte of the specified memory </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>n</parametername>
</parameternamelist>
<parameterdescription>
<para>size in bytes to set</para>
</parameterdescription>
</parameteritem>
</parameterlist>
The method captures a <computeroutput>cudaMemsetAsync</computeroutput> operation through an internal stream to fill the first <computeroutput>n</computeroutput> bytes of the memory area pointed to by <computeroutput>ptr</computeroutput> with the constant byte value <computeroutput>v</computeroutput>. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="243" column="14" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="592" bodyend="598"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a4a7c4dd81f5e00e8a4c733417bca3205" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::memset</definition>
<argsstring>(cudaTask task, void *ptr, int value, size_t n)</argsstring>
<name>memset</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>void *</type>
<declname>ptr</declname>
</param>
<param>
<type>int</type>
<declname>value</declname>
</param>
<param>
<type>size_t</type>
<declname>n</declname>
</param>
<briefdescription>
<para>updates a capture task to a memset operation </para>
</briefdescription>
<detaileddescription>
<para>The method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1a0d38965b380f940bf6cfc6667a281052" kindref="member">cudaFlowCapturer::memset</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="251" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="697" bodyend="705"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a6f06c7f6954d8d67ad89f0eddfe285e9" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename F</type>
</param>
<param>
<type>typename...</type>
<declname>ArgsT</declname>
<defname>ArgsT</defname>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::kernel</definition>
<argsstring>(dim3 g, dim3 b, size_t s, F f, ArgsT &amp;&amp;... args)</argsstring>
<name>kernel</name>
<param>
<type>dim3</type>
<declname>g</declname>
</param>
<param>
<type>dim3</type>
<declname>b</declname>
</param>
<param>
<type>size_t</type>
<declname>s</declname>
</param>
<param>
<type>F</type>
<declname>f</declname>
</param>
<param>
<type>ArgsT &amp;&amp;...</type>
<declname>args</declname>
</param>
<briefdescription>
<para>captures a kernel </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>F</parametername>
</parameternamelist>
<parameterdescription>
<para>kernel function type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>ArgsT</parametername>
</parameternamelist>
<parameterdescription>
<para>kernel function parameters type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>g</parametername>
</parameternamelist>
<parameterdescription>
<para>configured grid </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>b</parametername>
</parameternamelist>
<parameterdescription>
<para>configured block </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>s</parametername>
</parameternamelist>
<parameterdescription>
<para>configured shared memory size in bytes </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>f</parametername>
</parameternamelist>
<parameterdescription>
<para>kernel function </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>args</parametername>
</parameternamelist>
<parameterdescription>
<para>arguments to forward to the kernel function by copy</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle </para>
</simplesect>
</para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="268" column="14" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="602" bodyend="608"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a850c7c028e1535db1deaecd819d82efb" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename F</type>
</param>
<param>
<type>typename...</type>
<declname>ArgsT</declname>
<defname>ArgsT</defname>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::kernel</definition>
<argsstring>(cudaTask task, dim3 g, dim3 b, size_t s, F f, ArgsT &amp;&amp;... args)</argsstring>
<name>kernel</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>dim3</type>
<declname>g</declname>
</param>
<param>
<type>dim3</type>
<declname>b</declname>
</param>
<param>
<type>size_t</type>
<declname>s</declname>
</param>
<param>
<type>F</type>
<declname>f</declname>
</param>
<param>
<type>ArgsT &amp;&amp;...</type>
<declname>args</declname>
</param>
<briefdescription>
<para>updates a capture task to a kernel operation </para>
</briefdescription>
<detaileddescription>
<para>The method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1a6f06c7f6954d8d67ad89f0eddfe285e9" kindref="member">cudaFlowCapturer::kernel</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="277" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="709" bodyend="715"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1ac944c7d20056e0633ef84f1a25b52296" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::single_task</definition>
<argsstring>(C c)</argsstring>
<name>single_task</name>
<param>
<type>C</type>
<declname>c</declname>
</param>
<briefdescription>
<para>captures a kernel that runs the given callable with only one thread </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>C</parametername>
</parameternamelist>
<parameterdescription>
<para>callable type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>c</parametername>
</parameternamelist>
<parameterdescription>
<para>callable to run by a single kernel thread </para>
</parameterdescription>
</parameteritem>
</parameterlist>
</para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="293" column="14" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="175" bodyend="179"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a2f7e439c336aa43781c3ef1ef0d71154" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::single_task</definition>
<argsstring>(cudaTask task, C c)</argsstring>
<name>single_task</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>C</type>
<declname>c</declname>
</param>
<briefdescription>
<para>updates a capture task to a single-threaded kernel </para>
</briefdescription>
<detaileddescription>
<para>This method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1ac944c7d20056e0633ef84f1a25b52296" kindref="member">cudaFlowCapturer::single_task</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="302" column="10" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="183" bodyend="187"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a0b2f1bcd59f0b42e0f823818348b4ae7" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::for_each</definition>
<argsstring>(I first, I last, C callable)</argsstring>
<name>for_each</name>
<param>
<type>I</type>
<declname>first</declname>
</param>
<param>
<type>I</type>
<declname>last</declname>
</param>
<param>
<type>C</type>
<declname>callable</declname>
</param>
<briefdescription>
<para>captures a kernel that applies a callable to each dereferenced element of the data array </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>I</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>C</parametername>
</parameternamelist>
<parameterdescription>
<para>callable type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>first</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the beginning </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>last</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the end </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>callable</parametername>
</parameternamelist>
<parameterdescription>
<para>a callable object to apply to the dereferenced iterator</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle</para>
</simplesect>
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
<para><programlisting filename=".cpp"><codeline><highlight class="keywordflow">for</highlight><highlight class="normal">(</highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>itr<sp/>=<sp/>first;<sp/>itr<sp/>!=<sp/>last;<sp/>itr++)<sp/>{</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>callable(*itr);</highlight></codeline>
<codeline><highlight class="normal">}</highlight></codeline>
</programlisting> </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="326" column="14" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="275" bodyend="279"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a17471b99db619c5a6b4645b3dffebe20" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::for_each</definition>
<argsstring>(cudaTask task, I first, I last, C callable)</argsstring>
<name>for_each</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>I</type>
<declname>first</declname>
</param>
<param>
<type>I</type>
<declname>last</declname>
</param>
<param>
<type>C</type>
<declname>callable</declname>
</param>
<briefdescription>
<para>updates a capture task to a for-each kernel task </para>
</briefdescription>
<detaileddescription>
<para>This method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1a0b2f1bcd59f0b42e0f823818348b4ae7" kindref="member">cudaFlowCapturer::for_each</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="335" column="10" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="291" bodyend="295"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1aeb877f42ee3a627c40f1c9c84e31ba3c" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::for_each_index</definition>
<argsstring>(I first, I last, I step, C callable)</argsstring>
<name>for_each_index</name>
<param>
<type>I</type>
<declname>first</declname>
</param>
<param>
<type>I</type>
<declname>last</declname>
</param>
<param>
<type>I</type>
<declname>step</declname>
</param>
<param>
<type>C</type>
<declname>callable</declname>
</param>
<briefdescription>
<para>captures a kernel that applies a callable to each index in the range with the step size </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>I</parametername>
</parameternamelist>
<parameterdescription>
<para>index type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>C</parametername>
</parameternamelist>
<parameterdescription>
<para>callable type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>first</parametername>
</parameternamelist>
<parameterdescription>
<para>beginning index </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>last</parametername>
</parameternamelist>
<parameterdescription>
<para>last index </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>step</parametername>
</parameternamelist>
<parameterdescription>
<para>step size </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>callable</parametername>
</parameternamelist>
<parameterdescription>
<para>the callable to apply to each index in the range</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle</para>
</simplesect>
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
<para><programlisting filename=".cpp"><codeline><highlight class="comment">//<sp/>step<sp/>is<sp/>positive<sp/>[first,<sp/>last)</highlight><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"></highlight><highlight class="keywordflow">for</highlight><highlight class="normal">(</highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>i=first;<sp/>i&lt;last;<sp/>i+=step)<sp/>{</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>callable(i);</highlight></codeline>
<codeline><highlight class="normal">}</highlight></codeline>
<codeline><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"></highlight><highlight class="comment">//<sp/>step<sp/>is<sp/>negative<sp/>[first,<sp/>last)</highlight><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"></highlight><highlight class="keywordflow">for</highlight><highlight class="normal">(</highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>i=first;<sp/>i&gt;last;<sp/>i+=step)<sp/>{</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>callable(i);</highlight></codeline>
<codeline><highlight class="normal">}</highlight></codeline>
</programlisting> </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="366" column="14" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="283" bodyend="287"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a05ca5fb4d005f1ff05fd1e4312fcd357" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::for_each_index</definition>
<argsstring>(cudaTask task, I first, I last, I step, C callable)</argsstring>
<name>for_each_index</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>I</type>
<declname>first</declname>
</param>
<param>
<type>I</type>
<declname>last</declname>
</param>
<param>
<type>I</type>
<declname>step</declname>
</param>
<param>
<type>C</type>
<declname>callable</declname>
</param>
<briefdescription>
<para>updates a capture task to a for-each-index kernel task </para>
</briefdescription>
<detaileddescription>
<para>This method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1aeb877f42ee3a627c40f1c9c84e31ba3c" kindref="member">cudaFlowCapturer::for_each_index</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="375" column="10" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="299" bodyend="305"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a99d9a86a7240ebf0767441e4ec2e14c4" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I</type>
</param>
<param>
<type>typename O</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::transform</definition>
<argsstring>(I first, I last, O output, C op)</argsstring>
<name>transform</name>
<param>
<type>I</type>
<declname>first</declname>
</param>
<param>
<type>I</type>
<declname>last</declname>
</param>
<param>
<type>O</type>
<declname>output</declname>
</param>
<param>
<type>C</type>
<declname>op</declname>
</param>
<briefdescription>
<para>captures a kernel that transforms an input range to an output range </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>I</parametername>
</parameternamelist>
<parameterdescription>
<para>input iterator type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>O</parametername>
</parameternamelist>
<parameterdescription>
<para>output iterator type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>C</parametername>
</parameternamelist>
<parameterdescription>
<para>unary operator type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>first</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the beginning of the input range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>last</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the end of the input range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>output</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the beginning of the output range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>op</parametername>
</parameternamelist>
<parameterdescription>
<para>unary operator to apply to transform each item in the range</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle</para>
</simplesect>
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
<para><programlisting filename=".cpp"><codeline><highlight class="keywordflow">while</highlight><highlight class="normal"><sp/>(first<sp/>!=<sp/>last)<sp/>{</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>*output++<sp/>=<sp/>op(*first++);</highlight></codeline>
<codeline><highlight class="normal">}</highlight></codeline>
</programlisting> </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="402" column="14" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="236" bodyend="241"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1afa62195f91702a6f5cbdad6fefb97e4c" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I</type>
</param>
<param>
<type>typename O</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::transform</definition>
<argsstring>(cudaTask task, I first, I last, O output, C op)</argsstring>
<name>transform</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>I</type>
<declname>first</declname>
</param>
<param>
<type>I</type>
<declname>last</declname>
</param>
<param>
<type>O</type>
<declname>output</declname>
</param>
<param>
<type>C</type>
<declname>op</declname>
</param>
<briefdescription>
<para>updates a capture task to a transform kernel task </para>
</briefdescription>
<detaileddescription>
<para>This method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1a99d9a86a7240ebf0767441e4ec2e14c4" kindref="member">cudaFlowCapturer::transform</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="411" column="10" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="256" bodyend="263"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1ac2f527e57e8fe447b9f13ba51e9b9c48" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I1</type>
</param>
<param>
<type>typename I2</type>
</param>
<param>
<type>typename O</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<definition>cudaTask tf::cudaFlowCapturer::transform</definition>
<argsstring>(I1 first1, I1 last1, I2 first2, O output, C op)</argsstring>
<name>transform</name>
<param>
<type>I1</type>
<declname>first1</declname>
</param>
<param>
<type>I1</type>
<declname>last1</declname>
</param>
<param>
<type>I2</type>
<declname>first2</declname>
</param>
<param>
<type>O</type>
<declname>output</declname>
</param>
<param>
<type>C</type>
<declname>op</declname>
</param>
<briefdescription>
<para>captures a kernel that transforms two input ranges to an output range </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>I1</parametername>
</parameternamelist>
<parameterdescription>
<para>first input iterator type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>I2</parametername>
</parameternamelist>
<parameterdescription>
<para>second input iterator type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>O</parametername>
</parameternamelist>
<parameterdescription>
<para>output iterator type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>C</parametername>
</parameternamelist>
<parameterdescription>
<para>binary operator type</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>first1</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the beginning of the first input range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>last1</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the end of the first input range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>first2</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the beginning of the second input range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>output</parametername>
</parameternamelist>
<parameterdescription>
<para>iterator to the beginning of the output range </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>op</parametername>
</parameternamelist>
<parameterdescription>
<para>binary operator to apply to transform each pair of items in the two input ranges</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle</para>
</simplesect>
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
<para><programlisting filename=".cpp"><codeline><highlight class="keywordflow">while</highlight><highlight class="normal"><sp/>(first1<sp/>!=<sp/>last1)<sp/>{</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>*output++<sp/>=<sp/>op(*first1++,<sp/>*first2++);</highlight></codeline>
<codeline><highlight class="normal">}</highlight></codeline>
</programlisting> </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="439" column="14" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="245" bodyend="252"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a568dcdd226d7e466e2ee106fcdde5db9" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename I1</type>
</param>
<param>
<type>typename I2</type>
</param>
<param>
<type>typename O</type>
</param>
<param>
<type>typename C</type>
</param>
</templateparamlist>
<type>void</type>
<definition>void tf::cudaFlowCapturer::transform</definition>
<argsstring>(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C op)</argsstring>
<name>transform</name>
<param>
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
<declname>task</declname>
</param>
<param>
<type>I1</type>
<declname>first1</declname>
</param>
<param>
<type>I1</type>
<declname>last1</declname>
</param>
<param>
<type>I2</type>
<declname>first2</declname>
</param>
<param>
<type>O</type>
<declname>output</declname>
</param>
<param>
<type>C</type>
<declname>op</declname>
</param>
<briefdescription>
<para>updates a capture task to a transform kernel task </para>
</briefdescription>
<detaileddescription>
<para>This method is similar to <ref refid="classtf_1_1cudaFlowCapturer_1ac2f527e57e8fe447b9f13ba51e9b9c48" kindref="member">cudaFlowCapturer::transform</ref> but operates on an existing task. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="448" column="10" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="267" bodyend="274"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1aa1d016b56c06cb28eabfebfdd7dbb24d" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<templateparamlist>
<param>
<type>typename OPT</type>
</param>
<param>
<type>typename...</type>
<declname>ArgsT</declname>
<defname>ArgsT</defname>
</param>
</templateparamlist>
<type>OPT &amp;</type>
<definition>OPT &amp; tf::cudaFlowCapturer::make_optimizer</definition>
<argsstring>(ArgsT &amp;&amp;... args)</argsstring>
<name>make_optimizer</name>
<param>
<type>ArgsT &amp;&amp;...</type>
<declname>args</declname>
</param>
<briefdescription>
<para>selects a different optimization algorithm </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="templateparam"><parameteritem>
<parameternamelist>
<parametername>OPT</parametername>
</parameternamelist>
<parameterdescription>
<para>optimizer type </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>ArgsT</parametername>
</parameternamelist>
<parameterdescription>
<para>arguments types</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>args</parametername>
</parameternamelist>
<parameterdescription>
<para>arguments to forward to construct the optimizer</para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para>a reference to the optimizer</para>
</simplesect>
We currently support the following optimization algorithms to capture a user-described cudaFlow:<itemizedlist>
<listitem><para><ref refid="classtf_1_1cudaFlowSequentialOptimizer" kindref="compound">tf::cudaFlowSequentialOptimizer</ref></para>
</listitem><listitem><para><ref refid="classtf_1_1cudaFlowRoundRobinOptimizer" kindref="compound">tf::cudaFlowRoundRobinOptimizer</ref></para>
</listitem><listitem><para><ref refid="classtf_1_1cudaFlowLinearOptimizer" kindref="compound">tf::cudaFlowLinearOptimizer</ref></para>
</listitem></itemizedlist>
</para>
<para>By default, <ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">tf::cudaFlowCapturer</ref> uses the round-robin optimization algorithm with four streams to transform a user-level graph into a native CUDA graph. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="477" column="9" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="719" bodyend="721"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a31f29772f4713848c1b0ff1a66a3dcc3" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>cudaGraph_t</type>
<definition>cudaGraph_t tf::cudaFlowCapturer::capture</definition>
<argsstring>()</argsstring>
<name>capture</name>
<briefdescription>
<para>captures the <ref refid="classtf_1_1cudaFlow" kindref="compound">cudaFlow</ref> and turns it into a CUDA <ref refid="classtf_1_1Graph" kindref="compound">Graph</ref> </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="482" column="17" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="611" bodyend="615"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a952596fd7c46acee4c2459d8fe39da28" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>void</type>
<definition>void tf::cudaFlowCapturer::run</definition>
<argsstring>(cudaStream_t stream)</argsstring>
<name>run</name>
<param>
<type>cudaStream_t</type>
<declname>stream</declname>
</param>
<briefdescription>
<para>offloads the cudaFlowCapturer onto a GPU asynchronously via a stream </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>stream</parametername>
</parameternamelist>
<parameterdescription>
<para>stream for performing this operation</para>
</parameterdescription>
</parameteritem>
</parameterlist>
Offloads the present cudaFlowCapturer onto a GPU asynchronously via the given stream.</para>
<para>An offloaded cudaFlowCapturer forces the underlying graph to be instantiated. After the instantiation, you should not modify the graph topology, but you may still update node parameters. </para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="500" column="10" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="618" bodyend="641"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a34be2e2d69ff66add60f5517e01bea83" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>cudaGraph_t</type>
<definition>cudaGraph_t tf::cudaFlowCapturer::native_graph</definition>
<argsstring>()</argsstring>
<name>native_graph</name>
<briefdescription>
<para>acquires a reference to the underlying CUDA graph </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="505" column="17" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="644" bodyend="646"/>
</memberdef>
<memberdef kind="function" id="classtf_1_1cudaFlowCapturer_1a3c03a7d269268a2a63e864fedb2fb8a6" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
<type>cudaGraphExec_t</type>
<definition>cudaGraphExec_t tf::cudaFlowCapturer::native_executable</definition>
<argsstring>()</argsstring>
<name>native_executable</name>
<briefdescription>
<para>acquires a reference to the underlying CUDA graph executable </para>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="510" column="21" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="649" bodyend="651"/>
</memberdef>
</sectiondef>
<briefdescription>
<para>class to create a cudaFlow graph using stream capture </para>
</briefdescription>
<detaileddescription>
<para>The usage of <ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">tf::cudaFlowCapturer</ref> is similar to <ref refid="classtf_1_1cudaFlow" kindref="compound">tf::cudaFlow</ref>, except users can call the method <ref refid="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" kindref="member">tf::cudaFlowCapturer::on</ref> to capture a sequence of asynchronous CUDA operations through the given stream. The following example creates a CUDA graph that captures two kernel tasks, <computeroutput>task_1</computeroutput> and <computeroutput>task_2</computeroutput>, where <computeroutput>task_1</computeroutput> runs before <computeroutput>task_2</computeroutput>.</para>
<para><programlisting filename=".cpp"><codeline><highlight class="normal">taskflow.emplace([](<ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">tf::cudaFlowCapturer</ref>&amp;<sp/>capturer){</highlight></codeline>
<codeline><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="comment">//<sp/>capture<sp/>my_kernel_1<sp/>through<sp/>the<sp/>given<sp/>stream<sp/>managed<sp/>by<sp/>the<sp/>capturer</highlight><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>task_1<sp/>=<sp/>capturer.<ref refid="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" kindref="member">on</ref>([&amp;](cudaStream_t<sp/>stream){</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/><sp/><sp/>my_kernel_1&lt;&lt;&lt;grid_1,<sp/>block_1,<sp/>shm_size_1,<sp/>stream&gt;&gt;&gt;(my_parameters_1);</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>});</highlight></codeline>
<codeline><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="comment">//<sp/>capture<sp/>my_kernel_2<sp/>through<sp/>the<sp/>given<sp/>stream<sp/>managed<sp/>by<sp/>the<sp/>capturer</highlight><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>task_2<sp/>=<sp/>capturer.<ref refid="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" kindref="member">on</ref>([&amp;](cudaStream_t<sp/>stream){</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/><sp/><sp/>my_kernel_2&lt;&lt;&lt;grid_2,<sp/>block_2,<sp/>shm_size_2,<sp/>stream&gt;&gt;&gt;(my_parameters_2);</highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>});</highlight></codeline>
<codeline><highlight class="normal"></highlight></codeline>
<codeline><highlight class="normal"><sp/><sp/>task_1.<ref refid="classtf_1_1cudaTask_1abdd68287ec4dff4216af34d1db44d1b4" kindref="member">precede</ref>(task_2);</highlight></codeline>
<codeline><highlight class="normal">});</highlight></codeline>
</programlisting></para>
<para>Similar to <ref refid="classtf_1_1cudaFlow" kindref="compound">tf::cudaFlow</ref>, a cudaFlowCapturer is a task (<ref refid="classtf_1_1Task" kindref="compound">tf::Task</ref>) created from <ref refid="classtf_1_1Taskflow" kindref="compound">tf::Taskflow</ref> and will be run by <emphasis>one</emphasis> worker thread in the executor. That is, the callable that describes a cudaFlowCapturer will be executed sequentially. Inside a cudaFlow capturer task, different GPU tasks (<ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref>) may run in parallel depending on the selected optimization algorithm. By default, we use <ref refid="classtf_1_1cudaFlowRoundRobinOptimizer" kindref="compound">tf::cudaFlowRoundRobinOptimizer</ref> to transform a user-level graph into a native CUDA graph.</para>
<para>Please refer to <ref refid="GPUTaskingcudaFlowCapturer" kindref="compound">GPU Tasking (cudaFlowCapturer)</ref> for details. </para>
</detaileddescription>
<location file="taskflow/cuda/cuda_capturer.hpp" line="57" column="1" bodyfile="taskflow/cuda/cuda_capturer.hpp" bodystart="57" bodyend="519"/>
<listofallmembers>
<member refid="classtf_1_1cudaFlowCapturer_1aaaebe71b8297f4e14ba132a664401628" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>_cfg</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1aadd53d42f612da940755d5ebc6fb00de" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>_exe</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1acdc0ba0a1d25ca9f3c0780a62b68508a" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>_optimizer</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a31f29772f4713848c1b0ff1a66a3dcc3" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>capture</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a06f1176b6a5590832f0e09a049f8a622" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>clear</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1ab70f12050e78b588f5c23d874aa4e538" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>copy</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a605f9dfd1363e10d08cbdab29f59a52e" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>copy</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a9f64f729511a922781a59663ff1c6250" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>cudaFlow</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a0ddccd6faa338047921269bfe964b774" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>cudaFlowCapturer</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1abeca6931972344a97c862c1f8d3ab9bb" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>cudaFlowCapturer</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a90d1265bcc27647906bed6e6876c9aa7" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>dump</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a979fe2a7bf2c361c050c0742108197c7" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>dump_native_graph</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a3413a20a7c8229365e1ee9fb5af4af1e" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>empty</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a763b2f90bc53f92d680a635fe28e858e" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>Executor</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a0b2f1bcd59f0b42e0f823818348b4ae7" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>for_each</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a17471b99db619c5a6b4645b3dffebe20" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>for_each</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1aeb877f42ee3a627c40f1c9c84e31ba3c" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>for_each_index</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a05ca5fb4d005f1ff05fd1e4312fcd357" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>for_each_index</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a5f92f6ccad52aed18441d80bc186049f" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>handle_t</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a6f06c7f6954d8d67ad89f0eddfe285e9" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>kernel</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a850c7c028e1535db1deaecd819d82efb" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>kernel</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1aa1d016b56c06cb28eabfebfdd7dbb24d" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>make_optimizer</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1ae84d097cdae9e2e8ce108dea760483ed" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>memcpy</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a20db64e086bf8182b350eaf5d8807af9" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>memcpy</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a0d38965b380f940bf6cfc6667a281052" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>memset</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a4a7c4dd81f5e00e8a4c733417bca3205" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>memset</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a3c03a7d269268a2a63e864fedb2fb8a6" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>native_executable</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a34be2e2d69ff66add60f5517e01bea83" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>native_graph</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a593335760ea517cea597237137ef9333" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>noop</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a168a968d7f5833700fcc14a210ad39bc" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>noop</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1aeb826786f1580bae1335d94ffbeb7e02" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>num_tasks</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>on</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a5215d459df3a0d7bccac1a1f2ce9d1ee" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>on</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a8e9d99a9bd07761156ab8445a07dbdec" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>operator=</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a9e390b0f7cb62729b1f04a5f37430ac8" prot="private" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>Optimizer</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a952596fd7c46acee4c2459d8fe39da28" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>run</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1ac944c7d20056e0633ef84f1a25b52296" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>single_task</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a2f7e439c336aa43781c3ef1ef0d71154" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>single_task</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a99d9a86a7240ebf0767441e4ec2e14c4" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>transform</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1afa62195f91702a6f5cbdad6fefb97e4c" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>transform</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1ac2f527e57e8fe447b9f13ba51e9b9c48" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>transform</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a568dcdd226d7e466e2ee106fcdde5db9" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>transform</name></member>
<member refid="classtf_1_1cudaFlowCapturer_1a8492d77263ab2a15cce21d4bfae5b331" prot="public" virt="non-virtual"><scope>tf::cudaFlowCapturer</scope><name>~cudaFlowCapturer</name></member>
</listofallmembers>
</compounddef>
</doxygen>