<?xml version='1.0' encoding='UTF-8' standalone='no'?>
|
|
<doxygen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="compound.xsd" version="1.9.1" xml:lang="en-US">
|
|
<compounddef id="classtf_1_1cudaFlow" kind="class" language="C++" prot="public">
|
|
<compoundname>tf::cudaFlow</compoundname>
|
|
<includes refid="cudaflow_8hpp" local="no">taskflow/cuda/cudaflow.hpp</includes>
|
|
<sectiondef kind="private-attrib">
|
|
<memberdef kind="variable" id="classtf_1_1cudaFlow_1a31c7ba8b8053d6ad95c7da5e5a9494f2" prot="private" static="no" mutable="no">
|
|
<type>cudaFlowGraph</type>
|
|
<definition>cudaFlowGraph tf::cudaFlow::_cfg</definition>
|
|
<argsstring></argsstring>
|
|
<name>_cfg</name>
|
|
<briefdescription>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="587" column="19" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="587" bodyend="-1"/>
|
|
</memberdef>
|
|
<memberdef kind="variable" id="classtf_1_1cudaFlow_1a8e3255897f01f87dcd9ca506c314a125" prot="private" static="no" mutable="no">
|
|
<type>cudaGraphExec</type>
|
|
<definition>cudaGraphExec tf::cudaFlow::_exe</definition>
|
|
<argsstring></argsstring>
|
|
<name>_exe</name>
|
|
<initializer>{nullptr}</initializer>
|
|
<briefdescription>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="588" column="19" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="588" bodyend="-1"/>
|
|
</memberdef>
|
|
</sectiondef>
|
|
<sectiondef kind="public-func">
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1ad4c3e001db151486c8479151a2108d37" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type></type>
|
|
<definition>tf::cudaFlow::cudaFlow</definition>
|
|
<argsstring>()</argsstring>
|
|
<name>cudaFlow</name>
|
|
<briefdescription>
|
|
<para>constructs a cudaFlow </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="63" column="5" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="592" bodyend="594"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a828c3ab275521672e4ec6c78d3a9ee62" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<type></type>
|
|
<definition>tf::cudaFlow::~cudaFlow</definition>
|
|
<argsstring>()=default</argsstring>
|
|
<name>~cudaFlow</name>
|
|
<briefdescription>
|
|
<para>destroys the cudaFlow and its associated native CUDA graph and executable graph </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="69" column="5"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a677a4b510abee2ac665193389b20f725" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<type></type>
|
|
<definition>tf::cudaFlow::cudaFlow</definition>
|
|
<argsstring>(cudaFlow &amp;&amp;)=default</argsstring>
|
|
<name>cudaFlow</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaFlow" kindref="compound">cudaFlow</ref> &amp;&amp;</type>
|
|
</param>
|
|
<briefdescription>
|
|
<para>default move constructor </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="74" column="5"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a74beef874538193ac0df81a180faa742" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<type><ref refid="classtf_1_1cudaFlow" kindref="compound">cudaFlow</ref> &amp;</type>
|
|
<definition>cudaFlow&amp; tf::cudaFlow::operator=</definition>
|
|
<argsstring>(cudaFlow &amp;&amp;)=default</argsstring>
|
|
<name>operator=</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaFlow" kindref="compound">cudaFlow</ref> &amp;&amp;</type>
|
|
</param>
|
|
<briefdescription>
|
|
<para>default move assignment operator </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="79" column="14"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a1926f45a038d8faa9c1b1ee43fd29a93" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>bool</type>
|
|
<definition>bool tf::cudaFlow::empty</definition>
|
|
<argsstring>() const</argsstring>
|
|
<name>empty</name>
|
|
<briefdescription>
|
|
<para>queries the emptiness of the graph </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="84" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="604" bodyend="606"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1ae6560c27d249af7e4b8b921388f5e1e2" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>size_t</type>
|
|
<definition>size_t tf::cudaFlow::num_tasks</definition>
|
|
<argsstring>() const</argsstring>
|
|
<name>num_tasks</name>
|
|
<briefdescription>
|
|
<para>queries the number of tasks </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="89" column="12" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="609" bodyend="611"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1aad726dfe21e9719d96c65530a56d9951" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::clear</definition>
|
|
<argsstring>()</argsstring>
|
|
<name>clear</name>
|
|
<briefdescription>
|
|
<para>clears the cudaFlow object </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="94" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="597" bodyend="601"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a7f97b68fa7c889db49b26aa71a46a7cf" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::dump</definition>
|
|
<argsstring>(std::ostream &amp;os) const</argsstring>
|
|
<name>dump</name>
|
|
<param>
|
|
<type><ref refid="cpp/io/basic_ostream" kindref="compound" external="/home/thuang295/Code/taskflow/doxygen/cppreference-doxygen-web.tag.xml">std::ostream</ref> &amp;</type>
|
|
<declname>os</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>dumps the cudaFlow graph into a DOT format through an output stream </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="100" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="614" bodyend="616"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a43507f21eb9cb77667ffe0ac7e6ae635" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::dump_native_graph</definition>
|
|
<argsstring>(std::ostream &amp;os) const</argsstring>
|
|
<name>dump_native_graph</name>
|
|
<param>
|
|
<type><ref refid="cpp/io/basic_ostream" kindref="compound" external="/home/thuang295/Code/taskflow/doxygen/cppreference-doxygen-web.tag.xml">std::ostream</ref> &amp;</type>
|
|
<declname>os</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>dumps the native CUDA graph into a DOT format through an output stream </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The native CUDA graph may be different from the upper-level cudaFlow graph when flow capture is involved. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="109" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="619" bodyend="621"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a30b2e107cb2c90a37f467b28d1b42a74" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::noop</definition>
|
|
<argsstring>()</argsstring>
|
|
<name>noop</name>
|
|
<briefdescription>
|
|
<para>creates a no-operation task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of <computeroutput>n</computeroutput> nodes with a barrier between them can be represented using an empty node and <computeroutput>2*n</computeroutput> dependency edges, rather than no empty node and <computeroutput>n^2</computeroutput> dependency edges. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="127" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="628" bodyend="642"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a060e1c96111c2134ce0f896420a42cd0" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::host</definition>
|
|
<argsstring>(C &amp;&amp;callable)</argsstring>
|
|
<name>host</name>
|
|
<param>
|
|
<type>C &amp;&amp;</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a host task that runs a callable on the host </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>callable type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>callable</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>a callable object with neither arguments nor return (i.e., constructible from <computeroutput>std::function&lt;void()&gt;</computeroutput>)</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A host task can only execute CPU-specific functions and cannot do any CUDA calls (e.g., <computeroutput>cudaMalloc</computeroutput>). </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="143" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="646" bodyend="666"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a02e4e5cf7d03b9d087d6fbf54eb86bbf" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::host</definition>
|
|
<argsstring>(cudaTask task, C &amp;&amp;callable)</argsstring>
|
|
<name>host</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>C &amp;&amp;</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a host task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1a060e1c96111c2134ce0f896420a42cd0" kindref="member">tf::cudaFlow::host</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132eab9361011891280a44d85b967739cc6a5" kindref="member">tf::cudaTaskType::HOST</ref>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="152" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="808" bodyend="817"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a68f666503d13a7b80fb7399fb2f0c153" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename F</type>
|
|
</param>
|
|
<param>
|
|
<type>typename...</type>
|
|
<declname>ArgsT</declname>
|
|
<defname>ArgsT</defname>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::kernel</definition>
|
|
<argsstring>(dim3 g, dim3 b, size_t s, F f, ArgsT... args)</argsstring>
|
|
<name>kernel</name>
|
|
<param>
|
|
<type>dim3</type>
|
|
<declname>g</declname>
|
|
</param>
|
|
<param>
|
|
<type>dim3</type>
|
|
<declname>b</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>s</declname>
|
|
</param>
|
|
<param>
|
|
<type>F</type>
|
|
<declname>f</declname>
|
|
</param>
|
|
<param>
|
|
<type>ArgsT...</type>
|
|
<declname>args</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a kernel task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>F</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>kernel function type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>ArgsT</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>kernel function parameters type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>g</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>configured grid </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>b</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>configured block </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>s</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>configured shared memory size in bytes </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>f</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>kernel function </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>args</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>arguments to forward to the kernel function by copy</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle </para>
|
|
</simplesect>
|
|
</para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="169" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="670" bodyend="695"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a821117dd640807bb7ec114b46888dfb1" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename F</type>
|
|
</param>
|
|
<param>
|
|
<type>typename...</type>
|
|
<declname>ArgsT</declname>
|
|
<defname>ArgsT</defname>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::kernel</definition>
|
|
<argsstring>(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)</argsstring>
|
|
<name>kernel</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>dim3</type>
|
|
<declname>g</declname>
|
|
</param>
|
|
<param>
|
|
<type>dim3</type>
|
|
<declname>b</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>shm</declname>
|
|
</param>
|
|
<param>
|
|
<type>F</type>
|
|
<declname>f</declname>
|
|
</param>
|
|
<param>
|
|
<type>ArgsT...</type>
|
|
<declname>args</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a kernel task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1a68f666503d13a7b80fb7399fb2f0c153" kindref="member">tf::cudaFlow::kernel</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132ea35c10219c45ccfb5b07444fd7e17214c" kindref="member">tf::cudaTaskType::KERNEL</ref>. The kernel function name must NOT change. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="179" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="821" bodyend="843"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a079ca65da35301e5aafd45878a19e9d2" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::memset</definition>
|
|
<argsstring>(void *dst, int v, size_t count)</argsstring>
|
|
<name>memset</name>
|
|
<param>
|
|
<type>void *</type>
|
|
<declname>dst</declname>
|
|
</param>
|
|
<param>
|
|
<type>int</type>
|
|
<declname>v</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>count</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a memset task that fills untyped data with a byte value </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>dst</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the destination device memory area </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>v</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>value to set for each byte of specified memory </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>count</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>size in bytes to set</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A memset task fills the first <computeroutput>count</computeroutput> bytes of device memory area pointed by <computeroutput>dst</computeroutput> with the byte value <computeroutput>v</computeroutput>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="195" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="765" bodyend="781"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a082505f0fec89f65808421cdc737fb17" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::memset</definition>
|
|
<argsstring>(cudaTask task, void *dst, int ch, size_t count)</argsstring>
|
|
<name>memset</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>void *</type>
|
|
<declname>dst</declname>
|
|
</param>
|
|
<param>
|
|
<type>int</type>
|
|
<declname>ch</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>count</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a memset task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1a079ca65da35301e5aafd45878a19e9d2" kindref="member">tf::cudaFlow::memset</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132ea41d4dbfd78ceea21abb0ecb03c3cc921" kindref="member">tf::cudaTaskType::MEMSET</ref>. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="206" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="879" bodyend="891"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1ad37637606f0643f360e9eda1f9a6e559" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::memcpy</definition>
|
|
<argsstring>(void *tgt, const void *src, size_t bytes)</argsstring>
|
|
<name>memcpy</name>
|
|
<param>
|
|
<type>void *</type>
|
|
<declname>tgt</declname>
|
|
</param>
|
|
<param>
|
|
<type>const void *</type>
|
|
<declname>src</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>bytes</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a memcpy task that copies untyped data in bytes </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>tgt</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the target memory block </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>src</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the source memory block </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>bytes</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>bytes to copy</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A memcpy task transfers <computeroutput>bytes</computeroutput> of data from a source location to a target location. Direction can be arbitrary among CPUs and GPUs. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="220" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="784" bodyend="800"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1acf9e6cfa65cbfcd1d33c88e64b487ce6" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::memcpy</definition>
|
|
<argsstring>(cudaTask task, void *tgt, const void *src, size_t bytes)</argsstring>
|
|
<name>memcpy</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>void *</type>
|
|
<declname>tgt</declname>
|
|
</param>
|
|
<param>
|
|
<type>const void *</type>
|
|
<declname>src</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>bytes</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a memcpy task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1ad37637606f0643f360e9eda1f9a6e559" kindref="member">tf::cudaFlow::memcpy</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132eac5d10cc70cce96265c445f14e7f5aba4" kindref="member">tf::cudaTaskType::MEMCPY</ref>. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="231" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="862" bodyend="876"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a40172fac4464f6d805f75921ea3c2a3b" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename T</type>
|
|
</param>
|
|
<param>
|
|
<type>std::enable_if_t&lt; is_pod_v&lt; T &gt; &amp;&amp;(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void &gt; *</type>
|
|
<defval>nullptr</defval>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::zero</definition>
|
|
<argsstring>(T *dst, size_t count)</argsstring>
|
|
<name>zero</name>
|
|
<param>
|
|
<type>T *</type>
|
|
<declname>dst</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>count</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a memset task that sets a typed memory block to zero </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>T</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>element type (size of <computeroutput>T</computeroutput> must be either 1, 2, or 4) </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>dst</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the destination device memory area </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>count</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>number of elements</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A zero task zeroes the first <computeroutput>count</computeroutput> elements of type <computeroutput>T</computeroutput> in a device memory area pointed by <computeroutput>dst</computeroutput>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="248" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="701" bodyend="717"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a78c2a73243809e3cbd1955cc1ffe6477" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename T</type>
|
|
</param>
|
|
<param>
|
|
<type>std::enable_if_t&lt; is_pod_v&lt; T &gt; &amp;&amp;(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void &gt; *</type>
|
|
<defval>nullptr</defval>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::zero</definition>
|
|
<argsstring>(cudaTask task, T *dst, size_t count)</argsstring>
|
|
<name>zero</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>T *</type>
|
|
<declname>dst</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>count</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a memset task to a zero task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1a40172fac4464f6d805f75921ea3c2a3b" kindref="member">tf::cudaFlow::zero</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132ea41d4dbfd78ceea21abb0ecb03c3cc921" kindref="member">tf::cudaTaskType::MEMSET</ref>.</para>
|
|
<para>The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="263" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="915" bodyend="927"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a21d4447bc834f4d3e1bb4772c850d090" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename T</type>
|
|
</param>
|
|
<param>
|
|
<type>std::enable_if_t&lt; is_pod_v&lt; T &gt; &amp;&amp;(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void &gt; *</type>
|
|
<defval>nullptr</defval>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::fill</definition>
|
|
<argsstring>(T *dst, T value, size_t count)</argsstring>
|
|
<name>fill</name>
|
|
<param>
|
|
<type>T *</type>
|
|
<declname>dst</declname>
|
|
</param>
|
|
<param>
|
|
<type>T</type>
|
|
<declname>value</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>count</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a memset task that fills a typed memory block with a value </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>T</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>element type (size of <computeroutput>T</computeroutput> must be either 1, 2, or 4)</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>dst</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the destination device memory area </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>value</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>value to fill for each element of type <computeroutput>T</computeroutput> </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>count</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>number of elements</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A fill task fills the first <computeroutput>count</computeroutput> elements of type <computeroutput>T</computeroutput> with <computeroutput>value</computeroutput> in a device memory area pointed to by <computeroutput>dst</computeroutput>. The value to fill is interpreted in type <computeroutput>T</computeroutput> rather than as a byte. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="283" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="723" bodyend="739"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a39ed97c9142959c73d4c25c34d71bd5e" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename T</type>
|
|
</param>
|
|
<param>
|
|
<type>std::enable_if_t&lt; is_pod_v&lt; T &gt; &amp;&amp;(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void &gt; *</type>
|
|
<defval>nullptr</defval>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::fill</definition>
|
|
<argsstring>(cudaTask task, T *dst, T value, size_t count)</argsstring>
|
|
<name>fill</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>T *</type>
|
|
<declname>dst</declname>
|
|
</param>
|
|
<param>
|
|
<type>T</type>
|
|
<declname>value</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>count</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a memset task to a fill task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1a21d4447bc834f4d3e1bb4772c850d090" kindref="member">tf::cudaFlow::fill</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132ea41d4dbfd78ceea21abb0ecb03c3cc921" kindref="member">tf::cudaTaskType::MEMSET</ref>.</para>
|
|
<para>The destination memory may have a different address value but must be allocated from the same context as the original destination memory. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="298" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="897" bodyend="909"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1af03e04771b655f9e629eb4c22e19b19f" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename T</type>
|
|
</param>
|
|
<param>
|
|
<type>std::enable_if_t&lt;!std::is_same_v&lt; T, void &gt;, void &gt; *</type>
|
|
<defval>nullptr</defval>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::copy</definition>
|
|
<argsstring>(T *tgt, const T *src, size_t num)</argsstring>
|
|
<name>copy</name>
|
|
<param>
|
|
<type>T *</type>
|
|
<declname>tgt</declname>
|
|
</param>
|
|
<param>
|
|
<type>const T *</type>
|
|
<declname>src</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>num</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a memcopy task that copies typed data </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>T</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>element type (non-void)</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>tgt</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the target memory block </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>src</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>pointer to the source memory block </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>num</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>number of elements to copy</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A copy task transfers <computeroutput>num*sizeof(T)</computeroutput> bytes of data from a source location to a target location. Direction can be arbitrary among CPUs and GPUs. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="317" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="746" bodyend="762"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a6cf6ec1e85172fa99c16bf0beffc0562" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename T</type>
|
|
</param>
|
|
<param>
|
|
<type>std::enable_if_t&lt;!std::is_same_v&lt; T, void &gt;, void &gt; *</type>
|
|
<defval>nullptr</defval>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::copy</definition>
|
|
<argsstring>(cudaTask task, T *tgt, const T *src, size_t num)</argsstring>
|
|
<name>copy</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>T *</type>
|
|
<declname>tgt</declname>
|
|
</param>
|
|
<param>
|
|
<type>const T *</type>
|
|
<declname>src</declname>
|
|
</param>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>num</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a memcpy task to a copy task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1af03e04771b655f9e629eb4c22e19b19f" kindref="member">tf::cudaFlow::copy</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132eac5d10cc70cce96265c445f14e7f5aba4" kindref="member">tf::cudaTaskType::MEMCPY</ref>. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="331" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="847" bodyend="859"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1ae6810f7de27e5a347331aacfce67bea1" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::run</definition>
|
|
<argsstring>(cudaStream_t stream)</argsstring>
|
|
<name>run</name>
|
|
<param>
|
|
<type>cudaStream_t</type>
|
|
<declname>stream</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>offloads the cudaFlow onto a GPU asynchronously via a stream </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>stream</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>stream for performing this operation</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
Offloads the present cudaFlow onto a GPU asynchronously via the given stream.</para>
|
|
<para>Offloading a cudaFlow forces the underlying graph to be instantiated. After instantiation, you should not modify the graph topology; you may only update node parameters. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="348" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="1004" bodyend="1010"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1acfbee67cff7dc7c6297c20c64f2e015c" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>cudaGraph_t</type>
|
|
<definition>cudaGraph_t tf::cudaFlow::native_graph</definition>
|
|
<argsstring>()</argsstring>
|
|
<name>native_graph</name>
|
|
<briefdescription>
|
|
<para>acquires a reference to the underlying CUDA graph </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="353" column="17" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="1013" bodyend="1015"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a5bfdaf621ab617ab5f0ca63466570256" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>cudaGraphExec_t</type>
|
|
<definition>cudaGraphExec_t tf::cudaFlow::native_executable</definition>
|
|
<argsstring>()</argsstring>
|
|
<name>native_executable</name>
|
|
<briefdescription>
|
|
<para>acquires a reference to the underlying CUDA graph executable </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="358" column="21" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="1018" bodyend="1020"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1ac2906cb0002fc411a983d100a3d58d62" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::single_task</definition>
|
|
<argsstring>(C c)</argsstring>
|
|
<name>single_task</name>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>c</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>runs a callable with only a single kernel thread </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>callable type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>c</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>callable to run by a single kernel thread</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle </para>
|
|
</simplesect>
|
|
</para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="374" column="14" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="163" bodyend="165"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1add2d364f38c72322d8e36bc0da0b98e4" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::single_task</definition>
|
|
<argsstring>(cudaTask task, C c)</argsstring>
|
|
<name>single_task</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>c</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates a single-threaded kernel task </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>This method is similar to <ref refid="classtf_1_1cudaFlow_1ac2906cb0002fc411a983d100a3d58d62" kindref="member">cudaFlow::single_task</ref> but operates on an existing task. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="383" column="10" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="169" bodyend="171"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a1a681f6223853b6445dcfdad07e4d0fd" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::for_each</definition>
|
|
<argsstring>(I first, I last, C callable)</argsstring>
|
|
<name>for_each</name>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>first</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>last</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>applies a callable to each dereferenced element of the data array </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>I</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>callable type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>first</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the beginning (inclusive) </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>last</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the end (exclusive) </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>callable</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>a callable object to apply to the dereferenced iterator</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
|
|
<para><programlisting filename=".cpp"><codeline><highlight class="keywordflow">for</highlight><highlight class="normal">(</highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>itr<sp/>=<sp/>first;<sp/>itr<sp/>!=<sp/>last;<sp/>itr++)<sp/>{</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>callable(*itr);</highlight></codeline>
|
|
<codeline><highlight class="normal">}</highlight></codeline>
|
|
</programlisting> </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="406" column="14" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="195" bodyend="210"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1af9cc7ee16602754929bb9118a9d7f0b2" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::for_each</definition>
|
|
<argsstring>(cudaTask task, I first, I last, C callable)</argsstring>
|
|
<name>for_each</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>first</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>last</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a kernel task created from <ref refid="classtf_1_1cudaFlow_1a1a681f6223853b6445dcfdad07e4d0fd" kindref="member">tf::cudaFlow::for_each</ref> </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The type of the iterators and the callable must be the same as the task created from <ref refid="classtf_1_1cudaFlow_1a1a681f6223853b6445dcfdad07e4d0fd" kindref="member">tf::cudaFlow::for_each</ref>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="416" column="10" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="214" bodyend="229"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a34f1ea89e5651faa6e8af522a42556ac" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::for_each_index</definition>
|
|
<argsstring>(I first, I last, I step, C callable)</argsstring>
|
|
<name>for_each_index</name>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>first</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>last</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>step</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>applies a callable to each index in the range with the step size </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>I</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>index type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>callable type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>first</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>beginning index (inclusive) </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>last</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>ending index (exclusive) </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>step</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>step size </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>callable</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>the callable to apply to each index in the range</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
|
|
<para><programlisting filename=".cpp"><codeline><highlight class="comment">//<sp/>step<sp/>is<sp/>positive<sp/>[first,<sp/>last)</highlight><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight><highlight class="keywordflow">for</highlight><highlight class="normal">(</highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>i=first;<sp/>i&lt;last;<sp/>i+=step)<sp/>{</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>callable(i);</highlight></codeline>
|
|
<codeline><highlight class="normal">}</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight><highlight class="comment">//<sp/>step<sp/>is<sp/>negative<sp/>[first,<sp/>last)</highlight><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight><highlight class="keywordflow">for</highlight><highlight class="normal">(</highlight><highlight class="keyword">auto</highlight><highlight class="normal"><sp/>i=first;<sp/>i&gt;last;<sp/>i+=step)<sp/>{</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>callable(i);</highlight></codeline>
|
|
<codeline><highlight class="normal">}</highlight></codeline>
|
|
</programlisting> </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="446" column="14" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="233" bodyend="248"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a3fa7f8e38b4da1fe0cbcfb265f9349a2" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::for_each_index</definition>
|
|
<argsstring>(cudaTask task, I first, I last, I step, C callable)</argsstring>
|
|
<name>for_each_index</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>first</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>last</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>step</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a kernel task created from <ref refid="classtf_1_1cudaFlow_1a34f1ea89e5651faa6e8af522a42556ac" kindref="member">tf::cudaFlow::for_each_index</ref> </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The type of the indices and the callable must be the same as the task created from <ref refid="classtf_1_1cudaFlow_1a34f1ea89e5651faa6e8af522a42556ac" kindref="member">tf::cudaFlow::for_each_index</ref>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="456" column="10" bodyfile="taskflow/cuda/algorithm/for_each.hpp" bodystart="252" bodyend="267"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1af89a9bda182272462a0eda2581536cd8" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I</type>
|
|
</param>
|
|
<param>
|
|
<type>typename O</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::transform</definition>
|
|
<argsstring>(I first, I last, O output, C op)</argsstring>
|
|
<name>transform</name>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>first</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>last</declname>
|
|
</param>
|
|
<param>
|
|
<type>O</type>
|
|
<declname>output</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>op</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>applies a callable to a source range and stores the result in a target range </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>I</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>input iterator type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>O</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>output iterator type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>unary operator type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>first</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the beginning of the input range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>last</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the end of the input range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>output</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the beginning of the output range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>op</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>the operator to apply to transform each element in the range</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
|
|
<para><programlisting filename=".cpp"><codeline><highlight class="keywordflow">while</highlight><highlight class="normal"><sp/>(first<sp/>!=<sp/>last)<sp/>{</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>*output++<sp/>=<sp/>op(*first++);</highlight></codeline>
|
|
<codeline><highlight class="normal">}</highlight></codeline>
|
|
</programlisting> </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="483" column="14" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="151" bodyend="167"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a4a211b1f8562e10f9aae8b44fd6acdec" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I</type>
|
|
</param>
|
|
<param>
|
|
<type>typename O</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::transform</definition>
|
|
<argsstring>(cudaTask task, I first, I last, O output, C c)</argsstring>
|
|
<name>transform</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>first</declname>
|
|
</param>
|
|
<param>
|
|
<type>I</type>
|
|
<declname>last</declname>
|
|
</param>
|
|
<param>
|
|
<type>O</type>
|
|
<declname>output</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>c</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a kernel task created from <ref refid="classtf_1_1cudaFlow_1af89a9bda182272462a0eda2581536cd8" kindref="member">tf::cudaFlow::transform</ref> </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The type of the iterators and the callable must be the same as the task created from <ref refid="classtf_1_1cudaFlow_1af89a9bda182272462a0eda2581536cd8" kindref="member">tf::cudaFlow::transform</ref>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="493" column="10" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="191" bodyend="207"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1abab2bfdfc86ef3a764ece4743fdede76" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I1</type>
|
|
</param>
|
|
<param>
|
|
<type>typename I2</type>
|
|
</param>
|
|
<param>
|
|
<type>typename O</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::transform</definition>
|
|
<argsstring>(I1 first1, I1 last1, I2 first2, O output, C op)</argsstring>
|
|
<name>transform</name>
|
|
<param>
|
|
<type>I1</type>
|
|
<declname>first1</declname>
|
|
</param>
|
|
<param>
|
|
<type>I1</type>
|
|
<declname>last1</declname>
|
|
</param>
|
|
<param>
|
|
<type>I2</type>
|
|
<declname>first2</declname>
|
|
</param>
|
|
<param>
|
|
<type>O</type>
|
|
<declname>output</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>op</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>creates a task to perform parallel transforms over two ranges of items </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>I1</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>first input iterator type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>I2</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>second input iterator type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>O</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>output iterator type </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>binary operator type</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>first1</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the beginning of the first input range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>last1</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the end of the first input range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>first2</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the beginning of the second input range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>output</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>iterator to the beginning of the output range </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
<parameteritem>
|
|
<parameternamelist>
|
|
<parametername>op</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>binary operator to apply to transform each pair of items in the two input ranges</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
This method is equivalent to the parallel execution of the following loop on a GPU:</para>
|
|
<para><programlisting filename=".cpp"><codeline><highlight class="keywordflow">while</highlight><highlight class="normal"><sp/>(first1<sp/>!=<sp/>last1)<sp/>{</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>*output++<sp/>=<sp/>op(*first1++,<sp/>*first2++);</highlight></codeline>
|
|
<codeline><highlight class="normal">}</highlight></codeline>
|
|
</programlisting> </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="521" column="14" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="171" bodyend="187"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a7c6ca7be2b6908e8f71570c54303ba9e" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename I1</type>
|
|
</param>
|
|
<param>
|
|
<type>typename I2</type>
|
|
</param>
|
|
<param>
|
|
<type>typename O</type>
|
|
</param>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::transform</definition>
|
|
<argsstring>(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c)</argsstring>
|
|
<name>transform</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>I1</type>
|
|
<declname>first1</declname>
|
|
</param>
|
|
<param>
|
|
<type>I1</type>
|
|
<declname>last1</declname>
|
|
</param>
|
|
<param>
|
|
<type>I2</type>
|
|
<declname>first2</declname>
|
|
</param>
|
|
<param>
|
|
<type>O</type>
|
|
<declname>output</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>c</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates parameters of a kernel task created from <ref refid="classtf_1_1cudaFlow_1af89a9bda182272462a0eda2581536cd8" kindref="member">tf::cudaFlow::transform</ref> </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The type of the iterators and the callable must be the same as the task created from <ref refid="classtf_1_1cudaFlow_1af89a9bda182272462a0eda2581536cd8" kindref="member">tf::cudaFlow::transform</ref>. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="531" column="10" bodyfile="taskflow/cuda/algorithm/transform.hpp" bodystart="211" bodyend="228"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1a89c389fff64a16e5dd8c60875d3b514d" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<definition>cudaTask tf::cudaFlow::capture</definition>
|
|
<argsstring>(C &&callable)</argsstring>
|
|
<name>capture</name>
|
|
<param>
|
|
<type>C &&</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>constructs a subflow graph through <ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">tf::cudaFlowCapturer</ref> </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para><parameterlist kind="templateparam"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>C</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>callable type constructible from <computeroutput>std::function<void(tf::cudaFlowCapturer&)></computeroutput> </para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<parameterlist kind="param"><parameteritem>
|
|
<parameternamelist>
|
|
<parametername>callable</parametername>
|
|
</parameternamelist>
|
|
<parameterdescription>
|
|
<para>the callable to construct a capture flow</para>
|
|
</parameterdescription>
|
|
</parameteritem>
|
|
</parameterlist>
|
|
<simplesect kind="return"><para>a <ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref> handle</para>
|
|
</simplesect>
|
|
A captured subflow forms a sub-graph to the cudaFlow and can be used to capture custom (or third-party) kernels that cannot be directly constructed from the cudaFlow.</para>
|
|
<para>Example usage:</para>
|
|
<para><programlisting filename=".cpp"><codeline><highlight class="normal">taskflow.emplace([&](<ref refid="classtf_1_1cudaFlow" kindref="compound">tf::cudaFlow</ref>&<sp/>cf){</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref><sp/>my_kernel<sp/>=<sp/>cf.<ref refid="classtf_1_1cudaFlow_1a68f666503d13a7b80fb7399fb2f0c153" kindref="member">kernel</ref>(my_arguments);</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="comment">//<sp/>create<sp/>a<sp/>flow<sp/>capturer<sp/>to<sp/>capture<sp/>custom<sp/>kernels</highlight><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref><sp/>my_subflow<sp/>=<sp/>cf.<ref refid="classtf_1_1cudaFlow_1a89c389fff64a16e5dd8c60875d3b514d" kindref="member">capture</ref>([&](<ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">tf::cudaFlowCapturer</ref>&<sp/>capturer){</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><sp/><sp/>capturer.<ref refid="classtf_1_1cudaFlowCapturer_1ad0d937ae0d77239f148b66a77e35db41" kindref="member">on</ref>([&](cudaStream_t<sp/>stream){</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><sp/><sp/><sp/><sp/>invoke_custom_kernel_with_stream(stream,<sp/>custom_arguments);</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><sp/><sp/>});</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>});</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>my_kernel.<ref refid="classtf_1_1cudaTask_1abdd68287ec4dff4216af34d1db44d1b4" kindref="member">precede</ref>(my_subflow);</highlight></codeline>
|
|
<codeline><highlight class="normal">});</highlight></codeline>
|
|
</programlisting> </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="572" column="14" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="965" bodyend="997"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlow_1aa0f182dc0fa99bcc9118311925fddca5" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<templateparamlist>
|
|
<param>
|
|
<type>typename C</type>
|
|
</param>
|
|
</templateparamlist>
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlow::capture</definition>
|
|
<argsstring>(cudaTask task, C callable)</argsstring>
|
|
<name>capture</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaTask" kindref="compound">cudaTask</ref></type>
|
|
<declname>task</declname>
|
|
</param>
|
|
<param>
|
|
<type>C</type>
|
|
<declname>callable</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>updates the captured child graph </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>The method is similar to <ref refid="classtf_1_1cudaFlow_1a89c389fff64a16e5dd8c60875d3b514d" kindref="member">tf::cudaFlow::capture</ref> but operates on a task of type <ref refid="namespacetf_1afebc56ae6d5765010d0dd13a5f04132ea46be697979903d784a70aeec45eb14ad" kindref="member">tf::cudaTaskType::SUBFLOW</ref>. The new captured graph must be topologically identical to the original captured graph. </para>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="583" column="10" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="931" bodyend="957"/>
|
|
</memberdef>
|
|
</sectiondef>
|
|
<briefdescription>
|
|
<para>class to create a cudaFlow task dependency graph </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>A cudaFlow is a high-level interface over CUDA <ref refid="classtf_1_1Graph" kindref="compound">Graph</ref> to perform GPU operations using the task dependency graph model. The class provides a set of methods for creating and launching different tasks on one or multiple CUDA devices, for instance, kernel tasks, data transfer tasks, and memory operation tasks. The following example creates a cudaFlow of two kernel tasks, <computeroutput>task1</computeroutput> and <computeroutput>task2</computeroutput>, where <computeroutput>task1</computeroutput> runs before <computeroutput>task2</computeroutput>.</para>
|
|
<para><programlisting filename=".cpp"><codeline><highlight class="normal"><ref refid="classtf_1_1Taskflow" kindref="compound">tf::Taskflow</ref><sp/>taskflow;</highlight></codeline>
|
|
<codeline><highlight class="normal"><ref refid="classtf_1_1Executor" kindref="compound">tf::Executor</ref><sp/>executor;</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal">taskflow.<ref refid="classtf_1_1FlowBuilder_1a60d7a666cab71ecfa3010b2efb0d6b57" kindref="member">emplace</ref>([&](<ref refid="classtf_1_1cudaFlow" kindref="compound">tf::cudaFlow</ref>&<sp/>cf){</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="comment">//<sp/>create<sp/>two<sp/>kernel<sp/>tasks</highlight><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref><sp/>task1<sp/>=<sp/>cf.<ref refid="classtf_1_1cudaFlow_1a68f666503d13a7b80fb7399fb2f0c153" kindref="member">kernel</ref>(grid1,<sp/>block1,<sp/>shm_size1,<sp/>kernel1,<sp/>args1);</highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/><ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref><sp/>task2<sp/>=<sp/>cf.<ref refid="classtf_1_1cudaFlow_1a68f666503d13a7b80fb7399fb2f0c153" kindref="member">kernel</ref>(grid2,<sp/>block2,<sp/>shm_size2,<sp/>kernel2,<sp/>args2);</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/></highlight><highlight class="comment">//<sp/>kernel1<sp/>runs<sp/>before<sp/>kernel2</highlight><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal"><sp/><sp/>task1.<ref refid="classtf_1_1cudaTask_1abdd68287ec4dff4216af34d1db44d1b4" kindref="member">precede</ref>(task2);</highlight></codeline>
|
|
<codeline><highlight class="normal">});</highlight></codeline>
|
|
<codeline><highlight class="normal"></highlight></codeline>
|
|
<codeline><highlight class="normal">executor.<ref refid="classtf_1_1Executor_1a8d08f0cb79e7b3780087975d13368a96" kindref="member">run</ref>(taskflow).wait();</highlight></codeline>
|
|
</programlisting></para>
|
|
<para>A cudaFlow is a task (<ref refid="classtf_1_1Task" kindref="compound">tf::Task</ref>) created from <ref refid="classtf_1_1Taskflow" kindref="compound">tf::Taskflow</ref> and will be run by <emphasis>one</emphasis> worker thread in the executor. That is, the callable that describes a cudaFlow will be executed sequentially. Inside a cudaFlow task, different GPU tasks (<ref refid="classtf_1_1cudaTask" kindref="compound">tf::cudaTask</ref>) may run in parallel scheduled by the CUDA runtime.</para>
|
|
<para>Please refer to <ref refid="GPUTaskingcudaFlow" kindref="compound">GPU Tasking (cudaFlow)</ref> for details. </para>
|
|
</detaileddescription>
|
|
<location file="taskflow/cuda/cudaflow.hpp" line="56" column="1" bodyfile="taskflow/cuda/cudaflow.hpp" bodystart="56" bodyend="589"/>
|
|
<listofallmembers>
|
|
<member refid="classtf_1_1cudaFlow_1a31c7ba8b8053d6ad95c7da5e5a9494f2" prot="private" virt="non-virtual"><scope>tf::cudaFlow</scope><name>_cfg</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a8e3255897f01f87dcd9ca506c314a125" prot="private" virt="non-virtual"><scope>tf::cudaFlow</scope><name>_exe</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a89c389fff64a16e5dd8c60875d3b514d" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>capture</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1aa0f182dc0fa99bcc9118311925fddca5" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>capture</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1aad726dfe21e9719d96c65530a56d9951" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>clear</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1af03e04771b655f9e629eb4c22e19b19f" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>copy</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a6cf6ec1e85172fa99c16bf0beffc0562" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>copy</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1ad4c3e001db151486c8479151a2108d37" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>cudaFlow</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a677a4b510abee2ac665193389b20f725" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>cudaFlow</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a7f97b68fa7c889db49b26aa71a46a7cf" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>dump</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a43507f21eb9cb77667ffe0ac7e6ae635" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>dump_native_graph</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a1926f45a038d8faa9c1b1ee43fd29a93" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>empty</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a21d4447bc834f4d3e1bb4772c850d090" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>fill</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a39ed97c9142959c73d4c25c34d71bd5e" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>fill</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a1a681f6223853b6445dcfdad07e4d0fd" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>for_each</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1af9cc7ee16602754929bb9118a9d7f0b2" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>for_each</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a34f1ea89e5651faa6e8af522a42556ac" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>for_each_index</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a3fa7f8e38b4da1fe0cbcfb265f9349a2" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>for_each_index</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a060e1c96111c2134ce0f896420a42cd0" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>host</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a02e4e5cf7d03b9d087d6fbf54eb86bbf" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>host</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a68f666503d13a7b80fb7399fb2f0c153" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>kernel</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a821117dd640807bb7ec114b46888dfb1" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>kernel</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1ad37637606f0643f360e9eda1f9a6e559" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>memcpy</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1acf9e6cfa65cbfcd1d33c88e64b487ce6" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>memcpy</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a079ca65da35301e5aafd45878a19e9d2" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>memset</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a082505f0fec89f65808421cdc737fb17" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>memset</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a5bfdaf621ab617ab5f0ca63466570256" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>native_executable</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1acfbee67cff7dc7c6297c20c64f2e015c" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>native_graph</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a30b2e107cb2c90a37f467b28d1b42a74" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>noop</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1ae6560c27d249af7e4b8b921388f5e1e2" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>num_tasks</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a74beef874538193ac0df81a180faa742" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>operator=</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1ae6810f7de27e5a347331aacfce67bea1" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>run</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1ac2906cb0002fc411a983d100a3d58d62" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>single_task</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1add2d364f38c72322d8e36bc0da0b98e4" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>single_task</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1af89a9bda182272462a0eda2581536cd8" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>transform</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a4a211b1f8562e10f9aae8b44fd6acdec" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>transform</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1abab2bfdfc86ef3a764ece4743fdede76" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>transform</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a7c6ca7be2b6908e8f71570c54303ba9e" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>transform</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a40172fac4464f6d805f75921ea3c2a3b" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>zero</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a78c2a73243809e3cbd1955cc1ffe6477" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>zero</name></member>
|
|
<member refid="classtf_1_1cudaFlow_1a828c3ab275521672e4ec6c78d3a9ee62" prot="public" virt="non-virtual"><scope>tf::cudaFlow</scope><name>~cudaFlow</name></member>
|
|
</listofallmembers>
|
|
</compounddef>
|
|
</doxygen>
|