189 lines
12 KiB
XML
189 lines
12 KiB
XML
<?xml version='1.0' encoding='UTF-8' standalone='no'?>
|
|
<doxygen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="compound.xsd" version="1.9.1" xml:lang="en-US">
|
|
<compounddef id="classtf_1_1cudaFlowRoundRobinOptimizer" kind="class" language="C++" prot="public">
|
|
<compoundname>tf::cudaFlowRoundRobinOptimizer</compoundname>
|
|
<basecompoundref prot="public" virt="non-virtual">tf::cudaFlowOptimizerBase</basecompoundref>
|
|
<includes refid="cuda__optimizer_8hpp" local="no">taskflow/cuda/cuda_optimizer.hpp</includes>
|
|
<sectiondef kind="friend">
|
|
<memberdef kind="friend" id="classtf_1_1cudaFlowRoundRobinOptimizer_1a672b45d300c57d726c203c62f950efbd" prot="private" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<type>class</type>
|
|
<definition>friend class cudaFlowCapturer</definition>
|
|
<argsstring></argsstring>
|
|
<name>cudaFlowCapturer</name>
|
|
<param>
|
|
<type><ref refid="classtf_1_1cudaFlowCapturer" kindref="compound">cudaFlowCapturer</ref></type>
|
|
</param>
|
|
<briefdescription>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="245" column="3" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="245" bodyend="-1"/>
|
|
</memberdef>
|
|
</sectiondef>
|
|
<sectiondef kind="private-attrib">
|
|
<memberdef kind="variable" id="classtf_1_1cudaFlowRoundRobinOptimizer_1a1354083daa786bad9df520f4ddb03511" prot="private" static="no" mutable="no">
|
|
<type>size_t</type>
|
|
<definition>size_t tf::cudaFlowRoundRobinOptimizer::_num_streams</definition>
|
|
<argsstring></argsstring>
|
|
<name>_num_streams</name>
|
|
<initializer>{4}</initializer>
|
|
<briefdescription>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="271" column="12" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="271" bodyend="-1"/>
|
|
</memberdef>
|
|
</sectiondef>
|
|
<sectiondef kind="public-func">
|
|
<memberdef kind="function" id="classtf_1_1cudaFlowRoundRobinOptimizer_1aef646675174ffcab6135fbfb7f0eecfe" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
|
|
<type></type>
|
|
<definition>tf::cudaFlowRoundRobinOptimizer::cudaFlowRoundRobinOptimizer</definition>
|
|
<argsstring>()=default</argsstring>
|
|
<name>cudaFlowRoundRobinOptimizer</name>
|
|
<briefdescription>
|
|
<para>constructs a round-robin optimizer with 4 streams by default </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="252" column="5"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlowRoundRobinOptimizer_1ab293c8613773baf87ff740d2cec14149" prot="public" static="no" const="no" explicit="yes" inline="yes" virt="non-virtual">
|
|
<type></type>
|
|
<definition>tf::cudaFlowRoundRobinOptimizer::cudaFlowRoundRobinOptimizer</definition>
|
|
<argsstring>(size_t num_streams)</argsstring>
|
|
<name>cudaFlowRoundRobinOptimizer</name>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>num_streams</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>constructs a round-robin optimizer with the given number of streams </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="257" column="14" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="280" bodyend="286"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlowRoundRobinOptimizer_1a22fb9667ce393c31d908c3cc4f0ba650" prot="public" static="no" const="yes" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>size_t</type>
|
|
<definition>size_t tf::cudaFlowRoundRobinOptimizer::num_streams</definition>
|
|
<argsstring>() const</argsstring>
|
|
<name>num_streams</name>
|
|
<briefdescription>
|
|
<para>queries the number of streams used by the optimizer </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="262" column="12" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="289" bodyend="291"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlowRoundRobinOptimizer_1acbd190f22ecc606a8b888953649a5be6" prot="public" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlowRoundRobinOptimizer::num_streams</definition>
|
|
<argsstring>(size_t n)</argsstring>
|
|
<name>num_streams</name>
|
|
<param>
|
|
<type>size_t</type>
|
|
<declname>n</declname>
|
|
</param>
|
|
<briefdescription>
|
|
<para>sets the number of streams used by the optimizer </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="267" column="10" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="294" bodyend="299"/>
|
|
</memberdef>
|
|
</sectiondef>
|
|
<sectiondef kind="private-func">
|
|
<memberdef kind="function" id="classtf_1_1cudaFlowRoundRobinOptimizer_1ad612d3b6c169a65eebcf300eaca358aa" prot="private" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>cudaGraph_t</type>
|
|
<definition>cudaGraph_t tf::cudaFlowRoundRobinOptimizer::_optimize</definition>
|
|
<argsstring>(cudaFlowGraph &amp;graph)</argsstring>
|
|
<name>_optimize</name>
|
|
<param>
|
|
<type>cudaFlowGraph &amp;</type>
|
|
<declname>graph</declname>
|
|
</param>
|
|
<briefdescription>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="273" column="17" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="318" bodyend="400"/>
|
|
</memberdef>
|
|
<memberdef kind="function" id="classtf_1_1cudaFlowRoundRobinOptimizer_1afd0f87fbc9131efbdb9e92bb834aeb47" prot="private" static="no" const="no" explicit="no" inline="yes" virt="non-virtual">
|
|
<type>void</type>
|
|
<definition>void tf::cudaFlowRoundRobinOptimizer::_reset</definition>
|
|
<argsstring>(std::vector&lt; std::vector&lt; cudaFlowNode * &gt;&gt; &amp;graph)</argsstring>
|
|
<name>_reset</name>
|
|
<param>
|
|
<type><ref refid="cpp/container/vector" kindref="compound" external="/home/thuang295/Code/taskflow/doxygen/cppreference-doxygen-web.tag.xml">std::vector</ref>&lt; <ref refid="cpp/container/vector" kindref="compound" external="/home/thuang295/Code/taskflow/doxygen/cppreference-doxygen-web.tag.xml">std::vector</ref>&lt; cudaFlowNode * &gt;&gt; &amp;</type>
|
|
<declname>graph</declname>
|
|
</param>
|
|
<briefdescription>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
</detaileddescription>
|
|
<inbodydescription>
|
|
</inbodydescription>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="275" column="10" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="301" bodyend="315"/>
|
|
</memberdef>
|
|
</sectiondef>
|
|
<briefdescription>
|
|
<para>class to capture a CUDA graph using a round-robin algorithm </para>
|
|
</briefdescription>
|
|
<detaileddescription>
|
|
<para>A round-robin capturing algorithm levelizes the user-described graph and assigns streams to nodes in a round-robin order level by level. The algorithm is based on the following paper published in Euro-Par 2021:<itemizedlist>
|
|
<listitem><para>Dian-Lun Lin and Tsung-Wei Huang, "Efficient GPU Computation using Task <ref refid="classtf_1_1Graph" kindref="compound">Graph</ref> Parallelism," <emphasis>European Conference on Parallel and Distributed Computing (Euro-Par)</emphasis>, 2021</para>
|
|
</listitem></itemizedlist>
|
|
</para>
|
|
<para>The round-robin optimization algorithm is best suited for large cudaFlow graphs that comprise hundreds or thousands of GPU operations (e.g., kernels and memory copies) with many of them being able to run in parallel. You can configure the number of streams used by the optimizer to adjust the maximum kernel concurrency in the captured CUDA graph. </para>
|
|
</detaileddescription>
|
|
<inheritancegraph>
|
|
<node id="1">
|
|
<label>tf::cudaFlowRoundRobinOptimizer</label>
|
|
<link refid="classtf_1_1cudaFlowRoundRobinOptimizer"/>
|
|
<childnode refid="2" relation="public-inheritance">
|
|
</childnode>
|
|
</node>
|
|
<node id="2">
|
|
<label>tf::cudaFlowOptimizerBase</label>
|
|
</node>
|
|
</inheritancegraph>
|
|
<collaborationgraph>
|
|
<node id="1">
|
|
<label>tf::cudaFlowRoundRobinOptimizer</label>
|
|
<link refid="classtf_1_1cudaFlowRoundRobinOptimizer"/>
|
|
<childnode refid="2" relation="public-inheritance">
|
|
</childnode>
|
|
</node>
|
|
<node id="2">
|
|
<label>tf::cudaFlowOptimizerBase</label>
|
|
</node>
|
|
</collaborationgraph>
|
|
<location file="taskflow/cuda/cuda_optimizer.hpp" line="243" column="1" bodyfile="taskflow/cuda/cuda_optimizer.hpp" bodystart="243" bodyend="277"/>
|
|
<listofallmembers>
|
|
<member refid="classtf_1_1cudaFlowOptimizerBase_1ae20d9b88a98439f8d8ee5f6280b15744" prot="protected" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>_levelize</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1a1354083daa786bad9df520f4ddb03511" prot="private" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>_num_streams</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1ad612d3b6c169a65eebcf300eaca358aa" prot="private" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>_optimize</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1afd0f87fbc9131efbdb9e92bb834aeb47" prot="private" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>_reset</name></member>
|
|
<member refid="classtf_1_1cudaFlowOptimizerBase_1a25bb1274b6ab2279e261690a5fe46007" prot="protected" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>_toposort</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1a672b45d300c57d726c203c62f950efbd" prot="private" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>cudaFlowCapturer</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1aef646675174ffcab6135fbfb7f0eecfe" prot="public" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>cudaFlowRoundRobinOptimizer</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1ab293c8613773baf87ff740d2cec14149" prot="public" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>cudaFlowRoundRobinOptimizer</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1a22fb9667ce393c31d908c3cc4f0ba650" prot="public" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>num_streams</name></member>
|
|
<member refid="classtf_1_1cudaFlowRoundRobinOptimizer_1acbd190f22ecc606a8b888953649a5be6" prot="public" virt="non-virtual"><scope>tf::cudaFlowRoundRobinOptimizer</scope><name>num_streams</name></member>
|
|
</listofallmembers>
|
|
</compounddef>
|
|
</doxygen>
|