449 lines
14 KiB
Text
449 lines
14 KiB
Text
namespace tf {
|
|
|
|
/** @page ExecuteTaskflow %Executor
|
|
|
|
After you create a task dependency graph, you need to submit it to threads for execution.
|
|
In this chapter, we will show you how to execute a task dependency graph.
|
|
|
|
@tableofcontents
|
|
|
|
@section CreateAnExecutor Create an Executor
|
|
|
|
To execute a taskflow, you need to create an @em executor of type tf::Executor.
|
|
An executor is a @em thread-safe object that
|
|
manages a set of worker threads and executes tasks
|
|
through an efficient @em work-stealing algorithm.
|
|
Issuing a call to run a taskflow creates a @em topology,
|
|
a data structure to keep track of the execution status of a running graph.
|
|
tf::Executor takes an unsigned integer to construct with @c N worker threads.
|
|
The default value is std::thread::hardware_concurrency.
|
|
|
|
@code{.cpp}
|
|
tf::Executor executor1; // create an executor with the number of workers
|
|
// equal to std::thread::hardware_concurrency
|
|
tf::Executor executor2(4); // create an executor of 4 worker threads
|
|
@endcode
|
|
|
|
An executor can be reused to execute multiple taskflows.
|
|
In most workloads, you may need only one executor to run multiple taskflows
|
|
where each taskflow represents a part of a parallel decomposition.
|
|
|
|
@section ExecuteATaskflow Execute a Taskflow
|
|
|
|
tf::Executor provides a set of @c run\_* methods,
|
|
tf::Executor::run,
|
|
tf::Executor::run_n, and tf::Executor::run_until to run a taskflow
|
|
for one time, multiple times, or until a given predicate evaluates to true.
|
|
All methods accept an optional callback to invoke after the execution completes,
|
|
and return a tf::Future for users to access the execution status.
|
|
The code below shows several ways to run a taskflow.
|
|
|
|
@code{.cpp}
|
|
1: // Declare an executor and a taskflow
|
|
2: tf::Executor executor;
|
|
3: tf::Taskflow taskflow;
|
|
4:
|
|
5: // Add three tasks into the taskflow
|
|
6: tf::Task A = taskflow.emplace([] () { std::cout << "This is TaskA\n"; });
|
|
7: tf::Task B = taskflow.emplace([] () { std::cout << "This is TaskB\n"; });
|
|
8: tf::Task C = taskflow.emplace([] () { std::cout << "This is TaskC\n"; });
|
|
9:
|
|
10: // Build precedence between tasks
|
|
11: A.precede(B, C);
|
|
12:
|
|
13: tf::Future<void> fu = executor.run(taskflow);
|
|
14: fu.wait(); // block until the execution completes
|
|
15:
|
|
16: executor.run(taskflow, [](){ std::cout << "end of 1 run"; }).wait();
|
|
17: executor.run_n(taskflow, 4);
|
|
18: executor.wait_for_all(); // block until all associated executions finish
|
|
19: executor.run_n(taskflow, 4, [](){ std::cout << "end of 4 runs"; }).wait();
|
|
20: executor.run_until(taskflow, [cnt=0] () mutable { return ++cnt == 10; });
|
|
@endcode
|
|
|
|
|
|
Debrief:
|
|
|
|
@li Lines 6-8 create a taskflow of three tasks A, B, and C
|
|
@li Lines 13-14 run the taskflow once and wait for completion
|
|
@li Line 16 runs the taskflow once with a callback to invoke when the execution finishes
|
|
@li Lines 17-18 run the taskflow four times and use tf::Executor::wait_for_all to wait for completion
|
|
@li Line 19 runs the taskflow four times and invokes a callback at the end of the fourth execution
|
|
@li Line 20 keeps running the taskflow until the predicate returns true
|
|
|
|
Issuing multiple runs on the same taskflow will automatically @em synchronize
|
|
to a sequential chain of executions in the order of run calls.
|
|
|
|
@code{.cpp}
|
|
executor.run(taskflow); // execution 1
|
|
executor.run_n(taskflow, 10); // execution 2
|
|
executor.run(taskflow); // execution 3
|
|
executor.wait_for_all(); // execution 1 -> execution 2 -> execution 3
|
|
@endcode
|
|
|
|
@attention
|
|
A running taskflow must remain alive during its execution.
|
|
It is your responsibility to ensure that a taskflow is not destructed
|
|
when it is running.
|
|
For example, the code below can result in undefined behavior.
|
|
|
|
@code{.cpp}
|
|
tf::Executor executor; // create an executor
|
|
|
|
// create a taskflow whose lifetime is restricted by the scope
|
|
{
|
|
tf::Taskflow taskflow;
|
|
|
|
// add tasks to the taskflow
|
|
// ...
|
|
|
|
// run the taskflow
|
|
executor.run(taskflow);
|
|
|
|
} // leaving the scope will destroy taskflow while it is running,
|
|
// resulting in undefined behavior
|
|
@endcode
|
|
|
|
Similarly, you should avoid touching a taskflow while it is running.
|
|
|
|
@code{.cpp}
|
|
tf::Taskflow taskflow;
|
|
|
|
// Add tasks into the taskflow
|
|
// ...
|
|
|
|
// Declare an executor
|
|
tf::Executor executor;
|
|
|
|
tf::Future<void> future = executor.run(taskflow); // non-blocking return
|
|
|
|
// altering the taskflow while it is running leads to undefined behavior
|
|
taskflow.emplace([](){ std::cout << "Add a new task\n"; });
|
|
@endcode
|
|
|
|
You must always keep a taskflow alive and must not modify it while
|
|
it is running on an executor.
|
|
|
|
@section ExecuteATaskflowWithTransferredOwnership Execute a Taskflow with Transferred Ownership
|
|
|
|
You can transfer the ownership of a taskflow to an executor and run it without
|
|
wrangling with the lifetime issue of that taskflow.
|
|
Each @c run\_* method discussed in the previous section comes with an overload
|
|
that takes a @em moved taskflow object.
|
|
|
|
@code{.cpp}
|
|
tf::Taskflow taskflow;
|
|
tf::Executor executor;
|
|
|
|
taskflow.emplace([](){});
|
|
|
|
// let the executor manage the lifetime of the submitted taskflow
|
|
executor.run(std::move(taskflow));
|
|
|
|
// now taskflow has no tasks
|
|
assert(taskflow.num_tasks() == 0);
|
|
@endcode
|
|
|
|
However, you should avoid moving a @em running taskflow which can result in
|
|
undefined behavior.
|
|
|
|
@code{.cpp}
|
|
tf::Taskflow taskflow;
|
|
tf::Executor executor;
|
|
|
|
taskflow.emplace([](){});
|
|
|
|
// executor does not manage the lifetime of taskflow
|
|
executor.run(taskflow);
|
|
|
|
// error! you cannot move a taskflow while it is running
|
|
executor.run(std::move(taskflow));
|
|
@endcode
|
|
|
|
The correct way to submit a taskflow with moved ownership to an executor is
|
|
to ensure all previous runs have completed.
|
|
The executor will automatically release the resources of a moved taskflow
|
|
right @em after its execution completes.
|
|
|
|
@code{.cpp}
|
|
// submit the taskflow and wait until it completes
|
|
executor.run(taskflow).wait();
|
|
|
|
// now it's safe to move the taskflow to the executor and run it
|
|
executor.run(std::move(taskflow));
|
|
@endcode
|
|
|
|
Likewise, you cannot move a taskflow that is running on an executor.
|
|
You must wait until all previous runs of that taskflow complete
|
|
before calling move.
|
|
|
|
@code{.cpp}
|
|
// submit the taskflow and wait until it completes
|
|
executor.run(taskflow).wait();
|
|
|
|
// now it's safe to move the taskflow to another
|
|
tf::Taskflow moved_taskflow(std::move(taskflow));
|
|
@endcode
|
|
|
|
@section ExecuteATaskflowFromAnInternalWorker Execute a Taskflow from an Internal Worker
|
|
|
|
Each run variant of tf::Executor returns a tf::Future object which allows you
|
|
to wait for the result to complete.
|
|
When calling `tf::Future::wait`, the caller blocks without doing anything until
|
|
the associated state is written to be ready.
|
|
This design, however, can introduce a deadlock, especially when you need to run
|
|
multiple taskflows from the internal workers of an executor.
|
|
For example, the code below creates a taskflow of 1000 tasks
|
|
with each task running a taskflow of 500 tasks in a blocking fashion:
|
|
|
|
@code{.cpp}
|
|
tf::Executor executor(2);
|
|
tf::Taskflow taskflow;
|
|
std::array<tf::Taskflow, 1000> others;
|
|
|
|
std::atomic<size_t> counter{0};
|
|
|
|
for(size_t n=0; n<1000; n++) {
|
|
for(size_t i=0; i<500; i++) {
|
|
others[n].emplace([&](){ counter++; });
|
|
}
|
|
taskflow.emplace([&executor, &tf=others[n]](){
|
|
// blocking the worker can introduce deadlock where
|
|
// all workers are waiting for their taskflows to finish
|
|
executor.run(tf).wait();
|
|
});
|
|
}
|
|
executor.run(taskflow).wait();
|
|
@endcode
|
|
|
|
|
|
To avoid this problem,
|
|
the executor has a method, tf::Executor::corun, to execute
|
|
a taskflow from a worker of that executor.
|
|
The worker will not block but co-run the taskflow with other tasks
|
|
in its work-stealing loop.
|
|
|
|
@code{.cpp}
|
|
tf::Executor executor(2);
|
|
tf::Taskflow taskflow;
|
|
std::array<tf::Taskflow, 1000> others;
|
|
|
|
std::atomic<size_t> counter{0};
|
|
|
|
for(size_t n=0; n<1000; n++) {
|
|
for(size_t i=0; i<500; i++) {
|
|
others[n].emplace([&](){ counter++; });
|
|
}
|
|
taskflow.emplace([&executor, &tf=others[n]](){
|
|
// the caller worker will not block but corun these
|
|
// taskflows through its work-stealing loop
|
|
executor.corun(tf);
|
|
});
|
|
}
|
|
executor.run(taskflow).wait();
|
|
@endcode
|
|
|
|
|
|
Similar to tf::Executor::corun, the method tf::Executor::corun_until
|
|
is another variant that keeps the calling worker in the work-stealing loop
|
|
until the given predicate becomes true.
|
|
You can use this method to keep a worker doing useful work instead of blocking,
|
|
for example, while it waits for an outstanding operation (e.g., a GPU operation) to finish.
|
|
|
|
@code{.cpp}
|
|
taskflow.emplace([&](){
|
|
auto fu = std::async([](){ std::this_thread::sleep_for(std::chrono::seconds(100)); });
|
|
executor.corun_until([&](){
|
|
return fu.wait_for(std::chrono::seconds(0)) == std::future_status::ready;
|
|
});
|
|
});
|
|
@endcode
|
|
|
|
@attention
|
|
You must call tf::Executor::corun_until and tf::Executor::corun
|
|
from a worker of the calling executor or an exception will be thrown.
|
|
|
|
@section ThreadSafety Touch an Executor from Multiple Threads
|
|
|
|
All @c run\_* methods are @em thread-safe.
|
|
You can have multiple threads call these methods from an executor to run different taskflows.
|
|
However, the order in which these taskflows run is non-deterministic and is up to the
|
|
runtime.
|
|
|
|
@code{.cpp}
|
|
1: tf::Executor executor;
|
|
2:
|
|
3: for(int i=0; i<10; ++i) {
|
|
4: std::thread([&, i](){
|
|
5: // ... modify my taskflow at i
|
|
6: executor.run(taskflows[i]); // run my taskflow at i
|
|
7: }).detach();
|
|
8: }
|
|
9:
|
|
10: executor.wait_for_all();
|
|
@endcode
|
|
|
|
@section QueryTheWorkerID Query the Worker ID
|
|
|
|
Each worker in an executor has a unique integer identifier in the range
|
|
<tt>[0, N)</tt> that can be queried by the caller thread using tf::Executor::this_worker_id.
|
|
If the caller thread is not a worker in the executor, @c -1 is returned.
|
|
This method is convenient for users to maintain a one-to-one mapping between
|
|
a worker and its application data structure.
|
|
|
|
@code{.cpp}
|
|
std::vector<int> worker_vectors[8]; // one vector per worker
|
|
|
|
tf::Taskflow taskflow;
|
|
tf::Executor executor(8); // an executor of eight workers
|
|
|
|
assert(executor.this_worker_id() == -1); // master thread is not a worker
|
|
|
|
taskflow.emplace([&](){
|
|
int id = executor.this_worker_id(); // in the range [0, 8)
|
|
auto& vec = worker_vectors[id];
|
|
// ...
|
|
});
|
|
@endcode
|
|
|
|
@section ObserveThreadActivities Observe Thread Activities
|
|
|
|
You can observe thread activities in an executor when a worker thread participates in executing
|
|
a task and leaves the execution using tf::ObserverInterface --
|
|
an @em interface class that provides a set of methods for you to define
|
|
what to do when a thread enters and leaves the execution context of a task.
|
|
|
|
@code{.cpp}
|
|
class ObserverInterface {
|
|
virtual ~ObserverInterface() = default;
|
|
virtual void set_up(size_t num_workers) = 0;
|
|
virtual void on_entry(tf::WorkerView worker_view, tf::TaskView task_view) = 0;
|
|
virtual void on_exit(tf::WorkerView worker_view, tf::TaskView task_view) = 0;
|
|
};
|
|
@endcode
|
|
|
|
There are three methods you must define in your derived class,
|
|
tf::ObserverInterface::set_up, tf::ObserverInterface::on_entry, and
|
|
tf::ObserverInterface::on_exit.
|
|
The method, tf::ObserverInterface::set_up, is a constructor-like method that
|
|
will be called by the executor when the observer is constructed.
|
|
It passes the number of workers in the executor to the observer.
|
|
You may use it to preallocate or initialize data storage, e.g., an independent vector for each worker.
|
|
The methods, tf::ObserverInterface::on_entry and
|
|
tf::ObserverInterface::on_exit,
|
|
are called by a worker thread before and after the execution context of a task,
|
|
respectively.
|
|
Both methods provide immutable access to the underlying worker and the running task
|
|
using tf::WorkerView and tf::TaskView.
|
|
You may use them to record timepoints and calculate the elapsed time of a task.
|
|
|
|
You can associate an executor with one or multiple observers (though one is common)
|
|
using tf::Executor::make_observer.
|
|
We use std::shared_ptr to manage the ownership of an observer.
|
|
The executor loops through each observer and invokes the corresponding methods accordingly.
|
|
|
|
@code{.cpp}
|
|
#include <taskflow/taskflow.hpp>
|
|
|
|
struct MyObserver : public tf::ObserverInterface {
|
|
|
|
MyObserver(const std::string& name) {
|
|
std::cout << "constructing observer " << name << '\n';
|
|
}
|
|
|
|
void set_up(size_t num_workers) override final {
|
|
std::cout << "setting up observer with " << num_workers << " workers\n";
|
|
}
|
|
|
|
void on_entry(tf::WorkerView w, tf::TaskView tv) override final {
|
|
std::ostringstream oss;
|
|
oss << "worker " << w.id() << " ready to run " << tv.name() << '\n';
|
|
std::cout << oss.str();
|
|
}
|
|
|
|
void on_exit(tf::WorkerView w, tf::TaskView tv) override final {
|
|
std::ostringstream oss;
|
|
oss << "worker " << w.id() << " finished running " << tv.name() << '\n';
|
|
std::cout << oss.str();
|
|
}
|
|
|
|
};
|
|
|
|
int main(){
|
|
|
|
tf::Executor executor(4);
|
|
|
|
// Create a taskflow of eight tasks
|
|
tf::Taskflow taskflow;
|
|
|
|
auto A = taskflow.emplace([] () { std::cout << "1\n"; }).name("A");
|
|
auto B = taskflow.emplace([] () { std::cout << "2\n"; }).name("B");
|
|
auto C = taskflow.emplace([] () { std::cout << "3\n"; }).name("C");
|
|
auto D = taskflow.emplace([] () { std::cout << "4\n"; }).name("D");
|
|
auto E = taskflow.emplace([] () { std::cout << "5\n"; }).name("E");
|
|
auto F = taskflow.emplace([] () { std::cout << "6\n"; }).name("F");
|
|
auto G = taskflow.emplace([] () { std::cout << "7\n"; }).name("G");
|
|
auto H = taskflow.emplace([] () { std::cout << "8\n"; }).name("H");
|
|
|
|
// create an observer
|
|
std::shared_ptr<MyObserver> observer = executor.make_observer<MyObserver>(
|
|
"MyObserver"
|
|
);
|
|
|
|
// run the taskflow
|
|
executor.run(taskflow).get();
|
|
|
|
// remove the observer (optional)
|
|
executor.remove_observer(std::move(observer));
|
|
|
|
return 0;
|
|
}
|
|
@endcode
|
|
|
|
The above code produces the following output:
|
|
|
|
@code{.bash}
|
|
constructing observer MyObserver
|
|
setting up observer with 4 workers
|
|
worker 2 ready to run A
|
|
1
|
|
worker 2 finished running A
|
|
worker 2 ready to run B
|
|
2
|
|
worker 1 ready to run C
|
|
worker 2 finished running B
|
|
3
|
|
worker 2 ready to run D
|
|
worker 3 ready to run E
|
|
worker 1 finished running C
|
|
4
|
|
5
|
|
worker 1 ready to run F
|
|
worker 2 finished running D
|
|
worker 3 finished running E
|
|
6
|
|
worker 2 ready to run G
|
|
worker 3 ready to run H
|
|
worker 1 finished running F
|
|
7
|
|
8
|
|
worker 2 finished running G
|
|
worker 3 finished running H
|
|
@endcode
|
|
|
|
It is expected that the lines printed through std::cout interleave with one another
|
|
as there are four workers participating in task scheduling.
|
|
However, the @em ready message always appears before the corresponding task message
|
|
(e.g., numbers)
|
|
and then the @em finished message.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|