// mesytec-mnode/external/taskflow-3.8.0/sandbox/executor/executor-dl.hpp
#pragma once
#include "observer.hpp"
#include "taskflow.hpp"
#include "async_task.hpp"
/**
@file executor-dl.hpp
@brief executor include file
*/
namespace tf {
// ----------------------------------------------------------------------------
// Executor Definition
// ----------------------------------------------------------------------------
/** @class Executor
@brief class to create an executor for running a taskflow graph
An executor manages a set of worker threads to run one or multiple taskflows
using an efficient work-stealing scheduling algorithm.
@code{.cpp}
// Declare an executor and a taskflow
tf::Executor executor;
tf::Taskflow taskflow;
// Add three tasks into the taskflow
tf::Task A = taskflow.emplace([] () { std::cout << "This is TaskA\n"; });
tf::Task B = taskflow.emplace([] () { std::cout << "This is TaskB\n"; });
tf::Task C = taskflow.emplace([] () { std::cout << "This is TaskC\n"; });
// Build precedence between tasks
A.precede(B, C);
tf::Future<void> fu = executor.run(taskflow);
fu.wait(); // block until the execution completes
executor.run(taskflow, [](){ std::cout << "end of 1 run"; }).wait();
executor.run_n(taskflow, 4);
executor.wait_for_all(); // block until all associated executions finish
executor.run_n(taskflow, 4, [](){ std::cout << "end of 4 runs"; }).wait();
executor.run_until(taskflow, [cnt=0] () mutable { return ++cnt == 10; });
@endcode
All the @c run methods are @em thread-safe. You can submit multiple
taskflows at the same time to an executor from different threads.
*/
class Executor {
friend class FlowBuilder;
friend class Subflow;
friend class Runtime;
public:
/**
@brief constructs the executor with @c N worker threads
@param N the number of workers (default std::thread::hardware_concurrency)
The constructor spawns @c N worker threads to run tasks in a
work-stealing loop. The number of workers must be greater than zero
or an exception will be thrown.
By default, the number of worker threads is equal to the maximum
hardware concurrency returned by std::thread::hardware_concurrency.
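For example:
@code{.cpp}
tf::Executor e1;     // as many workers as std::thread::hardware_concurrency()
tf::Executor e2(4);  // spawns exactly four workers
//tf::Executor e3(0);  // throws: at least one worker is required
@endcode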
*/
explicit Executor(size_t N = std::thread::hardware_concurrency());
/**
@brief destructs the executor
The destructor calls Executor::wait_for_all to wait for all submitted
taskflows to complete, then notifies all worker threads to stop
and joins these threads.
*/
~Executor();
/**
@brief runs a taskflow once
@param taskflow a tf::Taskflow object
@return a tf::Future that holds the result of the execution
This member function executes the given taskflow once and returns a tf::Future
object that eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run(taskflow);
// do something else
future.wait();
@endcode
This member function is thread-safe.
@attention
The executor does not own the given taskflow. It is your responsibility to
ensure the taskflow remains alive during its execution.
*/
tf::Future<void> run(Taskflow& taskflow);
/**
@brief runs a moved taskflow once
@param taskflow a moved tf::Taskflow object
@return a tf::Future that holds the result of the execution
This member function executes a moved taskflow once and returns a tf::Future
object that eventually holds the result of the execution.
The executor will take care of the lifetime of the moved taskflow.
@code{.cpp}
tf::Future<void> future = executor.run(std::move(taskflow));
// do something else
future.wait();
@endcode
This member function is thread-safe.
*/
tf::Future<void> run(Taskflow&& taskflow);
/**
@brief runs a taskflow once and invokes a callback upon completion
@param taskflow a tf::Taskflow object
@param callable a callable object to be invoked after this run
@return a tf::Future that holds the result of the execution
This member function executes the given taskflow once and invokes the given
callable when the execution completes.
This member function returns a tf::Future object that
eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run(taskflow, [](){ std::cout << "done"; });
// do something else
future.wait();
@endcode
This member function is thread-safe.
@attention
The executor does not own the given taskflow. It is your responsibility to
ensure the taskflow remains alive during its execution.
*/
template<typename C>
tf::Future<void> run(Taskflow& taskflow, C&& callable);
/**
@brief runs a moved taskflow once and invokes a callback upon completion
@param taskflow a moved tf::Taskflow object
@param callable a callable object to be invoked after this run
@return a tf::Future that holds the result of the execution
This member function executes a moved taskflow once and invokes the given
callable when the execution completes.
This member function returns a tf::Future object that
eventually holds the result of the execution.
The executor will take care of the lifetime of the moved taskflow.
@code{.cpp}
tf::Future<void> future = executor.run(
std::move(taskflow), [](){ std::cout << "done"; }
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
*/
template<typename C>
tf::Future<void> run(Taskflow&& taskflow, C&& callable);
/**
@brief runs a taskflow for @c N times
@param taskflow a tf::Taskflow object
@param N number of runs
@return a tf::Future that holds the result of the execution
This member function executes the given taskflow @c N times and returns a tf::Future
object that eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run_n(taskflow, 2); // run taskflow 2 times
// do something else
future.wait();
@endcode
This member function is thread-safe.
@attention
The executor does not own the given taskflow. It is your responsibility to
ensure the taskflow remains alive during its execution.
*/
tf::Future<void> run_n(Taskflow& taskflow, size_t N);
/**
@brief runs a moved taskflow for @c N times
@param taskflow a moved tf::Taskflow object
@param N number of runs
@return a tf::Future that holds the result of the execution
This member function executes a moved taskflow @c N times and returns a tf::Future
object that eventually holds the result of the execution.
The executor will take care of the lifetime of the moved taskflow.
@code{.cpp}
tf::Future<void> future = executor.run_n(
std::move(taskflow), 2 // run the moved taskflow 2 times
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
*/
tf::Future<void> run_n(Taskflow&& taskflow, size_t N);
/**
@brief runs a taskflow for @c N times and then invokes a callback
@param taskflow a tf::Taskflow
@param N number of runs
@param callable a callable object to be invoked after this run
@return a tf::Future that holds the result of the execution
This member function executes the given taskflow @c N times and invokes the given
callable when the execution completes.
This member function returns a tf::Future object that
eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run_n(
taskflow, 2, [](){ std::cout << "done"; } // runs taskflow 2 times and invokes
// the lambda to print "done"
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
@attention
The executor does not own the given taskflow. It is your responsibility to
ensure the taskflow remains alive during its execution.
*/
template<typename C>
tf::Future<void> run_n(Taskflow& taskflow, size_t N, C&& callable);
/**
@brief runs a moved taskflow for @c N times and then invokes a callback
@param taskflow a moved tf::Taskflow
@param N number of runs
@param callable a callable object to be invoked after this run
@return a tf::Future that holds the result of the execution
This member function executes a moved taskflow @c N times and invokes the given
callable when the execution completes.
This member function returns a tf::Future object that
eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run_n(
// run the moved taskflow 2 times and invoke the lambda to print "done"
std::move(taskflow), 2, [](){ std::cout << "done"; }
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
*/
template<typename C>
tf::Future<void> run_n(Taskflow&& taskflow, size_t N, C&& callable);
/**
@brief runs a taskflow multiple times until the predicate becomes true
@param taskflow a tf::Taskflow
@param pred a boolean predicate to return @c true for stop
@return a tf::Future that holds the result of the execution
This member function executes the given taskflow multiple times until
the predicate returns @c true.
This member function returns a tf::Future object that
eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run_until(
taskflow, [](){ return rand()%10 == 0; }
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
@attention
The executor does not own the given taskflow. It is your responsibility to
ensure the taskflow remains alive during its execution.
*/
template<typename P>
tf::Future<void> run_until(Taskflow& taskflow, P&& pred);
/**
@brief runs a moved taskflow and keeps running it
until the predicate becomes true
@param taskflow a moved tf::Taskflow object
@param pred a boolean predicate to return @c true for stop
@return a tf::Future that holds the result of the execution
This member function executes a moved taskflow multiple times until
the predicate returns @c true.
This member function returns a tf::Future object that
eventually holds the result of the execution.
The executor will take care of the lifetime of the moved taskflow.
@code{.cpp}
tf::Future<void> future = executor.run_until(
std::move(taskflow), [](){ return rand()%10 == 0; }
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
*/
template<typename P>
tf::Future<void> run_until(Taskflow&& taskflow, P&& pred);
/**
@brief runs a taskflow multiple times until the predicate becomes true and
then invokes the callback
@param taskflow a tf::Taskflow
@param pred a boolean predicate to return @c true for stop
@param callable a callable object to be invoked after this run completes
@return a tf::Future that holds the result of the execution
This member function executes the given taskflow multiple times until
the predicate returns @c true and then invokes the given callable when
the execution completes.
This member function returns a tf::Future object that
eventually holds the result of the execution.
@code{.cpp}
tf::Future<void> future = executor.run_until(
taskflow, [](){ return rand()%10 == 0; }, [](){ std::cout << "done"; }
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
@attention
The executor does not own the given taskflow. It is your responsibility to
ensure the taskflow remains alive during its execution.
*/
template<typename P, typename C>
tf::Future<void> run_until(Taskflow& taskflow, P&& pred, C&& callable);
/**
@brief runs a moved taskflow and keeps running
it until the predicate becomes true and then invokes the callback
@param taskflow a moved tf::Taskflow
@param pred a boolean predicate to return @c true for stop
@param callable a callable object to be invoked after this run completes
@return a tf::Future that holds the result of the execution
This member function executes a moved taskflow multiple times until
the predicate returns @c true and then invokes the given callable when
the execution completes.
This member function returns a tf::Future object that
eventually holds the result of the execution.
The executor will take care of the lifetime of the moved taskflow.
@code{.cpp}
tf::Future<void> future = executor.run_until(
std::move(taskflow),
[](){ return rand()%10 == 0; }, [](){ std::cout << "done"; }
);
// do something else
future.wait();
@endcode
This member function is thread-safe.
*/
template<typename P, typename C>
tf::Future<void> run_until(Taskflow&& taskflow, P&& pred, C&& callable);
/**
@brief runs a target graph and waits until it completes using
an internal worker of this executor
@tparam T target type which has `tf::Graph& T::graph()` defined
@param target the target task graph object
The method runs a target graph which has `tf::Graph& T::graph()` defined
and waits until the execution completes.
Unlike the typical flow of calling a method in the `tf::Executor::run`
series and waiting on the returned future, this method must be called
by an internal worker of this executor. The calling worker participates
in the work-stealing loop of the scheduler, thereby avoiding the
potential deadlock caused by blocked waiting.
@code{.cpp}
tf::Executor executor(2);
tf::Taskflow taskflow;
std::array<tf::Taskflow, 1000> others;
std::atomic<size_t> counter{0};
for(size_t n=0; n<1000; n++) {
for(size_t i=0; i<1000; i++) {
others[n].emplace([&](){ counter++; });
}
taskflow.emplace([&executor, &tf=others[n]](){
executor.corun(tf);
//executor.run(tf).wait(); <- blocking the worker without doing anything
// will introduce deadlock
});
}
executor.run(taskflow).wait();
@endcode
The method is thread-safe as long as the target is not run
by two or more threads concurrently.
@attention
You must call tf::Executor::corun from a worker of the calling executor
or an exception will be thrown.
*/
template <typename T>
void corun(T& target);
/**
@brief keeps running the work-stealing loop until the predicate becomes true
@tparam P predicate type
@param predicate a boolean predicate to indicate when to stop the loop
The method keeps the caller worker running in the work-stealing loop
until the stop predicate becomes true.
@code{.cpp}
taskflow.emplace([&](){
std::future<void> fu = std::async([](){
std::this_thread::sleep_for(std::chrono::seconds(100));
});
executor.corun_until([&fu](){
return fu.wait_for(std::chrono::seconds(0)) == std::future_status::ready;
});
});
@endcode
@attention
You must call tf::Executor::corun_until from a worker of the calling executor
or an exception will be thrown.
*/
template <typename P>
void corun_until(P&& predicate);
/**
@brief waits for all tasks to complete
This member function blocks until all submitted tasks
(e.g., taskflows, asynchronous tasks) finish.
@code{.cpp}
executor.run(taskflow1);
executor.run_n(taskflow2, 10);
executor.run_n(taskflow3, 100);
executor.wait_for_all(); // wait until the above submitted taskflows finish
@endcode
*/
void wait_for_all();
/**
@brief queries the number of worker threads
Each worker represents one unique thread spawned by the executor
at construction time.
@code{.cpp}
tf::Executor executor(4);
std::cout << executor.num_workers(); // 4
@endcode
*/
size_t num_workers() const noexcept;
/**
@brief queries the number of running topologies at the time of this call
When a taskflow is submitted to an executor, a topology is created to store
runtime metadata of the running taskflow.
When the execution of the submitted taskflow finishes,
its corresponding topology will be removed from the executor.
@code{.cpp}
executor.run(taskflow);
std::cout << executor.num_topologies(); // 0 or 1 (taskflow still running)
@endcode
*/
size_t num_topologies() const;
/**
@brief queries the number of running taskflows with moved ownership
@code{.cpp}
executor.run(std::move(taskflow));
std::cout << executor.num_taskflows(); // 0 or 1 (taskflow still running)
@endcode
*/
size_t num_taskflows() const;
/**
@brief queries the id of the caller thread in this executor
Each worker has a unique id in the range of @c 0 to @c N-1 associated with
its parent executor.
If the caller thread does not belong to the executor, @c -1 is returned.
@code{.cpp}
tf::Executor executor(4); // 4 workers in the executor
executor.this_worker_id(); // -1 (main thread is not a worker)
taskflow.emplace([&](){
std::cout << executor.this_worker_id(); // 0, 1, 2, or 3
});
executor.run(taskflow);
@endcode
*/
int this_worker_id() const;
// --------------------------------------------------------------------------
// Observer methods
// --------------------------------------------------------------------------
/**
@brief constructs an observer to inspect the activities of worker threads
@tparam Observer observer type derived from tf::ObserverInterface
@tparam ArgsT argument parameter pack
@param args arguments to forward to the constructor of the observer
@return a shared pointer to the created observer
Each executor manages a list of observers whose ownership is shared with callers.
For each of these observers, the two member functions,
tf::ObserverInterface::on_entry and tf::ObserverInterface::on_exit,
will be called before and after the execution of a task.
This member function is not thread-safe.
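For example, the sketch below defines a minimal observer; the type name
and the printed messages are illustrative only:
@code{.cpp}
struct MyObserver : public tf::ObserverInterface {
void set_up(size_t num_workers) override final {
std::cout << "setting up observer with " << num_workers << " workers\n";
}
void on_entry(tf::WorkerView wv, tf::TaskView tv) override final {
std::cout << "worker " << wv.id() << " enters " << tv.name() << '\n';
}
void on_exit(tf::WorkerView wv, tf::TaskView tv) override final {
std::cout << "worker " << wv.id() << " leaves " << tv.name() << '\n';
}
};
auto observer = executor.make_observer<MyObserver>();
@endcode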
*/
template <typename Observer, typename... ArgsT>
std::shared_ptr<Observer> make_observer(ArgsT&&... args);
/**
@brief removes an observer from the executor
This member function is not thread-safe.
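For example, reusing the illustrative MyObserver from
tf::Executor::make_observer:
@code{.cpp}
auto observer = executor.make_observer<MyObserver>();
// ... run some taskflows ...
executor.remove_observer(std::move(observer));
@endcode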
*/
template <typename Observer>
void remove_observer(std::shared_ptr<Observer> observer);
/**
@brief queries the number of observers
*/
size_t num_observers() const noexcept;
// --------------------------------------------------------------------------
// Async Task Methods
// --------------------------------------------------------------------------
/**
@brief creates a parameterized asynchronous task to run the given function
@tparam P task parameter type
@tparam F callable type
@param params task parameters
@param func callable object
@return a @std_future that will hold the result of the execution
The method creates a parameterized asynchronous task
to run the given function and returns a @std_future object
that will eventually hold the result of the execution.
@code{.cpp}
std::future<int> future = executor.async("name", [](){
std::cout << "create an asynchronous task with a name and returns 1\n";
return 1;
});
future.get();
@endcode
This member function is thread-safe.
*/
template <typename P, typename F>
auto async(P&& params, F&& func);
/**
@brief runs a given function asynchronously
@tparam F callable type
@param func callable object
@return a @std_future that will hold the result of the execution
The method creates an asynchronous task to run the given function
and returns a @std_future object that will eventually hold the result
of the execution.
@code{.cpp}
std::future<int> future = executor.async([](){
std::cout << "create an asynchronous task and returns 1\n";
return 1;
});
future.get();
@endcode
This member function is thread-safe.
*/
template <typename F>
auto async(F&& func);
/**
@brief similar to tf::Executor::async but does not return a future object
@tparam F callable type
@param params task parameters
@param func callable object
The method creates a parameterized asynchronous task
to run the given function without returning any @std_future object.
This member function is more efficient than tf::Executor::async
and is preferred when the application does not need a @std_future to acquire
the result or synchronize the execution.
@code{.cpp}
executor.silent_async("name", [](){
std::cout << "create an asynchronous task with a name and no return\n";
});
executor.wait_for_all();
@endcode
This member function is thread-safe.
*/
template <typename P, typename F>
void silent_async(P&& params, F&& func);
/**
@brief similar to tf::Executor::async but does not return a future object
@tparam F callable type
@param func callable object
The method creates an asynchronous task
to run the given function without returning any @std_future object.
This member function is more efficient than tf::Executor::async
and is preferred when the application does not need a @std_future to acquire
the result or synchronize the execution.
@code{.cpp}
executor.silent_async([](){
std::cout << "create an asynchronous task with no return\n";
});
executor.wait_for_all();
@endcode
This member function is thread-safe.
*/
template <typename F>
void silent_async(F&& func);
// --------------------------------------------------------------------------
// Silent Dependent Async Methods
// --------------------------------------------------------------------------
/**
@brief runs the given function asynchronously
when the given dependents finish
@tparam F callable type
@tparam Tasks task types convertible to tf::AsyncTask
@param func callable object
@param tasks asynchronous tasks on which this execution depends
@return a tf::AsyncTask handle
This member function is more efficient than tf::Executor::dependent_async
and is preferred when you do not need a @std_future to
acquire the result or synchronize the execution.
The example below creates three asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
@code{.cpp}
tf::AsyncTask A = executor.silent_dependent_async([](){ printf("A\n"); });
tf::AsyncTask B = executor.silent_dependent_async([](){ printf("B\n"); });
executor.silent_dependent_async([](){ printf("C runs after A and B\n"); }, A, B);
executor.wait_for_all();
@endcode
This member function is thread-safe.
*/
template <typename F, typename... Tasks,
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr
>
tf::AsyncTask silent_dependent_async(F&& func, Tasks&&... tasks);
/**
@brief runs the given function asynchronously
when the given dependents finish
@tparam F callable type
@tparam Tasks task types convertible to tf::AsyncTask
@param params task parameters
@param func callable object
@param tasks asynchronous tasks on which this execution depends
@return a tf::AsyncTask handle
This member function is more efficient than tf::Executor::dependent_async
and is preferred when you do not need a @std_future to
acquire the result or synchronize the execution.
The example below creates three asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
Assigned task names will appear in the observers of the executor.
@code{.cpp}
tf::AsyncTask A = executor.silent_dependent_async("A", [](){ printf("A\n"); });
tf::AsyncTask B = executor.silent_dependent_async("B", [](){ printf("B\n"); });
executor.silent_dependent_async(
"C", [](){ printf("C runs after A and B\n"); }, A, B
);
executor.wait_for_all();
@endcode
This member function is thread-safe.
*/
template <typename P, typename F, typename... Tasks,
std::enable_if_t<is_task_params_v<P> && all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr
>
tf::AsyncTask silent_dependent_async(P&& params, F&& func, Tasks&&... tasks);
/**
@brief runs the given function asynchronously
when the given range of dependents finish
@tparam F callable type
@tparam I iterator type
@param func callable object
@param first iterator to the beginning (inclusive)
@param last iterator to the end (exclusive)
@return a tf::AsyncTask handle
This member function is more efficient than tf::Executor::dependent_async
and is preferred when you do not need a @std_future to
acquire the result or synchronize the execution.
The example below creates three asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
@code{.cpp}
std::array<tf::AsyncTask, 2> array {
executor.silent_dependent_async([](){ printf("A\n"); }),
executor.silent_dependent_async([](){ printf("B\n"); })
};
executor.silent_dependent_async(
[](){ printf("C runs after A and B\n"); }, array.begin(), array.end()
);
executor.wait_for_all();
@endcode
This member function is thread-safe.
*/
template <typename F, typename I,
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr
>
tf::AsyncTask silent_dependent_async(F&& func, I first, I last);
/**
@brief runs the given function asynchronously
when the given range of dependents finish
@tparam F callable type
@tparam I iterator type
@param params task parameters
@param func callable object
@param first iterator to the beginning (inclusive)
@param last iterator to the end (exclusive)
@return a tf::AsyncTask handle
This member function is more efficient than tf::Executor::dependent_async
and is preferred when you do not need a @std_future to
acquire the result or synchronize the execution.
The example below creates three asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
Assigned task names will appear in the observers of the executor.
@code{.cpp}
std::array<tf::AsyncTask, 2> array {
executor.silent_dependent_async("A", [](){ printf("A\n"); }),
executor.silent_dependent_async("B", [](){ printf("B\n"); })
};
executor.silent_dependent_async(
"C", [](){ printf("C runs after A and B\n"); }, array.begin(), array.end()
);
executor.wait_for_all();
@endcode
This member function is thread-safe.
*/
template <typename P, typename F, typename I,
std::enable_if_t<is_task_params_v<P> && !std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr
>
tf::AsyncTask silent_dependent_async(P&& params, F&& func, I first, I last);
// --------------------------------------------------------------------------
// Dependent Async Methods
// --------------------------------------------------------------------------
/**
@brief runs the given function asynchronously
when the given dependents finish
@tparam F callable type
@tparam Tasks task types convertible to tf::AsyncTask
@param func callable object
@param tasks asynchronous tasks on which this execution depends
@return a pair of a tf::AsyncTask handle and
a @std_future that holds the result of the execution
The example below creates three asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int>
that eventually will hold the result of the execution.
@code{.cpp}
tf::AsyncTask A = executor.silent_dependent_async([](){ printf("A\n"); });
tf::AsyncTask B = executor.silent_dependent_async([](){ printf("B\n"); });
auto [C, fuC] = executor.dependent_async(
[](){
printf("C runs after A and B\n");
return 1;
},
A, B
);
fuC.get(); // C finishes, which in turn means both A and B finish
@endcode
You can mix tf::AsyncTask handles returned by
Executor::dependent_async and Executor::silent_dependent_async
when specifying task dependencies.
This member function is thread-safe.
*/
template <typename F, typename... Tasks,
std::enable_if_t<all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr
>
auto dependent_async(F&& func, Tasks&&... tasks);
/**
@brief runs the given function asynchronously
when the given dependents finish
@tparam P task parameters type
@tparam F callable type
@tparam Tasks task types convertible to tf::AsyncTask
@param params task parameters
@param func callable object
@param tasks asynchronous tasks on which this execution depends
@return a pair of a tf::AsyncTask handle and
a @std_future that holds the result of the execution
The example below creates three named asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int>
that eventually will hold the result of the execution.
Assigned task names will appear in the observers of the executor.
@code{.cpp}
tf::AsyncTask A = executor.silent_dependent_async("A", [](){ printf("A\n"); });
tf::AsyncTask B = executor.silent_dependent_async("B", [](){ printf("B\n"); });
auto [C, fuC] = executor.dependent_async(
"C",
[](){
printf("C runs after A and B\n");
return 1;
},
A, B
);
assert(fuC.get()==1); // C finishes, which in turn means both A and B finish
@endcode
You can mix tf::AsyncTask handles returned by
Executor::dependent_async and Executor::silent_dependent_async
when specifying task dependencies.
This member function is thread-safe.
*/
template <typename P, typename F, typename... Tasks,
std::enable_if_t<is_task_params_v<P> && all_same_v<AsyncTask, std::decay_t<Tasks>...>, void>* = nullptr
>
auto dependent_async(P&& params, F&& func, Tasks&&... tasks);
/**
@brief runs the given function asynchronously
when the given range of dependents finish
@tparam F callable type
@tparam I iterator type
@param func callable object
@param first iterator to the beginning (inclusive)
@param last iterator to the end (exclusive)
@return a pair of a tf::AsyncTask handle and
a @std_future that holds the result of the execution
The example below creates three asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int>
that eventually will hold the result of the execution.
@code{.cpp}
std::array<tf::AsyncTask, 2> array {
executor.silent_dependent_async([](){ printf("A\n"); }),
executor.silent_dependent_async([](){ printf("B\n"); })
};
auto [C, fuC] = executor.dependent_async(
[](){
printf("C runs after A and B\n");
return 1;
},
array.begin(), array.end()
);
assert(fuC.get()==1); // C finishes, which in turn means both A and B finish
@endcode
You can mix tf::AsyncTask handles returned by
Executor::dependent_async and Executor::silent_dependent_async
when specifying task dependencies.
This member function is thread-safe.
*/
template <typename F, typename I,
std::enable_if_t<!std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr
>
auto dependent_async(F&& func, I first, I last);
/**
@brief runs the given function asynchronously
when the given range of dependents finish
@tparam P task parameters type
@tparam F callable type
@tparam I iterator type
@param params task parameters
@param func callable object
@param first iterator to the beginning (inclusive)
@param last iterator to the end (exclusive)
@return a pair of a tf::AsyncTask handle and
a @std_future that holds the result of the execution
The example below creates three named asynchronous tasks, @c A, @c B, and @c C,
in which task @c C runs after task @c A and task @c B.
Task @c C returns a pair of its tf::AsyncTask handle and a std::future<int>
that eventually will hold the result of the execution.
Assigned task names will appear in the observers of the executor.
@code{.cpp}
std::array<tf::AsyncTask, 2> array {
executor.silent_dependent_async("A", [](){ printf("A\n"); }),
executor.silent_dependent_async("B", [](){ printf("B\n"); })
};
auto [C, fuC] = executor.dependent_async(
"C",
[](){
printf("C runs after A and B\n");
return 1;
},
array.begin(), array.end()
);
assert(fuC.get()==1); // C finishes, which in turn means both A and B finish
@endcode
You can mix tf::AsyncTask handles returned by
Executor::dependent_async and Executor::silent_dependent_async
when specifying task dependencies.
This member function is thread-safe.
*/
template <typename P, typename F, typename I,
std::enable_if_t<is_task_params_v<P> && !std::is_same_v<std::decay_t<I>, AsyncTask>, void>* = nullptr
>
auto dependent_async(P&& params, F&& func, I first, I last);
private:
const size_t _MAX_STEALS;
std::mutex _wsq_mutex;
std::mutex _taskflows_mutex;
std::vector<std::thread> _threads;
std::vector<Worker> _workers;
#ifdef __cpp_lib_atomic_wait
std::atomic<size_t> _num_topologies {0};
std::atomic_flag _all_spawned = ATOMIC_FLAG_INIT;
std::atomic_flag _done = ATOMIC_FLAG_INIT;
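// _state packs an epoch counter (upper 32 bits, advanced by _EPOCH_INC on
// each notification) and the number of waiting workers (lower 32 bits)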
std::atomic<uint64_t> _state {0ull};
static const uint64_t _EPOCH_INC{1ull << 32};
static const uint64_t _NUM_WAITERS_MASK{(1ull << 32) - 1};
static const uint64_t _NUM_WAITERS_INC{1ull};
#else
std::condition_variable _topology_cv;
std::mutex _topology_mutex;
size_t _num_topologies {0};
Notifier _notifier;
std::atomic<bool> _done {0};
#endif
std::unordered_map<std::thread::id, size_t> _wids;
std::list<Taskflow> _taskflows;
TaskQueue<Node*> _wsq;
std::unordered_set<std::shared_ptr<ObserverInterface>> _observers;
Worker* _this_worker();
bool _wait_for_task(Worker&, Node*&);
bool _invoke_module_task_internal(Worker&, Node*);
void _observer_prologue(Worker&, Node*);
void _observer_epilogue(Worker&, Node*);
void _spawn(size_t);
void _exploit_task(Worker&, Node*&);
void _explore_task(Worker&, Node*&);
void _schedule(Worker&, Node*);
void _schedule(Node*);
void _schedule(Worker&, const SmallVector<Node*>&);
void _schedule(const SmallVector<Node*>&);
void _set_up_topology(Worker*, Topology*);
void _set_up_graph(Graph&, Node*, Topology*, int, SmallVector<Node*>&);
void _tear_down_topology(Worker&, Topology*);
void _tear_down_async(Node*);
void _tear_down_dependent_async(Worker&, Node*);
void _tear_down_invoke(Worker&, Node*);
void _increment_topology();
void _decrement_topology();
void _invoke(Worker&, Node*);
void _invoke_static_task(Worker&, Node*);
void _invoke_subflow_task(Worker&, Node*);
void _detach_subflow_task(Worker&, Node*, Graph&);
void _invoke_condition_task(Worker&, Node*, SmallVector<int>&);
void _invoke_multi_condition_task(Worker&, Node*, SmallVector<int>&);
void _invoke_module_task(Worker&, Node*);
void _invoke_async_task(Worker&, Node*);
void _invoke_dependent_async_task(Worker&, Node*);
void _process_async_dependent(Node*, tf::AsyncTask&, size_t&);
void _process_exception(Worker&, Node*);
void _schedule_async_task(Node*);
void _corun_graph(Worker&, Node*, Graph&);
template <typename P>
void _corun_until(Worker&, P&&);
};
// Constructor
inline Executor::Executor(size_t N) :
_MAX_STEALS {((N+1) << 1)},
_threads {N},
_workers {N}
#ifndef __cpp_lib_atomic_wait
,_notifier {N}
#endif
{
if(N == 0) {
TF_THROW("executor must define at least one worker");
}
_spawn(N);
// initialize the default observer if requested
if(has_env(TF_ENABLE_PROFILER)) {
TFProfManager::get()._manage(make_observer<TFProfObserver>());
}
}
// Destructor
inline Executor::~Executor() {
// wait for all topologies to complete
wait_for_all();
// shut down the scheduler
#ifdef __cpp_lib_atomic_wait
_done.test_and_set(std::memory_order_relaxed);
_state.fetch_add(_EPOCH_INC, std::memory_order_release);
_state.notify_all();
#else
_done = true;
_notifier.notify(true);
#endif
for(auto& t : _threads) {
t.join();
}
}
// Function: num_workers
inline size_t Executor::num_workers() const noexcept {
return _workers.size();
}
// Function: num_topologies
inline size_t Executor::num_topologies() const {
#ifdef __cpp_lib_atomic_wait
return _num_topologies.load(std::memory_order_relaxed);
#else
return _num_topologies;
#endif
}
// Function: num_taskflows
inline size_t Executor::num_taskflows() const {
return _taskflows.size();
}
// Function: _this_worker
inline Worker* Executor::_this_worker() {
auto itr = _wids.find(std::this_thread::get_id());
return itr == _wids.end() ? nullptr : &_workers[itr->second];
}
// Function: this_worker_id
inline int Executor::this_worker_id() const {
auto i = _wids.find(std::this_thread::get_id());
return i == _wids.end() ? -1 : static_cast<int>(_workers[i->second]._id);
}
// Procedure: _spawn
inline void Executor::_spawn(size_t N) {
#ifdef __cpp_lib_atomic_wait
#else
std::mutex mutex;
std::condition_variable cond;
size_t n=0;
#endif
for(size_t id=0; id<N; ++id) {
_workers[id]._id = id;
_workers[id]._vtm = id;
_workers[id]._executor = this;
#ifndef __cpp_lib_atomic_wait
_workers[id]._waiter = &_notifier._waiters[id];
#endif
_threads[id] = std::thread([&, &w=_workers[id]] () {
#ifdef __cpp_lib_atomic_wait
// wait for the caller thread to initialize the ID mapping
_all_spawned.wait(false, std::memory_order_acquire);
w._thread = &_threads[w._id];
#else
// update the ID mapping of this thread
w._thread = &_threads[w._id];
{
std::scoped_lock lock(mutex);
_wids[std::this_thread::get_id()] = w._id;
if(++n == num_workers()) {
cond.notify_one();
}
}
#endif
Node* t = nullptr;
while(1) {
// execute the tasks.
_exploit_task(w, t);
// wait for tasks
if(_wait_for_task(w, t) == false) {
break;
}
}
});
// POSIX-like systems can use the following to pin threads to cores
//cpu_set_t cpuset;
//CPU_ZERO(&cpuset);
//CPU_SET(id, &cpuset);
//pthread_setaffinity_np(
// _threads[id].native_handle(), sizeof(cpu_set_t), &cpuset
//);
#ifdef __cpp_lib_atomic_wait
//_wids[_threads[id].get_id()] = id;
_wids.emplace(std::piecewise_construct,
std::forward_as_tuple(_threads[id].get_id()), std::forward_as_tuple(id)
);
#endif
}
#ifdef __cpp_lib_atomic_wait
_all_spawned.test_and_set(std::memory_order_release);
_all_spawned.notify_all();
#else
std::unique_lock<std::mutex> lock(mutex);
cond.wait(lock, [&](){ return n==N; });
#endif
}
// Function: _corun_until
template <typename P>
void Executor::_corun_until(Worker& w, P&& stop_predicate) {
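// Work-stealing loop that keeps the caller worker running tasks until the
// stop predicate becomes true: exploit the local queue first, then steal
// from a randomly chosen victim (or from the shared queue when the victim
// is the worker itself).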
std::uniform_int_distribution<size_t> rdvtm(0, _workers.size()-1);
exploit:
while(!stop_predicate()) {
//exploit:
if(auto t = w._wsq.pop(); t) {
_invoke(w, t);
}
else {
size_t num_steals = 0;
explore:
t = (w._id == w._vtm) ? _wsq.steal() : _workers[w._vtm]._wsq.steal();
if(t) {
_invoke(w, t);
goto exploit;
}
else if(!stop_predicate()) {
if(num_steals++ > _MAX_STEALS) {
std::this_thread::yield();
}
w._vtm = rdvtm(w._rdgen);
goto explore;
}
else {
break;
}
}
}
}
// Function: _explore_task
inline void Executor::_explore_task(Worker& w, Node*& t) {
//assert(_workers[w].wsq.empty());
//assert(!t);
size_t num_steals = 0;
size_t num_yields = 0;
std::uniform_int_distribution<size_t> rdvtm(0, _workers.size()-1);
// We use a do-while loop so the worker immediately attempts a steal
// from its assigned victim before re-checking the termination flag.
do {
t = (w._id == w._vtm) ? _wsq.steal() : _workers[w._vtm]._wsq.steal();
if(t) {
break;
}
if(num_steals++ > _MAX_STEALS) {
std::this_thread::yield();
if(num_yields++ > 100) {
break;
}
}
w._vtm = rdvtm(w._rdgen);
}
#ifdef __cpp_lib_atomic_wait
// TODO: can _done instead be checked later, in _wait_for_task?
while(!_done.test(std::memory_order_relaxed));
#else
while(!_done);
#endif
}
// Procedure: _exploit_task
inline void Executor::_exploit_task(Worker& w, Node*& t) {
while(t) {
_invoke(w, t);
t = w._wsq.pop();
}
}
// Function: _wait_for_task
inline bool Executor::_wait_for_task(Worker& worker, Node*& t) {
explore_task:
_explore_task(worker, t);
if(t) {
return true;
}
// The last thief who successfully stole a task will wake up
// another thief worker to avoid starvation.
// if(t) {
//#ifdef __cpp_lib_atomic_wait
//
//#else
// _notifier.notify(false);
//#endif
// return true;
// }
#ifdef __cpp_lib_atomic_wait
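// Register this worker as a waiter by incrementing the waiter count in the
// lower 32 bits of _state, then re-check the termination flag and all task
// queues before actually waiting. Notifiers advance the epoch in the upper
// 32 bits, which changes the waited-on value and wakes this worker up.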
for(uint64_t cur_state = _state.load(std::memory_order_acquire);;) {
uint64_t new_state = cur_state + _NUM_WAITERS_INC;
// TODO: CAS with relaxed??
if(_state.compare_exchange_weak(cur_state, new_state, std::memory_order_acquire)) {
if(_done.test(std::memory_order_relaxed)) {
_state.fetch_sub(_NUM_WAITERS_INC, std::memory_order_relaxed);
//_state.fetch_add(_EPOCH_INC, std::memory_order_release);
//_state.notify_all();
return false;
}
if(!_wsq.empty()) {
worker._vtm = worker._id;
_state.fetch_sub(_NUM_WAITERS_INC, std::memory_order_relaxed);
goto explore_task;
}
// We need to use index-based scanning to avoid a data race
// with _spawn, which may initialize a worker at the same time.
for(size_t vtm=0; vtm<_workers.size(); vtm++) {
if(!_workers[vtm]._wsq.empty()) {
worker._vtm = vtm;
_state.fetch_sub(_NUM_WAITERS_INC, std::memory_order_relaxed);
goto explore_task;
}
}
_state.wait(new_state, std::memory_order_relaxed);
_state.fetch_sub(_NUM_WAITERS_INC, std::memory_order_relaxed);
goto explore_task;
}
}
#else
// ---- 2PC guard ----
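// Two-phase commit: announce the intent to sleep (prepare_wait), re-check
// the shared queue, the termination flag, and every worker queue, and only
// then commit to sleeping. This prevents a lost wake-up between the last
// check and the wait.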
_notifier.prepare_wait(worker._waiter);
if(!_wsq.empty()) {
_notifier.cancel_wait(worker._waiter);
worker._vtm = worker._id;
goto explore_task;
}
if(_done) {
_notifier.cancel_wait(worker._waiter);
_notifier.notify(true);
return false;
}
// We need to use index-based scanning to avoid a data race
// with _spawn, which may initialize a worker at the same time.
for(size_t vtm=0; vtm<_workers.size(); vtm++) {
if(!_workers[vtm]._wsq.empty()) {
_notifier.cancel_wait(worker._waiter);
worker._vtm = vtm;
goto explore_task;
}
}
// Now I really need to relinquish my self to others
_notifier.commit_wait(worker._waiter);
goto explore_task;
#endif
}
// Function: make_observer
template<typename Observer, typename... ArgsT>
std::shared_ptr<Observer> Executor::make_observer(ArgsT&&... args) {
static_assert(
std::is_base_of_v<ObserverInterface, Observer>,
"Observer must be derived from ObserverInterface"
);
// use a local variable to mimic the constructor
auto ptr = std::make_shared<Observer>(std::forward<ArgsT>(args)...);
ptr->set_up(_workers.size());
_observers.emplace(std::static_pointer_cast<ObserverInterface>(ptr));
return ptr;
}
// Procedure: remove_observer
template <typename Observer>
void Executor::remove_observer(std::shared_ptr<Observer> ptr) {
static_assert(
std::is_base_of_v<ObserverInterface, Observer>,
"Observer must be derived from ObserverInterface"
);
_observers.erase(std::static_pointer_cast<ObserverInterface>(ptr));
}
// Function: num_observers
inline size_t Executor::num_observers() const noexcept {
return _observers.size();
}
// Procedure: _schedule
inline void Executor::_schedule(Worker& worker, Node* node) {
// We need to fetch p before the release such that the read
// operation is synchronized properly with other threads to
// avoid a data race.
auto p = node->_priority;
node->_state.fetch_or(Node::READY, std::memory_order_release);
// caller is a worker to this pool - starting at v3.5 we do not use
// any complicated notification mechanism as the experimental result
// has shown no significant advantage.
if(worker._executor == this) {
worker._wsq.push(node, p);
#ifdef __cpp_lib_atomic_wait
// we load the state first as load is much faster than fetch_add
if((_state.load(std::memory_order_acquire) & _NUM_WAITERS_MASK) != 0) {
_state.fetch_add(_EPOCH_INC, std::memory_order_release);
_state.notify_one();
}
#else
_notifier.notify(false);
#endif
return;
}
{
std::lock_guard<std::mutex> lock(_wsq_mutex);
_wsq.push(node, p);
}
#ifdef __cpp_lib_atomic_wait
// we load the state first as load is much faster than fetch_add
if((_state.load(std::memory_order_acquire) & _NUM_WAITERS_MASK) != 0) {
_state.fetch_add(_EPOCH_INC, std::memory_order_release);
_state.notify_one();
}
#else
_notifier.notify(false);
#endif
}
// Procedure: _schedule
inline void Executor::_schedule(Node* node) {
// We need to fetch p before the release such that the read
// operation is synchronized properly with other threads to
// avoid a data race.
auto p = node->_priority;
node->_state.fetch_or(Node::READY, std::memory_order_release);
{
std::lock_guard<std::mutex> lock(_wsq_mutex);
_wsq.push(node, p);
}
#ifdef __cpp_lib_atomic_wait
// we load the state first as load is much faster than fetch_add
if((_state.load(std::memory_order_acquire) & _NUM_WAITERS_MASK) != 0) {
_state.fetch_add(_EPOCH_INC, std::memory_order_release);
_state.notify_one();
}
#else
_notifier.notify(false);
#endif
}
// Procedure: _schedule
inline void Executor::_schedule(Worker& worker, const SmallVector<Node*>& nodes) {
// We need to cache the node count to avoid accessing the nodes
// vector while the parent topology is removed!
const auto num_nodes = nodes.size();
if(num_nodes == 0) {
return;
}
// caller is a worker to this pool - starting at v3.5 we do not use
// any complicated notification mechanism as the experimental result
// has shown no significant advantage.
if(worker._executor == this) {
for(size_t i=0; i<num_nodes; ++i) {
// We need to fetch p before the release such that the read
// operation is synchronized properly with other threads to
// avoid a data race.
auto p = nodes[i]->_priority;
nodes[i]->_state.fetch_or(Node::READY, std::memory_order_release);
worker._wsq.push(nodes[i], p);
#ifdef __cpp_lib_atomic_wait
// we load the state first as load is much faster than fetch_add
if((_state.load(std::memory_order_acquire) & _NUM_WAITERS_MASK) != 0) {
_state.fetch_add(_EPOCH_INC, std::memory_order_release);
_state.notify_one();
}
#else
_notifier.notify(false);
#endif
}
return;
}
{
std::lock_guard<std::mutex> lock(_wsq_mutex);
for(size_t k=0; k<num_nodes; ++k) {
auto p = nodes[k]->_priority;
nodes[k]->_state.fetch_or(Node::READY, std::memory_order_release);
_wsq.push(nodes[k], p);
}
}
#ifdef __cpp_lib_atomic_wait
uint64_t num_waiters = (_state.fetch_add(_EPOCH_INC, std::memory_order_release) & _NUM_WAITERS_MASK);
if(num_nodes < num_waiters) {
for(uint64_t k = 0; k < num_waiters - num_nodes; ++k) {
_state.notify_one();
}
}
else {
_state.notify_all();
}
#else
_notifier.notify_n(num_nodes);
#endif
}
// Procedure: _schedule
inline void Executor::_schedule(const SmallVector<Node*>& nodes) {
// parent topology may be removed!
const auto num_nodes = nodes.size();
if(num_nodes == 0) {
return;
}
// We need to fetch p before the release such that the read
// operation is synchronized properly with other threads to
// avoid a data race.
{
std::lock_guard<std::mutex> lock(_wsq_mutex);
for(size_t k=0; k<num_nodes; ++k) {
auto p = nodes[k]->_priority;
nodes[k]->_state.fetch_or(Node::READY, std::memory_order_release);
_wsq.push(nodes[k], p);
}
}
#ifdef __cpp_lib_atomic_wait
uint64_t num_waiters = (_state.fetch_add(_EPOCH_INC, std::memory_order_release) & _NUM_WAITERS_MASK);
if(num_nodes < num_waiters) {
for(uint64_t k = 0; k < num_waiters - num_nodes; ++k) {
_state.notify_one();
}
}
else {
_state.notify_all();
}
#else
_notifier.notify_n(num_nodes);
#endif
}
// Procedure: _invoke
inline void Executor::_invoke(Worker& worker, Node* node) {
// synchronize all outstanding memory operations caused by reordering
while(!(node->_state.load(std::memory_order_acquire) & Node::READY));
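// Each pass through begin_invoke runs one task; a ready successor may be
// cached in worker._cache and executed via the goto below, eliminating a
// queue push/pop (tail recursion elimination).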
begin_invoke:
SmallVector<int> conds;
// no need to do other things if the topology is cancelled
if(node->_is_cancelled()) {
_tear_down_invoke(worker, node);
return;
}
// if acquiring semaphore(s) exists, acquire them first
if(node->_semaphores && !node->_semaphores->to_acquire.empty()) {
SmallVector<Node*> nodes;
if(!node->_acquire_all(nodes)) {
_schedule(worker, nodes);
return;
}
node->_state.fetch_or(Node::ACQUIRED, std::memory_order_release);
}
// condition task
//int cond = -1;
// switch is faster than nested if-else due to jump table
switch(node->_handle.index()) {
// static task
case Node::STATIC:{
_invoke_static_task(worker, node);
}
break;
// subflow task
case Node::SUBFLOW: {
_invoke_subflow_task(worker, node);
}
break;
// condition task
case Node::CONDITION: {
_invoke_condition_task(worker, node, conds);
}
break;
// multi-condition task
case Node::MULTI_CONDITION: {
_invoke_multi_condition_task(worker, node, conds);
}
break;
// module task
case Node::MODULE: {
_invoke_module_task(worker, node);
}
break;
// async task
case Node::ASYNC: {
_invoke_async_task(worker, node);
_tear_down_async(node);
return ;
}
break;
// dependent async task
case Node::DEPENDENT_ASYNC: {
_invoke_dependent_async_task(worker, node);
_tear_down_dependent_async(worker, node);
if(worker._cache) {
node = worker._cache;
goto begin_invoke;
}
return;
}
break;
// monostate (placeholder)
default:
break;
}
//invoke_successors:
// if releasing semaphores exist, release them
if(node->_semaphores && !node->_semaphores->to_release.empty()) {
_schedule(worker, node->_release_all());
}
// Reset the join counter to support the cyclic control flow.
// + We must do this before scheduling the successors to avoid race
// condition on _dependents.
// + We must use fetch_add instead of direct assigning
// because the user-space call on "invoke" may explicitly schedule
// this task again (e.g., pipeline) which can access the join_counter.
if((node->_state.load(std::memory_order_relaxed) & Node::CONDITIONED)) {
node->_join_counter.fetch_add(node->num_strong_dependents(), std::memory_order_relaxed);
}
else {
node->_join_counter.fetch_add(node->num_dependents(), std::memory_order_relaxed);
}
// acquire the parent flow counter
auto& j = (node->_parent) ? node->_parent->_join_counter :
node->_topology->_join_counter;
// Here, we want to cache the latest successor with the highest priority
worker._cache = nullptr;
auto max_p = static_cast<unsigned>(TaskPriority::MAX);
// Invoke the task based on the corresponding type
switch(node->_handle.index()) {
// condition and multi-condition tasks
case Node::CONDITION:
case Node::MULTI_CONDITION: {
for(auto cond : conds) {
if(cond >= 0 && static_cast<size_t>(cond) < node->_successors.size()) {
auto s = node->_successors[cond];
// zeroing the join counter for invariant
s->_join_counter.store(0, std::memory_order_relaxed);
j.fetch_add(1, std::memory_order_relaxed);
if(s->_priority <= max_p) {
if(worker._cache) {
_schedule(worker, worker._cache);
}
worker._cache = s;
max_p = s->_priority;
}
else {
_schedule(worker, s);
}
}
}
}
break;
// non-condition task
default: {
for(size_t i=0; i<node->_successors.size(); ++i) {
//if(auto s = node->_successors[i]; --(s->_join_counter) == 0) {
if(auto s = node->_successors[i];
s->_join_counter.fetch_sub(1, std::memory_order_acq_rel) == 1) {
j.fetch_add(1, std::memory_order_relaxed);
if(s->_priority <= max_p) {
if(worker._cache) {
_schedule(worker, worker._cache);
}
worker._cache = s;
max_p = s->_priority;
}
else {
_schedule(worker, s);
}
}
}
}
break;
}
// tear_down the invoke
_tear_down_invoke(worker, node);
// perform tail recursion elimination for the right-most child to reduce
// the number of expensive pop/push operations through the task queue
if(worker._cache) {
node = worker._cache;
//node->_state.fetch_or(Node::READY, std::memory_order_release);
goto begin_invoke;
}
}
// Procedure: _tear_down_invoke
inline void Executor::_tear_down_invoke(Worker& worker, Node* node) {
// we must check the parent first before subtracting the join counter,
// or it can introduce a data race
if(auto parent = node->_parent; parent == nullptr) {
if(node->_topology->_join_counter.fetch_sub(1, std::memory_order_acq_rel) == 1) {
_tear_down_topology(worker, node->_topology);
}
}
// Here we assume the parent is in a busy loop (e.g., corun) waiting for
// its join counter to become 0.
else {
//parent->_join_counter.fetch_sub(1, std::memory_order_acq_rel);
parent->_join_counter.fetch_sub(1, std::memory_order_release);
}
//// module task
//else {
// auto id = parent->_handle.index();
// if(parent->_join_counter.fetch_sub(1, std::memory_order_acq_rel) == 1) {
// if(id == Node::MODULE) {
// return parent;
// }
// }
//}
//return nullptr;
}
// Procedure: _observer_prologue
inline void Executor::_observer_prologue(Worker& worker, Node* node) {
for(auto& observer : _observers) {
observer->on_entry(WorkerView(worker), TaskView(*node));
}
}
// Procedure: _observer_epilogue
inline void Executor::_observer_epilogue(Worker& worker, Node* node) {
for(auto& observer : _observers) {
observer->on_exit(WorkerView(worker), TaskView(*node));
}
}
// Procedure: _process_exception
inline void Executor::_process_exception(Worker&, Node* node) {
constexpr static auto flag = Topology::EXCEPTION | Topology::CANCELLED;
// if the node has a parent, we store the exception in its parent
if(auto parent = node->_parent; parent) {
if ((parent->_state.fetch_or(Node::EXCEPTION, std::memory_order_relaxed) & Node::EXCEPTION) == 0) {
parent->_exception_ptr = std::current_exception();
}
// TODO if the node has a topology, cancel it to enable early stop
//if(auto tpg = node->_topology; tpg) {
// tpg->_state.fetch_or(Topology::CANCELLED, std::memory_order_relaxed);
//}
}
// multiple tasks may throw, so we only take the first thrown exception
else if(auto tpg = node->_topology; tpg &&
((tpg->_state.fetch_or(flag, std::memory_order_relaxed) & Topology::EXCEPTION) == 0)
) {
tpg->_exception_ptr = std::current_exception();
}
// TODO: skip the exception that is not associated with any taskflows
}
// Procedure: _invoke_static_task
inline void Executor::_invoke_static_task(Worker& worker, Node* node) {
_observer_prologue(worker, node);
TF_EXECUTOR_EXCEPTION_HANDLER(worker, node, {
auto& work = std::get_if<Node::Static>(&node->_handle)->work;
switch(work.index()) {
case 0:
std::get_if<0>(&work)->operator()();
break;
case 1:
Runtime rt(*this, worker, node);
std::get_if<1>(&work)->operator()(rt);
node->_process_exception();
break;
}
});
_observer_epilogue(worker, node);
}
// Procedure: _invoke_subflow_task
inline void Executor::_invoke_subflow_task(Worker& w, Node* node) {
_observer_prologue(w, node);
TF_EXECUTOR_EXCEPTION_HANDLER(w, node, {
auto handle = std::get_if<Node::Subflow>(&node->_handle);
handle->subgraph._clear();
Subflow sf(*this, w, node, handle->subgraph);
handle->work(sf);
if(sf._joinable) {
_corun_graph(w, node, handle->subgraph);
}
node->_process_exception();
});
_observer_epilogue(w, node);
}
// Procedure: _detach_subflow_task
inline void Executor::_detach_subflow_task(Worker& w, Node* p, Graph& g) {
// graph is empty and has no async tasks
if(g.empty() && p->_join_counter.load(std::memory_order_acquire) == 0) {
return;
}
SmallVector<Node*> src;
_set_up_graph(g, nullptr, p->_topology, Node::DETACHED, src);
{
std::lock_guard<std::mutex> lock(p->_topology->_taskflow._mutex);
p->_topology->_taskflow._graph._merge(std::move(g));
}
p->_topology->_join_counter.fetch_add(src.size(), std::memory_order_relaxed);
_schedule(w, src);
}
// Procedure: _corun_graph
inline void Executor::_corun_graph(Worker& w, Node* p, Graph& g) {
// assert(p);
// graph is empty and has no async tasks (subflow)
if(g.empty() && p->_join_counter.load(std::memory_order_acquire) == 0) {
return;
}
SmallVector<Node*> src;
_set_up_graph(g, p, p->_topology, 0, src);
p->_join_counter.fetch_add(src.size(), std::memory_order_relaxed);
_schedule(w, src);
_corun_until(w, [p] () -> bool {
return p->_join_counter.load(std::memory_order_acquire) == 0; }
);
}
// Procedure: _invoke_condition_task
inline void Executor::_invoke_condition_task(
Worker& worker, Node* node, SmallVector<int>& conds
) {
_observer_prologue(worker, node);
TF_EXECUTOR_EXCEPTION_HANDLER(worker, node, {
auto& work = std::get_if<Node::Condition>(&node->_handle)->work;
switch(work.index()) {
case 0:
conds = { std::get_if<0>(&work)->operator()() };
break;
case 1:
Runtime rt(*this, worker, node);
conds = { std::get_if<1>(&work)->operator()(rt) };
node->_process_exception();
break;
}
});
_observer_epilogue(worker, node);
}
// Procedure: _invoke_multi_condition_task
inline void Executor::_invoke_multi_condition_task(
Worker& worker, Node* node, SmallVector<int>& conds
) {
_observer_prologue(worker, node);
TF_EXECUTOR_EXCEPTION_HANDLER(worker, node, {
auto& work = std::get_if<Node::MultiCondition>(&node->_handle)->work;
switch(work.index()) {
case 0:
conds = std::get_if<0>(&work)->operator()();
break;
case 1:
Runtime rt(*this, worker, node);
conds = std::get_if<1>(&work)->operator()(rt);
node->_process_exception();
break;
}
});
_observer_epilogue(worker, node);
}
// Procedure: _invoke_module_task
inline void Executor::_invoke_module_task(Worker& w, Node* node) {
_observer_prologue(w, node);
TF_EXECUTOR_EXCEPTION_HANDLER(w, node, {
_corun_graph(w, node, std::get_if<Node::Module>(&node->_handle)->graph);
node->_process_exception();
});
_observer_epilogue(w, node);
}
//// Function: _invoke_module_task_internal
//inline bool Executor::_invoke_module_task_internal(Worker& w, Node* p) {
//
// // acquire the underlying graph
// auto& g = std::get_if<Node::Module>(&p->_handle)->graph;
//
// // no need to do anything if the graph is empty
// if(g.empty()) {
// return false;
// }
//
// SmallVector<Node*> src;
// _set_up_graph(g, p, p->_topology, 0, src);
// p->_join_counter.fetch_add(src.size(), std::memory_order_relaxed);
//
// _schedule(w, src);
// return true;
//}
// Procedure: _invoke_async_task
inline void Executor::_invoke_async_task(Worker& worker, Node* node) {
_observer_prologue(worker, node);
TF_EXECUTOR_EXCEPTION_HANDLER(worker, node, {
auto& work = std::get_if<Node::Async>(&node->_handle)->work;
switch(work.index()) {
case 0:
std::get_if<0>(&work)->operator()();
break;
case 1:
Runtime rt(*this, worker, node);
std::get_if<1>(&work)->operator()(rt);
break;
}
});
_observer_epilogue(worker, node);
}
// Procedure: _invoke_dependent_async_task
inline void Executor::_invoke_dependent_async_task(Worker& worker, Node* node) {
_observer_prologue(worker, node);
TF_EXECUTOR_EXCEPTION_HANDLER(worker, node, {
auto& work = std::get_if<Node::DependentAsync>(&node->_handle)->work;
switch(work.index()) {
case 0:
std::get_if<0>(&work)->operator()();
break;
case 1:
Runtime rt(*this, worker, node);
std::get_if<1>(&work)->operator()(rt);
break;
}
});
_observer_epilogue(worker, node);
}
// Function: run
inline tf::Future<void> Executor::run(Taskflow& f) {
return run_n(f, 1, [](){});
}
// Function: run
inline tf::Future<void> Executor::run(Taskflow&& f) {
return run_n(std::move(f), 1, [](){});
}
// Function: run
template <typename C>
tf::Future<void> Executor::run(Taskflow& f, C&& c) {
return run_n(f, 1, std::forward<C>(c));
}
// Function: run
template <typename C>
tf::Future<void> Executor::run(Taskflow&& f, C&& c) {
return run_n(std::move(f), 1, std::forward<C>(c));
}
// Function: run_n
inline tf::Future<void> Executor::run_n(Taskflow& f, size_t repeat) {
return run_n(f, repeat, [](){});
}
// Function: run_n
inline tf::Future<void> Executor::run_n(Taskflow&& f, size_t repeat) {
return run_n(std::move(f), repeat, [](){});
}
// Function: run_n
template <typename C>
tf::Future<void> Executor::run_n(Taskflow& f, size_t repeat, C&& c) {
return run_until(
f, [repeat]() mutable { return repeat-- == 0; }, std::forward<C>(c)
);
}
// Function: run_n
template <typename C>
tf::Future<void> Executor::run_n(Taskflow&& f, size_t repeat, C&& c) {
return run_until(
std::move(f), [repeat]() mutable { return repeat-- == 0; }, std::forward<C>(c)
);
}
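// Note: run_n simply delegates to run_until with a countdown predicate;
// run_n(f, 3, c) behaves like
//   run_until(f, [n=3]() mutable { return n-- == 0; }, c);
// The predicate is checked before the first run and after each completed
// run, so a repeat count of zero performs no run at all.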
// Function: run_until
template<typename P>
tf::Future<void> Executor::run_until(Taskflow& f, P&& pred) {
return run_until(f, std::forward<P>(pred), [](){});
}
// Function: run_until
template<typename P>
tf::Future<void> Executor::run_until(Taskflow&& f, P&& pred) {
return run_until(std::move(f), std::forward<P>(pred), [](){});
}
// Function: run_until
template <typename P, typename C>
tf::Future<void> Executor::run_until(Taskflow& f, P&& p, C&& c) {
_increment_topology();
// We must check emptiness under the lock, since a subflow task may
// define detached blocks that modify the taskflow concurrently.
bool empty;
{
std::lock_guard<std::mutex> lock(f._mutex);
empty = f.empty();
}
// No need to create a real topology; just return a dummy future
if(empty || p()) {
c();
std::promise<void> promise;
promise.set_value();
_decrement_topology();
return tf::Future<void>(promise.get_future());
}
// create a topology for this run
auto t = std::make_shared<Topology>(f, std::forward<P>(p), std::forward<C>(c));
// create the future before the topology can be torn down
tf::Future<void> future(t->_promise.get_future(), t);
// modifying topology needs to be protected under the lock
{
std::lock_guard<std::mutex> lock(f._mutex);
f._topologies.push(t);
if(f._topologies.size() == 1) {
_set_up_topology(_this_worker(), t.get());
}
}
return future;
}
// Function: run_until
template <typename P, typename C>
tf::Future<void> Executor::run_until(Taskflow&& f, P&& pred, C&& c) {
std::list<Taskflow>::iterator itr;
{
std::scoped_lock<std::mutex> lock(_taskflows_mutex);
itr = _taskflows.emplace(_taskflows.end(), std::move(f));
itr->_satellite = itr;
}
return run_until(*itr, std::forward<P>(pred), std::forward<C>(c));
}
// Function: corun
template <typename T>
void Executor::corun(T& target) {
auto w = _this_worker();
if(w == nullptr) {
TF_THROW("corun must be called by a worker of the executor");
}
Node parent; // auxiliary parent
_corun_graph(*w, &parent, target.graph());
parent._process_exception();
}
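// Illustrative usage (a minimal sketch): corun lets a task execute another
// taskflow on the same worker without blocking it, avoiding the deadlock
// that executor.run(...).wait() could cause inside a task.
//
//   tf::Taskflow inner, outer;
//   inner.emplace([](){ std::cout << "inner\n"; });
//   outer.emplace([&](){ executor.corun(inner); });
//   executor.run(outer).wait();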
// Function: corun_until
template <typename P>
void Executor::corun_until(P&& predicate) {
auto w = _this_worker();
if(w == nullptr) {
TF_THROW("corun_until must be called by a worker of the executor");
}
_corun_until(*w, std::forward<P>(predicate));
// TODO: exception?
}
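// Illustrative usage (a minimal sketch): corun_until keeps the calling
// worker processing other ready tasks until the predicate returns true.
//
//   std::atomic<bool> ready{false};
//   taskflow.emplace([&](){
//     executor.corun_until([&](){ return ready.load(std::memory_order_acquire); });
//   });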
// Procedure: _increment_topology
inline void Executor::_increment_topology() {
#ifdef __cpp_lib_atomic_wait
_num_topologies.fetch_add(1, std::memory_order_relaxed);
#else
std::lock_guard<std::mutex> lock(_topology_mutex);
++_num_topologies;
#endif
}
// Procedure: _decrement_topology
inline void Executor::_decrement_topology() {
#ifdef __cpp_lib_atomic_wait
if(_num_topologies.fetch_sub(1, std::memory_order_acq_rel) == 1) {
_num_topologies.notify_all();
}
#else
std::lock_guard<std::mutex> lock(_topology_mutex);
if(--_num_topologies == 0) {
_topology_cv.notify_all();
}
#endif
}
// Procedure: wait_for_all
inline void Executor::wait_for_all() {
#ifdef __cpp_lib_atomic_wait
size_t n = _num_topologies.load(std::memory_order_acquire);
while(n != 0) {
_num_topologies.wait(n, std::memory_order_acquire);
n = _num_topologies.load(std::memory_order_acquire);
}
#else
std::unique_lock<std::mutex> lock(_topology_mutex);
_topology_cv.wait(lock, [&](){ return _num_topologies == 0; });
#endif
}
// Function: _set_up_topology
inline void Executor::_set_up_topology(Worker* worker, Topology* tpg) {
// ---- under taskflow lock ----
tpg->_sources.clear();
tpg->_taskflow._graph._clear_detached();
_set_up_graph(tpg->_taskflow._graph, nullptr, tpg, 0, tpg->_sources);
tpg->_join_counter.store(tpg->_sources.size(), std::memory_order_relaxed);
if(worker) {
_schedule(*worker, tpg->_sources);
}
else {
_schedule(tpg->_sources);
}
}
// Function: _set_up_graph
inline void Executor::_set_up_graph(
Graph& g, Node* parent, Topology* tpg, int state, SmallVector<Node*>& src
) {
for(auto node : g._nodes) {
node->_topology = tpg;
node->_parent = parent;
node->_state.store(state, std::memory_order_relaxed);
// a node with no incoming edges (num_dependents() counts predecessors) is a source
if(node->num_dependents() == 0) {
src.push_back(node);
}
node->_set_up_join_counter();
node->_exception_ptr = nullptr;
}
}
// Function: _tear_down_topology
inline void Executor::_tear_down_topology(Worker& worker, Topology* tpg) {
auto &f = tpg->_taskflow;
//assert(tpg == f._topologies.front().get());
// case 1: we still need to run the topology again
if(!tpg->_exception_ptr && !tpg->cancelled() && !tpg->_pred()) {
//assert(tpg->_join_counter == 0);
std::lock_guard<std::mutex> lock(f._mutex);
tpg->_join_counter.store(tpg->_sources.size(), std::memory_order_relaxed);
_schedule(worker, tpg->_sources);
}
// case 2: the final run of this topology
else {
// TODO: if the topology is cancelled, need to release all semaphores
if(tpg->_call != nullptr) {
tpg->_call();
}
// If another run was submitted in the meantime (interleaved under the lock)
if(std::unique_lock<std::mutex> lock(f._mutex); f._topologies.size()>1) {
//assert(tpg->_join_counter == 0);
// Set the promise
tpg->_promise.set_value();
f._topologies.pop();
tpg = f._topologies.front().get();
// decrement the topology count; since this is not the last one, we do not notify
_decrement_topology();
// setting up the topology must happen under the lock, or it can
// introduce a memory-order error with the pop
_set_up_topology(&worker, tpg);
}
else {
//assert(f._topologies.size() == 1);
auto fetched_tpg {std::move(f._topologies.front())};
f._topologies.pop();
auto satellite {f._satellite};
lock.unlock();
// Soon after we carry out the promise, there is no longer any guarantee
// for the lifetime of the associated taskflow.
fetched_tpg->_carry_out_promise();
_decrement_topology();
// remove the taskflow if it is managed by the executor
// TODO: in the future, we may need to synchronize on wait
// (which means the following code should be moved before set_value)
if(satellite) {
std::scoped_lock<std::mutex> satellite_lock(_taskflows_mutex);
_taskflows.erase(*satellite);
}
}
}
}
// ############################################################################
// Definition: Subflow
// ############################################################################
inline void Subflow::join() {
// assert(this_worker().worker == &_worker);
if(!_joinable) {
TF_THROW("subflow not joinable");
}
// only the parent worker can join the subflow
_executor._corun_graph(_worker, _parent, _graph);
// if any exception is caught from subflow tasks, rethrow it
_parent->_process_exception();
_joinable = false;
}
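// Illustrative usage (a minimal sketch): a subflow task spawns child tasks
// at runtime and joins them before returning.
//
//   taskflow.emplace([](tf::Subflow& sf){
//     tf::Task a = sf.emplace([](){ std::cout << "a\n"; });
//     tf::Task b = sf.emplace([](){ std::cout << "b\n"; });
//     a.precede(b);
//     sf.join();  // cooperatively waits until a and b finish
//   });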
inline void Subflow::detach() {
// assert(this_worker().worker == &_worker);
if(!_joinable) {
TF_THROW("subflow already joined or detached");
}
// only the parent worker can detach the subflow
_executor._detach_subflow_task(_worker, _parent, _graph);
_joinable = false;
}
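// Illustrative usage (a minimal sketch): a detached subflow runs
// independently of its parent task; its completion is tied to the
// enclosing topology run rather than to the parent.
//
//   taskflow.emplace([](tf::Subflow& sf){
//     sf.emplace([](){ std::cout << "detached child\n"; });
//     sf.detach();  // the parent does not wait for the child
//   });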
// ############################################################################
// Definition: Runtime
// ############################################################################
// Procedure: schedule
inline void Runtime::schedule(Task task) {
auto node = task._node;
// We must keep the invariant that a task being scheduled has zero
// dependencies (join counter is 0); otherwise we can hit a bug when
// inserting a nested flow (e.g., a module task).
node->_join_counter.store(0, std::memory_order_relaxed);
auto& j = node->_parent ? node->_parent->_join_counter :
node->_topology->_join_counter;
j.fetch_add(1, std::memory_order_relaxed);
_executor._schedule(_worker, node);
}
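// Illustrative usage (a minimal sketch): Runtime::schedule forces a task
// to run even if the scheduler did not select it, e.g., a branch that a
// condition task skipped. Note that C is captured by reference so the
// lambda sees the task assigned by std::tie.
//
//   tf::Task A, B, C, D;
//   std::tie(A, B, C, D) = taskflow.emplace(
//     [] () { return 0; },                         // condition: goes to B only
//     [&C] (tf::Runtime& rt) { rt.schedule(C); },  // B re-schedules C anyway
//     [] () { std::cout << "C\n"; },
//     [] () { std::cout << "D\n"; }
//   );
//   A.precede(B, C, D);
//   executor.run(taskflow).wait();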
// Procedure: corun
template <typename T>
void Runtime::corun(T&& target) {
_executor._corun_graph(_worker, _parent, target.graph());
_parent->_process_exception();
}
// Procedure: corun_until
template <typename P>
void Runtime::corun_until(P&& predicate) {
_executor._corun_until(_worker, std::forward<P>(predicate));
// TODO: exception?
}
// Function: corun_all
inline void Runtime::corun_all() {
_executor._corun_until(_worker, [this] () -> bool {
return _parent->_join_counter.load(std::memory_order_acquire) == 0;
});
_parent->_process_exception();
}
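// Illustrative usage (a minimal sketch): corun_all joins every async task
// previously spawned from this runtime.
//
//   taskflow.emplace([](tf::Runtime& rt){
//     rt.silent_async([](){ std::cout << "t1\n"; });
//     rt.silent_async([](){ std::cout << "t2\n"; });
//     rt.corun_all();  // cooperatively waits for t1 and t2
//   });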
// Destructor: cooperatively joins all pending async tasks spawned from this
// runtime (same loop as corun_all, but without rethrowing their exceptions)
inline Runtime::~Runtime() {
_executor._corun_until(_worker, [this] () -> bool {
return _parent->_join_counter.load(std::memory_order_acquire) == 0;
});
}
// ------------------------------------
// Runtime::silent_async series
// ------------------------------------
// Function: _silent_async
template <typename P, typename F>
void Runtime::_silent_async(Worker& w, P&& params, F&& f) {
_parent->_join_counter.fetch_add(1, std::memory_order_relaxed);
auto node = node_pool.animate(
std::forward<P>(params), _parent->_topology, _parent, 0,
std::in_place_type_t<Node::Async>{}, std::forward<F>(f)
);
_executor._schedule(w, node);
}
// Function: silent_async
template <typename F>
void Runtime::silent_async(F&& f) {
_silent_async(*_executor._this_worker(), DefaultTaskParams{}, std::forward<F>(f));
}
// Function: silent_async
template <typename P, typename F>
void Runtime::silent_async(P&& params, F&& f) {
_silent_async(*_executor._this_worker(), std::forward<P>(params), std::forward<F>(f));
}
// Function: silent_async_unchecked
template <typename F>
void Runtime::silent_async_unchecked(F&& f) {
_silent_async(_worker, DefaultTaskParams{}, std::forward<F>(f));
}
// Function: silent_async_unchecked
template <typename P, typename F>
void Runtime::silent_async_unchecked(P&& params, F&& f) {
_silent_async(_worker, std::forward<P>(params), std::forward<F>(f));
}
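// Note: the *_unchecked variants use the runtime's own worker directly
// instead of looking up the calling worker, so they are only safe to call
// from within the runtime task itself.
//
//   taskflow.emplace([](tf::Runtime& rt){
//     rt.silent_async_unchecked([](){ std::cout << "fast path\n"; });
//     rt.corun_all();
//   });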
// ------------------------------------
// Runtime::async series
// ------------------------------------
// Function: _async
template <typename P, typename F>
auto Runtime::_async(Worker& w, P&& params, F&& f) {
_parent->_join_counter.fetch_add(1, std::memory_order_relaxed);
using R = std::invoke_result_t<std::decay_t<F>>;
std::packaged_task<R()> p(std::forward<F>(f));
auto fu{p.get_future()};
auto node = node_pool.animate(
std::forward<P>(params), _parent->_topology, _parent, 0,
std::in_place_type_t<Node::Async>{},
[p=make_moc(std::move(p))] () mutable { p.object(); }
);
_executor._schedule(w, node);
return fu;
}
// Function: async
template <typename F>
auto Runtime::async(F&& f) {
return _async(*_executor._this_worker(), DefaultTaskParams{}, std::forward<F>(f));
}
// Function: async
template <typename P, typename F>
auto Runtime::async(P&& params, F&& f) {
return _async(*_executor._this_worker(), std::forward<P>(params), std::forward<F>(f));
}
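// Illustrative usage (a minimal sketch): Runtime::async returns a
// std::future for the task's result; prefer joining through corun_all,
// as waiting on the future directly would block the worker.
//
//   taskflow.emplace([](tf::Runtime& rt){
//     std::future<int> fu = rt.async([](){ return 42; });
//     rt.corun_all();  // cooperative join; fu is now ready
//     std::cout << fu.get() << '\n';
//   });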
} // end of namespace tf -----------------------------------------------------