mesytec-mnode/external/taskflow-3.8.0/taskflow/core/tsq.hpp

#pragma once
#include "../utility/macros.hpp"
#include "../utility/traits.hpp"
/**
@file tsq.hpp
@brief task queue include file
*/
namespace tf {
// ----------------------------------------------------------------------------
// Task Queue
// ----------------------------------------------------------------------------
/**
@class UnboundedTaskQueue
@tparam T data type (must be a pointer type)
@brief class to create a lock-free unbounded single-producer multiple-consumer queue
This class implements the work-stealing queue described in the paper,
<a href="https://www.di.ens.fr/~zappa/readings/ppopp13.pdf">Correct and Efficient Work-Stealing for Weak Memory Models</a>.
Only the queue owner can perform pop and push operations,
while others can steal data from the queue simultaneously.
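
A minimal usage sketch (illustrative only; @c Task is a placeholder for any
pointer type satisfying the pointer-type requirement):

@code{.cpp}
tf::UnboundedTaskQueue<Task*> queue;

// owner thread: push and pop at the bottom of the queue
queue.push(task);                 // task is a Task*
Task* item = queue.pop();         // nullptr if the queue is empty

// any other thread: steal from the top of the queue
Task* stolen = queue.steal();     // nullptr if the steal failed
@endcode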
*/
template <typename T>
class UnboundedTaskQueue {
static_assert(std::is_pointer_v<T>, "T must be a pointer type");
struct Array {
int64_t C;            // capacity (a power of two)
int64_t M;            // index mask (C - 1)
std::atomic<T>* S;    // ring-buffer storage
explicit Array(int64_t c) :
C {c},
M {c-1},
S {new std::atomic<T>[static_cast<size_t>(C)]} {
}
~Array() {
delete [] S;
}
int64_t capacity() const noexcept {
return C;
}
void push(int64_t i, T o) noexcept {
S[i & M].store(o, std::memory_order_relaxed);
}
T pop(int64_t i) noexcept {
return S[i & M].load(std::memory_order_relaxed);
}
Array* resize(int64_t b, int64_t t) {
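// allocate an array of twice the capacity and copy the live range [t, b) over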
Array* ptr = new Array {2*C};
for(int64_t i=t; i!=b; ++i) {
ptr->push(i, pop(i));
}
return ptr;
}
};
// Aligning _top and _bottom to twice the cacheline size (rather than one)
// seems to give the best performance.
alignas(2*TF_CACHELINE_SIZE) std::atomic<int64_t> _top;
alignas(2*TF_CACHELINE_SIZE) std::atomic<int64_t> _bottom;
std::atomic<Array*> _array;
std::vector<Array*> _garbage;
public:
/**
@brief constructs the queue with a capacity given as a base-2 logarithm
@param LogSize the base-2 logarithm of the queue size
*/
explicit UnboundedTaskQueue(int64_t LogSize = TF_DEFAULT_UNBOUNDED_TASK_QUEUE_LOG_SIZE);
/**
@brief destructs the queue
*/
~UnboundedTaskQueue();
/**
@brief queries if the queue is empty at the time of this call
*/
bool empty() const noexcept;
/**
@brief queries the number of items at the time of this call
*/
size_t size() const noexcept;
/**
@brief queries the capacity of the queue
*/
int64_t capacity() const noexcept;
/**
@brief inserts an item to the queue
@param item the item to push to the queue
Only the owner thread can insert an item to the queue.
The operation can trigger the queue to resize its capacity
if more space is required.
*/
void push(T item);
/**
@brief pops out an item from the queue
Only the owner thread can pop out an item from the queue.
The return can be a @c nullptr if this operation failed (empty queue).
*/
T pop();
/**
@brief steals an item from the queue
Any thread can try to steal an item from the queue.
The return can be a @c nullptr if this operation failed (the queue is not necessarily empty).
*/
T steal();
private:
Array* resize_array(Array* a, int64_t b, int64_t t);
};
// Constructor
template <typename T>
UnboundedTaskQueue<T>::UnboundedTaskQueue(int64_t LogSize) {
_top.store(0, std::memory_order_relaxed);
_bottom.store(0, std::memory_order_relaxed);
_array.store(new Array{(int64_t{1} << LogSize)}, std::memory_order_relaxed);
_garbage.reserve(32);
}
// Destructor
template <typename T>
UnboundedTaskQueue<T>::~UnboundedTaskQueue() {
for(auto a : _garbage) {
delete a;
}
delete _array.load();
}
// Function: empty
template <typename T>
bool UnboundedTaskQueue<T>::empty() const noexcept {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_relaxed);
return (b <= t);
}
// Function: size
template <typename T>
size_t UnboundedTaskQueue<T>::size() const noexcept {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_relaxed);
return static_cast<size_t>(b >= t ? b - t : 0);
}
// Function: push
template <typename T>
void UnboundedTaskQueue<T>::push(T o) {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_acquire);
Array* a = _array.load(std::memory_order_relaxed);
// queue is full
if TF_UNLIKELY(a->capacity() - 1 < (b - t)) {
a = resize_array(a, b, t);
}
a->push(b, o);
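// release fence: make the stored item visible before publishing the new bottom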
std::atomic_thread_fence(std::memory_order_release);
_bottom.store(b + 1, std::memory_order_relaxed);
}
// Function: pop
template <typename T>
T UnboundedTaskQueue<T>::pop() {
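// reserve the bottom slot first, then synchronize with concurrent steals on _top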
int64_t b = _bottom.load(std::memory_order_relaxed) - 1;
Array* a = _array.load(std::memory_order_relaxed);
_bottom.store(b, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_seq_cst);
int64_t t = _top.load(std::memory_order_relaxed);
T item {nullptr};
if(t <= b) {
item = a->pop(b);
if(t == b) {
// only one item left; compete with stealers (a failed CAS means it just got stolen)
if(!_top.compare_exchange_strong(t, t+1,
std::memory_order_seq_cst,
std::memory_order_relaxed)) {
item = nullptr;
}
_bottom.store(b + 1, std::memory_order_relaxed);
}
}
else {
_bottom.store(b + 1, std::memory_order_relaxed);
}
return item;
}
// Function: steal
template <typename T>
T UnboundedTaskQueue<T>::steal() {
int64_t t = _top.load(std::memory_order_acquire);
std::atomic_thread_fence(std::memory_order_seq_cst);
int64_t b = _bottom.load(std::memory_order_acquire);
T item {nullptr};
if(t < b) {
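// non-empty: read the item speculatively, then try to claim it by advancing _top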
Array* a = _array.load(std::memory_order_consume);
item = a->pop(t);
if(!_top.compare_exchange_strong(t, t+1,
std::memory_order_seq_cst,
std::memory_order_relaxed)) {
return nullptr;
}
}
return item;
}
// Function: capacity
template <typename T>
int64_t UnboundedTaskQueue<T>::capacity() const noexcept {
return _array.load(std::memory_order_relaxed)->capacity();
}
template <typename T>
typename UnboundedTaskQueue<T>::Array*
UnboundedTaskQueue<T>::resize_array(Array* a, int64_t b, int64_t t) {
Array* tmp = a->resize(b, t);
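// defer freeing the old array since concurrent steals may still be reading it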
_garbage.push_back(a);
std::swap(a, tmp);
_array.store(a, std::memory_order_release);
// Note: using relaxed here, as in the original paper, makes thread sanitizer complain
//_array.store(a, std::memory_order_relaxed);
return a;
}
// ----------------------------------------------------------------------------
// BoundedTaskQueue
// ----------------------------------------------------------------------------
/**
@class BoundedTaskQueue
@tparam T data type
@tparam LogSize the base-2 logarithm of the queue size
@brief class to create a lock-free bounded single-producer multiple-consumer queue
This class implements the work-stealing queue described in the paper,
"Correct and Efficient Work-Stealing for Weak Memory Models,"
available at https://www.di.ens.fr/~zappa/readings/ppopp13.pdf.
Only the queue owner can perform pop and push operations,
while others can steal data from the queue.
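
A minimal usage sketch (illustrative only; @c Task is a placeholder type):

@code{.cpp}
tf::BoundedTaskQueue<Task*> queue;

// owner thread: try_push fails when the fixed-size buffer is full
if(queue.try_push(task) == false) {
  // e.g., fall back to an unbounded queue or run the task inline
}

// any other thread: steal from the top of the queue
Task* stolen = queue.steal();     // nullptr if the steal failed
@endcode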
*/
template <typename T, size_t LogSize = TF_DEFAULT_BOUNDED_TASK_QUEUE_LOG_SIZE>
class BoundedTaskQueue {
static_assert(std::is_pointer_v<T>, "T must be a pointer type");
constexpr static int64_t BufferSize = int64_t{1} << LogSize;
constexpr static int64_t BufferMask = (BufferSize - 1);
static_assert((BufferSize >= 2) && ((BufferSize & (BufferSize - 1)) == 0));
alignas(2*TF_CACHELINE_SIZE) std::atomic<int64_t> _top {0};
alignas(2*TF_CACHELINE_SIZE) std::atomic<int64_t> _bottom {0};
alignas(2*TF_CACHELINE_SIZE) std::atomic<T> _buffer[BufferSize];
public:
/**
@brief constructs the queue with a capacity fixed at compile time by @c LogSize
*/
BoundedTaskQueue() = default;
/**
@brief destructs the queue
*/
~BoundedTaskQueue() = default;
/**
@brief queries if the queue is empty at the time of this call
*/
bool empty() const noexcept;
/**
@brief queries the number of items at the time of this call
*/
size_t size() const noexcept;
/**
@brief queries the capacity of the queue
*/
constexpr size_t capacity() const;
/**
@brief tries to insert an item to the queue
@tparam O data type
@param item the item to perfect-forward to the queue
@return @c true if the insertion succeeds, or @c false otherwise (queue is full)
Only the owner thread can insert an item to the queue.
*/
template <typename O>
bool try_push(O&& item);
/**
@brief tries to insert an item to the queue or invoke the callable if fails
@tparam O data type
@tparam C callable type
@param item the item to perfect-forward to the queue
@param on_full callable to invoke when the queue is full (insertion fails)
Only the owner thread can insert an item to the queue.
*/
template <typename O, typename C>
void push(O&& item, C&& on_full);
/**
@brief pops out an item from the queue
Only the owner thread can pop out an item from the queue.
The return can be a @c nullptr if this operation failed (empty queue).
*/
T pop();
/**
@brief steals an item from the queue
Any thread can try to steal an item from the queue.
The return can be a @c nullptr if this operation failed (the queue is not necessarily empty).
*/
T steal();
};
// Function: empty
template <typename T, size_t LogSize>
bool BoundedTaskQueue<T, LogSize>::empty() const noexcept {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_relaxed);
return b <= t;
}
// Function: size
template <typename T, size_t LogSize>
size_t BoundedTaskQueue<T, LogSize>::size() const noexcept {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_relaxed);
return static_cast<size_t>(b >= t ? b - t : 0);
}
// Function: try_push
template <typename T, size_t LogSize>
template <typename O>
bool BoundedTaskQueue<T, LogSize>::try_push(O&& o) {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_acquire);
// queue is full
if TF_UNLIKELY((b - t) >= BufferSize - 1) {
return false;
}
_buffer[b & BufferMask].store(std::forward<O>(o), std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_release);
_bottom.store(b + 1, std::memory_order_relaxed);
return true;
}
// Function: push
template <typename T, size_t LogSize>
template <typename O, typename C>
void BoundedTaskQueue<T, LogSize>::push(O&& o, C&& on_full) {
int64_t b = _bottom.load(std::memory_order_relaxed);
int64_t t = _top.load(std::memory_order_acquire);
// queue is full
if TF_UNLIKELY((b - t) >= BufferSize - 1) {
on_full();
return;
}
_buffer[b & BufferMask].store(std::forward<O>(o), std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_release);
_bottom.store(b + 1, std::memory_order_relaxed);
}
// Function: pop
template <typename T, size_t LogSize>
T BoundedTaskQueue<T, LogSize>::pop() {
int64_t b = _bottom.load(std::memory_order_relaxed) - 1;
_bottom.store(b, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_seq_cst);
int64_t t = _top.load(std::memory_order_relaxed);
T item {nullptr};
if(t <= b) {
item = _buffer[b & BufferMask].load(std::memory_order_relaxed);
if(t == b) {
// only one item left; compete with stealers (a failed CAS means it just got stolen)
if(!_top.compare_exchange_strong(t, t+1,
std::memory_order_seq_cst,
std::memory_order_relaxed)) {
item = nullptr;
}
_bottom.store(b + 1, std::memory_order_relaxed);
}
}
else {
_bottom.store(b + 1, std::memory_order_relaxed);
}
return item;
}
// Function: steal
template <typename T, size_t LogSize>
T BoundedTaskQueue<T, LogSize>::steal() {
int64_t t = _top.load(std::memory_order_acquire);
std::atomic_thread_fence(std::memory_order_seq_cst);
int64_t b = _bottom.load(std::memory_order_acquire);
T item{nullptr};
if(t < b) {
item = _buffer[t & BufferMask].load(std::memory_order_relaxed);
if(!_top.compare_exchange_strong(t, t+1,
std::memory_order_seq_cst,
std::memory_order_relaxed)) {
return nullptr;
}
}
return item;
}
// Function: capacity
template <typename T, size_t LogSize>
constexpr size_t BoundedTaskQueue<T, LogSize>::capacity() const {
return static_cast<size_t>(BufferSize - 1);
}
} // end of namespace tf -----------------------------------------------------