/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /*! * \file buffer.hpp * \ingroup building_blocks * * \brief This file contains the definition of the bounded \p SPSC channel * buffer used in FastFlow * * Single-Writer Single-Reader circular buffer. * No lock is needed around pop and push methods. * Wait-free and fence-free (in the TSO model). * * A single NULL value is used to indicate buffer full and * buffer empty conditions. * * More details about the SWSR_Ptr_Buffer implementation * can be found in: * * Massimo Torquati, "Single-Producer/Single-Consumer Queue on Shared Cache * Multi-Core Systems", TR-10-20, Computer Science Department, University * of Pisa Italy,2010 * ( http://compass2.di.unipi.it/TR/Files/TR-10-20.pdf.gz ) * * M. Aldinucci, M. Danelutto, P. Kilpatrick, M. Meneghin, and M. Torquati, * "An Efficient Unbounded Lock-Free Queue for Multi-core Systems," * in Proc. of 18th Intl. Euro-Par 2012 Parallel Processing, Rhodes Island, * Greece, 2012, pp. 662-673. doi:10.1007/978-3-642-32820-6_65 */ /* *************************************************************************** * * FastFlow is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License version 3 as * published by the Free Software Foundation. * Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3 * or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT) * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * **************************************************************************** */ /* Author: Massimo Torquati * */ #ifndef FF_SWSR_PTR_BUFFER_HPP #define FF_SWSR_PTR_BUFFER_HPP #include #include //#include //#include #include #include #if defined(__APPLE__) #include #endif #include namespace ff { // 64 bytes is the common size of a cache line static const int longxCacheLine = (CACHE_LINE_SIZE/sizeof(long)); /*! * \class SWSR_Ptr_Buffer * \ingroup building_blocks * * \brief SPSC bound channel (Single-Writer/Single-Reader) * * This class describes the SWSR circular buffer, used in FastFlow to * implement a lock-free (wait-free) bounded FIFO queue. No lock is needed * around pop and push methods. * * A single NULL value is used to indicate buffer full and buffer empty * conditions. * * This class is defined in \ref buffer.hpp * */ class SWSR_Ptr_Buffer { /** * experimentally we found that a good value is between * 2 and 6 cache lines (16 to 48 entries respectively) */ enum {MULTIPUSH_BUFFER_SIZE=16}; private: // Padding is required to avoid false-sharing between // core's private cache #if defined(NO_VOLATILE_POINTERS) unsigned long pread; long padding1[longxCacheLine-1]; unsigned long pwrite; long padding2[longxCacheLine-1]; #else ALIGN_TO_PRE(CACHE_LINE_SIZE) volatile unsigned long pread; ALIGN_TO_POST(CACHE_LINE_SIZE) ALIGN_TO_PRE(CACHE_LINE_SIZE) volatile unsigned long pwrite; ALIGN_TO_POST(CACHE_LINE_SIZE) #endif size_t size; void ** buf; #if defined(SWSR_MULTIPUSH) /* massimot: experimental code (see multipush) * */ long padding3[longxCacheLine-2]; // local multipush buffer used by the mpush method void * multipush_buf[MULTIPUSH_BUFFER_SIZE]; int mcnt; #endif public: /* pointer to member function for the push method */ bool (SWSR_Ptr_Buffer::*pushPMF)(void * const); /* pointer to member function for the ppop method */ bool (SWSR_Ptr_Buffer::*popPMF)(void **); public: /** * Constructor. * * \param n the size of the buffer */ SWSR_Ptr_Buffer(unsigned long n, const bool=true): pread(0),pwrite(0),size(n),buf(0) { pushPMF=&SWSR_Ptr_Buffer::push; popPMF =&SWSR_Ptr_Buffer::pop; // Avoid unused private field warning on padding1, padding2 //(void)padding1; //(void)padding2; } /** * Default destructor */ ~SWSR_Ptr_Buffer() { // freeAlignedMemory is a function defined in 'sysdep.h' freeAlignedMemory(buf); } /** * It initialise the buffer. Allocate space (\p size) of possibly aligned * memory and reset the pointers (read pointer and write pointer) by * placing them at the beginning of the buffer. * * \return TODO */ bool init(const bool startatlineend=false) { if (buf || (size==0)) return false; #if defined(SWSR_MULTIPUSH) if (size= size) ? (1-size): 1); // circular buffer return true; } return false; } /** * The multipush method, which pushes a batch of elements (array) in the * queue. NOTE: len should be a multiple of longxCacheLine/sizeof(void*) * */ inline bool multipush(void * const data[], int len) { if ((unsigned)len>=size) return false; unsigned long last = pwrite + ((pwrite+ --len >= size) ? (len-size): len); unsigned long r = len-(last+1), l=last; unsigned long i; if (buf[last]==NULL) { if (last < pwrite) { for(i=len;i>r;--i,--l) buf[l] = data[i]; for(i=(size-1);i>=pwrite;--i,--r) buf[i] = data[r]; } else for(int i=len;i>=0;--i) buf[pwrite+i] = data[i]; WMB(); pwrite = pwrite + ((last+1 >= size) ? 0 : (last+1)); #if defined(SWSR_MULTIPUSH) mcnt = 0; // reset mpush counter #endif return true; } return false; } #if defined(SWSR_MULTIPUSH) // massimot: experimental code /** * This method provides the same interface of the \p push method, but it * allows to provide a batch of items to * the consumer, thus ensuring better cache locality and * lowering the cache trashing. * * \param data Element to be pushed in the buffer */ inline bool mpush(void * const data) { assert(data); if (mcnt==MULTIPUSH_BUFFER_SIZE) return multipush(multipush_buf,MULTIPUSH_BUFFER_SIZE); multipush_buf[mcnt++]=data; if (mcnt==MULTIPUSH_BUFFER_SIZE) return multipush(multipush_buf,MULTIPUSH_BUFFER_SIZE); return true; } /* REW -- ? */ inline bool flush() { return (mcnt ? multipush(multipush_buf,mcnt) : true); } #endif /* SWSR_MULTIPUSH */ /** * It is like pop but doesn't copy any data. * * \return \p true is alway returned. */ inline bool inc() { buf[pread]=NULL; pread = pread + ((pread+1 >= size) ? (1-size): 1); // circular buffer return true; } /** * Pop method: get the next value from the FIFO buffer. * * \param data Pointer to the location where to store the * data popped from the buffer. */ inline bool pop(void ** data) { /* modify only pread pointer */ if (empty()) return false; *data = buf[pread]; //std::atomic_thread_fence(std::memory_order_acquire); return inc(); } /** * It returns the "head" of the buffer, i.e. the element pointed by the read * pointer (it is a FIFO queue, so \p push on the tail and \p pop from the * head). * * \return The head of the buffer. */ inline void * top() const { return buf[pread]; } /** * Reset the buffer and move \p read and \p write pointers to the beginning * of the buffer (i.e. position 0). Also, the entire buffer is cleaned and * set to 0 */ inline void reset(const bool startatlineend=false) { if (startatlineend) { /** * This is a good starting point if the multipush method will be * used in order to reduce cache trashing. */ pwrite = longxCacheLine-1; pread = longxCacheLine-1; } else { pread=0; pwrite=0; } #if defined(SWSR_MULTIPUSH) mcnt = 0; #endif if (size<=512) for(unsigned long i=0;i0) return (unsigned long)len; if (len<0) return (unsigned long)(size+len); if (buf[tpwrite]==NULL) return 0; return size; } // Not yet implemented inline bool mp_push(void *const) { abort(); return false; } // Not yet implemented inline bool mc_pop(void **) { abort(); return false; } inline bool isFixedSize() const { return true; } }; /*! * \class Lamport_Buffer. * \ingroup aux_classes * * \brief Implementation of the well-known Lamport's wait-free circular * buffer. Not currently used. * * */ class Lamport_Buffer { private: // Padding is required to avoid false-sharing between // core's private cache volatile unsigned long pread; long padding1[longxCacheLine-1]; volatile unsigned long pwrite; long padding2[longxCacheLine-1]; const size_t size; void ** buf; public: /** * Constructor */ Lamport_Buffer(unsigned long n, const bool=true): pread(0),pwrite(0),size(n),buf(0) { // Avoid unused private field warning on padding1, padding2 (void)padding1; (void)padding2; } /** * Destructor */ ~Lamport_Buffer() { freeAlignedMemory(buf); } /** * It initialize the circular buffer. * * \return If successful \p true is returned, otherwise \p false is * returned. */ bool init() { assert(buf==0); buf=(void**)getAlignedMemory(longxCacheLine*sizeof(long),size*sizeof(void*)); if (!buf) return false; reset(); return true; } /** * It return true if the buffer is empty */ inline bool empty() { return (pwrite == pread); } /** * It return true if there is at least one room in the buffer */ inline bool available() { const unsigned long next = pwrite + ((pwrite+1>=size)?(1-size):1); return (next != pread); } /** * TODO */ inline size_t buffersize() const { return size; }; /** * TODO */ inline bool push(void * const data) { assert(data); const unsigned long next = pwrite + ((pwrite+1>=size)?(1-size):1); if (next != pread) { buf[pwrite] = data; /* We have to ensure that all writes have been committed * in memory before we change the value of the pwrite * reference otherwise the reader can read stale data. */ WMB(); pwrite =next; return true; } return false; } /** * TODO */ inline bool pop(void ** data) { assert(data); if (empty()) return false; *data = buf[pread]; pread = pread + ((pread+1 >= size) ? (1-size): 1); return true; } /** * TODO */ inline void reset() { pread=0; pwrite=0; if (size<=512) for(unsigned long i=0;i=0) return len; //return size+len; long tpread=pread, tpwrite=pwrite; long len = tpwrite-tpread; if (len>0) return (unsigned long)len; if (len<0) return (unsigned long)(size+len); if (buf[tpwrite]==NULL) return 0; return size; } }; /*! * @} * \endlink */ } // namespace ff #endif /* FF_SWSR_PTR_BUFFER_HPP */