/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*!
* \file allocator.hpp
* \ingroup building_blocks core_patterns
*
* \brief Implementation of FastFlow's lock-free allocator.
*
* Here we define the \ref ff::ff_allocator (1) and the ff::FFAllocator (2).
*
* 1. The ff_allocator allocates only large chunks of memory, slicing them up
* into little chunks all with the same size. Only one thread can perform
* malloc operations while any number of threads may perform frees using
* the ff_allocator.
*
* The ff_allocator is based on the idea of Slab Allocator, for more details
* about Slab Allocator please see:
* Bonwick, Jeff. "The Slab Allocator: An Object-Caching Kernel Memory
* Allocator." Boston USENIX Proceedings, 1994.
*
* 2. Based on the ff_allocator, the FFAllocator has been implemented.
* It might be used by any number of threads to dynamically
* allocate/deallocate memory. You have to include allocator.hpp and just use
* <tt> ff::ff_malloc, ff::ff_free(), ff::ff_realloc, ff::ff_posix_memalign </tt>
*
* \note Whenever possible, it is better to use the ff_allocator directly,
* as it allows more control and is more efficient.
*
*/
/* ***************************************************************************
*
* FastFlow is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 3 as
* published by the Free Software Foundation.
* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* As a special exception, you may use this file as part of a free software
* library without restriction. Specifically, if other files instantiate
* templates or use macros or inline functions from this file, or you compile
* this file and link it with other files to produce an executable, this file
* does not by itself cause the resulting executable to be covered by the GNU
* General Public License. This exception does not however invalidate any
* other reasons why the executable file might be covered by the GNU General
* Public License.
*
* **************************************************************************/
/*
*
* Author:
* Massimo Torquati <torquati@di.unipi.it> or <massimotor@gmail.com>
*
* - June 2008 first version
* - March 2011 main rework
* - June 2012 - performance improvement in getfrom_fb
* - statistics cleanup
* - Sept 2015 Marco Aldinucci: porting to c++11
*
*/
#ifndef FF_ALLOCATOR_HPP
#define FF_ALLOCATOR_HPP
#include <assert.h>
#include <algorithm>
#include <ff/platforms/platform.h>
// #if defined(HAVE_ATOMIC_H)
// #include <asm/atomic.h>
// #else
// #include <ff/atomic/atomic.h>
// #endif
#include <atomic>
#if defined(ALLOCATOR_STATS)
#include <iostream>
#endif
//#include <pthread.h>
#include <ff/ubuffer.hpp>
#include <ff/spin-lock.hpp>
#include <ff/svector.hpp>
#include <ff/utils.hpp>
//#define DEBUG_ALLOCATOR 1
#if defined(DEBUG_ALLOCATOR)
#define DBG(X) X
#else
#define DBG(X)
#endif
namespace ff {
/*
* \ingroup building_blocks
*
*
*/
enum { N_SLABBUFFER=9, MAX_SLABBUFFER_SIZE=8192};
static const int POW2_MIN = 5;
static const int POW2_MAX = 13;
// array containing the different possible sizes for a slab buffer
static const int buffersize[N_SLABBUFFER] =
{ 32, 64,128,256,512,1024,2048,4096,8192 };
// array containing the default number of buffers per segment for each slab
// size above: entry i gives how many buffers of size buffersize[i] are kept
// in the corresponding cache (e.g. 512 buffers of 32 bytes, but only 8
// buffers of 8192 bytes).
static const int nslabs_default[N_SLABBUFFER] =
{ 512,512,512,512,128, 64, 32, 16, 8 };
#if defined(ALLOCATOR_STATS)
struct all_stats {
std::atomic_long nmalloc;
std::atomic_long nfree;
std::atomic_long nrealloc;
std::atomic_long sysmalloc;
std::atomic_long sysfree;
std::atomic_long sysrealloc;
std::atomic_long hit;
std::atomic_long miss;
std::atomic_long leakremoved;
std::atomic_long memallocated;
std::atomic_long memfreed;
std::atomic_long segmalloc;
std::atomic_long segfree;
std::atomic_long newallocator;
std::atomic_long deleteallocator;
all_stats() {
nmalloc.store(0);
nfree.store(0);
nrealloc.store(0);
sysmalloc.store(0);
sysfree.store(0);
sysrealloc.store(0);
hit.store(0);
miss.store(0);
leakremoved.store(0);
memallocated.store(0);
memfreed.store(0);
segmalloc.store(0);
segfree.store(0);
newallocator.store(0);
deleteallocator.store(0);
}
static all_stats *instance() {
static all_stats allstats;
return &allstats;
}
void print(std::ostream & out = std::cout) {
out << "\n--- Allocator Stats ---\n"
<< "malloc = " << nmalloc << "\n"
<< " cache hit = " << hit << "\n"
<< " cache miss = " << miss << "\n"
<< " leakremoved = " << leakremoved << "\n\n"
<< "free = " << nfree << "\n"
<< "realloc = " << nrealloc << "\n\n"
<< "mem. allocated= " << memallocated << "\n"
<< "mem. freed = " << memfreed << "\n\n"
<< "segmalloc = " << segmalloc << "\n"
<< "segfree = " << segfree << "\n\n"
<< "sysmalloc = " << sysmalloc << "\n"
<< "sysfree = " << sysfree << "\n"
<< "sysrealloc = " << sysrealloc << "\n\n";
if (newallocator>0) {
out << "\n ---- How many allocators ----\n"
<< "new allocator = " << newallocator << "\n"
<< "delete allocator = " << deleteallocator << "\n\n";
}
}
};
#define ALLSTATS(X) X
#else
#define ALLSTATS(X)
#endif /* ALLOCATOR_STATS */
static inline bool isPowerOfTwoMultiple(size_t alignment, size_t multiple) {
return alignment && ((alignment & (alignment-multiple))==0);
}
/*
* \class SegmentAllocator
*
* \brief Uses standard \p libc allocator to allocate chunks of (possibly
* aligned) memory. It also keeps track of the amount of allocated memory.
*
*/
class SegmentAllocator {
public:
SegmentAllocator():memory_allocated(0) { }
~SegmentAllocator() {}
/*
* Allocates a new segment of (possibly aligned) memory. \n This method
* uses standard \p malloc to allocate memory (or \p posix_memalign when
* possible). \p malloc should guarantee a sufficiently well aligned memory
* for any purpose, while \p posix_memalign is used to effectively allocate
* memory aligned in a defined way (i.e. the address of the allocated
* memory will be a multiple of a specific value).
*
* \param segment_size the quantity of memory to be allocated.
*
* \returns a pointer to the chunk of memory allocated.
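*
* A rough sketch of what happens underneath (illustrative only; the real
* call goes through the FastFlow helper getAlignedMemory()):
* \code{.cpp}
* void *ptr = nullptr;
* size_t page = sysconf(_SC_PAGESIZE);                // page-aligned segments
* if (::posix_memalign(&ptr, page, segment_size) != 0) // returns 0 on success
*     ptr = nullptr;
* \endcode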
*/
void * newsegment(size_t segment_size) {
//void * ptr = ::malloc(segment_size);
void * ptr=NULL;
// MA: Check if this does not lead to a page swap
#if defined (_SC_PAGESIZE)
ptr = getAlignedMemory(sysconf(_SC_PAGESIZE),segment_size);
#else
ptr = getAlignedMemory(4096,segment_size); // To be fixed for MSC
#endif
if (!ptr) return NULL;
memory_allocated += segment_size; // update the amount of allocated memory
ALLSTATS(all_stats::instance()->segmalloc.fetch_add(1);
all_stats::instance()->memallocated.fetch_add(segment_size));
#if defined(MAKE_VALGRIND_HAPPY)
memset(ptr,0,segment_size);
#endif
return ptr;
}
/*
* Free the \p segment_size bytes of memory pointed to by
* \p ptr.\n First, it checks that the total allocated memory is at least
* as big as \p segment_size. Then, it frees memory and updates the counter
* of the total allocated memory.
*
* \param ptr pointer to the memory chunk to be freed
* \param segment_size quantity of memory to free from the chunk
* requested.
*/
void freesegment(void * ptr, size_t segment_size) {
DBG(assert(memory_allocated >=segment_size));
//::free(ptr);
freeAlignedMemory(ptr);
memory_allocated -= segment_size;
ALLSTATS( all_stats::instance()->segfree.fetch_add(1);
all_stats::instance()->memfreed.fetch_add(segment_size) );
}
/*
* \return Returns the amount of allocated memory
*/
size_t getallocated() const { return memory_allocated; }
private:
size_t memory_allocated;
};
// forward declarations
class SlabCache;
class ff_allocator;
/*
* \struct Buf_ctl
*
* \brief A buffer controller
*
* Each slab buffer has a \p Buf_ctl structure at the beginning of the data.
* This structure holds a back-pointer to the controlling slab.
*
*
*/
struct Buf_ctl { SlabCache * ptr; };
/*
* \struct Seg_ctl
*
* \brief The segment controller
*
* Each slab segment has a \p Seg_ctl structure at the beginning of the
* segment, responsible for maintaining the linkage with other slabs in the
* cache, the number of buffers in use and the number of available buffers.
*
*/
struct Seg_ctl {
// number of buffers in use
DBG(size_t refcount;)
// reference to the SlabCache that owns the segment
SlabCache * cacheentry;
// reference to the allocator
ff_allocator * allocator;
// number of items in the buffer freelist (i.e. Buffers available)
//atomic_long_t availbuffers;
size_t availbuffers;
};
/*
* \struct xThreadData
*
* \brief A buffer shared among threads (i.e. leak queue)
*
* A buffer that is used as a channel for exchanging shared data among threads.
* It is built upon FastFlow's unbounded Single-Writer / Single-Reader queues
* (\p uSWSR_Ptr_Buffer). These buffers guarantee deadlock avoidance and
* scalability in stream-oriented applications.
*
*
*/
struct xThreadData {
enum { LEAK_CHUNK=4096 };
xThreadData(const bool /*allocator*/, size_t /*nslabs*/, const pthread_t key)
: leak(0), key(key) {
//leak = (uSWSR_Ptr_Buffer*)::malloc(sizeof(uSWSR_Ptr_Buffer));
leak = (uSWSR_Ptr_Buffer*)getAlignedMemory(128,sizeof(uSWSR_Ptr_Buffer));
if (!leak) abort();
new (leak) uSWSR_Ptr_Buffer(LEAK_CHUNK);
if (!leak->init()) abort();
}
~xThreadData() {
if (leak) { leak->~uSWSR_Ptr_Buffer(); freeAlignedMemory(leak); } // free(leak)
}
uSWSR_Ptr_Buffer * leak; //
const pthread_t key; // used to identify a thread (threadID)
long padding[longxCacheLine-((sizeof(const pthread_t)+sizeof(uSWSR_Ptr_Buffer*))/sizeof(long))]; //
};
/*
* SlabCache
* ----------------- <---
* | size | |
* | cur ptr seg | |
* | availbuffers | |
* | vect of buf-ptr | |
* ----------------- |
* | -------------<--------------------<-----------
* | | | |
* | v | |
* (Slab segment) | -------------------------------------------------
* | | | p | | p |
* |---|- cacheentry | t | data | t | data
* <---------------------------|- allocator | r | | r |
* (ptr to ff_allocator) |- availbuffers| | | |
* -------------------------------------------------
* | Seg_ctl | ^ | | ^ |
* | |
* |--- Buf_ctl |--- Buf_ctl
*
*
* NOTE: the segment overhead is: sizeof(Seg_ctl) + (nbuffer * BUFFER_OVERHEAD)
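*
* For example, for the 32-byte cache with its default 512 buffers per
* segment (on an LP64 build with DBG disabled, so sizeof(Seg_ctl) == 24 and
* sizeof(Buf_ctl) == 8), a segment occupies roughly
* 32*512 + 24 + 512*8 = 20504 bytes.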
*
*/
/*
* \class SlabCache
*
* \brief Cache of slab segments.
*
* A SlabCache is meant to keep commonly used objects in an initialised state,
* ready for use. Each cache contains blocks of contiguous pages
* in memory (i.e. \a slab segments).
*
* Each slab segment consists of contiguous memory blocks, carved up into
* equal-size chunks, with a reference count to indicate the number of
* allocated chunks.
*
* The SlabCache is used to implement the efficient lock-free allocator used in
* FastFlow (\p ff_allocator), which is based on the
*
* <a
* href="http://www.ibm.com/developerworks/linux/library/l-linux-slab-allocator/"
* target="_blank">Slab allocator</a>
*
* At the beginning of each segment there is a Seg_ctl structure, that is
* responsible for maintaining the linkage with other slabs in the cache, keeps
* track of the number of buffers in use and the number of available buffers.
*
* Each buffer in the segment is managed by a Buf_ctl structure, that holds a
* back-pointer to the controlling slab.
*
*/
class SlabCache {
private:
enum {TICKS_TO_WAIT=500,TICKS_CNT=3};
enum {BUFFER_OVERHEAD=sizeof(Buf_ctl)};
enum {MIN_FB_CAPACITY=32};
inline Seg_ctl * getsegctl(Buf_ctl * buf) {
return *((Seg_ctl **)buf);
}
/*
* This method creates a new slab, that is, it allocates a new segment of
* large and possibly aligned memory.
*
* \returns 0 if the creation of a new Slab succeeds; a negative value
* otherwise.
*/
inline int newslab() {
/*
* Allocate a large chunk of memory. The size of each chunk is equal to
* the size of the slab buffer times the number of slabs in the segment
* plus the segment overhead.
*
* 'alloc' is of type SegmentAllocator
*/
void * ptr = alloc->newsegment( size*nslabs + sizeof(Seg_ctl) +
nslabs * BUFFER_OVERHEAD );
/*
* If the allocation of the new segment succeeds, the pointer to the
* newly allocated segment of memory becomes the new Segment controller
* and all its embedded data are reset.
*/
if (ptr) {
Seg_ctl * seg = (Seg_ctl *)ptr; // Segment controller
DBG(seg->refcount = 0); // number of buffers in use
seg->cacheentry = this; // owner of the segment
seg->allocator = mainalloc; // ref to the ff_allocator
//atomic_long_set(&seg->availbuffers,nslabs);
seg->availbuffers = nslabs;
++seg;
*(Seg_ctl **)seg = (Seg_ctl *)ptr;
buffptr = (Buf_ctl *)seg;
availbuffers += nslabs;
seglist.push_back(ptr); // add new slab into list of seg
return 0;
}
return -1;
}
/*
* Free a previously allocated slab. This method frees the memory
* segment pointed to by 'ptr'. The size of the freed memory is exactly
* the size of the allocated segment: the size of the slab buffer times
* the number of slabs in the segment plus the segment overhead.
*
* By default 'seglist_rem' is set to true, meaning that the given
* pointer is also removed from the list of pointers to active
* memory segments.
*
*/
inline void freeslab(void * ptr, bool seglist_rem=true) {
alloc->freesegment( ptr, size*nslabs + sizeof(Seg_ctl) + nslabs * BUFFER_OVERHEAD );
if (seglist_rem) { /* remove ptr from seglist */
svector<void *>::iterator b(seglist.begin()), e(seglist.end());
for(;b!=e;++b)
if ((*b)==ptr) {
seglist.erase(b);
return;
}
DBG(assert(1==0));
}
}
/*
* This function is called when freeing a slab and the allocator has been
* deregistered. It checks whether the memory occupied by the slab segment
* given as argument can actually be deallocated. And this is possible when
* none of the slab buffers are in use. In this case, the memory occupied
* by the slab segment can be deallocated and the segment removed from the
* list of segments.
*
* \returns true if the operation succeeds; false otherwise.
*/
inline bool checkReclaim(Seg_ctl * seg) {
bool r=false;
spin_lock(lock); // per-cache locking
DBG(assert(seg->availbuffers <= (unsigned)nslabs));
if (++(seg->availbuffers) == (unsigned long)nslabs) {
freeslab(seg); /* Free the given segment of memory (a slab) */
r=true;
}
spin_unlock(lock);
return r;
}
inline void checkReclaimD(Seg_ctl * seg) {
if (++(seg->availbuffers) == (unsigned long)nslabs) {
freeslab(seg); /* Free the given segment of memory (a slab) */
}
}
// REW
// TODO: check whether the following can use the last queue
// index as in the getfrom_fb() method !!!!
//
inline void * getfrom_fb_delayed() {
if (fb_size==1) return 0;
for(unsigned i=1;i<fb_size;++i) {
if (fb[i]->leak->length()<(unsigned)delayedReclaim) {
return 0; // cache is empty
}
}
union { Buf_ctl * buf; void * ptr; } b={NULL};
for(unsigned i=2;i<fb_size;++i) {
fb[i]->leak->pop(&b.ptr);
DBG(assert(b.ptr));
checkReclaimD(getsegctl(b.buf));
}
fb[1]->leak->pop(&b.ptr);
ALLSTATS(all_stats::instance()->leakremoved.fetch_add(1));
return b.ptr;
}
inline void * getfrom_fb() {
void * buf = 0;
for(unsigned i=0;i<fb_size;++i) {
unsigned k=(lastqueue+i)%fb_size;
if (fb[k]->leak->pop((void **)&buf)) {
ALLSTATS(all_stats::instance()->leakremoved.fetch_add(1));
lastqueue=k;
return buf;
}
}
// the free buffers are empty
return 0;
}
inline int searchfb(const pthread_t key) {
for(unsigned i=0;i<fb_size;++i)
if (fb[i]->key == key) return (int)i;
return -1;
}
public:
/*
* Default Constructor
*
* \param mainalloc a pointer to the allocator used (must be of type
* \p ff_allocator).
* \param delayedReclaim if non-zero, memory reclamation is deferred.
* \param alloc a pointer to a \p SegmentAllocator object, used to obtain large
* (and possibly aligned) chunks of memory from the operating system.
* \param sz the size of each buffer in a segment of the slab cache.
* \param ns number of buffers (slabs) in each segment (note that, as the
* size of each buffer increases, the number of buffers in each segment
* decreases).
*
*/
SlabCache( ff_allocator * const mainalloc, const int delayedReclaim,
SegmentAllocator * const alloc, size_t sz, int ns )
: size(sz), nslabs(ns), fb(0), fb_capacity(0), fb_size(0),
buffptr(0), availbuffers(0), alloc(alloc),
mainalloc(mainalloc), delayedReclaim(delayedReclaim),lastqueue(0) { }
/**
* Destructor
*/
~SlabCache() {
if (!nslabs) return;
svector<void *>::iterator b(seglist.begin()), e(seglist.end());
for(;b!=e;++b) { // just check the list is not empty
freeslab(*b,false); // seglist_rem = false: nothing is removed
}
seglist.clear(); // clear the list
if (fb) {
for(unsigned i=0;i<fb_size;++i)
if (fb[i]) {
fb[i]->~xThreadData();
::free(fb[i]);
}
::free(fb);
}
}
/*
* Initialise a new SlabCache.
*
* This method allocates the array that will hold the per-thread leak
* queues (built upon the uSWSR_Ptr_Buffer). Its initial capacity - set
* to a minimum of 32 entries - can later be increased, if necessary.
* If \p prealloc is true, a first segment of large and (possibly)
* aligned memory is also created.
*
* \param prealloc if \p true (the default), a slab segment is
* preallocated.
*
* \returns 0 on success (the leak-queue array is allocated and, when
* \p prealloc is \p true, the first slab is created); a negative value
* otherwise.
*/
int init(bool prealloc=true) {
if (!nslabs) return 0;
nomoremalloc.store(0); // atomic variable set to 0
init_unlocked(lock);
/* Allocate space for leak queue */
fb = (xThreadData**)::malloc(MIN_FB_CAPACITY*sizeof(xThreadData*));
if (!fb) return -1;
fb_capacity = MIN_FB_CAPACITY;
if ( prealloc && (newslab()<0) ) return -1;
return 0;
}
/*
* Register the calling thread into the shared buffer (leak queue).\n
* If the thread (ID) is already registered in the buffer, it returns a
* pointer to its entry. If it is not registered, it
* allocates space for a new thread's buffer and initialises the new buffer
* with the key of the calling thread. The \p allocator flag defaults to \p false.
*
* \param allocator \p true if the calling thread is the allocator thread;
* \p false by default.
* \returns a pointer to the calling thread's entry in the buffer.
*/
inline xThreadData * register4free(const bool allocator=false) {
DBG(assert(nslabs>0));
pthread_t key= pthread_self(); // obtain ID of the calling thread
int entry = searchfb(key); // search for threadID in leak queue
if (entry<0) { // if threadID not found
// allocate a new buffer for thread 'key'
xThreadData * xtd = (xThreadData*)::malloc(sizeof(xThreadData));
if (!xtd) return NULL;
new (xtd) xThreadData(allocator, nslabs, key);
/*
* REW
* if the size of the existing leak queue has reached the max
* capacity, allocate more space (::realloc) and update the queue's
* capacity. These operations are thread safe (mutual exclusion).
*/
spin_lock(lock);
if (fb_size==fb_capacity) {
xThreadData ** fb_new = (xThreadData**)::realloc( fb,
(fb_capacity+MIN_FB_CAPACITY) * sizeof(xThreadData*) );
if (!fb_new) {
spin_unlock(lock);
return NULL;
}
fb = fb_new;
fb_capacity += MIN_FB_CAPACITY;
}
// add the new buffer to the leak queue and release lock
fb[entry=(int) fb_size++] = xtd;
spin_unlock(lock);
}
DBG(assert(fb[entry]->key == key));
return fb[entry]; // position of new entry in buffer
}
/*
* Deregister the allocator thread and reclaim memory
*/
inline void deregisterAllocator(const bool reclaim) {
DBG(assert(nslabs>0));
DBG(assert(delayedReclaim==0));
// atomic variable set to 1: allocator has been deregistered
// and prevented from allocating
nomoremalloc.store(1);
// try to reclaim some memory
for(unsigned i=0;i<fb_size;++i) {
DBG(assert(fb[i]));
if (reclaim) {
union { Buf_ctl * buf2; void * ptr; } b={NULL};
while(fb[i]->leak->pop(&b.ptr)) { // while pop succeeds
checkReclaim(getsegctl(b.buf2));
}
}
}
}
/*
* Get an item from a Slab segment.
*
* \returns a pointer to the item obtained
*/
inline void * getitem() {
DBG(assert(nslabs>0));
void * item = 0;
/* try to get one item from the available ones */
if (availbuffers) {
avail:
Seg_ctl * seg = *((Seg_ctl **)buffptr);
DBG(assert(seg));
DBG(++seg->refcount); // incr num buffers in use
--(seg->availbuffers); // decr available buffers
DBG(if (seg->refcount==1) assert(availbuffers==nslabs));
item = (char *)buffptr + BUFFER_OVERHEAD; // set data pointer
if (--availbuffers) {
Seg_ctl ** ctl = (Seg_ctl **)((char *)item + size);
*ctl = seg;
buffptr = (Buf_ctl *)ctl;
} else buffptr = 0;
DBG( if ((getsegctl((Buf_ctl *)((char *)item -
sizeof(Buf_ctl))))->allocator == NULL)
abort() );
return item;
}
// else, try to get a free item from cache
item = delayedReclaim ? getfrom_fb_delayed() : getfrom_fb();
if (item) {
ALLSTATS(all_stats::instance()->hit.fetch_add(1));
DBG(if ((getsegctl((Buf_ctl *)item))->allocator == NULL) abort());
return ((char *)item + BUFFER_OVERHEAD);
}
ALLSTATS(all_stats::instance()->miss.fetch_add(1));
/* if there are not available items try to allocate a new slab */
if (newslab()<0) return NULL;
goto avail;
return NULL; // not reached
}
/*
* Push an item into the slab
*
* \return false if the operation succeeds; true if some memory has been
* reclaimed
*/
inline bool putitem(Buf_ctl * buf) {
DBG(assert(buf)); DBG(assert(nslabs>0));
/*
* If nomoremalloc is set to 1, it means the allocator has been
* deregistered and the memory in segment 'seg' could have been
* reclaimed.
*/
if (nomoremalloc.load()) {
// NOTE: if delayedReclaim is set, freeing threads cannot pass here
DBG(assert(delayedReclaim==0));
Seg_ctl * seg = *(Seg_ctl **)buf;
return checkReclaim(seg); // true if some memory can be reclaimed
}
/*
* If nomoremalloc is 0, push the buffer into the calling thread's leak queue.
*/
int entry = searchfb(pthread_self()); // look for calling thread
xThreadData * xtd = NULL;
if (entry<0) xtd = register4free(); // if not present, register it
else xtd = fb[entry]; // else, point to its position
DBG(if (!xtd) abort());
xtd->leak->push((void *)buf); // push the item in the buffer
return false;
}
/// Get the size of the SlabCache
inline size_t getsize() const { return size; }
/// Get the number of slabs in the cache
inline size_t getnslabs() const { return nslabs;}
inline size_t allocatedsize() const {
return (alloc?alloc->getallocated():0);
}
protected:
size_t size; /* size of slab buffer */
size_t nslabs; /* num of buffers in segment */
xThreadData ** fb; /* leak queue */
size_t fb_capacity; /* Min is 32 */
size_t fb_size;
lock_t lock;
Buf_ctl * buffptr;
size_t availbuffers;
private:
std::atomic_long nomoremalloc;
SegmentAllocator * const alloc;
ff_allocator * const mainalloc; // the main allocator
const int delayedReclaim;
unsigned lastqueue;
svector<void *> seglist; // list of pointers to mem segments
};
/*!
* \class ff_allocator
* \ingroup building_blocks
* \brief The ff_allocator, based on the idea of the
* <a href="http://www.ibm.com/developerworks/linux/library/l-linux-slab-allocator/" target="_blank"> Slab allocator</a>
*
* An ff_allocator is owned by a single ff_node, which is the ff_node
* that <tt>registerAllocator()</tt>s it. The owner can
* <tt>malloc/realloc/posix_memalign</tt>. Other ff_nodes can \p free
* provided they \p register4free. An ff_node can init more than one
* ff_allocator. Every violation causes unexpected results (e.g. segfault).
*
* The ff_allocator works on top of the SlabCache and is tuned to outperform
* standard allocators in a multi-threaded environment. When it is
* initialised, it creates a (predefined) number of SlabCaches, each one
* containing a (predefined) number of buffers of a given size, from 32 to
* 8192 bytes. The thread that wants to allocate memory has to register
* itself to the shared leak queues first. Only one
* thread can perform this operation. The allocator obtains memory from the
* pre-allocated SlabCache: if there is a slab (i.e. a buffer) big enough to
* contain an object of the requested size, the pointer to that buffer is
* passed to the thread that requested the memory. The threads that free
* memory must register as well to the same shared leak queues, so that once
* they are done, memory can flow back to the allocator thread.
*
* \note Very efficient but delicate to use, expert only. Not
* designed for end-users but to build FastFlow pattern runtime
* support. Non-experts should use \ref ff::FFAllocator.
*
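* A minimal usage sketch (thread placement as described above; error
* handling omitted):
* \code{.cpp}
* ff_allocator A;
* A.init();                 // once, before the threads start
* // --- allocator thread (owner) ---
* A.registerAllocator();    // take ownership: only this thread may malloc
* void *p = A.malloc(64);
* // --- any other thread ---
* A.register4free();        // once, before the first free on this allocator
* A.free(p);                // freed buffers flow back through the leak queues
* \endcode
*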
* \example perf_test_alloc1.cpp
*/
class ff_allocator {
friend class FFAllocator;
public:
/*
* Check if there is a slab big enough to contain an object as big as \p
* size.
*
* Since there is a limited number of possible slab sizes, and these
* predefined dimensions are all powers of 2, it is easy to find which slab
* can contain the object of the given size.
*
* \param size the size of the object that must be contained in a slab.
*
* \return 0 if the object can be contained in the slab of the
* smallest size
* \return the index of the suitable slab size in the array of possible sizes, if
* one exists
* \return -1 if the object is too big to be contained in one of the slabs.
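*
* For example, with the default \p buffersize table above:
* \code{.cpp}
* getslabs(1);     // == 0  -> 32-byte slab
* getslabs(32);    // == 0  -> 32-byte slab
* getslabs(33);    // == 1  -> 64-byte slab
* getslabs(8192);  // == 8  -> 8192-byte slab
* getslabs(8193);  // == -1 -> too big for any slab
* \endcode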
*/
inline int getslabs(size_t size) {
if (!size) return -1;
--size;
size_t e=1, sz=size;
while(sz >>= 1) ++e;
if (e < (size_t)POW2_MIN) return 0;
if (e > (size_t)POW2_MAX) return -1;
/*
{
for(int i=POW2_MAX-POW2_MIN+1;i<N_SLABBUFFER;++i)
if ((size_t)buffersize[i]>=size) return i;
}
*/
return (int) e - POW2_MIN;
}
private:
inline Seg_ctl * getsegctl(Buf_ctl * buf) {
return *((Seg_ctl **)buf);
}
inline SlabCache * getslabs(Buf_ctl * buf) {
return (getsegctl(buf))->cacheentry;
}
protected:
/*
* Frees the buffer \p buf and returns it to its slab segment
*
* \return \p true if some memory has been reclaimed
*/
virtual inline bool free(Buf_ctl * buf) {
SlabCache * const entry = getslabs(buf);
DBG( if (!entry) abort() );
ALLSTATS(all_stats::instance()->nfree.fetch_add(1));
return entry->putitem(buf);
}
inline size_t allocatedsize() {
size_t s=0;
svector<SlabCache *>::iterator b(slabcache.begin()), e(slabcache.end());
for(;b!=e;++b) {
if ((*b) && (*b)->getnslabs())
s+=(*b)->allocatedsize();
}
return s;
}
public:
// FIX: we have to implement max_size !!!!
/// Default Constructor
ff_allocator(size_t /*max_size*/=0, const int delayedReclaim=0) :
alloc(0), /* max_size(max_size), */ delayedReclaim(delayedReclaim) {
}
/*
* Destructor
*/
virtual ~ff_allocator() {
for(size_t i=0;i<slabcache.size(); ++i) {
if (slabcache[i]) {
slabcache[i]->~SlabCache();
::free(slabcache[i]);
slabcache[i] = NULL;
}
}
if (alloc) {
alloc->~SegmentAllocator();
::free(alloc);
alloc=NULL;
}
}
// initialize the allocator - Check this -- REW
/**
* \brief init the allocator
*
* Initialise the allocator. This method is called by one ff_node for
* each data-path (e.g. the Emitter in a Farm skeleton).
* It creates a number of SlabCache objects, as specified by the
* \p N_SLABBUFFER constant. The buffer size of each created cache
* goes from 32 bytes (the first one created) to 8192 bytes (the last).
* Typically the number of buffers in a slab segment
* decreases as the size of the slab increases.
*
* Default:
* \code{.cpp}
* enum { N_SLABBUFFER=9, MAX_SLABBUFFER_SIZE=8192};
* const int buffersize[N_SLABBUFFER] = { 32, 64,128,256,512,1024,2048,4096,8192 };
* const int nslabs_default[N_SLABBUFFER] = { 512,512,512,512,128, 64, 32, 16, 8 };
* \endcode
*
* The number of slabs is dynamically increased if needed, reclaiming more
* memory from the OS. This is a relatively slow and not lock-free path of the code.
*
* \param _nslabs an array specifying the number of buffers in each
* SlabCache (overrides \p nslabs_default)
* \param prealloc if \p true, use preallocated segments
*
* \return 0 if initialisation succeeds; a negative value otherwise
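*
* For example (a sketch; the per-cache buffer counts are illustrative):
* \code{.cpp}
* ff_allocator A;
* int nslabs[N_SLABBUFFER] = {256,256,256,128,64,32,16,8,4};
* if (A.init(nslabs) < 0) error("ff_allocator: init failed\n");
* \endcode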
*/
int init(const int _nslabs[N_SLABBUFFER]=0, bool prealloc=true) {
svector<int> nslabs; // used to specify the num of buffers in a slab
if (_nslabs)
for (int i=0;i<N_SLABBUFFER; ++i)
nslabs.push_back(_nslabs[i]);
else
for (int i=0;i<N_SLABBUFFER; ++i)
nslabs.push_back(nslabs_default[i]);
/*
* Allocate space for a SegmentAllocator object and
* initialise it.
*/
alloc = (SegmentAllocator *)::malloc(sizeof(SegmentAllocator));
if (!alloc) return -1;
new (alloc) SegmentAllocator();
SlabCache * s = 0;
slabcache.reserve(N_SLABBUFFER);
/*
* Allocate space for 'N_SLABBUFFER' caches and create SlabCache
* objects. Buffer sizes grow from 32 to 8192 bytes across the caches. If
* not otherwise specified, the number of slab buffers in a segment
* decreases as the size of the slab increases.
*/
for(int i=0; i<N_SLABBUFFER; ++i) {
s = (SlabCache*)::malloc(sizeof(SlabCache));
if (!s) return -1;
new (s) SlabCache( this, delayedReclaim, alloc,
buffersize[i], nslabs[i] );
if (s->init(prealloc)<0) {
error("ff_allocator:init: slab init fails!\n");
return -1;
}
slabcache.push_back(s); // create a list of SlabCaches
}
return 0;
}
/**
* \brief register ff_allocator (get ownership)
*
* The ff_node that allocates memory has to call this method in order to
* register itself to the shared buffers. With this method, an ff_node is
* allowed to allocate memory. Only one ff_node can allocate memory.
*
* \returns 0 if the operation succeeds, -1 otherwise
*/
inline int registerAllocator() {
svector<SlabCache *>::iterator b(slabcache.begin()), e(slabcache.end());
for(;b!=e;++b) {
if ((*b)->getnslabs())
if ((*b)->register4free(true)==0) return -1;
}
return 0;
}
/**
* \brief de-register the ff_allocator (release ownership)
*
* Deregister the ff_allocator (i.e. release ownership) and reclaim
* allocated memory back to the allocator. Every ff_node can perform this
* action. Any <tt>malloc/realloc/posix_memalign</tt> requires that one
* ff_node owns the ff_allocator.
*
* \param reclaimMemory \p true if reclaim; \p false if deregister only
*/
inline void deregisterAllocator(bool reclaimMemory=true) {
svector<SlabCache *>::iterator b(slabcache.begin()), e(slabcache.end());
for(;b!=e;++b) {
if ((*b)->getnslabs())
(*b)->deregisterAllocator(reclaimMemory);
}
}
/**
* \brief register for the free operation
*
* Threads different from the allocator (i.e. those threads that do not
* allocate memory) have to register themselves to the shared buffers by
* calling this method before freeing. Once registered, when their task
* terminates they can free the memory assigned to them; that memory
* returns back to the allocator thread's buffers, so that it can be
* reused.
*
*/
inline int register4free() {
svector<SlabCache *>::iterator b(slabcache.begin()), e(slabcache.end());
for(;b!=e;++b) {
if ((*b)->getnslabs())
if ((*b)->register4free()==0) return -1;
}
return 0;
}
/**
* \brief malloc
*
* Request to allocate \p size bytes. If the size is too
* large, use OS malloc to get a new chunk of memory.
*
* To be called on a \ref ff::ff_allocator initialised via
* \p ff::ff_allocator::init() on the calling \ref ff::ff_node
* (owner).
* Each \ref ff::ff_allocator object is owned by the \ref ff::ff_node
* which called \p registerAllocator on it.
* Only the owner can malloc on it, whereas other ff_nodes can
* free. More than one \ref ff::ff_allocator per ff_node can
* be defined.
*
* Violating ownership causes a segfault or unexpected behaviour.
*
* \param size the size of memory requested
* \return pointer to allocated memory chunk
*
* \note Very efficient but delicate to use, expert only. Not
* designed for end-users but to build FastFlow pattern runtime
* support. Non-experts should use \ref ff::FFAllocator
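*
* For example, with \p A an ff_allocator owned by the calling thread
* (i.e. after \p registerAllocator()):
* \code{.cpp}
* void *p = A.malloc(128);    // served by the 128-byte SlabCache
* void *q = A.malloc(16384);  // > MAX_SLABBUFFER_SIZE: falls back to ::malloc
* \endcode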
*/
inline void * malloc(size_t size) {
/**
* use standard allocator if the size is too big or
* we don't want to use the ff_allocator for that size
*/
if ( size>MAX_SLABBUFFER_SIZE ||
(slabcache[getslabs(size)]->getnslabs()==0) ) {
ALLSTATS(all_stats::instance()->sysmalloc.fetch_add(1));
void * ptr = ::malloc(size+sizeof(Buf_ctl));
if (!ptr) return 0;
((Buf_ctl *)ptr)->ptr = 0;
return (char *)ptr + sizeof(Buf_ctl);
}
// otherwise
int entry = getslabs(size);
DBG(if (entry<0) abort());
ALLSTATS(all_stats::instance()->nmalloc.fetch_add(1));
void * buf = slabcache[entry]->getitem();
return buf;
}
/**
* \brief ff posix_memalign.
*
* \param[out] *memptr pointer to a chunk of memory where the aligned memory will
* be returned
* \param alignment allocation's base address is an exact multiple of \p alignment, which
* must be a power of 2 at least as large as sizeof(void *)
* \param size the size of memory requested.
* \return 0 if successful; otherwise it returns an error value.
*
* \note Very efficient but delicate to use, expert only. Not
* designed for end-users but to build FastFlow pattern runtime
* support. Non-experts should use \ref ff::FFAllocator
*
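* For example, with \p A an ff_allocator owned by the calling thread:
* \code{.cpp}
* void *p = nullptr;
* if (A.posix_memalign(&p, 64, 100) == 0) {
*     // p is 64-byte aligned; release it with A.free(p)
* }
* \endcode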
*/
inline int posix_memalign(void **memptr, size_t alignment, size_t size) {
if (!isPowerOfTwoMultiple(alignment, sizeof(void *))) return -1;
size_t realsize = size+alignment;
/*
* if the size is too big or we don't want to use the ff_allocator
* for that size, use the standard allocator and force alignment
*/
if (realsize > MAX_SLABBUFFER_SIZE ||
(slabcache[getslabs(realsize)]->getnslabs()==0)) {
ALLSTATS(all_stats::instance()->sysmalloc.fetch_add(1));
void * ptr = ::malloc(realsize+sizeof(Buf_ctl));
if (!ptr) return -1;
((Buf_ctl *)ptr)->ptr = 0;
void * ptraligned = (void *)( ( (long)((char*)ptr +
sizeof(Buf_ctl)) + alignment ) & ~(alignment-1) );
((Buf_ctl *)((char *)ptraligned - sizeof(Buf_ctl)))->ptr = 0;
*memptr = ptraligned;
return 0;
}
// else use
int entry = getslabs(realsize);
DBG(if (entry<0) abort());
ALLSTATS(all_stats::instance()->nmalloc.fetch_add(1));
void * buf = slabcache[entry]->getitem();
Buf_ctl * backptr = (Buf_ctl *)((char *)buf - sizeof(Buf_ctl));
DBG(assert(backptr));
void * ptraligned = (void *)(((long)((char*)buf) + alignment) & ~(alignment-1));
for(Buf_ctl *p=(Buf_ctl*)buf;p!=(Buf_ctl*)ptraligned;p++) {
p->ptr = backptr->ptr;
}
*memptr = ptraligned;
return 0;
}
// BUG 2 FIX: free fails if memory has been previously allocated using the
// posix_memalign method with a size greater than MAX_SLABBUFFER_SIZE!!!!
/**
* \brief free
*
* free the requested memory chunk on a given ff::ff_allocator object.
*
* The memory should have been allocated using \p malloc on the same
* object (otherwise segfault), and the calling \ref ff::ff_node should
* have registered itself via \p register4free().
*
* Wrong registering causes a segfault or unexpected behaviour.
*
* \note Very efficient but delicate to use, expert only. Not
* designed for end-users but to build FastFlow pattern runtime
* support. Non-experts should use \ref ff::FFAllocator
*
* \param ptr a pointer to the buffer.
*
*/
inline void free(void * ptr) {
if (!ptr) return;
Buf_ctl * buf = (Buf_ctl *)((char *)ptr - sizeof(Buf_ctl));
if (!buf->ptr) {
ALLSTATS(all_stats::instance()->sysfree.fetch_add(1));
::free(buf);
return;
}
Buf_ctl * buf2 = (Buf_ctl *)((char *)buf - sizeof(Buf_ctl));
while(buf->ptr == buf2->ptr) {
buf = buf2;
buf2 = (Buf_ctl *)((char *)buf - sizeof(Buf_ctl));
}
free(buf);
}
/**
* \brief realloc
*
* It changes the size of the memory block pointed to by
* \p ptr to \p newsize bytes. If the size is too large,
* use OS malloc to get a new chunk of memory.
*
* To be called on a \ref ff::ff_allocator initialised via
* \p registerAllocator() on the calling \ref ff::ff_node
* (owner).
* Each \ref ff::ff_allocator object is owned by the \ref ff::ff_node
* which called \p registerAllocator on it.
* Only the owner can malloc/realloc on it, whereas other ff_nodes can
* free. More than one \ref ff::ff_allocator per ff_node can
* be defined.
*
* Violating ownership causes a segfault or unexpected behaviour.
*
* \param ptr pointer to the memory block to be resized
* \param newsize the new size of the memory block
* \return pointer to the (possibly moved) memory chunk
*
* \note Very efficient but delicate to use, expert only. Not
* designed for end-users but to build FastFlow pattern runtime
* support. Non-experts should use \ref ff::FFAllocator
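*
* For example, with \p A owned by the calling thread:
* \code{.cpp}
* void *p = A.malloc(40);   // served by the 64-byte slab
* p = A.realloc(p, 60);     // still fits in the same buffer: p is unchanged
* p = A.realloc(p, 200);    // moved to the 256-byte slab, contents copied
* \endcode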
*/
inline void * realloc(void * ptr, size_t newsize) {
if (ptr) {
size_t oldsize;
Buf_ctl * buf = (Buf_ctl *)((char *)ptr - sizeof(Buf_ctl));
if (!buf->ptr) {
ALLSTATS(all_stats::instance()->sysrealloc.fetch_add(1));
void * newptr= ::realloc(buf, newsize+sizeof(Buf_ctl));
if (!newptr) return 0; // check the result of ::realloc, not the old pointer
((Buf_ctl *)newptr)->ptr = 0;
return (char *)newptr + sizeof(Buf_ctl);
}
ALLSTATS(all_stats::instance()->nrealloc.fetch_add(1));
SlabCache * const entry = getslabs(buf);
if (!entry) return 0;
if ((oldsize=entry->getsize()) >= newsize) {
return ptr;
}
void * newptr = this->malloc(newsize);
memcpy(newptr,ptr,oldsize);
entry->putitem(buf);
return newptr;
}
return this->malloc(newsize);
}
inline void * growsup(void * ptr, size_t newsize) {
if (ptr) {
size_t oldsize;
Buf_ctl * buf = (Buf_ctl *)((char *)ptr - sizeof(Buf_ctl));
if (!buf->ptr) { /* WARNING: this is probably an error !!! */
ALLSTATS(all_stats::instance()->sysrealloc.fetch_add(1));
void * newptr= ::realloc(buf, newsize+sizeof(Buf_ctl));
if (!newptr) return 0; // check the result of ::realloc, not the old pointer
((Buf_ctl *)newptr)->ptr = 0;
return (char *)newptr + sizeof(Buf_ctl);
}
SlabCache * const entry = getslabs(buf);
if (!entry) return 0;
if ((oldsize=entry->getsize()) >= newsize) {
return ptr;
}
entry->putitem(buf);
}
return this->malloc(newsize);
}
ALLSTATS( void printstats(std::ostream & out) {
all_stats::instance()->print(out); }
)
private:
svector<SlabCache *> slabcache; // List of caches
SegmentAllocator * alloc;
/* const size_t max_size; */ // TODO
const int delayedReclaim;
};
// forward decl
class ffa_wrapper;
static void FFAkeyDestructorHandler(void * kv);
static void killMyself(ffa_wrapper * ptr);
/*
* \class ffa_wrapper
*
* \brief A wrapper of the ff_allocator.
*
* This wrapper acts as an intermediate level between the ff_allocator (which
* it extends) and the FFAllocator (which uses the wrapper through the
* FFAxThreadData structure).
*
* This class is defined in \ref allocator.hpp
*/
class ffa_wrapper: public ff_allocator {
protected:
virtual inline bool free(Buf_ctl * buf) {
bool reclaimed = ff_allocator::free(buf);
// Do I have to kill myself?
if (reclaimed && nomorealloc.load() && (allocatedsize()==0)) {
killMyself(this);
}
return reclaimed;
}
public:
friend void FFAkeyDestructorHandler(void * kv);
ffa_wrapper(size_t max_size=0,const int delayedReclaim=0) :
ff_allocator(max_size,delayedReclaim) {
nomorealloc.store(0);
}
virtual ~ffa_wrapper() {}
inline void * malloc(size_t size) {
return ff_allocator::malloc(size);
}
inline int posix_memalign(void **memptr, size_t alignment, size_t size) {
return ff_allocator::posix_memalign(memptr,alignment,size);
}
inline void free(void *) { abort(); }
inline void * realloc(void * ptr, size_t newsize) {
return ff_allocator::realloc(ptr,newsize);
}
inline void * growsup(void * ptr, size_t newsize) {
return ff_allocator::growsup(ptr,newsize);
}
private:
inline void nomoremalloc() {
nomorealloc.store(1);
}
private:
std::atomic_long nomorealloc;
};
struct FFAxThreadData {
FFAxThreadData(ffa_wrapper * f): f(f) { }
ffa_wrapper * f;
long padding[longxCacheLine-(sizeof(ffa_wrapper*)/sizeof(long))];
};
/**
* \class FFAllocator
* \ingroup core_patterns
* \brief A user-space parallel allocator (process-wide)
*
* Based on the \p ff_allocator, the FFAllocator might be used by any number
* of \ref ff::ff_node (i.e. threads) to dynamically allocate/deallocate memory. It consists of
* one \p ff_allocator per \ref ff_node. It uses \ref ff::uSWSR_Ptr_Buffer
* to avoid deadlocks.
*
* FFAllocator will be created as a static object on the first call to \ref FFAllocator::instance()
*
* Usage example:
* \code{.cpp}
* #define INIT() // no init needed
* #define MALLOC(size) (FFAllocator::instance()->malloc(size))
* #define FREE(ptr,unused) (FFAllocator::instance()->free(ptr))
* \endcode
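*
* Equivalently, through the convenience functions defined at the end of
* this file:
* \code{.cpp}
* void *p = ff::ff_malloc(256);
* ff::ff_free(p);
* \endcode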
*
* \example perf_test_alloc2.cpp
*/
class FFAllocator {
enum {MIN_A_CAPACITY=32};
protected:
inline Seg_ctl * getsegctl(Buf_ctl * buf) {
return *((Seg_ctl **)buf);
}
inline FFAxThreadData * newAllocator( bool prealloc,
int _nslabs[N_SLABBUFFER],
size_t max_size )
{
FFAxThreadData * ffaxtd =
(FFAxThreadData *)::malloc(sizeof(FFAxThreadData) +
sizeof(ffa_wrapper));
if (!ffaxtd) return NULL;
ffa_wrapper * f = (ffa_wrapper*)((char *) ffaxtd +
sizeof(FFAxThreadData));
new (f) ffa_wrapper(max_size,delayedReclaim);
new (ffaxtd) FFAxThreadData(f);
if (f->init(_nslabs, prealloc)<0) {
error("FFAllocator:newAllocator: init fails!\n");
return NULL;
}
if (f->registerAllocator()<0) { // register allocator to leak queue
error("FFAllocator:newAllocator: registerAllocator fails!\n");
return NULL;
}
// REW -- like in register4free
// if more space in the SlabCache is needed, allocate more space
// (::realloc) and update counters. Thread-safe operation (mutex)
spin_lock(lock);
if (A_size == A_capacity) {
FFAxThreadData ** A_new = (FFAxThreadData**) ::realloc( A,
(A_capacity+MIN_A_CAPACITY)*sizeof(FFAxThreadData*) );
if (!A_new) { spin_unlock(lock); return NULL;}
A=A_new;
A_capacity += MIN_A_CAPACITY;
}
A[A_size++] = ffaxtd;
spin_unlock(lock);
ALLSTATS(all_stats::instance()->newallocator.fetch_add(1));
return ffaxtd;
}
public:
/**
* \brief Constructor
*
* \param delayedReclaim Deferred reclamation configuration
*/
FFAllocator(int delayedReclaim=0) :
A(0), A_capacity(0), A_size(0),
delayedReclaim(delayedReclaim)
{
init_unlocked(lock);
if (pthread_key_create( &A_key,
(delayedReclaim ? NULL : FFAkeyDestructorHandler) )!=0) {
error("FFAllocator FATAL ERROR: pthread_key_create fails\n");
abort();
}
}
/**
* \brief Destructor
*
* Delete the allocator and return pre-allocated memory to the OS
*/
~FFAllocator() {
if (delayedReclaim) {
if (A) {
for(unsigned i=0;i<A_size;++i)
if (A[i]) deleteAllocator(A[i]->f);
::free(A);
}
pthread_key_delete(A_key);
}
}
/**
* \brief Returns an instance of the FFAllocator object
*
*/
static inline FFAllocator * instance() {
static FFAllocator FFA;
return &FFA;
}
/*
* Allocator factory method.
*
* Each time this method is called it spawns a new memory allocator. The
* calling thread is registered as an allocator thread. Other threads
* willing to use this SlabCache have to register themselves to the shared
* leak queue of the allocator.
*
* \param max_size PARAMETER NOT USED!
* \param _nslabs array specifying the allowed quantities of buffers. By
* default it is initialised to 0s
* \param prealloc flag that is used as a mask when creating a new slab.
* Default is \p true.
*
* \returns a \p ffa_wrapper object, which is in turn an extension of
* a \p ff_allocator object.
*/
inline ffa_wrapper * newAllocator( size_t max_size=0,
int _nslabs[N_SLABBUFFER]=0,
bool prealloc=true )
{
FFAxThreadData * ffaxtd = newAllocator(prealloc, _nslabs, max_size);
if (!ffaxtd) return NULL;
return ffaxtd->f;
}
/*
* Delete the allocator \p f passed as argument and reclaim the used
* memory.\n It also removes the allocator from the internal table.
*
* \param f an object of type ffa_wrapper.
* \param reclaimMemory flag to decide whether to reclaim memory or not.
*/
inline void deleteAllocator(ffa_wrapper * f, bool reclaimMemory=true) {
if (!f) return;
spin_lock(lock);
for (unsigned int i=0;i<A_size;++i) {
if (A[i]->f == f) {
f->deregisterAllocator(reclaimMemory);
f->~ffa_wrapper();
A[i]->~FFAxThreadData();
::free(A[i]);
for (unsigned int j=i+1;j<A_size;++i,++j)
A[i]=A[j];
--A_size;
break;
}
}
spin_unlock(lock);
ALLSTATS(all_stats::instance()->deleteallocator.fetch_add(1));
}
/*
* Delete allocator owned by calling thread and reclaim memory
*
*/
inline void deleteAllocator() {
FFAxThreadData * ffaxtd = (FFAxThreadData*)pthread_getspecific(A_key);
if (!ffaxtd) return;
deleteAllocator(ffaxtd->f,true);
}
/**
* \brief malloc
*
* Request to allocate \p size bytes. If the size is too
* large, use OS malloc to get a new chunk of memory. Otherwise use the
* ff_allocator on the calling ff_node
*
* \param size the size of memory requested
* \return pointer to allocated memory chunk
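*
* For example (any thread may call this; a per-thread ff_allocator is
* created lazily on first use):
* \code{.cpp}
* void *p = FFAllocator::instance()->malloc(512);
* // ... later, possibly on a different thread:
* FFAllocator::instance()->free(p);
* \endcode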
*/
inline void * malloc(size_t size) {
/* use standard allocator if the size is too big */
if (size>MAX_SLABBUFFER_SIZE) {
ALLSTATS(all_stats::instance()->sysmalloc.fetch_add(1));
void * ptr = ::malloc(size+sizeof(Buf_ctl));
if (!ptr) return 0;
((Buf_ctl *)ptr)->ptr = 0;
return (char *)ptr + sizeof(Buf_ctl);
}
FFAxThreadData * ffaxtd = (FFAxThreadData*) pthread_getspecific(A_key);
if (!ffaxtd) {
// if no thread-data is associated to the key
// initialise and register a new allocator
// REW -- why prealloc is FALSE??
//
ffaxtd = newAllocator(false,0,0);
if (!ffaxtd) {
error("FFAllocator:malloc: newAllocator fails!\n");
return NULL;
}
// REW -- ?
spin_lock(lock);
if (pthread_setspecific(A_key, ffaxtd)!=0) {
deleteAllocator(ffaxtd->f);
spin_unlock(lock);
return NULL;
}
spin_unlock(lock);
}
return ffaxtd->f->malloc(size);
}
/**
* \brief ff posix_memalign
*
* \param[out] *memptr pointer to a chunk of memory where the aligned memory will
* be returned
* \param alignment allocation's base address is an exact multiple of \p alignment, which
* must be a power of 2 at least as large as sizeof(void *)
* \param size the size of memory requested.
* \return 0 if successful; otherwise it returns an error value.
*
*/
inline int posix_memalign(void **memptr, size_t alignment, size_t size) {
if (!isPowerOfTwoMultiple(alignment, sizeof(void *))) return -1;
size_t realsize = size+alignment;
if (realsize > MAX_SLABBUFFER_SIZE) {
ALLSTATS(all_stats::instance()->sysmalloc.fetch_add(1));
void * ptr = ::malloc(realsize+sizeof(Buf_ctl));
if (!ptr) return -1;
((Buf_ctl *)ptr)->ptr = 0;
void * ptraligned = (void *)(((long)((char*)ptr+sizeof(Buf_ctl)) + alignment) & ~(alignment-1));
((Buf_ctl *)((char *)ptraligned - sizeof(Buf_ctl)))->ptr = 0;
*memptr = ptraligned;
return 0;
}
FFAxThreadData * ffaxtd = (FFAxThreadData*)pthread_getspecific(A_key);
if (!ffaxtd) {
// initialise and register a new allocator
// REW -- why prealloc FALSE??
ffaxtd = newAllocator(false,0,0);
if (!ffaxtd) {
error("FFAllocator:posix_memalign: newAllocator fails!\n");
return -1;
}
spin_lock(lock);
if (pthread_setspecific(A_key, ffaxtd)!=0) {
deleteAllocator(ffaxtd->f);
spin_unlock(lock);
return -1;
}
spin_unlock(lock);
}
return ffaxtd->f->posix_memalign(memptr,alignment,size);
}
/**
* \brief free
*
* free the requested memory chunk
*
* \param ptr a pointer to the buffer.
*
*/
inline void free(void * ptr) {
if (!ptr) return;
Buf_ctl * buf = (Buf_ctl *)((char *)ptr - sizeof(Buf_ctl));
if (!buf->ptr) {
ALLSTATS(all_stats::instance()->sysfree.fetch_add(1));
::free(buf);
return;
}
Buf_ctl * buf2 = (Buf_ctl *)((char *)buf - sizeof(Buf_ctl));
while(buf->ptr == buf2->ptr) {
buf = buf2;
buf2 = (Buf_ctl *)((char *)buf - sizeof(Buf_ctl));
}
DBG(if (!getsegctl(buf)->allocator) abort());
(getsegctl(buf)->allocator)->free(buf);
}
/**
* \brief realloc.
*
* It changes the size of the memory block pointed to by \p ptr to \p newsize
* bytes.
*
* \param ptr pointer to the buffer
* \param newsize the new size.
*
*/
inline void * realloc(void * ptr, size_t newsize) {
if (ptr) {
Buf_ctl * buf = (Buf_ctl *)((char *)ptr - sizeof(Buf_ctl));
if (!buf->ptr) { /* use standard allocator */
ALLSTATS(all_stats::instance()->sysrealloc.fetch_add(1));
void * newptr= ::realloc(buf, newsize+sizeof(Buf_ctl));
if (!newptr) return 0; // check the result of ::realloc, not the old pointer
((Buf_ctl *)newptr)->ptr = 0;
return (char *)newptr + sizeof(Buf_ctl);
}
ffa_wrapper * f = (ffa_wrapper*) pthread_getspecific(A_key);
if (!f) return NULL;
return f->realloc(ptr,newsize);
}
return this->malloc(newsize);
}
inline void * growsup(void * ptr, size_t newsize) {
if (ptr) {
Buf_ctl * buf = (Buf_ctl *)((char *)ptr - sizeof(Buf_ctl));
if (!buf->ptr) { /* use standard allocator */
ALLSTATS(all_stats::instance()->sysrealloc.fetch_add(1));
void * newptr= ::realloc(buf, newsize+sizeof(Buf_ctl));
if (!newptr) return 0; // check the result of ::realloc, not the old pointer
((Buf_ctl *)newptr)->ptr = 0;
return (char *)newptr + sizeof(Buf_ctl);
}
ffa_wrapper * f = (ffa_wrapper*)pthread_getspecific(A_key);
if (!f) return NULL;
return f->growsup(ptr,newsize);
}
return this->malloc(newsize);
}
ALLSTATS(void printstats(std::ostream & out) {
all_stats::instance()->print(out);
})
private:
FFAxThreadData **A; // ffa_wrapper : ff_allocator
size_t A_capacity; //
size_t A_size; //
lock_t lock;
pthread_key_t A_key;
const int delayedReclaim;
};
/* REW - Document these? */
static void FFAkeyDestructorHandler(void * kv) {
FFAxThreadData * ffaxtd = (FFAxThreadData*)kv;
ffaxtd->f->nomoremalloc();
ffaxtd->f->deregisterAllocator();
}
static inline void killMyself(ffa_wrapper * ptr) {
FFAllocator::instance()->deleteAllocator(ptr);
}
// Static functions below are not documented, but their usage is quite obvious
static inline void * ff_malloc(size_t size) {
return FFAllocator::instance()->malloc(size);
}
static inline void ff_free(void * ptr) {
FFAllocator::instance()->free(ptr);
}
static inline void * ff_realloc(void * ptr, size_t newsize) {
return FFAllocator::instance()->realloc(ptr,newsize);
}
static inline int ff_posix_memalign(void **memptr, size_t alignment, size_t size) {
return FFAllocator::instance()->posix_memalign(memptr,alignment,size);
}
} // namespace ff
#endif /* FF_ALLOCATOR_HPP */