mesytec-mnode/external/taskflow-3.8.0/3rd-party/ff/dynqueue.hpp

/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */

/*!
 * \file dynqueue.hpp
 * \ingroup aux_classes
 *
 * \brief Implementation of a dynamic queue. Not currently used.
 *
 * Dynamic (list-based) Single-Writer Single-Reader
 * (or Single-Producer Single-Consumer) unbounded queue.
 *
 * No lock is needed around pop and push methods.
 * See also ubuffer.hpp for a more efficient SPSC unbounded queue.
 *
 * M. Aldinucci, M. Danelutto, P. Kilpatrick, M. Meneghin, and M. Torquati,
 * "An Efficient Unbounded Lock-Free Queue for Multi-core Systems,"
 * in Proc. of 18th Intl. Euro-Par 2012 Parallel Processing, Rhodes Island,
 * Greece, 2012, pp. 662-673. doi:10.1007/978-3-642-32820-6_65
 *
 * \note Not currently used in the FastFlow implementation.
 */

#ifndef FF_DYNQUEUE_HPP
#define FF_DYNQUEUE_HPP

/* ***************************************************************************
 *
 *  FastFlow is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License version 3 as
 *  published by the Free Software Foundation.
 *  Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3
 *  or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 *  License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 ****************************************************************************
 */


#include <stdlib.h>
#include <ff/buffer.hpp>
#include <ff/spin-lock.hpp> // used only for mp_push and mp_pop
#include <ff/sysdep.h>

namespace ff {

#if !defined(_FF_DYNQUEUE_OPTIMIZATION)

class dynqueue {
private:
    struct Node {
        void        * data;
        struct Node * next;
    };

    union {
        Node * volatile   head;
        char padding1[CACHE_LINE_SIZE];
        //long padding1[longxCacheLine-(sizeof(Node *)/sizeof(long))];
    };
    union {
        Node * volatile   tail;
        char padding2[CACHE_LINE_SIZE];
        //long padding2[longxCacheLine-(sizeof(Node*)/sizeof(long))];
    };

    /* ----- two-lock used only in the mp_push and mp_pop methods ------- */
    /*                                                                    */    
    /*  By using the mp_push and mp_pop methods as standard push and pop, */
    /*  the dynqueue algorithm basically implements the well-known        */
    /*  Michael and Scott 2-locks MPMC queue.                             */    
    /*                                                                    */
    /*                                                                    */    
	/*
	union {
        lock_t P_lock;
        char padding3[CACHE_LINE_SIZE];
    };
    union {
        lock_t C_lock;
        char padding4[CACHE_LINE_SIZE];
    };
    */
	ALIGN_TO_PRE(CACHE_LINE_SIZE)
	lock_t P_lock;
	ALIGN_TO_POST(CACHE_LINE_SIZE)

	ALIGN_TO_PRE(CACHE_LINE_SIZE)
		lock_t C_lock;
	ALIGN_TO_POST(CACHE_LINE_SIZE)

    /* -------------------------------------------------------------- */

    // internal cache
    // if mp_push and mp_pop methods are used the cache access is lock protected
#if defined(STRONG_WAIT_FREE)
    Lamport_Buffer     cache;
#else
    SWSR_Ptr_Buffer    cache;
#endif

private:
    inline Node * allocnode() {
        union { Node * n; void * n2; } p;
#if !defined(NO_CACHE)
        if (cache.pop(&p.n2)) return p.n;
#endif
        p.n = (Node *)::malloc(sizeof(Node));
        return p.n;
    }

    inline Node * mp_allocnode() {
        union { Node * n; void * n2; } p;
#if !defined(NO_CACHE)
        spin_lock(P_lock);
        if (cache.pop(&p.n2)) {
            spin_unlock(P_lock);
            return p.n;
        }
        spin_unlock(P_lock);
#endif
        p.n = (Node *)::malloc(sizeof(Node));
        return p.n;
    }

public:
    enum {DEFAULT_CACHE_SIZE=1024};

    dynqueue(int cachesize=DEFAULT_CACHE_SIZE, bool fillcache=false):cache(cachesize) {
        Node * n = (Node *)::malloc(sizeof(Node));
        n->data = NULL; n->next = NULL;
        head=n;
        tail=n;
        cache.init();
        if (fillcache) {
            for(int i=0;i<cachesize;++i) {
                n = (Node *)::malloc(sizeof(Node));
                if (n) cache.push(n);
            }
        }
        init_unlocked(P_lock); 
        init_unlocked(C_lock);
        // Avoid unused private field warning on padding vars
        //(void) padding1; (void) padding2 ; (void) padding3; (void) padding4;
    }

    bool init() { return true;}

    ~dynqueue() {
        union { Node * n; void * n2; } p;
        if (cache.buffersize()>0) while(cache.pop(&p.n2)) free(p.n);
        while(head != tail) {
            p.n = (Node*)head;
            head = head->next;
            free(p.n);
        }
        if (head) free((void*)head);
    }
    
    inline bool push(void * const data) {
        assert(data != NULL);
        Node * n = allocnode();
        n->data = data; n->next = NULL;
        WMB();
        tail->next = n;
        tail       = n;

        return true;
    }

    inline bool  pop(void ** data) {        
        assert(data != NULL);
#if defined(STRONG_WAIT_FREE)
        if (head == tail) return false;
#else
        if (head->next) 
#endif
        {
            Node * n = (Node *)head;
            *data    = (head->next)->data;
            head     = head->next;
#if !defined(NO_CACHE)
            if (!cache.push(n)) ::free(n);
#else
            ::free(n);
#endif      
            return true;
        }
        return false;
    }    


    inline unsigned long length() const { return 0;}

    /*
     * MS 2-lock MPMC algorithm PUSH method 
     */
    inline bool mp_push(void * const data) {
        assert(data != NULL);
        Node* n = mp_allocnode();
        n->data = data; n->next = NULL;
        ff::spin_lock(P_lock);
        tail->next = n;
        tail       = n;
        spin_unlock(P_lock);
        return true;
    }

    /*
     * MS 2-lock MPMC algorithm POP method 
     */
    inline bool  mp_pop(void ** data) {        
        assert(data != NULL);
        spin_lock(C_lock);
        if (head->next) {
            Node * n = (Node *)head;
            *data    = (head->next)->data;
            head     = head->next;
            bool f   = cache.push(n);
            spin_unlock(C_lock);
            if (!f) ::free(n);
            return true;
        }
        spin_unlock(C_lock);
        return false;
    }    
};

#else // _FF_DYNQUEUE_OPTIMIZATION
/* 
 * Experimental code
 */

class dynqueue {
private:
    struct Node {
        void        * data;
        struct Node * next;
    };

    Node * volatile         head;
    volatile unsigned long  pwrite;
    long padding1[longxCacheLine-((sizeof(Node *)+sizeof(unsigned long))/sizeof(long))];
    Node * volatile        tail;
    volatile unsigned long pread;
    long padding2[longxCacheLine-((sizeof(Node*)+sizeof(unsigned long))/sizeof(long))];

    const   size_t cachesize;
    void ** cache;

private:

    inline bool cachepush(void * const data) {
        
        if (!cache[pwrite]) {
            /* Write Memory Barrier: ensure all previous memory write 
             * are visible to the other processors before any later
             * writes are executed.  This is an "expensive" memory fence
             * operation needed in all the architectures with a weak-ordering 
             * memory model where stores can be executed out-or-order 
             * (e.g. Powerpc). This is a no-op on Intel x86/x86-64 CPUs.
             */
            WMB(); 
            cache[pwrite] = data;
            pwrite += (pwrite+1 >= cachesize) ? (1-cachesize): 1;
            return true;
        }
        return false;
    }

    inline bool  cachepop(void ** data) {
        if (!cache[pread]) return false;
        
        *data = cache[pread];
        cache[pread]=NULL;
        pread += (pread+1 >= cachesize) ? (1-cachesize): 1;    
        return true;
    }    
    
public:
    enum {DEFAULT_CACHE_SIZE=1024};

    dynqueue(int cachesize=DEFAULT_CACHE_SIZE, bool fillcache=false):cachesize(cachesize) {
        Node * n = (Node *)::malloc(sizeof(Node));
        n->data = NULL; n->next = NULL;
        head=n;
        tail=n;

        cache=(void**)getAlignedMemory(longxCacheLine*sizeof(long),cachesize*sizeof(void*));
        if (!cache) {
            error("FATAL ERROR: dynqueue no memory available!\n");
            abort();
        }

        if (fillcache) {
            for(int i=0;i<cachesize;++i) {
                n = (Node *)::malloc(sizeof(Node));
                if (n) cachepush(n);
            }
        }
    }

    ~dynqueue() {
        union { Node * n; void * n2; } p;
        while(cachepop(&p.n2)) free(p.n);
        while(head != tail) {
            p.n = (Node*)head;
            head = head->next;
            free(p.n);
        }
        if (head) free((void*)head);
        if (cache) freeAlignedMemory(cache);
    }

    inline bool push(void * const data) {
        assert(data != NULL);

        union { Node * n; void * n2; } p;
        if (!cachepop(&p.n2))
            p.n = (Node *)::malloc(sizeof(Node));
        
        p.n->data = data; p.n->next = NULL;
        WMB();
        tail->next = p.n;
        tail       = p.n;

        return true;
    }

    inline bool  pop(void ** data) {
        assert(data != NULL);
        if (head->next) {
            Node * n = (Node *)head;
            *data    = (head->next)->data;
            head     = head->next;

            if (!cachepush(n)) free(n);
            return true;
        }
        return false;
    }    
};

#endif // _FF_DYNQUEUE_OPTIMIZATION


} // namespace

#endif /* FF_DYNQUEUE_HPP */
add taskflow-3.8.0 2025-01-04 01:25:05 +01:00			`/* -- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -- */`

			`/*!`
			`* \file dynqueue.hpp`
			`* \ingroup aux_classes`
			`*`
			`* \brief Implementation of a dynamic queue. Not currently used.`
			`*`
			`* Dynamic (list-based) Single-Writer Single-Reader`
			`* (or Single-Producer Single-Consumer) unbounded queue.`
			`*`
			`* No lock is needed around pop and push methods.`
			`* See also ubuffer.hpp for a more efficient SPSC unbounded queue.`
			`*`
			`* M. Aldinucci, M. Danelutto, P. Kilpatrick, M. Meneghin, and M. Torquati,`
			`* "An Efficient Unbounded Lock-Free Queue for Multi-core Systems,"`
			`* in Proc. of 18th Intl. Euro-Par 2012 Parallel Processing, Rhodes Island,`
			`* Greece, 2012, pp. 662-673. doi:10.1007/978-3-642-32820-6_65`
			`*`
			`* \note Not currently used in the FastFlow implementation.`
			`*/`

			`#ifndef FF_DYNQUEUE_HPP`
			`#define FF_DYNQUEUE_HPP`

			`/* ***************************************************************************`
			`*`
			`* FastFlow is free software; you can redistribute it and/or modify it`
			`* under the terms of the GNU Lesser General Public License version 3 as`
			`* published by the Free Software Foundation.`
			`* Starting from version 3.0.1 FastFlow is dual licensed under the GNU LGPLv3`
			`* or MIT License (https://github.com/ParaGroup/WindFlow/blob/vers3.x/LICENSE.MIT)`
			`*`
			`* This program is distributed in the hope that it will be useful, but WITHOUT`
			`* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
			`* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public`
			`* License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public License`
			`* along with this program; if not, write to the Free Software Foundation,`
			`* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.`
			`*`
			`****************************************************************************`
			`*/`


			`#include <stdlib.h>`
			`#include <ff/buffer.hpp>`
			`#include <ff/spin-lock.hpp> // used only for mp_push and mp_pop`
			`#include <ff/sysdep.h>`

			`namespace ff {`

			`#if !defined(_FF_DYNQUEUE_OPTIMIZATION)`

			`class dynqueue {`
			`private:`
			`struct Node {`
			`void * data;`
			`struct Node * next;`
			`};`

			`union {`
			`Node * volatile head;`
			`char padding1[CACHE_LINE_SIZE];`
			`//long padding1[longxCacheLine-(sizeof(Node *)/sizeof(long))];`
			`};`
			`union {`
			`Node * volatile tail;`
			`char padding2[CACHE_LINE_SIZE];`
			`//long padding2[longxCacheLine-(sizeof(Node*)/sizeof(long))];`
			`};`

			`/* ----- two-lock used only in the mp_push and mp_pop methods ------- */`
			`/* */`
			`/* By using the mp_push and mp_pop methods as standard push and pop, */`
			`/* the dynqueue algorithm basically implements the well-known */`
			`/* Michael and Scott 2-locks MPMC queue. */`
			`/* */`
			`/* */`
			`/*`
			`union {`
			`lock_t P_lock;`
			`char padding3[CACHE_LINE_SIZE];`
			`};`
			`union {`
			`lock_t C_lock;`
			`char padding4[CACHE_LINE_SIZE];`
			`};`
			`*/`
			`ALIGN_TO_PRE(CACHE_LINE_SIZE)`
			`lock_t P_lock;`
			`ALIGN_TO_POST(CACHE_LINE_SIZE)`

			`ALIGN_TO_PRE(CACHE_LINE_SIZE)`
			`lock_t C_lock;`
			`ALIGN_TO_POST(CACHE_LINE_SIZE)`

			`/* -------------------------------------------------------------- */`

			`// internal cache`
			`// if mp_push and mp_pop methods are used the cache access is lock protected`
			`#if defined(STRONG_WAIT_FREE)`
			`Lamport_Buffer cache;`
			`#else`
			`SWSR_Ptr_Buffer cache;`
			`#endif`

			`private:`
			`inline Node * allocnode() {`
			`union { Node * n; void * n2; } p;`
			`#if !defined(NO_CACHE)`
			`if (cache.pop(&p.n2)) return p.n;`
			`#endif`
			`p.n = (Node *)::malloc(sizeof(Node));`
			`return p.n;`
			`}`

			`inline Node * mp_allocnode() {`
			`union { Node * n; void * n2; } p;`
			`#if !defined(NO_CACHE)`
			`spin_lock(P_lock);`
			`if (cache.pop(&p.n2)) {`
			`spin_unlock(P_lock);`
			`return p.n;`
			`}`
			`spin_unlock(P_lock);`
			`#endif`
			`p.n = (Node *)::malloc(sizeof(Node));`
			`return p.n;`
			`}`

			`public:`
			`enum {DEFAULT_CACHE_SIZE=1024};`

			`dynqueue(int cachesize=DEFAULT_CACHE_SIZE, bool fillcache=false):cache(cachesize) {`
			`Node * n = (Node *)::malloc(sizeof(Node));`
			`n->data = NULL; n->next = NULL;`
			`head=n;`
			`tail=n;`
			`cache.init();`
			`if (fillcache) {`
			`for(int i=0;i<cachesize;++i) {`
			`n = (Node *)::malloc(sizeof(Node));`
			`if (n) cache.push(n);`
			`}`
			`}`
			`init_unlocked(P_lock);`
			`init_unlocked(C_lock);`
			`// Avoid unused private field warning on padding vars`
			`//(void) padding1; (void) padding2 ; (void) padding3; (void) padding4;`
			`}`

			`bool init() { return true;}`

			`~dynqueue() {`
			`union { Node * n; void * n2; } p;`
			`if (cache.buffersize()>0) while(cache.pop(&p.n2)) free(p.n);`
			`while(head != tail) {`
			`p.n = (Node*)head;`
			`head = head->next;`
			`free(p.n);`
			`}`
			`if (head) free((void*)head);`
			`}`

			`inline bool push(void * const data) {`
			`assert(data != NULL);`
			`Node * n = allocnode();`
			`n->data = data; n->next = NULL;`
			`WMB();`
			`tail->next = n;`
			`tail = n;`

			`return true;`
			`}`

			`inline bool pop(void ** data) {`
			`assert(data != NULL);`
			`#if defined(STRONG_WAIT_FREE)`
			`if (head == tail) return false;`
			`#else`
			`if (head->next)`
			`#endif`
			`{`
			`Node * n = (Node *)head;`
			`*data = (head->next)->data;`
			`head = head->next;`
			`#if !defined(NO_CACHE)`
			`if (!cache.push(n)) ::free(n);`
			`#else`
			`::free(n);`
			`#endif`
			`return true;`
			`}`
			`return false;`
			`}`


			`inline unsigned long length() const { return 0;}`

			`/*`
			`* MS 2-lock MPMC algorithm PUSH method`
			`*/`
			`inline bool mp_push(void * const data) {`
			`assert(data != NULL);`
			`Node* n = mp_allocnode();`
			`n->data = data; n->next = NULL;`
			`ff::spin_lock(P_lock);`
			`tail->next = n;`
			`tail = n;`
			`spin_unlock(P_lock);`
			`return true;`
			`}`

			`/*`
			`* MS 2-lock MPMC algorithm POP method`
			`*/`
			`inline bool mp_pop(void ** data) {`
			`assert(data != NULL);`
			`spin_lock(C_lock);`
			`if (head->next) {`
			`Node * n = (Node *)head;`
			`*data = (head->next)->data;`
			`head = head->next;`
			`bool f = cache.push(n);`
			`spin_unlock(C_lock);`
			`if (!f) ::free(n);`
			`return true;`
			`}`
			`spin_unlock(C_lock);`
			`return false;`
			`}`
			`};`

			`#else // _FF_DYNQUEUE_OPTIMIZATION`
			`/*`
			`* Experimental code`
			`*/`

			`class dynqueue {`
			`private:`
			`struct Node {`
			`void * data;`
			`struct Node * next;`
			`};`

			`Node * volatile head;`
			`volatile unsigned long pwrite;`
			`long padding1[longxCacheLine-((sizeof(Node *)+sizeof(unsigned long))/sizeof(long))];`
			`Node * volatile tail;`
			`volatile unsigned long pread;`
			`long padding2[longxCacheLine-((sizeof(Node*)+sizeof(unsigned long))/sizeof(long))];`

			`const size_t cachesize;`
			`void ** cache;`

			`private:`

			`inline bool cachepush(void * const data) {`

			`if (!cache[pwrite]) {`
			`/* Write Memory Barrier: ensure all previous memory write`
			`* are visible to the other processors before any later`
			`* writes are executed. This is an "expensive" memory fence`
			`* operation needed in all the architectures with a weak-ordering`
			`* memory model where stores can be executed out-or-order`
			`* (e.g. Powerpc). This is a no-op on Intel x86/x86-64 CPUs.`
			`*/`
			`WMB();`
			`cache[pwrite] = data;`
			`pwrite += (pwrite+1 >= cachesize) ? (1-cachesize): 1;`
			`return true;`
			`}`
			`return false;`
			`}`

			`inline bool cachepop(void ** data) {`
			`if (!cache[pread]) return false;`

			`*data = cache[pread];`
			`cache[pread]=NULL;`
			`pread += (pread+1 >= cachesize) ? (1-cachesize): 1;`
			`return true;`
			`}`

			`public:`
			`enum {DEFAULT_CACHE_SIZE=1024};`

			`dynqueue(int cachesize=DEFAULT_CACHE_SIZE, bool fillcache=false):cachesize(cachesize) {`
			`Node * n = (Node *)::malloc(sizeof(Node));`
			`n->data = NULL; n->next = NULL;`
			`head=n;`
			`tail=n;`

			`cache=(void*)getAlignedMemory(longxCacheLinesizeof(long),cachesizesizeof(void));`
			`if (!cache) {`
			`error("FATAL ERROR: dynqueue no memory available!\n");`
			`abort();`
			`}`

			`if (fillcache) {`
			`for(int i=0;i<cachesize;++i) {`
			`n = (Node *)::malloc(sizeof(Node));`
			`if (n) cachepush(n);`
			`}`
			`}`
			`}`

			`~dynqueue() {`
			`union { Node * n; void * n2; } p;`
			`while(cachepop(&p.n2)) free(p.n);`
			`while(head != tail) {`
			`p.n = (Node*)head;`
			`head = head->next;`
			`free(p.n);`
			`}`
			`if (head) free((void*)head);`
			`if (cache) freeAlignedMemory(cache);`
			`}`

			`inline bool push(void * const data) {`
			`assert(data != NULL);`

			`union { Node * n; void * n2; } p;`
			`if (!cachepop(&p.n2))`
			`p.n = (Node *)::malloc(sizeof(Node));`

			`p.n->data = data; p.n->next = NULL;`
			`WMB();`
			`tail->next = p.n;`
			`tail = p.n;`

			`return true;`
			`}`

			`inline bool pop(void ** data) {`
			`assert(data != NULL);`
			`if (head->next) {`
			`Node * n = (Node *)head;`
			`*data = (head->next)->data;`
			`head = head->next;`

			`if (!cachepush(n)) free(n);`
			`return true;`
			`}`
			`return false;`
			`}`
			`};`

			`#endif // _FF_DYNQUEUE_OPTIMIZATION`


			`} // namespace`

			`#endif /* FF_DYNQUEUE_HPP */`