"ata_rw28",
"bcast",
"bench",
+ "bomp",
"boot_perfmon",
"bulkbench",
"diskd",
--- /dev/null
+/*
+ * Copyright (c) 2014, ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+/**
+ * \brief interface between bomp threads
+ */
+interface bomp "Barrelfish OpenMP Interface" {
+
+ /*
+ * ==========================================================================
+ * Messages between Node Masters and Worker Threads
+ * ==========================================================================
+ */
+
+ /**
+ * \brief initiates the execution of a thread
+ *
+ * \param fn Address of the function to call
+ * \param arg  Pointer to the argument
+ * \param tid The thread's ID
+ * \param icv The thread's task control variables
+ */
+ message execute(uint64 fn,
+ uint64 arg,
+ uint32 tid,
+ uint64 icv);
+
+ /**
+ * \brief message signalling the termination of a bomp thread
+ *
+ * \param status The thread's return value
+ */
+ message done(errval status);
+
+ /*
+ * ==========================================================================
+ * Messages between Domain Master and Node Masters
+ * ==========================================================================
+ */
+
+ rpc initialize(out errval status,
+ out uint32 nthreads);
+
+
+
+ /*
+ * -------------------------------------------------------------------------
+ * Execution
+ * -------------------------------------------------------------------------
+ */
+
+ message execute_range(uint64 fn,
+ uint64 addr,
+ uint32 from,
+ uint32 to,
+ uint32 nthreads);
+
+
+ /*
+ * -------------------------------------------------------------------------
+ * Memory
+ * -------------------------------------------------------------------------
+ */
+ rpc map(in cap frame,
+ in uint64 addr,
+ out errval status);
+
+
+ rpc update(in uint64 addr,
+ in uint64 offset,
+ in uint64 length,
+ out errval status);
+
+};
#define OMP_SUPPORT_DYNAMIC 0
#include <stddef.h> // for size_t
+#include <barrelfish_kpi/spinlocks_arch.h>
+#include <barrelfish/thread_sync.h>
/* a simple OpenMP lock */
-typedef void *omp_lock_t;
+typedef struct __omp_lock
+{
+ struct thread_mutex mutex;
+ uint8_t initialized;
+} omp_lock_t;
/* a nestable OpenMP lock */
-typedef void *omp_nest_lock_t;
+typedef struct __omp_nested_lock
+{
+ struct thread_mutex mutex;
+ void *owner;
+ uint32_t count;
+ uint8_t initialized;
+} omp_nest_lock_t;
+
/**
* BOMP backend types
bomp_backend_t bomp_get_backend(void);
+///< Default stack size for BOMP threads
+#define BOMP_DEFAULT_STACKSIZE (64 * 1024)
+
+///< Flag indicating that all threads should be used
+#define BOMP_THREADS_ALL ((unsigned int) -1)
+
+/*
+ * ------------------------------------------------------------------------------
+ * BOMP initialization Functions
+ * ------------------------------------------------------------------------------
+ */
+
+/**
+ * \brief initializes the BOMP library using the cores indicated in the bitmap
+ *
+ * \param coresbm bitmap representing the cores to run on
+ * \param stack_size size of the thread's stack in bytes
+ *
+ * \returns 0 on SUCCESS
+ * non-zero on FAILURE
+ */
+int bomp_init_cores(void *coresbm, size_t stack_size);
+
+/**
+ * \brief initializes the BOMP library with the given stack size
+ *
+ * \param nthreads   number of threads to create
+ * \param stack_size size of each thread's stack in bytes
+ *
+ * \returns 0 on SUCCESS
+ * non-zero on FAILURE
+ *
+ * The threads will run on the first nthreads cores.
+ */
+int bomp_init_varstack(unsigned int nthreads, size_t stack_size);
+
+/**
+ * \brief initializes the BOMP library using the given thread number
+ *
+ * \param nthreads the number of threads to prepare
+ *
+ * \returns 0 on SUCCESS
+ * non-zero on FAILURE
+ *
+ * The threads will run on the first nthreads cores and use the default BOMP
+ * stack size.
+ */
+static inline int bomp_init(unsigned int nthreads)
+{
+ return bomp_init_varstack(nthreads, BOMP_DEFAULT_STACKSIZE);
+}
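+
+/*
+ * Usage sketch (an assumption of intended use, not code from this library):
+ * an application initializes BOMP once before entering OpenMP regions.
+ *
+ *     #include <omp.h>
+ *
+ *     int main(void)
+ *     {
+ *         if (bomp_init(BOMP_THREADS_ALL)) {
+ *             return 1; // initialization failed
+ *         }
+ *         #pragma omp parallel
+ *         do_work();   // hypothetical worker function
+ *         return 0;
+ *     }
+ */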
+
+#if 0
+/*
+ * Backend specific main thread runners
+ * needed on Barrelfish as we don't dynamically grow thread stacks
+ */
+typedef int (*main_func_t)(void *);
+int bomp_run_main(main_func_t mainfunc, void *mainarg, size_t stacksize);
+#endif
+
/*
* OpenMP Library API as defined by openmp.org
*/
+
extern void omp_set_num_threads(int num_threads);
extern int omp_get_num_threads(void);
extern int omp_get_max_threads(void);
extern void omp_init_lock(omp_lock_t *lock);
extern void omp_destroy_lock(omp_lock_t *lock);
-extern void omp_set_lock(omp_lock_t lock);
-extern void omp_unset_lock(omp_lock_t lock);
-extern int omp_test_lock(omp_lock_t lock);
+extern void omp_set_lock(omp_lock_t *lock);
+extern void omp_unset_lock(omp_lock_t *lock);
+extern int omp_test_lock(omp_lock_t *lock);
extern void omp_init_nest_lock(omp_nest_lock_t *lock);
extern void omp_destroy_nest_lock(omp_nest_lock_t *lock);
-extern void omp_set_nest_lock(omp_nest_lock_t lock);
-extern void omp_unset_nest_lock(omp_nest_lock_t lock);
-extern int omp_test_nest_lock(omp_nest_lock_t lock);
+extern void omp_set_nest_lock(omp_nest_lock_t *lock);
+extern void omp_unset_nest_lock(omp_nest_lock_t *lock);
+extern int omp_test_nest_lock(omp_nest_lock_t *lock);
extern double omp_get_wtime(void);
extern double omp_get_wtick(void);
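+
+/*
+ * Lock usage sketch: with the struct-based lock types above, a lock must be
+ * initialized before first use and is now passed by pointer throughout.
+ *
+ *     static omp_lock_t lock;
+ *
+ *     omp_init_lock(&lock);
+ *     omp_set_lock(&lock);
+ *     // ... critical section ...
+ *     omp_unset_lock(&lock);
+ *     omp_destroy_lock(&lock);
+ */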
--- /dev/null
+--------------------------------------------------------------------------
+-- Copyright (c) 2007-2009, ETH Zurich.
+-- All rights reserved.
+--
+-- This file is distributed under the terms in the attached LICENSE file.
+-- If you do not find this file, copies can be found by writing to:
+-- ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
+--
+-- Hakefile for lib/bomp
+--
+--------------------------------------------------------------------------
+
+[ build library {
+ target = "bomp_new",
+ cFiles = concat [ find cInDir dir | dir <- ["."] ],
+ addLibraries = [
+ "bench", -- for basic benchmarking
+ "numa", -- get topology information
+ "bitmap"
+ ],
+ addIncludes = [
+ "include"
+ ],
+ flounderDefs = [
+ "bomp"
+ ],
+ flounderBindings = [
+ "bomp"
+ ],
+ architectures = [
+ "x86_64",
+ "k1om"
+ ]
+ } ,
+
+ build application {
+ target = "bomp_new_test",
+ cFiles = ["test/test.c"],
+ addCFlags = ["-fopenmp"],
+ addLibraries = ["bomp_new"],
+ architectures = [
+ "x86_64",
+ "k1om"
+ ]
+ }
+]
+
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+void GOMP_atomic_start(void)
+{
+ assert(!"NYI");
+// assert(g_bomp_state);
+// bomp_lock(&g_bomp_state->atomic_lock);
+}
+
+void GOMP_atomic_end(void)
+{
+ assert(!"NYI");
+// assert(g_bomp_state);
+// bomp_unlock(&g_bomp_state->atomic_lock);
+}
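+
+/*
+ * For reference: when an atomic update cannot be mapped to a hardware
+ * instruction, GCC brackets the update with these entry points, i.e.
+ *
+ *     #pragma omp atomic
+ *     x += 1;
+ *
+ * is lowered to roughly
+ *
+ *     GOMP_atomic_start(); x += 1; GOMP_atomic_end();
+ *
+ * which is why a single global lock (see the commented-out code above)
+ * suffices as an implementation.
+ */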
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * These functions implement the BARRIER construct
+ */
+
+void GOMP_barrier(void)
+{
+ assert(!"NYI");
+// assert(g_bomp_state);
+//
+// struct bomp_thread_local_data *th_local_data = g_bomp_state->backend.get_tls();
+// assert(th_local_data != NULL);
+// bomp_barrier_wait(th_local_data->work->barrier);
+}
+
+bool GOMP_barrier_cancel (void)
+{
+ assert(!"NYI");
+ return 0;
+}
--- /dev/null
+/**
+ * \file
+ * \brief API to use the bomp library
+ */
+
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#include <bomp_internal.h>
+
+void bomp_start_processing(void (*fn)(void *),
+ void *data,
+ coreid_t tid_start,
+ coreid_t nthreads)
+{
+ struct bomp_tls *tls = thread_get_tls();
+
+ debug_printf("bomp_start_processing(%p, %p, %u, %u)\n", fn, data, tid_start, nthreads);
+
+ /* this function must only be called by the program and node masters */
+ assert(tls->role == BOMP_THREAD_ROLE_MASTER || tls->role == BOMP_THREAD_ROLE_NODE);
+
+ /* add one to the tid_start as this will be our ID */
+ coreid_t tid_current = tid_start + 1;
+
+ struct bomp_node *node;
+
+ if (tls->role == BOMP_THREAD_ROLE_MASTER) {
+ node = &tls->r.master.local;
+
+ if (nthreads > (node->threads_max + 1)) {
+ /* send the requests to the node masters */
+ nthreads -= (node->threads_max + 1);
+ for (nodeid_t i = 0; i < tls->r.master.num_nodes; ++i) {
+ coreid_t num = bomp_node_exec(&tls->r.master.nodes[i], fn, data, tid_start, nthreads);
+ assert(num <= nthreads);
+ tls->r.master.nodes_active++;
+ nthreads -= num;
+ tid_current += num;
+ if (nthreads == 0) {
+ break;
+ }
+ }
+ nthreads += (node->threads_max);
+ }
+ } else if (tls->role == BOMP_THREAD_ROLE_NODE) {
+ node = &tls->r.node;
+ }
+
+ debug_printf("nthreads=%u, max_threads=%u\n", nthreads, node->threads_max);
+
+    assert((node->threads_max + 1) >= nthreads);
+
+ struct omp_icv_task *icv = bomp_icv_get()->task;
+
+ for (coreid_t i = 1; i < nthreads; ++i) {
+ node->threads[i].icvt = icv;
+ node->threads_active++;
+ bomp_thread_exec(&node->threads[i], fn, data, tid_current);
+ tid_current++;
+ }
+
+ /* set the local thread ID */
+ tls->thread_id = 0;
+
+ return;
+#if 0
+ /* Create Threads and ask them to process the function specified */
+ /* Let them die as soon as they are done */
+ unsigned i;
+ struct bomp_work *xdata;
+ struct bomp_barrier *barrier;
+
+ g_bomp_state->num_threads = nthreads;
+
+ char *memory = calloc(
+ 1,
+ nthreads * sizeof(struct bomp_thread_local_data *)
+ + sizeof(struct bomp_barrier)
+ + nthreads * sizeof(struct bomp_work));
+ assert(memory != NULL);
+
+ g_bomp_state->tld = (struct bomp_thread_local_data **) memory;
+ memory += nthreads * sizeof(struct bomp_thread_local_data *);
+
+    /* Create a barrier for the work that will be carried out by the threads */
+ barrier = (struct bomp_barrier *) memory;
+ memory += sizeof(struct bomp_barrier);
+ bomp_barrier_init(barrier, nthreads);
+
+ /* For main thread */
+ xdata = (struct bomp_work *) memory;
+ memory += sizeof(struct bomp_work);
+
+ xdata->fn = fn;
+ xdata->data = data;
+ xdata->thread_id = 0;
+ xdata->barrier = barrier;
+ bomp_set_tls(xdata);
+
+ for (i = 1; i < nthreads; i++) {
+ xdata = (struct bomp_work *) memory;
+ memory += sizeof(struct bomp_work);
+
+ xdata->fn = fn;
+ xdata->data = data;
+ xdata->thread_id = i;
+ xdata->barrier = barrier;
+
+ /* Create threads */
+ bomp_run_on(i * BOMP_DEFAULT_CORE_STRIDE + THREAD_OFFSET, bomp_thread_fn,
+ xdata);
+ }
+#endif
+}
+
+void bomp_end_processing(void)
+{
+ debug_printf("bomp_end_processing\n");
+ struct bomp_tls *tls = thread_get_tls();
+ struct waitset *ws = get_default_waitset();
+ if (tls->role == BOMP_THREAD_ROLE_MASTER) {
+ struct bomp_node *node = &tls->r.master.local;
+ struct bomp_master *master = &tls->r.master;
+ while(master->nodes_active != 1 || node->threads_active != 1) {
+ event_dispatch(ws);
+ }
+ } else if (tls->role == BOMP_THREAD_ROLE_NODE) {
+ struct bomp_node *node = &tls->r.node;
+ while(node->threads_active != 0) {
+ event_dispatch(ws);
+ }
+ }
+
+ free(tls->icv.task);
+ tls->icv.task = NULL;
+
+ debug_printf("bomp_end_processing: done\n");
+}
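+
+/*
+ * Fork-join flow as implemented above (sketch):
+ *
+ *   master: bomp_start_processing() sends an execute message per worker
+ *           via bomp_thread_exec()
+ *   worker: execute__rx() runs fn(arg), then replies with done
+ *   master: done__rx() decrements threads_active
+ *   master: bomp_end_processing() dispatches events until all workers
+ *           (and node masters) have reported done
+ */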
--- /dev/null
+/**
+ * \file
+ * \brief API to use the bomp library
+ */
+
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <barrelfish/barrelfish.h>
+
+
+#include <bomp_internal.h>
+/*
+ * NOTES
+ *
+ * - nodes and worker store their local state in the TLS or binding
+ * - master thread stores it in global variable
+ *
+ * master threads
+ * - list of nodes -> execute on
+ * - if having list of workers -> execute on
+ *
+ *
+ */
+
+
+/**
+ * \brief initializes the BOMP library using the cores indicated in the bitmap
+ *
+ * \param coresbm bitmap representing the cores to run on
+ * \param stack_size size of the thread's stack in bytes
+ *
+ * \returns 0 on SUCCESS
+ * non-zero on FAILURE
+ */
+int bomp_init_cores(void *coresbm, size_t stack_size)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+/**
+ * \brief initializes the BOMP library with the given stack size
+ *
+ * \param nthreads   number of threads to create
+ * \param stack_size size of each thread's stack in bytes
+ *
+ * \returns 0 on SUCCESS
+ * non-zero on FAILURE
+ *
+ * The threads will run on the first nthreads cores.
+ */
+int bomp_init_varstack(unsigned int nthreads, size_t stack_size)
+{
+    /// make sure the NUMA library is available
+ if (numa_available() != 0) {
+ return -1;
+ }
+
+ if (nthreads == BOMP_THREADS_ALL) {
+ nthreads = numa_num_configured_cpus();
+ }
+
+ nodeid_t node_current = numa_current_node();
+ nodeid_t node_count = 1;
+
+#if 0
+
+ if (nthreads > numa_num_node_cpus(node_current)) {
+ coreid_t threads_remaining = nthreads - numa_num_node_cpus(node_current);
+ /* determine the number of needed nodes */
+ for (nodeid_t node = 0; node <= numa_max_node(); ++node) {
+ if (node == node_current) {
+ continue;
+ }
+ node_count++;
+ if (threads_remaining < numa_num_node_cpus(node)) {
+ threads_remaining = 0;
+ break;
+ } else {
+ threads_remaining -= numa_num_node_cpus(node);
+ }
+
+ }
+
+ if (threads_remaining) {
+        /* Not enough cores available to serve the nthreads request */
+ BOMP_ERROR("not enough cores available: need %" PRIuCOREID " more \n",
+ threads_remaining);
+ return -2;
+ }
+ }
+#endif
+
+
+ BOMP_DEBUG_INIT("Initializing BOMP with a %" PRIuNODEID " nodes of %" PRIuCOREID
+ " threads\n", node_count, nthreads);
+
+ bomp_icv_init_default((coreid_t)nthreads);
+
+ struct bomp_tls *tls = calloc(1, sizeof(struct bomp_tls));
+ if (tls == NULL) {
+ return LIB_ERR_MALLOC_FAIL;
+ }
+
+ tls->role = BOMP_THREAD_ROLE_MASTER;
+ tls->self = thread_self();
+
+ if (node_count > 1) {
+ tls->r.master.nodes = calloc(node_count, sizeof(struct bomp_node));
+ if (tls->r.master.nodes == NULL) {
+ free(tls);
+ return LIB_ERR_MALLOC_FAIL;
+ }
+ tls->r.master.num_nodes = node_count - 1;
+ } else {
+ tls->r.master.num_nodes = 0;
+ tls->r.master.nodes = NULL;
+ }
+
+ tls->icv.global = &g_omp_icv_global_default;
+ tls->icv.device = &g_omp_icv_device_default;
+ tls->icv.task = NULL;
+
+ thread_set_tls(tls);
+
+ // divide the threads equally among the nodes
+ coreid_t threads_per_node = (coreid_t)(nthreads / node_count);
+
+#if 0
+ coreid_t master_threads = numa_num_node_cpus(node_current);
+#else
+ coreid_t master_threads = nthreads;
+#endif
+
+ if (master_threads > threads_per_node) {
+ master_threads = threads_per_node;
+ }
+
+ nthreads -= master_threads;
+
+ nodeid_t numa_node = 0;
+ for (nodeid_t node = 1; node < node_count; ++node) {
+ if (numa_node == node_current) {
+ numa_node++;
+ }
+ coreid_t num_threads = numa_num_node_cpus(node);
+ if (num_threads > threads_per_node) {
+ num_threads = threads_per_node;
+ }
+
+ bomp_node_init(BOMP_NODE_LOCAL, numa_node, node, num_threads,
+ stack_size, &tls->r.master.nodes[node]);
+
+ nthreads -= num_threads;
+ }
+
+ /* now all the other threads should have been initialized */
+ assert(nthreads == 0);
+
+ /* initialize the local node */
+ bomp_node_init(BOMP_NODE_MASTER, node_current, 0, master_threads,
+ stack_size, &tls->r.master.local);
+
+ // the master thread is active
+ tls->r.master.local.threads_active = 1;
+ tls->r.master.nodes_active = 1;
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+/**
+ * \brief this file contains the control channel implementation between the
+ *        program master and the node masters
+ */
+#include <barrelfish/barrelfish.h>
+#include <bomp_internal.h>
+#include <bitmap.h>
+
+/* forward declaration */
+static int bomp_node_msg_handler(void *arg);
+
+///< stores the state of a message in transit
+struct bomp_msg_st
+{
+ struct txq_msg_st common; ///< common msg state
+
+ /* union of arguments */
+ union {
+ struct {
+ uint64_t fn; ///< the function
+ uint64_t arg; ///< the argument to the function
+ uint32_t tid; ///< thread ID
+ uint64_t icv; ///< thread's control variables
+ } exec; ///< execution
+ } args;
+};
+
+/*
+ * ==============================================================================
+ * Control Channel: Program Master Side
+ * ==============================================================================
+ */
+
+
+static errval_t bomp_node_init_threads(nodeid_t nodeid,
+ nodeid_t numanode,
+ coreid_t nthreads,
+ size_t stack_size,
+ struct bomp_node *node)
+{
+ errval_t err;
+
+ BOMP_DEBUG_NODE("Initialize worker threads for node %" PRIuNODEID " with %"
+ PRIuCOREID " threads\n", nodeid, nthreads);
+
+ node->threads = calloc(nthreads, sizeof(struct bomp_thread));
+ if (node->threads == NULL) {
+ return LIB_ERR_MALLOC_FAIL;
+ }
+
+#if 0
+ struct bitmap *bm = numa_allocate_cpumask();
+ err = numa_node_to_cpus(numanode, bm);
+ assert(err_is_ok(err));
+#else
+ struct bitmap *bm = numa_all_cpus_ptr;
+#endif
+
+
+ coreid_t core = (coreid_t)bitmap_get_first(bm);
+
+ for (coreid_t i = 0; i < nthreads; ++i) {
+ BOMP_DEBUG_NODE("spanning to core %u\n", core);
+ node->threads_max++;
+ node->threads[i].node = node;
+ if (core == disp_get_core_id()) {
+ /* master thread */
+ core = (coreid_t)bitmap_get_next(bm, core + 1);
+ continue;
+ }
+
+ err = bomp_thread_init(core, stack_size, &node->threads[i]);
+ if (err_is_fail(err)) {
+ DEBUG_ERR(err, "creating thread\n");
+ return err;
+ }
+
+ core = (coreid_t)bitmap_get_next(bm, core + 1);
+ }
+ node->threads_active = 0;
+ node->tls = thread_get_tls();
+
+ return SYS_ERR_OK;
+}
+
+/**
+ * \brief callback for creating the dispatcher on the remote core
+ *
+ * \param arg argument for the callback
+ * \param err outcome of the spanning request
+ */
+static void bomp_node_init_done(void *arg, errval_t err)
+{
+ assert(err_is_ok(err));
+
+ uint32_t *done = arg;
+ *done = 1;
+}
+
+
+/**
+ * \brief callback when the BOMP thread connects to the node
+ *
+ * \param st state pointer
+ * \param err status of the connect
+ * \param _b created BOMP binding
+ */
+static void bomp_node_accept_cb(void *st,
+ errval_t err,
+ struct bomp_binding *_b)
+{
+ struct bomp_node *n = st;
+
+ BOMP_DEBUG_NODE("connection accepted. tid=%" PRIuCOREID "\n", n->id);
+
+ n->node_err = err;
+
+ txq_init(&n->txq, _b, _b->waitset, (txq_register_fn_t) _b->register_send,
+ sizeof(struct bomp_msg_st));
+
+ _b->st = st;
+ n->ctrl = _b;
+
+ // _b->rx_vtbl.done = done__rx;
+}
+
+/* a node that is on our local address space */
+static errval_t bomp_node_init_local(nodeid_t nodeid,
+ nodeid_t numanode,
+ coreid_t nthreads,
+ size_t stack_size,
+ struct bomp_node *node)
+{
+ BOMP_DEBUG_NODE("Initialize local node node %" PRIuNODEID " with %"
+ PRIuCOREID " threads\n", nodeid, nthreads);
+
+ errval_t err;
+
+ uint32_t done;
+
+ node->id = nodeid;
+ node->numa_node = numanode;
+    node->threads_max = nthreads;
+    node->stack_size = stack_size;
+
+ struct bitmap *bm = numa_allocate_cpumask();
+ err = numa_node_to_cpus(numanode, bm);
+ assert(err_is_ok(err));
+
+ coreid_t core = (coreid_t)bitmap_get_first(bm);
+
+ err = domain_new_dispatcher(core, bomp_node_init_done, &done);
+ if (err_is_fail(err)) {
+ BOMP_ERROR("creating new dispatcher on core %" PRIuCOREID "failed\n",
+ core);
+ return err;
+ }
+
+ while(!done) {
+ thread_yield();
+ }
+
+ BOMP_DEBUG_NODE("dispatcher ready. allocating memory for msg channel\n");
+
+ size_t msg_frame_size;
+ err = frame_alloc(&node->msgframe, 2 * BOMP_CHANNEL_SIZE, &msg_frame_size);
+ if (err_is_fail(err)) {
+ return err;
+ }
+
+ err = vspace_map_one_frame(&node->msgbuf, msg_frame_size, node->msgframe,
+ NULL, NULL);
+ if (err_is_fail(err)) {
+ return err;
+ }
+
+ struct bomp_frameinfo fi = {
+ .sendbase = (lpaddr_t)node->msgbuf + BOMP_CHANNEL_SIZE,
+ .inbuf = node->msgbuf,
+ .inbufsize = BOMP_CHANNEL_SIZE,
+ .outbuf = ((uint8_t *) node->msgbuf) + BOMP_CHANNEL_SIZE,
+ .outbufsize = BOMP_CHANNEL_SIZE
+ };
+
+ BOMP_DEBUG_NODE("creating channel on %p\n", node->msgbuf);
+
+ err = bomp_accept(&fi, node, bomp_node_accept_cb,
+ get_default_waitset(), IDC_EXPORT_FLAGS_DEFAULT);
+
+ if (err_is_fail(err)) {
+ // XXX> error handling
+ return err;
+ }
+
+ BOMP_DEBUG_NODE("creating thread on core %" PRIuCOREID "\n", core);
+ err = domain_thread_create_on(core, bomp_node_msg_handler, node);
+ if (err_is_fail(err)) {
+ // XXX> error handling
+ return err;
+ }
+
+ while (node->ctrl == NULL) {
+ err = event_dispatch(get_default_waitset());
+ if (err_is_fail(err)) {
+ USER_PANIC_ERR(err, "event dispatch\n");
+ }
+ }
+
+ BOMP_DEBUG_NODE("node master on node %" PRIuNODEID " connected \n", nodeid);
+
+ return node->node_err;
+}
+
+/* remote node: a node that is in a foreign address space */
+static errval_t bomp_node_init_remote(nodeid_t nodeid,
+ coreid_t nthreads,
+ size_t stack_size,
+ struct bomp_node *node)
+{
+ BOMP_DEBUG_NODE("Initialize remote node node %" PRIuNODEID " with %"
+ PRIuCOREID " threads\n", nodeid, nthreads);
+
+ assert(!"NYI");
+ return SYS_ERR_OK;
+}
+
+
+/**
+ * \brief initializes a BOMP execution node of the given type
+ */
+errval_t bomp_node_init(bomp_node_type_t type,
+ nodeid_t numanode,
+ nodeid_t nodeid,
+ coreid_t nthreads,
+ size_t stack_size,
+ struct bomp_node *node)
+{
+ node->type = type;
+
+ switch(type) {
+ case BOMP_NODE_MASTER :
+ return bomp_node_init_threads(nodeid, numanode, nthreads, stack_size, node);
+ break;
+ case BOMP_NODE_LOCAL:
+ return bomp_node_init_local(nodeid, numanode, nthreads, stack_size, node);
+ break;
+ case BOMP_NODE_REMOTE :
+ return bomp_node_init_remote(nodeid, nthreads, stack_size, node);
+ break;
+ default:
+ return -1;
+ break;
+ }
+}
+
+
+
+coreid_t bomp_node_exec(struct bomp_node *node, void *fn, void *arg, coreid_t tid_start, coreid_t nthreads)
+{
+ debug_printf("Executing on node %u\n", node->id);
+ assert(!"NYI");
+    return node->threads_max;
+}
+
+#if 0
+
+
+/*
+ * ==============================================================================
+ * Control Channel: Node Master Side
+ * ==============================================================================
+ */
+
+/**
+ * \brief initializes the shared memory channel Node Master - Worker Threads
+ * (Worker Side)
+ *
+ * \param channel address of the message buffers to use
+ */
+errval_t bomp_noded_channel_bind(void *channel)
+{
+ assert(!"NYI");
+
+ return SYS_ERR_OK;
+}
+
+#endif
+
+/**
+ * \brief BOMP channel connect callback called by the Flounder backend
+ *
+ * \param st    Supplied worker state
+ * \param err   outcome of the connect attempt
+ * \param b     BOMP Flounder binding
+ */
+static void bomp_node_connect_cb(void *st,
+ errval_t err,
+ struct bomp_binding *b)
+{
+ struct bomp_thread *t = st;
+
+ BOMP_DEBUG_THREAD("connected to node master.\n");
+
+ t->ctrl = b;
+
+ txq_init(&t->txq, b, b->waitset, (txq_register_fn_t) b->register_send,
+ sizeof(struct bomp_msg_st));
+
+ //b->rx_vtbl.execute = execute__rx;
+}
+
+/**
+ * \brief message handler loop of a node master thread
+ *
+ * \param arg   pointer to the node's struct bomp_node
+ *
+ * \return 0 on termination
+ */
+static int bomp_node_msg_handler(void *arg)
+{
+ BOMP_DEBUG_NODE("node master message handler started\n");
+
+ errval_t err;
+
+ struct bomp_tls *tls = calloc(1, sizeof(struct bomp_tls));
+ if (tls == NULL) {
+ BOMP_ERROR("Could not allocate memory for TLS. %p\n", arg);
+ return -1;
+ }
+
+ struct bomp_node *node = arg;
+
+ assert(numa_current_node() == node->numa_node);
+
+ tls->role = BOMP_THREAD_ROLE_NODE;
+ tls->self = thread_self();
+ tls->r.node.id = node->id;
+ tls->r.node.msgbuf = node->msgbuf;
+ tls->r.node.tls = tls;
+ tls->r.node.stack_size = node->stack_size;
+
+ struct bomp_frameinfo fi = {
+ .sendbase = (lpaddr_t)arg,
+ .inbuf = ((uint8_t *) arg) + BOMP_CHANNEL_SIZE,
+ .inbufsize = BOMP_CHANNEL_SIZE,
+ .outbuf = ((uint8_t *) arg),
+ .outbufsize = BOMP_CHANNEL_SIZE
+ };
+
+ struct waitset *ws = get_default_waitset();
+
+ BOMP_DEBUG_NODE("initializing local worker threads\n");
+ err = bomp_node_init_threads(node->id, node->numa_node, node->threads_max,
+ node->stack_size, &tls->r.node);
+ if (err_is_fail(err)) {
+ DEBUG_ERR(err, "init threads\n");
+ }
+
+ assert(node->threads_max == tls->r.node.threads_max);
+
+ BOMP_DEBUG_NODE("connecting to program master\n");
+
+ err = bomp_connect(&fi, bomp_node_connect_cb, &tls->r.thread, ws,
+ IDC_EXPORT_FLAGS_DEFAULT);
+
+
+ if (err_is_fail(err)) {
+ /* TODO: Clean up */
+ return err_push(err, XOMP_ERR_WORKER_INIT_FAILED);
+ }
+
+ thread_set_tls(tls);
+
+ while(1) {
+ err = event_dispatch_non_block(ws);
+ switch(err_no(err)) {
+ case LIB_ERR_NO_EVENT :
+ thread_yield();
+ continue;
+ break;
+ case SYS_ERR_OK:
+ continue;
+ break;
+ default:
+ USER_PANIC_ERR(err, "event dispatch");
+ break;
+ }
+ }
+
+ BOMP_NOTICE("node master %" PRIuNODEID " terminated", tls->r.node.id);
+
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+/**
+ * \brief this file contains the control channel implementation between the
+ * node masters and the bomp worker threads
+ */
+
+#include <bomp_internal.h>
+
+#include <if/bomp_defs.h>
+
+
+/* forward declaration */
+static int bomp_thread_msg_handler(void *arg);
+
+///< stores the state of a message in transit
+struct bomp_msg_st
+{
+ struct txq_msg_st common; ///< common msg state
+
+    uint32_t *message_sent;         ///< set to 1 once the message is sent
+
+ /* union of arguments */
+ union {
+ struct {
+ uint64_t fn; ///< the function
+ uint64_t arg; ///< the argument to the function
+ uint32_t tid; ///< thread ID
+ uint64_t icv; ///< thread's control variables
+ } exec; ///< execution
+ } args;
+};
+
+/*
+ * ==============================================================================
+ * Control Channel: Node Master Side
+ * ==============================================================================
+ */
+
+/*
+ * -----------------------------------------------------------------------------
+ * RX Handlers
+ * -----------------------------------------------------------------------------
+ */
+
+static void done__rx(struct bomp_binding *_binding, bomp_errval_t status)
+{
+ BOMP_DEBUG_THREAD("done__rx from thread\n");
+
+ struct bomp_thread *t = _binding->st;
+
+ struct bomp_node *node = t->node;
+
+
+ node->threads_active--;
+
+ BOMP_DEBUG_THREAD("threads active %u\n", node->threads_active);
+
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * TX Handlers
+ * -----------------------------------------------------------------------------
+ */
+
+static void txq_msg_sent_cb(void *st)
+{
+ struct bomp_msg_st *msg_st = (struct bomp_msg_st *)st;
+ *(msg_st->message_sent) = 1;
+}
+
+static errval_t execute__tx(struct txq_msg_st *msg_st)
+{
+ struct bomp_msg_st *st = (struct bomp_msg_st *)msg_st;
+
+ return bomp_execute__tx(msg_st->queue->binding, TXQCONT(msg_st),
+ st->args.exec.fn, st->args.exec.arg, st->args.exec.tid,
+ st->args.exec.icv);
+}
+
+
+
+/**
+ * \brief callback when the BOMP thread connects to the node
+ *
+ * \param st state pointer
+ * \param err status of the connect
+ * \param _b created BOMP binding
+ */
+static void bomp_thread_accept_cb(void *st,
+ errval_t err,
+ struct bomp_binding *_b)
+{
+ struct bomp_thread *t = st;
+
+ BOMP_DEBUG_THREAD("connection accepted. tid=%" PRIuCOREID "\n", t->coreid);
+
+ t->thread_err = err;
+
+ txq_init(&t->txq, _b, _b->waitset, (txq_register_fn_t) _b->register_send,
+ sizeof(struct bomp_msg_st));
+
+ _b->st = st;
+ t->ctrl = _b;
+
+ _b->rx_vtbl.done = done__rx;
+}
+
+/**
+ * \brief callback for creating the dispatcher on the remote core
+ *
+ * \param arg argument for the callback
+ * \param err outcome of the spanning request
+ */
+static void bomp_thread_init_done(void *arg, errval_t err)
+{
+ assert(err_is_ok(err));
+
+ uint32_t *done = arg;
+ *done = 1;
+}
+
+/**
+ * \brief initializes a thread on the given core
+ *
+ * \param core        ID of the core to create the thread on
+ * \param stack_size  size of the stack of the thread to be created
+ * \param thread pointer to the thread struct to create
+ *
+ * \returns SYS_ERR_OK on SUCCESS
+ * errval on FAILURE
+ */
+errval_t bomp_thread_init(coreid_t core,
+ size_t stack_size,
+ struct bomp_thread *thread)
+{
+ errval_t err;
+
+ BOMP_DEBUG_THREAD("Creating thread on core %"PRIuCOREID " \n", core);
+
+ uint32_t done;
+
+ err = domain_new_dispatcher(core, bomp_thread_init_done, &done);
+ if (err_is_fail(err)) {
+ BOMP_ERROR("creating new dispatcher on core %" PRIuCOREID "failed\n",
+ core);
+ return err;
+ }
+
+ while(!done) {
+ thread_yield();
+ }
+
+ BOMP_DEBUG_THREAD("dispatcher ready. allocating memory for msg channel\n");
+
+ size_t msg_frame_size;
+ err = frame_alloc(&thread->msgframe, 2 * BOMP_CHANNEL_SIZE, &msg_frame_size);
+ if (err_is_fail(err)) {
+ return err;
+ }
+
+ err = vspace_map_one_frame(&thread->msgbuf, msg_frame_size, thread->msgframe,
+ NULL, NULL);
+ if (err_is_fail(err)) {
+ return err;
+ }
+
+ struct bomp_frameinfo fi = {
+ .sendbase = (lpaddr_t)thread->msgbuf + BOMP_CHANNEL_SIZE,
+ .inbuf = thread->msgbuf,
+ .inbufsize = BOMP_CHANNEL_SIZE,
+ .outbuf = ((uint8_t *) thread->msgbuf) + BOMP_CHANNEL_SIZE,
+ .outbufsize = BOMP_CHANNEL_SIZE
+ };
+
+ BOMP_DEBUG_THREAD("creating channel on %p\n", thread->msgbuf);
+
+ err = bomp_accept(&fi, thread, bomp_thread_accept_cb,
+ get_default_waitset(), IDC_EXPORT_FLAGS_DEFAULT);
+
+ if (err_is_fail(err)) {
+ // XXX> error handling
+ return err;
+ }
+
+ BOMP_DEBUG_THREAD("creating thread on core %" PRIuCOREID "\n", core);
+ err = domain_thread_create_on(core, bomp_thread_msg_handler, thread->msgbuf);
+ if (err_is_fail(err)) {
+ // XXX> error handling
+ return err;
+ }
+
+ while (thread->ctrl == NULL) {
+ err = event_dispatch(get_default_waitset());
+ if (err_is_fail(err)) {
+ USER_PANIC_ERR(err, "event dispatch\n");
+ }
+ }
+
+ BOMP_DEBUG_THREAD("thread on core %" PRIuCOREID " connected \n", core);
+
+ return thread->thread_err;
+}
+
+errval_t bomp_thread_exec(struct bomp_thread *thread,
+ bomp_thread_fn_t fn, void *arg, uint32_t tid)
+{
+ debug_printf("bomp_thread_exec(%p, %p, %p, %u) %p\n", thread, fn, arg, tid, thread->icvt);
+ struct txq_msg_st *msg_st = txq_msg_st_alloc(&thread->txq);
+ if (msg_st == NULL) {
+ return LIB_ERR_MALLOC_FAIL;
+ }
+
+ uint32_t msg_sent = 0;
+
+ msg_st->send = execute__tx;
+ msg_st->cleanup = (txq_cleanup_fn_t)txq_msg_sent_cb;
+
+ struct bomp_msg_st *bomp_msg_st = (struct bomp_msg_st *)msg_st;
+
+ bomp_msg_st->args.exec.arg = (uint64_t)arg;
+ bomp_msg_st->args.exec.fn = (uint64_t)fn;
+ bomp_msg_st->args.exec.tid = tid;
+ bomp_msg_st->args.exec.icv = (uint64_t)thread->icvt;
+ bomp_msg_st->message_sent = &msg_sent;
+
+ txq_send(msg_st);
+
+ while(msg_sent == 0) {
+ event_dispatch(get_default_waitset());
+ }
+
+ //return event_dispatch_non_block(get_default_waitset());
+ return SYS_ERR_OK;
+}
+
+/*
+ * ==============================================================================
+ * Control Channel: Worker Thread Side
+ * ==============================================================================
+ */
+
+/*
+ * -----------------------------------------------------------------------------
+ * TX Handlers
+ * -----------------------------------------------------------------------------
+ */
+
+static errval_t done__tx(struct txq_msg_st *msg_st)
+{
+ BOMP_DEBUG_THREAD("done__tx\n");
+
+    return bomp_done__tx(msg_st->queue->binding, TXQCONT(msg_st), msg_st->err);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * RX Handlers
+ * -----------------------------------------------------------------------------
+ */
+
+static void execute__rx(struct bomp_binding *_binding,
+ uint64_t fn, uint64_t arg, uint32_t tid, uint64_t icv_task)
+{
+    struct bomp_thread *t = _binding->st;
+ struct bomp_tls *tls = thread_get_tls();
+
+ BOMP_DEBUG_THREAD("execute__rx: %p %p, %lx\n", t, tls, icv_task);
+
+ assert(t == &tls->r.thread);
+
+ struct omp_icv_task icvt;
+ memcpy(&icvt, (void *)icv_task, sizeof(struct omp_icv_task));
+
+ bomp_icv_set_task(&icvt);
+
+ tls->thread_id = tid;
+
+    bomp_thread_fn_t func = (bomp_thread_fn_t)fn;
+
+ // calling the function
+ func((void *)arg);
+
+ bomp_icv_set_task(NULL);
+ tls->thread_id = -1;
+
+ struct txq_msg_st *msg_st = txq_msg_st_alloc(&t->txq);
+ if (msg_st == NULL) {
+ BOMP_ERROR("allocation of message state failed: %" PRIu32 "\n", tid);
+ return;
+ }
+
+ msg_st->send = done__tx;
+ msg_st->err = SYS_ERR_OK;
+
+ txq_send(msg_st);
+}
+
+/**
+ * \brief BOMP channel connect callback called by the Flounder backend
+ *
+ * \param st    Supplied worker state
+ * \param err   outcome of the connect attempt
+ * \param b     BOMP Flounder binding
+ */
+static void bomp_thread_connect_cb(void *st,
+ errval_t err,
+ struct bomp_binding *b)
+{
+ struct bomp_thread *t = st;
+
+ BOMP_DEBUG_THREAD("connected to node master.\n");
+
+ t->ctrl = b;
+
+ txq_init(&t->txq, b, b->waitset, (txq_register_fn_t) b->register_send,
+ sizeof(struct bomp_msg_st));
+
+ b->rx_vtbl.execute = execute__rx;
+}
+
+
+/**
+ * \brief message handler loop of a BOMP worker thread
+ *
+ * \param arg   pointer to the worker's message buffer
+ *
+ * \return 0 on termination
+ */
+static int bomp_thread_msg_handler(void *arg)
+{
+    errval_t err;
+
+ struct bomp_tls *tls = calloc(1, sizeof(struct bomp_tls));
+ if (tls == NULL) {
+ BOMP_ERROR("Could not allocate memory for TLS. %p\n", arg);
+ return -1;
+ }
+
+ BOMP_DEBUG_THREAD("thread message handler started %p\n", tls);
+
+ tls->role = BOMP_THREAD_ROLE_WORKER;
+ tls->self = thread_self();
+ tls->r.thread.coreid = disp_get_core_id();
+ tls->r.thread.msgbuf = arg;
+ tls->r.thread.tls = tls;
+
+ struct waitset local_waitset;
+ //struct waitset *ws = get_default_waitset();
+ struct waitset *ws = &local_waitset;
+
+ waitset_init(ws);
+
+
+ struct bomp_frameinfo fi = {
+ .sendbase = (lpaddr_t)arg,
+ .inbuf = ((uint8_t *) arg) + BOMP_CHANNEL_SIZE,
+ .inbufsize = BOMP_CHANNEL_SIZE,
+ .outbuf = ((uint8_t *) arg),
+ .outbufsize = BOMP_CHANNEL_SIZE
+ };
+
+
+
+ err = bomp_connect(&fi, bomp_thread_connect_cb, &tls->r.thread, ws,
+ IDC_EXPORT_FLAGS_DEFAULT);
+
+
+ if (err_is_fail(err)) {
+ /* TODO: Clean up */
+ return err_push(err, XOMP_ERR_WORKER_INIT_FAILED);
+ }
+
+ thread_set_tls(tls);
+
+
+ while(1) {
+ err = event_dispatch_non_block(ws);
+ switch(err_no(err)) {
+ case LIB_ERR_NO_EVENT :
+ thread_yield();
+ continue;
+ break;
+ case SYS_ERR_OK:
+ continue;
+ break;
+ default:
+ USER_PANIC_ERR(err, "event dispatch");
+ break;
+ }
+ }
+
+ BOMP_NOTICE("thread %" PRIuCOREID " terminated", disp_get_core_id());
+
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * These functions implement the OpenMP CRITICAL construct
+ *
+ * With a specified name, use omp_set_lock and omp_unset_lock with name being
+ * transformed into a variable declared like
+ *
+ * omp_lock_t gomp_critical_user_<name> __attribute__((common))
+ * Ideally the ABI would specify that all zero is a valid unlocked state, and
+ * so we wouldn't need to initialize this at startup.
+ */
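+
+/*
+ * For reference: GCC lowers
+ *
+ *     #pragma omp critical(name)
+ *     { body; }
+ *
+ * to roughly
+ *
+ *     GOMP_critical_name_start(&gomp_critical_user_name);
+ *     body;
+ *     GOMP_critical_name_end(&gomp_critical_user_name);
+ */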
+
+void GOMP_critical_start(void)
+{
+ assert(!"NYI");
+// assert(g_bomp_state);
+// bomp_mutex_lock(&g_bomp_state->critical_lock);
+}
+
+void GOMP_critical_end(void)
+{
+ assert(!"NYI");
+// assert(g_bomp_state);
+// bomp_mutex_unlock(&g_bomp_state->critical_lock);
+}
+
+void GOMP_critical_name_start(void **pptr)
+{
+ assert(!"NYI");
+}
+
+void GOMP_critical_name_end(void **pptr)
+{
+ assert(!"NYI");
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+struct omp_icv_device g_omp_icv_device_default;
+struct omp_icv_global g_omp_icv_global_default;
+struct omp_icv_task g_omp_icv_task_default;
+
+void bomp_icv_init_default(coreid_t nthreads)
+{
+ /* Global Control Variables */
+ struct omp_icv_global *icv_glob = &g_omp_icv_global_default;
+ icv_glob->thread_limit = nthreads;
+ icv_glob->time_start = rdtsc();
+
+ /* Device Control Variables */
+ struct omp_icv_device *icv_dev = &g_omp_icv_device_default;
+ icv_dev->dev_sched = 0;
+ icv_dev->stack_size = OMP_STACKSIZE;
+ icv_dev->wait_policy = OMP_WAIT_POLICY;
+ icv_dev->max_active_levels = OMP_MAX_ACTIVE_LEVELS;
+#if OMP_VERSION >= OMP_VERSION_40
+ icv_dev->cancel = OMP_CANCELLATION;
+#endif
+
+ struct omp_icv_task *icv_task = &g_omp_icv_task_default;
+ /* Task Control Variables */
+ icv_task->dynamic = OMP_DYNAMIC;
+ icv_task->nested = OMP_NESTED;
+ icv_task->nthreads = nthreads;
+ icv_task->thread_limit = nthreads;
+ icv_task->place_partition = OMP_PLACES;
+ icv_task->active_levels = 0;
+ icv_task->levels=0;
+ icv_task->run_sched = OMP_SCHEDULE;
+ icv_task->run_sched_modifier = 0;
+#if OMP_VERSION >= OMP_VERSION_40
+ icv_task->bind = OMP_PROC_BIND;
+ icv_task->default_device = OMP_DEFAULT_DEVICE;
+#endif
+}
+
+/**
+ * \brief allocates and initializes a new task ICV set
+ *
+ * \returns pointer to the ICV task struct
+ *
+ * The struct is initialized from the default task ICV values
+ */
+struct omp_icv_task *bomp_icv_task_new(void)
+{
+ struct omp_icv_task *icv = calloc(1, sizeof(*icv));
+ if (icv == NULL) {
+ return icv;
+ }
+
+ memcpy(icv, &g_omp_icv_task_default, sizeof(*icv));
+
+ return icv;
+}
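+
+/*
+ * Usage sketch (assumption: a parallel-region entry point such as
+ * GOMP_parallel would allocate a fresh task ICV set for the new team):
+ *
+ *     struct omp_icv_task *icv = bomp_icv_task_new();
+ *     if (icv == NULL) {
+ *         // handle out-of-memory
+ *     }
+ *     icv->nthreads = requested_threads; // hypothetical per-region override
+ *     bomp_icv_set_task(icv);
+ */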
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#ifndef __LIBBOMP_DEBUG_H
+#define __LIBBOMP_DEBUG_H
+
+#define BOMP_DEBUG_ENABLE 1
+#define BOMP_DEBUG_INIT_ENABLE 1
+#define BOMP_DEBUG_EXEC_ENABLE 1
+#define BOMP_DEBUG_CTRL_ENABLE 1
+#define BOMP_DEBUG_THREAD_ENABLE 1
+#define BOMP_DEBUG_NODE_ENABLE 1
+
+#define BOMP_ERROR(x, ...) debug_printf("[libbomp] ERROR: " x, __VA_ARGS__)
+#define BOMP_WARNING(x, ...)
+#define BOMP_NOTICE(x, ...) debug_printf("[libbomp] NOTICE: " x, __VA_ARGS__)
+
+#if BOMP_DEBUG_ENABLE
+#define BOMP_DEBUG_PRINT(x...) debug_printf("[libbomp] " x);
+#else
+#define BOMP_DEBUG_PRINT(x...)
+#endif
+
+#if BOMP_DEBUG_INIT_ENABLE
+#define BOMP_DEBUG_INIT(x...) BOMP_DEBUG_PRINT("[init ] " x)
+#else
+#define BOMP_DEBUG_INIT(x...)
+#endif
+
+#if BOMP_DEBUG_EXEC_ENABLE
+#define BOMP_DEBUG_EXEC(x...) BOMP_DEBUG_PRINT("[exec ] " x)
+#else
+#define BOMP_DEBUG_EXEC(x...)
+#endif
+
+#if BOMP_DEBUG_CTRL_ENABLE
+#define BOMP_DEBUG_CTRL(x...) BOMP_DEBUG_PRINT("[ctrl ] " x)
+#else
+#define BOMP_DEBUG_CTRL(x...)
+#endif
+
+#if BOMP_DEBUG_THREAD_ENABLE
+#define BOMP_DEBUG_THREAD(x...) BOMP_DEBUG_PRINT("[thread] " x)
+#else
+#define BOMP_DEBUG_THREAD(x...)
+#endif
+
+#if BOMP_DEBUG_NODE_ENABLE
+#define BOMP_DEBUG_NODE(x...) BOMP_DEBUG_PRINT("[node ] " x)
+#else
+#define BOMP_DEBUG_NODE(x...)
+#endif
+
+
+#endif /* __LIBBOMP_DEBUG_H */
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#ifndef __LIBBOMP_INTERNAL_H
+#define __LIBBOMP_INTERNAL_H
+
+#include <barrelfish/barrelfish.h>
+#include <flounder/flounder_txqueue.h>
+
+#include <numa.h>
+#include <omp.h>
+
+#include <omp_abi.h>
+#include <omp_icv.h>
+#include <omp_environment.h>
+
+#include <bomp_debug.h>
+
+
+#include <if/bomp_defs.h>
+
+
+///< size of the control channel in bytes
+#define BOMP_CHANNEL_SIZE 2048
+
+///< bomp thread id
+typedef uint32_t bomp_tid_t;
+
+/// the maximum size of an execution node
+#define BOMP_NODE_SIZE_MAX -1
+
+
+///< BOMP node types
+typedef enum bomp_node_type {
+ BOMP_NODE_INVALID, ///< the node has an invalid type
+ BOMP_NODE_MASTER, ///< this is the program master node
+    BOMP_NODE_LOCAL,     ///< the node is an address-space-local node
+ BOMP_NODE_REMOTE ///< the node is on a different address space
+} bomp_node_type_t;
+
+typedef enum bomp_thread_role {
+ BOMP_THREAD_ROLE_INVALID, ///< Invalid thread type
+ BOMP_THREAD_ROLE_WORKER, ///< Normal worker thread
+ BOMP_THREAD_ROLE_NODE, ///< Node coordinator thread
+ BOMP_THREAD_ROLE_MASTER ///< Program master thread (initial thread)
+} bomp_thread_role_t;
+
+///< type of the execute function of BOMP
+typedef void (*bomp_thread_fn_t)(void *);
+
+/**
+ * state of a normal BOMP thread
+ */
+struct bomp_thread {
+ coreid_t id; ///< id of the thread
+ coreid_t coreid; ///< id of the core the thread runs on
+    struct omp_icv_task *icvt;        ///< task control variables of this thread
+    struct bomp_tls *tls;             ///< pointer to the thread local storage
+ size_t stack_size; ///< size of the stack
+ void *msgbuf; ///< message buffer for this frame
+ struct capref msgframe; ///< backing frame for the message buffer
+ struct bomp_node *node; ///< the node this threads belongs to
+ errval_t thread_err; ///< stores the error in case of failure
+ struct bomp_binding *ctrl; ///< control channel
+ struct tx_queue txq; ///< Flounder TX queue
+};
+
+/**
+ * state of a BOMP node coordinator
+ */
+struct bomp_node {
+ nodeid_t id; ///< the id of the execution node
+ nodeid_t numa_node; ///< numa node id
+ bomp_node_type_t type; ///< type of this node
+ struct bomp_tls *tls; ///< pointer to the thread local storage
+ coreid_t threads_max; ///< the number of threads of this node
+ coreid_t threads_active; ///< the number of active threads on this node
+ size_t stack_size; ///< size of the stack
+ struct bomp_thread *threads; ///< pointer to the local threads array
+ struct bomp_binding *ctrl; ///< control channel
+ void *msgbuf; ///< message buffer for this frame
+ struct capref msgframe; ///< backing frame for the message buffer
+    errval_t node_err;           ///< stores the error in case of failure
+ struct tx_queue txq; ///< Flounder TX queue
+};
+
+
+/**
+ * \brief stores the state of the BOMP master thread
+ *
+ * The BOMP master thread is also a node coordinator of its own node.
+ */
+struct bomp_master {
+ nodeid_t num_nodes; ///< the number of nodes in the system
+    struct bomp_node *nodes;    ///< array of the other nodes
+ coreid_t threads_max; ///< the maximum number of threads in the system
+    coreid_t nodes_active;      ///< the number of active nodes in the system
+ struct bomp_node local; ///< the local node
+};
+
+
+
+struct bomp_work {
+ coreid_t thread_id;
+};
+
+struct bomp_tls {
+    struct thread *self;       ///< pointer to the struct thread
+    struct omp_icv icv;        ///< the internal control variables (environment)
+ coreid_t thread_id;
+ bomp_thread_role_t role; ///< identifies the role of the thread
+ union {
+ struct bomp_master master;
+ struct bomp_node node;
+ struct bomp_thread thread;
+ } r;
+
+};
+
+
+errval_t bomp_node_init(bomp_node_type_t type, nodeid_t numanode, nodeid_t nodeid, coreid_t nthreads,
+ size_t stack_size, struct bomp_node *node);
+coreid_t bomp_node_exec(struct bomp_node *node, void *fn, void *arg, coreid_t tid_start, coreid_t nthreads);
+
+errval_t bomp_thread_init(coreid_t core, size_t stack_size, struct bomp_thread *thread);
+
+errval_t bomp_thread_exec(struct bomp_thread *thread,
+ bomp_thread_fn_t fn, void *arg, uint32_t tid);
+
+
+void bomp_start_processing(void (*fn)(void *),
+ void *data,
+ coreid_t tid_start,
+ coreid_t nthreads);
+void bomp_end_processing(void);
+
+/**
+ * \brief obtains a pointer to the control variables
+ *
+ * \returns pointer to the ICV struct
+ */
+static inline struct omp_icv *bomp_icv_get(void)
+{
+ struct bomp_tls *tls = thread_get_tls();
+ return &tls->icv;
+}
+
+static inline void bomp_icv_set_task(struct omp_icv_task *task)
+{
+ struct bomp_tls *tls = thread_get_tls();
+ tls->icv.task = task;
+}
+
+static inline struct omp_icv_task *bomp_icv_get_task(void)
+{
+ struct bomp_tls *tls = thread_get_tls();
+ return tls->icv.task;
+}
+
+
+
+#if 0
+/**
+ * \brief this struct stores thread local data such as the team / task
+ * of this thread
+ */
+struct bomp_thread
+{
+ bomp_thread_fn_t fn;
+ void *arg;
+
+ struct bomp_task *task;
+
+};
+
+
+struct bomp_work {
+ void (*fn)(void *);
+ void *data;
+ unsigned thread_id;
+ unsigned num_threads;
+    unsigned num_vthreads;
+ struct bomp_barrier *barrier;
+};
+
+
+struct bomp_thread_local_data {
+ void *thr; // thread reference
+ struct bomp_work *work;
+ struct bomp_icv_data *icv;
+};
+#endif
+
+
+
+
+#endif /* __LIBBOMP_INTERNAL_H */
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#ifndef __GOMP_ABI_H
+#define __GOMP_ABI_H
+
+/* This header specifies the function signatures of the GOMP library as described
+ * in the GCC runtime library
+ *
+ * These functions have to be implemented in order to give full OpenMP support
+ */
+
+/* atomic.c */
+void GOMP_atomic_start(void);
+void GOMP_atomic_end(void);
+
+/* barrier.c */
+void GOMP_barrier(void);
+bool GOMP_barrier_cancel (void);
+
+/* critical.c */
+void GOMP_critical_start(void);
+void GOMP_critical_end(void);
+void GOMP_critical_name_start(void **pptr);
+void GOMP_critical_name_end(void **pptr);
+
+/* loop.c */
+bool GOMP_loop_static_start (long, long, long, long, long *, long *);
+bool GOMP_loop_dynamic_start (long, long, long, long, long *, long *);
+bool GOMP_loop_guided_start (long, long, long, long, long *, long *);
+bool GOMP_loop_runtime_start (long, long, long, long *, long *);
+
+bool GOMP_loop_ordered_static_start (long, long, long, long,
+ long *, long *);
+bool GOMP_loop_ordered_dynamic_start (long, long, long, long,
+ long *, long *);
+bool GOMP_loop_ordered_guided_start (long, long, long, long,
+ long *, long *);
+bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *);
+
+bool GOMP_loop_static_next (long *, long *);
+bool GOMP_loop_dynamic_next (long *, long *);
+bool GOMP_loop_guided_next (long *, long *);
+bool GOMP_loop_runtime_next (long *, long *);
+
+bool GOMP_loop_ordered_static_next (long *, long *);
+bool GOMP_loop_ordered_dynamic_next (long *, long *);
+bool GOMP_loop_ordered_guided_next (long *, long *);
+bool GOMP_loop_ordered_runtime_next (long *, long *);
+
+void GOMP_parallel_loop_static_start (void (*)(void *), void *,
+ unsigned, long, long, long, long);
+void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *,
+ unsigned, long, long, long, long);
+void GOMP_parallel_loop_guided_start (void (*)(void *), void *,
+ unsigned, long, long, long, long);
+void GOMP_parallel_loop_runtime_start (void (*)(void *), void *,
+ unsigned, long, long, long);
+void GOMP_parallel_loop_static (void (*)(void *), void *,
+ unsigned, long, long, long, long,
+ unsigned);
+void GOMP_parallel_loop_dynamic (void (*)(void *), void *,
+ unsigned, long, long, long, long,
+ unsigned);
+void GOMP_parallel_loop_guided (void (*)(void *), void *,
+ unsigned, long, long, long, long,
+ unsigned);
+void GOMP_parallel_loop_runtime (void (*)(void *), void *,
+ unsigned, long, long, long,
+ unsigned);
+
+void GOMP_loop_end (void);
+void GOMP_loop_end_nowait (void);
+bool GOMP_loop_end_cancel (void);
+
+/* ordered.c */
+void GOMP_ordered_start(void);
+void GOMP_ordered_end(void);
+
+/* parallel.c */
+void GOMP_parallel(void (*fn)(void *), void *data, unsigned num_threads, unsigned int flags);
+void GOMP_parallel_start(void (*) (void *), void *, unsigned);
+void GOMP_parallel_end(void);
+bool GOMP_cancel(int which, bool do_cancel);
+bool GOMP_cancellation_point(int which);
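+
+/*
+ * For reference: since GCC 4.9 a parallel region is lowered to a single
+ * GOMP_parallel() call. The compiler outlines the region body into its own
+ * function, so
+ *
+ *     #pragma omp parallel
+ *     { body; }
+ *
+ * becomes roughly
+ *
+ *     GOMP_parallel(body_fn, &shared_data, 0, 0);
+ *
+ * where a num_threads argument of 0 means "use the nthreads-var ICV".
+ * Older compilers emit GOMP_parallel_start()/GOMP_parallel_end() instead.
+ */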
+
+/* sections.c */
+unsigned GOMP_sections_start(unsigned count);
+unsigned GOMP_sections_next(void);
+void GOMP_parallel_sections_start(void (*fn)(void *), void *data,
+ unsigned num_threads, unsigned count);
+void GOMP_parallel_sections(void (*fn)(void *), void *data, unsigned num_threads,
+ unsigned count, unsigned flags);
+void GOMP_sections_end(void);
+bool GOMP_sections_end_cancel(void);
+void GOMP_sections_end_nowait(void);
+
+/* single.c */
+bool GOMP_single_start(void);
+void *GOMP_single_copy_start (void);
+void GOMP_single_copy_end (void *data);
+
+/* target.c */
+void GOMP_target (int, void (*) (void *), const void *,
+ size_t, void **, size_t *, unsigned char *);
+void GOMP_target_data (int, const void *,
+ size_t, void **, size_t *, unsigned char *);
+void GOMP_target_end_data (void);
+void GOMP_target_update (int, const void *,
+ size_t, void **, size_t *, unsigned char *);
+void GOMP_teams (unsigned int, unsigned int);
+
+#endif /* __GOMP_ABI_H */
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#ifndef _LIBBOMP_ENVIRONMENT_H
+#define _LIBBOMP_ENVIRONMENT_H
+
+/*
+ * ---------------------------------------------------------------------------
+ * 4.0 Environment Variables
+ * ---------------------------------------------------------------------------
+ *
+ * Environment variable names are upper case, and the values assigned to them
+ * are case insensitive and may have leading and trailing white space.
+ *
+ * These are the values which are set as the default when initializing the
+ * library
+ */
+
+/**
+ * Sets the cancel-var ICV. policy may be true (non-zero) or false (zero). If true,
+ * the effects of the cancel construct and of cancellation points are enabled and
+ * cancellation is activated
+ */
+#define OMP_CANCELLATION 0
+
+/**
+ * Sets the default-device-var ICV that controls the default device number to
+ * use in device constructs.
+ */
+#define OMP_DEFAULT_DEVICE 0
+
+/**
+ * If var is TRUE, instructs the runtime to display the OpenMP version number
+ * and the value of the ICVs associated with the environment variables as name=value
+ * pairs. If var is VERBOSE, the runtime may also display vendor-specific variables.
+ * If var is FALSE, no information is displayed.
+ */
+#define OMP_DISPLAY_ENV 0
+
+/**
+ * Sets the dyn-var ICV. If true, the implementation may dynamically adjust the
+ * number of threads to use for executing parallel regions.
+ */
+#define OMP_DYNAMIC 0
+
+/**
+ * Sets the max-active-levels-var ICV that controls the maximum number of nested
+ * active parallel regions.
+ */
+#define OMP_MAX_ACTIVE_LEVELS 0
+
+/**
+ * Sets the nest-var ICV to enable or to disable nested parallelism. Valid values
+ * for nested are true or false.
+ */
+#define OMP_NESTED 0
+
+/**
+ * Sets the nthreads-var ICV for the number of threads to use for parallel regions.
+ */
+#define OMP_NUM_THREADS 40
+
+/**
+ * Sets the place-partition-var ICV that defines the OpenMP places available to
+ * the execution environment. places is an abstract name (threads, cores, sockets,
+ * or implementation-defined), or a list of non-negative numbers.
+ */
+#define OMP_PLACES 0
+
+/**
+ * Sets the value of the global bind-var ICV, which sets the thread affinity
+ * policy to be used for parallel regions at the corresponding nested level.
+ * policy can be the values true, false, or a comma-separated list of master,
+ * close, or spread in quotes.
+ */
+#define OMP_PROC_BIND 0
+
+/**
+ * Sets the run-sched-var ICV for the runtime schedule type and chunk size.
+ * Valid OpenMP schedule types are static, dynamic, guided, or auto.
+ */
+#define OMP_SCHEDULE OMP_SCHED_STATIC
+
+/**
+ * Sets the stacksize-var ICV that specifies the size of the stack for threads
+ * created by the OpenMP implementation. size is a positive integer that specifies
+ * stack size. If unit is not specified, size is measured in kilobytes (K).
+ */
+#define OMP_STACKSIZE (64*1024)
+
+/**
+ * Sets the thread-limit-var ICV that controls the number of threads participating
+ * in the OpenMP program.
+ */
+#define OMP_THREAD_LIMIT 40
+
+/**
+ * Sets the wait-policy-var ICV that provides a hint to an OpenMP implementation
+ * about the desired behavior of waiting threads. Valid values for policy are
+ * ACTIVE (waiting threads consume processor cycles while waiting) and PASSIVE.
+ */
+#define OMP_WAIT_POLICY 0
+
+
+#endif /* _LIBBOMP_ENVIRONMENT_H */
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#ifndef _LIBBOMP_ICV_H
+#define _LIBBOMP_ICV_H
+
+/*
+ * ---------------------------------------------------------------------------
+ * 2.3 Internal Control Variables
+ * ---------------------------------------------------------------------------
+ *
+ * An OpenMP implementation must act as if there are internal control variables
+ * (ICVs) that control the behavior of an OpenMP program.
+ * They are initialized by the implementation itself and may be given values
+ * through OpenMP environment variables and through calls to OpenMP API routines.
+ */
+
+
+/**
+ * \brief
+ */
+struct omp_icv_task
+{
+ /**
+ * controls whether dynamic adjustment of the number of threads is enabled
+ * for encountered parallel regions.
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_DYNAMIC
+     * Accessed: omp_set_dynamic(), omp_get_dynamic()
+ */
+ uint8_t dynamic;
+
+ /**
+ * controls whether nested parallelism is enabled for encountered parallel
+ * regions
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_NESTED, set to false
+ * Accessed: omp_set_nested(), omp_get_nested()
+ */
+ uint8_t nested;
+
+ /**
+ * controls the number of threads requested for encountered parallel regions
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_NUM_THREADS
+ * Accessed: omp_set_num_threads(), omp_get_num_threads()
+ */
+ uint32_t nthreads;
+
+ /**
+ * controls the maximum number of threads participating in the contention group
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_THREAD_LIMIT
+ * Accessed: thread_limit (clause), omp_get_thread_limit()
+ */
+ uint32_t thread_limit;
+
+ /**
+ * controls the place partition available to the execution environment for
+ * encountered parallel regions.
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_PLACES
+ */
+ uint32_t place_partition;
+
+ /**
+     * the number of nested, active parallel regions enclosing the current task
+ * such that all of the parallel regions are enclosed by the outermost initial
+ * task region on the current device.
+ *
+ * Scope: Data Environment
+ * Initialization: set to zero
+ * Accessed: omp_get_active_levels()
+ */
+ uint8_t active_levels;
+
+ /**
+ * the number of nested parallel regions enclosing the current task such that
+ * all of the parallel regions are enclosed by the outermost initial task region
+ * on the current device.
+ *
+ * Scope: Data Environment
+ * Initialization: set to zero
+ * Accessed: omp_get_level()
+ */
+ uint8_t levels;
+
+#if OMP_VERSION >= OMP_VERSION_40
+ /**
+ * Controls the binding of OpenMP threads to places. When binding is requested,
+ * the variable indicates that the execution environment is advised not to
+ * move threads between places. The variable can also provide default thread
+ * affinity policies.
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_PROC_BIND
+ * Accessed: omp_get_proc_bind()
+ */
+ uint8_t bind;
+
+ /**
+ * controls the default target device.
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_DEFAULT_DEVICE
+ * Accessed: omp_set_default_device(), omp_get_default_device()
+ */
+ uint8_t default_device;
+#endif
+
+ /**
+ * controls the schedule that the runtime schedule clause uses for loop regions
+ *
+ * Scope: Data Environment
+ * Initialization: OMP_SCHEDULE
+ * Accessed: omp_set_schedule(), omp_get_schedule()
+ */
+ omp_sched_t run_sched;
+ int run_sched_modifier;
+};
+
+/**
+ * \brief device-scoped internal control variables
+ */
+struct omp_icv_device
+{
+ /**
+ * controls the implementation-defined default scheduling of loop regions
+ *
+ * Scope: Device
+ * Initialization: (none)
+ */
+ uint8_t dev_sched;
+
+ /**
+ * controls the stack size for threads that the OpenMP implementation creates
+ *
+ * Scope: Device
+ * Initialization: OMP_STACKSIZE
+ */
+ uint32_t stack_size;
+
+ /**
+ * controls the desired behavior of waiting threads.
+ *
+ * Scope: Device
+ * Initialization: OMP_WAIT_POLICY
+ */
+ uint8_t wait_policy;
+
+#if OMP_VERSION >= OMP_VERSION_40
+ /**
+ * controls the desired behavior of the cancel construct and cancellation points
+ *
+ * Scope: Device
+ * Initialization: OMP_CANCELLATION
+ * Accessed: omp_get_cancellation()
+ */
+ uint8_t cancel;
+#endif
+
+ /**
+ * controls the maximum number of nested, active parallel regions such that
+ * all of the parallel regions are enclosed by the outermost initial task
+ * region on the current device.
+ *
+ * Scope: Device
+ * Initialization: OMP_MAX_ACTIVE_LEVELS
+ * Accessed: omp_set_max_active_levels(), omp_get_max_active_levels()
+ */
+ uint8_t max_active_levels;
+};
+
+/**
+ * \brief global, program-wide control variables
+ */
+struct omp_icv_global {
+
+ /**
+ * the global thread limit of the OpenMP program
+ *
+ * This variable is purely BOMP related
+ */
+ coreid_t thread_limit;
+
+ /**
+ * the start time of the program
+ */
+ cycles_t time_start;
+};
+
+
+/**
+ * \brief represents the OpenMP environment for each thread
+ */
+struct omp_icv {
+ struct omp_icv_global *global; ///< global control variables
+ struct omp_icv_device *device; ///< device specific control variables
+ struct omp_icv_task *task; ///< task specific control variables
+};
+
+/* global variables for default ICV values */
+extern struct omp_icv_device g_omp_icv_device_default;
+extern struct omp_icv_global g_omp_icv_global_default;
+extern struct omp_icv_task g_omp_icv_task_default;
+
+/* initialization of the default ICV */
+void bomp_icv_init_default(coreid_t nthreads);
+struct omp_icv_task *bomp_icv_task_new(void);
+
+
+
+/* macros for accessing the control variables */
+
+#define OMP_SET_ICV_GLOBAL(_var, _val) \
+ (bomp_icv_get()->global)->_var=(_val)
+#define OMP_GET_ICV_GLOBAL(_var) \
+ ((bomp_icv_get()->global)->_var)
+
+#define OMP_SET_ICV_DEV(_var, _val) \
+ (bomp_icv_get()->device)->_var=(_val)
+#define OMP_GET_ICV_DEV(_var) \
+ ((bomp_icv_get()->device)->_var)
+
+#define OMP_SET_ICV_TASK(_var, _val) \
+ (bomp_icv_get()->task)->_var=(_val)
+#define OMP_GET_ICV_TASK(_var) \
+ ((bomp_icv_get()->task)->_var)
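+
+/*
+ * Example (hypothetical usage; assumes bomp_icv_get() returns the calling
+ * thread's ICV set, as the macros above do): request four threads for
+ * subsequent parallel regions and read the value back.
+ *
+ * OMP_SET_ICV_TASK(nthreads, 4);
+ * uint32_t n = OMP_GET_ICV_TASK(nthreads); // n == 4
+ */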
+
+
+#endif /* _LIBBOMP_ICV_H */
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * this implements the FOR constructs
+ *
+ * #pragma omp parallel for
+ * for (i = lb; i <= ub; i++)
+ * body;
+ *
+ * becomes
+ *
+ * void subfunction (void *data) {
+ * long _s0, _e0;
+ * while (GOMP_loop_static_next (&_s0, &_e0)) {
+ * long _e1 = _e0, i;
+ * for (i = _s0; i < _e1; i++)
+ * body;
+ * }
+ * GOMP_loop_end_nowait ();
+ * }
+ * GOMP_parallel_loop_static (subfunction, NULL, 0, lb, ub+1, 1, 0);
+ * subfunction (NULL);
+ * GOMP_parallel_end ();
+ */
+
+bool GOMP_loop_ordered_runtime_start(long start,
+ long end,
+ long incr,
+ long *istart,
+ long *iend)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+bool GOMP_loop_dynamic_start(long start,
+ long end,
+ long incr,
+ long chunk_size,
+ long *istart,
+ long *iend)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+bool GOMP_loop_runtime_next(long *istart,
+ long *iend)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+bool GOMP_loop_ordered_runtime_next(long *istart,
+ long *iend)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+bool GOMP_loop_dynamic_next(long *istart,
+ long *iend)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+void GOMP_loop_end_nowait(void)
+{
+ assert(!"NYI");
+}
+
+void GOMP_loop_end(void)
+{
+ assert(!"NYI");
+}
+
--- /dev/null
+/**
+ * \file
+ * \brief OpenMP API implementation as defined in OpenMP Version 4.0
+ *
+ * Source: http://www.openmp.org/mp-documents/OpenMP4.0.0.pdf
+ */
+
+/*
+ * Copyright (c)2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+
+#include <bomp_internal.h>
+
+
+/*
+ * ===========================================================================
+ * OpenMP 4.0 API
+ * ===========================================================================
+ */
+
+/*
+ * ---------------------------------------------------------------------------
+ * 3.2 Execution Environment Routines
+ * ---------------------------------------------------------------------------
+ *
+ * Execution environment routines affect and monitor threads, processors, and
+ * the parallel environment. The library routines are external functions with
+ * “C” linkage.
+ */
+
+/**
+ * \brief Sets the number of threads to be used for parallel regions
+ *
+ * \param num_threads the number of threads
+ *
+ * Affects the number of threads used for subsequent parallel regions not
+ * specifying a num_threads clause, by setting the value of the first element of
+ * the nthreads-var ICV of the current task to num_threads.
+ */
+void omp_set_num_threads(int num_threads)
+{
+ if (num_threads > 0) {
+ if (num_threads > OMP_GET_ICV_GLOBAL(thread_limit)) {
+ num_threads = OMP_GET_ICV_GLOBAL(thread_limit);
+ }
+
+ OMP_SET_ICV_TASK(nthreads, num_threads);
+ }
+}
+
+/**
+ * \brief returns the current number of threads used (innermost parallel region)
+ *
+ * \returns number of used threads
+ *
+ * Returns the number of threads in the current team. The binding region for an
+ * omp_get_num_threads region is the innermost enclosing parallel region.
+ * If called from the sequential part of a program, this routine returns 1.
+ */
+int omp_get_num_threads(void)
+{
+ /*
+ struct gomp_team *team = gomp_thread ()->ts.team;
+ return team ? team->nthreads : 1;
+
+ XXX: we don't have teams yet, so we just return the number of threads
+ working on the task
+ */
+
+ if (bomp_icv_get()->task) {
+ if (OMP_GET_ICV_TASK(active_levels) > 1) {
+ return 1; /// if we are nested return 1
+ }
+ return OMP_GET_ICV_TASK(nthreads);
+ }
+ return 1;
+}
+
+/**
+ * \brief the maximum number of threads that can be used for a new parallel task
+ *
+ * \returns number of usable threads
+ *
+ * Returns an upper bound on the number of threads that could be used to form a
+ * new team if a parallel construct without a num_threads clause were encountered
+ * after execution returns from this routine.
+ *
+ * The value returned by omp_get_max_threads is the value of the first element of
+ * the nthreads-var ICV of the current task. This value is also an upper bound on
+ * the number of threads that could be used to form a new team if a parallel
+ * region without a num_threads clause were encountered after execution returns
+ * from this routine.
+ */
+int omp_get_max_threads(void)
+{
+ if (bomp_icv_get()->task) {
+ return OMP_GET_ICV_TASK(thread_limit);
+ }
+ return OMP_GET_ICV_GLOBAL(thread_limit);
+}
+
+/**
+ * \brief Returns the thread number of the calling thread within the current team.
+ *
+ * \returns ThreadID
+ */
+int omp_get_thread_num(void)
+{
+ if (bomp_icv_get()->task) {
+ return ((struct bomp_tls *)thread_get_tls())->thread_id;
+ }
+ return 0;
+}
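+
+/*
+ * Usage sketch (hypothetical caller code): request four threads, then query
+ * the thread number inside the parallel region using the routines above.
+ *
+ * omp_set_num_threads(4);
+ * #pragma omp parallel
+ * {
+ *     int tid = omp_get_thread_num(); // 0 .. omp_get_num_threads() - 1
+ * }
+ */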
+
+/**
+ * \brief returns the number of available processors
+ *
+ * \returns available processor count
+ *
+ * Returns the number of processors that are available to the device at the time
+ * the routine is called.
+ */
+int omp_get_num_procs(void)
+{
+ return numa_num_configured_cpus();
+}
+
+/**
+ * \brief checks if we are currently in a parallel region
+ *
+ * \returns TRUE if called from within an active parallel region
+ * FALSE otherwise (sequential part, main thread only)
+ *
+ * Returns true if the active-levels-var ICV is greater than zero; otherwise it
+ * returns false. The effect of the omp_in_parallel routine is to return true if
+ * the current task is enclosed by an active parallel region, and the parallel
+ * region is enclosed by the outermost initial task region on the device;
+ * otherwise it returns false.
+ */
+int omp_in_parallel(void)
+{
+ if (bomp_icv_get()->task) {
+ return (OMP_GET_ICV_TASK(active_levels) > 0);
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * \brief enables / disables the dynamic behavior
+ *
+ * \param dynamic_threads zero to disable dynamic behavior
+ * non-zero to enable dynamic behavior
+ *
+ * Sets the value of the dyn-var ICV, which indicates if dynamic adjustment
+ * of the number of threads is enabled or disabled.
+ */
+void omp_set_dynamic(int dynamic_threads)
+{
+#if OMP_SUPPORT_DYNAMIC
+ OMP_SET_ICV_TASK(dynamic, (!!dynamic_threads));
+#endif
+}
+
+/**
+ * \brief checks if the dynamic behavior is enabled for the current task
+ *
+ * \returns TRUE if dynamic behavior enabled
+ * FALSE if disabled
+ *
+ * This routine returns the value of the dyn-var ICV, which is true if dynamic
+ * adjustment of the number of threads is enabled for the current task.
+ */
+int omp_get_dynamic(void)
+{
+#if OMP_SUPPORT_DYNAMIC
+ return OMP_GET_ICV_TASK(dynamic);
+#else
+ return 0;
+#endif
+}
+
+/**
+ * \brief Enables or disables nested parallelism, by setting the nest-var ICV.
+ *
+ * \param nested TRUE: enable nested behavior
+ * FALSE: disable nested behavior
+ */
+void omp_set_nested(int nested)
+{
+#if OMP_SUPPORT_NESTED
+ OMP_SET_ICV_TASK(nested, !!nested);
+#endif
+
+}
+
+/**
+ * \brief checks if the nested behavior is enabled
+ *
+ * \returns TRUE if nested behavior is enabled
+ * FALSE if disabled
+ *
+ * Returns the value of the nest-var ICV, which indicates if nested parallelism
+ * is enabled or disabled.
+ */
+int omp_get_nested(void)
+{
+#if OMP_SUPPORT_NESTED
+ return OMP_GET_ICV_TASK(nested);
+#else
+ return 0;
+#endif
+}
+
+/**
+ * \brief sets the schedule to be used
+ *
+ * \param kind which schedule to be used (one of OMP_SCHED_*)
+ * \param modifier modifier to tweak the scheduler (depends on kind)
+ *
+ * The omp_set_schedule routine affects the schedule that is applied when runtime
+ * is used as schedule kind, by setting the value of the run-sched-var ICV.
+ */
+void omp_set_schedule(omp_sched_t kind,
+ int modifier)
+{
+ OMP_SET_ICV_TASK(run_sched, kind);
+ OMP_SET_ICV_TASK(run_sched_modifier, modifier);
+}
+
+/**
+ * \brief returns the current scheduler settings
+ *
+ * \param kind returns the current scheduler setting (one of OMP_SCHED_*)
+ * \param modifier returns the modifier of the scheduler
+ *
+ * Returns the value of run-sched-var ICV, which is the schedule applied when
+ * runtime schedule is used.
+ */
+void omp_get_schedule(omp_sched_t *kind,
+ int *modifier)
+{
+ if (kind) {
+ *kind = OMP_GET_ICV_TASK(run_sched);
+ }
+ if (modifier) {
+ *modifier = OMP_GET_ICV_TASK(run_sched_modifier);
+ }
+}
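+
+/*
+ * Usage sketch (hypothetical caller code; assumes an OMP_SCHED_DYNAMIC
+ * constant from the OMP_SCHED_* family referenced above): select the
+ * dynamic runtime schedule with a chunk size of 4, then read it back.
+ *
+ * omp_sched_t kind;
+ * int modifier;
+ * omp_set_schedule(OMP_SCHED_DYNAMIC, 4);
+ * omp_get_schedule(&kind, &modifier); // kind == OMP_SCHED_DYNAMIC, modifier == 4
+ */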
+
+/**
+ * \brief obtains the maximum number of OpenMP threads available
+ *
+ * \returns number of available threads
+ *
+ * Returns the value of the thread-limit-var ICV, which is the maximum number
+ * of OpenMP threads available.
+ *
+ * The binding thread set for an omp_get_thread_limit region is all threads on the
+ * device. The effect of executing this routine is not related to any specific
+ * region corresponding to any construct or API routine.
+ */
+int omp_get_thread_limit(void)
+{
+ return OMP_GET_ICV_TASK(thread_limit);
+}
+
+/**
+ * \brief limits the nested depth
+ *
+ * \param max_active_levels maximum nested level
+ *
+ * Limits the number of nested active parallel regions, by setting
+ * max-active-levels-var ICV.
+ */
+void omp_set_max_active_levels(int max_active_levels)
+{
+ if (max_active_levels > 0) {
+ OMP_SET_ICV_DEV(max_active_levels, max_active_levels);
+ }
+}
+
+/**
+ * \brief returns the maximum nested depth
+ *
+ * \returns maximum nested level
+ *
+ * Returns the value of max-active-levels-var ICV, which determines the maximum
+ * number of nested active parallel regions.
+ */
+int omp_get_max_active_levels(void)
+{
+ return OMP_GET_ICV_DEV(max_active_levels);
+}
+
+/**
+ * \brief returns the level the task is running at
+ *
+ * \returns number of enclosing nested parallel regions
+ *
+ * For the enclosing device region, returns the levels-vars ICV, which is the
+ * number of nested parallel regions that enclose the task containing the call.
+ */
+int omp_get_level(void)
+{
+ return OMP_GET_ICV_TASK(levels);
+}
+
+/**
+ * \brief returns the ancestor thread number of a thread at a given level
+ *
+ * \param level the level of the ancestor
+ *
+ * \returns thread number of ancestor thread
+ *
+ * The omp_get_ancestor_thread_num routine returns the thread number of the
+ * ancestor at a given nest level of the current thread or the thread number of
+ * the current thread. If the requested nest level is outside the range of 0 and
+ * the nest level of the current thread, as returned by the omp_get_level routine,
+ * the routine returns -1.
+ */
+int omp_get_ancestor_thread_num(int level)
+{
+ int my_level = omp_get_level();
+ if (level > my_level || level < 0) {
+ return -1;
+ } else if (my_level == level) {
+ return omp_get_thread_num();
+ } else {
+ /* TODO */
+ assert(!"NYI");
+ return 0;
+ }
+}
+
+/**
+ * \brief returns the team size of a thread at a given level
+ *
+ * \param level the level to consider
+ *
+ * \returns number of threads in the team
+ *
+ * The omp_get_team_size routine returns the size of the thread team to which the
+ * ancestor or the current thread belongs. If the requested nested level is outside
+ * the range of 0 and the nested level of the current thread, as returned by the
+ * omp_get_level routine, the routine returns -1. Inactive parallel regions are
+ * regarded like active parallel regions executed with one thread.
+ */
+int omp_get_team_size(int level)
+{
+ int my_level = omp_get_level();
+ if (level > my_level || level < 0) {
+ return -1;
+ } else {
+ /* TODO */
+ assert(!"NYI");
+ return 0;
+ }
+}
+
+/**
+ * \brief returns the number of active, nested parallel regions
+ *
+ * \returns number of nested parallel regions
+ *
+ * The effect of the omp_get_active_level routine is to return the number of nested,
+ * active parallel regions enclosing the current task such that all of the parallel
+ * regions are enclosed by the outermost initial task region on the current device.
+ */
+int omp_get_active_level(void)
+{
+ return OMP_GET_ICV_TASK(active_levels);
+}
+
+/**
+ * \brief checks if thread is in the final task region
+ *
+ * \returns TRUE if thread is in the final task region
+ * FALSE otherwise
+ *
+ * Returns true if the routine is executed in a final task region; otherwise,
+ * it returns false.
+ */
+int omp_in_final(void)
+{
+ assert(!"NYI");
+ return 1; // TODO
+}
+
+#if OMP_VERSION >= OMP_VERSION_40
+
+/**
+ * \brief returns the cancellation value
+ *
+ * \returns cancellation value
+ *
+ * Returns the value of the cancel-var ICV, which controls the behavior of
+ * cancel construct and cancellation points.
+ */
+int omp_get_cancellation(void)
+{
+ return OMP_GET_ICV_DEV(cancel);
+}
+
+/**
+ * \brief returns the thread affinity policy
+ *
+ * \returns OpenMP thread policy value
+ *
+ * Returns the thread affinity policy to be used for the subsequent nested
+ * parallel regions that do not specify a proc_bind clause.
+ */
+omp_proc_bind_t omp_get_proc_bind(void)
+{
+ return OMP_GET_ICV_TASK(bind);
+}
+
+/**
+ * \brief controls the default target device
+ *
+ * \param device_num device number of the target device
+ *
+ * The effect of this routine is to set the value of the default-device-var ICV
+ * of the current task to the value specified in the argument. When called from
+ * within a target region the effect of this routine is unspecified.
+ */
+void omp_set_default_device(int device_num)
+{
+ OMP_SET_ICV_TASK(default_device, device_num);
+}
+
+/**
+ * \brief Returns the default target device.
+ *
+ * \returns device number of default target device
+ *
+ * The omp_get_default_device routine returns the value of the default-device-var
+ * ICV of the current task. When called from within a target region the effect of
+ * this routine is unspecified.
+ */
+int omp_get_default_device(void)
+{
+ // TODO: behavior if on target
+ return OMP_GET_ICV_TASK(default_device);
+}
+
+/**
+ * \brief Returns the number of target devices.
+ *
+ * \returns number of target devices
+ *
+ * The omp_get_num_devices routine returns the number of available target devices.
+ * When called from within a target region the effect of this routine is
+ * unspecified.
+ */
+int omp_get_num_devices(void)
+{
+ return 0; // TODO
+}
+
+/**
+ * \brief returns the number of teams in the current region
+ *
+ * \returns number of teams
+ *
+ * The effect of this routine is to return the number of teams in the current teams
+ * region. The routine returns 1 if it is called from outside of a teams region.
+ */
+int omp_get_num_teams(void)
+{
+ assert(!"NYI: Teams");
+ return 1; // TODO: team counting
+}
+
+/**
+ * \brief gets the team number of the calling thread
+ *
+ * \returns team number
+ *
+ * Returns the team number of calling thread. The team number is an integer
+ * between 0 and one less than the value returned by omp_get_num_teams, inclusive.
+ */
+int omp_get_team_num(void)
+{
+ assert(!"NYI: Teams");
+ return 0;
+}
+
+/**
+ * \brief checks if the task is executing as the host device
+ *
+ * \returns TRUE if task is host device
+ * FALSE otherwise
+ * Returns true if the current task is executing on the host device; otherwise,
+ * it returns false.
+ */
+int omp_is_initial_device(void)
+{
+ assert(!"NYI: Initial device");
+ return 1;
+}
+#endif
+
+/*
+ * ---------------------------------------------------------------------------
+ * 3.3 Lock Routines
+ * ---------------------------------------------------------------------------
+ * General-purpose lock routines. Two types of locks are supported: simple locks
+ * and nestable locks. A nestable lock can be set multiple times by the same task
+ * before being unset; a simple lock cannot be set if it is already owned by the
+ * task trying to set it.
+ *
+ * XXX: we may have to consider something different when we are dealing with
+ * non-shared address spaces such as XOMP
+ */
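+
+/*
+ * Usage sketch contrasting the two lock types (hypothetical caller code):
+ * a simple lock must not be re-acquired by its owner, while a nestable
+ * lock keeps a per-owner nesting count.
+ *
+ * omp_lock_t l;
+ * omp_init_lock(&l);
+ * omp_set_lock(&l);
+ * // calling omp_set_lock(&l) again here would deadlock: simple locks do not nest
+ * omp_unset_lock(&l);
+ * omp_destroy_lock(&l);
+ *
+ * omp_nest_lock_t nl;
+ * omp_init_nest_lock(&nl);
+ * omp_set_nest_lock(&nl);   // count = 1
+ * omp_set_nest_lock(&nl);   // same task: count = 2, no deadlock
+ * omp_unset_nest_lock(&nl); // count = 1, still held
+ * omp_unset_nest_lock(&nl); // count = 0, released
+ * omp_destroy_nest_lock(&nl);
+ */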
+
+
+/*
+ * Simple OpenMP locks
+ */
+
+/**
+ * \brief initializes and allocates a simple OpenMP lock
+ *
+ * \param arg returned pointer to the lock
+ *
+ * The effect of these routines is to initialize the lock to the unlocked state;
+ * that is, no task owns the lock.
+ */
+void omp_init_lock(omp_lock_t *arg)
+{
+ struct __omp_lock *lock = (struct __omp_lock *)arg;
+
+ assert(lock != NULL);
+
+ thread_mutex_init(&lock->mutex);
+ lock->initialized = 0x1;
+}
+
+/**
+ * \brief destroys a simple OpenMP lock
+ *
+ * \param arg OpenMP lock to be destroyed (zeroed)
+ *
+ * The effect of these routines is to change the state of the lock to uninitialized.
+ */
+void omp_destroy_lock(omp_lock_t *arg)
+{
+ struct __omp_lock *lock = (struct __omp_lock *) arg;
+ assert(lock->initialized);
+
+ /* acquire the lock to make sure there are no other threads holding the lock */
+ thread_mutex_lock(&lock->mutex);
+ /* we have the lock now */
+ memset(lock, 0, sizeof (*lock));
+}
+
+/**
+ * \brief acquires a simple OpenMP lock
+ *
+ * \param arg The lock to acquire
+ *
+ * Each of these routines causes suspension of the task executing the routine
+ * until the specified lock is available and then sets the lock.
+ */
+void omp_set_lock(omp_lock_t *arg)
+{
+ struct __omp_lock *lock = (struct __omp_lock *) arg;
+ assert(lock->initialized);
+ thread_mutex_lock(&lock->mutex);
+}
+
+/**
+ * \brief Releases the simple OpenMP lock
+ *
+ * \param arg The lock to be released
+ *
+ * For a simple lock, the omp_unset_lock routine causes the lock to become
+ * unlocked.
+ */
+void omp_unset_lock(omp_lock_t *arg)
+{
+ struct __omp_lock *lock = (struct __omp_lock *) arg;
+ assert(lock->initialized);
+ thread_mutex_unlock(&lock->mutex);
+}
+
+/**
+ * \brief tries to acquire a simple OpenMP lock
+ *
+ * \param arg The OpenMP lock to acquire
+ *
+ * \returns TRUE if lock is acquired successfully
+ * FALSE if the lock is already held by another thread
+ *
+ * These routines attempt to set a lock in the same manner as omp_set_lock and
+ * omp_set_nest_lock, except that they do not suspend execution of the task
+ * executing the routine.
+ * For a simple lock, the omp_test_lock routine returns true if the lock is
+ * successfully set; otherwise, it returns false.
+ */
+int omp_test_lock(omp_lock_t *arg)
+{
+ struct __omp_lock *lock = (struct __omp_lock *) arg;
+ assert(lock->initialized);
+ return thread_mutex_trylock(&lock->mutex);
+}
+
+/*
+ * Nested OpenMP locks
+ */
+
+/**
+ * \brief initializes and allocates a nested OpenMP lock
+ *
+ * \param arg returned pointer to the lock
+ *
+ * The effect of these routines is to initialize the lock to the unlocked state;
+ * that is, no task owns the lock. In addition, the nesting count for a nestable
+ * lock is set to zero.
+ */
+void omp_init_nest_lock(omp_nest_lock_t *arg)
+{
+
+ struct __omp_nested_lock *nlock = (struct __omp_nested_lock *)arg;
+ assert(nlock != NULL);
+ thread_mutex_init(&nlock->mutex);
+ nlock->owner = NULL;
+ nlock->count = 0;
+ nlock->initialized = 1;
+}
+
+/**
+ * \brief destroys a Nested OpenMP lock
+ *
+ * \param arg OpenMP lock to be destroyed (zeroed)
+ *
+ * The effect of these routines is to change the state of the lock to uninitialized.
+ */
+void omp_destroy_nest_lock(omp_nest_lock_t *arg)
+{
+ struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
+ assert(nlock->initialized);
+
+ /* acquire the lock to make sure there are no other threads holding the lock */
+ thread_mutex_lock(&nlock->mutex);
+ /* we have the lock now */
+ memset(nlock, 0, sizeof (*nlock));
+}
+
+/**
+ * \brief acquires a nestable OpenMP lock
+ *
+ * \param arg The lock to acquire
+ *
+ * Each of these routines causes suspension of the task executing the routine
+ * until the specified lock is available and then sets the lock.
+ *
+ * A nestable lock is available if it is unlocked or if it is already owned by
+ * the task executing the routine. The task executing the routine is granted,
+ * or retains, ownership of the lock, and the nesting count for the lock is
+ * incremented.
+ */
+void omp_set_nest_lock(omp_nest_lock_t *arg)
+{
+ struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
+ assert(nlock->initialized);
+
+ if (nlock->owner != thread_self()) {
+ thread_mutex_lock (&nlock->mutex);
+ nlock->owner = thread_self();
+ }
+ nlock->count++;
+}
+
+/**
+ * \brief Releases a nestable OpenMP lock
+ *
+ * \param arg The lock to be released
+ *
+ * For a nestable lock, the omp_unset_nest_lock routine decrements the nesting
+ * count, and causes the lock to become unlocked if the resulting nesting count
+ * is zero.
+ */
+void omp_unset_nest_lock(omp_nest_lock_t *arg)
+{
+ struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
+ assert(nlock->initialized);
+
+ nlock->count--;
+
+ // if we were the last holder, give up ownership and unlock the mutex
+ if (nlock->count == 0) {
+ nlock->owner = NULL;
+ thread_mutex_unlock(&nlock->mutex);
+ }
+}
+
+/**
+ * \brief tries to acquire a nestable OpenMP lock
+ *
+ * \param arg The OpenMP lock to acquire
+ *
+ * \returns new nesting count if the lock is acquired successfully
+ * zero if the lock is already held by another thread
+ *
+ * These routines attempt to set a lock in the same manner as omp_set_lock and
+ * omp_set_nest_lock, except that they do not suspend execution of the task
+ * executing the routine.
+ * For a nestable lock, the omp_test_nest_lock routine returns the new nesting
+ * count if the lock is successfully set; otherwise, it returns zero.
+ */
+int omp_test_nest_lock(omp_nest_lock_t *arg)
+{
+ struct __omp_nested_lock *nlock = (struct __omp_nested_lock *) arg;
+ assert(nlock->initialized);
+
+ if (nlock->owner != thread_self()) {
+ if (!thread_mutex_trylock(&nlock->mutex)) {
+ return 0;
+ }
+ nlock->owner = thread_self();
+ }
+
+ nlock->count++;
+
+ return nlock->count;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * 3.4 Timing Routines
+ * ---------------------------------------------------------------------------
+ * Timing routines support a portable wall clock timer. These record elapsed
+ * time per-thread and are not guaranteed to be globally consistent across all
+ * the threads participating in an application.
+ */
+
+/**
+ * \brief returns elapsed wall clock time in seconds.
+ *
+ * \returns wall clock time in seconds
+ *
+ * The omp_get_wtime routine returns a value equal to the elapsed wall clock time
+ * in seconds since some “time in the past”. The actual “time in the past” is
+ * arbitrary, but it is guaranteed not to change during the execution of the
+ * application program. The time returned is a “per-thread time”, so it is not
+ * required to be globally consistent across all the threads participating in an
+ * application.
+ */
+double omp_get_wtime(void)
+{
+ cycles_t t_start = OMP_GET_ICV_GLOBAL(time_start);
+ cycles_t t_current = rdtsc();
+ /* TODO: convert the cycle difference into seconds using the clock frequency */
+ assert(!"NYI: conversion from cycles to seconds");
+ return (double)(t_current - t_start);
+}
+
+/**
+ * \brief returns the precision of the timer used by omp_get_wtime.
+ *
+ * \returns the timer precision
+ *
+ * The omp_get_wtick routine returns a value equal to the number of seconds
+ * between successive clock ticks of the timer used by omp_get_wtime.
+ */
+double omp_get_wtick(void)
+{
+ return 1.0 / 1e6;
+}
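+
+/*
+ * Timing sketch (hypothetical caller code; work() stands in for any
+ * workload): measure elapsed wall clock time around a parallel region.
+ * Note that omp_get_wtime() above still lacks the cycle-to-seconds
+ * conversion, so this remains a usage illustration for now.
+ *
+ * double t0 = omp_get_wtime();
+ * #pragma omp parallel
+ * { work(); }
+ * double elapsed = omp_get_wtime() - t0; // seconds, per-thread time base
+ */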
+
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+#if 0
+
+/**
+ * \brief allocates and initializes a new task ICV set
+ *
+ * \returns pointer to the ICV task struct
+ *
+ * The struct is initialized based on the parent
+ */
+struct omp_icv_task *bomp_icv_new(void)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+/**
+ * \brief Initializes the task specific ICV from the parent task
+ *
+ * \param icv_task ICV task struct to initialize
+ *
+ * If there is no parent task, then the global ICVs are taken
+ */
+void bomp_icv_init_from_parent(struct omp_icv_task *icv_task)
+{
+ assert(!"NYI");
+}
+
+/**
+ * \brief Initializes the task specific ICV from the environment
+ *
+ * \param icv_task ICV task struct to initialize
+ *
+ * This function initializes the task ICV based on the values defined in
+ * environment.h.
+ *
+ * The function may only be called to initialize the global task ICV
+ */
+void bomp_icv_init_from_env(struct omp_icv_task *icv_task)
+{
+ icv_task->dynamic = OMP_DYNAMIC;
+ icv_task->nested = OMP_NESTED;
+ icv_task->nthreads = g_thread_limit;
+ icv_task->thread_limit = g_thread_limit;
+ icv_task->place_partition = OMP_PLACES;
+ icv_task->active_levels = 0;
+ icv_task->levels=0;
+ icv_task->run_sched = OMP_SCHEDULE;
+ icv_task->run_sched_modifier = 0;
+#if OMP_VERSION >= OMP_VERSION_40
+ icv_task->bind = OMP_PROC_BIND;
+ icv_task->default_device = OMP_DEFAULT_DEVICE;
+#endif
+}
+
+/**
+ * \brief Initializes the device specific ICV from the environment
+ *
+ * \param icv_dev ICV task struct to initialize
+ *
+ * This function initializes the device ICV based on the values defined in
+ * environment.h.
+ */
+void bomp_icv_dev_init_from_env(struct omp_icv_device *icv_dev)
+{
+ icv_dev->dev_sched = 0;
+ icv_dev->stack_size = OMP_STACKSIZE;
+ icv_dev->wait_policy = OMP_WAIT_POLICY;
+ icv_dev->max_active_levels = OMP_MAX_ACTIVE_LEVELS;
+#if OMP_VERSION >= OMP_VERSION_40
+ icv_dev->cancel = OMP_CANCELLATION;
+#endif
+}
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * This functions implement the ORDERED construct
+ */
+
+
+void GOMP_ordered_start(void)
+{
+ assert(!"NYI");
+ /* nop */
+}
+
+void GOMP_ordered_end(void)
+{
+ /* nop */
+ assert(!"NYI");
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * These functions implement the PARALLEL construct
+ *
+ * #pragma omp parallel
+ * {
+ * body;
+ * }
+ *
+ * is translated into
+ * void subfunction (void *data)
+ * {
+ * use data;
+ * body;
+ * }
+ * setup data;
+ * GOMP_parallel_start (subfunction, &data, num_threads);
+ * subfunction (&data);
+ * GOMP_parallel_end ();
+ */
+
+void GOMP_parallel_start(void (*fn)(void *),
+ void *data,
+ unsigned nthreads)
+{
+ debug_printf("GOMP_parallel_start(%p, %p, %u)\n", fn, data, nthreads);
+
+ /* Identify the number of threads that can be spawned and start the processing */
+ if (!omp_in_parallel()) {
+ debug_printf("not in parallel\n");
+
+ struct omp_icv_task *icv_task = bomp_icv_task_new();
+ if (!icv_task) {
+ debug_printf("no icv task\n");
+ return;
+ }
+
+ icv_task->active_levels = 1;
+ icv_task->nthreads = omp_get_max_threads();
+ debug_printf("omp_get_max_threads = %u\n", icv_task->nthreads);
+
+ if (nthreads == 0 || (icv_task->dynamic && icv_task->nthreads < nthreads)) {
+ icv_task->nthreads = OMP_GET_ICV_GLOBAL(thread_limit);
+ debug_printf("resetting to = %u\n", icv_task->nthreads);
+ }
+
+ bomp_icv_set_task(icv_task);
+ debug_printf("icv task set %u\n", icv_task->nthreads);
+
+ /* start processing */
+ bomp_start_processing(fn, data, 0, icv_task->nthreads);
+ } else {
+ if (omp_get_nested()) {
+ // handle nested parallelism
+ assert(!"Handling nested parallelism\n");
+ }
+
+ /* we have already started enough threads */
+ uint32_t active_levels = OMP_GET_ICV_TASK(active_levels);
+ //debug_printf("setting active_levels to %u\n", active_levels+1);
+
+ OMP_SET_ICV_TASK(active_levels, active_levels+1);
+ }
+}
+
+void GOMP_parallel_end(void)
+{
+// debug_printf("GOMP_parallel_end\n");
+
+ uint32_t active_levels = OMP_GET_ICV_TASK(active_levels);
+
+ if (active_levels == 1) {
+ bomp_end_processing();
+ } else {
+// debug_printf("setting active_levels to %u\n", active_levels-1);
+ OMP_SET_ICV_TASK(active_levels, active_levels-1);
+ }
+
+
+ debug_printf("GOMP_parallel_end end\n");
+}
+
+void GOMP_parallel(void (*fn)(void *),
+ void *data,
+ unsigned num_threads,
+ unsigned int flags)
+{
+ debug_printf("GOMP_parallel");
+ assert(!"NYI");
+}
+
+#if OMP_VERSION >= OMP_VERSION_40
+bool GOMP_cancel(int which,
+ bool do_cancel)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+bool GOMP_cancellation_point(int which)
+{
+ assert(!"NYI");
+ return 0;
+}
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * This functions implement the SECTIONS construct
+ *
+ * #pragma omp sections
+ * {
+ * #pragma omp section
+ * stmt1;
+ * #pragma omp section
+ * stmt2;
+ * #pragma omp section
+ * stmt3;
+ * }
+ * becomes
+ *
+ * for (i = GOMP_sections_start (3); i != 0; i = GOMP_sections_next ())
+ * switch (i) {
+ * case 1:
+ * stmt1;
+ * break;
+ * case 2:
+ * stmt2;
+ * break;
+ * case 3:
+ * stmt3;
+ * break;
+ * }
+ *
+ * GOMP_barrier ();
+ */
+
+unsigned GOMP_sections_start(unsigned count)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+unsigned GOMP_sections_next(void)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+void GOMP_parallel_sections_start(void (*fn)(void *),
+ void *data,
+ unsigned num_threads,
+ unsigned count)
+{
+ assert(!"NYI");
+}
+
+void GOMP_parallel_sections(void (*fn)(void *),
+ void *data,
+ unsigned num_threads,
+ unsigned count,
+ unsigned flags)
+{
+ assert(!"NYI");
+}
+
+void GOMP_sections_end(void)
+{
+ assert(!"NYI");
+}
+
+bool GOMP_sections_end_cancel(void)
+{
+
+ assert(!"NYI");
+ return 0;
+}
+
+void GOMP_sections_end_nowait(void)
+{
+ assert(!"NYI");
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#include <bomp_internal.h>
+
+/*
+ * this functions implement the SINGLE construct
+ *
+ * #pragma omp single
+ * {
+ * body;
+ * }
+ *
+ * becomes
+ *
+ * if (GOMP_single_start ())
+ * body;
+ * GOMP_barrier ();
+ *
+ * and
+ *
+ * #pragma omp single copyprivate(x)
+ * {
+ * body;
+ * }
+ *
+ * becomes
+ *
+ * datap = GOMP_single_copy_start ();
+ * if (datap == NULL) {
+ * body;
+ * data.x = x;
+ * GOMP_single_copy_end (&data);
+ * } else {
+ * x = datap->x;
+ * }
+ * GOMP_barrier ();
+ */
+
+/* This function should return true for just the first thread */
+bool GOMP_single_start(void)
+{
+ assert(!"NYI");
+ return 0;
+}
+
+void *GOMP_single_copy_start (void)
+{
+ assert(!"NYI");
+ return NULL;
+}
+
+void GOMP_single_copy_end (void *data)
+{
+ assert(!"NYI");
+}
--- /dev/null
+
+#include <barrelfish/barrelfish.h>
+#include <omp.h>
+
+#define ITERATIONS 10000
+
+int main(int argc, char *argv[])
+{
+ debug_printf("Bomp New Test started\n");
+
+ debug_printf("==========================\n");
+
+ bomp_init(BOMP_THREADS_ALL);
+
+ debug_printf("==========================\n");
+ debug_printf("==========================\n");
+
+ uint32_t array[10];
+ memset(array, 0, sizeof(array));
+
+
+ uint64_t counter = 0;
+
+#pragma omp parallel for
+ for (uint32_t i = 0; i < ITERATIONS; ++i) {
+ array[omp_get_thread_num()]++;
+ if ((i % 5000) == 0) {
+ debug_printf("loop %u\n", i);
+ }
+ __sync_fetch_and_add(&counter, 1);
+ }
+
+ assert(counter == ITERATIONS);
+ debug_printf("array: %u %u %u %u", array[0], array[1], array[2], array[3]);
+
+ debug_printf("==========================\n");
+ debug_printf("==========================\n");
+ counter = 0;
+ memset(array, 0, sizeof(array));
+
+#pragma omp parallel for
+ for (uint32_t i = 0; i < 10000; ++i) {
+ array[omp_get_thread_num()]++;
+ if ((i % 5000) == 0) {
+ debug_printf("loop %u\n", i);
+ }
+ __sync_fetch_and_add(&counter, 1);
+ }
+ debug_printf("array: %u %u %u %u", array[0], array[1], array[2], array[3]);
+ assert(counter == ITERATIONS);
+
+ debug_printf("==========================\n");
+ debug_printf("==========================\n");
+
+ counter = 0;
+ memset(array, 0, sizeof(array));
+
+#pragma omp parallel for
+ for (uint32_t i = 0; i < 12; ++i) {
+#pragma omp parallel for
+ for (uint32_t j = 0; j < 10; ++j) {
+ debug_printf("loop %u.%u\n", i, j);
+ array[omp_get_thread_num()]++;
+ __sync_fetch_and_add(&counter, 1);
+ }
+ }
+
+ debug_printf("array: %u %u %u %u", array[0], array[1], array[2], array[3]);
+ if (counter != 120) {
+ debug_printf("unexpected counter value: %lu (expected %u)\n", counter, 120);
+ }
+
+ assert(counter == 120);
+
+
+ debug_printf("==========================\n");
+ debug_printf("==========================\n");
+
+
+ debug_printf("==========================\n");
+
+ debug_printf("Bomp New Test terminated\n");
+
+ while(1)
+ ;
+ return 0;
+}