Cleanup
authorStefan Kaestle <stefan.kaestle@inf.ethz.ch>
Thu, 21 Aug 2014 08:54:06 +0000 (10:54 +0200)
committerStefan Kaestle <stefan.kaestle@inf.ethz.ch>
Thu, 21 Aug 2014 08:54:06 +0000 (10:54 +0200)
.gitignore
if/Hakefile
tools/weever/install.sh [changed mode: 0644->0755]
tools/weever/multiboot/build_data_files.sh [changed mode: 0644->0755]
usr/xeon_phi_test/benchmark.c [deleted file]

index 1890b99..9d7c576 100644 (file)
@@ -12,3 +12,4 @@ tags
 
 /.metadata/
 /toolchain/
+*.pyc
\ No newline at end of file
index 6afe602..0211e19 100644 (file)
@@ -54,7 +54,6 @@
                "skb",
                "skb_map",
                "octopus",
-               "octopus",
                "omap_sdma",
                "spawn",
                "terminal",
@@ -83,7 +82,6 @@
                "bulk_ctrl",
                "arrakis",
                "e10k_vf",
-               "empty"
                "flounderbootstrap",
                "empty"
            ],
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/usr/xeon_phi_test/benchmark.c b/usr/xeon_phi_test/benchmark.c
deleted file mode 100644 (file)
index e1d539c..0000000
+++ /dev/null
@@ -1,753 +0,0 @@
-/*
- * Copyright (c) 2014 ETH Zurich.
- * All rights reserved.
- *
- * This file is distributed under the terms in the attached LICENSE file.
- * If you do not find this file, copies can be found by writing to:
- * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
- */
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <limits.h>
-
-#include <barrelfish/barrelfish.h>
-#include <barrelfish/ump_chan.h>
-#include <bench/bench.h>
-#include <barrelfish/sys_debug.h>
-#include <xeon_phi/xeon_phi_dma_client.h>
-#include "benchmark.h"
-
-static void xphi_bench_print_settings(void)
-{
-    printf("Core host: %u, Core card: %u\n",
-    XPHI_BENCH_CORE_HOST,
-           XPHI_BENCH_CORE_CARD);
-    printf("Buffer size = %lu bytes, processing runs %u\n",
-    XPHI_BENCH_BUF_SIZE,
-           XPHI_BENCH_PROCESS_RUNS);
-    printf("Bytes per run: %lu kB\n",
-           (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE) / 1024);
-
-#ifdef XPHI_BENCH_PROCESS_CARD
-    printf("Processing Side:  Card\n");
-#else
-    printf("Processing Side:  Host\n");
-#endif
-
-#ifdef XPHI_BENCH_CHAN_CARD
-#ifdef XPHI_BENCH_BUFFER_CARD
-    printf("Memory Setup (Normal):     Host [  ]                      Card [ UMP | UMP | BUFFERS ] \n");
-    printf("Memory Setup (Reversed):   Host [ UMP | UMP | BUFFERS ]   Card [ ] \n");
-#else
-    printf("Memory Setup (Normal):     Host [ BUFFERS ]               Card [ UMP | UMP ] \n");
-    printf("Memory Setup (Reversed):   Host [ UMP | UMP | BUFFERS ]   Card [  ] \n");
-#endif
-#endif
-
-#ifdef XPHI_BENCH_CHAN_HOST
-#ifdef XPHI_BENCH_BUFFER_CARD
-    printf("Memory Setup (Normal):     Host [ UMP | UMP ]             Card [ BUFFERS ] \n");
-    printf("Memory Setup (Reversed):   Host [ UMP | UMP | BUFFERS ]   Card [ ] \n");
-#else
-    printf("Memory Setup (Normal):     Host [ BUFFERS ]   Card [ UMP | UMP ] \n");
-    printf("Memory Setup (Reversed):   Host [ ]           Card [ UMP | UMP | BUFFERS ] \n");
-#endif
-#endif
-
-#ifdef XPHI_BENCH_CHAN_DEFAULT
-#ifdef XPHI_BENCH_BUFFER_CARD
-    printf("Memory Setup (Normal):     Host [ UMP ]             Card [ UMP | BUFFERS ] \n");
-    printf("Memory Setup (Reversed):   Host [ UMP | BUFFERS ]   Card [ UMP ] \n");
-#else
-    printf("Memory Setup (Normal):     Host [ UMP | BUFFERS ]   Card [ UMP ] \n");
-    printf("Memory Setup (Reversed):   Host [ UMP ]             Card [ UMP | BUFFERS ] \n");
-#endif
-    printf("UMP Channel Setup (Normal):   Recv Remote, Send Local\n");
-    printf("UMP Channel Setup (Reversed): Recv Local, Send Remote\n");
-#endif
-}
-
-errval_t xphi_bench_memwrite(void *target)
-{
-    return SYS_ERR_OK;
-
-    debug_printf("Executing local measurements\n");
-
-    errval_t err;
-
-    bench_init();
-
-    cycles_t tsc_start, tsc_end;
-    cycles_t result[4];
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    debug_printf("tscperus = %lu\n", tscperus);
-
-    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 3, XPHI_BENCH_NUM_REPS);
-
-    debug_printf("starting benchmark...\n");
-    uint32_t rep_counter = 0;
-    do {
-        debug_printf("  > run %u of %u memwrite of %lu bytes..\n", rep_counter++,
-        XPHI_BENCH_NUM_REPS,
-                     XPHI_BENCH_BUF_FRAME_SIZE);
-
-        /* using memset */
-        tsc_start = bench_tsc();
-        memset(target, 0, XPHI_BENCH_BUF_FRAME_SIZE);
-        tsc_end = bench_tsc();
-        if (tsc_end < tsc_start) {
-            result[0] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
-        } else {
-            result[0] = (tsc_end - tsc_start - bench_tscoverhead());
-        }
-
-        /* writing in a loop*/
-        volatile uint8_t *buf = target;
-        tsc_start = bench_tsc();
-        for (uint32_t i = 0; i < XPHI_BENCH_BUF_FRAME_SIZE; ++i) {
-            buf[i] = (uint8_t) 1;
-        }
-        tsc_end = bench_tsc();
-        if (tsc_end < tsc_start) {
-            result[1] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
-        } else {
-            result[1] = (tsc_end - tsc_start - bench_tscoverhead());
-        }
-
-        /* reading in a while loop */
-        buf = target;
-        buf[XPHI_BENCH_BUF_FRAME_SIZE - 1] = 0;
-        tsc_start = bench_tsc();
-        while (*(buf++))
-            ;
-
-        tsc_end = bench_tsc();
-        if (tsc_end < tsc_start) {
-            result[2] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
-        } else {
-            result[2] = (tsc_end - tsc_start - bench_tscoverhead());
-        }
-
-    } while (!bench_ctl_add_run(ctl, result));
-
-    // bench_ctl_dump_csv(ctl, "", tscperus);
-    bench_ctl_dump_analysis(ctl, 0, "memset()", tscperus);
-    bench_ctl_dump_analysis(ctl, 1, "forloop write", tscperus);
-    bench_ctl_dump_analysis(ctl, 2, "forloop read", tscperus);
-    return SYS_ERR_OK;
-
-    return SYS_ERR_OK;
-}
-
-static volatile uint8_t dma_done;
-
-static void dma_done_cb(xeon_phi_dma_id_t id,
-                        errval_t err,
-                        void *st)
-{
-    xeon_phi_dma_id_t *id2 = st;
-    if (id != *id2) {
-        debug_printf("id %016lx, %016lx\n", id, *id2);
-    }
-    assert(id == *id2);
-    XPHI_BENCH_DBG("DMA request executed...\n");
-    dma_done = 0x1;
-}
-
-static inline cycles_t calculate_time(cycles_t tsc_start,
-                                      cycles_t tsc_end)
-{
-    cycles_t result;
-    if (tsc_end < tsc_start) {
-        result = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
-    } else {
-        result = (tsc_end - tsc_start - bench_tscoverhead());
-    }
-    return result;
-}
-
-static errval_t measure_memcpy(void *dst,
-                               void *src)
-{
-    errval_t err;
-    cycles_t tsc_start, tsc_end;
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    cycles_t result;
-
-    debug_printf("--------------------------------\n");
-    debug_printf("Measuring memcpy...\n");
-    debug_printf("--------------------------------\n");
-
-    bench_init();
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    for (int i = XPHI_BENCH_SIZE_MIN_BITS; i <= XPHI_BENCH_SIZE_MAX_BITS-2; ++i) {
-        size_t size = (1UL << i);
-
-        ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
-
-        uint8_t idx = 0;
-        //debug_printf("Benchmark: Run %u, size = %lu bytes, [%016lx] -> [%016lx]\n", idx, size, src, dst);
-        do {
-            tsc_start = bench_tsc();
-            memcpy(dst, src, size);
-            tsc_end = bench_tsc();
-            result = calculate_time(tsc_start, tsc_end);
-            idx++;
-        } while (!bench_ctl_add_run(ctl, &result));
-        char buf[50];
-
-        snprintf(buf, sizeof(buf), "%u", i);
-        bench_ctl_dump_analysis(ctl, 0, buf, tscperus);
-
-        bench_ctl_destroy(ctl);
-    }
-    debug_printf("--------------------------------\n");
-    return SYS_ERR_OK;
-}
-
-static errval_t measure_forloop(void *dst,
-                                void *src)
-{
-    errval_t err;
-    cycles_t tsc_start, tsc_end;
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    cycles_t result;
-
-    debug_printf("--------------------------------\n");
-    debug_printf("Measuring Forloop...\n");
-    debug_printf("--------------------------------\n");
-
-    bench_init();
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    for (int i = XPHI_BENCH_SIZE_MIN_BITS; i <= XPHI_BENCH_SIZE_MAX_BITS-2; ++i) {
-        size_t size = (1UL << i);
-
-        ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
-
-        uint8_t idx = 0;
-        //debug_printf("Benchmark: Run %u, size = %lu bytes, [%016lx] -> [%016lx]\n", idx, size, src, dst);
-        do {
-            volatile uint64_t *bsrc = src;
-            volatile uint64_t *bdst = dst;
-            tsc_start = bench_tsc();
-            for (uint32_t j = 0; j < size / sizeof(uint64_t); ++j) {
-                bdst[j] = bsrc[j];
-            }
-            tsc_end = bench_tsc();
-            result = calculate_time(tsc_start, tsc_end);
-            idx++;
-        } while (!bench_ctl_add_run(ctl, &result));
-        char buf[50];
-
-        snprintf(buf, sizeof(buf), "%u", i);
-        bench_ctl_dump_analysis(ctl, 0, buf, tscperus);
-
-        bench_ctl_destroy(ctl);
-    }
-    debug_printf("--------------------------------\n");
-    return SYS_ERR_OK;
-}
-
-static errval_t measure_dma(lpaddr_t pdst,
-                            lpaddr_t psrc)
-{
-    errval_t err;
-    cycles_t tsc_start, tsc_end;
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    cycles_t result;
-    debug_printf("--------------------------------\n");
-    debug_printf("Measuring DMA...\n");
-    debug_printf("--------------------------------\n");
-    // avoid host-host DMA.
-    if (psrc == 0) {
-        debug_printf("skipping host-host transfer\n");
-        return SYS_ERR_OK;
-    }
-
-    bench_init();
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    for (int i = XPHI_BENCH_SIZE_MIN_BITS; i <= XPHI_BENCH_SIZE_MAX_BITS; ++i) {
-        size_t size = (1UL << i);
-
-        ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
-
-        uint8_t idx = 0;
-        //debug_printf("Benchmark: Run %u, size = %lu bytes, [%016lx] -> [%016lx]\n", idx, size, src, dst);
-        do {
-
-            /* Test 3: DMA Transfer */
-            struct xeon_phi_dma_info info = {
-                .src = psrc,
-                .dest = pdst,
-                .size = size
-            };
-
-            xeon_phi_dma_id_t id;
-
-            struct xeon_phi_dma_cont cont = {
-                .cb = dma_done_cb,
-                .arg = &id
-            };
-
-            dma_done = 0x0;
-
-            tsc_start = bench_tsc();
-            err = xeon_phi_dma_client_start(0, &info, cont, &id);
-            if (err_is_fail(err)) {
-                USER_PANIC_ERR(err, "could not exec the transfer");
-            }
-            while (!dma_done) {
-                messages_wait_and_handle_next();
-            }
-            tsc_end = bench_tsc();
-            result = calculate_time(tsc_start, tsc_end);
-            idx++;
-        } while (!bench_ctl_add_run(ctl, &result));
-        char buf[50];
-
-        snprintf(buf, sizeof(buf), "%u", i);
-        bench_ctl_dump_analysis(ctl, 0, buf, tscperus);
-
-        bench_ctl_destroy(ctl);
-    }
-
-    debug_printf("--------------------------------\n");
-
-    return SYS_ERR_OK;
-}
-
-errval_t xphi_bench_memcpy(void *dst,
-                           void *src,
-                           size_t size,
-                           lpaddr_t pdst,
-                           lpaddr_t psrc)
-{
-    errval_t err;
-    uint64_t tscperus;
-
-    bench_init();
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    debug_printf("Starting memcpy benchmark. tsc/us=%lu, cpysize=%lu bytes\n",
-                 tscperus,
-                 (uint64_t) size);
-
-    if (0) {
-    measure_memcpy(dst, src);
-
-    measure_forloop(dst, src);
-    }
-    measure_dma(pdst, psrc);
-
-    return SYS_ERR_OK;
-}
-
-void xphi_bench_start_echo(struct bench_bufs *bufs,
-                           struct ump_chan *uc)
-{
-    errval_t err;
-
-    volatile struct ump_message *msg;
-    volatile struct ump_message *msg_recv;
-
-    struct ump_control ctrl;
-    msg = ump_chan_get_next(uc, &ctrl);
-
-    // send initiator message
-    debug_printf("signal ready.\n");
-    msg->data[0] = 123;
-    msg->header.control = ctrl;
-
-    debug_printf("xphi_bench_start_echo: receiving messages.\n");
-#ifdef XPHI_BENCH_CHECK_STOP
-    uint64_t data = 0x0;
-    while (data != XPHI_BENCH_STOP_FLAG) {
-#else
-        while(true) {
-#endif
-        err = ump_chan_recv(uc, &msg_recv);
-        if (err_is_ok(err)) {
-            XPHI_BENCH_DBG("received ump message [%p]\n", msg_recv);
-            msg = ump_chan_get_next(uc, &ctrl);
-            msg->header.control = ctrl;
-#ifdef XPHI_BENCH_CHECK_STOP
-            data = msg_recv->data[0];
-#endif
-        }
-    }
-    if (data == XPHI_BENCH_STOP_FLAG) {
-        debug_printf("xphi_bench_start_echo: received stop flag.\n");
-    }
-}
-
-void xphi_bench_start_processor(struct bench_bufs *bufs,
-                                struct ump_chan *uc)
-{
-    errval_t err;
-
-    volatile struct ump_message *msg;
-
-    uint64_t buf_idx = 0;
-
-    struct ump_control ctrl;
-    msg = ump_chan_get_next(uc, &ctrl);
-
-    // send initiator message
-    debug_printf("signal ready.\n");
-    msg->data[0] = 123;
-    msg->header.control = ctrl;
-
-    debug_printf("xphi_bench_start_processor: receiving messages.\n");
-#ifdef XPHI_BENCH_CHECK_STOP
-    while (buf_idx != XPHI_BENCH_STOP_FLAG) {
-#else
-        while(true) {
-#endif
-        err = ump_chan_recv(uc, &msg);
-        if (err_is_ok(err)) {
-            buf_idx = msg->data[0];
-            XPHI_BENCH_DBG("received ump message [%016lx]\n", buf_idx);
-            struct bench_buf *buf = &bufs->buf[buf_idx];
-            xphi_bench_fill_buffer(buf, XPHI_BENCH_PROCESS_RUNS);
-            msg = ump_chan_get_next(uc, &ctrl);
-            msg->data[0] = buf_idx;
-            msg->header.control = ctrl;
-        }
-    }
-    if (buf_idx == XPHI_BENCH_STOP_FLAG) {
-        debug_printf("xphi_bench_start_processor: received stop flag\n");
-    }
-}
-
-errval_t xphi_bench_start_initator_rtt(struct bench_bufs *bufs,
-                                       struct ump_chan *uc)
-{
-    errval_t err;
-    cycles_t tsc_start, tsc_end;
-    cycles_t result;
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    volatile struct ump_message *msg;
-
-    bench_init();
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1,
-    XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS);
-
-    debug_printf("RTT benchmark: waiting for ready signal.\n");
-    while (1) {
-        err = ump_chan_recv(uc, &msg);
-        if (err_is_ok(err)) {
-            break;
-        }
-    }
-
-    struct ump_control ctrl;
-
-    debug_printf("Starting RTT benchmark tsc/us=%lu\n", tscperus);
-    uint32_t rep_counter = 0;
-    do {
-        if (!(rep_counter++ % XPHI_BENCH_NUM_RUNS)) {
-            debug_printf("  > run %u of %u...\n", rep_counter,
-            XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS);
-        }
-        tsc_start = bench_tsc();
-        msg = ump_chan_get_next(uc, &ctrl);
-        msg->header.control = ctrl;
-        do {
-            err = ump_chan_recv(uc, &msg);
-        } while (err_is_fail(err));
-        tsc_end = bench_tsc();
-        result = calculate_time(tsc_start, tsc_end);
-
-    } while (!bench_ctl_add_run(ctl, &result));
-
-#ifdef XPHI_BENCH_CHECK_STOP
-    msg = ump_chan_get_next(uc, &ctrl);
-    msg->data[0] = XPHI_BENCH_STOP_FLAG;
-    msg->header.control = ctrl;
-#endif
-    xphi_bench_print_settings();
-    // bench_ctl_dump_csv(ctl, "", tscperus);
-    bench_ctl_dump_analysis(ctl, 0, "RTT", tscperus);
-
-    return SYS_ERR_OK;
-}
-
-errval_t xphi_bench_start_initator_sync(struct bench_bufs *bufs,
-                                        struct ump_chan *uc)
-{
-    errval_t err;
-
-    cycles_t tsc_start, tsc_end;
-    cycles_t result;
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    volatile struct ump_message *msg;
-    uint64_t buf_idx;
-
-    bench_init();
-
-    uint32_t n_recv = 0;
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
-
-    debug_printf("Sync Throughput Benchmark: waiting for ready signal...\n");
-    while (1) {
-        err = ump_chan_recv(uc, &msg);
-        if (err_is_ok(err)) {
-            break;
-        }
-    }
-
-    struct ump_control ctrl;
-
-    debug_printf("Starting sync throughput benchmark. tsc/us=%lu\n", tscperus);
-    uint32_t rep_counter = 0;
-    do {
-        uint64_t b_idx = 0;
-
-        debug_printf("  > run %u of %u with %u moves...\n", rep_counter++,
-        XPHI_BENCH_NUM_REPS,
-                     XPHI_BENCH_NUM_RUNS);
-
-        tsc_start = bench_tsc();
-
-        msg = ump_chan_get_next(uc, &ctrl);
-        struct bench_buf *buf = &bufs->buf[b_idx];
-        xphi_bench_fill_buffer(buf, 1);
-
-        // send initiator message
-        XPHI_BENCH_DBG("sending message [%lu]\n", b_idx);
-        msg->data[0] = b_idx;
-        msg->header.control = ctrl;
-        n_recv = 0;
-        for (uint32_t irun = 0; irun < (XPHI_BENCH_NUM_RUNS - 1); ++irun) {
-            do {
-                err = ump_chan_recv(uc, &msg);
-            } while (err_is_fail(err));
-
-            n_recv++;
-            buf_idx = msg->data[0];
-            uint32_t ret_count = 0;
-            buf = &bufs->buf[b_idx];
-            xphi_bench_read_buffer(buf, 1, &ret_count);
-            XPHI_BENCH_DBG("received message [%lu]\n", buf_idx);
-            assert(buf_idx == b_idx);
-            b_idx = (b_idx + 1) & (bufs->num - 1);
-
-            buf = &bufs->buf[b_idx];
-            xphi_bench_fill_buffer(buf, 1);
-
-            XPHI_BENCH_DBG("sending message [%lu]\n", b_idx);
-            msg = ump_chan_get_next(uc, &ctrl);
-            assert(msg);
-            msg->data[0] = b_idx;
-            msg->header.control = ctrl;
-
-        }
-
-        while (n_recv < XPHI_BENCH_NUM_RUNS) {
-            err = ump_chan_recv(uc, &msg);
-            if (err_is_ok(err)) {
-                buf_idx = msg->data[0];
-                XPHI_BENCH_DBG("received message [%lu]\n", buf_idx);
-                buf = &bufs->buf[buf_idx];
-                uint32_t ret_count = 0;
-                xphi_bench_read_buffer(buf, 1, &ret_count);
-                n_recv++;
-            }
-        }
-        tsc_end = bench_tsc();
-        result = calculate_time(tsc_start, tsc_end);
-    } while (!bench_ctl_add_run(ctl, &result));
-
-#ifdef XPHI_BENCH_CHECK_STOP
-    msg = ump_chan_get_next(uc, &ctrl);
-    msg->data[0] = XPHI_BENCH_STOP_FLAG;
-    msg->header.control = ctrl;
-#endif
-
-    double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
-    avg_s /= 1000000;
-    xphi_bench_print_settings();
-// bench_ctl_dump_csv(ctl, "", tscperus);
-    bench_ctl_dump_analysis(ctl, 0, "Sync Throughput", tscperus);
-    printf("Average seconds: %f\n", avg_s);
-    printf("Average throughput: %f GByte/s\n",
-           (((double) (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE)) / 1024 / 1024
-            / 1024)
-           / (avg_s));
-    printf("Average throughput (with processing): %f GByte/s\n",
-           (XPHI_BENCH_PROCESS_RUNS * ((double) (XPHI_BENCH_NUM_RUNS
-                           * XPHI_BENCH_BUF_SIZE))
-            / 1024 / 1024 / 1024)
-           / (avg_s));
-
-    return SYS_ERR_OK;
-}
-
-errval_t xphi_bench_start_initator_async(struct bench_bufs *bufs,
-                                         struct ump_chan *uc)
-{
-    volatile struct ump_message *msg;
-    uint64_t buf_idx;
-    uint32_t in_transit = 0;
-
-    errval_t err;
-
-    bench_init();
-
-    cycles_t tsc_start;
-    cycles_t result;
-    uint64_t tscperus;
-    bench_ctl_t *ctl;
-
-    err = sys_debug_get_tsc_per_ms(&tscperus);
-    assert(err_is_ok(err));
-    tscperus /= 1000;
-
-    debug_printf("tscperus = %lu\n", tscperus);
-
-    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
-
-    debug_printf("waiting for ready signal\n");
-    while (1) {
-        err = ump_chan_recv(uc, &msg);
-        if (err_is_ok(err)) {
-            break;
-        }
-    }
-
-    debug_printf("starting benchmark ASYNC...\n");
-
-    struct ump_control ctrl;
-
-    uint32_t rep_counter = 0;
-    do {
-        uint64_t b_idx = 0;
-        debug_printf("  > run %u of %u with %u moves...\n", rep_counter++,
-        XPHI_BENCH_NUM_REPS,
-                     XPHI_BENCH_NUM_RUNS);
-        tsc_start = bench_tsc();
-
-        uint32_t irun = 0;
-        uint32_t n_recv = 0;
-        struct bench_buf *buf;
-        while (irun < XPHI_BENCH_NUM_RUNS) {
-            if (in_transit < XPHI_BENCH_MSG_NUM) {
-                msg = ump_chan_get_next(uc, &ctrl);
-                if (!msg) {
-                    continue;
-                }
-                buf = &bufs->buf[b_idx];
-                xphi_bench_fill_buffer(buf, 1);
-                XPHI_BENCH_DBG("sending message [%lu] %p\n", b_idx, msg);
-                msg->data[0] = b_idx;
-                msg->header.control = ctrl;
-                irun++;
-                in_transit++;
-                b_idx = (b_idx + 1) & (bufs->num - 1);
-            }
-
-            err = ump_chan_recv(uc, &msg);
-            if (err_is_ok(err)) {
-                buf_idx = msg->data[0];
-                XPHI_BENCH_DBG("receiving message [%lu]\n", buf_idx);
-                buf = &bufs->buf[buf_idx];
-                uint32_t ret_count = 0;
-                xphi_bench_read_buffer(buf, 1, &ret_count);
-                in_transit--;
-                n_recv++;
-            }
-        }
-
-        while (n_recv < XPHI_BENCH_NUM_RUNS) {
-            err = ump_chan_recv(uc, &msg);
-            if (err_is_ok(err)) {
-                buf_idx = msg->data[0];
-                buf = &bufs->buf[buf_idx];
-                uint32_t ret_count = 0;
-                XPHI_BENCH_DBG("receiving message [%lu]\n", buf_idx);
-                xphi_bench_read_buffer(buf, 1, &ret_count);
-                in_transit--;
-                n_recv++;
-            }
-        }
-
-        result = bench_tsc();
-        if (result - tsc_start > bench_tscoverhead()) {
-            debug_printf("%lu %lu", result - tsc_start, bench_tscoverhead());
-        }
-        if (result < tsc_start) {
-            result = (LONG_MAX - tsc_start) + result - bench_tscoverhead();
-        } else {
-            result = (result - tsc_start - bench_tscoverhead());
-        }
-
-        assert(in_transit == 0);
-    } while (!bench_ctl_add_run(ctl, &result));
-
-#ifdef XPHI_BENCH_CHECK_STOP
-    msg = ump_chan_get_next(uc, &ctrl);
-    msg->data[0] = XPHI_BENCH_STOP_FLAG;
-    msg->header.control = ctrl;
-#endif
-
-    double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
-    avg_s /= 1000000;
-    xphi_bench_print_settings();
-// bench_ctl_dump_csv(ctl, "", tscperus);
-    bench_ctl_dump_analysis(ctl, 0, "ASync Throughput", tscperus);
-    printf("Average seconds: %f\n", avg_s);
-    printf("Average throughput: %f GByte/s\n",
-           (((double) (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE)) / 1024 / 1024
-            / 1024)
-           / (avg_s));
-    printf("Average throughput (with processing): %f GByte/s\n",
-           (XPHI_BENCH_PROCESS_RUNS * ((double) (XPHI_BENCH_NUM_RUNS
-                           * XPHI_BENCH_BUF_SIZE))
-            / 1024 / 1024 / 1024)
-           / (avg_s));
-
-    return SYS_ERR_OK;
-}
-