Xeon Phi DMA: - Wait until start reply is sent. - New error code that prohibits...
authorReto Achermann <acreto@student.ethz.ch>
Fri, 4 Jul 2014 12:15:45 +0000 (14:15 +0200)
committerStefan Kaestle <stefan.kaestle@inf.ethz.ch>
Wed, 20 Aug 2014 21:38:19 +0000 (23:38 +0200)
Test Domains:
Some changes regarding the reading of the cycle counters.

errors/errno.fugu
lib/xeon_phi/xeon_phi_dma_client.c
usr/drivers/xeon_phi/debug.h
usr/drivers/xeon_phi/dma/dma_service.c
usr/xeon_phi_test/benchmark.c
usr/xeon_phi_test/main_card.c
usr/xeon_phi_test/main_host.c

index 1db3c6a..afff81d 100755 (executable)
@@ -1047,6 +1047,7 @@ errors xeon_phi XEON_PHI_ERR_ {
     failure MSG_NOT_INITIALIZED  "Messaging service has not been initialized",
     failure DMA_NOT_INITIALIZED  "DMA library has not been initialized",
     failure DMA_ID_NOT_EXISTS    "The DMA transfer with that ID does not exist ",
+    failure DMA_NOT_SUPPORTED    "The DMA transfer is not supported.",
     failure DMA_BUSY             "All DMA channels are busy",
     failure DMA_IDLE             "All DMA channels are idle",
     failure DMA_MEM_REGISTERED   "The memory has not been registered",
index 0c5efdb..769dda9 100644 (file)
@@ -847,6 +847,11 @@ errval_t xeon_phi_dma_client_start(uint8_t xphi_id,
 
     xdma_rpc_wait_done(xphi_id);
 
+    if (err_is_fail(msg_st->err)) {
+        xdma_insert_free_request(req);
+        return msg_st->err;
+    }
+
     req->id = msg_st->id;
 
     xdma_insert_pending_request(req);
index 8e7a546..5932e29 100644 (file)
@@ -16,8 +16,8 @@
  */
 #define XDEBUG_ENABLED   1
 #define XDEBUG_BOOT      1
-#define XDEBUG_DMA       1
-#define XDEBUG_DMA_V     1
+#define XDEBUG_DMA       0
+#define XDEBUG_DMA_V     0
 #define XDEBUG_INT       1
 #define XDEBUG_SMPT      1
 #define XDEBUG_SERVICE   1
@@ -48,6 +48,7 @@
 #endif
 #else
 #define XDMA_DEBUG(x...)
+#define XDMAV_DEBUG(x...)
 #endif
 
 #if XDEBUG_INT
index 471ee03..1c02ec7 100644 (file)
@@ -248,17 +248,24 @@ struct dma_exec_resp_st
     struct xeon_phi_dma_binding *b;
     xeon_phi_dma_id_t id;
     errval_t err;
+    volatile uint8_t sent;
 };
 
 struct dma_exec_resp_st exec_resp_err;
 
+static void dma_exec_response_sent(void *a)
+{
+    struct dma_exec_resp_st *st = a;
+    st->sent = 0x1;
+}
+
 static void dma_exec_response_tx(void *a)
 {
     errval_t err;
 
     struct dma_exec_resp_st *st = a;
 
-    struct event_closure txcont = MKCONT(free, a);
+    struct event_closure txcont = MKCONT(dma_exec_response_sent, a);
 
     err = xeon_phi_dma_exec_response__tx(st->b, txcont, st->err, st->id);
     if (err_is_fail(err)) {
@@ -282,14 +289,15 @@ static void dma_exec_call_rx(struct xeon_phi_dma_binding *_binding,
     XDMAV_DEBUG("memcopy request [0x%016lx]->[0x%016lx] of size 0x%lx\n",
                            src, dst, length);
 
-    struct dma_exec_resp_st *st = malloc(sizeof(struct dma_exec_resp_st));
-    assert(st);
-    st->b = _binding;
+    struct dma_exec_resp_st st;
+
+    st.b = _binding;
+    st.sent = 0x0;
     lpaddr_t dma_src = xdma_mem_verify(_binding, src, length);
     lpaddr_t dma_dst = xdma_mem_verify(_binding, dst, length);
     if (!dma_src || !dma_dst) {
-        st->err = XEON_PHI_ERR_DMA_MEM_REGISTERED;
-        st->id = 0;
+        st.err = XEON_PHI_ERR_DMA_MEM_REGISTERED;
+        st.id = 0;
 #ifdef XDEBUG_DMA
         if (!dma_src) {
             XDMA_DEBUG("Memory range not registered: [0x%016lx] [0x%016lx]\n",
@@ -301,7 +309,17 @@ static void dma_exec_call_rx(struct xeon_phi_dma_binding *_binding,
         }
 #endif
 
-        dma_exec_response_tx(st);
+        dma_exec_response_tx(&st);
+        return;
+    }
+
+    /*
+     * DMA transfers from host to host are not supported.
+     */
+    if (dma_src > XEON_PHI_SYSMEM_BASE && dma_dst > XEON_PHI_SYSMEM_BASE) {
+        st.err = XEON_PHI_ERR_DMA_NOT_SUPPORTED;
+        st.id = 0;
+        dma_exec_response_tx(&st);
         return;
     }
 
@@ -313,13 +331,23 @@ static void dma_exec_call_rx(struct xeon_phi_dma_binding *_binding,
     setup.info.mem.src = dma_src;
     setup.info.mem.dst = dma_dst;
     setup.info.mem.bytes = length;
-    setup.info.mem.dma_id = &st->id;
+    setup.info.mem.dma_id = &st.id;
 
     struct xeon_phi *phi = xdma_mem_get_phi(_binding);
 
-    st->err = dma_do_request(phi, &setup);
+    st.err = dma_do_request(phi, &setup);
 
-    dma_exec_response_tx(st);
+    dma_exec_response_tx(&st);
+
+    /*
+     * XXX: we must wait until the message has been sent, otherwise we may
+     *      trigger sending the done message when we poll in the main message
+     *      loop. This causes the client library to receive a done message
+     *      of an invalid id.
+     */
+    while(!st.sent) {
+        messages_wait_and_handle_next();
+    }
 }
 
 /*
index 84708bb..97b110e 100644 (file)
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
+#include <limits.h>
 
 #include <barrelfish/barrelfish.h>
 #include <barrelfish/ump_chan.h>
@@ -77,7 +78,7 @@ errval_t xphi_bench_memwrite(void *target)
 
     bench_init();
 
-    cycles_t tsc_start;
+    cycles_t tsc_start, tsc_end;
     cycles_t result[4];
     uint64_t tscperus;
     bench_ctl_t *ctl;
@@ -98,26 +99,41 @@ errval_t xphi_bench_memwrite(void *target)
                      XPHI_BENCH_BUF_FRAME_SIZE);
 
         /* using memset */
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         memset(target, 0, XPHI_BENCH_BUF_FRAME_SIZE);
-        result[0] = rdtsc() - tsc_start - bench_tscoverhead();
+        tsc_end = bench_tsc();
+        if (tsc_end < tsc_start) {
+            result[0] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
+        } else {
+            result[0] = (tsc_end - tsc_start - bench_tscoverhead());
+        }
 
         /* writing in a loop*/
         volatile uint8_t *buf = target;
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         for (uint32_t i = 0; i < XPHI_BENCH_BUF_FRAME_SIZE; ++i) {
             buf[i] = (uint8_t) 1;
         }
-        result[1] = rdtsc() - tsc_start - bench_tscoverhead();
+        tsc_end = bench_tsc();
+        if (tsc_end < tsc_start) {
+            result[1] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
+        } else {
+            result[1] = (tsc_end - tsc_start - bench_tscoverhead());
+        }
 
         /* reading in a while loop */
         buf = target;
         buf[XPHI_BENCH_BUF_FRAME_SIZE - 1] = 0;
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         while (*(buf++))
             ;
 
-        result[2] = rdtsc() - tsc_start - bench_tscoverhead();
+        tsc_end = bench_tsc();
+        if (tsc_end < tsc_start) {
+            result[2] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
+        } else {
+            result[2] = (tsc_end - tsc_start - bench_tscoverhead());
+        }
 
     } while (!bench_ctl_add_run(ctl, result));
 
@@ -153,7 +169,7 @@ errval_t xphi_bench_memcpy(void *dst,
 
     bench_init();
 
-    cycles_t tsc_start;
+    cycles_t tsc_start, tsc_end;
     cycles_t result[4];
     uint64_t tscperus;
     bench_ctl_t *ctl;
@@ -174,18 +190,28 @@ errval_t xphi_bench_memcpy(void *dst,
                      size);
 
         /* using memset */
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         memcpy(dst, src, size);
-        result[0] = rdtsc() - tsc_start - bench_tscoverhead();
+        tsc_end = bench_tsc();
+        if (tsc_end < tsc_start) {
+            result[0] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
+        } else {
+            result[0] = (tsc_end - tsc_start - bench_tscoverhead());
+        }
 
         /* writing in a loop*/
         volatile uint64_t *bsrc = src;
         volatile uint64_t *bdst = dst;
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         for (uint32_t i = 0; i < size / sizeof(uint64_t); ++i) {
             bdst[i] = bsrc[i];
         }
-        result[1] = rdtsc() - tsc_start - bench_tscoverhead();
+        tsc_end = bench_tsc();
+        if (tsc_end < tsc_start) {
+            result[1] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
+        } else {
+            result[1] = (tsc_end - tsc_start - bench_tscoverhead());
+        }
 
         struct xeon_phi_dma_info info = {
             .src = psrc,
@@ -200,7 +226,7 @@ errval_t xphi_bench_memcpy(void *dst,
 
         /* reading in a while loop */
         dma_done = 0x0;
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         err = xeon_phi_dma_client_start(0, &info, cont, NULL);
         if (err_is_fail(err)) {
             USER_PANIC_ERR(err, "could not exec the transfer");
@@ -208,7 +234,12 @@ errval_t xphi_bench_memcpy(void *dst,
         while (!dma_done) {
             messages_wait_and_handle_next();
         }
-        result[2] = rdtsc() - tsc_start - bench_tscoverhead();
+        tsc_end = bench_tsc();
+        if (tsc_end < tsc_start) {
+            result[2] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
+        } else {
+            result[2] = (tsc_end - tsc_start - bench_tscoverhead());
+        }
 
     } while (!bench_ctl_add_run(ctl, result));
 
@@ -316,9 +347,8 @@ errval_t xphi_bench_start_initator_rtt(struct bench_bufs *bufs,
 
     debug_printf("tscperus = %lu\n", tscperus);
 
-    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS,
-                         1,
-                         XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS);
+    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1,
+    XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS);
 
     debug_printf("waiting for ready signal\n");
     while (1) {
@@ -334,18 +364,22 @@ errval_t xphi_bench_start_initator_rtt(struct bench_bufs *bufs,
     uint32_t rep_counter = 0;
     do {
         if (!(rep_counter++ % XPHI_BENCH_NUM_RUNS)) {
-            debug_printf("  > run %u of %u...\n",
-                         rep_counter,
-                         XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS);
+            debug_printf("  > run %u of %u...\n", rep_counter,
+            XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS);
         }
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
         msg = ump_chan_get_next(uc, &ctrl);
         msg->header.control = ctrl;
         do {
             err = ump_chan_recv(uc, &msg);
         } while (err_is_fail(err));
-        result = rdtsc();
-        result = (result - tsc_start - bench_tscoverhead());
+        result = bench_tsc();
+        if (result < tsc_start) {
+            result = (LONG_MAX - tsc_start) + result - bench_tscoverhead();
+        } else {
+            result = (result - tsc_start - bench_tscoverhead());
+        }
+
     } while (!bench_ctl_add_run(ctl, &result));
 
 #ifdef XPHI_BENCH_CHECK_STOP
@@ -357,7 +391,7 @@ errval_t xphi_bench_start_initator_rtt(struct bench_bufs *bufs,
     double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
     avg_s /= 1000000;
     xphi_bench_print_settings();
-    // bench_ctl_dump_csv(ctl, "", tscperus);
+// bench_ctl_dump_csv(ctl, "", tscperus);
     bench_ctl_dump_analysis(ctl, 0, "RTT", tscperus);
 
     return SYS_ERR_OK;
@@ -407,7 +441,7 @@ errval_t xphi_bench_start_initator_sync(struct bench_bufs *bufs,
         XPHI_BENCH_NUM_REPS,
                      XPHI_BENCH_NUM_RUNS);
 
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
 
         msg = ump_chan_get_next(uc, &ctrl);
         struct bench_buf *buf = &bufs->buf[b_idx];
@@ -454,9 +488,12 @@ errval_t xphi_bench_start_initator_sync(struct bench_bufs *bufs,
                 n_recv++;
             }
         }
-        result = rdtsc();
-        result = result - tsc_start - bench_tscoverhead();
-
+        result = bench_tsc();
+        if (result < tsc_start) {
+            result = (LONG_MAX - tsc_start) + result - bench_tscoverhead();
+        } else {
+            result = (result - tsc_start - bench_tscoverhead());
+        }
     } while (!bench_ctl_add_run(ctl, &result));
 
 #ifdef XPHI_BENCH_CHECK_STOP
@@ -468,7 +505,7 @@ errval_t xphi_bench_start_initator_sync(struct bench_bufs *bufs,
     double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
     avg_s /= 1000000;
     xphi_bench_print_settings();
-    // bench_ctl_dump_csv(ctl, "", tscperus);
+// bench_ctl_dump_csv(ctl, "", tscperus);
     bench_ctl_dump_analysis(ctl, 0, "Sync Throughput", tscperus);
     printf("Average seconds: %f\n", avg_s);
     printf("Average throughput: %f GByte/s\n",
@@ -526,7 +563,7 @@ errval_t xphi_bench_start_initator_async(struct bench_bufs *bufs,
         debug_printf("  > run %u of %u with %u moves...\n", rep_counter++,
         XPHI_BENCH_NUM_REPS,
                      XPHI_BENCH_NUM_RUNS);
-        tsc_start = rdtsc();
+        tsc_start = bench_tsc();
 
         uint32_t irun = 0;
         uint32_t n_recv = 0;
@@ -572,8 +609,15 @@ errval_t xphi_bench_start_initator_async(struct bench_bufs *bufs,
             }
         }
 
-        result = rdtsc();
-        result = result - tsc_start - bench_tscoverhead();
+        result = bench_tsc();
+        if (result-tsc_start > bench_tscoverhead()) {
+            debug_printf("%lu %lu", result-tsc_start, bench_tscoverhead());
+        }
+        if (result < tsc_start) {
+            result = (LONG_MAX - tsc_start) + result - bench_tscoverhead();
+        } else {
+            result = (result - tsc_start - bench_tscoverhead());
+        }
 
         assert(in_transit == 0);
     } while (!bench_ctl_add_run(ctl, &result));
@@ -587,7 +631,7 @@ errval_t xphi_bench_start_initator_async(struct bench_bufs *bufs,
     double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
     avg_s /= 1000000;
     xphi_bench_print_settings();
-    // bench_ctl_dump_csv(ctl, "", tscperus);
+// bench_ctl_dump_csv(ctl, "", tscperus);
     bench_ctl_dump_analysis(ctl, 0, "ASync Throughput", tscperus);
     printf("Average seconds: %f\n", avg_s);
     printf("Average throughput: %f GByte/s\n",
index 3d7a7da..e906b4a 100644 (file)
@@ -183,11 +183,11 @@ int main(int argc,
     debug_printf("Initializing UMP channel...\n");
 
     err = ump_chan_init(&uc, inbuf,
-    XPHI_BENCH_MSG_FRAME_SIZE,
+                        XPHI_BENCH_MSG_FRAME_SIZE,
                         outbuf,
                         XPHI_BENCH_MSG_FRAME_SIZE);
     err = ump_chan_init(&uc_rev, inbuf_rev,
-    XPHI_BENCH_MSG_FRAME_SIZE,
+                        XPHI_BENCH_MSG_FRAME_SIZE,
                         outbuf_rev,
                         XPHI_BENCH_MSG_FRAME_SIZE);
 
@@ -256,7 +256,7 @@ int main(int argc,
                     host_base + 2* XPHI_BENCH_MSG_FRAME_SIZE);
     debug_printf("---------- host -> card ---------\n");
     xphi_bench_memcpy(host_buf + 2* XPHI_BENCH_MSG_FRAME_SIZE,
-                       card_buf  + 2* XPHI_BENCH_MSG_FRAME_SIZE,
+                    card_buf + 2* XPHI_BENCH_MSG_FRAME_SIZE,
                     XPHI_BENCH_BUF_FRAME_SIZE / 2,
                     host_base + 2* XPHI_BENCH_MSG_FRAME_SIZE,
                     card_base + 2* XPHI_BENCH_MSG_FRAME_SIZE);
index 353650f..b0d72f6 100644 (file)
@@ -261,9 +261,6 @@ int main(int argc,
         USER_PANIC_ERR(err, "could not register memory");
     }
 
-#undef XPHI_BENCH_BUF_FRAME_SIZE
-#define XPHI_BENCH_BUF_FRAME_SIZE 0x2000
-
     debug_printf("---------- card -> host ---------\n");
     xphi_bench_memcpy(card_buf + 2* XPHI_BENCH_MSG_FRAME_SIZE,
                     host_buf + 2* XPHI_BENCH_MSG_FRAME_SIZE,