/*
 * Copyright (c) 2007-2011, 2013, 2014, ETH Zurich.
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
 */
#include <net_device_manager/net_device_manager.h>
#include <barrelfish/nameservice_client.h>
#include <barrelfish/debug.h>
#include <barrelfish/deferred.h>

#include <netif/e1000.h>
#include <arpa/inet.h>

#include <if/e10k_defs.h>
#include <if/e10k_vf_defs.h>
#include <if/net_filter_defs.h>
#include <dev/e10k_dev.h>
//#define VTON_DCBOFF // TODO: use if VFs are enabled

//#define DEBUG(x...) printf("e10k: " x)
#define DEBUG(x...) do {} while (0)
    struct e10k_binding *binding;
    struct e10k_vf_binding *devif;

    struct capref tx_frame;
    struct capref txhwb_frame;
    struct capref rx_frame;
    MASK_L4PROTO = (1 << 0),
    MASK_SRCIP   = (1 << 1),
    MASK_DSTIP   = (1 << 2),
    MASK_SRCPORT = (1 << 3),
    MASK_DSTPORT = (1 << 4),
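    // Note (illustrative summary, inferred from e10k_flt_ftqf_setup() below):
    // a set bit in e10k_filter.mask means that field is *ignored* when
    // matching. E.g. a filter matching only the TCP destination port uses
    //     .mask = MASK_SRCIP | MASK_DSTIP | MASK_SRCPORT,
    // wildcarding source IP, destination IP and source port (see
    // ipv4_tcp_port() below).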
static union macentry mactable[128] = {
    { .as8 = "\x0\x0\x0\x0\x0\x0" },   // First MAC is never set (loaded from card EEPROM)

    { .as8 = "\x22\xc9\xfc\x96\x83\xfc" },
    { .as8 = "\xce\x43\x5b\xf7\x3e\x60" },
    { .as8 = "\x6a\xb0\x62\xf6\xa7\x21" },
    { .as8 = "\xb2\xdf\xf9\x39\xc6\x10" },
    { .as8 = "\x92\x77\xe7\x3f\x80\x30" },
    { .as8 = "\xd6\x88\xd6\x86\x4a\x22" },
    { .as8 = "\x7e\x64\xe9\x2e\xbe\x4b" },
    { .as8 = "\xba\xac\x49\xd6\x3c\x77" },

    // The rest default to all zeroes

    // The last entry (127) is also never set (loaded from card EEPROM)
static uint16_t credit_refill[128];
static uint32_t tx_rate[128];
// Hack for monolithic driver
void qd_main(void) __attribute__((weak));
void qd_argument(const char *arg) __attribute__((weak));
void qd_interrupt(bool is_rx, bool is_tx) __attribute__((weak));
void qd_queue_init_data(struct e10k_binding *b, struct capref registers,
                        uint64_t macaddr) __attribute__((weak));
void qd_queue_memory_registered(struct e10k_binding *b) __attribute__((weak));
void qd_write_queue_tails(struct e10k_binding *b) __attribute__((weak));
void cd_request_device_info(struct e10k_binding *b);
void cd_register_queue_memory(struct e10k_binding *b,
                              uint8_t n,
                              struct capref tx_frame,
                              struct capref txhwb_frame,
                              struct capref rx_frame,
                              uint32_t rxbufsz,
                              uint32_t rxhdrsz,
                              int16_t msix_intvec,
                              uint8_t msix_intdest,
                              bool use_irq,
                              bool use_rsc,
                              uint64_t tx_va,
                              uint64_t rx_va,
                              uint64_t txhwb_va);
void cd_set_interrupt_rate(struct e10k_binding *b,
                           uint8_t n,
                           uint16_t rate);
static void idc_write_queue_tails(struct e10k_binding *b);
static void stop_device(void);
static void device_init(void);
static void queue_hw_init(uint8_t n, bool set_tail);
//static void queue_hw_stop(uint8_t n);
static void interrupt_handler_msix(void* arg);
//static void interrupt_handler_msix_b(void* arg);
static void e10k_flt_ftqf_setup(int index, struct e10k_filter *filter);
//static void e10k_flt_etype_setup(int filter, int queue, uint16_t etype);
static const char *service_name = "e10k";
static int initialized = 0;
static bool exported = false;
static e10k_t *d = NULL;
static struct capref *regframe;

static bool use_interrupts = true;
static bool msix = false;

/** Specifies if RX/TX is currently enabled on the device. */
static bool rxtx_enabled = false;

// Management of MSI-X vectors
static struct bmallocator msix_alloc;
/** MSI-X vector used by cdriver */
static size_t cdriver_msix = -1;
static uint8_t cdriver_vector;

// State of queues and filters
static struct queue_state queues[128];
static struct e10k_filter filters[128];

static char buf[4096];

/* PCI device address passed on command line */
static uint32_t pci_bus = PCI_DONT_CARE;
static uint32_t pci_device = PCI_DONT_CARE;
static uint32_t pci_function = 0;
static uint32_t pci_deviceid = E10K_PCI_DEVID;

/* VF allocation data (one slot per possible VF) */
static bool vf_used[64];
static void e10k_flt_ftqf_setup(int idx, struct e10k_filter* filter)
{
    uint16_t m = filter->mask;
    e10k_l4_proto_t p;
    e10k_ftqf_t ftqf = 0;
    e10k_l34timir_t timir = 0;
    e10k_sdpqf_t sdpqf = 0;

    if (!(m & MASK_SRCIP)) {
        DEBUG("src_ip=%"PRIx32" ", filter->src_ip);
        e10k_saqf_wr(d, idx, htonl(filter->src_ip));
    }
    if (!(m & MASK_DSTIP)) {
        DEBUG("dst_ip=%"PRIx32" ", filter->dst_ip);
        e10k_daqf_wr(d, idx, htonl(filter->dst_ip));
    }
    if (!(m & MASK_SRCPORT)) {
        DEBUG("src_port=%d ", filter->src_port);
        sdpqf = e10k_sdpqf_src_port_insert(sdpqf, htons(filter->src_port));
    }
    if (!(m & MASK_DSTPORT)) {
        DEBUG("dst_port=%d ", filter->dst_port);
        sdpqf = e10k_sdpqf_dst_port_insert(sdpqf, htons(filter->dst_port));
    }
    e10k_sdpqf_wr(d, idx, sdpqf);
    DEBUG("queue_id=%d \n", filter->queue);

    if (!(m & MASK_L4PROTO)) {
        switch (filter->l4_type) {
            case L4_OTHER:  p = e10k_l4other; break;
            case L4_UDP:    p = e10k_l4udp;   break;
            case L4_TCP:    p = e10k_l4tcp;   break;
            case L4_SCTP:   p = e10k_l4sctp;  break;
            default: assert(0); return;
        }
        ftqf = e10k_ftqf_protocol_insert(ftqf, p);
    }

    ftqf = e10k_ftqf_m_srcaddr_insert(ftqf, !!(m & MASK_SRCIP));
    ftqf = e10k_ftqf_m_dstaddr_insert(ftqf, !!(m & MASK_DSTIP));
    ftqf = e10k_ftqf_m_srcport_insert(ftqf, !!(m & MASK_SRCPORT));
    ftqf = e10k_ftqf_m_dstport_insert(ftqf, !!(m & MASK_DSTPORT));
    ftqf = e10k_ftqf_m_protocol_insert(ftqf, !!(m & MASK_L4PROTO));

    // Configure destination queue and enable filter
    timir = e10k_l34timir_rx_queue_insert(timir, filter->queue);
    e10k_l34timir_wr(d, idx, timir);

    ftqf = e10k_ftqf_priority_insert(ftqf, filter->priority);
    ftqf = e10k_ftqf_pool_mask_insert(ftqf, 1);
    ftqf = e10k_ftqf_queue_en_insert(ftqf, 1);
    e10k_ftqf_wr(d, idx, ftqf);
}
static int ftqf_index = 0;
static int ftqf_alloc(void)
{
    // FIXME: Do this reasonably
    return ftqf_index++;
}

static errval_t reg_ftfq_filter(struct e10k_filter* f, uint64_t* fid)
{
    int i;

    DEBUG("reg_ftfq_filter: called\n");

    if ((i = ftqf_alloc()) < 0) {
        return FILTER_ERR_NOT_ENOUGH_MEMORY;
    }

    filters[i] = *f;
    filters[i].enabled = true;

    e10k_flt_ftqf_setup(i, f);
/****************************************************************************/
/* Net filter interface implementation */
/****************************************************************************/
static errval_t cb_install_filter(struct net_filter_binding *b,
                                  net_filter_filter_type_t type,

    struct e10k_filter f = {
        .dst_port = dst_port,
        .src_port = src_port,
        .l4_type = (type == net_filter_PORT_TCP ? L4_TCP : L4_UDP),
    };

    if (src_ip == 0) {
        f.mask = f.mask | MASK_SRCIP;
    }
    if (dst_ip == 0) {
        f.mask = f.mask | MASK_DSTIP;
    }
    if (dst_port == 0) {
        f.mask = f.mask | MASK_DSTPORT;
    }
    if (src_port == 0) {
        f.mask = f.mask | MASK_SRCPORT;
    }

    err = reg_ftfq_filter(&f, fid);
    DEBUG("filter registered: err=%s, fid=%"PRIu64"\n", err_getstring(err), *fid);
static errval_t cb_remove_filter(struct net_filter_binding *b,
                                 net_filter_filter_type_t type,
                                 uint64_t filter_id,
                                 errval_t *err)
{
    if (type == net_filter_PORT_UDP || type == net_filter_PORT_TCP) {

        *err = NET_FILTER_ERR_NOT_FOUND;
    }

    DEBUG("unregister_filter: called (%"PRIx64")\n", filter_id);
    return SYS_ERR_OK;
}
static struct net_filter_rpc_rx_vtbl net_filter_rpc_rx_vtbl = {
    .install_filter_ip_call = cb_install_filter,
    .remove_filter_call = cb_remove_filter,
    .install_filter_mac_call = NULL,
};
static void net_filter_export_cb(void *st, errval_t err, iref_t iref)
{
    printf("exported net filter interface\n");
    err = nameservice_register("net_filter_e10k", iref);
    assert(err_is_ok(err));
    DEBUG("Net filter interface exported\n");
}
static errval_t net_filter_connect_cb(void *st, struct net_filter_binding *b)
{
    printf("New connection on net filter interface\n");
    b->rpc_rx_vtbl = net_filter_rpc_rx_vtbl;
    return SYS_ERR_OK;
}
static void e10k_flt_etype_setup(int filter, int queue, uint16_t etype)
{
    // Clear existing values
    e10k_etqf_wr(d, filter, 0x0);
    e10k_etqs_wr(d, filter, 0x0);

    e10k_etqs_rx_queue_wrf(d, filter, queue);
    e10k_etqs_queue_en_wrf(d, filter, 1);

    e10k_etqf_etype_wrf(d, filter, etype);
    e10k_etqf_filter_en_wrf(d, filter, 1);
}
static errval_t arp_filter(uint64_t qid, uint64_t* fid)
{
    e10k_flt_etype_setup(0, (int) qid, 0x0806);

    DEBUG("reg_arp_filter: called\n");
    return SYS_ERR_OK;
}
static errval_t reg_ftfq_filter(struct e10k_filter* f, uint64_t* fid)
{
    int i;

    DEBUG("reg_ftfq_filter: called\n");

    if ((i = ftqf_alloc()) < 0) {
        return ETHERSRV_ERR_NOT_ENOUGH_MEM;
    }

    filters[i] = *f;
    filters[i].enabled = true;

    e10k_flt_ftqf_setup(i, f);
static errval_t ipv4_tcp_port(uint64_t qid, uint16_t port, uint64_t* fid)
{
    struct e10k_filter f = {
        .mask = MASK_SRCIP | MASK_DSTIP | MASK_SRCPORT,
    };

    DEBUG("ipv4_tcp_port: called\n");
    return reg_ftfq_filter(&f, fid);
}

static errval_t ipv4_udp_port(uint64_t qid, uint16_t port, uint64_t* fid)
{
    struct e10k_filter f = {
        .mask = MASK_SRCIP | MASK_DSTIP | MASK_SRCPORT,
    };

    DEBUG("ipv4_udp_port: called\n");
    return reg_ftfq_filter(&f, fid);
}
static errval_t ipv4_tcp_conn(uint64_t qid,
                              uint32_t l_ip, uint16_t l_port,
                              uint32_t r_ip, uint16_t r_port,
                              uint64_t* fid)
{
    struct e10k_filter f = {
    };

    DEBUG("ipv4_tcp_conn: called\n");
    return reg_ftfq_filter(&f, fid);
}
static errval_t deregister_filter(uint64_t fid)
{
    DEBUG("deregister_filter: called\n");
    return LIB_ERR_NOT_IMPLEMENTED;
}
/** Enable RX operation for whole card. */
static void rx_enable(void)
{
    e10k_secrxctrl_rx_dis_wrf(d, 1);
    while (e10k_secrxstat_sr_rdy_rdf(d) == 0); // TODO: Timeout
    e10k_rxctrl_rxen_wrf(d, 1);
    e10k_secrxctrl_rx_dis_wrf(d, 0);
}

/** Disable RX operation for whole card. */
static void rx_disable(void)
{
    e10k_secrxctrl_rx_dis_wrf(d, 1);
    while (e10k_secrxstat_sr_rdy_rdf(d) == 0); // TODO: Timeout
    e10k_rxctrl_rxen_wrf(d, 0);
    e10k_secrxctrl_rx_dis_wrf(d, 0);
}
/** Enable TX operation for whole card. */
static void tx_enable(void)
{
    e10k_dmatxctl_txen_wrf(d, 1);
}

/** Disable TX operation for whole card. */
static void tx_disable(void)
{
    e10k_dmatxctl_txen_wrf(d, 0);
    while (e10k_dmatxctl_txen_rdf(d) != 0); // TODO: timeout
}
static void setup_interrupt(size_t *msix_index, uint8_t core, uint8_t vector)
{
    bool res;
    errval_t err;
    uint8_t dest;

    res = bmallocator_alloc(&msix_alloc, msix_index);
    assert(res);

    err = get_apicid_from_core(core, &dest);
    assert(err_is_ok(err));

    err = pci_msix_vector_init(*msix_index, dest, vector);
    assert(err_is_ok(err));

    DEBUG("e10k: MSI-X vector setup index=%"PRIx64", core=%d apic=%d swvec=%x\n",
          *msix_index, core, dest, vector);
}
/**
 * Initialize hardware registers.
 * It is also called after a reset of the device.
 */
static void device_init(void)
{
    int i;
    e10k_ctrl_t ctrl;
    e10k_pfqde_t pfqde;
    errval_t err;
    bool initialized_before = initialized;
    if (initialized_before) {
        // Save queue heads and tails
        for (i = 0; i < 128; i++) {
            if (queues[i].enabled) {
                queues[i].tx_head = e10k_tdh_rd(d, i);
                if (i < 64) {
                    queues[i].rx_head = e10k_rdh_1_rd(d, i);
                } else {
                    queues[i].rx_head = e10k_rdh_2_rd(d, i - 64);
                }
            }
        }
    }
    // Do a double reset to be sure
    for (i = 0; i < 2; i++) {
        // Issue Global reset
        ctrl = e10k_ctrl_rd(d);
        ctrl = e10k_ctrl_lrst_insert(ctrl, 1);
        ctrl = e10k_ctrl_rst_insert(ctrl, 1);
        e10k_ctrl_wr(d, ctrl);
        while ((e10k_ctrl_rst_rdf(d) != 0) ||
               (e10k_ctrl_lrst_rdf(d) != 0)); // TODO: Timeout

        // Spec says to wait 10 ms, the FreeBSD driver waits 50
    }
    DEBUG("Global reset done\n");
    // Disable interrupts
    e10k_eimc_cause_wrf(d, 0x7FFFFFFF);

    // Let firmware know that we have taken over
    e10k_ctrl_ext_drv_load_wrf(d, 1);
    // No Snoop disable (from the FreeBSD driver)
    // Without this, the driver only works on sbrinz1 if the receive buffers
    // are mapped non-cacheable. If the buffers are mapped cacheable, we
    // sometimes seem to read stale buffer contents. It is not entirely clear
    // why: as far as we understand it, No Snoop should only be enabled by
    // the device when it is safe.
    // TODO: Also check the performance implications of this on gottardo and
    // other machines where it works without this.
    e10k_ctrl_ext_ns_dis_wrf(d, 1);
    // Initialize flow-control registers
    for (i = 0; i < 8; i++) {
        if (i < 4) e10k_fcttv_wr(d, i, 0x0);
        e10k_fcrtl_wr(d, i, 0x0);
        e10k_fcrth_wr(d, i, 0x0);
    }
    e10k_fcrtv_wr(d, 0x0);
    e10k_fccfg_wr(d, 0x0);
    // Wait for EEPROM auto read
    while (e10k_eec_auto_rd_rdf(d) == 0); // TODO: Timeout
    DEBUG("EEPROM auto read done\n");

    // Wait for DMA initialization
    while (e10k_rdrxctl_dma_initok_rdf(d) == 0); // TODO: Timeout

    // Wait for link to come up
    while (e10k_links_lnk_up_rdf(d) == 0); // TODO: Timeout
    // Initialize interrupts
    e10k_eicr_wr(d, 0xffffffff);

    // Switch to MSI-X mode
    e10k_gpie_msix_wrf(d, 1);
    e10k_gpie_pba_sup_wrf(d, 1);
    e10k_gpie_ocd_wrf(d, 1);

    // Allocate MSI-X vector for cdriver and set up handler
    if (cdriver_msix == -1) {
        err = pci_setup_inthandler(interrupt_handler_msix, NULL, &cdriver_vector);
        assert(err_is_ok(err));

        setup_interrupt(&cdriver_msix, disp_get_core_id(), cdriver_vector);
    }

    // Map management interrupts to our vector
    e10k_ivar_misc_i_alloc0_wrf(d, cdriver_msix);
    e10k_ivar_misc_i_alloc1_wrf(d, cdriver_msix);
    e10k_ivar_misc_i_allocval0_wrf(d, 1);
    e10k_ivar_misc_i_allocval1_wrf(d, 1);

    // Enable auto masking of interrupt
    e10k_gpie_eiame_wrf(d, 1);
    e10k_eiamn_wr(d, cdriver_msix / 32, (1 << (cdriver_msix % 32)));
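    // Worked example for the EIAM indexing above (illustrative): the
    // auto-mask bits for the MSI-X vectors are spread over 32-bit
    // registers, so vector v lives in register v / 32 at bit v % 32.
    // E.g. cdriver_msix = 35 writes register 1, bit 3 (value 0x8).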
    // Set no interrupt delay
    e10k_eitr_l_wr(d, cdriver_msix, 0);
    e10k_gpie_eimen_wrf(d, 1);

    e10k_eimsn_wr(d, cdriver_msix / 32, (1 << (cdriver_msix % 32)));

    e10k_gpie_msix_wrf(d, 0);
    // Set no interrupt delay
    e10k_eitr_l_wr(d, 0, 0);
    e10k_gpie_eimen_wrf(d, 1);

    // Enable all interrupts
    e10k_eimc_wr(d, e10k_eims_rd(d));
    e10k_eims_cause_wrf(d, 0x7fffffff);
    // Just a guess for RSC delay
    e10k_gpie_rsc_delay_wrf(d, 2);
    // Initialize multiple register tables (MAC 0 and 127 are not set)
    for (i = 0; i < 128; i++) {
        /* uint64_t mac = e10k_ral_ral_rdf(d, i) | ((uint64_t) e10k_rah_rah_rdf(d, i) << 32); */
        /* uint8_t *m = (uint8_t *)&mac; */
        /* DEBUG("Old MAC %d: %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx ... mac valid = %x\n", */
        /*       i, m[0], m[1], m[2], m[3], m[4], m[5], e10k_rah_av_rdf(d, 0)); */

        if (i > 0 && i < 127) {
            e10k_ral_wr(d, i, mactable[i].as64 & 0xffffffff);
            e10k_rah_wr(d, i, mactable[i].as64 >> 32);
            e10k_rah_av_wrf(d, i, 1);
        }

        /* mac = e10k_ral_ral_rdf(d, i) | ((uint64_t) e10k_rah_rah_rdf(d, i) << 32); */
        /* m = (uint8_t *)&mac; */
        /* DEBUG("New MAC %d: %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx ... mac valid = %x\n", */
        /*       i, m[0], m[1], m[2], m[3], m[4], m[5], e10k_rah_av_rdf(d, 0)); */
    }
    for (i = 0; i < 128; i++)
        e10k_mta_bit_vec_wrf(d, i, 0);
    for (i = 0; i < 128; i++)
        e10k_vfta_vlan_flt_wrf(d, i, 0);
    for (i = 0; i < 128; i++)
        e10k_pfvlvfb_wr(d, i, 0);

    for (i = 0; i < 64; i++) {
        e10k_pfvlvf_vi_en_wrf(d, i, 1);

        e10k_pfvlvf_vi_en_wrf(d, i, 0);

        e10k_psrtype_wr(d, i, 0);
    }

    for (i = 0; i < 128; i++)
        e10k_pfuta_wr(d, i, 0);
    for (i = 0; i < 256; i++)
        e10k_mpsar_pool_ena_wrf(d, i, 0);
    // Program direct match MAC forwarding rules
    // This setup will assign the first 64 MAC addresses each to a different
    // RX pool. This assumes we have 64 VFs. The rest are filtered (dropped).
    for (i = 0; i < 128; i++) {
        if (i < 32) {
            // Pools < 32 (low bits)
            e10k_mpsar_pool_ena_wrf(d, 2 * i, 1 << i);
            e10k_mpsar_pool_ena_wrf(d, 2 * i + 1, 0);
        } else if (i < 64) {
            // Pools >= 32 and < 64 (high bits)
            e10k_mpsar_pool_ena_wrf(d, 2 * i, 0);
            e10k_mpsar_pool_ena_wrf(d, 2 * i + 1, 1 << (i - 32));
        } else {
            // Pools >= 64 -> DROP
            e10k_mpsar_pool_ena_wrf(d, 2 * i, 0);
            e10k_mpsar_pool_ena_wrf(d, 2 * i + 1, 0);
        }
    }
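    // Worked example for the MPSAR layout above (illustrative): each MAC
    // entry i owns a pair of 32-bit pool-enable registers, 2*i (pools 0-31)
    // and 2*i + 1 (pools 32-63). So MAC 5 maps to pool 5 via
    // MPSAR[10] = 1 << 5, MAC 40 to pool 40 via MPSAR[81] = 1 << 8, and
    // MAC 100 has both registers zero, i.e. its traffic is dropped.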
    for (i = 0; i < 128; i++) {
        e10k_fhft_1_wr(d, i, 0);

        e10k_fhft_2_wr(d, i, 0);
    }

#ifdef VTON_DCBOFF
    // Disallow per-queue RSC (not supported in SR-IOV mode)
    e10k_rfctl_rsc_dis_wrf(d, 1);
#else
    // Allow for per-queue RSC
    e10k_rfctl_rsc_dis_wrf(d, 0);
#endif
    // Initialize RX filters
    for (i = 0; i < 128; i++) {
        e10k_ftqf_wr(d, i, 0);
        e10k_saqf_wr(d, i, 0);
        e10k_daqf_wr(d, i, 0);
        e10k_sdpqf_wr(d, i, 0);
    }
    for (i = 0; i < 32; i++)
        e10k_reta_wr(d, i, 0);
    e10k_mcstctrl_mfe_wrf(d, 0);
    e10k_fctrl_bam_wrf(d, 1);

    // Enable Jumbo frames
    e10k_hlreg0_jumboen_wrf(d, 1);
    e10k_maxfrs_mfs_wrf(d, 15872);
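    // Note (illustrative): MAXFRS.MFS holds the maximum frame size in
    // bytes, so 15872 = 15.5 KiB jumbo frames, compared to 1518 bytes for
    // standard Ethernet frames.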
    // Make sure Rx CRC strip is consistently enabled in HLREG0 and RDRXCTL
    e10k_hlreg0_rxcrcstrp_wrf(d, 1);
    // Note: rscfrstsz has to be set to 0 (is mbz)
    e10k_rdrxctl_t rdrxctl = e10k_rdrxctl_rd(d);
    rdrxctl = e10k_rdrxctl_crcstrip_insert(rdrxctl, 1);
    e10k_rdrxctl_wr(d, rdrxctl);
    // Configure buffers etc. according to specification
    // Section 4.6.11.3.4 (DCB, virtualization, no RSS)
    // 1:1 from spec, though not sure if everything is necessary, but since
    // initialization is still buggy, I'd rather be conservative and set some
    // additional flags, even if they aren't strictly necessary.
    e10k_rttdcs_arbdis_wrf(d, 1);

#ifdef VTON_DCBOFF
    e10k_rxpbsize_size_wrf(d, 0, 0x200);
    e10k_txpbsize_size_wrf(d, 0, 0xA0);
    e10k_txpbthresh_thresh_wrf(d, 0, 0xA0);
    for (i = 1; i < 8; i++) {
        e10k_rxpbsize_size_wrf(d, i, 0x0);
        e10k_txpbsize_size_wrf(d, i, 0x0);
        e10k_txpbthresh_thresh_wrf(d, i, 0x0);
    }

    e10k_mrqc_mrque_wrf(d, e10k_vrt_only);
    e10k_mtqc_rt_en_wrf(d, 0);
    e10k_mtqc_vt_en_wrf(d, 1);
    e10k_mtqc_num_tc_wrf(d, 1);
    e10k_pfvtctl_vt_en_wrf(d, 1);
#else
    e10k_rxpbsize_size_wrf(d, 0, 0x200);
    e10k_txpbsize_size_wrf(d, 0, 0xA0);
    e10k_txpbthresh_thresh_wrf(d, 0, 0xA0);
    for (i = 1; i < 8; i++) {
        e10k_rxpbsize_size_wrf(d, i, 0x0);
        e10k_txpbsize_size_wrf(d, i, 0x0);
        e10k_txpbthresh_thresh_wrf(d, i, 0x0);
    }

    e10k_mrqc_mrque_wrf(d, e10k_no_rss);
    e10k_mtqc_rt_en_wrf(d, 0);
    e10k_mtqc_vt_en_wrf(d, 0);
    e10k_mtqc_num_tc_wrf(d, 0);
    e10k_pfvtctl_vt_en_wrf(d, 0);
#endif
    e10k_rtrup2tc_wr(d, 0);
    e10k_rttup2tc_wr(d, 0);

#ifdef VTON_DCBOFF
    e10k_dtxmxszrq_max_bytes_wrf(d, 0xFFF);
#else
    e10k_dtxmxszrq_max_bytes_wrf(d, 0x010);
#endif

    e10k_rttdcs_arbdis_wrf(d, 0);
    for (i = 0; i < 128; i++) {
        pfqde = e10k_pfqde_queue_idx_insert(0x0, i);
        pfqde = e10k_pfqde_we_insert(pfqde, 1);
        // XXX: Might want to set drop enable here
        /* pfqde = e10k_pfqde_qde_insert(pfqde, 1); */
        e10k_pfqde_wr(d, pfqde);
    }
    e10k_mflcn_rpfce_wrf(d, 0);
    e10k_mflcn_rfce_wrf(d, 0);
    e10k_fccfg_tfce_wrf(d, e10k_lfc_en);

    e10k_mflcn_rpfce_wrf(d, 1);
    e10k_mflcn_rfce_wrf(d, 0);
    e10k_fccfg_tfce_wrf(d, e10k_pfc_en);
    /* Causes ECC error (could be the same problem as with l34timir, see e10k.dev) */
    for (i = 0; i < 128; i++) {
        e10k_rttdqsel_txdq_idx_wrf(d, i);
        e10k_rttdt1c_wr(d, credit_refill[i]);   // Credit refill x 64 bytes
        e10k_rttbcnrc_wr(d, 0);
        if (tx_rate[i] != 0) {
            // Turn on rate scheduler for this queue and set rate factor
            e10k_rttbcnrc_t rttbcnrc = 0;
            // XXX: Assuming 10Gb/s link speed. Change if that's not correct.
            uint32_t tx_factor = (10000 << 14) / tx_rate[i];

            rttbcnrc = e10k_rttbcnrc_rf_dec_insert(rttbcnrc, tx_factor & 0x3fff);
            rttbcnrc = e10k_rttbcnrc_rf_int_insert(rttbcnrc, tx_factor >> 14);
            rttbcnrc = e10k_rttbcnrc_rs_ena_insert(rttbcnrc, 1);
            e10k_rttbcnrc_wr(d, rttbcnrc);

            printf("Setting rate for queue %d to %u\n", i, tx_rate[i]);
        }
    }
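    // Worked example for the rate factor above (illustrative): RTTBCNRC
    // stores the ratio of link speed to queue rate as a fixed-point number
    // with a 14-bit fraction. For tx_rate[i] = 1000 (Mb/s),
    // tx_factor = (10000 << 14) / 1000 = 163840, giving
    // rf_int = 163840 >> 14 = 10 and rf_dec = 0: the queue is capped at
    // 1/10 of the assumed 10 Gb/s link rate.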
    for (i = 0; i < 8; i++) {
        e10k_rttdt2c_wr(d, i, 0);
        e10k_rttpt2c_wr(d, i, 0);
        e10k_rtrpt4c_wr(d, i, 0);
    }

#ifdef VTON_DCBOFF
    e10k_rttdcs_tdpac_wrf(d, 0);
    e10k_rttdcs_vmpac_wrf(d, 1);    // Remember to set RTTDT1C >= MTU when this is 1
    e10k_rttdcs_tdrm_wrf(d, 0);
    e10k_rttdcs_bdpm_wrf(d, 1);
    e10k_rttdcs_bpbfsm_wrf(d, 0);
    e10k_rttpcs_tppac_wrf(d, 0);
    e10k_rttpcs_tprm_wrf(d, 0);
    e10k_rttpcs_arbd_wrf(d, 0x224);
    e10k_rtrpcs_rac_wrf(d, 0);
    e10k_rtrpcs_rrm_wrf(d, 0);
#else
    e10k_rttdcs_tdpac_wrf(d, 0);
    e10k_rttdcs_vmpac_wrf(d, 0);
    e10k_rttdcs_tdrm_wrf(d, 0);
    e10k_rttdcs_bdpm_wrf(d, 1);
    e10k_rttdcs_bpbfsm_wrf(d, 1);
    e10k_rttpcs_tppac_wrf(d, 0);
    e10k_rttpcs_tprm_wrf(d, 0);
    e10k_rttpcs_arbd_wrf(d, 0x224);
    e10k_rtrpcs_rac_wrf(d, 0);
    e10k_rtrpcs_rrm_wrf(d, 0);
#endif
    // Disable relaxed ordering
    for (i = 0; i < 128; i++) {
        e10k_dca_txctrl_txdesc_wbro_wrf(d, i, 0);
        if (i < 64) {
            e10k_dca_rxctrl_1_rxhdr_ro_wrf(d, i, 0);
            e10k_dca_rxctrl_1_rxdata_wrro_wrf(d, i, 0);
        } else {
            e10k_dca_rxctrl_2_rxhdr_ro_wrf(d, i - 64, 0);
            e10k_dca_rxctrl_2_rxdata_wrro_wrf(d, i - 64, 0);
        }
    }
    // Disable all queues
    for (i = 0; i < 128; i++) {
        e10k_txdctl_enable_wrf(d, i, 0);
        if (i < 64) {
            e10k_rxdctl_1_enable_wrf(d, i, 0);
        } else {
            e10k_rxdctl_2_enable_wrf(d, i - 64, 0);
        }
    }
    for (i = 0; i < 64; i++) {
        e10k_pfvml2flt_mpe_wrf(d, i, 1);
        e10k_pfvml2flt_bam_wrf(d, i, 1);
        e10k_pfvml2flt_aupe_wrf(d, i, 1);
    }
    // Enable DCA (Direct Cache Access)
    e10k_dca_ctrl_t dca_ctrl = 0;
    dca_ctrl = e10k_dca_ctrl_dca_mode_insert(dca_ctrl, e10k_dca10);
    e10k_dca_ctrl_wr(d, dca_ctrl);

    printf("DCA globally enabled\n");
    DEBUG("Card initialized (%d)\n", initialized_before);

    // Restore configuration
    if (initialized_before) {
        for (i = 0; i < 128; i++) {
            if (filters[i].enabled) {
                e10k_flt_ftqf_setup(i, filters + i);
            }
        }

        for (i = 0; i < 128; i++) {
            if (queues[i].enabled) {
                queue_hw_init(i, true);
            }
        }

        DEBUG("Configuration restored\n");
    }

    initialized = 1;
}
/** Initialize hardware queue n. */
static void queue_hw_init(uint8_t n, bool set_tail)
{
    errval_t r;
    struct frame_identity frameid = { .base = 0, .bytes = 0 };
    uint64_t tx_phys, txhwb_phys, rx_phys;
    size_t tx_size, rx_size;
    bool enable_global = !rxtx_enabled;

    // Get physical addresses for rx/tx rings
    r = invoke_frame_identify(queues[n].tx_frame, &frameid);
    assert(err_is_ok(r));
    tx_phys = frameid.base;
    tx_size = frameid.bytes;

    r = invoke_frame_identify(queues[n].rx_frame, &frameid);
    assert(err_is_ok(r));
    rx_phys = frameid.base;
    rx_size = frameid.bytes;

    DEBUG("tx.phys=%"PRIx64" tx.size=%"PRIu64"\n", tx_phys, tx_size);
    DEBUG("rx.phys=%"PRIx64" rx.size=%"PRIu64"\n", rx_phys, rx_size);
    // Initialize RX queue in HW
    if (queues[n].rx_va) {
        e10k_rdbal_1_wr(d, n, queues[n].rx_va);
        e10k_rdbah_1_wr(d, n, (queues[n].rx_va) >> 32);
    } else {
        e10k_rdbal_1_wr(d, n, rx_phys);
        e10k_rdbah_1_wr(d, n, rx_phys >> 32);
    }
    e10k_rdlen_1_wr(d, n, rx_size);

    e10k_srrctl_1_bsz_pkt_wrf(d, n, queues[n].rxbufsz / 1024);
    uint32_t hdrsz = queues[n].rxhdrsz;
    if (hdrsz == 0) {
        hdrsz = 128;
    }
    assert(hdrsz % 64 == 0);
    assert(hdrsz >= 128 && hdrsz <= 1024);
    e10k_srrctl_1_bsz_hdr_wrf(d, n, hdrsz / 64);
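    // Worked example for the SRRCTL encodings above (illustrative): the
    // packet buffer size field is in 1 KiB units and the header buffer
    // size field in 64-byte units, hence the divisions. E.g. rxbufsz =
    // 2048 yields bsz_pkt = 2, and hdrsz = 128 yields bsz_hdr = 2.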
    // Enable header split if desired
    if (queues[n].rxhdrsz != 0) {
        e10k_srrctl_1_desctype_wrf(d, n, e10k_adv_hdrsp);
        // Split packets after TCP, UDP, IP4, IP6 and L2 headers if we enable
        // header split
        e10k_psrtype_split_tcp_wrf(d, n, 1);
        e10k_psrtype_split_udp_wrf(d, n, 1);
        e10k_psrtype_split_ip4_wrf(d, n, 1);
        e10k_psrtype_split_ip6_wrf(d, n, 1);
        e10k_psrtype_split_l2_wrf(d, n, 1);
    } else {
        //e10k_srrctl_1_desctype_wrf(d, n, e10k_adv_1buf);
        e10k_srrctl_1_desctype_wrf(d, n, e10k_legacy);
    }
    e10k_srrctl_1_bsz_hdr_wrf(d, n, 128 / 64); // TODO: Do 128 bytes suffice in
                                               //       all cases?
    e10k_srrctl_1_drop_en_wrf(d, n, 1);
    if (queues[n].use_rsc) {
        USER_PANIC("RSC not supported in SR-IOV mode!\n");
        e10k_rscctl_1_maxdesc_wrf(d, n, 3);
        e10k_rscctl_1_rsc_en_wrf(d, n, 1);
        // TODO: (how) does this work for queues >= 64?
        e10k_psrtype_split_tcp_wrf(d, n, 1); // needed for RSC
    } else {
        e10k_rscctl_1_maxdesc_wrf(d, n, 0);
        e10k_rscctl_1_rsc_en_wrf(d, n, 0);
    }

    // Initialize queue pointers (empty)
    e10k_rdt_1_wr(d, n, queues[n].rx_head);
    e10k_rdh_1_wr(d, n, queues[n].rx_head);
    // Open virtualization pool gate (assumes 64 VF mapping)
    e10k_pfvfre_wr(d, n / 64, e10k_pfvfre_rd(d, n / 64) | (1 << ((n / 2) % 32)));
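    // Worked example for the PFVFRE indexing above (illustrative): in the
    // 64-pool mapping each pool owns two queues, so queue n feeds pool
    // n / 2, and the 64 pool-enable bits are split across two 32-bit
    // registers. E.g. queue 70 -> pool 35 -> register 70 / 64 = 1,
    // bit 35 % 32 = 3.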
    e10k_rxdctl_1_enable_wrf(d, n, 1);
    while (e10k_rxdctl_1_enable_rdf(d, n) == 0); // TODO: Timeout
    DEBUG("[%x] RX queue enabled\n", n);
    // Setup Interrupts for this queue
    if (queues[n].use_irq) {
        // Look for interrupt vector
        if (queues[n].msix_intvec != 0) {
            if (queues[n].msix_index == -1) {
                setup_interrupt(&queues[n].msix_index, queues[n].msix_intdest,
                                queues[n].msix_intvec);
            }
            rxv = txv = queues[n].msix_index;
        } else {
            //rxv = QUEUE_INTRX;
            //txv = QUEUE_INTTX;
        }

        DEBUG("rxv=%d txv=%d\n", rxv, txv);
        // Setup mapping queue Rx/Tx -> interrupt
        int i = n / 2;
        if ((n % 2) == 0) {
            e10k_ivar_i_alloc0_wrf(d, i, rxv);
            e10k_ivar_i_allocval0_wrf(d, i, 1);
            e10k_ivar_i_alloc1_wrf(d, i, txv);
            e10k_ivar_i_allocval1_wrf(d, i, 1);
        } else {
            e10k_ivar_i_alloc2_wrf(d, i, rxv);
            e10k_ivar_i_allocval2_wrf(d, i, 1);
            e10k_ivar_i_alloc3_wrf(d, i, txv);
            e10k_ivar_i_allocval3_wrf(d, i, 1);
        }
        if (queues[n].msix_intvec != 0) {
            e10k_eitr_l_wr(d, rxv, 0);
        }

        // Enable autoclear (higher ones are always auto cleared)
        e10k_eiac_rtxq_wrf(d, e10k_eiac_rtxq_rdf(d) | (1 << rxv));

        // Make sure interrupt is cleared
        e10k_eicr_wr(d, 1 << rxv);

        e10k_eimsn_wr(d, rxv / 32, (1 << (rxv % 32)));
    }
    if (enable_global) {
        DEBUG("[%x] Enabling RX globally...\n", n);
        rx_enable();
        DEBUG("[%x] RX globally enabled\n", n);
    }
    // Enable DCA for this queue
    e10k_dca_rxctrl_t dca_rxctrl = 0;

    dca_rxctrl = e10k_dca_rxctrl_rxdca_desc_insert(dca_rxctrl, 1);
    dca_rxctrl = e10k_dca_rxctrl_rxdca_hdr_insert(dca_rxctrl, 1);
    dca_rxctrl = e10k_dca_rxctrl_rxdca_payl_insert(dca_rxctrl, 1);

    uint8_t my_apic_id;
    errval_t err = sys_debug_get_apic_id(&my_apic_id);
    assert(err_is_ok(err));

    dca_rxctrl = e10k_dca_rxctrl_cpuid_insert(dca_rxctrl, my_apic_id);

    if (n < 64) {
        e10k_dca_rxctrl_1_wr(d, n, dca_rxctrl);
    } else {
        e10k_dca_rxctrl_2_wr(d, n - 64, dca_rxctrl);
    }

    printf("DCA enabled on queue %d with APIC ID %d\n", n, my_apic_id);
    // Initialize TX queue in HW
    if (queues[n].rx_va) {
        e10k_tdbal_wr(d, n, queues[n].tx_va);
        e10k_tdbah_wr(d, n, (queues[n].tx_va) >> 32);
    } else {
        e10k_tdbal_wr(d, n, tx_phys);
        e10k_tdbah_wr(d, n, tx_phys >> 32);
    }
    e10k_tdlen_wr(d, n, tx_size);

    // Initialize TX head index write back
    if (!capref_is_null(queues[n].txhwb_frame)) {
        r = invoke_frame_identify(queues[n].txhwb_frame, &frameid);
        assert(err_is_ok(r));
        txhwb_phys = frameid.base;
        if (queues[n].rx_va) {
            e10k_tdwbal_headwb_low_wrf(d, n, (queues[n].txhwb_va) >> 2);
            e10k_tdwbah_headwb_high_wrf(d, n, (queues[n].txhwb_va) >> 32);
        } else {
            e10k_tdwbal_headwb_low_wrf(d, n, txhwb_phys >> 2);
            e10k_tdwbah_headwb_high_wrf(d, n, txhwb_phys >> 32);
        }
        e10k_tdwbal_headwb_en_wrf(d, n, 1);
    }
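    // Note on the shifts above (illustrative): TDWBAL holds bits 31:2 of
    // the head write-back address; the register's low bits carry flags
    // such as the enable bit set just above, which is presumably why the
    // address is shifted right by 2 before being written.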
    // Initialized by queue driver to avoid race conditions
    // Initialize queue pointers
    e10k_tdh_wr(d, n, queues[n].tx_head);
    e10k_tdt_wr(d, n, queues[n].tx_head);

    // Configure prefetch and write-back threshold
    e10k_txdctl_pthresh_wrf(d, n, 8); // FIXME: Figure out what the right number is
    e10k_txdctl_hthresh_wrf(d, n, 0);
    e10k_txdctl_wthresh_wrf(d, n, 0);
    if (enable_global) {
        DEBUG("[%x] Enabling TX globally...\n", n);
        tx_enable();
        rxtx_enabled = true;
        DEBUG("[%x] TX globally enabled\n", n);
    }
    // Open virtualization pool gate (assumes 64 VF mapping)
    e10k_pfvfte_wr(d, n / 64, e10k_pfvfte_rd(d, n / 64) | (1 << ((n / 2) % 32)));

    e10k_txdctl_enable_wrf(d, n, 1);
    while (e10k_txdctl_enable_rdf(d, n) == 0); // TODO: Timeout
    DEBUG("[%x] TX queue enabled\n", n);

    // Some initialization stuff from BSD driver
    e10k_dca_txctrl_txdesc_wbro_wrf(d, n, 0);

    if (set_tail) {
        idc_write_queue_tails(queues[n].binding);
    }
}
static void queue_hw_stop(uint8_t n)
{
    // This process is described in 4.6.7.1.2

    // Disable TX for this queue
    e10k_txdctl_enable_wrf(d, n, 0);

    // TODO: Flush packet buffers
    // TODO: Remove all filters
    // TODO: With RSC we have to wait here (see spec), not used atm

    // Disable RX for this queue
    e10k_rxdctl_1_enable_wrf(d, n, 0);
    while (e10k_rxdctl_1_enable_rdf(d, n) != 0); // TODO: Timeout

    // A bit too much, but make sure memory is not used anymore
}
/** Stop whole device. */
static void stop_device(void)
{
    int i;

    DEBUG("Stopping device\n");

    // Disable RX and TX
    rx_disable();
    tx_disable();
    rxtx_enabled = false;

    // Disable interrupts
    e10k_eimc_cause_wrf(d, 0x7FFFFFFF);

    // Disable each RX and TX queue
    for (i = 0; i < 128; i++) {
        e10k_txdctl_wr(d, i, e10k_txdctl_swflsh_insert(0x0, 1));
        if (i < 64) {
            e10k_rxdctl_1_wr(d, i, 0x0);
        } else {
            e10k_rxdctl_2_wr(d, i - 64, 0x0);
        }
    }

    // From BSD driver (not in spec)

    // Master disable procedure
    e10k_ctrl_pcie_md_wrf(d, 1);
    while (e10k_status_pcie_mes_rdf(d) != 0); // TODO: Timeout
    DEBUG("Stopping device done\n");
}
static void management_interrupt(e10k_eicr_t eicr)
{
    if (e10k_eicr_ecc_extract(eicr)) {
        DEBUG("##########################################\n");
        DEBUG("ECC Error, resetting device :-/\n");
        DEBUG("##########################################\n");
        device_init();
    } else if (eicr >> 16) {
        DEBUG("Interrupt: %x\n", eicr);
        e10k_eicr_prtval(buf, sizeof(buf), eicr);
        puts(buf);
    } else {
        DEBUG("Weird management interrupt without cause: eicr=%x\n", eicr);
    }
}
static void interrupt_handler_msix(void* arg)
{
    DEBUG("e10k: MSI-X management interrupt\n");
    e10k_eicr_t eicr = e10k_eicr_rd(d);

    eicr &= ~(1 << cdriver_msix);
    management_interrupt(eicr);

    // Ensure management MSI-X vector is cleared
    e10k_eicr_wr(d, (1 << cdriver_msix));

    // Re-enable interrupt
    e10k_eimsn_cause_wrf(d, cdriver_msix / 32, (1 << (cdriver_msix % 32)));
}
static void resend_interrupt(void* arg)
{
    errval_t err;
    uint64_t i = (uint64_t) arg;
    err = queues[i].devif->tx_vtbl.interrupt(queues[i].devif, NOP_CONT, i);
    // If the queue is busy, there is already an outstanding message
    if (err_is_fail(err) && err_no(err) != FLOUNDER_ERR_TX_BUSY) {
        USER_PANIC("Error when sending interrupt %s \n", err_getstring(err));
    }
}
/** Here the global interrupts are handled. */
static void interrupt_handler(void* arg)
{
    errval_t err;
    e10k_eicr_t eicr = e10k_eicr_rd(d);

    management_interrupt(eicr);

    e10k_eicr_wr(d, eicr);

    for (uint64_t i = 0; i < 16; i++) {
        if ((eicr >> i) & 0x1) {
            DEBUG("Interrupt eicr=%"PRIx32" \n", eicr);
            if (queues[i].use_irq && queues[i].devif != NULL) {
                err = queues[i].devif->tx_vtbl.interrupt(queues[i].devif, NOP_CONT, i);
                if (err_is_fail(err)) {
                    err = queues[i].devif->register_send(queues[i].devif,
                                                         get_default_waitset(),
                                                         MKCONT(resend_interrupt,
                                                                (void*) i));
                }
            }
        }
    }
}
/******************************************************************************/
/* Management interface implementation */
/** Send register cap and mac address to queue driver. */
static void idc_queue_init_data(struct e10k_binding *b,
                                struct capref registers,
                                uint64_t macaddr)
{
    errval_t r;
    r = e10k_queue_init_data__tx(b, NOP_CONT, registers, macaddr);
    // TODO: handle busy
    assert(err_is_ok(r));
}

/** Tell queue driver that we are done initializing the queue. */
static void idc_queue_memory_registered(struct e10k_binding *b)
{
    errval_t r;
    r = e10k_queue_memory_registered__tx(b, NOP_CONT);
    // TODO: handle busy
    assert(err_is_ok(r));
}

/** Send request to queue driver to rewrite the tail pointers of its queues. */
static void idc_write_queue_tails(struct e10k_binding *b)
{
    qd_write_queue_tails(b);

    errval_t r;
    r = e10k_write_queue_tails__tx(b, NOP_CONT);
    // TODO: handle busy
    assert(err_is_ok(r));
}
/** Request from queue driver for register memory cap */
void cd_request_device_info(struct e10k_binding *b)
{
    assert(initialized);

    uint64_t d_mac = e10k_ral_ral_rdf(d, qi) | ((uint64_t) e10k_rah_rah_rdf(d, qi) << 32);
    DEBUG("mac valid = %x\n", e10k_rah_av_rdf(d, qi));

    uint64_t d_mac = e10k_ral_ral_rdf(d, 0) | ((uint64_t) e10k_rah_rah_rdf(d, 0) << 32);
    DEBUG("mac valid = %x\n", e10k_rah_av_rdf(d, 0));

    struct capref cr;
    errval_t err = slot_alloc(&cr);
    assert(err_is_ok(err));
    err = cap_copy(cr, *regframe);
    assert(err_is_ok(err));
    qd_queue_init_data(b, cr, d_mac);

    idc_queue_init_data(b, *regframe, d_mac);
}
/** Request from queue driver to initialize hardware queue. */
void cd_register_queue_memory(struct e10k_binding *b,
                              uint8_t n,
                              struct capref tx_frame,
                              struct capref txhwb_frame,
                              struct capref rx_frame,
                              uint32_t rxbufsz,
                              uint32_t rxhdrsz,
                              int16_t msix_intvec,
                              uint8_t msix_intdest,
                              bool use_irq,
                              bool use_rsc,
                              uint64_t tx_va,
                              uint64_t rx_va,
                              uint64_t txhwb_va)
{
    DEBUG("register_queue_memory(%"PRIu8")\n", n);
    // TODO: Make sure that rxbufsz is a power of 2 >= 1024

    if (use_irq && msix_intvec != 0 && !msix) {
        printf("e10k: Queue %d requests MSI-X, but MSI-X is not enabled "
               "in the card driver. Ignoring queue\n", n);
        return;
    }

    // Save state so we can restore the configuration in case we need to do a
    // reset
    queues[n].enabled = true;
    queues[n].tx_frame = tx_frame;
    queues[n].txhwb_frame = txhwb_frame;
    queues[n].rx_frame = rx_frame;
    queues[n].tx_head = 0;
    queues[n].rx_head = 0;
    queues[n].rxbufsz = rxbufsz;
    queues[n].rxhdrsz = rxhdrsz;
    queues[n].msix_index = -1;
    queues[n].msix_intvec = msix_intvec;
    queues[n].msix_intdest = msix_intdest;
    queues[n].binding = b;
    queues[n].use_irq = use_irq;
    queues[n].use_rsc = use_rsc;
    queues[n].tx_va = tx_va;
    queues[n].rx_va = rx_va;
    queues[n].txhwb_va = txhwb_va;

    queue_hw_init(n, true);

    qd_queue_memory_registered(b);

    idc_queue_memory_registered(b);
}
/** Request from queue driver to set the interrupt rate for a queue. */
void cd_set_interrupt_rate(struct e10k_binding *b,
                           uint8_t n,
                           uint16_t rate)
{
    DEBUG("set_interrupt_rate(%"PRIu8")\n", n);

    e10k_eitrn_t eitr = 0;
    eitr = e10k_eitrn_itr_int_insert(eitr, rate);

    size_t i = (queues[n].msix_index == -1 ? 0 : queues[n].msix_index);
    if (i < 24) {
        e10k_eitr_l_wr(d, i, eitr);
    } else {
        e10k_eitr_h_wr(d, i - 24, eitr);
    }
}
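/* Note on the EITR split above (illustrative): the 82599 exposes the first
 * 24 interrupt throttle registers (EITR 0-23) in one address range and the
 * remaining vectors in a second range, which the Mackerel spec appears to
 * model as the two arrays eitr_l and eitr_h; hence the i - 24 rebasing. */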
/**
 * Request from queue driver to stop hardware queue and free everything
 * associated with that queue.
 */
static errval_t idc_terminate_queue(struct e10k_binding *b, uint8_t n)
{
    DEBUG("idc_terminate_queue(q=%d)\n", n);

    queues[n].enabled = false;
    queues[n].binding = NULL;

    // TODO: Do we have to free the frame caps, or destroy the binding?
    return SYS_ERR_OK;
}
static errval_t idc_register_port_filter(struct e10k_binding *b,
                                         uint8_t queue,
                                         e10k_port_type_t type,
                                         uint16_t port,
                                         uint64_t *filter,
                                         errval_t *err)
{
    struct e10k_filter f = {
        .mask = MASK_SRCIP | MASK_DSTIP | MASK_SRCPORT,
        .l4_type = (type == e10k_PORT_TCP ? L4_TCP : L4_UDP),
    };
    DEBUG("idc_register_port_filter: called (q=%d t=%d p=%d)\n",
          queue, type, port);
    *err = reg_ftfq_filter(&f, filter);
    DEBUG("filter registered: err=%"PRIu64", fid=%"PRIu64"\n", *err, *filter);
    return SYS_ERR_OK;
}
static errval_t idc_unregister_filter(struct e10k_binding *b,
                                      uint64_t filter, errval_t *err)
{
    DEBUG("unregister_filter: called (%"PRIx64")\n", filter);
    *err = LIB_ERR_NOT_IMPLEMENTED;
    return SYS_ERR_OK;
}

static struct e10k_rx_vtbl rx_vtbl = {
    .request_device_info = cd_request_device_info,
    .register_queue_memory = cd_register_queue_memory,
    .set_interrupt_rate = cd_set_interrupt_rate,
};

static struct e10k_rpc_rx_vtbl rpc_rx_vtbl = {
    .terminate_queue_call = idc_terminate_queue,
    .register_port_filter_call = idc_register_port_filter,
    .unregister_filter_call = idc_unregister_filter,
};
static void export_cb(void *st, errval_t err, iref_t iref)
{
    const char *suffix = "_e10kmng";
    char name[strlen(service_name) + strlen(suffix) + 1];

    assert(err_is_ok(err));

    // Build label for internal management service
    sprintf(name, "%s%s", service_name, suffix);

    err = nameservice_register(name, iref);
    assert(err_is_ok(err));
    DEBUG("Management interface exported\n");
    exported = true;
}
static errval_t connect_cb(void *st, struct e10k_binding *b)
{
    DEBUG("New connection on management interface\n");
    b->rx_vtbl = rx_vtbl;
    b->rpc_rx_vtbl = rpc_rx_vtbl;
    return SYS_ERR_OK;
}
/**
 * Initialize management interface for queue drivers.
 * This has to be done _after_ the hardware is initialized.
 */
static void initialize_mngif(void)
{
    errval_t r;

    r = e10k_export(NULL, export_cb, connect_cb, get_default_waitset(),
                    IDC_BIND_FLAGS_DEFAULT);
    assert(err_is_ok(r));
}
/****** VF/PF server interface *******/

static void init_done_vf(struct e10k_vf_binding *b, uint8_t vfn)
{
    DEBUG("VF %d init done\n", vfn);

    // Enable correct pool for VF
    e10k_pfvfre_wr(d, vfn / 32, e10k_pfvfre_rd(d, vfn / 32) | (1 << (vfn % 32)));
    e10k_pfvfte_wr(d, vfn / 32, e10k_pfvfte_rd(d, vfn / 32) | (1 << (vfn % 32)));

    if (vfn < 32) {
        e10k_pfvflrec_wr(d, 0, 1 << vfn);
    } else {
        e10k_pfvflrec_wr(d, 1, 1 << (vfn - 32));
    }

    errval_t err = b->tx_vtbl.init_done_response(b, NOP_CONT);
    assert(err_is_ok(err));
}
static void get_mac_address_vf(struct e10k_vf_binding *b, uint8_t vfn)
{
    assert(initialized);
    uint64_t d_mac = e10k_ral_ral_rdf(d, vfn) | ((uint64_t) e10k_rah_rah_rdf(d, vfn) << 32);
    errval_t err = b->tx_vtbl.get_mac_address_response(b, NOP_CONT, d_mac);
    assert(err_is_ok(err));
}
static void request_vf_number(struct e10k_vf_binding *b)
{
    DEBUG("VF allocated\n");
    errval_t err = SYS_ERR_OK;
    uint8_t vf_num = 255;
    for (int i = 0; i < 64; i++) {
        if (!vf_used[i]) { vf_num = i; vf_used[i] = true; break; }
    }
    if (vf_num == 255) {
        err = NIC_ERR_ALLOC_QUEUE;
    }
    err = b->tx_vtbl.request_vf_number_response(b, NOP_CONT, vf_num, err);
    assert(err_is_ok(err));
}
static errval_t cd_create_queue_rpc(struct e10k_vf_binding *b,
                                    struct capref tx_frame, struct capref txhwb_frame,
                                    struct capref rx_frame, uint32_t rxbufsz,
                                    int16_t msix_intvec, uint8_t msix_intdest,
                                    bool use_irq, bool use_rsc, bool default_q,
                                    uint64_t *mac, int32_t *qid, struct capref *regs,
                                    errval_t *ret_err)
{
    // TODO: Make sure that rxbufsz is a power of 2 >= 1024

    if (use_irq && msix_intvec != 0 && !msix) {
        printf("e10k: Queue requests MSI-X, but MSI-X is not enabled "
               "in the card driver. Ignoring queue\n");
        *ret_err = NIC_ERR_ALLOC_QUEUE;
        return NIC_ERR_ALLOC_QUEUE;
    }
    int n = -1;
    for (int i = 1; i < 128; i++) {
        if (!queues[i].enabled) { n = i; break; }
    }
    if (default_q) {
        if (queues[0].enabled == false) {
            n = 0;
        } else {
            printf("Default queue already initialized\n");
            return NIC_ERR_ALLOC_QUEUE;
        }
    }
    DEBUG("create queue(%"PRIu8": interrupt %d )\n", n, use_irq);
    if (n == -1) {
        *ret_err = NIC_ERR_ALLOC_QUEUE;
        return NIC_ERR_ALLOC_QUEUE;
    }
    // Save state so we can restore the configuration in case we need to do a
    // reset
    queues[n].tx_frame = tx_frame;
    queues[n].txhwb_frame = txhwb_frame;
    queues[n].rx_frame = rx_frame;
    queues[n].tx_head = 0;
    queues[n].rx_head = 0;
    queues[n].devif = b;
    queues[n].rxbufsz = rxbufsz;
    queues[n].msix_index = -1;
    queues[n].msix_intvec = msix_intvec;
    queues[n].msix_intdest = msix_intdest;
    queues[n].use_irq = use_irq;
    queues[n].use_rsc = use_rsc;
    queues[n].enabled = true;

    queue_hw_init(n, false);

    // TODO: for now vfn = 0
    uint64_t d_mac = e10k_ral_ral_rdf(d, 0) | ((uint64_t) e10k_rah_rah_rdf(d, 0) << 32);

    *mac = d_mac;
    *qid = n;
    *regs = *regframe;

    DEBUG("[%d] Queue init done\n", n);
    *ret_err = SYS_ERR_OK;
    return SYS_ERR_OK;
}
static void cd_create_queue(struct e10k_vf_binding *b,
                            struct capref tx_frame, struct capref txhwb_frame,
                            struct capref rx_frame, uint32_t rxbufsz,
                            int16_t msix_intvec, uint8_t msix_intdest,
                            bool use_irq, bool use_rsc, bool default_q)
{
    uint64_t mac;
    int32_t queueid;
    struct capref regs;
    errval_t err;

    err = cd_create_queue_rpc(b, tx_frame, txhwb_frame, rx_frame,
                              rxbufsz, msix_intvec, msix_intdest, use_irq, use_rsc,
                              default_q, &mac, &queueid, &regs, &err);

    err = b->tx_vtbl.create_queue_response(b, NOP_CONT, mac, queueid, regs, err);
    assert(err_is_ok(err));
    DEBUG("cd_create_queue end\n");
}
static void vf_export_cb(void *st, errval_t err, iref_t iref)
{
    const char *suffix = "_vf";
    char name[strlen(service_name) + strlen(suffix) + 100];

    assert(err_is_ok(err));

    // Build label for internal management service
    sprintf(name, "%s%s%u", service_name, suffix, pci_function);

    err = nameservice_register(name, iref);
    assert(err_is_ok(err));
    DEBUG("VF/PF interface [%s] exported\n", name);
    exported = true;
}
static errval_t vf_connect_cb(void *st, struct e10k_vf_binding *b)
{
    DEBUG("New connection on VF/PF interface\n");

    b->rx_vtbl.create_queue_call = cd_create_queue;
    b->rx_vtbl.request_vf_number_call = request_vf_number;
    b->rx_vtbl.init_done_call = init_done_vf;
    b->rx_vtbl.get_mac_address_call = get_mac_address_vf;

    b->rpc_rx_vtbl.create_queue_call = cd_create_queue_rpc;

    return SYS_ERR_OK;
}
/**
 * Initialize the VF/PF and net filter interfaces.
 * This has to be done _after_ the hardware is initialized.
 */
static void initialize_vfif(void)
{
    errval_t r;

    r = e10k_vf_export(NULL, vf_export_cb, vf_connect_cb, get_default_waitset(),
                       IDC_BIND_FLAGS_DEFAULT);
    assert(err_is_ok(r));

    r = net_filter_export(NULL, net_filter_export_cb, net_filter_connect_cb,
                          get_default_waitset(), IDC_BIND_FLAGS_DEFAULT);
    assert(err_is_ok(r));
}
/******************************************************************************/
/* Initialization code for driver */

/** Callback from pci to initialize a specific PCI device. */
static void pci_init_card(void *arg, struct device_mem* bar_info, int bar_count)
{
    errval_t err;
    bool res;

    assert(!initialized);

    d = malloc(sizeof(*d));

    // Map first BAR for register access
    assert(bar_count >= 1);
    map_device(&bar_info[0]);
    regframe = bar_info[0].frame_cap;
    DEBUG("BAR[0] mapped (v=%llx p=%llx l=%llx)\n",
          (unsigned long long) bar_info[0].vaddr,
          (unsigned long long) bar_info[0].paddr,
          (unsigned long long) bar_info[0].bytes);

    // Initialize Mackerel binding
    e10k_initialize(d, (void*) bar_info[0].vaddr);

    DEBUG("STATUS = %x\n", e10k_status_rd(d));
    // Initialize manager for MSI-X vectors
    if (msix) {
        DEBUG("Enabling MSI-X interrupts\n");
        uint16_t msix_count = 0;
        err = pci_msix_enable(&msix_count);
        assert(err_is_ok(err));
        assert(msix_count > 0);
        DEBUG("MSI-X #vecs=%d\n", msix_count);

        res = bmallocator_init(&msix_alloc, msix_count);
        assert(res);
    } else {
        DEBUG("Using legacy interrupts\n");
    }
    // Initialize hardware registers etc.
    DEBUG("Initializing hardware\n");
    device_init();

    assert(initialized);

#ifdef VTON_DCBOFF
    DEBUG("SR-IOV device up routine\n");

    // Setup support for 64 VFs
    e10k_gcr_ext_vtmode_wrf(d, e10k_vt_64);
    e10k_gpie_vtmode_wrf(d, e10k_vt_64);

    // Enable virtualization, disable default pool, enable replication
    e10k_pfvtctl_t pfvtctl = e10k_pfvtctl_rd(d);
    pfvtctl = e10k_pfvtctl_vt_en_insert(pfvtctl, 1);
    pfvtctl = e10k_pfvtctl_def_pl_insert(pfvtctl, 0);
    pfvtctl = e10k_pfvtctl_dis_def_pl_insert(pfvtctl, 1);
    pfvtctl = e10k_pfvtctl_rpl_en_insert(pfvtctl, 1);
    e10k_pfvtctl_wr(d, pfvtctl);

    // Enable L2 loopback
    e10k_pfdtxgswc_lbe_wrf(d, 1);

    // TODO: Accept untagged packets in all VMDQ pools
    // TODO: Broadcast accept mode
    // TODO: Accept packets matching PFUTA table
    // TODO: Accept packets matching MTA table
    // TODO: Accept untagged packets enable
    // TODO: Strip VLAN tag for incoming packets

    DEBUG("STATUS = %x\n", e10k_status_rd(d));

    e10k_ctrl_ext_pfrstd_wrf(d, 1);
#endif
    // Now we initialize the management interface
    DEBUG("Initializing management interface\n");
    initialize_mngif();

    DEBUG("Initializing VF/PF interface\n");
    initialize_vfif();

    DEBUG("Done with initialization\n");
}
/** Register with PCI */
static void pci_register(void)
{
    errval_t r;

    r = pci_client_connect();
    assert(err_is_ok(r));
    DEBUG("connected to pci\n");

    r = pci_register_driver_irq(pci_init_card, NULL, PCI_CLASS_ETHERNET,
                                PCI_DONT_CARE, PCI_DONT_CARE,
                                PCI_VENDOR_INTEL, pci_deviceid,
                                pci_bus, pci_device, pci_function,
                                interrupt_handler, NULL);
    assert(err_is_ok(r));
}
static void parse_cmdline(int argc, char **argv)
{
    int i;

    for (i = 1; i < argc; i++) {
        if (strncmp(argv[i], "cardname=", strlen("cardname=")) == 0) {
            service_name = argv[i] + strlen("cardname=");
        } else if (strncmp(argv[i], "bus=", strlen("bus=")) == 0) {
            pci_bus = atol(argv[i] + strlen("bus="));
        } else if (strncmp(argv[i], "device=", strlen("device=")) == 0) {
            pci_device = atol(argv[i] + strlen("device="));
        } else if (strncmp(argv[i], "function=", strlen("function=")) == 0) {
            pci_function = atol(argv[i] + strlen("function="));
        } else if (strncmp(argv[i], "deviceid=", strlen("deviceid=")) == 0) {
            pci_deviceid = strtoul(argv[i] + strlen("deviceid="), NULL, 0);
        } else if (strncmp(argv[i], "msix=", strlen("msix=")) == 0) {
            msix = !!atol(argv[i] + strlen("msix="));
            // also pass this to the queue driver
            qd_argument(argv[i]);
        } else if (strncmp(argv[i], "credit_refill[", strlen("credit_refill[") - 1) == 0) {
            // Controls the WRR (weighted round-robin) scheduler's credit refill rate
            // This seems to be per VM pool
            unsigned int entry, val;
            int r = sscanf(argv[i], "credit_refill[%u]=%u", &entry, &val);
            assert(r == 2);
            assert(entry < 128);
            assert(val < 0x3fff);
            credit_refill[entry] = val;
        } else if (strncmp(argv[i], "tx_rate[", strlen("tx_rate[") - 1) == 0) {
            // This is specified in Mbits/s and must be >= 10 and <= link speed (typically 10,000)
            // This seems to be per Tx queue
            unsigned int entry, val;
            int r = sscanf(argv[i], "tx_rate[%u]=%u", &entry, &val);
            assert(r == 2);
            assert(entry < 128);
            assert(val >= 10 && val <= 10000);
            tx_rate[entry] = val;
        } else {
            qd_argument(argv[i]);
        }
    }
}
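/* Usage example for the two array arguments above (illustrative): starting
 * the driver with
 *     e10k credit_refill[0]=16 tx_rate[0]=1000
 * sets queue 0's WRR credit refill to 16 (x 64 bytes) and caps queue 0 at
 * 1000 Mb/s; both values are programmed into RTTDT1C/RTTBCNRC in
 * device_init(). */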
static void eventloop(void)
{
    struct waitset *ws;

    printf("Entering polling loop\n");
    ws = get_default_waitset();

    if (use_interrupts) {
void qd_argument(const char *arg) { }
void qd_interrupt(bool is_rx, bool is_tx) { }
void qd_queue_init_data(struct e10k_binding *b, struct capref registers,
                        uint64_t macaddr) { }
void qd_queue_memory_registered(struct e10k_binding *b) { }
void qd_write_queue_tails(struct e10k_binding *b) { }
int main(int argc, char **argv)
int e1000n_driver_init(int argc, char *argv[])
{
    //barrelfish_usleep(10*1000*1000);
    DEBUG("PF driver started\n");

    // credit_refill value must be >= 1 for a queue to be able to send.
    // Set them all to 1 here. May be overridden via command line.
    for (int i = 0; i < 128; i++) {
        credit_refill[i] = 1;
    }

    memset(tx_rate, 0, sizeof(tx_rate));
    parse_cmdline(argc, argv);

    pci_register();

    while (!initialized || !exported) {
        event_dispatch(get_default_waitset());
    }

    DEBUG("e10k driver: networking init\n");
    errval_t err;
    if (use_interrupts) {
        err = networking_init("e10k", NET_FLAGS_DO_DHCP | NET_FLAGS_DEFAULT_QUEUE);
    } else {
        err = networking_init("e10k", NET_FLAGS_DO_DHCP | NET_FLAGS_POLLING |
                              NET_FLAGS_DEFAULT_QUEUE);
    }
    DEBUG("e10k driver: networking init done, status: %s\n", err_getstring(err));
    assert(err_is_ok(err));