Ignore running over maxbusnum for now.
[barrelfish] / usr / pci / pci.c
1 /**
2  * \file
3  * \brief PCI driver
4  *
5  *  This file walks through the PCI bus, enumerates each device and gathers
6  *  information about each device.
7  */
8
9 /*
10  * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich.
11  * All rights reserved.
12  *
13  * This file is distributed under the terms in the attached LICENSE file.
14  * If you do not find this file, copies can be found by writing to:
15  * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
16  */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20
21 #include <barrelfish/barrelfish.h>
22 #include <barrelfish/deferred.h>
23
24 #include <pci/devids.h>
25 #include <mm/mm.h>
26 #include <skb/skb.h>
27 #include <octopus/getset.h>
28 #include <acpi_client/acpi_client.h>
29 #include <dev/pci_sr_iov_cap_dev.h>
30
31 #include "pci.h"
32 #include "driver_mapping.h"
33 #include "ht_config.h"
34 #include "ht_config_dev.h"
35 #include "pci_debug.h"
36
/* Smaller of two values. This is a function-like macro, so each argument may
 * be evaluated twice: do not pass expressions with side effects. */
#define MIN(a,b)        ((a) < (b) ? (a) : (b))

/* All-ones 32-bit pattern.
 * NOTE(review): presumably the value written to a BAR to probe its size; the
 * use site is not in this part of the file -- confirm. */
#define BAR_PROBE       0xffffffff

/* log2 of the base page size; alloc_device_bar() falls back to caps of this
 * size when larger ones cannot be allocated. */
#define PAGE_BITS BASE_PAGE_BITS
42
43 struct device_caps
44 {
45     struct capref *phys_cap;
46     struct capref *frame_cap;
47     size_t nr_caps;
48     uint8_t bar_nr;
49     uint8_t bits;
50     bool assigned;  //false => this entry is not in use
51     uint8_t type;
52 };
53
54 struct device_caps dev_caps[PCI_NBUSES][PCI_NDEVICES][PCI_NFUNCTIONS][PCI_NBARS];
55 const char *skb_bridge_program = "bridge_page";
56 uint16_t max_numvfs = 256;
57
58 static void
59 query_bars(pci_hdr0_t devhdr,
60            struct pci_address addr,
61            bool pci2pci_bridge);
62
63 static uint32_t
64 setup_interrupt(uint32_t bus,
65                 uint32_t dev,
66                 uint32_t fun);
67 static void
68 enable_busmaster(uint8_t bus,
69                  uint8_t dev,
70                  uint8_t fun,
71                  bool pcie);
72
73 static uint32_t bar_mapping_size(pci_hdr0_bar32_t bar)
74 {
75     if (bar.base == 0) {
76         return 0;
77     }
78
79     for (uint32_t mask = 1;; mask <<= 1) {
80         assert(mask != 0);
81         if (bar.base & mask) {
82             return mask << 7;
83         }
84     }
85 }
86
87 static pciaddr_t bar_mapping_size64(uint64_t base)
88 {
89     if (base == 0) {
90         return 0;
91     }
92
93     for (pciaddr_t mask = 1;; mask <<= 1) {
94         assert(mask != 0);
95         if (base & mask) {
96             /*
97              * Note: we get the actual raw register content here and not
98              *       the bar.base value so no shift.
99              *       - 2014-05-03, RA
100              */
101             return mask;
102         }
103     }
104 }
105
106 void pci_init_datastructures(void)
107 {
108     memset(dev_caps, 0, sizeof(dev_caps));
109 }
110
111 int pci_bar_to_caps_index(uint8_t bus,
112                           uint8_t dev,
113                           uint8_t fun,
114                           uint8_t BAR)
115 {
116     uint8_t i;
117     for (i = 0; i < PCI_NBARS && dev_caps[bus][dev][fun][i].assigned; i++) {
118         if (dev_caps[bus][dev][fun][i].bar_nr == BAR) {
119             return i;
120         }
121     }
122     return -1;
123 }
124
125 int pci_get_nr_caps_for_bar(uint8_t bus,
126                             uint8_t dev,
127                             uint8_t fun,
128                             uint8_t idx)
129 {
130     return (dev_caps[bus][dev][fun][idx].nr_caps);
131 }
132
133 struct capref pci_get_cap_for_device(uint8_t bus,
134                                      uint8_t dev,
135                                      uint8_t fun,
136                                      uint8_t idx,
137                                      int cap_nr)
138 {
139     return (dev_caps[bus][dev][fun][idx].frame_cap[cap_nr]);
140 }
141 uint8_t pci_get_cap_type_for_device(uint8_t bus,
142                                     uint8_t dev,
143                                     uint8_t fun,
144                                     uint8_t idx)
145 {
146     return (dev_caps[bus][dev][fun][idx].type);
147 }
148
149 static errval_t alloc_device_bar(uint8_t idx,
150                                  uint8_t bus,
151                                  uint8_t dev,
152                                  uint8_t fun,
153                                  uint8_t BAR,
154                                  pciaddr_t base,
155                                  pciaddr_t high,
156                                  pcisize_t size)
157 {
158     struct acpi_rpc_client* acl = get_acpi_rpc_client();
159
160     struct device_caps *c = &dev_caps[bus][dev][fun][idx];
161     errval_t err;
162
163     // first try with maximally-sized caps (we'll reduce this if it doesn't work)
164     uint8_t bits = log2ceil(size);
165
166     restart: ;
167     pcisize_t framesize = 1UL << bits;
168     c->nr_caps = size / framesize;
169     PCI_DEBUG("nr caps for one BAR of size %"PRIuPCISIZE": %lu\n", size,
170               c->nr_caps);
171
172     c->phys_cap = malloc(c->nr_caps * sizeof(struct capref));
173     if (c->phys_cap == NULL) {
174         return PCI_ERR_MEM_ALLOC;
175     }
176
177     for (int i = 0; i < c->nr_caps; i++) {
178         /*err = mm_alloc_range(&pci_mm_physaddr, bits, base + i * framesize,
179          base + (i + 1) * framesize, &c->phys_cap[i], NULL);*/
180         errval_t error_code;
181         err = acl->vtbl.mm_alloc_range_proxy(acl, bits, base + i * framesize,
182                                              base + (i + 1) * framesize,
183                                              &c->phys_cap[i], &error_code);
184         assert(err_is_ok(err));
185         err = error_code;
186         if (err_is_fail(err)) {
187             PCI_DEBUG("mm_alloc_range() failed: bits = %hhu, base = %"PRIxPCIADDR","
188                       " end = %"PRIxPCIADDR"\n", bits, base + i * framesize,
189                       base + (i + 1) * framesize);
190             if (err_no(err) == MM_ERR_MISSING_CAPS && bits > PAGE_BITS) {
191                 /* try again with smaller page-sized caps */
192                 for (int j = 0; j < i; j++) {
193                     err = acl->vtbl.mm_free_proxy(acl, c->phys_cap[i],
194                                                   base + j * framesize, bits,
195                                                   &error_code);
196                     assert(err_is_ok(err) && err_is_ok(error_code));
197                 }
198
199                 free(c->phys_cap);
200                 bits = PAGE_BITS;
201                 goto restart;
202             } else {
203                 return err;
204             }
205         }
206     }
207
208     c->frame_cap = malloc(c->nr_caps * sizeof(struct capref));
209     if (c->frame_cap == NULL) {
210         /* TODO: mm_free() */
211         free(c->phys_cap);
212         return PCI_ERR_MEM_ALLOC;
213     }
214
215     for (int i = 0; i < c->nr_caps; i++) {
216         err = devframe_type(&c->frame_cap[i], c->phys_cap[i], bits);
217         if (err_is_fail(err)) {
218             PCI_DEBUG("devframe_type() failed: bits = %hhu, base = %"PRIxPCIADDR
219                       ", doba = %"PRIxPCIADDR"\n", bits, base, base + (1UL << bits));
220             return err;
221         }
222     }
223
224     c->bits = bits;
225     c->bar_nr = BAR;
226     c->assigned = true;
227     c->type = 0;
228
229     return SYS_ERR_OK;
230 }
231
232 //XXX: FIXME: HACK: BAD!!! Only needed to allocate a full I/O range cap to
233 //                         the VESA graphics driver
234 static errval_t assign_complete_io_range(uint8_t idx,
235                                          uint8_t bus,
236                                          uint8_t dev,
237                                          uint8_t fun,
238                                          uint8_t BAR)
239 {
240     dev_caps[bus][dev][fun][idx].frame_cap = (struct capref*) malloc(
241                     sizeof(struct capref));
242     errval_t err = slot_alloc(&(dev_caps[bus][dev][fun][idx].frame_cap[0]));
243     assert(err_is_ok(err));
244     err = cap_copy(dev_caps[bus][dev][fun][idx].frame_cap[0], cap_io);
245     assert(err_is_ok(err));
246
247     dev_caps[bus][dev][fun][idx].bits = 16;
248     dev_caps[bus][dev][fun][idx].bar_nr = BAR;
249     dev_caps[bus][dev][fun][idx].assigned = true;
250     dev_caps[bus][dev][fun][idx].type = 1;
251     dev_caps[bus][dev][fun][idx].nr_caps = 1;
252     return SYS_ERR_OK;
253 }
254
255 errval_t device_init(bool enable_irq,
256                      uint8_t coreid,
257                      int vector,
258                      uint32_t class_code,
259                      uint32_t sub_class,
260                      uint32_t prog_if,
261                      uint32_t vendor_id,
262                      uint32_t device_id,
263                      uint32_t *bus,
264                      uint32_t *dev,
265                      uint32_t *fun,
266                      int *nr_allocated_bars)
267 {
268     *nr_allocated_bars = 0;
269
270     errval_t err;
271     char s_bus[10], s_dev[10], s_fun[10], s_vendor_id[10], s_device_id[10];
272     char s_class_code[10], s_sub_class[10], s_prog_if[10];
273     char s_pcie[5];
274     bool pcie;
275     int error_code;
276     int bar_nr;
277     pciaddr_t bar_base, bar_high;
278     pcisize_t bar_size;
279
280     if (*bus != PCI_DONT_CARE) {
281         snprintf(s_bus, sizeof(s_bus), "%"PRIu32"", *bus);
282     } else {
283         strncpy(s_bus, "Bus", sizeof(s_bus));
284     }
285     if (*dev != PCI_DONT_CARE) {
286         snprintf(s_dev, sizeof(s_dev), "%"PRIu32, *dev);
287     } else {
288         strncpy(s_dev, "Dev", sizeof(s_dev));
289     }
290     if (*fun != PCI_DONT_CARE) {
291         snprintf(s_fun, sizeof(s_fun), "%"PRIu32, *fun);
292     } else {
293         strncpy(s_fun, "Fun", sizeof(s_fun));
294     }
295     if (vendor_id != PCI_DONT_CARE) {
296         snprintf(s_vendor_id, sizeof(s_vendor_id), "%"PRIu32, vendor_id);
297     } else {
298         strncpy(s_vendor_id, "Ven", sizeof(s_vendor_id));
299     }
300     if (device_id != PCI_DONT_CARE) {
301         snprintf(s_device_id, sizeof(s_device_id), "%"PRIu32, device_id);
302     } else {
303         strncpy(s_device_id, "DevID", sizeof(s_device_id));
304     }
305     if (class_code != PCI_DONT_CARE) {
306         snprintf(s_class_code, sizeof(s_class_code), "%"PRIu32, class_code);
307     } else {
308         strncpy(s_class_code, "Cl", sizeof(s_class_code));
309     }
310     if (sub_class != PCI_DONT_CARE) {
311         snprintf(s_sub_class, sizeof(s_sub_class), "%"PRIu32, sub_class);
312     } else {
313         strncpy(s_sub_class, "Sub", sizeof(s_sub_class));
314     }
315     if (prog_if != PCI_DONT_CARE) {
316         snprintf(s_prog_if, sizeof(s_prog_if), "%"PRIu32, prog_if);
317     } else {
318         strncpy(s_prog_if, "ProgIf", sizeof(s_prog_if));
319     }
320
321     PCI_DEBUG("device_init(): Searching device %s, %s, %s, %s, %s, %s, %s, %s\n",
322               s_bus, s_dev, s_fun, s_vendor_id, s_device_id, s_class_code,
323               s_sub_class, s_prog_if);
324
325 //find the device: Unify all values
326     error_code = skb_execute_query(
327                     "device(PCIE,addr(%s, %s, %s), %s, %s, %s, %s, %s, _),"
328                     "writeln(d(PCIE,%s,%s,%s,%s,%s,%s,%s,%s)).",
329                     s_bus, s_dev, s_fun, s_vendor_id, s_device_id, s_class_code,
330                     s_sub_class, s_prog_if, s_bus, s_dev, s_fun, s_vendor_id,
331                     s_device_id, s_class_code, s_sub_class, s_prog_if);
332     if (error_code != 0) {
333
334         PCI_DEBUG("pci.c: device_init(): SKB returnd error code %s\n",
335                   err_getcode(error_code));
336
337         PCI_DEBUG("SKB returned: %s\n", skb_get_output());
338         PCI_DEBUG("SKB error returned: %s\n", skb_get_error_output());
339
340         return PCI_ERR_DEVICE_INIT;
341     }
342
343     err = skb_read_output("d(%[a-z], %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32
344                           ",%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32").",
345                           s_pcie, bus, dev, fun, &vendor_id, &device_id,
346                           &class_code, &sub_class, &prog_if);
347
348     if (err_is_fail(err)) {
349         DEBUG_ERR(err, "skb read output\n");
350
351         PCI_DEBUG("device_init(): Could not read the SKB's output for the device\n");
352         PCI_DEBUG("device_init(): SKB returned: %s\n", skb_get_output());
353         PCI_DEBUG("device_init(): SKB error returned: %s\n",
354                   skb_get_error_output());
355         return err_push(err, PCI_ERR_DEVICE_INIT);
356     }
357     if (strncmp(s_pcie, "pcie", strlen("pcie")) == 0) {
358         pcie = true;
359     } else {
360         pcie = false;
361     }
362
363     PCI_DEBUG("device_init(): Found device at %u:%u:%u\n", *bus, *dev, *fun);
364     //get the implemented BARs for the found device
365     error_code = skb_execute_query("pci_get_implemented_BAR_addresses(%"PRIu32
366                                    ",%"PRIu32",%"PRIu32",%"PRIu32",%"PRIu32",%"
367                                    PRIu32",%"PRIu32",%"PRIu32",L),length(L,Len)"
368                                    ",writeln(L)", *bus, *dev, *fun, vendor_id,
369                                    device_id, class_code, sub_class, prog_if);
370
371     if (error_code != 0) {
372         PCI_DEBUG("pci.c: device_init(): SKB returnd error code %d\n",
373                   error_code);
374
375         PCI_DEBUG("SKB returned: %s\n", skb_get_output());
376         PCI_DEBUG("SKB error returned: %s\n", skb_get_error_output());
377
378         return PCI_ERR_DEVICE_INIT;
379     }
380
381     struct list_parser_status status;
382     skb_read_list_init(&status);
383
384     //iterate over all buselements
385     while (skb_read_list(&status, "baraddr(%d, %"PRIuPCIADDR", %"PRIuPCIADDR", "
386                          "%"PRIuPCISIZE")", &bar_nr, &bar_base, &bar_high, &bar_size)) {
387         err = alloc_device_bar(*nr_allocated_bars, *bus, *dev, *fun, bar_nr,
388                                bar_base, bar_high, bar_size);
389
390         PCI_DEBUG("device_init(): BAR %d: base = %"PRIxPCIADDR ", size = %"
391                   PRIxPCISIZE"\n", bar_nr, bar_base, bar_size);
392
393         if (err_is_fail(err)) {
394             PCI_DEBUG("device_init(): Could not allocate cap for BAR %d\n", bar_nr);
395             return err_push(err, PCI_ERR_DEVICE_INIT);
396         }
397         (*nr_allocated_bars)++;
398     }
399
400 //XXX: FIXME: HACK: BAD!!! Only needed to allocate a full I/O range cap to
401 //                         the VESA graphics driver
402     if (class_code == PCI_CLASS_DISPLAY) {
403         assert(*nr_allocated_bars < PCI_NBARS);
404         err = assign_complete_io_range(*nr_allocated_bars, *bus, *dev, *fun,
405                                        5 /*very BAAAD */);
406         (*nr_allocated_bars)++;
407     }
408 //end of badness
409
410     PCI_DEBUG("device_init(): Allocated caps for %d BARs\n", *nr_allocated_bars);
411     if (enable_irq) {
412         int irq = setup_interrupt(*bus, *dev, *fun);
413         PCI_DEBUG("pci: init_device_handler_irq: init interrupt.\n");
414         PCI_DEBUG("pci: irq = %u, core = %hhu, vector = %u\n", irq, coreid,
415                   vector);
416         struct acpi_rpc_client* cl = get_acpi_rpc_client();
417         errval_t ret_error;
418         err = cl->vtbl.enable_and_route_interrupt(cl, irq, coreid, vector,
419                                                   &ret_error);
420         assert(err_is_ok(err));
421         assert(err_is_ok(ret_error));  // FIXME
422 //        printf("IRQ for this device is %d\n", irq);
423                         //DEBUG_ERR(err, "enable_and_route_interrupt");
424         pci_enable_interrupt_for_device(*bus, *dev, *fun, pcie);
425     }
426
427     PCI_DEBUG("enable busmaster for device (%u, %u, %u)...\n", *bus, *dev, *fun);
428     enable_busmaster(*bus, *dev, *fun, pcie);
429
430     return SYS_ERR_OK;
431 }
432
433 void pci_enable_interrupt_for_device(uint32_t bus,
434                                      uint32_t dev,
435                                      uint32_t fun,
436                                      bool pcie)
437 {
438     struct pci_address addr = {
439         .bus = (uint8_t) (bus & 0xff),
440         .device = (uint8_t) (dev & 0xff),
441         .function = (uint8_t) (fun % 0xff)
442     };
443
444     pci_hdr0_t hdr;
445     pci_hdr0_initialize(&hdr, addr);
446
447     if (pcie) {
448         pcie_enable();
449     } else {
450         pcie_disable();
451     }
452
453     pci_hdr0_command_t cmd = pci_hdr0_command_rd(&hdr);
454     cmd.int_dis = 0;
455     pci_hdr0_command_wr(&hdr, cmd);
456 }
457
458 /**
459  * This function performs a recursive, depth-first search through the
460  * PCI hierarchy starting at parentaddr (this should initially be a
461  * PCI root complex), with bus number A. It enters whatever it
462  * discovers (bridges and devices) into the SKB.
463  *
464  * Refer to http://www.tldp.org/LDP/tlk/dd/pci.html for an overview of
465  * a similar discovery algorithm.
466  *
467  * Upon discovery of a bridge, it sets the bridge's primary bus number
468  * to A and assigns a secondary bus number of A + 2. The subordinate
469  * bus number is set to A + 3. This way, buses are spaced 2 apart,
470  * which is sometimes required for SR-IOV hot-plugged buses.
471  */
472 static void assign_bus_numbers(struct pci_address parentaddr,
473                                uint8_t *busnum,
474                                uint8_t maxchild,
475                                char* handle)
476 {
477     struct pci_address addr = {
478         .bus = parentaddr.bus
479     };
480
481     pcie_enable();
482
483     // First go through all bridges on this bus and disable them
484     for (addr.device = 0; addr.device < PCI_NDEVICES; addr.device++) {
485         for (addr.function = 0; addr.function < PCI_NFUNCTIONS; addr.function++) {
486             pci_hdr1_t bhdr;
487             pci_hdr1_initialize(&bhdr, addr);
488
489             uint16_t vendor = pci_hdr1_vendor_id_rd(&bhdr);
490
491             if (vendor == 0xffff) {
492                 if (addr.function == 0) {
493                     // this device doesn't exist at all
494                     break;
495                 } else {
496                     // this function doesn't exist, but there may be others
497                     continue;
498                 }
499             }
500
501             pci_hdr1_hdr_type_t hdr_type = pci_hdr1_hdr_type_rd(&bhdr);
502             if (hdr_type.fmt == pci_hdr1_pci2pci) {
503                 PCI_DEBUG("Disabling bridge (%u,%u,%u)\n", addr.bus, addr.device,
504                           addr.function);
505
506                 pci_hdr1_bcfg_t bcfg = pci_hdr1_bcfg_rd(&bhdr);
507                 bcfg.pri_bus = 0;
508                 bcfg.sec_bus = 0;
509                 bcfg.sub_bus = 0;
510                 pci_hdr1_bcfg_wr(&bhdr, bcfg);
511             }
512         }
513     }
514
515     for (addr.device = 0; addr.device < PCI_NDEVICES; addr.device++) {
516         for (addr.function = 0; addr.function < PCI_NFUNCTIONS; addr.function++) {
517             pci_hdr0_t hdr;
518             pci_hdr0_initialize(&hdr, addr);
519
520             pcie_enable();
521             uint16_t pcie_vendor = pci_hdr0_vendor_id_rd(&hdr);
522             uint16_t vendor = pcie_vendor;
523             bool pcie = true;
524             bool extended_caps = false;  // Whether to scan for PCI Express extended caps
525
526             // Disable PCIe if device exists only in PCI
527             if (pcie_vendor != 0xffff) {
528                 vendor = pcie_vendor;
529                 pcie = true;
530             } else {
531                 pcie_disable();
532                 vendor = pci_hdr0_vendor_id_rd(&hdr);
533                 pcie = false;
534             }
535
536             if (vendor == 0xffff) {
537                 if (addr.function == 0) {
538                     // this device doesn't exist at all
539                     break;
540                 } else {
541                     // this function doesn't exist, but there may be others
542                     continue;
543                 }
544             }
545             pci_hdr0_class_code_t classcode = pci_hdr0_class_code_rd(&hdr);
546             uint16_t device_id = pci_hdr0_device_id_rd(&hdr);
547
548             /* Disable all decoders for this device,
549              * they will be re-enabled as devices are setup.
550              * NB: we are using "pci_hdr1" here, but the command field is
551              * common to all configuration header types.
552              */
553             /* PCI_DEBUG("disabling decoders for (%hhu,%hhu,%hhu)\n", */
554             /*     addr.bus, addr.device, addr.function); */
555             pci_hdr0_command_t cmd = pci_hdr0_command_rd(&hdr);
556
557             cmd.mem_space = 0;
558             cmd.io_space = 0;  // XXX: not handled in setup yet
559
560             // Ticket #210
561             //XXX: This should be set to 0 and only enabled if needed
562             //     (whenever a driver attaches to a device).
563             //     For bridges the pci driver enables the bit later when
564             //     programming the bridge window
565 //            cmd.master = 0;
566
567             // Ticket 229
568             //pci_hdr0_command_wr(&hdr, cmd);
569
570             // do we have a bridge?
571             pci_hdr0_hdr_type_t hdr_type = pci_hdr0_hdr_type_rd(&hdr);
572             if (hdr_type.fmt == pci_hdr0_pci2pci) {
573                 pci_hdr1_t bhdr;
574                 pci_hdr1_initialize(&bhdr, addr);
575
576                 //ACPI_HANDLE child;
577                 char* child = NULL;
578                 errval_t error_code;
579                 PCI_DEBUG("get irq table for (%hhu,%hhu,%hhu)\n", (*busnum) + 1,
580                           addr.device, addr.function);
581                 struct acpi_rpc_client* cl = get_acpi_rpc_client();
582                 // XXX: why do we have two different types for the same thing?
583                 acpi_pci_address_t xaddr = {
584                     .bus = addr.bus,
585                     .device = addr.device,
586                     .function = addr.function,
587                 };
588                 cl->vtbl.read_irq_table(cl, handle, xaddr, (*busnum) + 1,
589                                         &error_code, &child);
590                 if (err_is_fail(error_code)) {
591                     DEBUG_ERR(error_code, "Reading IRQs failed");
592                     assert(!"Check ACPI code");
593                 }
594
595                 // Increase by 2 to leave room for SR-IOV
596                 (*busnum) += 2;
597                 //assert(*busnum <= maxchild);
598
599                 PCI_DEBUG("program busses for bridge (%hhu,%hhu,%hhu)\n"
600                           "primary: %hhu, secondary: %hhu, subordinate: %hhu\n",
601                           addr.bus, addr.device, addr.function, addr.bus, *busnum,
602                           (*busnum) + 1);
603
604                 // Disable master abort mode on the bridge
605                 pci_hdr1_brdg_ctrl_mabort_wrf(&bhdr, 0);
606
607                 // Clear all errors
608                 pci_hdr1_status_wr_raw(&bhdr, 0);
609
610                 // program bus numbers for this bridge
611                 pci_hdr1_bcfg_t bcfg = pci_hdr1_bcfg_rd(&bhdr);
612                 bcfg.pri_bus = addr.bus;
613                 bcfg.sec_bus = *busnum;
614                 bcfg.sub_bus = 0xff;
615                 pci_hdr1_bcfg_wr(&bhdr, bcfg);
616
617                 skb_add_fact("bridge(%s,addr(%u,%u,%u),%u,%u,%u,%u,%u, secondary(%hhu)).",
618                              (pcie ? "pcie" : "pci"), addr.bus, addr.device,
619                              addr.function, vendor, device_id, classcode.clss,
620                              classcode.subclss, classcode.prog_if, *busnum);
621
622                 //use the original hdr (pci_hdr0_t) here
623                 query_bars(hdr, addr, true);
624
625                 // assign bus numbers to secondary bus
626                 struct pci_address bridge_addr = {
627                     .bus = *busnum,
628                     .device = addr.device,
629                     .function = addr.function
630                 };
631                 assign_bus_numbers(bridge_addr, busnum, maxchild, child);
632                 // Restore the old state of pcie. The above call changes this
633                 // state according to the devices under this bridge
634                 if (pcie) {
635                     pcie_enable();
636                 } else {
637                     pcie_disable();
638                 }
639
640                 // Set this bridge's subordinate to the maximum of the underlying hierarchy
641                 pci_hdr1_bcfg_sub_bus_wrf(&bhdr, (*busnum) + 1);
642             }
643
644             //is this a normal PCI device?
645             if (hdr_type.fmt == pci_hdr0_nonbridge) {
646                 PCI_DEBUG("Found device (%u, %u, %u), vendor = %x, device = %x\n",
647                           addr.bus, addr.device, addr.function, vendor,
648                           device_id);
649
650                 pci_hdr0_t devhdr;
651                 pci_hdr0_initialize(&devhdr, addr);
652                 skb_add_fact("device(%s,addr(%u,%u,%u),%u,%u,%u, %u, %u, %d).",
653                              (pcie ? "pcie" : "pci"), addr.bus, addr.device,
654                              addr.function, vendor, device_id, classcode.clss,
655                              classcode.subclss, classcode.prog_if,
656                              pci_hdr0_int_pin_rd(&devhdr) - 1);
657
658                 // octopus start
659                 char* record = NULL;
660                 static char* device_fmt = "hw.pci.device. { "
661                                 "bus: %u, device: %u, function: %u, "
662                                 "vendor: %u, device_id: %u, class: %u, "
663                                 "subclass: %u, prog_if: %u }";
664
665                 errval_t err = oct_mset(SET_SEQUENTIAL, device_fmt, addr.bus,
666                                         addr.device, addr.function, vendor,
667                                         device_id, classcode.clss,
668                                         classcode.subclss, classcode.prog_if);
669
670                 assert(err_is_ok(err));
671                 free(record);
672                 // end octopus
673
674                 query_bars(devhdr, addr, false);
675
676                 // Process device capabilities if existing
677                 if (pci_hdr0_status_rd(&devhdr).caplist) {
678                     uint8_t cap_ptr = pci_hdr0_cap_ptr_rd(&devhdr);
679
680                     // Walk capabilities list
681                     while (cap_ptr != 0) {
682                         assert(cap_ptr % 4 == 0 && cap_ptr >= 0x40
683                                && cap_ptr < 0x100);
684                         uint32_t capword = pci_read_conf_header(&addr,
685                                                                 cap_ptr / 4);
686
687                         switch (capword & 0xff) {
688                             case 0x10:  // PCI Express
689                                 PCI_DEBUG("PCI Express device\n");
690                                 extended_caps = true;
691                                 break;
692
693                             default:
694                                 PCI_DEBUG("Unknown PCI device capability 0x%x at 0x%x\n",
695                                           capword & 0xff, cap_ptr);
696                                 break;
697                         }
698
699                         cap_ptr = (capword >> 8) & 0xff;
700                     }
701                 }
702
703                 // Process extended device capabilities if existing
704                 if (pcie && extended_caps && addr.bus < pcie_get_endbus()) {
705                     uint32_t *ad = (uint32_t *) pcie_confspace_access(addr);
706                     assert(ad != NULL);
707                     uint16_t cap_ptr = 0x100;
708
709                     while (cap_ptr != 0) {
710                         uint32_t capword = *(ad + (cap_ptr / 4));
711                         assert(cap_ptr % 4 == 0 && cap_ptr >= 0x100
712                                && cap_ptr < 0x1000);
713
714                         switch (capword & 0xffff) {  // Switch on capability ID
715                             case 0:
716                                 // No extended caps
717                                 break;
718
719                             case 16:
720                                 // SR-IOV capability
721                                 {
722                                 /*
723                                  * XXX: When using our e1000 driver with the
724                                  *      I350 network card (device id 0x152x),
725                                  *      the configuration fails when VF are
726                                  *      enabled: Legacy descriptors are ignored
727                                  *      when VF are enabled.
728                                  */
729                                 if (vendor == 0x8086 && (device_id & 0xFFF0) == 0x1520) {
730                                     debug_printf("skipping SR IOV initialization"
731                                                     "for e1000 card.\n");
732                                     break;
733                                 }
734                                 pci_sr_iov_cap_t sr_iov_cap;
735                                 pci_sr_iov_cap_initialize(&sr_iov_cap,
736                                      (mackerel_addr_t) (ad + (cap_ptr / 4)));
737
738                                 PCI_DEBUG("Found SR-IOV capability\n");
739
740                                 // Support version 1 for the moment
741                                 assert(pci_sr_iov_cap_hdr_ver_rdf(&sr_iov_cap) == 1);
742
743                                 // Support system page size of 4K at the moment
744                                 assert(pci_sr_iov_cap_sys_psize_rd(&sr_iov_cap)
745                                        == 1);
746
747 #if 0   // Dump cap contents
748                                 pci_sr_iov_cap_caps_pr(str, 256, &sr_iov_cap);
749                                 PCI_DEBUG("%s\n", str);
750                                 pci_sr_iov_cap_ctrl_pr(str, 256, &sr_iov_cap);
751                                 PCI_DEBUG("%s\n", str);
752                                 pci_sr_iov_cap_status_pr(str, 256, &sr_iov_cap);
753                                 PCI_DEBUG("%s\n", str);
754                                 pci_sr_iov_cap_initialvfs_pr(str, 256, &sr_iov_cap);
755                                 PCI_DEBUG("%s\n", str);
756                                 pci_sr_iov_cap_totalvfs_pr(str, 256, &sr_iov_cap);
757                                 PCI_DEBUG("%s\n", str);
758                                 pci_sr_iov_cap_numvfs_pr(str, 256, &sr_iov_cap);
759                                 PCI_DEBUG("%s\n", str);
760                                 pci_sr_iov_cap_fdl_pr(str, 256, &sr_iov_cap);
761                                 PCI_DEBUG("%s\n", str);
762                                 pci_sr_iov_cap_offset_pr(str, 256, &sr_iov_cap);
763                                 PCI_DEBUG("%s\n", str);
764                                 pci_sr_iov_cap_stride_pr(str, 256, &sr_iov_cap);
765                                 PCI_DEBUG("%s\n", str);
766                                 pci_sr_iov_cap_devid_pr(str, 256, &sr_iov_cap);
767                                 PCI_DEBUG("%s\n", str);
768                                 pci_sr_iov_cap_sup_psize_pr(str, 256, &sr_iov_cap);
769                                 PCI_DEBUG("%s\n", str);
770                                 pci_sr_iov_cap_sys_psize_pr(str, 256, &sr_iov_cap);
771                                 PCI_DEBUG("%s\n", str);
772 #endif
773
774                                 if (max_numvfs > 0) {
775                                     // Set maximum number of VFs
776                                     uint16_t totalvfs = pci_sr_iov_cap_totalvfs_rd( &sr_iov_cap);
777                                     uint16_t numvfs = MIN(totalvfs, max_numvfs);
778                                     //                  uint16_t numvfs = 8;
779                                     PCI_DEBUG("Maximum supported VFs: %u. Enabling: %u\n",
780                                               totalvfs, numvfs);
781                                     pci_sr_iov_cap_numvfs_wr(&sr_iov_cap, numvfs);
782
783                                     uint16_t offset = pci_sr_iov_cap_offset_rd(&sr_iov_cap);
784                                     uint16_t stride = pci_sr_iov_cap_stride_rd(&sr_iov_cap);
785                                     uint16_t vf_devid = pci_sr_iov_cap_devid_rd(&sr_iov_cap);
786
787                                     PCI_DEBUG("VF offset is 0x%x, stride is 0x%x, "
788                                               "device ID is 0x%x\n",
789                                               offset, stride, vf_devid);
790
791 #if 0
792                                     // Make sure we enable the PF
793                                     cmd = pci_hdr0_command_rd(&hdr);
794                                     cmd.mem_space = 1;
795                                     cmd.io_space = 1;
796                                     /* cmd.master = 1; */
797                                     pci_hdr0_command_wr(&hdr, cmd);
798 #endif
799
800                                     // Start VFs (including memory spaces)
801                                     pci_sr_iov_cap_ctrl_vf_mse_wrf(&sr_iov_cap, 1);
802                                     pci_sr_iov_cap_ctrl_vf_enable_wrf(&sr_iov_cap, 1);
803
804                                     // Spec says to wait here for at least 100ms
805                                     err = barrelfish_usleep(100000);
806                                     assert(err_is_ok(err));
807
808                                     // Add all VFs
809                                     for (int vfn = 0; vfn < numvfs; vfn++) {
810                                         uint8_t busnr = addr.bus + ((((addr.device << 3)
811                                                                  + addr.function)
812                                                                  + offset
813                                                                  + stride * vfn)
814                                                                  >> 8);
815                                         uint8_t devfn = (((addr.device << 3)
816                                                         + addr.function)
817                                                         + offset
818                                                         + stride * vfn)
819                                                         & 0xff;
820                                         struct pci_address vf_addr = {
821                                             .bus = busnr,
822                                             .device = devfn >> 3,
823                                             .function = devfn & 7,
824                                         };
825
826                                         PCI_DEBUG("Adding VF (%u, %u, %u)\n",
827                                                   vf_addr.bus, vf_addr.device,
828                                                   vf_addr.function);
829
830                                         skb_add_fact("device(%s,addr(%u,%u,%u),%u,%u,%u, %u, %u, %d).",
831                                                      (pcie ? "pcie" : "pci"),
832                                                      vf_addr.bus,
833                                                      vf_addr.device,
834                                                      vf_addr.function, vendor,
835                                                      vf_devid, classcode.clss,
836                                                      classcode.subclss,
837                                                      classcode.prog_if, 0);
838
839                                         // octopus start
840                                         device_fmt ="hw.pci.device. { "
841                                                      "bus: %u, device: %u, function: %u, "
842                                                      "vendor: %u, device_id: %u, class: %u, "
843                                                      "subclass: %u, prog_if: %u }";
844                                         err = oct_mset(SET_SEQUENTIAL,
845                                                        device_fmt,
846                                                        vf_addr.bus,
847                                                        vf_addr.device,
848                                                        vf_addr.function,
849                                                        vendor, vf_devid,
850                                                        classcode.clss,
851                                                        classcode.subclss,
852                                                        classcode.prog_if);
853
854                                         assert(err_is_ok(err));
855                                         // end octopus
856
857                                         // We probe the BARs several times. Strictly
858                                         // speaking, this is not necessary, as we
859                                         // can calculate all offsets, but we're
860                                         // lazy...
861                                         pci_hdr0_bar32_t bar, barorigaddr;
862                                         for (int i = 0; i < pci_sr_iov_cap_vf_bar_length; i++) {
863                                             union pci_hdr0_bar32_un orig_value;
864                                             orig_value.raw = pci_sr_iov_cap_vf_bar_rd(&sr_iov_cap, i);
865                                             barorigaddr = orig_value.val;
866
867                                             // probe BAR to see if it is implemented
868                                             pci_sr_iov_cap_vf_bar_wr(&sr_iov_cap, i, BAR_PROBE);
869
870                                             union pci_hdr0_bar32_un bar_value;
871                                             bar_value.raw = pci_sr_iov_cap_vf_bar_rd(&sr_iov_cap, i);
872                                             bar = (union pci_hdr0_bar32_un ) {
873                                                         .raw =bar_value.raw
874                                                    }.val;
875
876                                             //write original value back to the BAR
877                                             pci_sr_iov_cap_vf_bar_wr(&sr_iov_cap,
878                                                                  i, orig_value.raw);
879
880                                             /*
881                                              * We need to check the entire register
882                                              * here to make sure the bar is not
883                                              * implemented as it could lie in the
884                                              * high 64 bit range...
885                                              */
886                                             if (bar_value.raw == 0) {
887                                                 // BAR not implemented
888                                                 continue;
889                                             }
890
891                                             // SR-IOV doesn't support IO space BARs
892                                             assert(bar.space == 0);
893                                             int type = -1;
894                                             if (bar.tpe == pci_hdr0_bar_32bit) {
895                                                 type = 32;
896                                             }
897                                             if (bar.tpe == pci_hdr0_bar_64bit) {
898                                                 type = 64;
899                                             }
900
901                                             if (bar.tpe == pci_hdr0_bar_64bit) {
902                                                 //read the upper 32bits of the address
903                                                 pci_hdr0_bar32_t bar_high, barorigaddr_high;
904                                                 union pci_hdr0_bar32_un orig_value_high;
905                                                 orig_value_high.raw = pci_sr_iov_cap_vf_bar_rd(&sr_iov_cap, i + 1);
906                                                 barorigaddr_high = orig_value_high.val;
907
908                                                 // probe BAR to determine the mapping size
909                                                 pci_sr_iov_cap_vf_bar_wr(&sr_iov_cap, i + 1, BAR_PROBE);
910
911                                                 bar_high = (union pci_hdr0_bar32_un ) {
912                                                               .raw =pci_sr_iov_cap_vf_bar_rd(&sr_iov_cap,i + 1)
913                                                             }.val;
914
915                                                 //write original value back to the BAR
916                                                 pci_sr_iov_cap_vf_bar_wr(&sr_iov_cap,
917                                                           i + 1, orig_value_high.raw);
918
919                                                 pciaddr_t base64 = bar_high.base;
920                                                 base64 <<= 32;
921                                                 base64 |= bar.base << 7;
922
923                                                 pciaddr_t origbase64 = barorigaddr_high.base;
924                                                 origbase64 <<= 32;
925                                                 origbase64 |= barorigaddr.base << 7;
926
927                                                 PCI_DEBUG("(%u,%u,%u): 64bit BAR %d at 0x%"
928                                                           PRIxPCIADDR ", size %" PRIx64 ", %s\n",
929                                                           vf_addr.bus, vf_addr.device,
930                                                           vf_addr.function, i,
931                                                           (origbase64 << 7) + bar_mapping_size64(base64) * vfn,
932                                                           bar_mapping_size64(base64),
933                                                           (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));
934
935                                                 skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"
936                                                              PRIxPCIADDR", ""16'%" PRIx64 ", vf, %s, %d).",
937                                                              vf_addr.bus, vf_addr.device, vf_addr.function, i,
938                                                              (origbase64 << 7) + bar_mapping_size64(base64) * vfn,
939                                                              bar_mapping_size64(base64),
940                                                              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
941                                                              type);
942
943                                                 i++;  //step one forward, because it is a 64bit BAR
944                                             } else {
945                                                 PCI_DEBUG("(%u,%u,%u): 32bit BAR %d at 0x%" PRIx32 ", size %x, %s\n",
946                                                           vf_addr.bus, vf_addr.device, vf_addr.function, i,
947                                                           (barorigaddr.base << 7) + bar_mapping_size(bar) * vfn,
948                                                           bar_mapping_size(bar),
949                                                           (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));
950
951                                                 //32bit BAR
952                                                 skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIx32", 16'%"
953                                                              PRIx32 ", vf, %s, %d).", vf_addr.bus,
954                                                              vf_addr.device, vf_addr.function, i,
955                                                              (uint32_t) ((barorigaddr.base << 7)
956                                                                          + bar_mapping_size( bar) * vfn),
957                                                              (uint32_t) bar_mapping_size(bar),
958                                                              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
959                                                              type);
960                                             }
961                                         }
962                                     }
963                                 }
964                             }
965                                 break;
966
967                             default:
968                                 PCI_DEBUG("Unknown extended PCI device capability 0x%x at 0x%x\n",
969                                           capword & 0xffff, cap_ptr);
970                                 break;
971                         }
972
973                         cap_ptr = capword >> 20;
974                     }
975                 }
976             }
977
978             // is this a multi-function device?
979             if (addr.function == 0 && !hdr_type.multi) {
980                 break;
981             }
982         }
983     }
984
985     free(handle);
986 }
987
#if 0
/**
 * \brief Recursively report all PCI-to-PCI bridges reachable from a bus.
 *
 * Scans every device/function slot on the bus named by myad.bus. For each
 * function whose header format is pci_hdr1_pci2pci, the bridge's bus
 * configuration is printed via PCI_DEBUG and the scan recurses onto the
 * bridge's secondary bus. Only the bus member of \p myad is used.
 *
 * NOTE: this code is currently compiled out.
 */
static void get_bridges(struct pci_address myad)
{
    struct pci_address addr = {.bus = myad.bus};

    pcie_enable();

    for (addr.device = 0; addr.device < PCI_NDEVICES; addr.device++) {
        for (addr.function = 0; addr.function < PCI_NFUNCTIONS; addr.function++) {
            pci_hdr1_t bridge_hdr;
            pci_hdr1_initialize(&bridge_hdr, addr);

            uint16_t vendor = pci_hdr1_vendor_id_rd(&bridge_hdr);
            if (vendor == 0xffff) {
                if (addr.function == 0) {
                    // function 0 missing: the whole device is absent
                    break;
                }
                // only this function is absent; later ones may still exist
                continue;
            }

            pci_hdr1_hdr_type_t hdr_type = pci_hdr1_hdr_type_rd(&bridge_hdr);
            if (hdr_type.fmt != pci_hdr1_pci2pci) {
                // not a bridge, nothing to descend into
                continue;
            }

            pci_hdr1_bcfg_t bcfg = pci_hdr1_bcfg_rd(&bridge_hdr);

            PCI_DEBUG("Found bridge (%u,%u,%u), primary %u, secondary %u, subordinate %u\n",
                      addr.bus, addr.device, addr.function,
                      bcfg.pri_bus, bcfg.sec_bus, bcfg.sub_bus);

            // descend onto the bus behind this bridge
            struct pci_address behind_bridge = {
                .bus = bcfg.sec_bus,
                .device = addr.device,
                .function = addr.function
            };

            get_bridges(behind_bridge);
        }
    }
}
#endif
1032
1033 void pci_add_root(struct pci_address addr,
1034                   uint8_t maxchild,
1035                   char* handle)
1036 {
1037     uint8_t busnum = addr.bus;
1038     /* get_bridges(addr); */
1039     assign_bus_numbers(addr, &busnum, maxchild, handle);
1040     /* get_bridges(addr); */
1041 }
1042
1043 errval_t pci_setup_root_complex(void)
1044 {
1045     errval_t err;
1046     char* record = NULL;
1047     char** names = NULL;
1048     size_t len = 0;
1049
1050     // TODO: react to new rootbridges
1051     err = oct_get_names(&names, &len, "r'hw.pci.rootbridge.[0-9]+' "
1052                         "{ acpi_node: _, bus: _, device: _, function: _, maxbus: _ }");
1053     if (err_is_fail(err)) {
1054         DEBUG_ERR(err, "get names");
1055         goto out;
1056     }
1057
1058     for (size_t i = 0; i < len; i++) {
1059         err = oct_get(&record, names[i]);
1060         if (err_is_fail(err)) {
1061             goto out;
1062         }
1063
1064         PCI_DEBUG("found new root complex: %s\n", record);
1065
1066         char* acpi_node = NULL;  // freed in pci_add_root
1067         int64_t bus, device, function, maxbus;
1068         static char* format =
1069                         "_ { acpi_node: %s, bus: %d, device: %d, function: %d, maxbus: %d }";
1070         err = oct_read(record, format, &acpi_node, &bus, &device, &function,
1071                        &maxbus);
1072         if (err_is_fail(err)) {
1073             free(acpi_node);
1074             free(record);
1075             goto out;
1076         }
1077
1078         struct pci_address addr;
1079         addr.bus = (uint8_t) bus;
1080         addr.device = (uint8_t) device;
1081         addr.function = (uint8_t) function;
1082
1083         pcie_enable();
1084         pci_add_root(addr, maxbus, acpi_node);
1085         pcie_disable();
1086
1087         free(record);
1088     }
1089
1090     out: oct_free_names(names, len);
1091     return err;
1092 }
1093
1094 //query all BARs. That means, get the original address, the mapping size
1095 //and all attributes.
1096
1097 // XXX: asq: We are using this function to program also the _two_ BARs
1098 //           of a PCI-to-PCI bridge. They are at the same offset within the
1099 //           PCI header like for any PCI device. PCI HDR0 is misused
1100 //           here for the bridges.
1101
1102 static void query_bars(pci_hdr0_t devhdr,
1103                        struct pci_address addr,
1104                        bool pci2pci_bridge)
1105 {
1106     pci_hdr0_bar32_t bar, barorigaddr;
1107
1108     int maxbars = pci2pci_bridge ? 1 : pci_hdr0_bars_length;
1109     for (int i = 0; i <= maxbars; i++) {
1110         union pci_hdr0_bar32_un orig_value;
1111         orig_value.raw = pci_hdr0_bars_rd(&devhdr, i);
1112         barorigaddr = orig_value.val;
1113
1114         // probe BAR to determine the mapping size
1115         pci_hdr0_bars_wr(&devhdr, i, BAR_PROBE);
1116
1117         uint32_t bar_value = pci_hdr0_bars_rd(&devhdr, i);
1118
1119         bar = (union pci_hdr0_bar32_un ) { .raw = bar_value }.val;
1120
1121         //write original value back to the BAR
1122         pci_hdr0_bars_wr(&devhdr, i, orig_value.raw);
1123
1124         /*
1125          * Cannot just compare the base value, with addresses over 4G there
1126          * will be a problem. Thus we need to check if the entire register is
1127          * zero. If it is a 32bit register, then the address part will be filled.
1128          * If it is a 64bit register, the type will contain a nonzero value.
1129          * - 2014-05-02, RA
1130          */
1131         if (bar_value == 0) {
1132             // BAR not implemented
1133             continue;
1134         }
1135
1136         if (bar.space == 0) {  // memory mapped
1137             //bar(addr(bus, device, function), barnr, orig address, size, space,
1138             //         prefetchable?, 64bit?).
1139             //where space = mem | io, prefetchable= prefetchable | nonprefetchable,
1140             //64bit = 64bit | 32bit.
1141
1142             int type = -1;
1143             if (bar.tpe == pci_hdr0_bar_32bit) {
1144                 type = 32;
1145             }
1146             if (bar.tpe == pci_hdr0_bar_64bit) {
1147                 type = 64;
1148             }
1149
1150             if (bar.tpe == pci_hdr0_bar_64bit) {
1151                 //we must take the next BAR into account and do the same
1152                 //tests like in the 32bit case, but this time with the combined
1153                 //value from the current and the next BAR, since a 64bit BAR
1154                 //is constructed out of two consequtive 32bit BARs
1155
1156                 //read the upper 32bits of the address
1157                 uint32_t orig_value_high = pci_hdr0_bars_rd(&devhdr, i + 1);
1158
1159                 // probe BAR to determine the mapping size
1160                 pci_hdr0_bars_wr(&devhdr, i + 1, BAR_PROBE);
1161
1162                 // read the size information of the bar
1163                 uint32_t bar_value_high = pci_hdr0_bars_rd(&devhdr, i + 1);
1164
1165                 //write original value back to the BAR
1166                 pci_hdr0_bars_wr(&devhdr, i + 1, orig_value_high);
1167
1168                 pciaddr_t base64 = 0, origbase64 = 0;
1169                 base64 = bar_value_high;
1170                 base64 <<= 32;
1171                 base64 |= (uint32_t) (bar.base << 7);
1172
1173                 origbase64 = orig_value_high;
1174                 origbase64 <<= 32;
1175                 origbase64 |= (uint32_t) (barorigaddr.base << 7);
1176
1177                 PCI_DEBUG("(%u,%u,%u): 64bit BAR %d at 0x%" PRIxPCIADDR ", size %"
1178                           PRIx64 ", %s\n", addr.bus, addr.device, addr.function,
1179                           i, origbase64, bar_mapping_size64(base64),
1180                           (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));
1181
1182                 skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIxPCIADDR", "
1183                              "16'%" PRIx64 ", mem, %s, %d).", addr.bus,
1184                              addr.device, addr.function, i, origbase64,
1185                              bar_mapping_size64(base64),
1186                              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
1187                              type);
1188
1189                 i++;  //step one forward, because it is a 64bit BAR
1190             } else {
1191                 //32bit BAR
1192                 skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIx32", 16'%" PRIx32
1193                              ", mem, %s, %d).", addr.bus, addr.device, addr.function,
1194                              i, (uint32_t) (barorigaddr.base << 7),
1195                              (uint32_t) bar_mapping_size(bar),
1196                              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
1197                              type);
1198             }
1199         } else {
1200             //bar(addr(bus, device, function), barnr, orig address, size, space).
1201             //where space = mem | io
1202             skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIx32", 16'%" PRIx32 ", io, "
1203                          "nonprefetchable, 32).", addr.bus, addr.device, addr.function, i,
1204                          (uint32_t) (barorigaddr.base << 7), (uint32_t) bar_mapping_size(bar));
1205         }
1206     }
1207 }
1208
1209 static void program_bridge_window(uint8_t bus,
1210                                   uint8_t dev,
1211                                   uint8_t fun,
1212                                   pciaddr_t base,
1213                                   pciaddr_t high,
1214                                   bool pcie,
1215                                   bool mem,
1216                                   bool pref)
1217 {
1218     struct pci_address addr;
1219     pci_hdr1_prefbl_t pref_reg;
1220     pci_hdr1_command_t cmd;
1221
1222     if (pcie) {
1223         pcie_enable();
1224     } else {
1225         pcie_disable();
1226     }
1227
1228     assert((base & 0x000fffff) == 0);
1229     assert((high & 0x000fffff) == 0x000fffff);
1230
1231     addr.bus = bus;
1232     addr.device = dev;
1233     addr.function = fun;
1234
1235     pci_hdr1_t bridgehdr;
1236     pci_hdr1_initialize(&bridgehdr, addr);
1237
1238     cmd = pci_hdr1_command_rd(&bridgehdr);
1239
1240     if (mem) {
1241         if (pref) {
1242             pci_hdr1_pref_base_upper_wr(&bridgehdr, base >> 32);
1243             pci_hdr1_pref_limit_upper_wr(&bridgehdr, high >> 32);
1244             /*
1245              * The least significant nibble of this register value (1h)
1246              * indicates that a 64 bit address decoder is supported and
1247              * that the Upper Base/Limit Registers are also used.
1248              */
1249             if ((base >> 32)) {
1250                 pref_reg.tpe = pci_hdr1_mem_64bit;
1251             } else {
1252                 pref_reg.tpe = pci_hdr1_mem_32bit;
1253             }
1254             pref_reg.val = base >> 20;
1255             pci_hdr1_pref_base_wr(&bridgehdr, pref_reg);
1256             if ((high >> 32)) {
1257                 pref_reg.tpe = pci_hdr1_mem_64bit;
1258             } else {
1259                 pref_reg.tpe = pci_hdr1_mem_32bit;
1260             }
1261             pref_reg.val = high >> 20;
1262             pci_hdr1_pref_limit_wr(&bridgehdr, pref_reg);
1263         } else {
1264             assert((base & 0xffffffff00000000) == 0);
1265             assert((high & 0xffffffff00000000) == 0);
1266             pci_hdr1_membl_t membl = {
1267                 .base = base >> 16,
1268                 .limit = high >> 16,
1269             };
1270             pci_hdr1_membl_wr(&bridgehdr, membl);
1271             /* pci_hdr1_mem_base_wr(&bridgehdr, base >> 16); */
1272             /* pci_hdr1_mem_limit_wr(&bridgehdr, high >> 16); */
1273         }
1274         // enable the memory decoder
1275         cmd.mem_space = 1;
1276     } else {
1277         // I/O
1278     }
1279
1280     cmd.int_dis = 0;
1281     cmd.master = 1;
1282     pci_hdr1_command_wr(&bridgehdr, cmd);
1283 }
1284
1285 static void program_device_bar(uint8_t bus,
1286                                uint8_t dev,
1287                                uint8_t fun,
1288                                int bar,
1289                                pciaddr_t base,
1290                                pcisize_t size,
1291                                int bits,
1292                                bool memspace,
1293                                bool pcie)
1294 {
1295     struct pci_address addr;
1296     addr.bus = bus;
1297     addr.device = dev;
1298     addr.function = fun;
1299
1300     if (pcie) {
1301         pcie_enable();
1302     } else {
1303         pcie_disable();
1304     }
1305
1306     pci_hdr0_t devhdr;
1307     pci_hdr0_initialize(&devhdr, addr);
1308
1309     //disable the address decoder for programming the BARs
1310     pci_hdr0_command_t cmd = pci_hdr0_command_rd(&devhdr);
1311     if (memspace) {
1312         cmd.mem_space = 0;
1313     } else {
1314         cmd.io_space = 0;
1315     }
1316     //disbale interrupts here. enable them as soon as a driver requests
1317     //interrupts
1318     cmd.int_dis = 1;
1319     pci_hdr0_command_wr(&devhdr, cmd);
1320
1321     if (bits == 64) {
1322         pci_hdr0_bars_wr(&devhdr, bar, base & 0xffffffff);
1323         pci_hdr0_bars_wr(&devhdr, bar + 1, base >> 32);
1324     } else {  // 32-bit
1325         assert(base + size <= 0xffffffff);  // 32-bit BAR
1326         pci_hdr0_bars_wr(&devhdr, bar, base);
1327     }
1328
1329     //re-enable the decoder for the BARs
1330     if (memspace) {
1331         cmd.mem_space = 1;
1332     } else {
1333         cmd.io_space = 1;
1334     }
1335     pci_hdr0_command_wr(&devhdr, cmd);
1336 }
1337
1338 static void enable_busmaster(uint8_t bus,
1339                              uint8_t dev,
1340                              uint8_t fun,
1341                              bool pcie)
1342 {
1343     struct pci_address addr;
1344     addr.bus = bus;
1345     addr.device = dev;
1346     addr.function = fun;
1347
1348     if (pcie) {
1349         pcie_enable();
1350     } else {
1351         pcie_disable();
1352     }
1353
1354     pci_hdr0_t devhdr;
1355     pci_hdr0_initialize(&devhdr, addr);
1356
1357     //enable bus master
1358     pci_hdr0_command_t cmd = pci_hdr0_command_rd(&devhdr);
1359     cmd.master = 1;
1360     pci_hdr0_command_wr(&devhdr, cmd);
1361 }
1362
1363 void pci_program_bridges(void)
1364 {
1365     char element_type[7];  // "device" | "bridge"
1366     char bar_secondary[16];  //[0-6] | secondary(<0-255>)
1367     char space[4];  // "mem" | "io"
1368     char prefetch[16];  // "prefetchable" | "nonprefetchable"
1369     char pcie_pci[5];  // "pcie" | "pci"
1370     int bar;  // the value of bar_secondary after parsing secondary(<nr>) to <nr>
1371     uint8_t bus, dev, fun;
1372     pciaddr_t base, high;
1373     pcisize_t size;
1374     int bits;
1375     bool mem, pcie, pref;
1376     char *output = NULL;
1377     int output_length = 0;
1378     int error_code = 0;
1379
1380     /*
1381      output = NULL;
1382      output_length = 0;
1383      skb_execute("listing.");
1384      output = skb_get_output();
1385      assert(output != NULL);
1386      output_length = strlen(output);
1387      PCI_DEBUG("pci_program_bridges: output = %s\n", output);
1388      PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
1389
1390      error_code = skb_read_error_code();
1391      if (error_code != 0) {
1392      printf("pci.c: pci_program_bridges(): SKB returnd error code %d\n",
1393      error_code);
1394
1395      const char *errout = skb_get_error_output();
1396      printf("\nSKB error returned: %s\n", errout);
1397      printf("\nSKB output: %s\n", output);
1398      // XXX: no device can be used...
1399      return;
1400      }
1401      */
1402
1403     output = NULL;
1404     output_length = 0;
1405     char bridge_program[512];
1406     snprintf(bridge_program, 512, "[%s], bridge_programming(P, Nr),"
1407              "flatten(P, F),replace_current_BAR_values(F),"
1408              "write(nrelements(Nr)),writeln(P).",
1409              skb_bridge_program);
1410     skb_execute(bridge_program);
1411     output = skb_get_output();
1412     assert(output != NULL);
1413     output_length = strlen(output);
1414     PCI_DEBUG("pci_program_bridges: output = %s\n", output);
1415     PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
1416
1417     error_code = skb_read_error_code();
1418     if (error_code != 0) {
1419         printf("pci.c: pci_program_bridges(): SKB returned error code %d\n",
1420                error_code);
1421
1422         const char *errout = skb_get_error_output();
1423         printf("SKB error returned: %s\n", errout);
1424         printf("SKB output: %s\n", output);
1425         // XXX: no device can be used...
1426         printf("WARNING: CONTINUING, HOWEVER PCI DEVICES WILL BE UNUSABLE\n");
1427         // except IO-space devices which aren't yet affected by bridge programming
1428         return;
1429     }
1430
1431     /*
1432      ********************************************************************************
1433      //for the ASPLOS11 paper:
1434      skb_execute("[bridge_page].");
1435      while (skb_read_error_code() == SKB_PROCESSING) messages_wait_and_handle_next();
1436      char *output = skb_get_output();
1437      assert(output != NULL);
1438      int output_length = strlen(output);
1439      PCI_DEBUG("pci_program_bridges: output = %s\n", output);
1440      PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
1441
1442      int error_code = skb_read_error_code();
1443      if (error_code != 0) {
1444      printf("pci.c: pci_program_bridges() <2>: SKB returnd error code %d\n",
1445      error_code);
1446
1447      const char *errout = skb_get_error_output();
1448      printf("\nSKB error returned <2>: %s\n", errout);
1449      printf("\nSKB output <2>: %s\n", output);
1450      // XXX: no device can be used...
1451      return;
1452      }
1453      uint64_t start =rdtsc();
1454      //    uint64_t start =rdtscp();
1455      skb_execute("bridge_programming(P, Nr),write(nrelements(Nr)),writeln(P).");
1456      uint64_t end =rdtsc();
1457      //    uint64_t end =rdtscp();
1458      assert(end >= start);
1459
1460      printf("\n\nTicks: %lu\n\n", end - start);
1461      while (skb_read_error_code() == SKB_PROCESSING) messages_wait_and_handle_next();
1462      output = skb_get_output();
1463      assert(output != NULL);
1464      output_length = strlen(output);
1465      printf("pci_program_bridges: output = %s\n", output);
1466      PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
1467
1468      error_code = skb_read_error_code();
1469      if (error_code != 0) {
1470      printf("pci.c: pci_program_bridges() <3>: SKB returnd error code %d\n",
1471      error_code);
1472
1473      const char *errout = skb_get_error_output();
1474      printf("\nSKB error returned <3>: %s\n", errout);
1475      printf("\nSKB output <3>: %s\n", output);
1476      // XXX: no device can be used...
1477      return;
1478      }
1479      ********************************************************************************
1480      */
1481
1482     //get the number of buselements from the output
1483     int nr_elements;
1484     int nr_conversions;
1485     nr_conversions = sscanf(output, "nrelements(%d)", &nr_elements);
1486     if (nr_conversions != 1) {
1487         printf("pci.c: No valid pci plan returned by the SKB\n.");
1488         //XXX: no device can be used
1489         return;
1490     }
1491
1492     //keep a pointer to the current location within the output
1493     char *conv_ptr = output;
1494
1495     //iterate over all buselements
1496     for (int i = 0; i < nr_elements; i++) {
1497         // search the beginning of the next buselement
1498         while ((conv_ptr < output + output_length) && (strncmp(
1499                         conv_ptr, "buselement", strlen("buselement"))
1500                                                        != 0)) {
1501             conv_ptr++;
1502         }
1503         //convert the string to single elements and numbers
1504         nr_conversions = sscanf(conv_ptr, "buselement(%[a-z], addr(%hhu, %hhu, %hhu), "
1505                                 "%[a-z0-9()], %"PRIuPCIADDR", %"PRIuPCIADDR", "
1506                                 "%"PRIuPCISIZE", %[a-z], %[a-z], %[a-z], %d",
1507                                 element_type, &bus, &dev, &fun, bar_secondary,
1508                                 &base, &high, &size, space, prefetch, pcie_pci,
1509                                 &bits);
1510         conv_ptr++;
1511         if (nr_conversions != 12) {
1512             printf("Could not parse output for device or bridge number %d\n"
1513                    "nr conversions: %d\n",
1514                    i, nr_conversions);
1515             continue;
1516         }
1517         if (strncmp(space, "mem", strlen("mem")) == 0) {
1518             mem = true;
1519         } else {
1520             mem = false;
1521         }
1522         if (strncmp(pcie_pci, "pcie", strlen("pcie")) == 0) {
1523             pcie = true;
1524         } else {
1525             pcie = false;
1526         }
1527         if (strncmp(prefetch, "prefetchable", strlen("prefetchable")) == 0) {
1528             pref = true;
1529         } else {
1530             pref = false;
1531         }
1532
1533         // Skip virtual functions
1534         if (strncmp(space, "vf", strlen("vf")) == 0) {
1535             /* PCI_DEBUG("Skipping VF addr(%hhu, %hhu, %hhu)\n", */
1536             /*      bus, dev, fun); */
1537             continue;
1538         }
1539
1540         if (strncmp(element_type, "device", strlen("device")) == 0) {
1541             nr_conversions = sscanf(bar_secondary, "%d", &bar);
1542             if (nr_conversions != 1) {
1543                 printf("Could not determine BAR number while programming BAR\n");
1544                 continue;
1545             }
1546             PCI_DEBUG("programming %s addr(%hhu, %hhu, %hhu), BAR %d, with base = "
1547                       "%"PRIxPCIADDR", high = %"PRIxPCIADDR", size = %"PRIxPCISIZE
1548                       " in" "space = %s, prefetch = %s, %s...\n",
1549                       element_type, bus, dev, fun, bar, base, high, size, space,
1550                       prefetch, pcie ? "PCIe" : "PCI");
1551             program_device_bar(bus, dev, fun, bar, base, size, bits, mem, pcie);
1552
1553         } else {
1554             PCI_DEBUG("programming %s addr(%hhu, %hhu, %hhu), with base = "
1555                       "%"PRIxPCIADDR", high = %"PRIxPCIADDR", size = %"PRIxPCISIZE
1556                       " in space = %s, prefetch = %s...\n",
1557                       element_type, bus, dev, fun, base, high, size, space,
1558                       prefetch);
1559             //a bridge expects the high address excluding the last byte which
1560             //is the base for the next bridge => decrement by one
1561             high--;
1562             program_bridge_window(bus, dev, fun, base, high, pcie, mem, pref);
1563         }
1564     }
1565 }
1566
1567 static uint32_t setup_interrupt(uint32_t bus,
1568                                 uint32_t dev,
1569                                 uint32_t fun)
1570 {
1571     char str[256], ldev[128];
1572
1573     snprintf(str, 256, "[\"irq_routing.pl\"], assigndeviceirq(addr(%"PRIu32
1574                        ", %"PRIu32", %"PRIu32")).",
1575                        bus, dev, fun);
1576     char *output, *error_out;
1577     int32_t int_err;
1578     errval_t err = skb_evaluate(str, &output, &error_out, &int_err);
1579     assert(output != NULL);
1580     assert(err_is_ok(err));
1581
1582     uint8_t irq;
1583     sscanf(output, "%s %hhu", ldev, &irq);
1584
1585     // It's a GSI
1586     if (strcmp(ldev, "fixedGsi") == 0) {
1587         printf("Got GSI %u\n", irq);
1588         return irq;
1589     }
1590
1591     struct acpi_rpc_client* cl = get_acpi_rpc_client();
1592     errval_t error_code;
1593     err = cl->vtbl.set_device_irq(cl, ldev, irq, &error_code);
1594     assert(err_is_ok(err));
1595     if (err_is_fail(error_code)) {
1596         //DEBUG_ERR(error_code, "set device irq failed.");
1597         return 0;
1598     }
1599
1600     return irq;
1601 }