mlx4: import of Vlad's Mellanox ConnectX InfiniBand driver
author Adam Turowski <adam.turowski@inf.ethz.ch>
Mon, 4 Sep 2017 08:07:33 +0000 (10:07 +0200)
committer Adam Turowski <adam.turowski@inf.ethz.ch>
Tue, 3 Oct 2017 15:05:17 +0000 (17:05 +0200)
Signed-off-by: Adam Turowski <adam.turowski@inf.ethz.ch>

278 files changed:
include/arch/x86_64/machine/endian.h
include/devif/backends/net/mlx4_devif.h [new file with mode: 0644]
include/net/ethernet.h [new file with mode: 0644]
include/net/if.h [new file with mode: 0644]
include/net/if_arp.h [new file with mode: 0644]
include/net/if_types.h [new file with mode: 0644]
include/net/if_vlan_var.h [new file with mode: 0644]
include/netinet/if_ether.h [new file with mode: 0644]
include/netinet/ip6.h [new file with mode: 0644]
include/netinet/tcp_lro.h [new file with mode: 0644]
include/sys/tree.h [new file with mode: 0644]
lib/barrelfish/threads.c
lib/devif/backends/net/e1000/e1000.c
lib/devif/backends/net/e1000/e1000.h
lib/devif/backends/net/mlx4/Hakefile [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/Kconfig [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/Makefile [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/Makefile [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/addr.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/agent.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/agent.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/cache.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/cm.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/cm_msgs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/cma.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/core_priv.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/device.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/fmr_pool.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/iwcm.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/iwcm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/local_sa.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/mad.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/mad_priv.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/mad_rmpp.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/mad_rmpp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/multicast.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/notice.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/packer.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/sa.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/sa_query.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/smi.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/smi.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/sysfs.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/ucm.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/ucma.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/ud_header.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/umem.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/user_mad.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/uverbs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/uverbs_cmd.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/uverbs_main.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/uverbs_marshall.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/core/verbs.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/Kconfig [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/Makefile [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/ah.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/alias_GUID.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/cm.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/cq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/doorbell.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/mad.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/main.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/mcg.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/mlx4_ib.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/mr.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/qp.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/srq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/sysfs.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/user.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/wc.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mlx4/wc.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/Kconfig [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/Makefile [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_allocator.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_av.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_catas.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_cmd.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_cmd.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_config_reg.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_cq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_dev.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_doorbell.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_eq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_mad.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_main.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_mcg.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_memfree.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_memfree.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_mr.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_pd.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_profile.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_profile.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_provider.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_provider.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_qp.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_reset.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_srq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_uar.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_user.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/infiniband/hw/mthca/mthca_wqe.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/alloc.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/cmd.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/cq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_cq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_main.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_netdev.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_port.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_port.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_resources.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_rx.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/en_tx.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/eq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/fw.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/fw.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/icm.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/icm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mcg.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mlx4.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mlx4_devif_queue.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mlx4_devif_queue.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mlx4_en.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mlx4_stats.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/mr.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/pd.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/port.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/profile.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/qp.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/reset.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/resource_tracker.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/net/mlx4/srq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/test_ib.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/drivers/test_wq.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/atomic-long.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/atomic.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/byteorder.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/fcntl.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/io.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/pgtable.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/types.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/asm/uaccess.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/debug.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/arch.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/driver.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/kern-abi.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/marshall.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/opcode.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/sa-kern-abi.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/sa.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/infiniband/verbs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/bitops.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/cache.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/cdev.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/clocksource.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/compat.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/compiler.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/completion.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/delay.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/device.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/dma-attrs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/dma-mapping.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/dmapool.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/err.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/errno.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/etherdevice.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/file.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/fs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/gfp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/hardirq.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/idr.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/if_arp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/if_ether.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/if_vlan.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/in.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/in6.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/inetdevice.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/interrupt.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/io-mapping.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/io.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/ioctl.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/jhash.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/jiffies.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/kdev_t.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/kernel.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/kmod.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/kobject.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/kref.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/kthread.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/ktime.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/linux_compat.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/linux_idr.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/linux_radix.c [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/list.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/lockdep.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/log2.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/math64.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/miscdevice.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/cmd.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/cq.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/device.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/doorbell.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/driver.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/qp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mlx4/srq.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/module.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/moduleparam.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/mutex.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/net.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/net/ethernet.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/net/if_types.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/netdevice.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/notifier.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/page.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/pci.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/poll.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/printk.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/radix-tree.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/random.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/rbtree.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/rwlock.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/rwsem.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/scatterlist.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/sched.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/semaphore.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/slab.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/socket.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/spinlock.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/srcu.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/string.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/sysfs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/timer.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/types.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/uaccess.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/vmalloc.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/wait.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/linux/workqueue.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/mlx4en.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/mlx4ib.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/net/if_inet6.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/net/if_vlan_var.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/net/ip.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/net/ipv6.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/net/netevent.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/net/tcp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/netinet/ip.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/netinet/ip6.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/netinet/tcp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/Kbuild [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_addr.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_cache.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_cm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_fmr_pool.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_mad.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_marshall.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_pack.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_peer_mem.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_pma.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_sa.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_smi.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_umem.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_user_cm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_user_mad.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_user_sa.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_user_verbs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_user_verbs_exp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_verbs.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/ib_verbs_exp.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/iw_cm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/peer_mem.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/rdma_cm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/rdma_cm_ib.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/rdma_user_cm.h [new file with mode: 0644]
lib/devif/backends/net/mlx4/include/rdma/sdp_socket.h [new file with mode: 0644]
lib/net/Hakefile
lib/net/net_queue.c
lib/net/networking_internal.h
usr/net_socket_server/Hakefile
usr/skb/programs/device_db.pl

index d0b028b..6ca6a6c 100644 (file)
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *     @(#)endian.h    7.8 (Berkeley) 4/3/91
- * $FreeBSD: src/sys/amd64/include/endian.h,v 1.8 2005/03/11 21:46:01 peter Exp $
+ * $FreeBSD$
  */
 
 #ifndef _MACHINE_ENDIAN_H_
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /*
  * Define the order of 32-bit words in 64-bit words.
  */
@@ -61,123 +57,70 @@ extern "C" {
  * strict namespaces.
  */
 #if __BSD_VISIBLE
-#ifndef LITTLE_ENDIAN
 #define        LITTLE_ENDIAN   _LITTLE_ENDIAN
-#endif
-#ifndef BIG_ENDIAN
 #define        BIG_ENDIAN      _BIG_ENDIAN
-#endif
 #define        PDP_ENDIAN      _PDP_ENDIAN
 #define        BYTE_ORDER      _BYTE_ORDER
 #endif
 
-#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
-
-#define __word_swap_int_var(x) \
-__extension__ ({ register __uint32_t __X = (x); \
-   __asm ("rorl $16, %0" : "+r" (__X)); \
-   __X; })
-
-#ifdef __OPTIMIZE__
-
-#define        __word_swap_int_const(x) \
-       ((((x) & 0xffff0000) >> 16) | \
-        (((x) & 0x0000ffff) << 16))
-#define        __word_swap_int(x) (__builtin_constant_p(x) ? \
-       __word_swap_int_const(x) : __word_swap_int_var(x))
-
-#else  /* __OPTIMIZE__ */
-
-#define        __word_swap_int(x) __word_swap_int_var(x)
-
-#endif /* __OPTIMIZE__ */
-
-#define __byte_swap_int_var(x) \
-__extension__ ({ register __uint32_t __X = (x); \
-   __asm ("bswap %0" : "+r" (__X)); \
-   __X; })
-
-#ifdef __OPTIMIZE__
-
-#define        __byte_swap_int_const(x) \
-       ((((x) & 0xff000000) >> 24) | \
-        (((x) & 0x00ff0000) >>  8) | \
-        (((x) & 0x0000ff00) <<  8) | \
-        (((x) & 0x000000ff) << 24))
-#define        __byte_swap_int(x) (__builtin_constant_p(x) ? \
-       __byte_swap_int_const(x) : __byte_swap_int_var(x))
-
-#else  /* __OPTIMIZE__ */
-
-#define        __byte_swap_int(x) __byte_swap_int_var(x)
-
-#endif /* __OPTIMIZE__ */
-
-#define __byte_swap_long_var(x) \
-__extension__ ({ register __uint64_t __X = (x); \
-   __asm ("bswap %0" : "+r" (__X)); \
-   __X; })
-
-#ifdef __OPTIMIZE__
-
-#define        __byte_swap_long_const(x) \
-       (((x >> 56) | \
-        ((x >> 40) & 0xff00) | \
-        ((x >> 24) & 0xff0000) | \
-        ((x >> 8) & 0xff000000) | \
-        ((x << 8) & (0xfful << 32)) | \
-        ((x << 24) & (0xfful << 40)) | \
-        ((x << 40) & (0xfful << 48)) | \
-        ((x << 56))))
-
-#define        __byte_swap_long(x) (__builtin_constant_p(x) ? \
-       __byte_swap_long_const(x) : __byte_swap_long_var(x))
-
-#else  /* __OPTIMIZE__ */
-
-#define        __byte_swap_long(x) __byte_swap_long_var(x)
-
-#endif /* __OPTIMIZE__ */
-
-#define __byte_swap_word_var(x) \
-__extension__ ({ register __uint16_t __X = (x); \
-   __asm ("xchgb %h0, %b0" : "+Q" (__X)); \
-   __X; })
-
-#ifdef __OPTIMIZE__
-
-#define        __byte_swap_word_const(x) \
-       ((((x) & 0xff00) >> 8) | \
-        (((x) & 0x00ff) << 8))
-
-#define        __byte_swap_word(x) (__builtin_constant_p(x) ? \
-       __byte_swap_word_const(x) : __byte_swap_word_var(x))
-
-#else  /* __OPTIMIZE__ */
-
-#define        __byte_swap_word(x) __byte_swap_word_var(x)
+#define        __bswap16_gen(x)        (__uint16_t)((x) << 8 | (x) >> 8)
+#define        __bswap32_gen(x)                \
+       (((__uint32_t)__bswap16((x) & 0xffff) << 16) | __bswap16((x) >> 16))
+#define        __bswap64_gen(x)                \
+       (((__uint64_t)__bswap32((x) & 0xffffffff) << 32) | __bswap32((x) >> 32))
+
+#ifdef __GNUCLIKE_BUILTIN_CONSTANT_P
+#define        __bswap16(x)                            \
+       ((__uint16_t)(__builtin_constant_p(x) ? \
+           __bswap16_gen((__uint16_t)(x)) : __bswap16_var(x)))
+#define        __bswap32(x)                    \
+       (__builtin_constant_p(x) ?      \
+           __bswap32_gen((__uint32_t)(x)) : __bswap32_var(x))
+#define        __bswap64(x)                    \
+       (__builtin_constant_p(x) ?      \
+           __bswap64_gen((__uint64_t)(x)) : __bswap64_var(x))
+#else
+/* XXX these are broken for use in static initializers. */
+#define        __bswap16(x)    __bswap16_var(x)
+#define        __bswap32(x)    __bswap32_var(x)
+#define        __bswap64(x)    __bswap64_var(x)
+#endif
 
-#endif /* __OPTIMIZE__ */
+/* These are defined as functions to avoid multiple evaluation of x. */
 
-static __inline __uint64_t
-__bswap64(__uint64_t _x)
+static __inline __uint16_t
+__bswap16_var(__uint16_t _x)
 {
 
-       return (__byte_swap_long(_x));
+       return (__bswap16_gen(_x));
 }
 
 static __inline __uint32_t
-__bswap32(__uint32_t _x)
+__bswap32_var(__uint32_t _x)
 {
 
-       return (__byte_swap_int(_x));
+#ifdef __GNUCLIKE_ASM
+       __asm("bswap %0" : "+r" (_x));
+       return (_x);
+#else
+       return (__bswap32_gen(_x));
+#endif
 }
 
-static __inline __uint16_t
-__bswap16(__uint16_t _x)
+static __inline __uint64_t
+__bswap64_var(__uint64_t _x)
 {
 
-       return (__byte_swap_word(_x));
+#if defined(__amd64__) && defined(__GNUCLIKE_ASM)
+       __asm("bswap %0" : "+r" (_x));
+       return (_x);
+#else
+       /*
+        * It is important for the optimizations that the following is not
+        * really generic, but expands to 2 __bswap32_var()'s.
+        */
+       return (__bswap64_gen(_x));
+#endif
 }
 
 #define        __htonl(x)      __bswap32(x)
@@ -185,19 +128,4 @@ __bswap16(__uint16_t _x)
 #define        __ntohl(x)      __bswap32(x)
 #define        __ntohs(x)      __bswap16(x)
 
-#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P) */
-
-/*
- * No optimizations are available for this compiler.  Fall back to
- * non-optimized functions by defining the constant usually used to prevent
- * redefinition.
- */
-#define        _BYTEORDER_FUNC_DEFINED
-
-#endif /* __GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
-
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* !_MACHINE_ENDIAN_H_ */
diff --git a/include/devif/backends/net/mlx4_devif.h b/include/devif/backends/net/mlx4_devif.h
new file mode 100644 (file)
index 0000000..9581315
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2017 ETH Zurich.
+ * All rights reserved.
+ *
+ * This file is distributed under the terms in the attached LICENSE file.
+ * If you do not find this file, copies can be found by writing to:
+ * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
+ */
+#ifndef MLX4_DEVIF_H
+#define MLX4_DEVIF_H
+
+struct mlx4_queue;
+
+// interrupt_mode: 0 - none, 1 - normal
+errval_t mlx4_queue_create(struct mlx4_queue** q, uint32_t vendor, uint32_t deviceid,
+    uint32_t bus, uint32_t device, uint32_t function, unsigned interrupt_mode,
+    void (*isr)(void *));
+
+#endif
diff --git a/include/net/ethernet.h b/include/net/ethernet.h
new file mode 100644 (file)
index 0000000..5ec9d20
--- /dev/null
@@ -0,0 +1,428 @@
+/*
+ * Fundamental constants relating to ethernet.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _NET_ETHERNET_H_
+#define _NET_ETHERNET_H_
+
+/*
+ * Some basic Ethernet constants.
+ */
+#define        ETHER_ADDR_LEN          6       /* length of an Ethernet address */
+#define        ETHER_TYPE_LEN          2       /* length of the Ethernet type field */
+#define        ETHER_CRC_LEN           4       /* length of the Ethernet CRC */
+#define        ETHER_HDR_LEN           (ETHER_ADDR_LEN*2+ETHER_TYPE_LEN)
+#define        ETHER_MIN_LEN           64      /* minimum frame len, including CRC */
+#define        ETHER_MAX_LEN           1518    /* maximum frame len, including CRC */
+#define        ETHER_MAX_LEN_JUMBO     9018    /* max jumbo frame len, including CRC */
+
+#define        ETHER_VLAN_ENCAP_LEN    4       /* len of 802.1Q VLAN encapsulation */
+/*
+ * Mbuf adjust factor to force 32-bit alignment of IP header.
+ * Drivers should do m_adj(m, ETHER_ALIGN) when setting up a
+ * receive so the upper layers get the IP header properly aligned
+ * past the 14-byte Ethernet header.
+ */
+#define        ETHER_ALIGN             2       /* driver adjust for IP hdr alignment */
+
+/*
+ * Compute the maximum frame size based on ethertype (i.e. possible
+ * encapsulation) and whether or not an FCS is present.
+ */
+#define        ETHER_MAX_FRAME(ifp, etype, hasfcs)                             \
+       ((ifp)->if_mtu + ETHER_HDR_LEN +                                \
+        ((hasfcs) ? ETHER_CRC_LEN : 0) +                               \
+        (((etype) == ETHERTYPE_VLAN) ? ETHER_VLAN_ENCAP_LEN : 0))
+
+/*
+ * Ethernet-specific mbuf flags.
+ */
+#define        M_HASFCS        M_PROTO5        /* FCS included at end of frame */
+
+/*
+ * Ethernet CRC32 polynomials (big- and little-endian versions).
+ */
+#define        ETHER_CRC_POLY_LE       0xedb88320
+#define        ETHER_CRC_POLY_BE       0x04c11db6
+
+/*
+ * A macro to check a frame length against ETHER_MIN_LEN and ETHER_MAX_LEN.
+ */
+#define        ETHER_IS_VALID_LEN(foo) \
+       ((foo) >= ETHER_MIN_LEN && (foo) <= ETHER_MAX_LEN)
+
+/*
+ * Structure of a 10Mb/s Ethernet header.
+ */
+struct ether_header {
+       u_char  ether_dhost[ETHER_ADDR_LEN];
+       u_char  ether_shost[ETHER_ADDR_LEN];
+       u_short ether_type;
+} __packed;
+
+/*
+ * Structure of a 48-bit Ethernet address.
+ */
+struct ether_addr {
+       u_char octet[ETHER_ADDR_LEN];
+} __packed;
+
+#define        ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
+#define        ETHER_IS_BROADCAST(addr) \
+       (((addr)[0] & (addr)[1] & (addr)[2] & \
+         (addr)[3] & (addr)[4] & (addr)[5]) == 0xff)
+
+/*
+ * 802.1q Virtual LAN header.
+ */
+struct ether_vlan_header {
+       uint8_t evl_dhost[ETHER_ADDR_LEN];
+       uint8_t evl_shost[ETHER_ADDR_LEN];
+       uint16_t evl_encap_proto;
+       uint16_t evl_tag;
+       uint16_t evl_proto;
+} __packed;
+
+#define        EVL_VLID_MASK           0x0FFF  /* tag bits 0-11: VLAN ID */
+#define        EVL_PRI_MASK            0xE000  /* tag bits 13-15: priority */
+#define        EVL_VLANOFTAG(tag)      ((tag) & EVL_VLID_MASK)
+#define        EVL_PRIOFTAG(tag)       (((tag) >> 13) & 7)     /* bits 13-15 */
+#define        EVL_CFIOFTAG(tag)       (((tag) >> 12) & 1)     /* bit 12 */
+#define        EVL_MAKETAG(vlid, pri, cfi)     /* pri -> 13-15, cfi -> 12 */  \
+       ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
+
+/*
+ *  NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields.
+ *  However, there are some conflicts.
+ */
+
+#define        ETHERTYPE_8023          0x0004  /* IEEE 802.3 packet */
+                  /* 0x0101 .. 0x1FF      Experimental */
+#define        ETHERTYPE_PUP           0x0200  /* Xerox PUP protocol - see 0A00 */
+#define        ETHERTYPE_PUPAT         0x0200  /* PUP Address Translation - see 0A01 */
+#define        ETHERTYPE_SPRITE        0x0500  /* ??? */
+                            /* 0x0400     Nixdorf */
+#define        ETHERTYPE_NS            0x0600  /* XNS */
+#define        ETHERTYPE_NSAT          0x0601  /* XNS Address Translation (3Mb only) */
+#define        ETHERTYPE_DLOG1         0x0660  /* DLOG (?) */
+#define        ETHERTYPE_DLOG2         0x0661  /* DLOG (?) */
+#define        ETHERTYPE_IP            0x0800  /* IP protocol */
+#define        ETHERTYPE_X75           0x0801  /* X.75 Internet */
+#define        ETHERTYPE_NBS           0x0802  /* NBS Internet */
+#define        ETHERTYPE_ECMA          0x0803  /* ECMA Internet */
+#define        ETHERTYPE_CHAOS         0x0804  /* CHAOSnet */
+#define        ETHERTYPE_X25           0x0805  /* X.25 Level 3 */
+#define        ETHERTYPE_ARP           0x0806  /* Address resolution protocol */
+#define        ETHERTYPE_NSCOMPAT      0x0807  /* XNS Compatibility */
+#define        ETHERTYPE_FRARP         0x0808  /* Frame Relay ARP (RFC1701) */
+                            /* 0x081C     Symbolics Private */
+                   /* 0x0888 - 0x088A     Xyplex */
+#define        ETHERTYPE_UBDEBUG       0x0900  /* Ungermann-Bass network debugger */
+#define        ETHERTYPE_IEEEPUP       0x0A00  /* Xerox IEEE802.3 PUP */
+#define        ETHERTYPE_IEEEPUPAT     0x0A01  /* Xerox IEEE802.3 PUP Address Translation */
+#define        ETHERTYPE_VINES         0x0BAD  /* Banyan VINES */
+#define        ETHERTYPE_VINESLOOP     0x0BAE  /* Banyan VINES Loopback */
+#define        ETHERTYPE_VINESECHO     0x0BAF  /* Banyan VINES Echo */
+
+/*                    0x1000 - 0x100F     Berkeley Trailer */
+/*
+ * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have
+ * (type-ETHERTYPE_TRAIL)*512 bytes of data followed
+ * by an ETHER type (as given above) and then the (variable-length) header.
+ */
+#define        ETHERTYPE_TRAIL         0x1000  /* Trailer packet */
+#define        ETHERTYPE_NTRAILER      16
+
+#define        ETHERTYPE_DCA           0x1234  /* DCA - Multicast */
+#define        ETHERTYPE_VALID         0x1600  /* VALID system protocol */
+#define        ETHERTYPE_DOGFIGHT      0x1989  /* Artificial Horizons ("Aviator" dogfight simulator [on Sun]) */
+#define        ETHERTYPE_RCL           0x1995  /* Datapoint Corporation (RCL lan protocol) */
+
+                                       /* The following 3C0x types
+                                          are unregistered: */
+#define        ETHERTYPE_NBPVCD        0x3C00  /* 3Com NBP virtual circuit datagram (like XNS SPP) not registered */
+#define        ETHERTYPE_NBPSCD        0x3C01  /* 3Com NBP System control datagram not registered */
+#define        ETHERTYPE_NBPCREQ       0x3C02  /* 3Com NBP Connect request (virtual cct) not registered */
+#define        ETHERTYPE_NBPCRSP       0x3C03  /* 3Com NBP Connect response not registered */
+#define        ETHERTYPE_NBPCC         0x3C04  /* 3Com NBP Connect complete not registered */
+#define        ETHERTYPE_NBPCLREQ      0x3C05  /* 3Com NBP Close request (virtual cct) not registered */
+#define        ETHERTYPE_NBPCLRSP      0x3C06  /* 3Com NBP Close response not registered */
+#define        ETHERTYPE_NBPDG         0x3C07  /* 3Com NBP Datagram (like XNS IDP) not registered */
+#define        ETHERTYPE_NBPDGB        0x3C08  /* 3Com NBP Datagram broadcast not registered */
+#define        ETHERTYPE_NBPCLAIM      0x3C09  /* 3Com NBP Claim NetBIOS name not registered */
+#define        ETHERTYPE_NBPDLTE       0x3C0A  /* 3Com NBP Delete NetBIOS name not registered */
+#define        ETHERTYPE_NBPRAS        0x3C0B  /* 3Com NBP Remote adaptor status request not registered */
+#define        ETHERTYPE_NBPRAR        0x3C0C  /* 3Com NBP Remote adaptor response not registered */
+#define        ETHERTYPE_NBPRST        0x3C0D  /* 3Com NBP Reset not registered */
+
+#define        ETHERTYPE_PCS           0x4242  /* PCS Basic Block Protocol */
+#define        ETHERTYPE_IMLBLDIAG     0x424C  /* Information Modes Little Big LAN diagnostic */
+#define        ETHERTYPE_DIDDLE        0x4321  /* THD - Diddle */
+#define        ETHERTYPE_IMLBL         0x4C42  /* Information Modes Little Big LAN */
+#define        ETHERTYPE_SIMNET        0x5208  /* BBN Simnet Private */
+#define        ETHERTYPE_DECEXPER      0x6000  /* DEC Unassigned, experimental */
+#define        ETHERTYPE_MOPDL         0x6001  /* DEC MOP dump/load */
+#define        ETHERTYPE_MOPRC         0x6002  /* DEC MOP remote console */
+#define        ETHERTYPE_DECnet        0x6003  /* DEC DECNET Phase IV route */
+#define        ETHERTYPE_DN            ETHERTYPE_DECnet        /* libpcap, tcpdump */
+#define        ETHERTYPE_LAT           0x6004  /* DEC LAT */
+#define        ETHERTYPE_DECDIAG       0x6005  /* DEC diagnostic protocol (at interface initialization?) */
+#define        ETHERTYPE_DECCUST       0x6006  /* DEC customer protocol */
+#define        ETHERTYPE_SCA           0x6007  /* DEC LAVC, SCA */
+#define        ETHERTYPE_AMBER         0x6008  /* DEC AMBER */
+#define        ETHERTYPE_DECMUMPS      0x6009  /* DEC MUMPS */
+                   /* 0x6010 - 0x6014     3Com Corporation */
+#define        ETHERTYPE_TRANSETHER    0x6558  /* Trans Ether Bridging (RFC1701)*/
+#define        ETHERTYPE_RAWFR         0x6559  /* Raw Frame Relay (RFC1701) */
+#define        ETHERTYPE_UBDL          0x7000  /* Ungermann-Bass download */
+#define        ETHERTYPE_UBNIU         0x7001  /* Ungermann-Bass NIUs */
+#define        ETHERTYPE_UBDIAGLOOP    0x7002  /* Ungermann-Bass diagnostic/loopback */
+#define        ETHERTYPE_UBNMC         0x7003  /* Ungermann-Bass ??? (NMC to/from UB Bridge) */
+#define        ETHERTYPE_UBBST         0x7005  /* Ungermann-Bass Bridge Spanning Tree */
+#define        ETHERTYPE_OS9           0x7007  /* OS/9 Microware */
+#define        ETHERTYPE_OS9NET        0x7009  /* OS/9 Net? */
+                   /* 0x7020 - 0x7029     LRT (England) (now Sintrom) */
+#define        ETHERTYPE_RACAL         0x7030  /* Racal-Interlan */
+#define        ETHERTYPE_PRIMENTS      0x7031  /* Prime NTS (Network Terminal Service) */
+#define        ETHERTYPE_CABLETRON     0x7034  /* Cabletron */
+#define        ETHERTYPE_CRONUSVLN     0x8003  /* Cronus VLN */
+#define        ETHERTYPE_CRONUS        0x8004  /* Cronus Direct */
+#define        ETHERTYPE_HP            0x8005  /* HP Probe */
+#define        ETHERTYPE_NESTAR        0x8006  /* Nestar */
+#define        ETHERTYPE_ATTSTANFORD   0x8008  /* AT&T/Stanford (local use) */
+#define        ETHERTYPE_EXCELAN       0x8010  /* Excelan */
+#define        ETHERTYPE_SG_DIAG       0x8013  /* SGI diagnostic type */
+#define        ETHERTYPE_SG_NETGAMES   0x8014  /* SGI network games */
+#define        ETHERTYPE_SG_RESV       0x8015  /* SGI reserved type */
+#define        ETHERTYPE_SG_BOUNCE     0x8016  /* SGI bounce server */
+#define        ETHERTYPE_APOLLODOMAIN  0x8019  /* Apollo DOMAIN */
+#define        ETHERTYPE_TYMSHARE      0x802E  /* Tymshare */
+#define        ETHERTYPE_TIGAN         0x802F  /* Tigan, Inc. */
+#define        ETHERTYPE_REVARP        0x8035  /* Reverse addr resolution protocol */
+#define        ETHERTYPE_AEONIC        0x8036  /* Aeonic Systems */
+#define        ETHERTYPE_IPXNEW        0x8037  /* IPX (Novell Netware?) */
+#define        ETHERTYPE_LANBRIDGE     0x8038  /* DEC LANBridge */
+#define        ETHERTYPE_DSMD  0x8039  /* DEC DSM/DDP */
+#define        ETHERTYPE_ARGONAUT      0x803A  /* DEC Argonaut Console */
+#define        ETHERTYPE_VAXELN        0x803B  /* DEC VAXELN */
+#define        ETHERTYPE_DECDNS        0x803C  /* DEC DNS Naming Service */
+#define        ETHERTYPE_ENCRYPT       0x803D  /* DEC Ethernet Encryption */
+#define        ETHERTYPE_DECDTS        0x803E  /* DEC Distributed Time Service */
+#define        ETHERTYPE_DECLTM        0x803F  /* DEC LAN Traffic Monitor */
+#define        ETHERTYPE_DECNETBIOS    0x8040  /* DEC PATHWORKS DECnet NETBIOS Emulation */
+#define        ETHERTYPE_DECLAST       0x8041  /* DEC Local Area System Transport */
+                            /* 0x8042     DEC Unassigned */
+#define        ETHERTYPE_PLANNING      0x8044  /* Planning Research Corp. */
+                   /* 0x8046 - 0x8047     AT&T */
+#define        ETHERTYPE_DECAM         0x8048  /* DEC Availability Manager for Distributed Systems DECamds (but someone at DEC says not) */
+#define        ETHERTYPE_EXPERDATA     0x8049  /* ExperData */
+#define        ETHERTYPE_VEXP          0x805B  /* Stanford V Kernel exp. */
+#define        ETHERTYPE_VPROD         0x805C  /* Stanford V Kernel prod. */
+#define        ETHERTYPE_ES            0x805D  /* Evans & Sutherland */
+#define        ETHERTYPE_LITTLE        0x8060  /* Little Machines */
+#define        ETHERTYPE_COUNTERPOINT  0x8062  /* Counterpoint Computers */
+                   /* 0x8065 - 0x8066     Univ. of Mass @ Amherst */
+#define        ETHERTYPE_VEECO         0x8067  /* Veeco Integrated Auto. */
+#define        ETHERTYPE_GENDYN        0x8068  /* General Dynamics */
+#define        ETHERTYPE_ATT           0x8069  /* AT&T */
+#define        ETHERTYPE_AUTOPHON      0x806A  /* Autophon */
+#define        ETHERTYPE_COMDESIGN     0x806C  /* ComDesign */
+#define        ETHERTYPE_COMPUGRAPHIC  0x806D  /* Compugraphic Corporation */
+                   /* 0x806E - 0x8077     Landmark Graphics Corp. */
+#define        ETHERTYPE_MATRA         0x807A  /* Matra */
+#define        ETHERTYPE_DDE           0x807B  /* Dansk Data Elektronik */
+#define        ETHERTYPE_MERIT         0x807C  /* Merit Internodal (or Univ of Michigan?) */
+                   /* 0x807D - 0x807F     Vitalink Communications */
+#define        ETHERTYPE_VLTLMAN       0x8080  /* Vitalink TransLAN III Management */
+                   /* 0x8081 - 0x8083     Counterpoint Computers */
+                   /* 0x8088 - 0x808A     Xyplex */
+#define        ETHERTYPE_ATALK         0x809B  /* AppleTalk */
+#define        ETHERTYPE_AT            ETHERTYPE_ATALK         /* old NetBSD */
+#define        ETHERTYPE_APPLETALK     ETHERTYPE_ATALK         /* HP-UX */
+                   /* 0x809C - 0x809E     Datability */
+#define        ETHERTYPE_SPIDER        0x809F  /* Spider Systems Ltd. */
+                            /* 0x80A3     Nixdorf */
+                   /* 0x80A4 - 0x80B3     Siemens Gammasonics Inc. */
+                   /* 0x80C0 - 0x80C3     DCA (Digital Comm. Assoc.) Data Exchange Cluster */
+                   /* 0x80C4 - 0x80C5     Banyan Systems */
+#define        ETHERTYPE_PACER         0x80C6  /* Pacer Software */
+#define        ETHERTYPE_APPLITEK      0x80C7  /* Applitek Corporation */
+                   /* 0x80C8 - 0x80CC     Intergraph Corporation */
+                   /* 0x80CD - 0x80CE     Harris Corporation */
+                   /* 0x80CF - 0x80D2     Taylor Instrument */
+                   /* 0x80D3 - 0x80D4     Rosemount Corporation */
+#define        ETHERTYPE_SNA           0x80D5  /* IBM SNA Services over Ethernet */
+#define        ETHERTYPE_VARIAN        0x80DD  /* Varian Associates */
+                   /* 0x80DE - 0x80DF     TRFS (Integrated Solutions Transparent Remote File System) */
+                   /* 0x80E0 - 0x80E3     Allen-Bradley */
+                   /* 0x80E4 - 0x80F0     Datability */
+#define        ETHERTYPE_RETIX         0x80F2  /* Retix */
+#define        ETHERTYPE_AARP          0x80F3  /* AppleTalk AARP */
+                   /* 0x80F4 - 0x80F5     Kinetics */
+#define        ETHERTYPE_APOLLO        0x80F7  /* Apollo Computer */
+#define ETHERTYPE_VLAN         0x8100  /* IEEE 802.1Q VLAN tagging (XXX conflicts) */
+                   /* 0x80FF - 0x8101     Wellfleet Communications (XXX conflicts) */
+#define        ETHERTYPE_BOFL          0x8102  /* Wellfleet; BOFL (Breath OF Life) pkts [every 5-10 secs.] */
+#define        ETHERTYPE_WELLFLEET     0x8103  /* Wellfleet Communications */
+                   /* 0x8107 - 0x8109     Symbolics Private */
+#define        ETHERTYPE_TALARIS       0x812B  /* Talaris */
+#define        ETHERTYPE_WATERLOO      0x8130  /* Waterloo Microsystems Inc. (XXX which?) */
+#define        ETHERTYPE_HAYES         0x8130  /* Hayes Microcomputers (XXX which?) */
+#define        ETHERTYPE_VGLAB         0x8131  /* VG Laboratory Systems */
+                   /* 0x8132 - 0x8137     Bridge Communications */
+#define        ETHERTYPE_IPX           0x8137  /* Novell (old) NetWare IPX (ECONFIG E option) */
+#define        ETHERTYPE_NOVELL        0x8138  /* Novell, Inc. */
+                   /* 0x8139 - 0x813D     KTI */
+#define        ETHERTYPE_MUMPS         0x813F  /* M/MUMPS data sharing */
+#define        ETHERTYPE_AMOEBA        0x8145  /* Vrije Universiteit (NL) Amoeba 4 RPC (obsolete) */
+#define        ETHERTYPE_FLIP          0x8146  /* Vrije Universiteit (NL) FLIP (Fast Local Internet Protocol) */
+#define        ETHERTYPE_VURESERVED    0x8147  /* Vrije Universiteit (NL) [reserved] */
+#define        ETHERTYPE_LOGICRAFT     0x8148  /* Logicraft */
+#define        ETHERTYPE_NCD           0x8149  /* Network Computing Devices */
+#define        ETHERTYPE_ALPHA         0x814A  /* Alpha Micro */
+#define        ETHERTYPE_SNMP          0x814C  /* SNMP over Ethernet (see RFC1089) */
+                   /* 0x814D - 0x814E     BIIN */
+#define        ETHERTYPE_TEC   0x814F  /* Technically Elite Concepts */
+#define        ETHERTYPE_RATIONAL      0x8150  /* Rational Corp */
+                   /* 0x8151 - 0x8153     Qualcomm */
+                   /* 0x815C - 0x815E     Computer Protocol Pty Ltd */
+                   /* 0x8164 - 0x8166     Charles River Data Systems */
+#define        ETHERTYPE_XTP           0x817D  /* Protocol Engines XTP */
+#define        ETHERTYPE_SGITW         0x817E  /* SGI/Time Warner prop. */
+#define        ETHERTYPE_HIPPI_FP      0x8180  /* HIPPI-FP encapsulation */
+#define        ETHERTYPE_STP           0x8181  /* Scheduled Transfer STP, HIPPI-ST */
+                   /* 0x8182 - 0x8183     Reserved for HIPPI-6400 */
+                   /* 0x8184 - 0x818C     SGI prop. */
+#define        ETHERTYPE_MOTOROLA      0x818D  /* Motorola */
+#define        ETHERTYPE_NETBEUI       0x8191  /* PowerLAN NetBIOS/NetBEUI (PC) */
+                   /* 0x819A - 0x81A3     RAD Network Devices */
+                   /* 0x81B7 - 0x81B9     Xyplex */
+                   /* 0x81CC - 0x81D5     Apricot Computers */
+                   /* 0x81D6 - 0x81DD     Artisoft Lantastic */
+                   /* 0x81E6 - 0x81EF     Polygon */
+                   /* 0x81F0 - 0x81F2     Comsat Labs */
+                   /* 0x81F3 - 0x81F5     SAIC */
+                   /* 0x81F6 - 0x81F8     VG Analytical */
+                   /* 0x8203 - 0x8205     QNX Software Systems Ltd. */
+                   /* 0x8221 - 0x8222     Ascom Banking Systems */
+                   /* 0x823E - 0x8240     Advanced Encryption Systems */
+                   /* 0x8263 - 0x826A     Charles River Data Systems */
+                   /* 0x827F - 0x8282     Athena Programming */
+                   /* 0x829A - 0x829B     Inst Ind Info Tech */
+                   /* 0x829C - 0x82AB     Taurus Controls */
+                   /* 0x82AC - 0x8693     Walker Richer & Quinn */
+#define        ETHERTYPE_ACCTON        0x8390  /* Accton Technologies (unregistered) */
+#define        ETHERTYPE_TALARISMC     0x852B  /* Talaris multicast */
+#define        ETHERTYPE_KALPANA       0x8582  /* Kalpana */
+                   /* 0x8694 - 0x869D     Idea Courier */
+                   /* 0x869E - 0x86A1     Computer Network Tech */
+                   /* 0x86A3 - 0x86AC     Gateway Communications */
+#define        ETHERTYPE_SECTRA        0x86DB  /* SECTRA */
+#define        ETHERTYPE_IPV6          0x86DD  /* IP protocol version 6 */
+#define        ETHERTYPE_DELTACON      0x86DE  /* Delta Controls */
+#define        ETHERTYPE_ATOMIC        0x86DF  /* ATOMIC */
+                   /* 0x86E0 - 0x86EF     Landis & Gyr Powers */
+                   /* 0x8700 - 0x8710     Motorola */
+#define        ETHERTYPE_RDP           0x8739  /* Control Technology Inc. RDP Without IP */
+#define        ETHERTYPE_MICP          0x873A  /* Control Technology Inc. Mcast Industrial Ctrl Proto. */
+                   /* 0x873B - 0x873C     Control Technology Inc. Proprietary */
+#define        ETHERTYPE_TCPCOMP       0x876B  /* TCP/IP Compression (RFC1701) */
+#define        ETHERTYPE_IPAS          0x876C  /* IP Autonomous Systems (RFC1701) */
+#define        ETHERTYPE_SECUREDATA    0x876D  /* Secure Data (RFC1701) */
+#define        ETHERTYPE_FLOWCONTROL   0x8808  /* 802.3x flow control packet */
+#define        ETHERTYPE_SLOW          0x8809  /* 802.3ad link aggregation (LACP) */
+#define        ETHERTYPE_PPP           0x880B  /* PPP (obsolete by PPPoE) */
+#define        ETHERTYPE_HITACHI       0x8820  /* Hitachi Cable (Optoelectronic Systems Laboratory) */
+#define ETHERTYPE_TEST         0x8822  /* Network Conformance Testing */
+#define        ETHERTYPE_MPLS          0x8847  /* MPLS Unicast */
+#define        ETHERTYPE_MPLS_MCAST    0x8848  /* MPLS Multicast */
+#define        ETHERTYPE_AXIS          0x8856  /* Axis Communications AB proprietary bootstrap/config */
+#define        ETHERTYPE_PPPOEDISC     0x8863  /* PPP Over Ethernet Discovery Stage */
+#define        ETHERTYPE_PPPOE         0x8864  /* PPP Over Ethernet Session Stage */
+#define        ETHERTYPE_LANPROBE      0x8888  /* HP LanProbe test? */
+#define        ETHERTYPE_PAE           0x888e  /* EAPOL PAE/802.1x */
+#define        ETHERTYPE_LOOPBACK      0x9000  /* Loopback: used to test interfaces */
+#define        ETHERTYPE_LBACK         ETHERTYPE_LOOPBACK      /* DEC MOP loopback */
+#define        ETHERTYPE_XNSSM         0x9001  /* 3Com (Formerly Bridge Communications), XNS Systems Management */
+#define        ETHERTYPE_TCPSM         0x9002  /* 3Com (Formerly Bridge Communications), TCP/IP Systems Management */
+#define        ETHERTYPE_BCLOOP        0x9003  /* 3Com (Formerly Bridge Communications), loopback detection */
+#define        ETHERTYPE_DEBNI         0xAAAA  /* DECNET? Used by VAX 6220 DEBNI */
+#define        ETHERTYPE_SONIX         0xFAF5  /* Sonix Arpeggio */
+#define        ETHERTYPE_VITAL         0xFF00  /* BBN VITAL-LanBridge cache wakeups */
+                   /* 0xFF00 - 0xFF0F     ISC Bunker Ramo */
+
+#define        ETHERTYPE_MAX           0xFFFF  /* Maximum valid ethernet type, reserved */
+
+/*
+ * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have
+ * (type-ETHERTYPE_TRAIL)*512 bytes of data followed
+ * by an ETHER type (as given above) and then the (variable-length) header.
+ */
+#define        ETHERTYPE_TRAIL         0x1000          /* Trailer packet */
+#define        ETHERTYPE_NTRAILER      16
+
+#define        ETHERMTU        (ETHER_MAX_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
+#define        ETHERMIN        (ETHER_MIN_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
+#define        ETHERMTU_JUMBO  (ETHER_MAX_LEN_JUMBO - ETHER_HDR_LEN - ETHER_CRC_LEN)
+/*
+ * The ETHER_BPF_MTAP macro should be used by drivers which support hardware
+ * offload for VLAN tag processing.  It will check the mbuf to see if it has
+ * M_VLANTAG set, and if it does, will pass the packet along to
+ * ether_vlan_mtap.  This function will re-insert VLAN tags for the duration
+ * of the tap, so they show up properly for network analyzers.
+ */
+#define ETHER_BPF_MTAP(_ifp, _m) do {                                  \
+       if (bpf_peers_present((_ifp)->if_bpf)) {                        \
+               M_ASSERTVALID(_m);                                      \
+               if (((_m)->m_flags & M_VLANTAG) != 0)                   \
+                       ether_vlan_mtap((_ifp)->if_bpf, (_m), NULL, 0); \
+               else                                                    \
+                       bpf_mtap((_ifp)->if_bpf, (_m));                 \
+       }                                                               \
+} while (0)
+
+#ifdef _KERNEL
+
+struct ifnet;
+struct mbuf;
+struct route;
+struct sockaddr;
+struct bpf_if;
+
+extern uint32_t ether_crc32_le(const uint8_t *, size_t);
+extern uint32_t ether_crc32_be(const uint8_t *, size_t);
+extern void ether_demux(struct ifnet *, struct mbuf *);
+extern void ether_ifattach(struct ifnet *, const u_int8_t *);
+extern void ether_ifdetach(struct ifnet *);
+extern int  ether_ioctl(struct ifnet *, u_long, caddr_t);
+extern int  ether_output(struct ifnet *, struct mbuf *,
+           const struct sockaddr *, struct route *);
+extern int  ether_output_frame(struct ifnet *, struct mbuf *);
+extern char *ether_sprintf(const u_int8_t *);
+void   ether_vlan_mtap(struct bpf_if *, struct mbuf *,
+           void *, u_int);
+struct mbuf  *ether_vlanencap(struct mbuf *, uint16_t);
+
+#else /* _KERNEL */
+
+#include <sys/cdefs.h>
+
+/*
+ * Ethernet address conversion/parsing routines.
+ */
+__BEGIN_DECLS
+struct ether_addr *ether_aton(const char *);
+struct ether_addr *ether_aton_r(const char *, struct ether_addr *);
+int    ether_hostton(const char *, struct ether_addr *);
+int    ether_line(const char *, struct ether_addr *, char *);
+char   *ether_ntoa(const struct ether_addr *);
+char   *ether_ntoa_r(const struct ether_addr *, char *);
+int    ether_ntohost(char *, const struct ether_addr *);
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif /* !_NET_ETHERNET_H_ */
diff --git a/include/net/if.h b/include/net/if.h
new file mode 100644 (file)
index 0000000..98ae0a8
--- /dev/null
@@ -0,0 +1,550 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)if.h        8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_H_
+#define        _NET_IF_H_
+
+#include <sys/cdefs.h>
+
+#if __BSD_VISIBLE
+/*
+ * <net/if.h> does not depend on <sys/time.h> on most other systems.  This
+ * helps userland compatibility.  (struct timeval ifi_lastchange)
+ * The same holds for <sys/socket.h>.  (struct sockaddr ifru_addr)
+ */
+#ifndef _KERNEL
+#include <sys/time.h>
+#include <sys/socket.h>
+#endif
+#endif
+
+/*
+ * Length of interface external name, including terminating '\0'.
+ * Note: this is the same size as a generic device's external name.
+ */
+#define                IF_NAMESIZE     16
+#if __BSD_VISIBLE
+#define                IFNAMSIZ        IF_NAMESIZE
+#define                IF_MAXUNIT      0x7fff  /* historical value */
+#endif
+#if __BSD_VISIBLE
+
+/*
+ * Structure used to query names of interface cloners.
+ */
+
+struct if_clonereq {
+       int     ifcr_total;             /* total cloners (out) */
+       int     ifcr_count;             /* room for this many in user buffer */
+       char    *ifcr_buffer;           /* buffer for cloner names */
+};
+
+/*
+ * Structure describing information about an interface
+ * which may be of interest to management entities.
+ */
+struct if_data {
+       /* generic interface information */
+       uint8_t ifi_type;               /* ethernet, tokenring, etc */
+       uint8_t ifi_physical;           /* e.g., AUI, Thinnet, 10base-T, etc */
+       uint8_t ifi_addrlen;            /* media address length */
+       uint8_t ifi_hdrlen;             /* media header length */
+       uint8_t ifi_link_state;         /* current link state */
+       uint8_t ifi_vhid;               /* carp vhid */
+       uint16_t        ifi_datalen;    /* length of this data struct */
+       uint32_t        ifi_mtu;        /* maximum transmission unit */
+       uint32_t        ifi_metric;     /* routing metric (external only) */
+       uint64_t        ifi_baudrate;   /* linespeed */
+       /* volatile statistics */
+       uint64_t        ifi_ipackets;   /* packets received on interface */
+       uint64_t        ifi_ierrors;    /* input errors on interface */
+       uint64_t        ifi_opackets;   /* packets sent on interface */
+       uint64_t        ifi_oerrors;    /* output errors on interface */
+       uint64_t        ifi_collisions; /* collisions on csma interfaces */
+       uint64_t        ifi_ibytes;     /* total number of octets received */
+       uint64_t        ifi_obytes;     /* total number of octets sent */
+       uint64_t        ifi_imcasts;    /* packets received via multicast */
+       uint64_t        ifi_omcasts;    /* packets sent via multicast */
+       uint64_t        ifi_iqdrops;    /* dropped on input */
+       uint64_t        ifi_oqdrops;    /* dropped on output */
+       uint64_t        ifi_noproto;    /* destined for unsupported protocol */
+       uint64_t        ifi_hwassist;   /* HW offload capabilities, see IFCAP */
+
+       /* Unions are here to make sizes MI. */
+       union {                         /* uptime at attach or stat reset */
+               time_t          tt;
+               uint64_t        ph;
+       } __ifi_epoch;
+#define        ifi_epoch       __ifi_epoch.tt
+       union {                         /* time of last administrative change */
+               struct timeval  tv;
+               struct {
+                       uint64_t ph1;
+                       uint64_t ph2;
+               } ph;
+       } __ifi_lastchange;
+#define        ifi_lastchange  __ifi_lastchange.tv
+};
+
+/*-
+ * Interface flags are of two types: network stack owned flags, and driver
+ * owned flags.  Historically, these values were stored in the same ifnet
+ * flags field, but with the advent of fine-grained locking, they have been
+ * broken out such that the network stack is responsible for synchronizing
+ * the stack-owned fields, and the device driver the device-owned fields.
+ * Both halves can perform lockless reads of the other half's field, subject
+ * to accepting the involved races.
+ *
+ * Both sets of flags come from the same number space, and should not be
+ * permitted to conflict, as they are exposed to user space via a single
+ * field.
+ *
+ * The following symbols identify read and write requirements for fields:
+ *
+ * (i) if_flags field set by device driver before attach, read-only
+ *     thereafter.
+ * (n) if_flags field written only by the network stack, read by either the
+ *     stack or driver.
+ * (d) if_drv_flags field written only by the device driver, read by either
+ *     the stack or driver.
+ */
+#define        IFF_UP          0x1             /* (n) interface is up */
+#define        IFF_BROADCAST   0x2             /* (i) broadcast address valid */
+#define        IFF_DEBUG       0x4             /* (n) turn on debugging */
+#define        IFF_LOOPBACK    0x8             /* (i) is a loopback net */
+#define        IFF_POINTOPOINT 0x10            /* (i) is a point-to-point link */
+/*                     0x20               unassigned; was IFF_SMART */
+#define        IFF_DRV_RUNNING 0x40            /* (d) resources allocated */
+#define        IFF_NOARP       0x80            /* (n) no address resolution protocol */
+#define        IFF_PROMISC     0x100           /* (n) receive all packets */
+#define        IFF_ALLMULTI    0x200           /* (n) receive all multicast packets */
+#define        IFF_DRV_OACTIVE 0x400           /* (d) tx hardware queue is full */
+#define        IFF_SIMPLEX     0x800           /* (i) can't hear own transmissions */
+#define        IFF_LINK0       0x1000          /* per link layer defined bit */
+#define        IFF_LINK1       0x2000          /* per link layer defined bit */
+#define        IFF_LINK2       0x4000          /* per link layer defined bit */
+#define        IFF_ALTPHYS     IFF_LINK2       /* alias of IFF_LINK2: use alternate physical connection */
+#define        IFF_MULTICAST   0x8000          /* (i) supports multicast */
+#define        IFF_CANTCONFIG  0x10000         /* (i) unconfigurable using ioctl(2) */
+#define        IFF_PPROMISC    0x20000         /* (n) user-requested promisc mode */
+#define        IFF_MONITOR     0x40000         /* (n) user-requested monitor mode */
+#define        IFF_STATICARP   0x80000         /* (n) static ARP */
+#define        IFF_DYING       0x200000        /* (n) interface is winding down; note 0x100000 is not defined here */
+#define        IFF_RENAMING    0x400000        /* (n) interface is being renamed */
+/*
+ * Old names for driver flags so that user space tools can continue to use
+ * the old (portable) names.
+ */
+#ifndef _KERNEL
+#define        IFF_RUNNING     IFF_DRV_RUNNING /* old name; only defined when _KERNEL is not */
+#define        IFF_OACTIVE     IFF_DRV_OACTIVE /* old name; only defined when _KERNEL is not */
+#endif /* !_KERNEL */
+
+/* Mask (bitwise OR) of the flags that are set internally only: */
+#define        IFF_CANTCHANGE \
+       (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\
+           IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\
+           IFF_DYING|IFF_CANTCONFIG)
+
+/*
+ * Values for if_link_state.
+ */
+#define        LINK_STATE_UNKNOWN      0       /* link state is invalid or unknown */
+#define        LINK_STATE_DOWN         1       /* link is down */
+#define        LINK_STATE_UP           2       /* link is up */
+
+/*
+ * Convenience macros for setting ifi_baudrate; each yields bits/sec. as
+ * uintmax_t.  XXX 1000 vs. 1024? --thorpej@netbsd.org
+ */
+#define        IF_Kbps(x)      ((uintmax_t)(x) * 1000) /* kilobits/sec. */
+#define        IF_Mbps(x)      (IF_Kbps((x) * (uintmax_t)1000))        /* megabits/sec.; widened before scaling so an int argument cannot overflow */
+#define        IF_Gbps(x)      (IF_Mbps((x) * (uintmax_t)1000))        /* gigabits/sec.; widened before scaling so an int argument cannot overflow */
+
+/*
+ * Capabilities that interfaces can advertise.
+ *
+ * struct ifnet.if_capabilities
+ *   contains the optional features & capabilities a particular interface
+ *   supports (not only the driver but also the detected hw revision).
+ *   Capabilities are defined by IFCAP_* below.
+ * struct ifnet.if_capenable
+ *   contains the enabled (either by default or through ifconfig) optional
+ *   features & capabilities on this interface.
+ *   Capabilities are defined by IFCAP_* below.
+ * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above
+ *   contains the enabled optional feature & capabilities that can be used
+ *   individually per packet and are specified in the mbuf pkthdr.csum_flags
+ *   field.  IFCAP_* and CSUM_* do not match one to one and CSUM_* may be
+ *   more detailed or differentiated than IFCAP_*.
+ *   Hwassist features are defined CSUM_* in sys/mbuf.h
+ *
+ * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl
+ * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE.
+ * This is not strictly necessary because the common code never
+ * changes capabilities, and it is left to the individual driver
+ * to do the right thing. However, having the filter here
+ * avoids replication of the same code in all individual drivers.
+ */
+#define        IFCAP_RXCSUM            0x00001  /* can offload checksum on RX */
+#define        IFCAP_TXCSUM            0x00002  /* can offload checksum on TX */
+#define        IFCAP_NETCONS           0x00004  /* can be a network console */
+#define        IFCAP_VLAN_MTU          0x00008 /* VLAN-compatible MTU */
+#define        IFCAP_VLAN_HWTAGGING    0x00010 /* hardware VLAN tag support */
+#define        IFCAP_JUMBO_MTU         0x00020 /* 9000 byte MTU supported */
+#define        IFCAP_POLLING           0x00040 /* driver supports polling */
+#define        IFCAP_VLAN_HWCSUM       0x00080 /* can do IFCAP_HWCSUM on VLANs */
+#define        IFCAP_TSO4              0x00100 /* can do TCP Segmentation Offload */
+#define        IFCAP_TSO6              0x00200 /* can do TCP6 Segmentation Offload */
+#define        IFCAP_LRO               0x00400 /* can do Large Receive Offload */
+#define        IFCAP_WOL_UCAST         0x00800 /* wake on any unicast frame */
+#define        IFCAP_WOL_MCAST         0x01000 /* wake on any multicast frame */
+#define        IFCAP_WOL_MAGIC         0x02000 /* wake on any Magic Packet */
+#define        IFCAP_TOE4              0x04000 /* interface can offload TCP */
+#define        IFCAP_TOE6              0x08000 /* interface can offload TCP6 */
+#define        IFCAP_VLAN_HWFILTER     0x10000 /* interface hw can filter vlan tag */
+#define        IFCAP_POLLING_NOCOUNT   0x20000 /* polling ticks cannot be fragmented */
+#define        IFCAP_VLAN_HWTSO        0x40000 /* can do IFCAP_TSO on VLANs */
+#define        IFCAP_LINKSTATE         0x80000 /* the runtime link state is dynamic */
+#define        IFCAP_NETMAP            0x100000 /* netmap mode supported/enabled */
+#define        IFCAP_RXCSUM_IPV6       0x200000  /* can offload checksum on IPv6 RX */
+#define        IFCAP_TXCSUM_IPV6       0x400000  /* can offload checksum on IPv6 TX */
+#define        IFCAP_HWSTATS           0x800000 /* manages counters internally */
+
+#define IFCAP_HWCSUM_IPV6      (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) /* both IPv6 checksum offload bits */
+
+#define IFCAP_HWCSUM   (IFCAP_RXCSUM | IFCAP_TXCSUM) /* both RX and TX checksum offload bits */
+#define        IFCAP_TSO       (IFCAP_TSO4 | IFCAP_TSO6) /* both TCP and TCP6 segmentation offload bits */
+#define        IFCAP_WOL       (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) /* all three wake-on bits */
+#define        IFCAP_TOE       (IFCAP_TOE4 | IFCAP_TOE6) /* both TCP and TCP6 offload bits */
+
+#define        IFCAP_CANTCHANGE        (IFCAP_NETMAP) /* capabilities that cannot be arbitrarily changed with ifconfig/ioctl */
+
+#define        IFQ_MAXLEN      50              /* NOTE(review): presumably the default interface queue length limit -- confirm */
+#define        IFNET_SLOWHZ    1               /* granularity is 1 second */
+
+/*
+ * Message format for use in obtaining information about interfaces
+ * from getkerninfo and the routing socket
+ * For the new, extensible interface see struct if_msghdrl below.
+ */
+struct if_msghdr {
+       u_short ifm_msglen;     /* message length; lets readers skip over non-understood messages */
+       u_char  ifm_version;    /* future binary compatibility */
+       u_char  ifm_type;       /* message type */
+       int     ifm_addrs;      /* like rtm_addrs */
+       int     ifm_flags;      /* value of if_flags */
+       u_short ifm_index;      /* index for associated ifp */
+       struct  if_data ifm_data;/* statistics and other data about the interface */
+};
+
+/*
+ * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL.  It is
+ * extensible after ifm_data_off or within ifm_data.  Both the if_msghdr and
+ * if_data now have a member field detailing the struct length in addition to
+ * the routing message length.  Given a pointer to struct if_msghdrl, the
+ * macros below locate ifm_data (ifm_data_off bytes in) and the socket address
+ * structures that follow (ifm_len bytes in).  Both evaluate _l twice.
+ */
+#define        IF_MSGHDRL_IFM_DATA(_l) \
+    ((struct if_data *)((char *)(_l) + (_l)->ifm_data_off)) /* outer parens keep -> and [] bound to the cast result */
+#define        IF_MSGHDRL_RTA(_l) \
+    ((void *)((uintptr_t)(_l) + (_l)->ifm_len)) /* outer parens keep postfix operators bound to the cast result */
+struct if_msghdrl {
+       u_short ifm_msglen;     /* message length; lets readers skip over non-understood messages */
+       u_char  ifm_version;    /* future binary compatibility */
+       u_char  ifm_type;       /* message type */
+       int     ifm_addrs;      /* like rtm_addrs */
+       int     ifm_flags;      /* value of if_flags */
+       u_short ifm_index;      /* index for associated ifp */
+       u_short _ifm_spare1;    /* spare space to grow if_index, see if_var.h */
+       u_short ifm_len;        /* length of if_msghdrl incl. if_data; used by IF_MSGHDRL_RTA */
+       u_short ifm_data_off;   /* offset of if_data from beginning; used by IF_MSGHDRL_IFM_DATA */
+       struct  if_data ifm_data;/* statistics and other data about the interface */
+};
+
+/*
+ * Message format for use in obtaining information about interface addresses
+ * from getkerninfo and the routing socket
+ * For the new, extensible interface see struct ifa_msghdrl below.
+ */
+struct ifa_msghdr {
+       u_short ifam_msglen;    /* message length; lets readers skip over non-understood messages */
+       u_char  ifam_version;   /* future binary compatibility */
+       u_char  ifam_type;      /* message type */
+       int     ifam_addrs;     /* like rtm_addrs */
+       int     ifam_flags;     /* value of ifa_flags */
+       u_short ifam_index;     /* index for associated ifp */
+       int     ifam_metric;    /* value of ifa_ifp->if_metric */
+};
+
+/*
+ * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL.  It is
+ * extensible after ifam_metric or within ifam_data.  Both the ifa_msghdrl and
+ * if_data now have a member field detailing the struct length in addition to
+ * the routing message length.  Given a pointer to struct ifa_msghdrl, the
+ * macros below locate ifam_data (ifam_data_off bytes in) and the socket address
+ * structures that follow (ifam_len bytes in).  Both evaluate _l twice.
+ */
+#define        IFA_MSGHDRL_IFAM_DATA(_l) \
+    ((struct if_data *)((char *)(_l) + (_l)->ifam_data_off)) /* outer parens keep -> and [] bound to the cast result */
+#define        IFA_MSGHDRL_RTA(_l) \
+    ((void *)((uintptr_t)(_l) + (_l)->ifam_len)) /* outer parens keep postfix operators bound to the cast result */
+struct ifa_msghdrl {
+       u_short ifam_msglen;    /* message length; lets readers skip over non-understood messages */
+       u_char  ifam_version;   /* future binary compatibility */
+       u_char  ifam_type;      /* message type */
+       int     ifam_addrs;     /* like rtm_addrs */
+       int     ifam_flags;     /* value of ifa_flags */
+       u_short ifam_index;     /* index for associated ifp */
+       u_short _ifam_spare1;   /* spare space to grow if_index, see if_var.h */
+       u_short ifam_len;       /* length of ifa_msghdrl incl. if_data; used by IFA_MSGHDRL_RTA */
+       u_short ifam_data_off;  /* offset of if_data from beginning; used by IFA_MSGHDRL_IFAM_DATA */
+       int     ifam_metric;    /* value of ifa_ifp->if_metric */
+       struct  if_data ifam_data;/* statistics and other data about the
+                                * interface or address */
+};
+
+/*
+ * Message format for use in obtaining information about multicast addresses
+ * from the routing socket
+ */
+struct ifma_msghdr {
+       u_short ifmam_msglen;   /* message length; lets readers skip over non-understood messages */
+       u_char  ifmam_version;  /* future binary compatibility */
+       u_char  ifmam_type;     /* message type */
+       int     ifmam_addrs;    /* like rtm_addrs */
+       int     ifmam_flags;    /* value of ifa_flags */
+       u_short ifmam_index;    /* index for associated ifp */
+};
+
+/*
+ * Message format announcing the arrival or departure of a network interface.
+ */
+struct if_announcemsghdr {
+       u_short ifan_msglen;    /* message length; lets readers skip over non-understood messages */
+       u_char  ifan_version;   /* future binary compatibility */
+       u_char  ifan_type;      /* message type */
+       u_short ifan_index;     /* index for associated ifp */
+       char    ifan_name[IFNAMSIZ]; /* interface name, e.g. "en0" */
+       u_short ifan_what;      /* what type of announcement; presumably one of IFAN_* below -- confirm */
+};
+
+#define        IFAN_ARRIVAL    0       /* interface arrival */
+#define        IFAN_DEPARTURE  1       /* interface departure */
+
+/*
+ * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests
+ */
+struct ifreq_buffer {
+       size_t  length;         /* length of the user supplied buffer */
+       void    *buffer;        /* the user supplied buffer */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's.  All interface ioctl's must have parameter
+ * definitions which begin with ifr_name.  The
+ * remainder may be interface specific.
+ */
+struct ifreq {
+       char    ifr_name[IFNAMSIZ];             /* interface name, e.g. "en0" */
+       union {                                 /* request payload; members are reached through the ifr_* macros below */
+               struct  sockaddr ifru_addr;
+               struct  sockaddr ifru_dstaddr;
+               struct  sockaddr ifru_broadaddr;
+               struct  ifreq_buffer ifru_buffer;
+               short   ifru_flags[2];          /* [0] low 16 bits, [1] high 16 bits of the flags */
+               short   ifru_index;
+               int     ifru_jid;
+               int     ifru_metric;
+               int     ifru_mtu;
+               int     ifru_phys;
+               int     ifru_media;
+               caddr_t ifru_data;
+               int     ifru_cap[2];            /* [0] requested, [1] current capabilities */
+               u_int   ifru_fib;
+               u_char  ifru_vlan_pcp;
+       } ifr_ifru;
+#define        ifr_addr        ifr_ifru.ifru_addr      /* address */
+#define        ifr_dstaddr     ifr_ifru.ifru_dstaddr   /* other end of p-to-p link */
+#define        ifr_broadaddr   ifr_ifru.ifru_broadaddr /* broadcast address */
+#define        ifr_buffer      ifr_ifru.ifru_buffer    /* user supplied buffer with its length */
+#define        ifr_flags       ifr_ifru.ifru_flags[0]  /* flags (low 16 bits) */
+#define        ifr_flagshigh   ifr_ifru.ifru_flags[1]  /* flags (high 16 bits) */
+#define        ifr_jid         ifr_ifru.ifru_jid       /* jail/vnet */
+#define        ifr_metric      ifr_ifru.ifru_metric    /* metric */
+#define        ifr_mtu         ifr_ifru.ifru_mtu       /* mtu */
+#define ifr_phys       ifr_ifru.ifru_phys      /* physical wire */
+#define ifr_media      ifr_ifru.ifru_media     /* physical media */
+#define        ifr_data        ifr_ifru.ifru_data      /* for use by interface */
+#define        ifr_reqcap      ifr_ifru.ifru_cap[0]    /* requested capabilities */
+#define        ifr_curcap      ifr_ifru.ifru_cap[1]    /* current capabilities */
+#define        ifr_index       ifr_ifru.ifru_index     /* interface index */
+#define        ifr_fib         ifr_ifru.ifru_fib       /* interface fib */
+#define        ifr_vlan_pcp    ifr_ifru.ifru_vlan_pcp  /* VLAN priority */
+};
+
+#define        _SIZEOF_ADDR_IFREQ(ifr) \
+       ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \
+        (sizeof(struct ifreq) - sizeof(struct sockaddr) + \
+         (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) /* sizeof(struct ifreq), enlarged when sa_len exceeds sizeof(struct sockaddr); ifr is a struct, not a pointer */
+
+struct ifaliasreq {
+       char    ifra_name[IFNAMSIZ];            /* interface name, e.g. "en0" */
+       struct  sockaddr ifra_addr;             /* NOTE(review): presumably the alias address -- confirm */
+       struct  sockaddr ifra_broadaddr;        /* NOTE(review): presumably the broadcast address -- confirm */
+       struct  sockaddr ifra_mask;             /* NOTE(review): presumably the netmask -- confirm */
+       int     ifra_vhid;                      /* NOTE(review): presumably a virtual host ID -- confirm */
+};
+
+/* 9.x compat: same members as struct ifaliasreq, without ifra_vhid */
+struct oifaliasreq {
+       char    ifra_name[IFNAMSIZ];
+       struct  sockaddr ifra_addr;
+       struct  sockaddr ifra_broadaddr;
+       struct  sockaddr ifra_mask;
+};
+
+struct ifmediareq {
+       char    ifm_name[IFNAMSIZ];     /* interface name, e.g. "en0" */
+       int     ifm_current;            /* current media options */
+       int     ifm_mask;               /* don't care mask */
+       int     ifm_status;             /* media status */
+       int     ifm_active;             /* active options */
+       int     ifm_count;              /* # entries in ifm_ulist array */
+       int     *ifm_ulist;             /* media words; array of ifm_count entries */
+};
+
+struct  ifdrv {
+       char            ifd_name[IFNAMSIZ];     /* interface name, e.g. "en0" */
+       unsigned long   ifd_cmd;                /* NOTE(review): presumably a driver-specific command code -- confirm */
+       size_t          ifd_len;                /* NOTE(review): presumably the length of ifd_data -- confirm */
+       void            *ifd_data;              /* NOTE(review): presumably the command's data buffer -- confirm */
+};
+
+/* 
+ * Structure used to retrieve aux status data from interfaces.
+ * Kernel suppliers to this interface should respect the formatting
+ * needed by ifconfig(8): each line starts with a TAB and ends with
+ * a newline.  The canonical example to copy and paste is in if_tun.c.
+ */
+
+#define        IFSTATMAX       800             /* 10 lines of text */
+struct ifstat {
+       char    ifs_name[IFNAMSIZ];     /* interface name, e.g. "en0" */
+       char    ascii[IFSTATMAX + 1];   /* status text; the extra byte is presumably for a terminating NUL -- confirm */
+};
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+struct ifconf {
+       int     ifc_len;                /* size of associated buffer */
+       union {                         /* two views of the same pointer */
+               caddr_t ifcu_buf;       /* buffer address */
+               struct  ifreq *ifcu_req;        /* array of structures returned */
+       } ifc_ifcu;
+#define        ifc_buf ifc_ifcu.ifcu_buf       /* buffer address */
+#define        ifc_req ifc_ifcu.ifcu_req       /* array of structures returned */
+};
+
+/*
+ * interface groups
+ */
+
+#define        IFG_ALL         "all"           /* group contains all interfaces */
+/* XXX: will we implement this? */
+#define        IFG_EGRESS      "egress"        /* interface(s) the default route(s) point to */
+
+struct ifg_req {
+       union {                                 /* both members are IFNAMSIZ-byte names sharing storage */
+               char                     ifgrqu_group[IFNAMSIZ];        /* NOTE(review): presumably a group name -- confirm */
+               char                     ifgrqu_member[IFNAMSIZ];       /* NOTE(review): presumably a member interface name -- confirm */
+       } ifgrq_ifgrqu;
+#define        ifgrq_group     ifgrq_ifgrqu.ifgrqu_group
+#define        ifgrq_member    ifgrq_ifgrqu.ifgrqu_member
+};
+
+/*
+ * Used to lookup groups for an interface
+ */
+struct ifgroupreq {
+       char    ifgr_name[IFNAMSIZ];    /* NOTE(review): presumably the interface name being queried -- confirm */
+       u_int   ifgr_len;               /* NOTE(review): presumably the byte length of the ifgr_groups buffer -- confirm */
+       union {                         /* either a single group name or a pointer to ifg_req entries */
+               char    ifgru_group[IFNAMSIZ];
+               struct  ifg_req *ifgru_groups;
+       } ifgr_ifgru;
+#define ifgr_group     ifgr_ifgru.ifgru_group
+#define ifgr_groups    ifgr_ifgru.ifgru_groups
+};
+
+/*
+ * Structure used to request i2c data
+ * from interface transceivers.
+ */
+struct ifi2creq {
+       uint8_t dev_addr;       /* i2c address (0xA0, 0xA2) */
+       uint8_t offset;         /* read offset */
+       uint8_t len;            /* read length; data[] below holds 8 bytes */
+       uint8_t spare0;         /* spare; no use visible in this header */
+       uint32_t spare1;        /* spare; no use visible in this header */
+       uint8_t data[8];        /* read buffer */
+}; 
+
+#endif /* __BSD_VISIBLE */
+
+#ifdef _KERNEL
+#ifdef MALLOC_DECLARE  /* only declare the malloc types when the macro is available */
+MALLOC_DECLARE(M_IFADDR);
+MALLOC_DECLARE(M_IFMADDR);
+#endif /* MALLOC_DECLARE */
+#endif /* _KERNEL */
+
+#ifndef _KERNEL
+struct if_nameindex {
+       unsigned int    if_index;       /* interface index: 1, 2, ... */
+       char            *if_name;       /* null terminated name: "le0", ... */
+};
+
+__BEGIN_DECLS  /* prototypes below are only declared when _KERNEL is not defined */
+void                    if_freenameindex(struct if_nameindex *);
+char                   *if_indextoname(unsigned int, char *);
+struct if_nameindex    *if_nameindex(void);
+unsigned int            if_nametoindex(const char *);
+__END_DECLS
+#endif /* !_KERNEL */
+#endif /* !_NET_IF_H_ */
diff --git a/include/net/if_arp.h b/include/net/if_arp.h
new file mode 100644 (file)
index 0000000..7d141f3
--- /dev/null
@@ -0,0 +1,133 @@
+/*-
+ * Copyright (c) 1986, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)if_arp.h    8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_ARP_H_
+#define        _NET_IF_ARP_H_
+
+/*
+ * Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description.  ARP packets are variable
+ * in size; the arphdr structure defines the fixed-length portion.
+ * Protocol type values are the same as those for 10 Mb/s Ethernet.
+ * It is followed by the variable-sized fields ar_sha, ar_spa,
+ * ar_tha and ar_tpa in that order, according to the lengths
+ * specified.  Field names used correspond to RFC 826.
+ */
+struct arphdr {
+       u_short ar_hrd;         /* format of hardware address (ARPHRD_* below) */
+#define ARPHRD_ETHER   1       /* ethernet hardware format */
+#define ARPHRD_IEEE802 6       /* token-ring hardware format */
+#define ARPHRD_ARCNET  7       /* arcnet hardware format */
+#define ARPHRD_FRELAY  15      /* frame relay hardware format */
+#define ARPHRD_IEEE1394        24      /* firewire hardware format */
+#define ARPHRD_INFINIBAND 32   /* infiniband hardware format */
+       u_short ar_pro;         /* format of protocol address */
+       u_char  ar_hln;         /* length of hardware address */
+       u_char  ar_pln;         /* length of protocol address */
+       u_short ar_op;          /* operation, one of ARPOP_*: */
+#define        ARPOP_REQUEST   1       /* request to resolve address */
+#define        ARPOP_REPLY     2       /* response to previous request */
+#define        ARPOP_REVREQUEST 3      /* request protocol address given hardware */
+#define        ARPOP_REVREPLY  4       /* response giving protocol address */
+#define ARPOP_INVREQUEST 8     /* request to identify peer */
+#define ARPOP_INVREPLY 9       /* response identifying peer */
+/*
+ * The remaining fields are variable in size, according to the sizes
+ * above; they are shown for documentation only and are not compiled in.
+ */
+#ifdef COMMENT_ONLY
+       u_char  ar_sha[];       /* sender hardware address */
+       u_char  ar_spa[];       /* sender protocol address */
+       u_char  ar_tha[];       /* target hardware address */
+       u_char  ar_tpa[];       /* target protocol address */
+#endif /* COMMENT_ONLY */
+};
+
+#define ar_sha(ap)     (((caddr_t)((ap)+1)) +   0) /* sender hardware address: first byte after the header */
+#define ar_spa(ap)     (((caddr_t)((ap)+1)) +   (ap)->ar_hln) /* sender protocol address */
+#define ar_tha(ap)     (((caddr_t)((ap)+1)) +   (ap)->ar_hln + (ap)->ar_pln) /* target hardware address */
+#define ar_tpa(ap)     (((caddr_t)((ap)+1)) + 2*(ap)->ar_hln + (ap)->ar_pln) /* target protocol address */
+
+#define arphdr_len2(ar_hln, ar_pln)                                    \
+       (sizeof(struct arphdr) + 2*(ar_hln) + 2*(ar_pln)) /* header plus two hardware and two protocol addresses */
+#define arphdr_len(ap) (arphdr_len2((ap)->ar_hln, (ap)->ar_pln)) /* same, lengths taken from the header at ap */
+
+/*
+ * ARP ioctl request
+ */
+struct arpreq {
+       struct  sockaddr arp_pa;                /* protocol address */
+       struct  sockaddr arp_ha;                /* hardware address */
+       int     arp_flags;                      /* flags (ATF_* below) */
+};
+/*  arp_flags and at_flags field values */
+#define        ATF_INUSE       0x01    /* entry in use */
+#define ATF_COM                0x02    /* completed entry (enaddr valid) */
+#define        ATF_PERM        0x04    /* permanent entry */
+#define        ATF_PUBL        0x08    /* publish entry (respond for other host) */
+#define        ATF_USETRAILERS 0x10    /* has requested trailers */
+
+struct arpstat {
+       /* Normal things that happen: */
+       uint64_t txrequests;    /* # of ARP requests sent by this host. */
+       uint64_t txreplies;     /* # of ARP replies sent by this host. */
+       uint64_t rxrequests;    /* # of ARP requests received by this host. */
+       uint64_t rxreplies;     /* # of ARP replies received by this host. */
+       uint64_t received;      /* # of ARP packets received by this host. */
+
+       uint64_t arp_spares[4]; /* Spare counters, for either the upper or lower half. */
+       /* Abnormal event and error counting: */
+       uint64_t dropped;       /* # of packets dropped waiting for a reply. */
+       uint64_t timeouts;      /* # of times with entries removed */
+                               /* due to timeout. */
+       uint64_t dupips;        /* # of duplicate IPs detected. */
+};
+
+#ifdef _KERNEL
+#include <sys/counter.h>
+#include <net/vnet.h>
+
+VNET_PCPUSTAT_DECLARE(struct arpstat, arpstat);
+/*
+ * In-kernel consumers can use these accessor macros directly to update
+ * stats.  All of them expand to ARPSTAT_ADD / VNET_PCPUSTAT_ADD.
+ */
+#define        ARPSTAT_ADD(name, val)  \
+    VNET_PCPUSTAT_ADD(struct arpstat, arpstat, name, (val))
+#define        ARPSTAT_SUB(name, val)  ARPSTAT_ADD(name, -(val)) /* subtract by adding the negated value */
+#define        ARPSTAT_INC(name)       ARPSTAT_ADD(name, 1) /* add 1 */
+#define        ARPSTAT_DEC(name)       ARPSTAT_SUB(name, 1) /* add -(1) */
+
+#endif /* _KERNEL */
+
+#endif /* !_NET_IF_ARP_H_ */
diff --git a/include/net/if_types.h b/include/net/if_types.h
new file mode 100644 (file)
index 0000000..92e101a
--- /dev/null
@@ -0,0 +1,273 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)if_types.h  8.3 (Berkeley) 4/28/95
+ * $FreeBSD$
+ * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
+ */
+
+#ifndef _NET_IF_TYPES_H_
+#define _NET_IF_TYPES_H_
+
+/*
+ * Interface types for benefit of parsing media address headers.
+ * This list is derived from the SNMP list of ifTypes, originally
+ * documented in RFC1573, now maintained as:
+ *
+ *     http://www.iana.org/assignments/smi-numbers
+ */
+
+typedef enum {
+       IFT_OTHER       = 0x1,          /* none of the following */
+       IFT_1822        = 0x2,          /* old-style arpanet imp */
+       IFT_HDH1822     = 0x3,          /* HDH arpanet imp */
+       IFT_X25DDN      = 0x4,          /* x25 to imp */
+       IFT_X25         = 0x5,          /* PDN X25 interface (RFC877) */
+       IFT_ETHER       = 0x6,          /* Ethernet CSMA/CD */
+       IFT_ISO88023    = 0x7,          /* CSMA/CD */
+       IFT_ISO88024    = 0x8,          /* Token Bus */
+       IFT_ISO88025    = 0x9,          /* Token Ring */
+       IFT_ISO88026    = 0xa,          /* MAN */
+       IFT_STARLAN     = 0xb,
+       IFT_P10         = 0xc,          /* Proteon 10MBit ring */
+       IFT_P80         = 0xd,          /* Proteon 80MBit ring */
+       IFT_HY          = 0xe,          /* Hyperchannel */
+       IFT_FDDI        = 0xf,
+       IFT_LAPB        = 0x10,
+       IFT_SDLC        = 0x11,
+       IFT_T1          = 0x12,
+       IFT_CEPT        = 0x13,         /* E1 - european T1 */
+       IFT_ISDNBASIC   = 0x14,
+       IFT_ISDNPRIMARY = 0x15,
+       IFT_PTPSERIAL   = 0x16,         /* Proprietary PTP serial */
+       IFT_PPP         = 0x17,         /* RFC 1331 */
+       IFT_LOOP        = 0x18,         /* loopback */
+       IFT_EON         = 0x19,         /* ISO over IP */
+       IFT_XETHER      = 0x1a,         /* obsolete 3MB experimental ethernet */
+       IFT_NSIP        = 0x1b,         /* XNS over IP */
+       IFT_SLIP        = 0x1c,         /* IP over generic TTY */
+       IFT_ULTRA       = 0x1d,         /* Ultra Technologies */
+       IFT_DS3         = 0x1e,         /* Generic T3 */
+       IFT_SIP         = 0x1f,         /* SMDS */
+       IFT_FRELAY      = 0x20,         /* Frame Relay DTE only */
+       IFT_RS232       = 0x21,
+       IFT_PARA        = 0x22,         /* parallel-port */
+       IFT_ARCNET      = 0x23,
+       IFT_ARCNETPLUS  = 0x24,
+       IFT_ATM         = 0x25,         /* ATM cells */
+       IFT_MIOX25      = 0x26,
+       IFT_SONET       = 0x27,         /* SONET or SDH */
+       IFT_X25PLE      = 0x28,
+       IFT_ISO88022LLC = 0x29,
+       IFT_LOCALTALK   = 0x2a,
+       IFT_SMDSDXI     = 0x2b,
+       IFT_FRELAYDCE   = 0x2c,         /* Frame Relay DCE */
+       IFT_V35         = 0x2d,
+       IFT_HSSI        = 0x2e,
+       IFT_HIPPI       = 0x2f,
+       IFT_MODEM       = 0x30,         /* Generic Modem */
+       IFT_AAL5        = 0x31,         /* AAL5 over ATM */
+       IFT_SONETPATH   = 0x32,
+       IFT_SONETVT     = 0x33,
+       IFT_SMDSICIP    = 0x34,         /* SMDS InterCarrier Interface */
+       IFT_PROPVIRTUAL = 0x35,         /* Proprietary Virtual/internal */
+       IFT_PROPMUX     = 0x36,         /* Proprietary Multiplexing */
+       IFT_IEEE80212   = 0x37,         /* 100BaseVG */
+       IFT_FIBRECHANNEL = 0x38,        /* Fibre Channel */
+       IFT_HIPPIINTERFACE = 0x39,      /* HIPPI interfaces      */
+       IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete, use 0x20 or 0x2c */
+       IFT_AFLANE8023  = 0x3b,         /* ATM Emulated LAN for 802.3 */
+       IFT_AFLANE8025  = 0x3c,         /* ATM Emulated LAN for 802.5 */
+       IFT_CCTEMUL     = 0x3d,         /* ATM Emulated circuit           */
+       IFT_FASTETHER   = 0x3e,         /* Fast Ethernet (100BaseT) */
+       IFT_ISDN        = 0x3f,         /* ISDN and X.25            */
+       IFT_V11         = 0x40,         /* CCITT V.11/X.21              */
+       IFT_V36         = 0x41,         /* CCITT V.36                   */
+       IFT_G703AT64K   = 0x42,         /* CCITT G703 at 64Kbps */
+       IFT_G703AT2MB   = 0x43,         /* Obsolete see DS1-MIB */
+       IFT_QLLC        = 0x44,         /* SNA QLLC                     */
+       IFT_FASTETHERFX = 0x45,         /* Fast Ethernet (100BaseFX)    */
+       IFT_CHANNEL     = 0x46,         /* channel                      */
+       IFT_IEEE80211   = 0x47,         /* radio spread spectrum        */
+       IFT_IBM370PARCHAN = 0x48,       /* IBM System 360/370 OEMI Channel */
+       IFT_ESCON       = 0x49,         /* IBM Enterprise Systems Connection */
+       IFT_DLSW        = 0x4a,         /* Data Link Switching */
+       IFT_ISDNS       = 0x4b,         /* ISDN S/T interface */
+       IFT_ISDNU       = 0x4c,         /* ISDN U interface */
+       IFT_LAPD        = 0x4d,         /* Link Access Protocol D */
+       IFT_IPSWITCH    = 0x4e,         /* IP Switching Objects */
+       IFT_RSRB        = 0x4f,         /* Remote Source Route Bridging */
+       IFT_ATMLOGICAL  = 0x50,         /* ATM Logical Port */
+       IFT_DS0         = 0x51,         /* Digital Signal Level 0 */
+       IFT_DS0BUNDLE   = 0x52,         /* group of ds0s on the same ds1 */
+       IFT_BSC         = 0x53,         /* Bisynchronous Protocol */
+       IFT_ASYNC       = 0x54,         /* Asynchronous Protocol */
+       IFT_CNR         = 0x55,         /* Combat Net Radio */
+       IFT_ISO88025DTR = 0x56,         /* ISO 802.5r DTR */
+       IFT_EPLRS       = 0x57,         /* Ext Pos Loc Report Sys */
+       IFT_ARAP        = 0x58,         /* Appletalk Remote Access Protocol */
+       IFT_PROPCNLS    = 0x59,         /* Proprietary Connectionless Protocol*/
+       IFT_HOSTPAD     = 0x5a,         /* CCITT-ITU X.29 PAD Protocol */
+       IFT_TERMPAD     = 0x5b,         /* CCITT-ITU X.3 PAD Facility */
+       IFT_FRAMERELAYMPI = 0x5c,       /* Multiproto Interconnect over FR */
+       IFT_X213        = 0x5d,         /* CCITT-ITU X213 */
+       IFT_ADSL        = 0x5e,         /* Asymmetric Digital Subscriber Loop */
+       IFT_RADSL       = 0x5f,         /* Rate-Adapt. Digital Subscriber Loop*/
+       IFT_SDSL        = 0x60,         /* Symmetric Digital Subscriber Loop */
+       IFT_VDSL        = 0x61,         /* Very H-Speed Digital Subscrib. Loop*/
+       IFT_ISO88025CRFPINT = 0x62,     /* ISO 802.5 CRFP */
+       IFT_MYRINET     = 0x63,         /* Myricom Myrinet */
+       IFT_VOICEEM     = 0x64,         /* voice recEive and transMit */
+       IFT_VOICEFXO    = 0x65,         /* voice Foreign Exchange Office */
+       IFT_VOICEFXS    = 0x66,         /* voice Foreign Exchange Station */
+       IFT_VOICEENCAP  = 0x67,         /* voice encapsulation */
+       IFT_VOICEOVERIP = 0x68,         /* voice over IP encapsulation */
+       IFT_ATMDXI      = 0x69,         /* ATM DXI */
+       IFT_ATMFUNI     = 0x6a,         /* ATM FUNI */
+       IFT_ATMIMA      = 0x6b,         /* ATM IMA                    */
+       IFT_PPPMULTILINKBUNDLE = 0x6c,  /* PPP Multilink Bundle */
+       IFT_IPOVERCDLC  = 0x6d,         /* IBM ipOverCdlc */
+       IFT_IPOVERCLAW  = 0x6e,         /* IBM Common Link Access to Workstn */
+       IFT_STACKTOSTACK = 0x6f,        /* IBM stackToStack */
+       IFT_VIRTUALIPADDRESS = 0x70,    /* IBM VIPA */
+       IFT_MPC         = 0x71,         /* IBM multi-protocol channel support */
+       IFT_IPOVERATM   = 0x72,         /* IBM ipOverAtm */
+       IFT_ISO88025FIBER = 0x73,       /* ISO 802.5j Fiber Token Ring */
+       IFT_TDLC        = 0x74,         /* IBM twinaxial data link control */
+       IFT_GIGABITETHERNET = 0x75,     /* Gigabit Ethernet */
+       IFT_HDLC        = 0x76,         /* HDLC */
+       IFT_LAPF        = 0x77,         /* LAP F */
+       IFT_V37         = 0x78,         /* V.37 */
+       IFT_X25MLP      = 0x79,         /* Multi-Link Protocol */
+       IFT_X25HUNTGROUP = 0x7a,        /* X25 Hunt Group */
+       IFT_TRANSPHDLC  = 0x7b,         /* Transp HDLC */
+       IFT_INTERLEAVE  = 0x7c,         /* Interleave channel */
+       IFT_FAST        = 0x7d,         /* Fast channel */
+       IFT_IP          = 0x7e,         /* IP (for APPN HPR in IP networks) */
+       IFT_DOCSCABLEMACLAYER = 0x7f,   /* CATV Mac Layer */
+       IFT_DOCSCABLEDOWNSTREAM = 0x80, /* CATV Downstream interface */
+       IFT_DOCSCABLEUPSTREAM = 0x81,   /* CATV Upstream interface */
+       IFT_A12MPPSWITCH = 0x82,        /* Avalon Parallel Processor */
+       IFT_TUNNEL      = 0x83,         /* Encapsulation interface */
+       IFT_COFFEE      = 0x84,         /* coffee pot */
+       IFT_CES         = 0x85,         /* Circuit Emulation Service */
+       IFT_ATMSUBINTERFACE = 0x86,     /* (x)  ATM Sub Interface */
+       IFT_L2VLAN      = 0x87,         /* Layer 2 Virtual LAN using 802.1Q */
+       IFT_L3IPVLAN    = 0x88,         /* Layer 3 Virtual LAN - IP Protocol */
+       IFT_L3IPXVLAN   = 0x89,         /* Layer 3 Virtual LAN - IPX Prot. */
+       IFT_DIGITALPOWERLINE = 0x8a,    /* IP over Power Lines */
+       IFT_MEDIAMAILOVERIP = 0x8b,     /* (xxx)  Multimedia Mail over IP */
+       IFT_DTM         = 0x8c,         /* Dynamic synchronous Transfer Mode */
+       IFT_DCN         = 0x8d,         /* Data Communications Network */
+       IFT_IPFORWARD   = 0x8e,         /* IP Forwarding Interface */
+       IFT_MSDSL       = 0x8f,         /* Multi-rate Symmetric DSL */
+       IFT_IEEE1394    = 0x90,         /* IEEE1394 High Performance SerialBus*/
+       IFT_IFGSN       = 0x91,         /* HIPPI-6400 */
+       IFT_DVBRCCMACLAYER = 0x92,      /* DVB-RCC MAC Layer */
+       IFT_DVBRCCDOWNSTREAM = 0x93,    /* DVB-RCC Downstream Channel */
+       IFT_DVBRCCUPSTREAM = 0x94,      /* DVB-RCC Upstream Channel */
+       IFT_ATMVIRTUAL  = 0x95,         /* ATM Virtual Interface */
+       IFT_MPLSTUNNEL  = 0x96,         /* MPLS Tunnel Virtual Interface */
+       IFT_SRP         = 0x97,         /* Spatial Reuse Protocol */
+       IFT_VOICEOVERATM = 0x98,        /* Voice over ATM */
+       IFT_VOICEOVERFRAMERELAY = 0x99, /* Voice Over Frame Relay */
+       IFT_IDSL        = 0x9a,         /* Digital Subscriber Loop over ISDN */
+       IFT_COMPOSITELINK = 0x9b,       /* Avici Composite Link Interface */
+       IFT_SS7SIGLINK  = 0x9c,         /* SS7 Signaling Link */
+       IFT_PROPWIRELESSP2P = 0x9d,     /* Prop. P2P wireless interface */
+       IFT_FRFORWARD   = 0x9e,         /* Frame forward Interface */
+       IFT_RFC1483     = 0x9f,         /* Multiprotocol over ATM AAL5 */
+       IFT_USB         = 0xa0,         /* USB Interface */
+       IFT_IEEE8023ADLAG = 0xa1,       /* IEEE 802.3ad Link Aggregate*/
+       IFT_BGPPOLICYACCOUNTING = 0xa2, /* BGP Policy Accounting */
+       IFT_FRF16MFRBUNDLE = 0xa3,      /* FRF.16 Multilink Frame Relay*/
+       IFT_H323GATEKEEPER = 0xa4,      /* H323 Gatekeeper */
+       IFT_H323PROXY   = 0xa5,         /* H323 Voice and Video Proxy */
+       IFT_MPLS        = 0xa6,         /* MPLS */
+       IFT_MFSIGLINK   = 0xa7,         /* Multi-frequency signaling link */
+       IFT_HDSL2       = 0xa8,         /* High Bit-Rate DSL, 2nd gen. */
+       IFT_SHDSL       = 0xa9,         /* Multirate HDSL2 */
+       IFT_DS1FDL      = 0xaa,         /* Facility Data Link (4Kbps) on a DS1*/
+       IFT_POS         = 0xab,         /* Packet over SONET/SDH Interface */
+       IFT_DVBASILN    = 0xac,         /* DVB-ASI Input */
+       IFT_DVBASIOUT   = 0xad,         /* DVB-ASI Output */
+       IFT_PLC         = 0xae,         /* Power Line Communications */
+       IFT_NFAS        = 0xaf,         /* Non-Facility Associated Signaling */
+       IFT_TR008       = 0xb0,         /* TR008 */
+       IFT_GR303RDT    = 0xb1,         /* Remote Digital Terminal */
+       IFT_GR303IDT    = 0xb2,         /* Integrated Digital Terminal */
+       IFT_ISUP        = 0xb3,         /* ISUP */
+       IFT_PROPDOCSWIRELESSMACLAYER = 0xb4,    /* prop/Wireless MAC Layer */
+       IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5,  /* prop/Wireless Downstream */
+       IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6,    /* prop/Wireless Upstream */
+       IFT_HIPERLAN2   = 0xb7,         /* HIPERLAN Type 2 Radio Interface */
+       IFT_PROPBWAP2MP = 0xb8,         /* PropBroadbandWirelessAccess P2MP*/
+       IFT_SONETOVERHEADCHANNEL = 0xb9, /* SONET Overhead Channel */
+       IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */
+       IFT_AAL2        = 0xbb,         /* ATM adaptation layer 2 */
+       IFT_RADIOMAC    = 0xbc,         /* MAC layer over radio links */
+       IFT_ATMRADIO    = 0xbd,         /* ATM over radio links */
+       IFT_IMT         = 0xbe,         /* Inter-Machine Trunks */
+       IFT_MVL         = 0xbf,         /* Multiple Virtual Lines DSL */
+       IFT_REACHDSL    = 0xc0,         /* Long Reach DSL */
+       IFT_FRDLCIENDPT = 0xc1,         /* Frame Relay DLCI End Point */
+       IFT_ATMVCIENDPT = 0xc2,         /* ATM VCI End Point */
+       IFT_OPTICALCHANNEL = 0xc3,      /* Optical Channel */
+       IFT_OPTICALTRANSPORT = 0xc4,    /* Optical Transport */
+       IFT_INFINIBAND  = 0xc7,         /* Infiniband */
+       IFT_BRIDGE      = 0xd1,         /* Transparent bridge interface */
+       IFT_STF         = 0xd7,         /* 6to4 interface */
+
+       /*
+        * Not based on IANA assignments.  Conflicting with IANA assignments.
+        * We should make them negative probably.
+        * This requires changes to struct if_data.
+        */
+       IFT_GIF         = 0xf0,         /* Generic tunnel interface */
+       IFT_PVC         = 0xf1,         /* Unused */
+       IFT_ENC         = 0xf4,         /* Encapsulating interface */
+       IFT_PFLOG       = 0xf6,         /* PF packet filter logging */
+       IFT_PFSYNC      = 0xf7,         /* PF packet filter synchronization */
+} ifType;
+
+/*
+ * Some (broken) software uses #ifdef IFT_TYPE to check whether
+ * an operating system supports a certain interface type.  Without
+ * a matching #define, that piece of functionality is compiled out.
+ */
+#ifndef BURN_BRIDGES
+#define        IFT_BRIDGE      IFT_BRIDGE
+#define        IFT_PPP         IFT_PPP
+#define        IFT_PROPVIRTUAL IFT_PROPVIRTUAL
+#define        IFT_L2VLAN      IFT_L2VLAN
+#define        IFT_L3IPVLAN    IFT_L3IPVLAN
+#define        IFT_IEEE1394    IFT_IEEE1394
+#define        IFT_INFINIBAND  IFT_INFINIBAND
+#endif
+
+#endif /* !_NET_IF_TYPES_H_ */
diff --git a/include/net/if_vlan_var.h b/include/net/if_vlan_var.h
new file mode 100644 (file)
index 0000000..6b20d14
--- /dev/null
@@ -0,0 +1,174 @@
+/*-
+ * Copyright 1998 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.  M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied
+ * warranty.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_VLAN_VAR_H_
+#define        _NET_IF_VLAN_VAR_H_     1
+
+/* Set the VLAN ID in an mbuf packet header, preserving the PCP/CFI bits. */
+#define EVL_APPLY_VLID(m, vlid)                                                \
+       do {                                                            \
+               if ((m)->m_flags & M_VLANTAG) {                         \
+                       (m)->m_pkthdr.ether_vtag &= ~EVL_VLID_MASK;     \
+                       (m)->m_pkthdr.ether_vtag |= (vlid);             \
+               } else {                                                \
+                       (m)->m_pkthdr.ether_vtag = (vlid);              \
+                       (m)->m_flags |= M_VLANTAG;                      \
+               }                                                       \
+       } while (0)
+
+/* Set the priority in an mbuf packet header, preserving VLAN ID and CFI. */
+#define EVL_APPLY_PRI(m, pri)                                          \
+       do {                                                            \
+               if ((m)->m_flags & M_VLANTAG) {                         \
+                       uint16_t __vlantag = (m)->m_pkthdr.ether_vtag;  \
+                       (m)->m_pkthdr.ether_vtag = EVL_MAKETAG(         \
+                           EVL_VLANOFTAG(__vlantag), (pri),            \
+                           EVL_CFIOFTAG(__vlantag));                   \
+               } else {                                                \
+                       (m)->m_pkthdr.ether_vtag =                      \
+                           EVL_MAKETAG(0, (pri), 0);                   \
+                       (m)->m_flags |= M_VLANTAG;                      \
+               }                                                       \
+       } while (0)
+
+/* sysctl(3) tags, for compatibility purposes */
+#define        VLANCTL_PROTO   1
+#define        VLANCTL_MAX     2
+
+/*
+ * Configuration structure for SIOCSETVLAN and SIOCGETVLAN ioctls.
+ */
+struct vlanreq {
+       char    vlr_parent[IFNAMSIZ];
+       u_short vlr_tag;
+};
+#define        SIOCSETVLAN     SIOCSIFGENERIC
+#define        SIOCGETVLAN     SIOCGIFGENERIC
+
+#define        SIOCGVLANPCP    _IOWR('i', 152, struct ifreq)   /* Get VLAN PCP */
+#define        SIOCSVLANPCP     _IOW('i', 153, struct ifreq)   /* Set VLAN PCP */
+
+/*
+ * Names for 802.1q priorities ("802.1p").  Notice that in this scheme,
+ * (0 < 1), allowing default 0-tagged traffic to take priority over background
+ * tagged traffic.
+ */
+#define        IEEE8021Q_PCP_BK        1       /* Background (lowest) */
+#define        IEEE8021Q_PCP_BE        0       /* Best effort (default) */
+#define        IEEE8021Q_PCP_EE        2       /* Excellent effort */
+#define        IEEE8021Q_PCP_CA        3       /* Critical applications */
+#define        IEEE8021Q_PCP_VI        4       /* Video, < 100ms latency */
+#define        IEEE8021Q_PCP_VO        5       /* Voice, < 10ms latency */
+#define        IEEE8021Q_PCP_IC        6       /* Internetwork control */
+#define        IEEE8021Q_PCP_NC        7       /* Network control (highest) */
+
+#ifdef _KERNEL
+/*
+ * Drivers that are capable of adding and removing the VLAN header
+ * in hardware indicate they support this by marking IFCAP_VLAN_HWTAGGING
+ * in if_capabilities.  Drivers for hardware that is capable
+ * of handling larger MTU's that may include a software-appended
+ * VLAN header w/o lowering the normal MTU should mark IFCAP_VLAN_MTU
+ * in if_capabilities; this notifies the VLAN code it can leave the
+ * MTU on the vlan interface at the normal setting.
+ */
+
+/*
+ * VLAN tags are stored in host byte order.  Byte swapping may be
+ * necessary.
+ *
+ * Drivers that support hardware VLAN tag stripping fill in the
+ * received VLAN tag (containing both vlan and priority information)
+ * into the ether_vtag mbuf packet header field:
+ * 
+ *     m->m_pkthdr.ether_vtag = vtag;          // ntohs()?
+ *     m->m_flags |= M_VLANTAG;
+ *
+ * to mark the packet m with the specified VLAN tag.
+ *
+ * On output the driver should check the mbuf for the M_VLANTAG
+ * flag to see if a VLAN tag is present and valid:
+ *
+ *     if (m->m_flags & M_VLANTAG) {
+ *             ... = m->m_pkthdr.ether_vtag;   // htons()?
+ *             ... pass tag to hardware ...
+ *     }
+ *
+ * Note that a driver must indicate it supports hardware VLAN
+ * stripping/insertion by marking IFCAP_VLAN_HWTAGGING in
+ * if_capabilities.
+ */
+
+/*
+ * The 802.1q code may also tag mbufs with the PCP (priority) field for use in
+ * other layers of the stack, in which case an m_tag will be used.  This is
+ * semantically quite different from use of the ether_vtag field, which is
+ * defined only between the device driver and VLAN layer.
+ */
+#define        MTAG_8021Q              1326104895
+#define        MTAG_8021Q_PCP_IN       0               /* Input priority. */
+#define        MTAG_8021Q_PCP_OUT      1               /* Output priority. */
+
+#define        VLAN_CAPABILITIES(_ifp) do {                            \
+       if ((_ifp)->if_vlantrunk != NULL)                       \
+               (*vlan_trunk_cap_p)(_ifp);                      \
+} while (0)
+
+#define        VLAN_TRUNKDEV(_ifp)                                     \
+       ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL)
+#define        VLAN_TAG(_ifp, _vid)                                    \
+       ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL)
+#define        VLAN_COOKIE(_ifp)                                       \
+       ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL)
+#define        VLAN_SETCOOKIE(_ifp, _cookie)                           \
+       ((_ifp)->if_type == IFT_L2VLAN ?                        \
+           (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL)
+#define        VLAN_DEVAT(_ifp, _vid)                                  \
+       ((_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_vid)) : NULL)
+
+extern void (*vlan_trunk_cap_p)(struct ifnet *);
+extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
+extern struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
+extern int (*vlan_tag_p)(struct ifnet *, uint16_t *);
+extern int (*vlan_setcookie_p)(struct ifnet *, void *);
+extern void *(*vlan_cookie_p)(struct ifnet *);
+
+#ifdef _SYS_EVENTHANDLER_H_
+/* VLAN state change events */
+typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t);
+typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t);
+EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn);
+EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn);
+#endif /* _SYS_EVENTHANDLER_H_ */
+
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_VLAN_VAR_H_ */
diff --git a/include/netinet/if_ether.h b/include/netinet/if_ether.h
new file mode 100644 (file)
index 0000000..27e51f7
--- /dev/null
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)if_ether.h  8.3 (Berkeley) 5/2/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IF_ETHER_H_
+#define _NETINET_IF_ETHER_H_
+
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+
+/*
+ * Macro to map an IP multicast address to an Ethernet multicast address.
+ * The high-order 25 bits of the Ethernet address are statically assigned,
+ * and the low-order 23 bits are taken from the low end of the IP address.
+ */
+#define ETHER_MAP_IP_MULTICAST(ipaddr, enaddr) \
+       /* struct in_addr *ipaddr; */ \
+       /* u_char enaddr[ETHER_ADDR_LEN];          */ \
+{ \
+       (enaddr)[0] = 0x01; \
+       (enaddr)[1] = 0x00; \
+       (enaddr)[2] = 0x5e; \
+       (enaddr)[3] = ((const u_char *)(ipaddr))[1] & 0x7f; \
+       (enaddr)[4] = ((const u_char *)(ipaddr))[2]; \
+       (enaddr)[5] = ((const u_char *)(ipaddr))[3]; \
+}
+/*
+ * Macro to map an IP6 multicast address to an Ethernet multicast address.
+ * The high-order 16 bits of the Ethernet address are statically assigned,
+ * and the low-order 32 bits are taken from the low end of the IP6 address.
+ */
+#define ETHER_MAP_IPV6_MULTICAST(ip6addr, enaddr)                      \
+/* struct      in6_addr *ip6addr; */                                   \
+/* u_char      enaddr[ETHER_ADDR_LEN]; */                              \
+{                                                                       \
+       (enaddr)[0] = 0x33;                                             \
+       (enaddr)[1] = 0x33;                                             \
+       (enaddr)[2] = ((const u_char *)(ip6addr))[12];                  \
+       (enaddr)[3] = ((const u_char *)(ip6addr))[13];                  \
+       (enaddr)[4] = ((const u_char *)(ip6addr))[14];                  \
+       (enaddr)[5] = ((const u_char *)(ip6addr))[15];                  \
+}
+
+/*
+ * Ethernet Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description.  Structure below is adapted
+ * to resolving internet addresses.  Field names used correspond to
+ * RFC 826.
+ */
+struct ether_arp {
+       struct  arphdr ea_hdr;  /* fixed-size header */
+       u_char  arp_sha[ETHER_ADDR_LEN];        /* sender hardware address */
+       u_char  arp_spa[4];     /* sender protocol address */
+       u_char  arp_tha[ETHER_ADDR_LEN];        /* target hardware address */
+       u_char  arp_tpa[4];     /* target protocol address */
+};
+#define        arp_hrd ea_hdr.ar_hrd
+#define        arp_pro ea_hdr.ar_pro
+#define        arp_hln ea_hdr.ar_hln
+#define        arp_pln ea_hdr.ar_pln
+#define        arp_op  ea_hdr.ar_op
+
+#ifndef BURN_BRIDGES   /* Can be used by third party software. */
+struct sockaddr_inarp {
+       u_char  sin_len;
+       u_char  sin_family;
+       u_short sin_port;
+       struct  in_addr sin_addr;
+       struct  in_addr sin_srcaddr;
+       u_short sin_tos;
+       u_short sin_other;
+#define SIN_PROXY 1
+};
+#endif /* !BURN_BRIDGES  */
+
+/*
+ * IP and ethernet specific routing flags
+ */
+#define        RTF_USETRAILERS RTF_PROTO1      /* use trailers */
+#define RTF_ANNOUNCE   RTF_PROTO2      /* announce new arp entry */
+
+#ifdef _KERNEL
+extern u_char  ether_ipmulticast_min[ETHER_ADDR_LEN];
+extern u_char  ether_ipmulticast_max[ETHER_ADDR_LEN];
+
+struct ifaddr;
+struct llentry;
+
+int    arpresolve_addr(struct ifnet *ifp, int flags,
+           const struct sockaddr *dst, char *desten, uint32_t *pflags,
+           struct llentry **plle);
+int    arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
+           const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
+           struct llentry **plle);
+void   arprequest(struct ifnet *, const struct in_addr *,
+           const struct in_addr *, u_char *);
+void   arp_ifinit(struct ifnet *, struct ifaddr *);
+void   arp_announce_ifaddr(struct ifnet *, struct in_addr addr, u_char *);
+#endif
+
+#endif
diff --git a/include/netinet/ip6.h b/include/netinet/ip6.h
new file mode 100644 (file)
index 0000000..ff87057
--- /dev/null
@@ -0,0 +1,346 @@
+/*     $FreeBSD$       */
+/*     $KAME: ip6.h,v 1.18 2001/03/29 05:34:30 itojun Exp $    */
+
+/*-
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)ip.h        8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_IP6_H_
+#define _NETINET_IP6_H_
+
+/*
+ * Definition for internet protocol version 6.
+ * RFC 2460
+ */
+
+struct ip6_hdr {
+       union {
+               struct ip6_hdrctl {
+                       u_int32_t ip6_un1_flow; /* 4 bits version, 8 bits class, 20 bits flow-ID */
+                       u_int16_t ip6_un1_plen; /* payload length */
+                       u_int8_t  ip6_un1_nxt;  /* next header */
+                       u_int8_t  ip6_un1_hlim; /* hop limit */
+               } ip6_un1;
+               u_int8_t ip6_un2_vfc;   /* overlays byte 0 of ip6_un1_flow: 4 bits version, top 4 bits class */
+       } ip6_ctlun;
+       struct in6_addr ip6_src;        /* source address */
+       struct in6_addr ip6_dst;        /* destination address */
+} __packed;    /* fields are normally reached through the ip6_* macros that follow */
+
+#define ip6_vfc                ip6_ctlun.ip6_un2_vfc   /* version/class byte */
+#define ip6_flow       ip6_ctlun.ip6_un1.ip6_un1_flow  /* whole first 32-bit word */
+#define ip6_plen       ip6_ctlun.ip6_un1.ip6_un1_plen
+#define ip6_nxt                ip6_ctlun.ip6_un1.ip6_un1_nxt
+#define ip6_hlim       ip6_ctlun.ip6_un1.ip6_un1_hlim
+#define ip6_hops       ip6_ctlun.ip6_un1.ip6_un1_hlim  /* same member as ip6_hlim */
+
+#define IPV6_VERSION           0x60    /* 6 in the high nibble, as laid out in ip6_vfc */
+#define IPV6_VERSION_MASK      0xf0    /* high nibble of ip6_vfc */
+
+#if BYTE_ORDER == BIG_ENDIAN /* NOTE(review): nothing is #included by this header; if BYTE_ORDER and BIG_ENDIAN are both undefined, 0 == 0 selects this branch */
+#define IPV6_FLOWINFO_MASK     0x0fffffff      /* flow info (28 bits) */
+#define IPV6_FLOWLABEL_MASK    0x000fffff      /* flow label (20 bits) */
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define IPV6_FLOWINFO_MASK     0xffffff0f      /* flow info (28 bits); byte-swapped form of 0x0fffffff */
+#define IPV6_FLOWLABEL_MASK    0xffff0f00      /* flow label (20 bits); byte-swapped form of 0x000fffff */
+#endif /* LITTLE_ENDIAN */
+#endif
+#if 1  /* unconditionally enabled */
+/* ECN bits proposed by Sally Floyd */
+#define IP6TOS_CE              0x01    /* congestion experienced */
+#define IP6TOS_ECT             0x02    /* ECN-capable transport */
+#endif
+
+/*
+ * Extension Headers
+ */
+
+struct ip6_ext {
+       u_int8_t ip6e_nxt;
+       u_int8_t ip6e_len;
+} __packed;
+
+/* Hop-by-Hop options header */
+/* XXX should we pad it to force alignment on an 8-byte boundary? */
+struct ip6_hbh {
+       u_int8_t ip6h_nxt;      /* next header */
+       u_int8_t ip6h_len;      /* length in units of 8 octets */
+       /* followed by options */
+} __packed;
+
+/* Destination options header */
+/* XXX should we pad it to force alignment on an 8-byte boundary? */
+struct ip6_dest {
+       u_int8_t ip6d_nxt;      /* next header */
+       u_int8_t ip6d_len;      /* length in units of 8 octets */
+       /* followed by options */
+} __packed;
+
+/* Option types and related macros */
+#define IP6OPT_PAD1            0x00    /* 00 0 00000 */
+#define IP6OPT_PADN            0x01    /* 00 0 00001 */
+#define IP6OPT_JUMBO           0xC2    /* 11 0 00010 = 194 */
+#define IP6OPT_NSAP_ADDR       0xC3    /* 11 0 00011 */
+#define IP6OPT_TUNNEL_LIMIT    0x04    /* 00 0 00100 */
+#ifndef _KERNEL        /* the KAME spelling is hidden from kernel builds */
+#define IP6OPT_RTALERT         0x05    /* 00 0 00101 (KAME definition) */
+#endif
+#define IP6OPT_ROUTER_ALERT    0x05    /* 00 0 00101 (RFC3542, recommended) */
+
+#define IP6OPT_RTALERT_LEN     4       /* equals sizeof(struct ip6_opt_router) */
+#define IP6OPT_RTALERT_MLD     0       /* Datagram contains an MLD message */
+#define IP6OPT_RTALERT_RSVP    1       /* Datagram contains an RSVP message */
+#define IP6OPT_RTALERT_ACTNET  2       /* contains an Active Networks msg */
+#define IP6OPT_MINLEN          2       /* equals sizeof(struct ip6_opt): type + len */
+
+#define IP6OPT_EID             0x8a    /* 10 0 01010 */
+
+#define IP6OPT_TYPE(o)         ((o) & 0xC0)    /* keep only the two high bits of an option type */
+#define IP6OPT_TYPE_SKIP       0x00    /* 00 x xxxxx */
+#define IP6OPT_TYPE_DISCARD    0x40    /* 01 x xxxxx */
+#define IP6OPT_TYPE_FORCEICMP  0x80    /* 10 x xxxxx */
+#define IP6OPT_TYPE_ICMP       0xC0    /* 11 x xxxxx */
+
+#define IP6OPT_MUTABLE         0x20    /* xx 1 xxxxx */
+
+/* IPv6 options: common part */
+struct ip6_opt {
+       u_int8_t ip6o_type;
+       u_int8_t ip6o_len;
+} __packed;
+
+/* Jumbo Payload Option */
+struct ip6_opt_jumbo {
+       u_int8_t ip6oj_type;
+       u_int8_t ip6oj_len;
+       u_int8_t ip6oj_jumbo_len[4];
+} __packed;
+#define IP6OPT_JUMBO_LEN       6
+
+/* NSAP Address Option */
+struct ip6_opt_nsap {
+       u_int8_t ip6on_type;
+       u_int8_t ip6on_len;
+       u_int8_t ip6on_src_nsap_len;
+       u_int8_t ip6on_dst_nsap_len;
+       /* followed by source NSAP */
+       /* followed by destination NSAP */
+} __packed;
+
+/* Tunnel Limit Option */
+struct ip6_opt_tunnel {
+       u_int8_t ip6ot_type;
+       u_int8_t ip6ot_len;
+       u_int8_t ip6ot_encap_limit;
+} __packed;
+
+/* Router Alert Option */
+struct ip6_opt_router {
+       u_int8_t ip6or_type;
+       u_int8_t ip6or_len;
+       u_int8_t ip6or_value[2];
+} __packed;
+/* Router alert values (in network byte order) */
+#if BYTE_ORDER == BIG_ENDIAN
+#define IP6_ALERT_MLD  0x0000
+#define IP6_ALERT_RSVP 0x0001
+#define IP6_ALERT_AN   0x0002
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define IP6_ALERT_MLD  0x0000
+#define IP6_ALERT_RSVP 0x0100
+#define IP6_ALERT_AN   0x0200
+#endif /* LITTLE_ENDIAN */
+#endif
+
+/* Routing header */
+struct ip6_rthdr {
+       u_int8_t  ip6r_nxt;     /* next header */
+       u_int8_t  ip6r_len;     /* length in units of 8 octets */
+       u_int8_t  ip6r_type;    /* routing type */
+       u_int8_t  ip6r_segleft; /* segments left */
+       /* followed by routing type specific data */
+} __packed;
+
+/* Type 0 Routing header, deprecated by RFC 5095. */
+struct ip6_rthdr0 {
+       u_int8_t  ip6r0_nxt;            /* next header */
+       u_int8_t  ip6r0_len;            /* length in units of 8 octets */
+       u_int8_t  ip6r0_type;           /* always zero */
+       u_int8_t  ip6r0_segleft;        /* segments left */
+       u_int32_t  ip6r0_reserved;      /* reserved field */
+       /* followed by up to 127 struct in6_addr */
+} __packed;
+
+/* Fragment header */
+struct ip6_frag {
+       u_int8_t  ip6f_nxt;             /* next header */
+       u_int8_t  ip6f_reserved;        /* reserved field */
+       u_int16_t ip6f_offlg;           /* offset, reserved, and flag */
+       u_int32_t ip6f_ident;           /* identification */
+} __packed;
+
+#if BYTE_ORDER == BIG_ENDIAN   /* masks apply to ip6f_offlg of struct ip6_frag */
+#define IP6F_OFF_MASK          0xfff8  /* mask out offset from _offlg */
+#define IP6F_RESERVED_MASK     0x0006  /* reserved bits in ip6f_offlg */
+#define IP6F_MORE_FRAG         0x0001  /* more-fragments flag */
+#else /* assumed LITTLE_ENDIAN: no explicit test here, unlike the other BYTE_ORDER blocks in this header */
+#define IP6F_OFF_MASK          0xf8ff  /* mask out offset from _offlg (byte-swapped 0xfff8) */
+#define IP6F_RESERVED_MASK     0x0600  /* reserved bits in ip6f_offlg (byte-swapped 0x0006) */
+#define IP6F_MORE_FRAG         0x0100  /* more-fragments flag (byte-swapped 0x0001) */
+#endif /* BYTE_ORDER == LITTLE_ENDIAN */
+
+/*
+ * Internet implementation parameters.
+ */
+#define IPV6_MAXHLIM   255     /* maximum hoplimit */
+#define IPV6_DEFHLIM   64      /* default hlim */
+#define IPV6_FRAGTTL   120     /* ttl for fragment packets, in slowtimo tick */
+#define IPV6_HLIMDEC   1       /* subtracted when forwarding */
+
+#define IPV6_MMTU      1280    /* minimal MTU and reassembly. 1024 + 256 */
+#define IPV6_MAXPACKET 65535   /* ip6 max packet size without Jumbo payload*/
+#define IPV6_MAXOPTHDR 2048    /* max option header size, 256 64-bit words */
+
+#ifdef _KERNEL
+/*
+ * IP6_EXTHDR_CHECK ensures that the region between the IP6 header and the
+ * target header (including IPv6 itself, extension headers and
+ * TCP/UDP/ICMP6 headers) is contiguous. KAME requires drivers
+ * to store incoming data into one internal mbuf or one or more external
+ * mbufs(never into two or more internal mbufs). Thus, the third case is
+ * supposed to never be matched but is prepared just in case.
+ */
+
+#define IP6_EXTHDR_CHECK(m, off, hlen, ret)                            \
+do {                                                                   \
+    if ((m)->m_next != NULL) {                                         \
+       if (((m)->m_flags & M_LOOP) &&                                  \
+           ((m)->m_len < (off) + (hlen)) &&                            \
+           (((m) = m_pullup((m), (off) + (hlen))) == NULL)) {          \
+               IP6STAT_INC(ip6s_exthdrtoolong);                                \
+               return ret;                                             \
+       } else {                                                        \
+               if ((m)->m_len < (off) + (hlen)) {                      \
+                       IP6STAT_INC(ip6s_exthdrtoolong);                        \
+                       m_freem(m);                                     \
+                       return ret;                                     \
+               }                                                       \
+       }                                                               \
+    } else {                                                           \
+       if ((m)->m_len < (off) + (hlen)) {                              \
+               IP6STAT_INC(ip6s_tooshort);                             \
+               in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);   \
+               m_freem(m);                                             \
+               return ret;                                             \
+       }                                                               \
+    }                                                                  \
+} while (/*CONSTCOND*/ 0)
+
+/*
+ * IP6_EXTHDR_GET ensures that intermediate protocol header (from "off" to
+ * "len") is located in single mbuf, on contiguous memory region.
+ * The pointer to the region will be returned to pointer variable "val",
+ * with type "typ".
+ * IP6_EXTHDR_GET0 does the same, except that it aligns the structure at the
+ * very top of mbuf.  GET0 is more likely to make a memory copy than GET.
+ *
+ * XXX we're now testing this, needs m_pulldown()
+ */
+#define IP6_EXTHDR_GET(val, typ, m, off, len) \
+do {   /* NB: locals t/tmp shadow same-named identifiers in the arguments */ \
+       struct mbuf *t;         /* mbuf handed back by m_pulldown() */  \
+       int tmp;                /* set through &tmp; used as the offset into t */ \
+       if ((m)->m_len >= (off) + (len)) /* first mbuf already covers off..off+len */ \
+               (val) = (typ)(mtod((m), caddr_t) + (off));              \
+       else {                                                          \
+               t = m_pulldown((m), (off), (len), &tmp);                \
+               if (t) {                                                \
+                       if (t->m_len < tmp + (len)) /* sanity check on the result */ \
+                               panic("m_pulldown malfunction");        \
+                       (val) = (typ)(mtod(t, caddr_t) + tmp);          \
+               } else { /* failure: caller must test val (or m) for NULL */ \
+                       (val) = (typ)NULL;                              \
+                       (m) = NULL; /* NOTE(review): presumably m_pulldown() already freed the chain -- confirm */ \
+               }                                                       \
+       }                                                               \
+} while (/*CONSTCOND*/ 0)
+
+#define IP6_EXTHDR_GET0(val, typ, m, off, len) \
+do {   /* NB: local t shadows a same-named identifier in the arguments */ \
+       struct mbuf *t;         /* mbuf handed back by m_pulldown() */  \
+       if ((off) == 0) /* NOTE(review): no m_len >= len check on this path */ \
+               (val) = (typ)mtod(m, caddr_t);                          \
+       else {                                                          \
+               t = m_pulldown((m), (off), (len), NULL); /* no offset out-param: data is read from the start of t */ \
+               if (t) {                                                \
+                       if (t->m_len < (len)) /* sanity check on the result */ \
+                               panic("m_pulldown malfunction");        \
+                       (val) = (typ)mtod(t, caddr_t);                  \
+               } else { /* failure: caller must test val (or m) for NULL */ \
+                       (val) = (typ)NULL;                              \
+                       (m) = NULL; /* NOTE(review): presumably m_pulldown() already freed the chain -- confirm */ \
+               }                                                       \
+       }                                                               \
+} while (/*CONSTCOND*/ 0)
+
+#endif /*_KERNEL*/
+
+#endif /* not _NETINET_IP6_H_ */
diff --git a/include/netinet/tcp_lro.h b/include/netinet/tcp_lro.h
new file mode 100644 (file)
index 0000000..e019cd1
--- /dev/null
@@ -0,0 +1,121 @@
+/*-
+ * Copyright (c) 2006, Myricom Inc.
+ * Copyright (c) 2008, Intel Corporation.
+ * Copyright (c) 2016 Mellanox Technologies.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_LRO_H_
+#define _TCP_LRO_H_
+
+#include <sys/time.h>
+
+#ifndef TCP_LRO_ENTRIES
+/* Define default number of LRO entries per RX queue */
+#define        TCP_LRO_ENTRIES 8
+#endif
+
+struct lro_entry {     /* NB: LIST_ENTRY, in_addr_t and struct in6_addr must come from the includer; only <sys/time.h> is included here */
+       LIST_ENTRY(lro_entry)   next;           /* list linkage #1 */
+       LIST_ENTRY(lro_entry)   hash_next;      /* list linkage #2; NOTE(review): presumably a lro_hash bucket chain -- confirm */
+       struct mbuf             *m_head;        /* NOTE(review): presumably first mbuf of the aggregate -- confirm */
+       struct mbuf             *m_tail;        /* NOTE(review): presumably last mbuf of the aggregate -- confirm */
+       union {                                 /* pointer to the IP header, v4 or v6 */
+               struct ip       *ip4;
+               struct ip6_hdr  *ip6;
+       } leip;
+       union {                                 /* source address by value, v4 or v6 */
+               in_addr_t       s_ip4;
+               struct in6_addr s_ip6;
+       } lesource;
+       union {                                 /* destination address by value, v4 or v6 */
+               in_addr_t       d_ip4;
+               struct in6_addr d_ip6;
+       } ledest;
+       uint16_t                source_port;
+       uint16_t                dest_port;
+       uint16_t                eh_type;        /* EthernetHeader type. */
+       uint16_t                append_cnt;
+       uint32_t                p_len;          /* IP header payload length. */
+       uint32_t                ulp_csum;       /* TCP, etc. checksum. */
+       uint32_t                next_seq;       /* tcp_seq */
+       uint32_t                ack_seq;        /* tcp_seq */
+       uint32_t                tsval;
+       uint32_t                tsecr;
+       uint16_t                window;
+       uint16_t                timestamp;      /* flag, not a TCP hdr field. */
+       struct timeval          mtime;
+};
+LIST_HEAD(lro_head, lro_entry);        /* declares struct lro_head, a list of lro_entry */
+
+#define        le_ip4                  leip.ip4
+#define        le_ip6                  leip.ip6
+#define        source_ip4              lesource.s_ip4
+#define        dest_ip4                ledest.d_ip4
+#define        source_ip6              lesource.s_ip6
+#define        dest_ip6                ledest.d_ip6
+
+struct lro_mbuf_sort {
+       uint64_t seq;
+       struct mbuf *mb;
+};
+
+/* NB: This is part of driver structs. */
+struct lro_ctrl {
+       struct ifnet    *ifp;
+       struct lro_mbuf_sort *lro_mbuf_data;    /* points at (seq, mb) records */
+       uint64_t        lro_queued;
+       uint64_t        lro_flushed;
+       uint64_t        lro_bad_csum;
+       unsigned        lro_cnt;
+       unsigned        lro_mbuf_count;         /* NOTE(review): presumably entries used in lro_mbuf_data -- confirm */
+       unsigned        lro_mbuf_max;           /* NOTE(review): presumably capacity of lro_mbuf_data -- confirm */
+       unsigned short  lro_ackcnt_lim;         /* max # of aggregated ACKs */
+       unsigned        lro_length_lim;         /* max len of aggregated data */
+
+       u_long          lro_hashsz;
+       struct lro_head *lro_hash;              /* pointer to list heads; NOTE(review): presumably lro_hashsz of them -- confirm */
+       struct lro_head lro_active;             /* list of lro_entry */
+       struct lro_head lro_free;               /* list of lro_entry */
+};
+
+#define        TCP_LRO_LENGTH_MAX      65535
+#define        TCP_LRO_ACKCNT_MAX      65535           /* unlimited */
+
+int tcp_lro_init(struct lro_ctrl *);
+int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
+void tcp_lro_free(struct lro_ctrl *);
+void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
+void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
+void tcp_lro_flush_all(struct lro_ctrl *);
+int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
+void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
+
+#define        TCP_LRO_NO_ENTRIES      -2
+#define        TCP_LRO_CANNOT          -1
+#define        TCP_LRO_NOT_SUPPORTED   1
+
+#endif /* _TCP_LRO_H_ */
diff --git a/include/sys/tree.h b/include/sys/tree.h
new file mode 100644 (file)
index 0000000..c9df686
--- /dev/null
@@ -0,0 +1,801 @@
+/*     $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $  */
+/*     $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $    */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef        _SYS_TREE_H_
+#define        _SYS_TREE_H_
+
+#include <sys/cdefs.h>
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost per access over a sequence of m accesses is O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *     - every search path from the root to a leaf consists of the
+ *       same number of black nodes,
+ *     - each red node (except for the root) has a black parent,
+ *     - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) /* declares struct name, the tree handle */ \
+struct name {                                                          \
+       struct type *sph_root; /* root of the tree */                   \
+}
+
+#define SPLAY_INITIALIZER(root) /* the root argument is not used */    \
+       { NULL }        /* brace initializer for an empty tree */
+
+#define SPLAY_INIT(root) do {  /* run-time reset to the empty tree */  \
+       (root)->sph_root = NULL;                                        \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type)      /* anonymous link struct; no parent pointer */ \
+struct {                                                               \
+       struct type *spe_left; /* left element */                       \
+       struct type *spe_right; /* right element */                     \
+}
+
+#define SPLAY_LEFT(elm, field)         (elm)->field.spe_left   /* lvalue: left child of elm */
+#define SPLAY_RIGHT(elm, field)                (elm)->field.spe_right  /* lvalue: right child of elm */
+#define SPLAY_ROOT(head)               (head)->sph_root        /* lvalue: root pointer */
+#define SPLAY_EMPTY(head)              (SPLAY_ROOT(head) == NULL)      /* true when the root is NULL */
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} of the root; tmp becomes the new root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {                      \
+       SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); /* root adopts tmp's right subtree */ \
+       SPLAY_RIGHT(tmp, field) = (head)->sph_root; /* old root becomes tmp's right child */ \
+       (head)->sph_root = tmp; /* tmp is the new root */               \
+} while (/*CONSTCOND*/ 0)
+       
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {                       \
+       SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); /* root adopts tmp's left subtree */ \
+       SPLAY_LEFT(tmp, field) = (head)->sph_root; /* old root becomes tmp's left child */ \
+       (head)->sph_root = tmp; /* tmp is the new root */               \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do { /* tmp is assigned: it must be an lvalue */ \
+       SPLAY_LEFT(tmp, field) = (head)->sph_root; /* hang the current root on tmp's left */ \
+       tmp = (head)->sph_root; /* tmp advances to that node */         \
+       (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); /* descend to the left child */ \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do { /* mirror image of SPLAY_LINKLEFT */ \
+       SPLAY_RIGHT(tmp, field) = (head)->sph_root; /* hang the current root on tmp's right */ \
+       tmp = (head)->sph_root; /* tmp advances to that node */         \
+       (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); /* descend to the right child */ \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { /* four order-dependent pointer stores */ \
+       SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); /* left->right = root's left subtree */ \
+       SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);/* right->left = root's right subtree */ \
+       SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); /* root->left = node->right */ \
+       SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); /* root->right = node->left */ \
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)                                \
+void name##_SPLAY(struct name *, struct type *);                       \
+void name##_SPLAY_MINMAX(struct name *, int);                          \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *);                \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *);                \
+/* The four functions above are only declared here; three inline helpers follow. */ \
+/* Finds the node with the same key as elm; splays the tree first, so a lookup modifies it. */ \
+static __inline struct type *                                          \
+name##_SPLAY_FIND(struct name *head, struct type *elm)                 \
+{                                                                      \
+       if (SPLAY_EMPTY(head))                                          \
+               return(NULL);                                           \
+       name##_SPLAY(head, elm);                                        \
+       if ((cmp)(elm, (head)->sph_root) == 0) /* root compares equal to elm */ \
+               return (head->sph_root);                                \
+       return (NULL);                                                  \
+}                                                                      \
+/* After splaying on elm: leftmost node of elm's right subtree, or NULL if elm has no right child. No empty-tree guard, unlike _SPLAY_FIND. */ \
+static __inline struct type *                                          \
+name##_SPLAY_NEXT(struct name *head, struct type *elm)                 \
+{                                                                      \
+       name##_SPLAY(head, elm);                                        \
+       if (SPLAY_RIGHT(elm, field) != NULL) {                          \
+               elm = SPLAY_RIGHT(elm, field);                          \
+               while (SPLAY_LEFT(elm, field) != NULL) {                \
+                       elm = SPLAY_LEFT(elm, field);                   \
+               }                                                       \
+       } else                                                          \
+               elm = NULL;                                             \
+       return (elm);                                                   \
+}                                                                      \
+/* Calls name##_SPLAY_MINMAX(head, val), then returns the root; the meaning of val is defined by that function. */ \
+static __inline struct type *                                          \
+name##_SPLAY_MIN_MAX(struct name *head, int val)                       \
+{                                                                      \
+       name##_SPLAY_MINMAX(head, val);                                 \
+        return (SPLAY_ROOT(head));                                     \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp)                         \
+struct type *                                                          \
+name##_SPLAY_INSERT(struct name *head, struct type *elm)               \
+{                                                                      \
+    if (SPLAY_EMPTY(head)) {                                           \
+           SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;    \
+    } else {                                                           \
+           int __comp;                                                 \
+           name##_SPLAY(head, elm);                                    \
+           __comp = (cmp)(elm, (head)->sph_root);                      \
+           if(__comp < 0) {                                            \
+                   SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+                   SPLAY_RIGHT(elm, field) = (head)->sph_root;         \
+                   SPLAY_LEFT((head)->sph_root, field) = NULL;         \
+           } else if (__comp > 0) {                                    \
+                   SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+                   SPLAY_LEFT(elm, field) = (head)->sph_root;          \
+                   SPLAY_RIGHT((head)->sph_root, field) = NULL;        \
+           } else                                                      \
+                   return ((head)->sph_root);                          \
+    }                                                                  \
+    (head)->sph_root = (elm);                                          \
+    return (NULL);                                                     \
+}                                                                      \
+                                                                       \
+struct type *                                                          \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm)               \
+{                                                                      \
+       struct type *__tmp;                                             \
+       if (SPLAY_EMPTY(head))                                          \
+               return (NULL);                                          \
+       name##_SPLAY(head, elm);                                        \
+       if ((cmp)(elm, (head)->sph_root) == 0) {                        \
+               if (SPLAY_LEFT((head)->sph_root, field) == NULL) {      \
+                       (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+               } else {                                                \
+                       __tmp = SPLAY_RIGHT((head)->sph_root, field);   \
+                       (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+                       name##_SPLAY(head, elm);                        \
+                       SPLAY_RIGHT((head)->sph_root, field) = __tmp;   \
+               }                                                       \
+               return (elm);                                           \
+       }                                                               \
+       return (NULL);                                                  \
+}                                                                      \
+                                                                       \
+void                                                                   \
+name##_SPLAY(struct name *head, struct type *elm)                      \
+{                                                                      \
+       struct type __node, *__left, *__right, *__tmp;                  \
+       int __comp;                                                     \
+\
+       SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+       __left = __right = &__node;                                     \
+\
+       while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {          \
+               if (__comp < 0) {                                       \
+                       __tmp = SPLAY_LEFT((head)->sph_root, field);    \
+                       if (__tmp == NULL)                              \
+                               break;                                  \
+                       if ((cmp)(elm, __tmp) < 0){                     \
+                               SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+                               if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+                                       break;                          \
+                       }                                               \
+                       SPLAY_LINKLEFT(head, __right, field);           \
+               } else if (__comp > 0) {                                \
+                       __tmp = SPLAY_RIGHT((head)->sph_root, field);   \
+                       if (__tmp == NULL)                              \
+                               break;                                  \
+                       if ((cmp)(elm, __tmp) > 0){                     \
+                               SPLAY_ROTATE_LEFT(head, __tmp, field);  \
+                               if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+                                       break;                          \
+                       }                                               \
+                       SPLAY_LINKRIGHT(head, __left, field);           \
+               }                                                       \
+       }                                                               \
+       SPLAY_ASSEMBLE(head, &__node, __left, __right, field);          \
+}                                                                      \
+                                                                       \
+/* Splay with either the minimum or the maximum element                        \
+ * Used to find minimum or maximum element in tree.                    \
+ */                                                                    \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{                                                                      \
+       struct type __node, *__left, *__right, *__tmp;                  \
+\
+       SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+       __left = __right = &__node;                                     \
+\
+       while (1) {                                                     \
+               if (__comp < 0) {                                       \
+                       __tmp = SPLAY_LEFT((head)->sph_root, field);    \
+                       if (__tmp == NULL)                              \
+                               break;                                  \
+                       if (__comp < 0){                                \
+                               SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+                               if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+                                       break;                          \
+                       }                                               \
+                       SPLAY_LINKLEFT(head, __right, field);           \
+               } else if (__comp > 0) {                                \
+                       __tmp = SPLAY_RIGHT((head)->sph_root, field);   \
+                       if (__tmp == NULL)                              \
+                               break;                                  \
+                       if (__comp > 0) {                               \
+                               SPLAY_ROTATE_LEFT(head, __tmp, field);  \
+                               if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+                                       break;                          \
+                       }                                               \
+                       SPLAY_LINKRIGHT(head, __left, field);           \
+               }                                                       \
+       }                                                               \
+       SPLAY_ASSEMBLE(head, &__node, __left, __right, field);          \
+}
+
+#define SPLAY_NEGINF   -1      /* direction value SPLAY_MIN hands to name##_SPLAY_MIN_MAX */
+#define SPLAY_INF      1       /* direction value SPLAY_MAX hands to name##_SPLAY_MIN_MAX */
+
+#define SPLAY_INSERT(name, x, y)       name##_SPLAY_INSERT(x, y)       /* forwards to name##_SPLAY_INSERT for tree type "name" */
+#define SPLAY_REMOVE(name, x, y)       name##_SPLAY_REMOVE(x, y)       /* forwards to name##_SPLAY_REMOVE */
+#define SPLAY_FIND(name, x, y)         name##_SPLAY_FIND(x, y)         /* forwards to name##_SPLAY_FIND */
+#define SPLAY_NEXT(name, x, y)         name##_SPLAY_NEXT(x, y)         /* forwards to name##_SPLAY_NEXT */
+#define SPLAY_MIN(name, x)             (SPLAY_EMPTY(x) ? NULL  /* NULL when SPLAY_EMPTY(x); otherwise name##_SPLAY_MIN_MAX with SPLAY_NEGINF */ \
+                                       : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x)             (SPLAY_EMPTY(x) ? NULL  /* NULL when SPLAY_EMPTY(x); otherwise name##_SPLAY_MIN_MAX with SPLAY_INF */ \
+                                       : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head)   /* for-loop header: x starts at SPLAY_MIN(name, head) and steps with SPLAY_NEXT until it is NULL */ \
+       for ((x) = SPLAY_MIN(name, head);                               \
+            (x) != NULL;                                               \
+            (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree.
+ *
+ * RB_HEAD(name, type) declares "struct name", the tree head. Its single
+ * member is a pointer to the root element, which is a "struct type".
+ */
+#define RB_HEAD(name, type)                                            \
+struct name {                                                          \
+       struct type *rbh_root; /* root of the tree */                   \
+}
+
+#define RB_INITIALIZER(root)           /* brace initializer that yields a NULL first member; the root argument is not used in the expansion */ \
+       { NULL }
+
+#define RB_INIT(root) do {             /* empties the tree head by setting its root pointer to NULL */ \
+       (root)->rbh_root = NULL;                                        \
+} while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK       0       /* rbe_color value for a black node */
+#define RB_RED         1       /* rbe_color value for a red node; RB_SET assigns this color */
+#define RB_ENTRY(type)         /* anonymous struct with one element's tree links; RB_LEFT/RB_RIGHT/RB_PARENT/RB_COLOR reach it as (elm)->field */ \
+struct {                                                               \
+       struct type *rbe_left;          /* left element */              \
+       struct type *rbe_right;         /* right element */             \
+       struct type *rbe_parent;        /* parent element */            \
+       int rbe_color;                  /* node color */                \
+}
+
+#define RB_LEFT(elm, field)            (elm)->field.rbe_left   /* lvalue: left child pointer of elm */
+#define RB_RIGHT(elm, field)           (elm)->field.rbe_right  /* lvalue: right child pointer of elm */
+#define RB_PARENT(elm, field)          (elm)->field.rbe_parent /* lvalue: parent pointer of elm */
+#define RB_COLOR(elm, field)           (elm)->field.rbe_color  /* lvalue: color of elm (RB_BLACK or RB_RED) */
+#define RB_ROOT(head)                  (head)->rbh_root        /* lvalue: root pointer stored in the tree head */
+#define RB_EMPTY(head)                 (RB_ROOT(head) == NULL) /* nonzero when the root pointer is NULL */
+
+#define RB_SET(elm, parent, field) do {        /* sets elm's own links: the given parent, no children, red; the parent's child pointer is not written here */ \
+       RB_PARENT(elm, field) = parent;                                 \
+       RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;              \
+       RB_COLOR(elm, field) = RB_RED;                                  \
+} while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) do {        /* colors the first node black and the second node red */ \
+       RB_COLOR(black, field) = RB_BLACK;                              \
+       RB_COLOR(red, field) = RB_RED;                                  \
+} while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT     /* keep an RB_AUGMENT that is already defined; otherwise fall back to a no-op */
+#define RB_AUGMENT(x)  do {} while (0) /* default hook: does nothing with x */
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do {     /*              \
+ * Rotates left around elm. tmp is written by the macro: it receives   \
+ * elm's right child, which is dereferenced unconditionally and so     \
+ * must not be NULL. tmp's left subtree becomes elm's right subtree,   \
+ * tmp replaces elm under elm's former parent (or as the root), and    \
+ * elm becomes tmp's left child. RB_AUGMENT is invoked on elm, on tmp, \
+ * and on tmp's parent when there is one.                              \
+ */                                                                    \
+       (tmp) = RB_RIGHT(elm, field);                                   \
+       if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) {     \
+               RB_PARENT(RB_LEFT(tmp, field), field) = (elm);          \
+       }                                                               \
+       RB_AUGMENT(elm);                                                \
+       if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {  \
+               if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))     \
+                       RB_LEFT(RB_PARENT(elm, field), field) = (tmp);  \
+               else                                                    \
+                       RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+       } else         /* elm had no parent, so tmp becomes the root */ \
+               (head)->rbh_root = (tmp);                               \
+       RB_LEFT(tmp, field) = (elm);                                    \
+       RB_PARENT(elm, field) = (tmp);                                  \
+       RB_AUGMENT(tmp);                                                \
+       if ((RB_PARENT(tmp, field)))                                    \
+               RB_AUGMENT(RB_PARENT(tmp, field));                      \
+} while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {    /*              \
+ * Rotates right around elm. tmp is written by the macro: it receives  \
+ * elm's left child, which is dereferenced unconditionally and so must \
+ * not be NULL. tmp's right subtree becomes elm's left subtree, tmp    \
+ * replaces elm under elm's former parent (or as the root), and elm    \
+ * becomes tmp's right child. RB_AUGMENT is invoked on elm, on tmp,    \
+ * and on tmp's parent when there is one.                              \
+ */                                                                    \
+       (tmp) = RB_LEFT(elm, field);                                    \
+       if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) {     \
+               RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);         \
+       }                                                               \
+       RB_AUGMENT(elm);                                                \
+       if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {  \
+               if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))     \
+                       RB_LEFT(RB_PARENT(elm, field), field) = (tmp);  \
+               else                                                    \
+                       RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+       } else         /* elm had no parent, so tmp becomes the root */ \
+               (head)->rbh_root = (tmp);                               \
+       RB_RIGHT(tmp, field) = (elm);                                   \
+       RB_PARENT(elm, field) = (tmp);                                  \
+       RB_AUGMENT(tmp);                                                \
+       if ((RB_PARENT(tmp, field)))                                    \
+               RB_AUGMENT(RB_PARENT(tmp, field));                      \
+} while (/*CONSTCOND*/ 0)
+
+/* Generates function prototypes for a tree type; the macros in this
+ * group emit declarations only, no function bodies.
+ *
+ * RB_PROTOTYPE passes an empty attr, RB_PROTOTYPE_STATIC passes
+ * "__unused static". Each RB_PROTOTYPE_<OP> declares name##_RB_<OP>
+ * with attr placed in front of the return type. RB_PROTOTYPE_INTERNAL
+ * already ends with a semicolon after its last prototype.
+ */
+#define        RB_PROTOTYPE(name, type, field, cmp)                            \
+       RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
+#define        RB_PROTOTYPE_STATIC(name, type, field, cmp)                     \
+       RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)            \
+       RB_PROTOTYPE_INSERT_COLOR(name, type, attr);                    \
+       RB_PROTOTYPE_REMOVE_COLOR(name, type, attr);                    \
+       RB_PROTOTYPE_INSERT(name, type, attr);                          \
+       RB_PROTOTYPE_REMOVE(name, type, attr);                          \
+       RB_PROTOTYPE_FIND(name, type, attr);                            \
+       RB_PROTOTYPE_NFIND(name, type, attr);                           \
+       RB_PROTOTYPE_NEXT(name, type, attr);                            \
+       RB_PROTOTYPE_PREV(name, type, attr);                            \
+       RB_PROTOTYPE_MINMAX(name, type, attr);
+#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr)                    \
+       attr void name##_RB_INSERT_COLOR(struct name *, struct type *)
+#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr)                    \
+       attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *)
+#define RB_PROTOTYPE_REMOVE(name, type, attr)                          \
+       attr struct type *name##_RB_REMOVE(struct name *, struct type *)
+#define RB_PROTOTYPE_INSERT(name, type, attr)                          \
+       attr struct type *name##_RB_INSERT(struct name *, struct type *)
+#define RB_PROTOTYPE_FIND(name, type, attr)                            \
+       attr struct type *name##_RB_FIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NFIND(name, type, attr)                           \
+       attr struct type *name##_RB_NFIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NEXT(name, type, attr)                            \
+       attr struct type *name##_RB_NEXT(struct type *)
+#define RB_PROTOTYPE_PREV(name, type, attr)                            \
+       attr struct type *name##_RB_PREV(struct type *)
+#define RB_PROTOTYPE_MINMAX(name, type, attr)                          \
+       attr struct type *name##_RB_MINMAX(struct name *, int)
+
+/* Generates the red-black tree function definitions for a tree type.
+ * RB_GENERATE passes an empty attr; RB_GENERATE_STATIC passes
+ * "__unused static". RB_GENERATE_INTERNAL expands, in this order, the
+ * INSERT_COLOR, REMOVE_COLOR, INSERT, REMOVE, FIND, NFIND, NEXT, PREV
+ * and MINMAX generators. Only INSERT, FIND and NFIND receive cmp.
+ */
+#define        RB_GENERATE(name, type, field, cmp)                             \
+       RB_GENERATE_INTERNAL(name, type, field, cmp,)
+#define        RB_GENERATE_STATIC(name, type, field, cmp)                      \
+       RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static)
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr)             \
+       RB_GENERATE_INSERT_COLOR(name, type, field, attr)               \
+       RB_GENERATE_REMOVE_COLOR(name, type, field, attr)               \
+       RB_GENERATE_INSERT(name, type, field, cmp, attr)                \
+       RB_GENERATE_REMOVE(name, type, field, attr)                     \
+       RB_GENERATE_FIND(name, type, field, cmp, attr)                  \
+       RB_GENERATE_NFIND(name, type, field, cmp, attr)                 \
+       RB_GENERATE_NEXT(name, type, field, attr)                       \
+       RB_GENERATE_PREV(name, type, field, attr)                       \
+       RB_GENERATE_MINMAX(name, type, field, attr)
+
+#define RB_GENERATE_INSERT_COLOR(name, type, field, attr)      /*      \
+ * Defines name##_RB_INSERT_COLOR(head, elm), which name##_RB_INSERT   \
+ * calls after linking elm. It recolors and rotates for as long as     \
+ * elm has a red parent, then colors the root black. The grandparent   \
+ * of a red parent is dereferenced without a NULL check; presumably    \
+ * this relies on the root always being black - confirm.               \
+ */                                                                    \
+attr void                                                              \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)            \
+{                                                                      \
+       struct type *parent, *gparent, *tmp;                            \
+       while ((parent = RB_PARENT(elm, field)) != NULL &&              \
+           RB_COLOR(parent, field) == RB_RED) {                        \
+               gparent = RB_PARENT(parent, field);                     \
+               if (parent == RB_LEFT(gparent, field)) {                \
+                       tmp = RB_RIGHT(gparent, field); /* parent's sibling */ \
+                       if (tmp && RB_COLOR(tmp, field) == RB_RED) {    /* red sibling: recolor and continue from gparent */ \
+                               RB_COLOR(tmp, field) = RB_BLACK;        \
+                               RB_SET_BLACKRED(parent, gparent, field);\
+                               elm = gparent;                          \
+                               continue;                               \
+                       }                                               \
+                       if (RB_RIGHT(parent, field) == elm) {   /* elm is a right child: rotate parent left, then swap the two names */ \
+                               RB_ROTATE_LEFT(head, parent, tmp, field);\
+                               tmp = parent;                           \
+                               parent = elm;                           \
+                               elm = tmp;                              \
+                       }                                               \
+                       RB_SET_BLACKRED(parent, gparent, field);        \
+                       RB_ROTATE_RIGHT(head, gparent, tmp, field);     \
+               } else {        /* mirror image: parent is not gparent's left child */ \
+                       tmp = RB_LEFT(gparent, field);  /* parent's sibling */ \
+                       if (tmp && RB_COLOR(tmp, field) == RB_RED) {    /* red sibling: recolor and continue from gparent */ \
+                               RB_COLOR(tmp, field) = RB_BLACK;        \
+                               RB_SET_BLACKRED(parent, gparent, field);\
+                               elm = gparent;                          \
+                               continue;                               \
+                       }                                               \
+                       if (RB_LEFT(parent, field) == elm) {    /* elm is a left child: rotate parent right, then swap the two names */ \
+                               RB_ROTATE_RIGHT(head, parent, tmp, field);\
+                               tmp = parent;                           \
+                               parent = elm;                           \
+                               elm = tmp;                              \
+                       }                                               \
+                       RB_SET_BLACKRED(parent, gparent, field);        \
+                       RB_ROTATE_LEFT(head, gparent, tmp, field);      \
+               }                                                       \
+       }                                                               \
+       RB_COLOR(head->rbh_root, field) = RB_BLACK;     /* the root always ends up black */ \
+}
+
+#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr)      /*      \
+ * Defines name##_RB_REMOVE_COLOR(head, parent, elm), which            \
+ * name##_RB_REMOVE calls when the color it recorded for the unlinked  \
+ * node was black. elm may be NULL; parent is compared against elm to  \
+ * decide on which side elm hangs. The loop runs while elm is NULL or  \
+ * black and is not the root. tmp holds elm's sibling and is           \
+ * dereferenced without a NULL check.                                  \
+ */                                                                    \
+attr void                                                              \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{                                                                      \
+       struct type *tmp;                                               \
+       while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) &&     \
+           elm != RB_ROOT(head)) {                                     \
+               if (RB_LEFT(parent, field) == elm) {    /* elm is the left child; sibling is on the right */ \
+                       tmp = RB_RIGHT(parent, field);                  \
+                       if (RB_COLOR(tmp, field) == RB_RED) {   /* red sibling: recolor, rotate, pick the new sibling */ \
+                               RB_SET_BLACKRED(tmp, parent, field);    \
+                               RB_ROTATE_LEFT(head, parent, tmp, field);\
+                               tmp = RB_RIGHT(parent, field);          \
+                       }                                               \
+                       if ((RB_LEFT(tmp, field) == NULL ||             \
+                           RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+                           (RB_RIGHT(tmp, field) == NULL ||            \
+                           RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+                               RB_COLOR(tmp, field) = RB_RED;  /* no red child under the sibling: redden it and move one level up */ \
+                               elm = parent;                           \
+                               parent = RB_PARENT(elm, field);         \
+                       } else {                                        \
+                               if (RB_RIGHT(tmp, field) == NULL ||     \
+                                   RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+                                       struct type *oleft;     /* sibling's left child; also the scratch variable for the rotation */ \
+                                       if ((oleft = RB_LEFT(tmp, field)) \
+                                           != NULL)                    \
+                                               RB_COLOR(oleft, field) = RB_BLACK;\
+                                       RB_COLOR(tmp, field) = RB_RED;  \
+                                       RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+                                       tmp = RB_RIGHT(parent, field);  \
+                               }                                       \
+                               RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+                               RB_COLOR(parent, field) = RB_BLACK;     \
+                               if (RB_RIGHT(tmp, field))               \
+                                       RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+                               RB_ROTATE_LEFT(head, parent, tmp, field);\
+                               elm = RB_ROOT(head);    /* so the final statement colors the root black */ \
+                               break;                                  \
+                       }                                               \
+               } else {        /* mirror image: sibling is on the left */ \
+                       tmp = RB_LEFT(parent, field);                   \
+                       if (RB_COLOR(tmp, field) == RB_RED) {   /* red sibling: recolor, rotate, pick the new sibling */ \
+                               RB_SET_BLACKRED(tmp, parent, field);    \
+                               RB_ROTATE_RIGHT(head, parent, tmp, field);\
+                               tmp = RB_LEFT(parent, field);           \
+                       }                                               \
+                       if ((RB_LEFT(tmp, field) == NULL ||             \
+                           RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+                           (RB_RIGHT(tmp, field) == NULL ||            \
+                           RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+                               RB_COLOR(tmp, field) = RB_RED;  /* no red child under the sibling: redden it and move one level up */ \
+                               elm = parent;                           \
+                               parent = RB_PARENT(elm, field);         \
+                       } else {                                        \
+                               if (RB_LEFT(tmp, field) == NULL ||      \
+                                   RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+                                       struct type *oright;    /* sibling's right child; also the scratch variable for the rotation */ \
+                                       if ((oright = RB_RIGHT(tmp, field)) \
+                                           != NULL)                    \
+                                               RB_COLOR(oright, field) = RB_BLACK;\
+                                       RB_COLOR(tmp, field) = RB_RED;  \
+                                       RB_ROTATE_LEFT(head, tmp, oright, field);\
+                                       tmp = RB_LEFT(parent, field);   \
+                               }                                       \
+                               RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+                               RB_COLOR(parent, field) = RB_BLACK;     \
+                               if (RB_LEFT(tmp, field))                \
+                                       RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+                               RB_ROTATE_RIGHT(head, parent, tmp, field);\
+                               elm = RB_ROOT(head);    /* so the final statement colors the root black */ \
+                               break;                                  \
+                       }                                               \
+               }                                                       \
+       }                                                               \
+       if (elm)                                                        \
+               RB_COLOR(elm, field) = RB_BLACK;                        \
+}
+
+#define RB_GENERATE_REMOVE(name, type, field, attr)            /*      \
+ * Defines name##_RB_REMOVE(head, elm): unlinks elm from the tree and  \
+ * returns the pointer that was passed in. elm is dereferenced         \
+ * unconditionally, so it must not be NULL. When the color recorded    \
+ * for the spliced-out position is black, name##_RB_REMOVE_COLOR is    \
+ * called with the affected parent and child.                          \
+ */                                                                    \
+attr struct type *                                                     \
+name##_RB_REMOVE(struct name *head, struct type *elm)                  \
+{                                                                      \
+       struct type *child, *parent, *old = elm;                        \
+       int color;                                                      \
+       if (RB_LEFT(elm, field) == NULL)                                \
+               child = RB_RIGHT(elm, field);                           \
+       else if (RB_RIGHT(elm, field) == NULL)                          \
+               child = RB_LEFT(elm, field);                            \
+       else {          /* two children: the leftmost node of the right subtree takes old's place */ \
+               struct type *left;                                      \
+               elm = RB_RIGHT(elm, field);                             \
+               while ((left = RB_LEFT(elm, field)) != NULL)            \
+                       elm = left;                                     \
+               child = RB_RIGHT(elm, field);                           \
+               parent = RB_PARENT(elm, field);                         \
+               color = RB_COLOR(elm, field);   /* color of the replacement before it inherits old's color */ \
+               if (child)                                              \
+                       RB_PARENT(child, field) = parent;               \
+               if (parent) {                                           \
+                       if (RB_LEFT(parent, field) == elm)              \
+                               RB_LEFT(parent, field) = child;         \
+                       else                                            \
+                               RB_RIGHT(parent, field) = child;        \
+                       RB_AUGMENT(parent);                             \
+               } else                                                  \
+                       RB_ROOT(head) = child;                          \
+               if (RB_PARENT(elm, field) == old)       /* replacement was old's direct child: it is the parent to fix up from */ \
+                       parent = elm;                                   \
+               (elm)->field = (old)->field;    /* replacement takes over old's links and color */ \
+               if (RB_PARENT(old, field)) {                            \
+                       if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+                               RB_LEFT(RB_PARENT(old, field), field) = elm;\
+                       else                                            \
+                               RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+                       RB_AUGMENT(RB_PARENT(old, field));              \
+               } else                                                  \
+                       RB_ROOT(head) = elm;                            \
+               RB_PARENT(RB_LEFT(old, field), field) = elm;    /* old's left child is non-NULL in this branch */ \
+               if (RB_RIGHT(old, field))                               \
+                       RB_PARENT(RB_RIGHT(old, field), field) = elm;   \
+               if (parent) {                                           \
+                       left = parent;  /* left is reused as a cursor walking up to the root */ \
+                       do {                                            \
+                               RB_AUGMENT(left);                       \
+                       } while ((left = RB_PARENT(left, field)) != NULL); \
+               }                                                       \
+               goto color;                                             \
+       }                                                               \
+       parent = RB_PARENT(elm, field); /* at most one child: splice elm out directly */ \
+       color = RB_COLOR(elm, field);                                   \
+       if (child)                                                      \
+               RB_PARENT(child, field) = parent;                       \
+       if (parent) {                                                   \
+               if (RB_LEFT(parent, field) == elm)                      \
+                       RB_LEFT(parent, field) = child;                 \
+               else                                                    \
+                       RB_RIGHT(parent, field) = child;                \
+               RB_AUGMENT(parent);                                     \
+       } else                                                          \
+               RB_ROOT(head) = child;                                  \
+color:                                                                 \
+       if (color == RB_BLACK)                                          \
+               name##_RB_REMOVE_COLOR(head, parent, child);            \
+       return (old);                                                   \
+}                                                                      \
+
+#define RB_GENERATE_INSERT(name, type, field, cmp, attr)               \
+/* Inserts a node into the RB tree */                                  \
+attr struct type *                                                     \
+name##_RB_INSERT(struct name *head, struct type *elm)                  \
+{                                                                      \
+       struct type *tmp;                                               \
+       struct type *parent = NULL;                                     \
+       int comp = 0;                                                   \
+       tmp = RB_ROOT(head);                                            \
+       while (tmp) {                                                   \
+               parent = tmp;                                           \
+               comp = (cmp)(elm, parent);                              \
+               if (comp < 0)                                           \
+                       tmp = RB_LEFT(tmp, field);                      \
+               else if (comp > 0)                                      \
+                       tmp = RB_RIGHT(tmp, field);                     \
+               else                                                    \
+                       return (tmp);                                   \
+       }                                                               \
+       RB_SET(elm, parent, field);                                     \
+       if (parent != NULL) {                                           \
+               if (comp < 0)                                           \
+                       RB_LEFT(parent, field) = elm;                   \
+               else                                                    \
+                       RB_RIGHT(parent, field) = elm;                  \
+               RB_AUGMENT(parent);                                     \
+       } else                                                          \
+               RB_ROOT(head) = elm;                                    \
+       name##_RB_INSERT_COLOR(head, elm);                              \
+       return (NULL);                                                  \
+}
+
+#define RB_GENERATE_FIND(name, type, field, cmp, attr)                 \
+/* Finds the node with the same key as elm */                          \
+attr struct type *                                                     \
+name##_RB_FIND(struct name *head, struct type *elm)                    \
+{                                                                      \
+       struct type *tmp = RB_ROOT(head);                               \
+       int comp;                                                       \
+       while (tmp) {                                                   \
+               comp = cmp(elm, tmp);                                   \
+               if (comp < 0)                                           \
+                       tmp = RB_LEFT(tmp, field);                      \
+               else if (comp > 0)                                      \
+                       tmp = RB_RIGHT(tmp, field);                     \
+               else                                                    \
+                       return (tmp);                                   \
+       }                                                               \
+       return (NULL);                                                  \
+}
+/*
+ * RB_GENERATE_NFIND emits name##_RB_NFIND(head, elm): like the exact-match
+ * lookup, but every time the descent turns left the current node is
+ * remembered in res.  An exact match is returned immediately; otherwise the
+ * last remembered node (or NULL if the descent never turned left) is
+ * returned.
+ */
+#define RB_GENERATE_NFIND(name, type, field, cmp, attr)                        \
+/* Finds the first node greater than or equal to the search key */     \
+attr struct type *                                                     \
+name##_RB_NFIND(struct name *head, struct type *elm)                   \
+{                                                                      \
+       struct type *tmp = RB_ROOT(head);                               \
+       struct type *res = NULL;                                        \
+       int comp;                                                       \
+       while (tmp) {                                                   \
+               comp = cmp(elm, tmp);                                   \
+               if (comp < 0) {                                         \
+                       res = tmp;                                      \
+                       tmp = RB_LEFT(tmp, field);                      \
+               }                                                       \
+               else if (comp > 0)                                      \
+                       tmp = RB_RIGHT(tmp, field);                     \
+               else                                                    \
+                       return (tmp);                                   \
+       }                                                               \
+       return (res);                                                   \
+}
+/*
+ * RB_GENERATE_NEXT emits name##_RB_NEXT(elm): the in-order successor of elm.
+ * With a right child, that is the leftmost node of the right subtree.
+ * Without one, parent links are followed upwards past every node reached
+ * from its right side; the parent found there is the result, which is NULL
+ * when the climb runs past the root.  elm itself must not be NULL.
+ */
+#define RB_GENERATE_NEXT(name, type, field, attr)                      \
+/* ARGSUSED */                                                         \
+attr struct type *                                                     \
+name##_RB_NEXT(struct type *elm)                                       \
+{                                                                      \
+       if (RB_RIGHT(elm, field)) {                                     \
+               elm = RB_RIGHT(elm, field);                             \
+               while (RB_LEFT(elm, field))                             \
+                       elm = RB_LEFT(elm, field);                      \
+       } else {                                                        \
+               if (RB_PARENT(elm, field) &&                            \
+                   (elm == RB_LEFT(RB_PARENT(elm, field), field)))     \
+                       elm = RB_PARENT(elm, field);                    \
+               else {                                                  \
+                       while (RB_PARENT(elm, field) &&                 \
+                           (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+                               elm = RB_PARENT(elm, field);            \
+                       elm = RB_PARENT(elm, field);                    \
+               }                                                       \
+       }                                                               \
+       return (elm);                                                   \
+}
+/*
+ * RB_GENERATE_PREV emits name##_RB_PREV(elm): the in-order predecessor of
+ * elm, the mirror image of the successor walk.  With a left child, that is
+ * the rightmost node of the left subtree.  Without one, parent links are
+ * followed upwards past every node reached from its left side; the parent
+ * found there is the result, which is NULL when the climb runs past the
+ * root.  elm itself must not be NULL.
+ */
+#define RB_GENERATE_PREV(name, type, field, attr)                      \
+/* ARGSUSED */                                                         \
+attr struct type *                                                     \
+name##_RB_PREV(struct type *elm)                                       \
+{                                                                      \
+       if (RB_LEFT(elm, field)) {                                      \
+               elm = RB_LEFT(elm, field);                              \
+               while (RB_RIGHT(elm, field))                            \
+                       elm = RB_RIGHT(elm, field);                     \
+       } else {                                                        \
+               if (RB_PARENT(elm, field) &&                            \
+                   (elm == RB_RIGHT(RB_PARENT(elm, field), field)))    \
+                       elm = RB_PARENT(elm, field);                    \
+               else {                                                  \
+                       while (RB_PARENT(elm, field) &&                 \
+                           (elm == RB_LEFT(RB_PARENT(elm, field), field)))\
+                               elm = RB_PARENT(elm, field);            \
+                       elm = RB_PARENT(elm, field);                    \
+               }                                                       \
+       }                                                               \
+       return (elm);                                                   \
+}
+/*
+ * RB_GENERATE_MINMAX emits name##_RB_MINMAX(head, val): follows left links
+ * when val is negative and right links otherwise, and returns the last node
+ * visited.  An empty tree yields NULL.
+ */
+#define RB_GENERATE_MINMAX(name, type, field, attr)                    \
+attr struct type *                                                     \
+name##_RB_MINMAX(struct name *head, int val)                           \
+{                                                                      \
+       struct type *tmp = RB_ROOT(head);                               \
+       struct type *parent = NULL;                                     \
+       while (tmp) {                                                   \
+               parent = tmp;                                           \
+               if (val < 0)                                            \
+                       tmp = RB_LEFT(tmp, field);                      \
+               else                                                    \
+                       tmp = RB_RIGHT(tmp, field);                     \
+       }                                                               \
+       return (parent);                                                \
+}
+/* Direction selectors for name##_RB_MINMAX() and the public RB_* wrappers. */
+#define RB_NEGINF      -1      /* negative: follow left links (minimum) */
+#define RB_INF 1       /* non-negative: follow right links (maximum) */
+
+#define RB_INSERT(name, x, y)  name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y)  name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y)    name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y)   name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y)    name##_RB_NEXT(y)       /* x is not used */
+#define RB_PREV(name, x, y)    name##_RB_PREV(y)       /* x is not used */
+#define RB_MIN(name, x)                name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x)                name##_RB_MINMAX(x, RB_INF)
+/* Visit every node from RB_MIN onwards via the successor; x is the cursor. */
+#define RB_FOREACH(x, name, head)                                      \
+       for ((x) = RB_MIN(name, head);                                  \
+            (x) != NULL;                                               \
+            (x) = name##_RB_NEXT(x))
+/* Forward walk starting at y; y is overwritten with each successor. */
+#define RB_FOREACH_FROM(x, name, y)                                    \
+       for ((x) = (y);                                                 \
+           ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);    \
+            (x) = (y))
+/* Forward walk; the successor is fetched into y before the body runs. */
+#define RB_FOREACH_SAFE(x, name, head, y)                              \
+       for ((x) = RB_MIN(name, head);                                  \
+           ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);    \
+            (x) = (y))
+/* Visit every node from RB_MAX backwards via the predecessor. */
+#define RB_FOREACH_REVERSE(x, name, head)                              \
+       for ((x) = RB_MAX(name, head);                                  \
+            (x) != NULL;                                               \
+            (x) = name##_RB_PREV(x))
+/* Backward walk starting at y; y is overwritten with each predecessor. */
+#define RB_FOREACH_REVERSE_FROM(x, name, y)                            \
+       for ((x) = (y);                                                 \
+           ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL);    \
+            (x) = (y))
+/* Backward walk; the predecessor is fetched into y before the body runs. */
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y)                      \
+       for ((x) = RB_MAX(name, head);                                  \
+           ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL);    \
+            (x) = (y))
+
+#endif /* _SYS_TREE_H_ */
index 1275a26..4c7513a 100644 (file)
@@ -697,8 +697,9 @@ errval_t thread_get_async_error(void)
 }
 
 /**
- * \brief Store receive slot provided by rpc in thread state
+ * \brief Store receive slot provided by rpc
  */
+
 void thread_store_recv_slot(struct capref recv_slot)
 {
     dispatcher_handle_t handle = disp_disable();
index 3a052eb..4f5837c 100644 (file)
@@ -1,9 +1,10 @@
 /*
- * Copyright (c) 2017, ETH Zurich. All rights reserved.
+ * Copyright (c) 2017 ETH Zurich.
+ * All rights reserved.
  *
  * This file is distributed under the terms in the attached LICENSE file.
  * If you do not find this file, copies can be found by writing to:
- * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
+ * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
  */
 
 #include <barrelfish/barrelfish.h>
index ff884ba..50432e6 100644 (file)
@@ -1,9 +1,10 @@
 /*
- * Copyright (c) 2017, ETH Zurich. All rights reserved.
+ * Copyright (c) 2017 ETH Zurich.
+ * All rights reserved.
  *
  * This file is distributed under the terms in the attached LICENSE file.
  * If you do not find this file, copies can be found by writing to:
- * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
+ * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
  */
 
 #ifndef E1000_H
diff --git a/lib/devif/backends/net/mlx4/Hakefile b/lib/devif/backends/net/mlx4/Hakefile
new file mode 100644 (file)
index 0000000..746b50a
--- /dev/null
@@ -0,0 +1,74 @@
+--------------------------------------------------------------------------
+-- Copyright (c) 2015, ETH Zurich.
+-- All rights reserved.
+--
+-- This file is distributed under the terms in the attached LICENSE file.
+-- If you do not find this file, copies can be found by writing to:
+-- ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
+--
+-- Hakefile for /lib/devif/backends/net/mlx4
+-- 
+--------------------------------------------------------------------------
+
+[ build library {
+    target = "devif_backend_mlx4",
+    cFiles = [
+        "drivers/net/mlx4/mlx4_devif_queue.c",
+        "drivers/net/mlx4/reset.c",
+        "drivers/net/mlx4/fw.c",
+        "drivers/net/mlx4/cmd.c",
+        "drivers/net/mlx4/mcg.c",
+        "drivers/net/mlx4/profile.c",
+        "drivers/net/mlx4/eq.c",
+        "drivers/net/mlx4/pd.c",
+        "drivers/net/mlx4/mr.c",
+        "drivers/net/mlx4/cq.c",
+        "drivers/net/mlx4/alloc.c",
+        "drivers/net/mlx4/icm.c",
+        "drivers/net/mlx4/srq.c",
+        "drivers/net/mlx4/qp.c",
+        "drivers/net/mlx4/port.c",
+        "drivers/net/mlx4/resource_tracker.c",
+        "include/linux/linux_radix.c",
+        -- en_* sources of drivers/net/mlx4
+               "drivers/net/mlx4/en_main.c",
+               "drivers/net/mlx4/en_netdev.c",
+               "drivers/net/mlx4/en_rx.c",
+               "drivers/net/mlx4/en_tx.c",
+               "drivers/net/mlx4/en_cq.c",
+               "drivers/net/mlx4/en_resources.c",
+               "drivers/net/mlx4/en_port.c",
+        -- libibverbs / libmlx4 sources: commented out, i.e. not built
+        --"libibverbs/src/device.c",
+        --"libibverbs/src/init.c",
+        --"libibverbs/src/verbs.c",
+        
+        --"libmlx4/src/mlx4.c",
+        --"libmlx4/src/verbs.c",
+        --"libmlx4/src/dbrec.c",
+        --"libmlx4/src/buf.c",
+        --"libmlx4/src/qp.c",
+        -- drivers/infiniband/hw/mlx4
+        "drivers/infiniband/hw/mlx4/main.c",
+        "drivers/infiniband/hw/mlx4/mad.c",
+        "drivers/infiniband/hw/mlx4/cq.c",
+        "drivers/infiniband/hw/mlx4/mr.c",
+        "drivers/infiniband/hw/mlx4/qp.c",
+        "drivers/infiniband/hw/mlx4/ah.c",
+        -- test sources: test_ib.c is built, test_wq.c is commented out
+        --"drivers/test_wq.c"
+        "drivers/test_ib.c",
+        -- drivers/infiniband/core (subset of the imported core files)
+        "drivers/infiniband/core/ud_header.c",
+        "drivers/infiniband/core/packer.c",
+        "drivers/infiniband/core/mad.c",
+        "drivers/infiniband/core/cache.c",
+        "drivers/infiniband/core/verbs.c",
+        "drivers/infiniband/core/device.c",
+        "drivers/infiniband/core/agent.c",
+        "drivers/infiniband/core/smi.c"
+    ],
+    addLibraries = [ "pci", "pciconfspace" ],
+    addIncludes = [ "include" ]
+  }
+]
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/Kconfig b/lib/devif/backends/net/mlx4/drivers/infiniband/Kconfig
new file mode 100644 (file)
index 0000000..0a2ef11
--- /dev/null
@@ -0,0 +1,66 @@
+menuconfig INFINIBAND
+       tristate "InfiniBand support"
+       depends on PCI || BROKEN
+       depends on HAS_IOMEM
+       ---help---
+         Core support for InfiniBand (IB).  Make sure to also select
+         any protocols you wish to use as well as drivers for your
+         InfiniBand hardware.
+
+if INFINIBAND
+
+config INFINIBAND_USER_MAD
+       tristate "InfiniBand userspace MAD support"
+       depends on INFINIBAND
+       ---help---
+         Userspace InfiniBand Management Datagram (MAD) support.  This
+         is the kernel side of the userspace MAD support, which allows
+         userspace processes to send and receive MADs. You will also
+         need libibumad from <http://www.openib.org>.
+
+config INFINIBAND_USER_ACCESS
+       tristate "InfiniBand userspace access (verbs and CM)"
+       ---help---
+         Userspace InfiniBand access support.  This enables the
+         kernel side of userspace verbs and the userspace
+         communication manager (CM).  This allows userspace processes
+         to set up connections and directly access InfiniBand
+         hardware for fast-path operations.  You will also need
+         libibverbs, libibcm and a hardware driver library from
+         <http://www.openib.org>.
+
+config INFINIBAND_USER_MEM
+       bool
+       depends on INFINIBAND_USER_ACCESS != n
+       default y
+
+config INFINIBAND_ADDR_TRANS
+       bool
+       depends on INET
+       depends on !(INFINIBAND = y && IPV6 = m)
+       default y
+
+source "drivers/infiniband/hw/mthca/Kconfig"
+source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/qib/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
+source "drivers/infiniband/hw/cxgb3/Kconfig"
+source "drivers/infiniband/hw/mlx4/Kconfig"
+source "drivers/infiniband/hw/nes/Kconfig"
+
+source "drivers/infiniband/ulp/ipoib/Kconfig"
+
+source "drivers/infiniband/ulp/srp/Kconfig"
+
+source "drivers/infiniband/ulp/srpt/Kconfig"
+
+source "drivers/infiniband/ulp/iser/Kconfig"
+
+source "drivers/infiniband/ulp/sdp/Kconfig"
+
+source "drivers/infiniband/ulp/qlgc_vnic/Kconfig"
+
+source "drivers/infiniband/util/Kconfig"
+
+endif # INFINIBAND
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/Makefile b/lib/devif/backends/net/mlx4/drivers/infiniband/Makefile
new file mode 100644 (file)
index 0000000..ea5dbe0
--- /dev/null
@@ -0,0 +1,17 @@
+obj-$(CONFIG_INFINIBAND)               += core/
+obj-$(CONFIG_INFINIBAND_MTHCA)         += hw/mthca/
+obj-$(CONFIG_INFINIBAND_IPATH)         += hw/ipath/
+obj-$(CONFIG_INFINIBAND_QIB)           += hw/qib/
+obj-$(CONFIG_INFINIBAND_EHCA)          += hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)      += hw/amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3)         += hw/cxgb3/
+obj-$(CONFIG_INFINIBAND_NES)           += hw/nes/
+obj-$(CONFIG_MLX4_INFINIBAND)          += hw/mlx4/
+obj-$(CONFIG_INFINIBAND_NES)           += hw/nes/
+obj-$(CONFIG_INFINIBAND_IPOIB)         += ulp/ipoib/
+obj-$(CONFIG_INFINIBAND_SRP)           += ulp/srp/
+obj-$(CONFIG_INFINIBAND_SRPT)          += ulp/srpt/
+obj-$(CONFIG_INFINIBAND_ISER)          += ulp/iser/
+obj-$(CONFIG_INFINIBAND_SDP)           += ulp/sdp/
+obj-$(CONFIG_INFINIBAND_QLGC_VNIC)     += ulp/qlgc_vnic/
+obj-$(CONFIG_INFINIBAND_MADEYE)                += util/
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/core/Makefile b/lib/devif/backends/net/mlx4/drivers/infiniband/core/Makefile
new file mode 100644 (file)
index 0000000..f646040
--- /dev/null
@@ -0,0 +1,32 @@
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)     := ib_addr.o rdma_cm.o
+user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)    := rdma_ucm.o
+
+obj-$(CONFIG_INFINIBAND) +=            ib_core.o ib_mad.o ib_sa.o \
+                                       ib_cm.o iw_cm.o $(infiniband-y)
+obj-$(CONFIG_INFINIBAND_USER_MAD) +=   ib_umad.o
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o \
+                                       $(user_access-y)
+
+ib_core-y :=                   packer.o ud_header.o verbs.o sysfs.o \
+                               device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+
+ib_mad-y :=                    mad.o smi.o agent.o mad_rmpp.o
+
+ib_sa-y :=                     sa_query.o multicast.o notice.o local_sa.o
+
+ib_cm-y :=                     cm.o
+
+iw_cm-y :=                     iwcm.o
+
+rdma_cm-y :=                   cma.o
+
+rdma_ucm-y :=                  ucma.o
+
+ib_addr-y :=                   addr.o
+
+ib_umad-y :=                   user_mad.o
+
+ib_ucm-y :=                    ucm.o
+
+ib_uverbs-y :=                 uverbs_main.o uverbs_cmd.o uverbs_marshall.o
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/core/addr.c b/lib/devif/backends/net/mlx4/drivers/infiniband/core/addr.c
new file mode 100644 (file)
index 0000000..f608244
--- /dev/null
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/inetdevice.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <net/route.h>
+#include <net/netevent.h>
+#include <rdma/ib_addr.h>
+#include <netinet/if_ether.h>
+
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("IB Address Translation");
+MODULE_LICENSE("Dual BSD/GPL");
+/*
+ * One queued address-resolution request.  queue_req() keeps req_list ordered
+ * by timeout; process_req() re-runs the resolution for entries whose status
+ * is -ENODATA.
+ */
+struct addr_req {
+       struct list_head list;          /* linkage on req_list / a done list */
+       struct sockaddr_storage src_addr;       /* passed to addr_resolve() as source */
+       struct sockaddr_storage dst_addr;       /* passed to addr_resolve() as destination */
+       struct rdma_dev_addr *addr;     /* passed to addr_resolve() as output */
+       struct rdma_addr_client *client;
+       void *context;
+       void (*callback)(int status, struct sockaddr *src_addr,
+                        struct rdma_dev_addr *addr, void *context);
+       unsigned long timeout;          /* deadline, compared against jiffies */
+       int status;                     /* -ENODATA while still unresolved */
+};
+
+static void process_req(struct work_struct *work);
+
+static DEFINE_MUTEX(lock);
+static LIST_HEAD(req_list);
+static struct delayed_work work;
+static struct workqueue_struct *addr_wq;
+/*
+ * rdma_addr_register_client - prepare a client structure for use.
+ * Sets the reference count to 1 and initialises the completion that
+ * rdma_addr_unregister_client() later waits on.
+ */
+void rdma_addr_register_client(struct rdma_addr_client *client)
+{
+       atomic_set(&client->refcount, 1);
+       init_completion(&client->comp);
+}
+EXPORT_SYMBOL(rdma_addr_register_client);
+
+static inline void put_client(struct rdma_addr_client *client)
+{
+       /* Drop one reference; nothing more to do unless it was the last. */
+       if (!atomic_dec_and_test(&client->refcount))
+               return;
+
+       /* Last reference gone: signal the client's completion. */
+       complete(&client->comp);
+}
+/*
+ * rdma_addr_unregister_client - drop one reference via put_client() and then
+ * block until the client's completion has been signalled, which put_client()
+ * does once the reference count reaches zero.
+ */
+void rdma_addr_unregister_client(struct rdma_addr_client *client)
+{
+       put_client(client);
+       wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(rdma_addr_unregister_client);
+/*
+ * rdma_copy_addr - fill a struct rdma_dev_addr from a network interface.
+ *
+ * Stores the device type, the interface's own link-layer address, its
+ * broadcast address and its interface index in dev_addr.  When dst_dev_addr
+ * is non-NULL it is copied in as the destination link-layer address.
+ * The Linux variant copies MAX_ADDR_LEN bytes per address; the ifnet variant
+ * copies dev->if_addrlen bytes and maps IFT_INFINIBAND / IFT_ETHER to the
+ * matching ARPHRD_* value (0 for any other interface type).
+ * Both variants always return 0.
+ */
+#ifdef __linux__
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+                    const unsigned char *dst_dev_addr)
+{
+       dev_addr->dev_type = dev->type;
+       memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+       memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
+       if (dst_dev_addr)
+               memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+       dev_addr->bound_dev_if = dev->ifindex;
+       return 0;
+}
+#else
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
+                    const unsigned char *dst_dev_addr)
+{
+       if (dev->if_type == IFT_INFINIBAND)
+               dev_addr->dev_type = ARPHRD_INFINIBAND;
+       else if (dev->if_type == IFT_ETHER)
+               dev_addr->dev_type = ARPHRD_ETHER;
+       else
+               dev_addr->dev_type = 0;
+       memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
+       memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
+           dev->if_addrlen);
+       if (dst_dev_addr)
+               memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
+       dev_addr->bound_dev_if = dev->if_index;
+       return 0;
+}
+#endif
+EXPORT_SYMBOL(rdma_copy_addr);
+/*
+ * rdma_translate_ip - find the local interface that owns addr and copy its
+ * link-layer information into dev_addr.
+ *
+ * If dev_addr->bound_dev_if is already set, that interface index is used
+ * directly (-ENODEV when no such device exists).  Otherwise the lookup
+ * depends on addr->sa_family.  An address family that is not compiled in, or
+ * an address no interface matches, yields -EADDRNOTAVAIL, except on the
+ * non-Linux IPv6 path, which reports -ENODEV.  On a match the result of
+ * rdma_copy_addr() is returned.
+ */
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+       struct net_device *dev;
+       int ret = -EADDRNOTAVAIL;
+
+       if (dev_addr->bound_dev_if) {
+               dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               if (!dev)
+                       return -ENODEV;
+               ret = rdma_copy_addr(dev_addr, dev, NULL);
+               dev_put(dev);
+               return ret;
+       }
+
+       switch (addr->sa_family) {
+#ifdef INET
+       case AF_INET:
+               dev = ip_dev_find(NULL,
+                       ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+
+               if (!dev)
+                       return ret;
+
+               ret = rdma_copy_addr(dev_addr, dev, NULL);
+               dev_put(dev);
+               break;
+#endif
+
+#if defined(INET6)
+       case AF_INET6:
+#ifdef __linux__
+               read_lock(&dev_base_lock);
+               for_each_netdev(&init_net, dev) {
+                       if (ipv6_chk_addr(&init_net,
+                                         &((struct sockaddr_in6 *) addr)->sin6_addr,
+                                         dev, 1)) {
+                               ret = rdma_copy_addr(dev_addr, dev, NULL);
+                               break;
+                       }
+               }
+               read_unlock(&dev_base_lock);
+#else
+               {
+                       struct sockaddr_in6 *sin6;
+                       struct ifaddr *ifa;
+                       in_port_t port;
+
+                       sin6 = (struct sockaddr_in6 *)addr;
+                       port = sin6->sin6_port;
+                       sin6->sin6_port = 0;    /* port is cleared for the lookup only */
+                       ifa = ifa_ifwithaddr(addr);
+                       sin6->sin6_port = port; /* caller's port restored */
+                       if (ifa == NULL) {
+                               ret = -ENODEV;
+                               break;
+                       }
+                       ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
+                       ifa_free(ifa);
+                       break;
+               }
+#endif
+               break;
+#endif
+       }
+       return ret;
+}
+EXPORT_SYMBOL(rdma_translate_ip);
+
+static void set_timeout(unsigned long time)
+{
+       /* Ticks left until the deadline; wraps around if it has passed. */
+       unsigned long remaining = time - jiffies;
+
+       /*
+        * A deadline that is due or already behind us shows up as a
+        * non-positive signed value: run the work on the next tick then.
+        */
+       mod_delayed_work(addr_wq, &work,
+                        ((long)remaining > 0) ? remaining : 1);
+}
+
+static void queue_req(struct addr_req *req)
+{
+       struct addr_req *pos;
+
+       mutex_lock(&lock);
+
+       /*
+        * Scan from the tail for the last entry that does not expire after
+        * the new request and link the new request right behind it.  If no
+        * such entry exists the cursor ends up on the list head, so the
+        * request becomes the first element.
+        */
+       list_for_each_entry_reverse(pos, &req_list, list)
+               if (time_after_eq(req->timeout, pos->timeout))
+                       break;
+       list_add(&req->list, &pos->list);
+
+       /* The request is now first in the list: re-arm the delayed work. */
+       if (&req->list == req_list.next)
+               set_timeout(req->timeout);
+
+       mutex_unlock(&lock);
+}
+/*
+ * addr4_resolve - Linux IPv4 path: route dst_in and obtain the link-layer
+ * address to use for it.
+ *
+ * After a successful route lookup src_in is overwritten with the route's
+ * source address.  Loopback routes are resolved through rdma_translate_ip()
+ * and the destination hardware address is set equal to the source one.
+ * IFF_NOARP devices are copied without a destination address.  Otherwise
+ * the neighbour table is consulted; without a valid entry a neighbour event
+ * is sent and -ENODATA is returned.  The route (and the neighbour entry, if
+ * one was found) is released on every path that acquired it.
+ * Returns 0 on success or a negative errno.
+ */
+#ifdef __linux__
+static int addr4_resolve(struct sockaddr_in *src_in,
+                        struct sockaddr_in *dst_in,
+                        struct rdma_dev_addr *addr)
+{
+       __be32 src_ip = src_in->sin_addr.s_addr;
+       __be32 dst_ip = dst_in->sin_addr.s_addr;
+       struct flowi fl;
+       struct rtable *rt;
+       struct neighbour *neigh;
+       int ret;
+
+       memset(&fl, 0, sizeof fl);
+       fl.nl_u.ip4_u.daddr = dst_ip;
+       fl.nl_u.ip4_u.saddr = src_ip;
+       fl.oif = addr->bound_dev_if;
+
+       ret = ip_route_output_key(&init_net, &rt, &fl);
+       if (ret)
+               goto out;
+
+       src_in->sin_family = AF_INET;
+       src_in->sin_addr.s_addr = rt->rt_src;
+
+       if (rt->idev->dev->flags & IFF_LOOPBACK) {
+               ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+               goto put;
+       }
+
+       /* If the device does ARP internally, return 'done' */
+       if (rt->idev->dev->flags & IFF_NOARP) {
+               rdma_copy_addr(addr, rt->idev->dev, NULL);
+               goto put;
+       }
+
+       neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
+       if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+               neigh_event_send(rt->u.dst.neighbour, NULL);
+               ret = -ENODATA;
+               if (neigh)
+                       goto release;
+               goto put;
+       }
+
+       ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+release:
+       neigh_release(neigh);
+put:
+       ip_rt_put(rt);
+out:
+       return ret;
+}
+/*
+ * addr6_resolve - Linux IPv6 counterpart of addr4_resolve().
+ *
+ * Routes dst_in.  If the flow's source address is unspecified, a source
+ * address is selected for the outgoing device and written back to src_in.
+ * Loopback and IFF_NOARP devices are handled as in the IPv4 path.
+ * Otherwise the route's neighbour entry is used; if it is missing or not
+ * valid a neighbour event is sent and -ENODATA is returned.  The route is
+ * released on every path.  Returns 0 on success or a negative errno.
+ * Without INET6 the function is a stub that returns -EADDRNOTAVAIL.
+ */
+#if defined(INET6)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+                        struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr)
+{
+       struct flowi fl;
+       struct neighbour *neigh;
+       struct dst_entry *dst;
+       int ret;
+
+       memset(&fl, 0, sizeof fl);
+       ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+       ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+       fl.oif = addr->bound_dev_if;
+
+       dst = ip6_route_output(&init_net, NULL, &fl);
+       if ((ret = dst->error))
+               goto put;
+
+       if (ipv6_addr_any(&fl.fl6_src)) {
+               ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+                                        &fl.fl6_dst, 0, &fl.fl6_src);
+               if (ret)
+                       goto put;
+
+               src_in->sin6_family = AF_INET6;
+               ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+       }
+
+       if (dst->dev->flags & IFF_LOOPBACK) {
+               ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+               if (!ret)
+                       memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+               goto put;
+       }
+
+       /* If the device does ARP internally, return 'done' */
+       if (dst->dev->flags & IFF_NOARP) {
+               ret = rdma_copy_addr(addr, dst->dev, NULL);
+               goto put;
+       }
+       
+       neigh = dst->neighbour;
+       if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+               neigh_event_send(dst->neighbour, NULL);
+               ret = -ENODATA;
+               goto put;
+       }
+
+       ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
+       dst_release(dst);
+       return ret;
+}
+#else
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+                        struct sockaddr_in6 *dst_in,
+                        struct rdma_dev_addr *addr)
+{
+       return -EADDRNOTAVAIL;
+}
+#endif
+
+#else
+#include <netinet/if_ether.h>
+
+static int addr_resolve(struct sockaddr *src_in,
+                       struct sockaddr *dst_in,
+                       struct rdma_dev_addr *addr)
+{
+       struct sockaddr_in *sin;
+       struct sockaddr_in6 *sin6;
+       struct ifaddr *ifa;
+       struct ifnet *ifp;
+#if defined(INET) || defined(INET6)
+       struct llentry *lle;
+#endif
+       struct rtentry *rte;
+       in_port_t port;
+       u_char edst[MAX_ADDR_LEN];
+       int multi;
+       int bcast;
+       int error = 0;
+
+       /*
+        * Determine whether the address is unicast, multicast, or broadcast
+        * and whether the source interface is valid.
+        */
+       multi = 0;
+       bcast = 0;
+       sin = NULL;
+       sin6 = NULL;
+       ifp = NULL;
+       rte = NULL;
+       switch (dst_in->sa_family) {
+#ifdef INET
+       case AF_INET:
+               sin = (struct sockaddr_in *)dst_in;
+               if (sin->sin_addr.s_addr == INADDR_BROADCAST)
+                       bcast = 1;
+               if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+                       multi = 1;
+               sin = (struct sockaddr_in *)src_in;
+               if (sin->sin_addr.s_addr != INADDR_ANY) {
+                       /*
+                        * Address comparison fails if the port is set
+                        * cache it here to be restored later.
+                        */
+                       port = sin->sin_port;
+                       sin->sin_port = 0;
+                       memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+               } else
+                       src_in = NULL; 
+               break;
+#endif
+#ifdef INET6
+       case AF_INET6:
+               sin6 = (struct sockaddr_in6 *)dst_in;
+               if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+                       multi = 1;
+               sin6 = (struct sockaddr_in6 *)src_in;
+               if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+                       port = sin6->sin6_port;
+                       sin6->sin6_port = 0;
+               } else
+                       src_in = NULL;
+               break;
+#endif
+       default:
+               return -EINVAL;
+       }
+       /*
+        * If we have a source address to use look it up first and verify
+        * that it is a local interface.
+        */
+       if (src_in) {
+               ifa = ifa_ifwithaddr(src_in);
+               if (sin)
+                       sin->sin_port = port;
+               if (sin6)
+                       sin6->sin6_port = port;
+               if (ifa == NULL)
+                       return -ENETUNREACH;
+               ifp = ifa->ifa_ifp;
+               ifa_free(ifa);
+               if (bcast || multi)
+                       goto mcast;
+       }
+       /*
+        * Make sure the route exists and has a valid link.
+        */
+       rte = rtalloc1(dst_in, 1, 0);
+       if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
+               if (rte) 
+                       RTFREE_LOCKED(rte);
+               return -EHOSTUNREACH;
+       }
+       /*
+        * If it's not multicast or broadcast and the route doesn't match the
+        * requested interface return unreachable.  Otherwise fetch the
+        * correct interface pointer and unlock the route.
+        */
+       if (multi || bcast) {
+               if (ifp == NULL)
+                       ifp = rte->rt_ifp;
+               RTFREE_LOCKED(rte);
+       } else if (ifp && ifp != rte->rt_ifp) {
+               RTFREE_LOCKED(rte);
+               return -ENETUNREACH;
+       } else {
+               if (ifp == NULL)
+                       ifp = rte->rt_ifp;
+               RT_UNLOCK(rte);
+       }
+mcast:
+       if (bcast)
+               return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
+       if (multi) {
+               struct sockaddr *llsa;
+
+               error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
+               if (error)
+                       return -error;
+               error = rdma_copy_addr(addr, ifp,
+                   LLADDR((struct sockaddr_dl *)llsa));
+               free(llsa, M_IFMADDR);
+               return error;
+       }
+       /*
+        * Resolve the link local address.
+        */
+       switch (dst_in->sa_family) {
+#ifdef INET
+       case AF_INET:
+               error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle);
+               break;
+#endif
+#ifdef INET6
+       case AF_INET6:
+               error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle);
+               break;
+#endif
+       default:
+               /* XXX: Shouldn't happen. */
+               error = -EINVAL;
+       }
+       RTFREE(rte);
+       if (error == 0)
+               return rdma_copy_addr(addr, ifp, edst);
+       if (error == EWOULDBLOCK)
+               return -ENODATA;
+       return -error;
+}
+
+#endif
+
+/*
+ * Work handler (installed by addr_init() via INIT_DELAYED_WORK) for the
+ * pending-request queue.
+ *
+ * Under the lock, every request whose status is still -ENODATA is passed to
+ * addr_resolve() again.  A request leaves the queue once its status is no
+ * longer -ENODATA: resolved, failed, cancelled, or overdue (in which case the
+ * status is rewritten to -ETIMEDOUT).  Callbacks for the requests that left
+ * the queue are invoked after the lock has been released, and each of those
+ * requests is freed here.
+ */
+static void process_req(struct work_struct *work)
+{
+       struct addr_req *cur, *next;
+       struct list_head finished;
+
+       INIT_LIST_HEAD(&finished);
+
+       mutex_lock(&lock);
+       list_for_each_entry_safe(cur, next, &req_list, list) {
+               if (cur->status == -ENODATA) {
+                       struct sockaddr *src = (struct sockaddr *) &cur->src_addr;
+                       struct sockaddr *dst = (struct sockaddr *) &cur->dst_addr;
+
+                       cur->status = addr_resolve(src, dst, cur->addr);
+                       if (cur->status != 0 &&
+                           time_after_eq(jiffies, cur->timeout)) {
+                               /* Any failure at or past the deadline is a timeout. */
+                               cur->status = -ETIMEDOUT;
+                       } else if (cur->status == -ENODATA) {
+                               /* Still unresolved but not overdue: stays queued. */
+                               continue;
+                       }
+               }
+               list_move_tail(&cur->list, &finished);
+       }
+
+       /* Hand the deadline of the first remaining request to set_timeout(). */
+       if (!list_empty(&req_list)) {
+               cur = list_entry(req_list.next, struct addr_req, list);
+               set_timeout(cur->timeout);
+       }
+       mutex_unlock(&lock);
+
+       list_for_each_entry_safe(cur, next, &finished, list) {
+               list_del(&cur->list);
+               cur->callback(cur->status, (struct sockaddr *) &cur->src_addr,
+                             cur->addr, cur->context);
+               put_client(cur->client);
+               kfree(cur);
+       }
+}
+
+/*
+ * Start resolving @dst_addr (optionally from @src_addr) into @addr.
+ *
+ * A request object holding copies of both addresses is allocated and one
+ * addr_resolve() attempt is made immediately.  If that attempt returns 0 or
+ * -ENODATA the request is queued and @callback is invoked later from
+ * process_req(); for -ENODATA the deadline is @timeout_ms from now.
+ *
+ * Returns 0 when the request was queued, -ENOMEM if the request could not be
+ * allocated, -EINVAL if @src_addr and @dst_addr have different address
+ * families, or the addr_resolve() error otherwise.  In the error cases the
+ * callback is not invoked and no client reference is kept.
+ */
+int rdma_resolve_ip(struct rdma_addr_client *client,
+                   struct sockaddr *src_addr, struct sockaddr *dst_addr,
+                   struct rdma_dev_addr *addr, int timeout_ms,
+                   void (*callback)(int status, struct sockaddr *src_addr,
+                                    struct rdma_dev_addr *addr, void *context),
+                   void *context)
+{
+       struct sockaddr *req_src, *req_dst;
+       struct addr_req *req;
+       int ret;
+
+       req = kzalloc(sizeof *req, GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
+
+       req_src = (struct sockaddr *) &req->src_addr;
+       req_dst = (struct sockaddr *) &req->dst_addr;
+
+       if (!src_addr) {
+               /* No source given: only record the family to resolve for. */
+               req_src->sa_family = dst_addr->sa_family;
+       } else if (src_addr->sa_family == dst_addr->sa_family) {
+               memcpy(req_src, src_addr, ip_addr_size(src_addr));
+       } else {
+               kfree(req);
+               return -EINVAL;
+       }
+
+       memcpy(req_dst, dst_addr, ip_addr_size(dst_addr));
+       req->addr = addr;
+       req->callback = callback;
+       req->context = context;
+       req->client = client;
+       atomic_inc(&client->refcount);
+
+       req->status = addr_resolve(req_src, req_dst, addr);
+       if (req->status == 0) {
+               /* Already resolved: due immediately. */
+               req->timeout = jiffies;
+       } else if (req->status == -ENODATA) {
+               req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
+       } else {
+               /* Hard failure: undo the reference taken above and bail out. */
+               ret = req->status;
+               atomic_dec(&client->refcount);
+               kfree(req);
+               return ret;
+       }
+
+       queue_req(req);
+       return 0;
+}
+EXPORT_SYMBOL(rdma_resolve_ip);
+
+/*
+ * Cancel the pending request that targets @addr, if there is one.
+ *
+ * The first matching request is marked -ECANCELED, made due immediately and
+ * moved to the front of req_list; its callback is then issued by
+ * process_req() like any other finished request.  Nothing happens when no
+ * queued request refers to @addr.
+ */
+void rdma_addr_cancel(struct rdma_dev_addr *addr)
+{
+       struct addr_req *req, *next;
+
+       mutex_lock(&lock);
+       list_for_each_entry_safe(req, next, &req_list, list) {
+               if (req->addr != addr)
+                       continue;
+
+               req->status = -ECANCELED;
+               req->timeout = jiffies;
+               list_move(&req->list, &req_list);
+               set_timeout(req->timeout);
+               break;
+       }
+       mutex_unlock(&lock);
+}
+EXPORT_SYMBOL(rdma_addr_cancel);
+
+/*
+ * Network-event notifier.  On NETEVENT_NEIGH_UPDATE it calls
+ * set_timeout(jiffies); on Linux builds only when the neighbour passed in
+ * @ctx has a NUD_VALID state, on other builds unconditionally.
+ * All other events are ignored.  Always returns 0.
+ */
+static int netevent_callback(struct notifier_block *self, unsigned long event,
+       void *ctx)
+{
+       if (event != NETEVENT_NEIGH_UPDATE)
+               return 0;
+
+#ifdef __linux__
+       {
+               struct neighbour *neigh = ctx;
+
+               if (!(neigh->nud_state & NUD_VALID))
+                       return 0;
+       }
+#endif
+       set_timeout(jiffies);
+       return 0;
+}
+
+/* Notifier block that routes network events to netevent_callback(). */
+static struct notifier_block nb = {
+       .notifier_call = netevent_callback,
+};
+
+/*
+ * Module initialisation: binds process_req() to the delayed work item,
+ * creates the "ib_addr" workqueue and registers the netevent notifier.
+ * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
+ * The result of register_netevent_notifier() is not checked.
+ */
+static int __init addr_init(void)
+{
+       INIT_DELAYED_WORK(&work, process_req);
+
+       addr_wq = create_singlethread_workqueue("ib_addr");
+       if (addr_wq == NULL)
+               return -ENOMEM;
+
+       register_netevent_notifier(&nb);
+       return 0;
+}
+
+static void __exit addr_cleanup(void)
+{
+       unregister_netevent_notifier(&nb);      /* drop the notifier registered by addr_init() */
+       destroy_workqueue(addr_wq);             /* release the "ib_addr" workqueue created by addr_init() */
+}
+
+module_init(addr_init);        /* addr_init() is this module's init routine */
+module_exit(addr_cleanup);     /* addr_cleanup() is the matching exit routine */
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/core/agent.c b/lib/devif/backends/net/mlx4/drivers/infiniband/core/agent.c
new file mode 100644 (file)
index 0000000..1b834bb
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+
+ * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+// #include <linux/slab.h>
+// #include <linux/string.h>
+
+#include <linux/gfp.h>
+
+#include "agent.h"
+/*
+ #include "smi.h"
+ */
+#include "mad_priv.h"
+
+#define SPFX "ib_agent: "
+
+struct ib_agent_port_private {
+       struct list_head port_list;     /* link in ib_agent_port_list */
+       struct ib_mad_agent *agent[2];  /* [0]: SMI QP agent, registered by ib_agent_port_open()
+                                        * only for InfiniBand link layers (NULL otherwise);
+                                        * [1]: GSI QP agent, also used as the lookup key */
+};
+/*
+ static DEFINE_SPINLOCK(ib_agent_port_list_lock);
+ */
+static MLX4_LIST_HEAD( ib_agent_port_list);    /* ports added by ib_agent_port_open(); searched by __ib_get_agent_port() */
+
+/*
+ * Look up the per-port agent record for (@device, @port_num) in
+ * ib_agent_port_list.  Entries are matched through their GSI agent
+ * (agent[1]).  Returns the record, or NULL when no entry matches.
+ */
+static struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num) {
+       struct ib_agent_port_private *priv;
+
+       list_for_each_entry(priv, &ib_agent_port_list, port_list)
+       {
+               struct ib_mad_agent *gsi = priv->agent[1];
+
+               if (gsi->device != device)
+                       continue;
+               if (gsi->port_num == port_num)
+                       return priv;
+       }
+       return NULL;
+}
+
+/*
+ * Wrapper around __ib_get_agent_port().
+ *
+ * NOTE: the spin_lock_irqsave()/spin_unlock_irqrestore() pair on
+ * ib_agent_port_list_lock that used to surround this lookup is disabled in
+ * this port, so the list walk is not serialised here.
+ */
+static struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num) {
+       return __ib_get_agent_port(device, port_num);
+}
+
+/*
+ * Send @mad back to the peer described by the work completion @wc (and
+ * @grh) through the agent registered for QP index @qpn on @device.
+ *
+ * For switches the agent record is looked up under port 0 and @port_num is
+ * written into the send work request instead; for other node types the
+ * record for @port_num is used.
+ *
+ * Steps: build an address handle from @wc, allocate a send MAD, copy @mad
+ * into it and post it.  After a successful post the address handle and the
+ * send buffer are released by agent_send_handler().  On every failure path
+ * this function releases whatever it has already acquired; failures are
+ * reported with printf() only, as the function returns void.
+ */
+void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+               struct ib_wc *wc, struct ib_device *device, int port_num, int qpn) {
+       struct ib_agent_port_private *port_priv;
+       struct ib_mad_agent *agent;
+       struct ib_mad_send_buf *send_buf;
+       struct ib_ah *ah;
+       struct ib_mad_send_wr_private *mad_send_wr;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
+               port_priv = ib_get_agent_port(device, 0);
+       else
+               port_priv = ib_get_agent_port(device, port_num);
+
+       if (!port_priv) {
+               printf("Unable to find port agent\n");
+               return;
+       }
+
+       /*
+        * agent[] has exactly two slots, and slot 0 stays NULL when the port
+        * is not an InfiniBand link layer (see ib_agent_port_open()), so
+        * validate the index and the slot before dereferencing.
+        */
+       if (qpn < 0
+                       || qpn >= (int) (sizeof port_priv->agent
+                                       / sizeof port_priv->agent[0])
+                       || !port_priv->agent[qpn]) {
+               printf("No MAD agent for QP %d\n", qpn);
+               return;
+       }
+
+       agent = port_priv->agent[qpn];
+       ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
+       if (IS_ERR(ah)) {
+               printf("ib_create_ah_from_wc error %ld\n", PTR_ERR(ah));
+               return;
+       }
+
+       send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
+                       IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_KERNEL);
+       if (IS_ERR(send_buf)) {
+               printf("ib_create_send_mad error\n");
+               goto err1;
+       }
+
+       memcpy(send_buf->mad, mad, sizeof *mad);
+       send_buf->ah = ah;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
+               mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+                               send_buf);
+               mad_send_wr->send_wr.wr.ud.port_num = port_num;
+       }
+
+       if (ib_post_send_mad(send_buf, NULL)) {
+               printf("ib_post_send_mad error\n");
+               goto err2;
+       }
+       return;
+
+       /* Unwind in reverse order of acquisition. */
+       err2: ib_free_send_mad(send_buf);
+       err1: ib_destroy_ah(ah);
+}
+
+/*
+ * Send handler passed to ib_register_mad_agent() for both agents: destroys
+ * the address handle attached to the send buffer, then frees the buffer.
+ */
+static void agent_send_handler(struct ib_mad_agent *mad_agent,
+               struct ib_mad_send_wc *mad_send_wc) {
+       struct ib_mad_send_buf *done = mad_send_wc->send_buf;
+
+       ib_destroy_ah(done->ah);
+       ib_free_send_mad(done);
+}
+
+/*
+ * Create the agent record for (@device, @port_num) and append it to
+ * ib_agent_port_list.
+ *
+ * An SMI agent (agent[0]) is registered only when the port's link layer is
+ * InfiniBand; a GSI agent (agent[1]) is always registered.  Both use
+ * agent_send_handler() as their send handler.
+ *
+ * Returns 0 on success, -ENOMEM if the record cannot be allocated, or the
+ * PTR_ERR() value of the failed ib_register_mad_agent() call.
+ *
+ * TODO: if the GSI registration fails, an already registered SMI agent is
+ * not unregistered (that cleanup is disabled in this port).  The list
+ * insertion is likewise not protected by ib_agent_port_list_lock here.
+ */
+int ib_agent_port_open(struct ib_device *device, int port_num) {
+       struct ib_agent_port_private *priv;
+       int ret;
+
+       /* Zeroed allocation, so agent[0] is NULL unless registered below. */
+       priv = calloc(1, sizeof *priv);
+       if (priv == NULL) {
+               printf("No memory for ib_agent_port_private\n");
+               return -ENOMEM;
+       }
+
+       /* Send-only MAD agent for the SMI QP (InfiniBand link layer only). */
+       if (rdma_port_get_link_layer(device, port_num)
+                       == IB_LINK_LAYER_INFINIBAND) {
+               priv->agent[0] = ib_register_mad_agent(device, port_num,
+                               IB_QPT_SMI, NULL, 0, &agent_send_handler,
+                               NULL, NULL);
+               if (IS_ERR(priv->agent[0])) {
+                       ret = PTR_ERR(priv->agent[0]);
+                       goto free_priv;
+               }
+       }
+
+       /* Send-only MAD agent for the GSI QP. */
+       priv->agent[1] = ib_register_mad_agent(device, port_num, IB_QPT_GSI,
+                       NULL, 0, &agent_send_handler, NULL, NULL);
+       if (IS_ERR(priv->agent[1])) {
+               ret = PTR_ERR(priv->agent[1]);
+               goto free_priv;
+       }
+
+       list_add_tail(&priv->port_list, &ib_agent_port_list);
+       return 0;
+
+       free_priv: free(priv);
+       return ret;
+}
+/*
+ int ib_agent_port_close(struct ib_device *device, int port_num)
+ {
+ struct ib_agent_port_private *port_priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ port_priv = __ib_get_agent_port(device, port_num);
+ if (port_priv == NULL) {
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+ printf( "Port %d not found\n", port_num);
+ return -ENODEV;
+ }
+ list_del(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
+ ib_unregister_mad_agent(port_priv->agent[1]);
+ if (port_priv->agent[0])
+ ib_unregister_mad_agent(port_priv->agent[0]);
+
+ kfree(port_priv);
+ return 0;
+ }
+ */
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/core/agent.h b/lib/devif/backends/net/mlx4/drivers/infiniband/core/agent.h
new file mode 100644 (file)
index 0000000..6669287
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __AGENT_H_
+#define __AGENT_H_
+
+#include <linux/err.h>
+#include <rdma/ib_mad.h>
+
+extern int ib_agent_port_open(struct ib_device *device, int port_num);
+
+extern int ib_agent_port_close(struct ib_device *device, int port_num);
+
+extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+                               struct ib_wc *wc, struct ib_device *device,
+                               int port_num, int qpn);
+
+#endif /* __AGENT_H_ */
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/core/cache.c b/lib/devif/backends/net/mlx4/drivers/infiniband/core/cache.c
new file mode 100644 (file)
index 0000000..3f7a85c
--- /dev/null
@@ -0,0 +1,393 @@
+/*
+
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+
+
+ #include <linux/module.h>
+ #include <linux/errno.h>
+ #include <linux/slab.h>
+ #include <linux/workqueue.h>
+
+ #include <rdma/ib_cache.h>
+ */
+#include "core_priv.h"
+
+/*
+ * Per-port P_Key cache.  Allocated by ib_cache_update() as one chunk:
+ * the header followed by table_len entries.
+ */
+struct ib_pkey_cache {
+       int table_len;  /* number of entries in table[] */
+       u16 table[];    /* C99 flexible array member (was the GNU table[0] form) */
+};
+
+/*
+ * Per-port GID cache.  Allocated by ib_cache_update() as one chunk:
+ * the header followed by table_len entries.
+ */
+struct ib_gid_cache {
+       int table_len;          /* number of entries in table[] */
+       union ib_gid table[];   /* C99 flexible array member (was the GNU table[0] form) */
+};
+/*
+ struct ib_update_work {
+ struct work_struct work;
+ struct ib_device  *device;
+ u8                 port_num;
+ };
+ */
+/* First port number of @device: 0 for an IB switch, 1 for any other node type. */
+static inline int start_port(struct ib_device *device) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
+               return 0;
+       return 1;
+}
+
+/* Last port number of @device: 0 for an IB switch, phys_port_cnt otherwise. */
+static inline int end_port(struct ib_device *device) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
+               return 0;
+       return device->phys_port_cnt;
+}
+/*
+ int ib_get_cached_gid(struct ib_device *device,
+ u8                port_num,
+ int               index,
+ union ib_gid     *gid)
+ {
+ struct ib_gid_cache *cache;
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.gid_cache[port_num - start_port(device)];
+
+ if (index < 0 || index >= cache->table_len)
+ ret = -EINVAL;
+ else
+ *gid = cache->table[index];
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL(ib_get_cached_gid);
+
+ int ib_find_cached_gid(struct ib_device *device,
+ union ib_gid  *gid,
+ u8               *port_num,
+ u16              *index)
+ {
+ struct ib_gid_cache *cache;
+ unsigned long flags;
+ int p, i;
+ int ret = -ENOENT;
+
+ *port_num = -1;
+ if (index)
+ *index = -1;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ cache = device->cache.gid_cache[p];
+ for (i = 0; i < cache->table_len; ++i) {
+ if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
+ *port_num = p + start_port(device);
+ if (index)
+ *index = i;
+ ret = 0;
+ goto found;
+ }
+ }
+ }
+ found:
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL(ib_find_cached_gid);
+
+ int ib_get_cached_pkey(struct ib_device *device,
+ u8                port_num,
+ int               index,
+ u16              *pkey)
+ {
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ if (index < 0 || index >= cache->table_len)
+ ret = -EINVAL;
+ else
+ *pkey = cache->table[index];
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL(ib_get_cached_pkey);
+
+ int ib_find_cached_pkey(struct ib_device *device,
+ u8                port_num,
+ u16               pkey,
+ u16              *index)
+ {
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int i;
+ int ret = -ENOENT;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ *index = -1;
+
+ for (i = 0; i < cache->table_len; ++i)
+ if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+ *index = i;
+ ret = 0;
+ break;
+ }
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL(ib_find_cached_pkey);
+
+ int ib_get_cached_lmc(struct ib_device *device,
+ u8                port_num,
+ u8                *lmc)
+ {
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *lmc = device->cache.lmc_cache[port_num - start_port(device)];
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL(ib_get_cached_lmc);
+ */
+/*
+ * Rebuild the cached P_Key table, GID table and LMC of one port.
+ *
+ * The port attributes are queried first to learn both table lengths; fresh
+ * tables are then allocated and filled entry by entry.  Only when every
+ * query succeeded are the new tables installed in device->cache and the
+ * previous ones freed.  On any failure the partially built tables are
+ * discarded and the existing cache is left untouched.
+ *
+ * NOTE: the write lock around the swap is disabled in this port.
+ */
+static void ib_cache_update(struct ib_device *device, u8 port) {
+       struct ib_port_attr *attr;
+       struct ib_pkey_cache *new_pkeys = NULL, *stale_pkeys;
+       struct ib_gid_cache *new_gids = NULL, *stale_gids;
+       int idx, rc, slot;
+
+       attr = malloc(sizeof *attr);
+       if (attr == NULL)
+               return;
+
+       rc = ib_query_port(device, port, attr);
+       if (rc) {
+               printf("ib_query_port failed (%d) for %s\n", rc, device->name);
+               goto discard;
+       }
+
+       new_pkeys = malloc(sizeof *new_pkeys
+                       + attr->pkey_tbl_len * sizeof *new_pkeys->table);
+       if (new_pkeys == NULL)
+               goto discard;
+       new_pkeys->table_len = attr->pkey_tbl_len;
+
+       new_gids = malloc(sizeof *new_gids
+                       + attr->gid_tbl_len * sizeof *new_gids->table);
+       if (new_gids == NULL)
+               goto discard;
+       new_gids->table_len = attr->gid_tbl_len;
+
+       for (idx = 0; idx < new_pkeys->table_len; ++idx) {
+               rc = ib_query_pkey(device, port, idx, &new_pkeys->table[idx]);
+               if (rc) {
+                       printf("ib_query_pkey failed (%d) for %s (index %d)\n", rc,
+                                       device->name, idx);
+                       goto discard;
+               }
+       }
+
+       for (idx = 0; idx < new_gids->table_len; ++idx) {
+               rc = ib_query_gid(device, port, idx, &new_gids->table[idx]);
+               if (rc) {
+                       printf("ib_query_gid failed (%d) for %s (index %d)\n", rc,
+                                       device->name, idx);
+                       goto discard;
+               }
+       }
+
+       /* Cache arrays are indexed relative to the device's first port. */
+       slot = port - start_port(device);
+
+       stale_pkeys = device->cache.pkey_cache[slot];
+       stale_gids = device->cache.gid_cache[slot];
+
+       device->cache.pkey_cache[slot] = new_pkeys;
+       device->cache.gid_cache[slot] = new_gids;
+       device->cache.lmc_cache[slot] = attr->lmc;
+
+       free(stale_pkeys);
+       free(stale_gids);
+       free(attr);
+       return;
+
+       discard: free(new_pkeys);
+       free(new_gids);
+       free(attr);
+}
+/*
+ static void ib_cache_task(struct work_struct *_work)
+ {
+ struct ib_update_work *work =
+ container_of(_work, struct ib_update_work, work);
+
+ ib_cache_update(work->device, work->port_num);
+ kfree(work);
+ }
+
+ static void ib_cache_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+ {
+ struct ib_update_work *work;
+
+ if (event->event == IB_EVENT_PORT_ERR    ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE  ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE   ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE) {
+ work = kmalloc(sizeof *work, GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(&work->work, ib_cache_task);
+ work->device   = event->device;
+ work->port_num = event->element.port_num;
+ schedule_work(&work->work);
+ }
+ }
+ }
+ */
+/*
+ * Allocate the per-port cache arrays of @device (P_Key tables, GID tables,
+ * LMC values) and populate them by calling ib_cache_update() for every port
+ * from start_port() to end_port().
+ *
+ * The arrays are zero-initialised, so a port whose update fails is left
+ * with NULL tables and an LMC of 0 rather than indeterminate values.  If
+ * any array cannot be allocated, all three are released and the pointers in
+ * device->cache are reset to NULL so nothing is left dangling.
+ *
+ * NOTE: lock initialisation and event-handler registration are disabled in
+ * this port, so the cache is filled once here and not refreshed on events.
+ */
+void ib_cache_setup_one(struct ib_device *device) {
+       int first = start_port(device);
+       int nports = end_port(device) - first + 1;
+       int p;
+
+       device->cache.pkey_cache = calloc(nports,
+                       sizeof *device->cache.pkey_cache);
+       device->cache.gid_cache = calloc(nports,
+                       sizeof *device->cache.gid_cache);
+       device->cache.lmc_cache = calloc(nports,
+                       sizeof *device->cache.lmc_cache);
+
+       if (!device->cache.pkey_cache || !device->cache.gid_cache
+                       || !device->cache.lmc_cache) {
+               printf("Couldn't allocate cache "
+                               "for %s\n", device->name);
+               goto err;
+       }
+
+       printf("end_port(device): %d\n", end_port(device));
+       printf("start_port(device): %d\n", start_port(device));
+
+       for (p = 0; p < nports; ++p) {
+               /* Explicit NULLs: ib_cache_update() frees the previous tables. */
+               device->cache.pkey_cache[p] = NULL;
+               device->cache.gid_cache[p] = NULL;
+               ib_cache_update(device, p + first);
+       }
+
+       return;
+
+       err: free(device->cache.pkey_cache);
+       free(device->cache.gid_cache);
+       free(device->cache.lmc_cache);
+       device->cache.pkey_cache = NULL;
+       device->cache.gid_cache = NULL;
+       device->cache.lmc_cache = NULL;
+}
+/*
+ static void ib_cache_cleanup_one(struct ib_device *device)
+ {
+ int p;
+
+ ib_unregister_event_handler(&device->cache.event_handler);
+ flush_scheduled_work();
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ kfree(device->cache.pkey_cache[p]);
+ kfree(device->cache.gid_cache[p]);
+ }
+
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.gid_cache);
+ kfree(device->cache.lmc_cache);
+ }
+
+ static struct ib_client cache_client = {
+ .name   = "cache",
+ .add    = ib_cache_setup_one,
+ .remove = ib_cache_cleanup_one
+ };
+
+ int __init ib_cache_setup(void)
+ {
+ return ib_register_client(&cache_client);
+ }
+
+ void __exit ib_cache_cleanup(void)
+ {
+ ib_unregister_client(&cache_client);
+ }
+ */
diff --git a/lib/devif/backends/net/mlx4/drivers/infiniband/core/cm.c b/lib/devif/backends/net/mlx4/drivers/infiniband/core/cm.c
new file mode 100644 (file)
index 0000000..3d2794d
--- /dev/null
@@ -0,0 +1,3897 @@
+/*
+ * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/random.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/workqueue.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+
+#include <asm/atomic-long.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
+#include "cm_msgs.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define PFX    "ib_cm: "
+
+/*
+ * Limit CM message timeouts to something reasonable:
+ * 8 seconds per message, with up to 15 retries
+ *
+ * The value is an exponent rather than a duration: assuming the
+ * 4.096us x 2^n encoding described at cm_convert_to_ms(), the default of
+ * 21 corresponds to roughly 8.6 seconds.  It is exposed as a module
+ * parameter with mode 0644.
+ */
+static int max_timeout = 21;
+module_param(max_timeout, int, 0644);
+MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
+                             "(default=21, or ~8 seconds)");
+
+static void cm_add_one(struct ib_device *device);
+static void cm_remove_one(struct ib_device *device);
+
+/*
+ * Client descriptor for this module: named "cm", with cm_add_one() and
+ * cm_remove_one() as its per-device add/remove callbacks.
+ */
+static struct ib_client cm_client = {
+       .name   = "cm",
+       .add    = cm_add_one,
+       .remove = cm_remove_one
+};
+
+/*
+ * Module-wide connection manager state; the single static instance is "cm".
+ */
+static struct ib_cm {
+       /* Taken around local_id_table updates and timewait tree/list changes. */
+       spinlock_t lock;
+       /* List of cm_device entries, walked under device_lock. */
+       struct list_head device_list;
+       rwlock_t device_lock;
+       /* Tree of listening cm_id_private entries, linked via service_node. */
+       struct rb_root listen_service_table;
+       u64 listen_service_id;
+       /* struct rb_root peer_service_table; todo: fix peer to peer */
+       /* Trees of cm_timewait_info, keyed by (remote_qpn, remote_ca_guid)... */
+       struct rb_root remote_qp_table;
+       /* ...and by (remote_id, remote_ca_guid). */
+       struct rb_root remote_id_table;
+       /* Tree of cm_id_private entries keyed by (remote_id, av.dgid). */
+       struct rb_root remote_sidr_table;
+       /* Maps an allocated index to its cm_id_private. */
+       struct idr local_id_table;
+       /* XORed with the idr index to form the id's local_id. */
+       __be32 random_id_operand;
+       /* cm_timewait_info entries queued by cm_enter_timewait(). */
+       struct list_head timewait_list;
+       /* Workqueue on which delayed timewait work is queued. */
+       struct workqueue_struct *wq;
+} cm;
+
+/*
+ * Counter indexes ordered by attribute ID.
+ *
+ * CM_ATTR_COUNT is the number of counters and sizes the counter[] array in
+ * struct cm_counter_group.  CM_ATTR_ID_OFFSET is presumably the attribute
+ * ID that corresponds to index 0 -- TODO confirm against its users.
+ */
+enum {
+       CM_REQ_COUNTER,
+       CM_MRA_COUNTER,
+       CM_REJ_COUNTER,
+       CM_REP_COUNTER,
+       CM_RTU_COUNTER,
+       CM_DREQ_COUNTER,
+       CM_DREP_COUNTER,
+       CM_SIDR_REQ_COUNTER,
+       CM_SIDR_REP_COUNTER,
+       CM_LAP_COUNTER,
+       CM_APR_COUNTER,
+       CM_ATTR_COUNT,
+       CM_ATTR_ID_OFFSET = 0x0010,
+};
+
+/*
+ * Counter group indexes.  CM_COUNTER_GROUPS is the number of groups and
+ * sizes both counter_group_names[] and cm_port.counter_group[].
+ */
+enum {
+       CM_XMIT,
+       CM_XMIT_RETRIES,
+       CM_RECV,
+       CM_RECV_DUPLICATES,
+       CM_COUNTER_GROUPS
+};
+
+/*
+ * Name of each counter group, in counter group index order.  The second
+ * dimension is sized to hold the longest name, "cm_rx_duplicates",
+ * including its terminating NUL.
+ */
+static char const counter_group_names[CM_COUNTER_GROUPS]
+                                    [sizeof("cm_rx_duplicates")] = {
+       "cm_tx_msgs", "cm_tx_retries",
+       "cm_rx_msgs", "cm_rx_duplicates"
+};
+
+/* One group of counters: a kobject plus one atomic counter per counter index. */
+struct cm_counter_group {
+       struct kobject obj;
+       atomic_long_t counter[CM_ATTR_COUNT];
+};
+
+/* An attribute paired with the counter index it refers to. */
+struct cm_counter_attribute {
+       struct attribute attr;
+       int index;
+};
+
+/*
+ * Define a struct cm_counter_attribute called cm_<_name>_counter_attr whose
+ * attribute name is the stringified _name, whose mode is 0444, and whose
+ * counter index is _index.  Storage class is supplied by the user of the
+ * macro.
+ */
+#define CM_COUNTER_ATTR(_name, _index) \
+struct cm_counter_attribute cm_##_name##_counter_attr = { \
+       .attr = { .name = __stringify(_name), .mode = 0444 }, \
+       .index = _index \
+}
+
+/* One static counter attribute per counter index, in index order. */
+static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
+static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
+static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
+static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
+static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
+static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
+static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
+static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
+static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
+static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
+static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
+
+/* NULL-terminated list of every counter attribute defined above. */
+static struct attribute *cm_counter_default_attrs[] = {
+       &cm_req_counter_attr.attr,
+       &cm_mra_counter_attr.attr,
+       &cm_rej_counter_attr.attr,
+       &cm_rep_counter_attr.attr,
+       &cm_rtu_counter_attr.attr,
+       &cm_dreq_counter_attr.attr,
+       &cm_drep_counter_attr.attr,
+       &cm_sidr_req_counter_attr.attr,
+       &cm_sidr_rep_counter_attr.attr,
+       &cm_lap_counter_attr.attr,
+       &cm_apr_counter_attr.attr,
+       NULL
+};
+
+/* Per-port CM state: owning device, MAD agent, port number and counters. */
+struct cm_port {
+       struct cm_device *cm_dev;
+       struct ib_mad_agent *mad_agent;
+       struct kobject port_obj;
+       u8 port_num;
+       /* One counter group per counter group index. */
+       struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
+};
+
+/* Per-device CM state, linked into cm.device_list through "list". */
+struct cm_device {
+       struct list_head list;
+       struct ib_device *ib_device;
+       struct device *device;
+       u8 ack_delay;
+       /* Trailing array; cm_init_av_by_path() indexes it with port number - 1. */
+       struct cm_port *port[0];
+};
+
+/* Address vector: the port plus the addressing attributes used to send on it. */
+struct cm_av {
+       struct cm_port *port;
+       union ib_gid dgid;
+       struct ib_ah_attr ah_attr;
+       u16 pkey_index;
+       /* Set to the path's packet_life_time + 1 by cm_init_av_by_path(). */
+       u8 timeout;
+};
+
+/*
+ * A unit of deferred CM work.  Entries are queued on a cm_id_private's
+ * work_list through "list" and released with cm_free_work().
+ */
+struct cm_work {
+       struct delayed_work work;
+       struct list_head list;
+       struct cm_port *port;
+       struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
+       __be32 local_id;                        /* Established / timewait */
+       __be32 remote_id;
+       struct ib_cm_event cm_event;
+       /* Trailing array of path records. */
+       struct ib_sa_path_rec path[0];
+};
+
+/*
+ * Timewait bookkeeping for one connection.  An entry may be linked into
+ * cm.remote_qp_table and cm.remote_id_table; the inserted_* flags record
+ * which, so cm_cleanup_timewait() knows what to erase.
+ */
+struct cm_timewait_info {
+       struct cm_work work;                    /* Must be first. */
+       struct list_head list;
+       struct rb_node remote_qp_node;
+       struct rb_node remote_id_node;
+       __be64 remote_ca_guid;
+       __be32 remote_qpn;
+       u8 inserted_remote_qp;
+       u8 inserted_remote_id;
+};
+
+/*
+ * Private state behind a public struct ib_cm_id.  The public id is embedded
+ * as the "id" member and recovered with container_of().
+ */
+struct cm_id_private {
+       struct ib_cm_id id;
+
+       /* Links into cm.listen_service_table and cm.remote_sidr_table. */
+       struct rb_node service_node;
+       struct rb_node sidr_id_node;
+       spinlock_t lock;        /* Do not acquire inside cm.lock */
+       /* Completed by cm_deref_id() when refcount drops to zero. */
+       struct completion comp;
+       atomic_t refcount;
+
+       struct ib_mad_send_buf *msg;
+       struct cm_timewait_info *timewait_info;
+       /* todo: use alternate port on send failure */
+       struct cm_av av;
+       struct cm_av alt_av;
+       struct ib_cm_compare_data *compare_data;
+
+       /* Buffer freed by cm_set_private_data() when it is replaced. */
+       void *private_data;
+       __be64 tid;
+       __be32 local_qpn;
+       __be32 remote_qpn;
+       enum ib_qp_type qp_type;
+       __be32 sq_psn;
+       __be32 rq_psn;
+       int timeout_ms;
+       enum ib_mtu path_mtu;
+       __be16 pkey;
+       u8 private_data_len;
+       u8 max_cm_retries;
+       u8 peer_to_peer;
+       u8 responder_resources;
+       u8 initiator_depth;
+       u8 retry_count;
+       u8 rnr_retry_count;
+       u8 service_timeout;
+       u8 target_ack_delay;
+
+       /* Queue of pending cm_work entries, linked via cm_work.list. */
+       struct list_head work_list;
+       atomic_t work_count;
+};
+
+static void cm_work_handler(struct work_struct *work);
+
+/*
+ * Drop one reference on cm_id_priv and signal its completion when the
+ * count reaches zero.
+ */
+static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
+{
+       if (!atomic_dec_and_test(&cm_id_priv->refcount))
+               return;
+
+       complete(&cm_id_priv->comp);
+}
+
+/*
+ * Allocate a send MAD addressed through cm_id_priv's primary address vector.
+ *
+ * On success the MAD holds a new address handle and a reference on
+ * cm_id_priv (stored in context[0]), is returned through *msg, and the
+ * function returns 0.  On failure the error encoded in the failing
+ * pointer is returned and nothing is left allocated.
+ */
+static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
+                       struct ib_mad_send_buf **msg)
+{
+       struct ib_mad_agent *agent = cm_id_priv->av.port->mad_agent;
+       struct ib_mad_send_buf *send_buf;
+       struct ib_ah *addr_handle;
+
+       addr_handle = ib_create_ah(agent->qp->pd, &cm_id_priv->av.ah_attr);
+       if (IS_ERR(addr_handle))
+               return PTR_ERR(addr_handle);
+
+       send_buf = ib_create_send_mad(agent, cm_id_priv->id.remote_cm_qpn,
+                                     cm_id_priv->av.pkey_index,
+                                     0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+                                     GFP_ATOMIC);
+       if (IS_ERR(send_buf)) {
+               ib_destroy_ah(addr_handle);
+               return PTR_ERR(send_buf);
+       }
+
+       /* The caller sets a timeout itself when it expects a response. */
+       send_buf->ah = addr_handle;
+       send_buf->retries = cm_id_priv->max_cm_retries;
+
+       /* The MAD keeps the id alive until cm_free_msg() drops this reference. */
+       atomic_inc(&cm_id_priv->refcount);
+       send_buf->context[0] = cm_id_priv;
+
+       *msg = send_buf;
+       return 0;
+}
+
+/*
+ * Allocate a send MAD that replies to a received MAD, using an address
+ * handle derived from the receive work completion.
+ *
+ * Returns 0 and stores the MAD in *msg on success; otherwise returns the
+ * error encoded in the failing pointer, with the address handle released.
+ */
+static int cm_alloc_response_msg(struct cm_port *port,
+                                struct ib_mad_recv_wc *mad_recv_wc,
+                                struct ib_mad_send_buf **msg)
+{
+       struct ib_mad_agent *agent = port->mad_agent;
+       struct ib_mad_send_buf *reply;
+       struct ib_ah *reply_ah;
+
+       reply_ah = ib_create_ah_from_wc(agent->qp->pd, mad_recv_wc->wc,
+                                       mad_recv_wc->recv_buf.grh,
+                                       port->port_num);
+       if (IS_ERR(reply_ah))
+               return PTR_ERR(reply_ah);
+
+       reply = ib_create_send_mad(agent, 1, mad_recv_wc->wc->pkey_index,
+                                  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+                                  GFP_ATOMIC);
+       if (!IS_ERR(reply)) {
+               reply->ah = reply_ah;
+               *msg = reply;
+               return 0;
+       }
+
+       ib_destroy_ah(reply_ah);
+       return PTR_ERR(reply);
+}
+
+/*
+ * Release a send MAD: destroy its address handle, drop the id reference
+ * held in context[0] (if any), then free the MAD itself.
+ */
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+       void *owner;
+
+       ib_destroy_ah(msg->ah);
+
+       owner = msg->context[0];
+       if (owner)
+               cm_deref_id(owner);
+
+       ib_free_send_mad(msg);
+}
+
+/*
+ * Duplicate a private data buffer.
+ *
+ * Returns NULL when there is nothing to copy (NULL pointer or zero
+ * length), ERR_PTR(-ENOMEM) when the duplicate cannot be allocated, and
+ * the new buffer otherwise.
+ */
+static void * cm_copy_private_data(const void *private_data,
+                                  u8 private_data_len)
+{
+       void *copy;
+
+       if (private_data == NULL || private_data_len == 0)
+               return NULL;
+
+       copy = kmemdup(private_data, private_data_len, GFP_KERNEL);
+       return copy ? copy : ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Replace the private data attached to cm_id_priv.  Any previous buffer
+ * with a non-zero recorded length is freed first; the id then records the
+ * new pointer and length as given.
+ */
+static void cm_set_private_data(struct cm_id_private *cm_id_priv,
+                                void *private_data, u8 private_data_len)
+{
+       void *previous = cm_id_priv->private_data;
+
+       if (previous != NULL && cm_id_priv->private_data_len != 0)
+               kfree(previous);
+
+       cm_id_priv->private_data_len = private_data_len;
+       cm_id_priv->private_data = private_data;
+}
+
+/*
+ * Initialise an address vector for replying to a received message: record
+ * the port and the completion's pkey index, and fill the address handle
+ * attributes from the work completion and GRH.
+ */
+static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+                                   struct ib_grh *grh, struct cm_av *av)
+{
+       struct ib_device *ibdev = port->cm_dev->ib_device;
+
+       av->pkey_index = wc->pkey_index;
+       av->port = port;
+       ib_init_ah_from_wc(ibdev, port->port_num, wc, grh, &av->ah_attr);
+}
+
+/*
+ * Fill in an address vector from a path record.
+ *
+ * Walks cm.device_list under the device read lock and picks the first
+ * device for which ib_find_cached_gid() returns 0 for path->sgid, then
+ * resolves the path's pkey to an index on that device and port.
+ *
+ * Returns 0 on success, -EINVAL if no device matched, or the non-zero
+ * value returned by ib_find_cached_pkey().
+ */
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+{
+       struct cm_device *cm_dev;
+       struct cm_port *port = NULL;
+       unsigned long flags;
+       int ret;
+       u8 p;
+
+       read_lock_irqsave(&cm.device_lock, flags);
+       list_for_each_entry(cm_dev, &cm.device_list, list) {
+               if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
+                                       &p, NULL)) {
+                       /* The port array is indexed by the reported port number minus one. */
+                       port = cm_dev->port[p-1];
+                       break;
+               }
+       }
+       read_unlock_irqrestore(&cm.device_lock, flags);
+
+       if (!port)
+               return -EINVAL;
+
+       /*
+        * NOTE(review): cm_dev is only meaningful here because the loop
+        * exited via break; it is dereferenced after device_lock has been
+        * released.
+        */
+       ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
+                                 be16_to_cpu(path->pkey), &av->pkey_index);
+       if (ret)
+               return ret;
+
+       av->port = port;
+       ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
+                            &av->ah_attr);
+       av->timeout = path->packet_life_time + 1;
+       return 0;
+}
+
+/*
+ * Allocate a local communication ID for cm_id_priv.
+ *
+ * An index is taken from cm.local_id_table at or above a rotating starting
+ * point, retrying for as long as the allocator reports -EAGAIN and
+ * idr_pre_get() succeeds.  On success the index XORed with
+ * cm.random_id_operand is stored in cm_id_priv->id.local_id.
+ *
+ * Returns 0 on success or the non-zero value from idr_get_new_above().
+ * On failure id.local_id is left untouched: no index was allocated, so
+ * there is no valid value to derive it from.
+ */
+static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+{
+       unsigned long flags;
+       int ret, id = 0;
+       static int next_id;
+
+       do {
+               spin_lock_irqsave(&cm.lock, flags);
+               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+                                       next_id, &id);
+               if (!ret)
+                       next_id = ((unsigned) id + 1) & MAX_ID_MASK;
+               spin_unlock_irqrestore(&cm.lock, flags);
+       } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+       /* Never derive local_id from an index that was not handed out. */
+       if (!ret)
+               cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+       return ret;
+}
+
+/*
+ * Return a local ID to cm.local_id_table.  The table index is recovered
+ * by XORing local_id with cm.random_id_operand.
+ */
+static void cm_free_id(__be32 local_id)
+{
+       int index;
+
+       spin_lock_irq(&cm.lock);
+       index = (__force int) (local_id ^ cm.random_id_operand);
+       idr_remove(&cm.local_id_table, index);
+       spin_unlock_irq(&cm.lock);
+}
+
+/*
+ * Look up the id registered under local_id and take a reference on it,
+ * provided its remote_id matches.  Returns NULL when no entry exists or
+ * the remote id differs.  Takes no lock itself; cm_acquire_id() is the
+ * variant that wraps this lookup in cm.lock.
+ */
+static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+{
+       struct cm_id_private *found;
+
+       found = idr_find(&cm.local_id_table,
+                        (__force int) (local_id ^ cm.random_id_operand));
+       if (!found)
+               return NULL;
+
+       if (found->id.remote_id != remote_id)
+               return NULL;
+
+       atomic_inc(&found->refcount);
+       return found;
+}
+
+/*
+ * Locked wrapper around cm_get_id(): performs the lookup and reference
+ * grab while holding cm.lock.
+ */
+static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
+{
+       struct cm_id_private *found;
+
+       spin_lock_irq(&cm.lock);
+       found = cm_get_id(local_id, remote_id);
+       spin_unlock_irq(&cm.lock);
+
+       return found;
+}
+
+/*
+ * Store (src AND mask) into dst over IB_CM_COMPARE_SIZE bytes.
+ *
+ * The buffers are plain u8 arrays with no alignment guarantee, so they are
+ * processed one byte at a time instead of being reinterpreted as unsigned
+ * long words.  The result is the same for every byte, and the whole range
+ * is covered even if IB_CM_COMPARE_SIZE is not a multiple of
+ * sizeof(unsigned long).
+ */
+static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+{
+       int i;
+
+       for (i = 0; i < IB_CM_COMPARE_SIZE; i++)
+               dst[i] = src[i] & mask[i];
+}
+
+/*
+ * Compare two compare-data descriptors, each masked by the other's mask.
+ * Returns 0 when either descriptor is NULL or the masked data is equal,
+ * otherwise the memcmp() ordering of the masked buffers.
+ */
+static int cm_compare_data(struct ib_cm_compare_data *src_data,
+                          struct ib_cm_compare_data *dst_data)
+{
+       u8 masked_src[IB_CM_COMPARE_SIZE];
+       u8 masked_dst[IB_CM_COMPARE_SIZE];
+
+       if (src_data == NULL || dst_data == NULL)
+               return 0;
+
+       cm_mask_copy(masked_src, src_data->data, dst_data->mask);
+       cm_mask_copy(masked_dst, dst_data->data, src_data->mask);
+
+       return memcmp(masked_src, masked_dst, IB_CM_COMPARE_SIZE);
+}
+
+/*
+ * Compare raw private data against a compare-data descriptor: the private
+ * data is masked with the descriptor's mask and compared with its data.
+ * Returns 0 when there is no descriptor or the masked data matches.
+ */
+static int cm_compare_private_data(u8 *private_data,
+                                  struct ib_cm_compare_data *dst_data)
+{
+       u8 masked[IB_CM_COMPARE_SIZE];
+
+       if (dst_data == NULL)
+               return 0;
+
+       cm_mask_copy(masked, private_data, dst_data->mask);
+
+       return memcmp(masked, dst_data->data, IB_CM_COMPARE_SIZE);
+}
+
+/*
+ * Trivial helpers to strip endian annotation and compare; the
+ * endianness doesn't actually matter since we just need a stable
+ * order for the RB tree.
+ */
+/* Non-zero when the raw 32-bit value of a is less than that of b. */
+static int be32_lt(__be32 a, __be32 b)
+{
+       u32 lhs = (__force u32) a;
+       u32 rhs = (__force u32) b;
+
+       return lhs < rhs;
+}
+
+/* Non-zero when the raw 32-bit value of a is greater than that of b. */
+static int be32_gt(__be32 a, __be32 b)
+{
+       u32 lhs = (__force u32) a;
+       u32 rhs = (__force u32) b;
+
+       return lhs > rhs;
+}
+
+/* Non-zero when the raw 64-bit value of a is less than that of b. */
+static int be64_lt(__be64 a, __be64 b)
+{
+       u64 lhs = (__force u64) a;
+       u64 rhs = (__force u64) b;
+
+       return lhs < rhs;
+}
+
+/* Non-zero when the raw 64-bit value of a is greater than that of b. */
+static int be64_gt(__be64 a, __be64 b)
+{
+       u64 lhs = (__force u64) a;
+       u64 rhs = (__force u64) b;
+
+       return lhs > rhs;
+}
+
+/*
+ * Insert a listening id into cm.listen_service_table.
+ *
+ * While descending, an existing entry is treated as a conflict when it is
+ * on the same device, the two service ids are equal once each is masked
+ * with the other's service mask, and the compare data matches.  In that
+ * case the existing entry is returned and nothing is inserted.  Otherwise
+ * the new node is linked in and NULL is returned.
+ *
+ * Tree order is: device pointer, then raw service id, then compare data.
+ *
+ * NOTE(review): presumably called with cm.lock held -- confirm at callers.
+ */
+static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+{
+       struct rb_node **link = &cm.listen_service_table.rb_node;
+       struct rb_node *parent = NULL;
+       struct cm_id_private *cur_cm_id_priv;
+       __be64 service_id = cm_id_priv->id.service_id;
+       __be64 service_mask = cm_id_priv->id.service_mask;
+       int data_cmp;
+
+       while (*link) {
+               parent = *link;
+               cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+                                         service_node);
+               data_cmp = cm_compare_data(cm_id_priv->compare_data,
+                                          cur_cm_id_priv->compare_data);
+               /* Overlapping listen on the same device: report the conflict. */
+               if ((cur_cm_id_priv->id.service_mask & service_id) ==
+                   (service_mask & cur_cm_id_priv->id.service_id) &&
+                   (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
+                   !data_cmp)
+                       return cur_cm_id_priv;
+
+               if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+                       link = &(*link)->rb_left;
+               else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+                       link = &(*link)->rb_right;
+               else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
+                       link = &(*link)->rb_left;
+               else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
+                       link = &(*link)->rb_right;
+               else if (data_cmp < 0)
+                       link = &(*link)->rb_left;
+               else
+                       link = &(*link)->rb_right;
+       }
+       rb_link_node(&cm_id_priv->service_node, parent, link);
+       rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
+       return NULL;
+}
+
+/*
+ * Find the listening id that matches an incoming request.
+ *
+ * An entry matches when it is on the given device, service_id masked with
+ * the entry's service mask equals the entry's service id, and the private
+ * data satisfies the entry's compare data.  Returns the matching entry or
+ * NULL.  The descent uses the same ordering as cm_insert_listen().
+ */
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+                                            __be64 service_id,
+                                            u8 *private_data)
+{
+       struct rb_node *node = cm.listen_service_table.rb_node;
+       struct cm_id_private *cm_id_priv;
+       int data_cmp;
+
+       while (node) {
+               cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+               data_cmp = cm_compare_private_data(private_data,
+                                                  cm_id_priv->compare_data);
+               if ((cm_id_priv->id.service_mask & service_id) ==
+                    cm_id_priv->id.service_id &&
+                   (cm_id_priv->id.device == device) && !data_cmp)
+                       return cm_id_priv;
+
+               if (device < cm_id_priv->id.device)
+                       node = node->rb_left;
+               else if (device > cm_id_priv->id.device)
+                       node = node->rb_right;
+               else if (be64_lt(service_id, cm_id_priv->id.service_id))
+                       node = node->rb_left;
+               else if (be64_gt(service_id, cm_id_priv->id.service_id))
+                       node = node->rb_right;
+               else if (data_cmp < 0)
+                       node = node->rb_left;
+               else
+                       node = node->rb_right;
+       }
+       return NULL;
+}
+
+/*
+ * Insert timewait_info into cm.remote_id_table, keyed by remote id and
+ * then remote CA GUID.
+ *
+ * Returns the existing entry if one with the same key is already present
+ * (nothing is inserted).  Otherwise marks timewait_info as inserted, links
+ * it into the tree and returns NULL.
+ */
+static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
+                                                    *timewait_info)
+{
+       struct rb_node **link = &cm.remote_id_table.rb_node;
+       struct rb_node *parent = NULL;
+       struct cm_timewait_info *cur_timewait_info;
+       __be64 remote_ca_guid = timewait_info->remote_ca_guid;
+       __be32 remote_id = timewait_info->work.remote_id;
+
+       while (*link) {
+               parent = *link;
+               cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+                                            remote_id_node);
+               if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
+                       link = &(*link)->rb_left;
+               else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
+                       link = &(*link)->rb_right;
+               else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+                       link = &(*link)->rb_left;
+               else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+                       link = &(*link)->rb_right;
+               else
+                       return cur_timewait_info;
+       }
+       /* Recorded so cm_cleanup_timewait() knows to erase this node. */
+       timewait_info->inserted_remote_id = 1;
+       rb_link_node(&timewait_info->remote_id_node, parent, link);
+       rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
+       return NULL;
+}
+
+/*
+ * Look up the timewait entry in cm.remote_id_table whose remote id and
+ * remote CA GUID both match.  Returns the entry, or NULL if none exists.
+ * The descent uses the same ordering as cm_insert_remote_id().
+ */
+static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
+                                                  __be32 remote_id)
+{
+       struct rb_node *node = cm.remote_id_table.rb_node;
+       struct cm_timewait_info *timewait_info;
+
+       while (node) {
+               timewait_info = rb_entry(node, struct cm_timewait_info,
+                                        remote_id_node);
+               if (be32_lt(remote_id, timewait_info->work.remote_id))
+                       node = node->rb_left;
+               else if (be32_gt(remote_id, timewait_info->work.remote_id))
+                       node = node->rb_right;
+               else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
+                       node = node->rb_left;
+               else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
+                       node = node->rb_right;
+               else
+                       return timewait_info;
+       }
+       return NULL;
+}
+
+/*
+ * Insert timewait_info into cm.remote_qp_table, keyed by remote QPN and
+ * then remote CA GUID.
+ *
+ * Returns the existing entry if one with the same key is already present
+ * (nothing is inserted).  Otherwise marks timewait_info as inserted, links
+ * it into the tree and returns NULL.
+ */
+static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
+                                                     *timewait_info)
+{
+       struct rb_node **link = &cm.remote_qp_table.rb_node;
+       struct rb_node *parent = NULL;
+       struct cm_timewait_info *cur_timewait_info;
+       __be64 remote_ca_guid = timewait_info->remote_ca_guid;
+       __be32 remote_qpn = timewait_info->remote_qpn;
+
+       while (*link) {
+               parent = *link;
+               cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+                                            remote_qp_node);
+               if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
+                       link = &(*link)->rb_left;
+               else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
+                       link = &(*link)->rb_right;
+               else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+                       link = &(*link)->rb_left;
+               else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
+                       link = &(*link)->rb_right;
+               else
+                       return cur_timewait_info;
+       }
+       /* Recorded so cm_cleanup_timewait() knows to erase this node. */
+       timewait_info->inserted_remote_qp = 1;
+       rb_link_node(&timewait_info->remote_qp_node, parent, link);
+       rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+       return NULL;
+}
+
+/*
+ * Insert cm_id_priv into cm.remote_sidr_table, keyed by remote id and then
+ * by a bytewise comparison of av.dgid.
+ *
+ * Returns the existing entry if one with the same key is already present
+ * (nothing is inserted); otherwise links the node and returns NULL.
+ */
+static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
+                                                   *cm_id_priv)
+{
+       struct rb_node **link = &cm.remote_sidr_table.rb_node;
+       struct rb_node *parent = NULL;
+       struct cm_id_private *cur_cm_id_priv;
+       union ib_gid *port_gid = &cm_id_priv->av.dgid;
+       __be32 remote_id = cm_id_priv->id.remote_id;
+
+       while (*link) {
+               parent = *link;
+               cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+                                         sidr_id_node);
+               if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
+                       link = &(*link)->rb_left;
+               else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
+                       link = &(*link)->rb_right;
+               else {
+                       /* Same remote id: fall back to ordering by GID bytes. */
+                       int cmp;
+                       cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
+                                    sizeof *port_gid);
+                       if (cmp < 0)
+                               link = &(*link)->rb_left;
+                       else if (cmp > 0)
+                               link = &(*link)->rb_right;
+                       else
+                               return cur_cm_id_priv;
+               }
+       }
+       rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
+       rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+       return NULL;
+}
+
+/*
+ * Send a SIDR reply that carries only the given status; every other reply
+ * parameter is zeroed.  The result of the send is not checked.
+ */
+static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
+                              enum ib_cm_sidr_status status)
+{
+       struct ib_cm_sidr_rep_param rep_param;
+
+       memset(&rep_param, 0, sizeof(rep_param));
+       rep_param.status = status;
+
+       ib_send_cm_sidr_rep(&cm_id_priv->id, &rep_param);
+}
+
+/*
+ * Create a communication id bound to a device.
+ *
+ * The new id starts in IB_CM_IDLE with remote_cm_qpn 1, an allocated local
+ * id, a reference count of one and a work count of -1.
+ *
+ * Returns the public id on success, or ERR_PTR(-ENOMEM) if either the
+ * private structure or the local id cannot be allocated.
+ */
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+                                ib_cm_handler cm_handler,
+                                void *context)
+{
+       struct cm_id_private *priv;
+
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (priv == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       priv->id.state = IB_CM_IDLE;
+       priv->id.device = device;
+       priv->id.cm_handler = cm_handler;
+       priv->id.context = context;
+       priv->id.remote_cm_qpn = 1;
+
+       if (cm_alloc_id(priv)) {
+               kfree(priv);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       spin_lock_init(&priv->lock);
+       init_completion(&priv->comp);
+       INIT_LIST_HEAD(&priv->work_list);
+       atomic_set(&priv->work_count, -1);
+       atomic_set(&priv->refcount, 1);
+
+       return &priv->id;
+}
+EXPORT_SYMBOL(ib_create_cm_id);
+
+/*
+ * Remove and return the first entry of cm_id_priv's work list, or NULL
+ * when the list is empty.
+ */
+static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+{
+       struct list_head *queue = &cm_id_priv->work_list;
+       struct cm_work *first;
+
+       if (list_empty(queue))
+               return NULL;
+
+       first = list_entry(queue->next, struct cm_work, list);
+       list_del(&first->list);
+
+       return first;
+}
+
+/* Free a work item along with the received MAD it carries, if any. */
+static void cm_free_work(struct cm_work *work)
+{
+       struct ib_mad_recv_wc *recv_wc = work->mad_recv_wc;
+
+       if (recv_wc != NULL)
+               ib_free_recv_mad(recv_wc);
+
+       kfree(work);
+}
+
+/*
+ * Approximate conversion to milliseconds of an IBA time exponent, i.e. of
+ * 4.096us x 2^iba_time.  Exponents of 8 or less all map to 1 ms.
+ */
+static inline int cm_convert_to_ms(int iba_time)
+{
+       int shift = max(iba_time - 8, 0);
+
+       return 1 << shift;
+}
+
+/*
+ * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
+ * Because of how ack_timeout is stored, adding one doubles the timeout.
+ * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
+ * increment it (round up) only if the other is within 50%.
+ */
+static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
+{
+       int life = packet_life_time + 1;
+       int timeout;
+
+       if (life < ca_ack_delay) {
+               /* The ack delay dominates; round up if life is within one step. */
+               timeout = ca_ack_delay;
+               if (life >= (ca_ack_delay - 1))
+                       timeout++;
+       } else {
+               /* The life time dominates; round up if the delay is within one step. */
+               timeout = life;
+               if (ca_ack_delay >= (life - 1))
+                       timeout++;
+       }
+
+       /* The result is capped at 31. */
+       return min(31, timeout);
+}
+
+/*
+ * Unlink timewait_info from whichever of cm.remote_id_table and
+ * cm.remote_qp_table it was inserted into, clearing the matching flag.
+ */
+static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+{
+       if (timewait_info->inserted_remote_id != 0) {
+               timewait_info->inserted_remote_id = 0;
+               rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
+       }
+
+       if (timewait_info->inserted_remote_qp != 0) {
+               timewait_info->inserted_remote_qp = 0;
+               rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+       }
+}
+
+/*
+ * Allocate a zeroed timewait record for local_id, with its delayed work
+ * bound to cm_work_handler() and its event preset to IB_CM_TIMEWAIT_EXIT.
+ * Returns the record, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+{
+       struct cm_timewait_info *tw;
+
+       tw = kzalloc(sizeof(*tw), GFP_KERNEL);
+       if (tw == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_DELAYED_WORK(&tw->work.work, cm_work_handler);
+       tw->work.local_id = local_id;
+       tw->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
+
+       return tw;
+}
+
+/*
+ * Move cm_id_priv into IB_CM_TIMEWAIT.  Its timewait record is unlinked
+ * from the remote lookup trees, appended to cm.timewait_list and handed
+ * to the workqueue as delayed work; the id then lets go of the record.
+ */
+static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
+{
+       struct cm_timewait_info *tw;
+       unsigned long flags;
+       int wait_ms;
+
+       spin_lock_irqsave(&cm.lock, flags);
+       tw = cm_id_priv->timewait_info;
+       cm_cleanup_timewait(tw);
+       list_add_tail(&tw->list, &cm.timewait_list);
+       spin_unlock_irqrestore(&cm.lock, flags);
+
+       /*
+        * The user may destroy the cm_id before timewait ends.  To guard
+        * against that, the cm_id is looked up again once timewait is over,
+        * before the user is told that timewait has been exited.
+        */
+       cm_id_priv->id.state = IB_CM_TIMEWAIT;
+       wait_ms = cm_convert_to_ms(cm_id_priv->av.timeout);
+       queue_delayed_work(cm.wq, &tw->work.work, msecs_to_jiffies(wait_ms));
+       cm_id_priv->timewait_info = NULL;
+}
+
+/*
+ * Put cm_id_priv back into IB_CM_IDLE and, if it owns a timewait record,
+ * unlink that record from the lookup trees and free it.
+ */
+static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
+{
+       struct cm_timewait_info *tw;
+       unsigned long flags;
+
+       cm_id_priv->id.state = IB_CM_IDLE;
+
+       tw = cm_id_priv->timewait_info;
+       if (!tw)
+               return;
+
+       spin_lock_irqsave(&cm.lock, flags);
+       cm_cleanup_timewait(tw);
+       spin_unlock_irqrestore(&cm.lock, flags);
+
+       kfree(tw);
+       cm_id_priv->timewait_info = NULL;
+}
+
+/*
+ * Tear down a communication id.
+ *
+ * First performs the state-specific shutdown (cancel an outstanding MAD,
+ * send a REJ/DREQ/DREP or SIDR reject, or leave the listen tree).  Then
+ * releases the local id, drops the reference taken at creation, waits for
+ * the completion signalled by the last cm_deref_id(), and finally frees
+ * any queued work, the compare data, the private data and the id itself.
+ *
+ * err is only consulted in IB_CM_REQ_RCVD: with -ENOMEM the id is reset to
+ * idle without sending a reject.
+ */
+static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
+{
+       struct cm_id_private *cm_id_priv;
+       struct cm_work *work;
+
+       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+retest:
+       spin_lock_irq(&cm_id_priv->lock);
+       switch (cm_id->state) {
+       case IB_CM_LISTEN:
+               cm_id->state = IB_CM_IDLE;
+               /* Drop the id lock before taking cm.lock to edit the listen tree. */
+               spin_unlock_irq(&cm_id_priv->lock);
+               spin_lock_irq(&cm.lock);
+               rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
+               spin_unlock_irq(&cm.lock);
+               break;
+       case IB_CM_SIDR_REQ_SENT:
+               cm_id->state = IB_CM_IDLE;
+               ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+               spin_unlock_irq(&cm_id_priv->lock);
+               break;
+       case IB_CM_SIDR_REQ_RCVD:
+               spin_unlock_irq(&cm_id_priv->lock);
+               cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
+               break;
+       case IB_CM_REQ_SENT:
+               ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+               spin_unlock_irq(&cm_id_priv->lock);
+               /* The reject carries the local node GUID as its additional info. */
+               ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+                              &cm_id_priv->id.device->node_guid,
+                              sizeof cm_id_priv->id.device->node_guid,
+                              NULL, 0);
+               break;
+       case IB_CM_REQ_RCVD:
+               if (err == -ENOMEM) {
+                       /* Do not reject to allow future retries. */
+                       cm_reset_to_idle(cm_id_priv);
+                       spin_unlock_irq(&cm_id_priv->lock);
+               } else {
+                       spin_unlock_irq(&cm_id_priv->lock);
+                       ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+                                      NULL, 0, NULL, 0);
+               }
+               break;
+       case IB_CM_MRA_REQ_RCVD:
+       case IB_CM_REP_SENT:
+       case IB_CM_MRA_REP_RCVD:
+               ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+               /* Fall through */
+       case IB_CM_MRA_REQ_SENT:
+       case IB_CM_REP_RCVD:
+       case IB_CM_MRA_REP_SENT:
+               spin_unlock_irq(&cm_id_priv->lock);
+               ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+                              NULL, 0, NULL, 0);
+               break;
+       case IB_CM_ESTABLISHED:
+               spin_unlock_irq(&cm_id_priv->lock);
+               /* Send a DREQ, then re-examine whatever state that leaves us in. */
+               ib_send_cm_dreq(cm_id, NULL, 0);
+               goto retest;
+       case IB_CM_DREQ_SENT:
+               ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+               cm_enter_timewait(cm_id_priv);
+               spin_unlock_irq(&cm_id_priv->lock);
+               break;
+       case IB_CM_DREQ_RCVD:
+               spin_unlock_irq(&cm_id_priv->lock);
+               ib_send_cm_drep(cm_id, NULL, 0);
+               break;
+       default:
+               spin_unlock_irq(&cm_id_priv->lock);
+               break;
+       }
+
+       cm_free_id(cm_id->local_id);
+       /* Drop the creation reference, then wait for all other holders. */
+       cm_deref_id(cm_id_priv);
+       wait_for_completion(&cm_id_priv->comp);
+       while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
+               cm_free_work(work);
+       kfree(cm_id_priv->compare_data);
+       kfree(cm_id_priv->private_data);
+       kfree(cm_id_priv);
+}
+
+/*
+ * ib_destroy_cm_id - exported entry point for destroying a CM identifier.
+ * @cm_id: identifier to tear down.
+ *
+ * Thin wrapper: forwards to cm_destroy_id() with 0 as the second argument.
+ * NOTE(review): the second argument is presumably an error code that
+ * influences how the peer is notified -- confirm against cm_destroy_id().
+ */
+void ib_destroy_cm_id(struct ib_cm_id *cm_id)
+{
+       cm_destroy_id(cm_id, 0);
+}
+EXPORT_SYMBOL(ib_destroy_cm_id);
+
+/*
+ * ib_cm_listen - move an idle CM identifier into the listen state.
+ * @cm_id:        identifier to listen on; must currently be IB_CM_IDLE.
+ * @service_id:   service ID to listen for, or IB_CM_ASSIGN_SERVICE_ID to have
+ *                one taken from cm.listen_service_id.
+ * @service_mask: mask applied to @service_id; zero selects an all-ones mask.
+ * @compare_data: optional data/mask pair; when non-NULL a masked private copy
+ *                is stored in the identifier.
+ *
+ * Returns 0 on success, -EINVAL if the masked service ID falls in the
+ * assignable range without being exactly IB_CM_ASSIGN_SERVICE_ID or if the
+ * identifier is not idle, -ENOMEM if the compare data copy cannot be
+ * allocated, and -EBUSY if cm_insert_listen() reports an existing entry.
+ */
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+                struct ib_cm_compare_data *compare_data)
+{
+       struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+       unsigned long flags;
+
+       /* A zero mask means "match every bit of the service ID". */
+       if (!service_mask)
+               service_mask = ~cpu_to_be64(0);
+       service_id &= service_mask;
+
+       /* Only the exact "assign" value may be used from the assignable range. */
+       if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+           service_id != IB_CM_ASSIGN_SERVICE_ID)
+               return -EINVAL;
+
+       if (cm_id->state != IB_CM_IDLE)
+               return -EINVAL;
+
+       cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+       if (compare_data) {
+               cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
+                                                  GFP_KERNEL);
+               if (!cm_id_priv->compare_data)
+                       return -ENOMEM;
+               /* Store the data pre-masked, alongside a copy of the mask. */
+               cm_mask_copy(cm_id_priv->compare_data->data,
+                            compare_data->data, compare_data->mask);
+               memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
+                      IB_CM_COMPARE_SIZE);
+       }
+
+       cm_id->state = IB_CM_LISTEN;
+
+       /* Service ID selection and insertion happen under the global CM lock. */
+       spin_lock_irqsave(&cm.lock, flags);
+       if (service_id != IB_CM_ASSIGN_SERVICE_ID) {
+               cm_id->service_id = service_id;
+               cm_id->service_mask = service_mask;
+       } else {
+               cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
+               cm_id->service_mask = ~cpu_to_be64(0);
+       }
+       cur_cm_id_priv = cm_insert_listen(cm_id_priv);
+       spin_unlock_irqrestore(&cm.lock, flags);
+
+       if (!cur_cm_id_priv)
+               return 0;
+
+       /* A non-NULL result is treated as a conflict: undo the state change. */
+       cm_id->state = IB_CM_IDLE;
+       kfree(cm_id_priv->compare_data);
+       cm_id_priv->compare_data = NULL;
+       return -EBUSY;
+}
+EXPORT_SYMBOL(ib_cm_listen);
+
+/*
+ * Build the 64-bit transaction ID for an outgoing CM MAD.
+ *
+ * The upper 32 bits carry the MAD agent's hi_tid; the lower 32 bits are the
+ * identifier's local_id OR'ed with msg_seq shifted left by 30.  The result
+ * is returned after conversion with cpu_to_be64().
+ */
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
+                         enum cm_msg_sequence msg_seq)
+{
+       u64 agent_part = (u64) cm_id_priv->av.port->mad_agent->hi_tid;
+       u32 local_part = (__force u32) cm_id_priv->id.local_id;
+
+       local_part |= msg_seq << 30;
+
+       return cpu_to_be64((agent_part << 32) | local_part);
+}
+
+/*
+ * Fill in the MAD header fields used by CM messages.
+ * @hdr:     header to populate; fields not listed below are left untouched.
+ * @attr_id: attribute ID stored in the header as given.
+ * @tid:     transaction ID stored in the header as given.
+ *
+ * The remaining fields are fixed: base version, CM management class,
+ * CM class version and the SEND method.
+ */
+static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
+                             __be16 attr_id, __be64 tid)
+{
+       /* Caller-supplied values. */
+       hdr->tid           = tid;
+       hdr->attr_id       = attr_id;
+
+       /* Constants shared by every CM MAD. */
+       hdr->method        = IB_MGMT_METHOD_SEND;
+       hdr->class_version = IB_CM_CLASS_VERSION;
+       hdr->mgmt_class    = IB_MGMT_CLASS_CM;
+       hdr->base_version  = IB_MGMT_BASE_VERSION;
+}
+
+static void cm_format_req(struct cm_req_msg *req_msg,
+                         struct cm_id_private *cm_id_priv,
+                         struct ib_cm_req_param *param)
+{
+       struct ib_sa_path_rec *pri_path = param->primary_path;
+       struct ib_sa_path_rec *alt_path = param->alternate_path;
+
+       cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+                         cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+
+       req_msg->local_comm_id = cm_id_priv->id.local_id;
+       req_msg->service_id = param->service_id;
+       req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+       cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
+       cm_req_set_resp_res(req_msg, param->responder_resources);
+       cm_req_set_init_depth(req_msg, param->initiator_depth);
+       cm_req_set_remote_resp_timeout(req_msg,
+                                      param->remote_cm_response_timeout);
+       if (param->remote_cm_response_timeout > (u8) max_timeout) {
+               printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
+                      "%d, decreasing\n", param->remote_cm_response_timeout,
+                      max_timeout);
+               cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
+       }
+       cm_req_set_qp_type(req_msg, param->qp_type);
+       cm_req_set_flow_ctrl(req_msg, param->flow_control);
+       cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
+       cm_req_set_local_resp_timeout(req_msg,
+                                     param->local_cm_response_timeout);
+       if (param->local_cm_response_timeout > (u8) max_timeout) {
+               printk(KERN_WARNING PFX "req local_cm_response_timeout