[dpdk-dev,RFC,2/6] eal: direct ring access support by linux af_packet

Message ID 1416924682-24170-3-git-send-email-cunming.liang@intel.com (mailing list archive)
State RFC, archived
Headers

Commit Message

Cunming Liang Nov. 25, 2014, 2:11 p.m. UTC
  Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 lib/librte_eal/linuxapp/eal/Makefile         |   1 +
 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c | 336 +++++++++++++++++++++++++++
 2 files changed, 337 insertions(+)
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 06c1dc5..f775203 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -61,6 +61,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_uio.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_bifurc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_debug.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_timer.c
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c b/lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
new file mode 100644
index 0000000..94ad4df
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_bifurc.c
@@ -0,0 +1,336 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_log.h>
+#include <rte_devargs.h>
+#include <rte_pci_bifurc.h>
+
+/**
+ * Query attributes of a network interface through ioctl requests on sockfd.
+ *
+ * The interface name is copied into the request before any query is issued,
+ * so each output can be requested independently of the others.
+ *
+ * @param sockfd
+ *   Socket descriptor the ioctl requests are issued on.
+ * @param iface_name
+ *   NUL-terminated interface name, shorter than IFNAMSIZ characters.
+ * @param if_index
+ *   If not NULL, receives the interface index (SIOCGIFINDEX).
+ * @param hwaddr
+ *   If not NULL, receives IFHWADDRLEN bytes of hardware address
+ *   (SIOCGIFHWADDR).
+ * @param mtu
+ *   If not NULL, receives the interface MTU (SIOCGIFMTU).
+ * @return
+ *   0 on success; -1 if iface_name is NULL or too long, or if an ioctl fails.
+ */
+int
+rte_eal_bifurc_get_ifinfo(int sockfd, char *iface_name,
+			  int *if_index, uint8_t *hwaddr, int *mtu)
+{
+	struct ifreq req;
+	int ret;
+
+	if (iface_name == NULL)
+		return -1;
+
+	memset(&req, 0, sizeof(req));
+
+	/*
+	 * Bounded copy that stops at the terminator: it never reads past the
+	 * caller's string and always leaves ifr_name NUL-terminated.
+	 */
+	ret = snprintf(req.ifr_name, sizeof(req.ifr_name), "%s", iface_name);
+	if (ret < 0 || ret >= (int)sizeof(req.ifr_name)) {
+		RTE_LOG(ERR, EAL,
+			"interface name too large: <%s>\n", iface_name);
+		return -1;
+	}
+
+	/* request for ifindex */
+	if (if_index != NULL) {
+		if (ioctl(sockfd, SIOCGIFINDEX, &req) == -1) {
+			RTE_LOG(ERR, EAL,
+				"%s: ioctl failed (SIOCGIFINDEX)\n",
+				iface_name);
+			return -1;
+		}
+		*if_index = req.ifr_ifindex;
+	}
+
+	/* request for hwaddr */
+	if (hwaddr != NULL) {
+		if (ioctl(sockfd, SIOCGIFHWADDR, &req) == -1) {
+			RTE_LOG(ERR, EAL,
+				"%s: ioctl failed (SIOCGIFHWADDR)\n",
+				iface_name);
+			return -1;
+		}
+		memcpy(hwaddr, req.ifr_hwaddr.sa_data, IFHWADDRLEN);
+	}
+
+	/* request for mtu */
+	if (mtu != NULL) {
+		if (ioctl(sockfd, SIOCGIFMTU, &req) == -1) {
+			RTE_LOG(ERR, EAL,
+				"%s: ioctl failed (SIOCGIFMTU)\n",
+				iface_name);
+			return -1;
+		}
+		*mtu = req.ifr_mtu;
+	}
+
+	return 0;
+}
+
+/**
+ * Read the size of the mappable device region from an AF_PACKET socket.
+ *
+ * @param sockfd
+ *   Socket queried with the PACKET_DEV_QPAIR_MAP_REGION_INFO option.
+ * @param size
+ *   Receives the tp_dev_bar_sz field reported for the socket.
+ * @return
+ *   0 on success, -1 if getsockopt() fails.
+ */
+static int
+get_map_size(int sockfd, uint32_t *size)
+{
+	struct tpacket_dev_qpair_map_region_info info;
+	socklen_t optlen = sizeof(info);
+
+	/* known contents even if fewer bytes than requested are returned */
+	memset(&info, 0, sizeof(info));
+
+	/* request for map region info */
+	if (getsockopt(sockfd, SOL_PACKET, PACKET_DEV_QPAIR_MAP_REGION_INFO,
+		       &info, &optlen) == -1) {
+		/* logged under EAL like every other message in this file */
+		RTE_LOG(ERR, EAL,
+			"could not get PACKET_DEV_QPAIR_MAP_REGION_INFO "
+			"on AF_PACKET socket, errno = %d\n", errno);
+		return -1;
+	}
+
+	*size = info.tp_dev_bar_sz;
+	return 0;
+}
+
+/**
+ * Map the device region exposed by an AF_PACKET socket (the PCIE
+ * configuration space of the queue pairs, per the log message below).
+ *
+ * The length is obtained with get_map_size() and the region is mapped
+ * shared, readable and writable, at offset 0 of the socket.
+ *
+ * @param sockfd
+ *   AF_PACKET socket to map.
+ * @param addr
+ *   Receives the mapped address; set to NULL when mmap() fails so that a
+ *   failed mapping is never mistaken for a valid pointer.
+ * @param size
+ *   Receives the length of the mapping in bytes.
+ * @return
+ *   0 on success; -1 if addr or size is NULL, the size query fails, or
+ *   mmap() fails.
+ */
+int
+rte_eal_bifurc_map(int sockfd, void **addr, uint32_t *size)
+{
+	if (addr == NULL || size == NULL)
+		return -1;
+
+	if (get_map_size(sockfd, size))
+		return -1;
+
+	*addr = mmap(NULL, *size, PROT_READ | PROT_WRITE,
+		     MAP_SHARED, sockfd, 0);
+	if (*addr == MAP_FAILED) {
+		RTE_LOG(ERR, EAL,
+			"call to mmap failed on AF_PACKET socket %d, "
+			"errno = %d\n", sockfd, errno);
+		/* MAP_FAILED is non-NULL; do not hand it back to the caller */
+		*addr = NULL;
+		return -1;
+	}
+
+	RTE_LOG(INFO, EAL,
+		"mapping sockfd %d PCIE configuration space, "
+		"address = %p, size = 0x%x\n", sockfd,
+		*addr, *size);
+
+	return 0;
+}
+
+/**
+ * Unmap a region previously mapped from an AF_PACKET socket.
+ *
+ * The length is not stored by the caller; it is queried again from the
+ * socket with get_map_size(). Nothing is unmapped if addr is NULL or
+ * MAP_FAILED, or if the size query fails (get_map_size() logs that case).
+ *
+ * @param sockfd
+ *   AF_PACKET socket the region was mapped from.
+ * @param addr
+ *   Start address of the mapping.
+ */
+void
+rte_eal_bifurc_unmap(int sockfd, void *addr)
+{
+	uint32_t size;
+
+	if (addr == NULL || addr == MAP_FAILED)
+		return;
+
+	if (get_map_size(sockfd, &size))
+		return;
+
+	if (munmap(addr, size) == -1)
+		RTE_LOG(ERR, EAL,
+			"call to munmap failed on AF_PACKET socket %d, "
+			"errno = %d\n", sockfd, errno);
+}
+
+/**
+ * Split queue pairs off the device behind an AF_PACKET socket.
+ *
+ * The request is issued with setsockopt(PACKET_RXTX_QPAIRS_SPLIT) and the
+ * outcome is read back with getsockopt() on the same option.
+ *
+ * @param sockfd
+ *   AF_PACKET socket the request is issued on.
+ * @param nb_qp
+ *   In: number of queue pairs requested. Out: tp_qpairs_num read back.
+ * @param qp_start
+ *   Out: tp_qpairs_start_from read back.
+ * @return
+ *   0 on success; -1 if nb_qp or qp_start is NULL, or a socket option
+ *   call fails.
+ */
+int
+rte_eal_bifurc_split(int sockfd, uint32_t *nb_qp, uint32_t *qp_start)
+{
+	struct tpacket_dev_qpairs_info qpairs_info;
+	socklen_t optlen = sizeof(qpairs_info);
+
+	if (nb_qp == NULL || qp_start == NULL)
+		return -1;
+
+	/* do not pass indeterminate bytes of the structure to the kernel */
+	memset(&qpairs_info, 0, sizeof(qpairs_info));
+
+	/* request for qpairs split */
+	/* NOTE(review): -1 presumably lets the kernel choose the start - confirm */
+	qpairs_info.tp_qpairs_start_from = -1;
+	qpairs_info.tp_qpairs_num = *nb_qp;
+	if (setsockopt(sockfd, SOL_PACKET, PACKET_RXTX_QPAIRS_SPLIT,
+		       &qpairs_info, optlen) == -1) {
+		RTE_LOG(ERR, EAL,
+			"request PACKET_RXTX_QPAIRS_SPLIT on AF_PACKET "
+			"socket for %d fail, errno = %d\n",
+			sockfd, errno);
+		return -1;
+	}
+
+	/* parse response of qpairs split */
+	if (getsockopt(sockfd, SOL_PACKET, PACKET_RXTX_QPAIRS_SPLIT,
+		       &qpairs_info, &optlen) == -1) {
+		RTE_LOG(ERR, EAL,
+			"could not get PACKET_RXTX_QPAIRS_SPLIT on AF_PACKET "
+			"socket for %d, errno = %d\n", sockfd, errno);
+		return -1;
+	}
+
+	*nb_qp    = qpairs_info.tp_qpairs_num;
+	*qp_start = qpairs_info.tp_qpairs_start_from;
+
+	/* print the uint32_t outputs so the arguments match %u exactly */
+	RTE_LOG(INFO, EAL,
+		"kernel driver allocates queue pairs from %u to %u\n",
+		*qp_start, *qp_start + *nb_qp - 1);
+
+	return 0;
+}
+
+/**
+ * Return queue pairs to the kernel driver.
+ *
+ * Issues setsockopt(PACKET_RXTX_QPAIRS_RETURN) for the given range. A
+ * failure is only logged; nothing is reported to the caller.
+ *
+ * @param sockfd
+ *   AF_PACKET socket the request is issued on.
+ * @param nb_qp
+ *   Number of queue pairs to return.
+ * @param qp_start
+ *   Index of the first queue pair to return.
+ */
+void
+rte_eal_bifurc_retire(int sockfd, uint32_t nb_qp, uint32_t qp_start)
+{
+	struct tpacket_dev_qpairs_info qpairs_info;
+
+	/* do not pass indeterminate bytes of the structure to the kernel */
+	memset(&qpairs_info, 0, sizeof(qpairs_info));
+
+	/* return queues to kernel driver */
+	qpairs_info.tp_qpairs_start_from = qp_start;
+	qpairs_info.tp_qpairs_num        = nb_qp;
+	if (setsockopt(sockfd, SOL_PACKET, PACKET_RXTX_QPAIRS_RETURN,
+		       &qpairs_info, sizeof(qpairs_info)) == -1)
+		/* the queue indexes are uint32_t, hence %u rather than %d */
+		RTE_LOG(ERR, EAL,
+			"could not set PACKET_RXTX_QPAIRS_RETURN on AF_PACKET "
+			"socket %d for queue pairs from %u to %u\n",
+			sockfd, qp_start, qp_start + nb_qp - 1);
+}
+
+/**
+ * Open a raw AF_PACKET socket for all ethernet protocols (ETH_P_ALL).
+ *
+ * @param sockfd
+ *   Receives the result of socket(); left untouched when NULL.
+ * @return
+ *   0 on success, -1 if sockfd is NULL or socket() fails.
+ */
+int
+rte_eal_bifurc_open(int *sockfd)
+{
+	int fd = -1;
+
+	/* a NULL output pointer is reported the same way as a socket failure */
+	if (sockfd != NULL) {
+		fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+		*sockfd = fd;
+	}
+
+	if (fd != -1)
+		return 0;
+
+	RTE_LOG(ERR, EAL, "Could not open AF_PACKET socket\n");
+	return -1;
+}
+
+/**
+ * Bind an AF_PACKET socket to a network interface for all ethernet
+ * protocols (ETH_P_ALL).
+ *
+ * @param sockfd
+ *   AF_PACKET socket to bind.
+ * @param if_index
+ *   Index of the interface to bind to.
+ * @return
+ *   0 on success, -1 if bind() fails.
+ */
+int
+rte_eal_bifurc_bind(int sockfd, int if_index)
+{
+	struct sockaddr_ll sll;
+
+	/* link-layer address: only family, protocol and ifindex are set */
+	memset(&sll, 0, sizeof(sll));
+	sll.sll_ifindex = if_index;
+	sll.sll_protocol = htons(ETH_P_ALL);
+	sll.sll_family = AF_PACKET;
+
+	if (bind(sockfd, (const struct sockaddr *)&sll, sizeof(sll)) != -1)
+		return 0;
+
+	RTE_LOG(ERR, EAL, "could not bind AF_PACKET socket to %d\n",
+		if_index);
+	return -1;
+}
+
+/**
+ * Fill a PCI device descriptor from the device information reported by an
+ * AF_PACKET socket (PACKET_DEV_DESC_INFO).
+ *
+ * @param sockfd
+ *   AF_PACKET socket to query.
+ * @param pci_dev
+ *   Descriptor to fill. It is zeroed first, so any previous content is
+ *   lost; then vendor id, device id and numa node are set from the reply
+ *   and the subsystem ids are set to PCI_ANY_ID.
+ * @return
+ *   0 on success, -1 if pci_dev is NULL or getsockopt() fails.
+ */
+int
+rte_eal_bifurc_set_pci(int sockfd, struct rte_pci_device *pci_dev)
+{
+	struct tpacket_dev_info dev_info;
+	socklen_t optlen = sizeof(dev_info);
+
+	if (pci_dev == NULL)
+		return -1;
+
+	/* request for desc info */
+	if (getsockopt(sockfd, SOL_PACKET, PACKET_DEV_DESC_INFO,
+		       &dev_info, &optlen) == -1) {
+		RTE_LOG(ERR, EAL,
+			"could not get PACKET_DEV_DESC_INFO on AF_PACKET "
+			"socket for %d, errno = %d\n", sockfd, errno);
+		return -1;
+	}
+
+	RTE_LOG(INFO, EAL,
+		"vendorid = 0x%x, deviceid = 0x%x, "
+		"num of qpairs = %d, inuse qpairs = %d\n",
+		 dev_info.tp_vendor_id, dev_info.tp_device_id,
+		 dev_info.tp_num_total_qpairs, dev_info.tp_num_inuse_qpairs);
+
+	/* pci_dev update; no mapping is done here */
+	memset(pci_dev, 0, sizeof(*pci_dev));
+	pci_dev->id.vendor_id = dev_info.tp_vendor_id;
+	pci_dev->id.device_id = dev_info.tp_device_id;
+	pci_dev->id.subsystem_vendor_id = PCI_ANY_ID;
+	pci_dev->id.subsystem_device_id = PCI_ANY_ID;
+	pci_dev->numa_node = dev_info.tp_numa_node;
+
+	return 0;
+}
+
+/**
+ * Allocate a virtual-device devargs entry for a driver name and argument
+ * string.
+ *
+ * @param drv_name
+ *   Driver name; must be non-NULL and fit in devargs->virtual.drv_name.
+ * @param args
+ *   Driver arguments; must be non-NULL and fit in devargs->args.
+ * @return
+ *   A zero-initialised, heap-allocated devargs of type RTE_DEVTYPE_VIRTUAL
+ *   holding copies of both strings, to be released with
+ *   rte_eal_bifurc_put_devargs(); NULL if an argument is NULL, a string
+ *   does not fit, or the allocation fails.
+ */
+struct rte_devargs *
+rte_eal_bifurc_get_devargs(const char *drv_name, const char *args)
+{
+	struct rte_devargs *devargs;
+	int ret;
+
+	/* passing NULL for a %s conversion is undefined; reject it up front */
+	if (drv_name == NULL || args == NULL) {
+		RTE_LOG(ERR, EAL, "invalid devargs parameters\n");
+		return NULL;
+	}
+
+	devargs = calloc(1, sizeof(*devargs));
+	if (devargs == NULL) {
+		RTE_LOG(ERR, EAL, "cannot allocate devargs\n");
+		return NULL;
+	}
+	devargs->type = RTE_DEVTYPE_VIRTUAL;
+
+	ret = snprintf(devargs->virtual.drv_name,
+		       sizeof(devargs->virtual.drv_name), "%s", drv_name);
+	if (ret < 0 || ret >= (int)sizeof(devargs->virtual.drv_name)) {
+		RTE_LOG(ERR, EAL,
+			"driver name too large: <%s>\n", drv_name);
+		free(devargs);
+		return NULL;
+	}
+
+	ret = snprintf(devargs->args, sizeof(devargs->args), "%s", args);
+	if (ret < 0 || ret >= (int)sizeof(devargs->args)) {
+		RTE_LOG(ERR, EAL,
+			"driver args too large: <%s>\n", args);
+		free(devargs);
+		return NULL;
+	}
+
+	return devargs;
+}
+
+/**
+ * Release a devargs entry with free().
+ *
+ * @param devargs
+ *   Entry to release; NULL is accepted and ignored.
+ */
+void
+rte_eal_bifurc_put_devargs(struct rte_devargs *devargs)
+{
+	/* free() is a no-op on NULL, so no explicit guard is required */
+	free(devargs);
+}