new file mode 100644
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Intel Corporation
+ */
+
+#ifndef _GVE_H_
+#define _GVE_H_
+
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
+#include <rte_ether.h>
+
+#include "gve_desc.h"
+
+#ifndef GOOGLE_VENDOR_ID
+#define GOOGLE_VENDOR_ID 0x1ae0 /* PCI vendor id used by Google NICs */
+#endif
+
+#define GVE_DEV_ID 0x0042 /* PCI device id of the gVNIC device */
+
+#define GVE_REG_BAR 0 /* BAR index of the device registers */
+#define GVE_DB_BAR 2 /* BAR index of the doorbell pages */
+
+/* 1 for management, 1 for rx, 1 for tx */
+#define GVE_MIN_MSIX 3
+
+/* PTYPEs are always 10 bits. */
+#define GVE_NUM_PTYPES 1024
+
+/* A list of pages registered with the device during setup and used by a queue
+ * as buffers
+ */
+struct gve_queue_page_list {
+	uint32_t id; /* unique id */
+	uint32_t num_entries; /* number of pages in the list */
+	dma_addr_t *page_buses; /* the dma addrs of the pages */
+	const struct rte_memzone *mz; /* memzone backing the page list */
+};
+
+/* A TX desc ring entry: a packet is one pkt desc followed by seg descs */
+union gve_tx_desc {
+	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
+	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
+};
+
+/* State for one TX queue */
+struct gve_tx_queue {
+	volatile union gve_tx_desc *tx_desc_ring; /* descriptor ring base */
+	const struct rte_memzone *mz; /* memzone backing the desc ring */
+	uint64_t tx_ring_phys_addr; /* bus address of the ring — presumably mz->iova; confirm at ring setup */
+
+	uint16_t nb_tx_desc; /* ring size in descriptors */
+
+	/* Only valid for DQO_QPL queue format */
+	struct gve_queue_page_list *qpl;
+
+	uint16_t port_id; /* ethdev port owning this queue */
+	uint16_t queue_id; /* index of this queue on the port */
+
+	uint16_t ntfy_id; /* notification block id for this queue */
+	volatile rte_be32_t *ntfy_addr;
+
+	struct gve_priv *hw; /* back-pointer to the adapter */
+	const struct rte_memzone *qres_mz; /* memzone backing qres */
+	struct gve_queue_resources *qres;
+
+	/* Only valid for DQO_RDA queue format */
+	struct gve_tx_queue *complq; /* companion completion queue */
+};
+
+/* State for one RX queue */
+struct gve_rx_queue {
+	volatile struct gve_rx_desc *rx_desc_ring; /* descriptor ring base */
+	volatile union gve_rx_data_slot *rx_data_ring; /* data slot ring base */
+	const struct rte_memzone *mz; /* memzone backing the desc ring */
+	const struct rte_memzone *data_mz; /* memzone backing the data ring */
+	uint64_t rx_ring_phys_addr; /* bus address of the ring — presumably mz->iova; confirm at ring setup */
+
+	uint16_t nb_rx_desc; /* ring size in descriptors */
+
+	volatile rte_be32_t *ntfy_addr;
+
+	/* only valid for GQI_QPL queue format */
+	struct gve_queue_page_list *qpl;
+
+	struct gve_priv *hw; /* back-pointer to the adapter */
+	const struct rte_memzone *qres_mz; /* memzone backing qres */
+	struct gve_queue_resources *qres;
+
+	uint16_t port_id; /* ethdev port owning this queue */
+	uint16_t queue_id; /* index of this queue on the port */
+	uint16_t ntfy_id; /* notification block id for this queue */
+	uint16_t rx_buf_len; /* receive buffer length */
+
+	/* Only valid for DQO_RDA queue format */
+	struct gve_rx_queue *bufq; /* companion buffer queue */
+};
+
+/* Interrupt doorbell; one per notification block, cacheline aligned so
+ * blocks do not share a line.
+ */
+struct gve_irq_db {
+	rte_be32_t id;
+} ____cacheline_aligned;
+
+/* Packet-type decode entry reported by the device */
+struct gve_ptype {
+	uint8_t l3_type; /* `gve_l3_type` in gve_adminq.h */
+	uint8_t l4_type; /* `gve_l4_type` in gve_adminq.h */
+};
+
+/* Lookup table over all 10-bit device PTYPEs */
+struct gve_ptype_lut {
+	struct gve_ptype ptypes[GVE_NUM_PTYPES];
+};
+
+/* Queue descriptor formats negotiated with the device */
+enum gve_queue_format {
+	GVE_QUEUE_FORMAT_UNSPECIFIED = 0x0, /* default unspecified */
+	GVE_GQI_RDA_FORMAT	     = 0x1, /* GQI Raw Addressing */
+	GVE_GQI_QPL_FORMAT	     = 0x2, /* GQI Queue Page List */
+	GVE_DQO_RDA_FORMAT	     = 0x3, /* DQO Raw Addressing */
+};
+
+/* Per-device private adapter state, shared by all queues of the port */
+struct gve_priv {
+	struct gve_irq_db *irq_dbs; /* array of num_ntfy_blks */
+	const struct rte_memzone *irq_dbs_mz; /* memzone backing irq_dbs */
+	uint32_t mgmt_msix_idx; /* MSI-X vector reserved for management */
+	rte_be32_t *cnt_array; /* array of num_event_counters */
+	const struct rte_memzone *cnt_array_mz; /* memzone backing cnt_array */
+
+	uint16_t num_event_counters;
+	uint16_t tx_desc_cnt; /* txq size */
+	uint16_t rx_desc_cnt; /* rxq size */
+	uint16_t tx_pages_per_qpl; /* tx buffer length */
+	uint16_t rx_data_slot_cnt; /* rx buffer length */
+
+	/* Only valid for DQO_RDA queue format */
+	uint16_t tx_compq_size; /* tx completion queue size */
+	uint16_t rx_bufq_size; /* rx buff queue size */
+
+	uint64_t max_registered_pages;
+	uint64_t num_registered_pages; /* num pages registered with NIC */
+	uint16_t default_num_queues; /* default num queues to set up */
+	enum gve_queue_format queue_format; /* see enum gve_queue_format */
+	uint8_t enable_lsc; /* presumably link-state-change enable — confirm against intr setup */
+
+	uint16_t max_nb_txq; /* upper bound on TX queue count */
+	uint16_t max_nb_rxq; /* upper bound on RX queue count */
+	uint32_t num_ntfy_blks; /* split between TX and RX so must be even */
+
+	struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
+	rte_be32_t __iomem *db_bar2; /* "array" of doorbells */
+	struct rte_pci_device *pci_dev;
+
+	/* Admin queue - see gve_adminq.h */
+	union gve_adminq_command *adminq;
+	struct gve_dma_mem adminq_dma_mem;
+	uint32_t adminq_mask; /* masks prod_cnt to adminq size */
+	uint32_t adminq_prod_cnt; /* free-running count of AQ cmds executed */
+	uint32_t adminq_cmd_fail; /* free-running count of AQ cmds failed */
+	uint32_t adminq_timeouts; /* free-running count of AQ cmds timeouts */
+	/* free-running count of per AQ cmd executed */
+	uint32_t adminq_describe_device_cnt;
+	uint32_t adminq_cfg_device_resources_cnt;
+	uint32_t adminq_register_page_list_cnt;
+	uint32_t adminq_unregister_page_list_cnt;
+	uint32_t adminq_create_tx_queue_cnt;
+	uint32_t adminq_create_rx_queue_cnt;
+	uint32_t adminq_destroy_tx_queue_cnt;
+	uint32_t adminq_destroy_rx_queue_cnt;
+	uint32_t adminq_dcfg_device_resources_cnt;
+	uint32_t adminq_set_driver_parameter_cnt;
+	uint32_t adminq_report_stats_cnt;
+	uint32_t adminq_report_link_speed_cnt;
+	uint32_t adminq_get_ptype_map_cnt;
+
+	volatile uint32_t state_flags; /* bitmap indexed by gve_state_flags_bit */
+
+	/* Gvnic device link speed from hypervisor. */
+	uint64_t link_speed;
+
+	uint16_t max_mtu;
+	struct rte_ether_addr dev_addr; /* mac address */
+
+	struct gve_queue_page_list *qpl;
+
+	struct gve_tx_queue **txqs;
+	struct gve_rx_queue **rxqs;
+};
+
+/* Bit positions inside gve_priv.state_flags; manipulated with the
+ * rte_bit_relaxed_* helpers below.
+ */
+enum gve_state_flags_bit {
+	GVE_PRIV_FLAGS_ADMIN_QUEUE_OK		= 1,
+	GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK	= 2,
+	GVE_PRIV_FLAGS_DEVICE_RINGS_OK		= 3,
+	GVE_PRIV_FLAGS_NAPI_ENABLED		= 4,
+};
+
+/* True when the negotiated queue format is one of the GQI variants. */
+static inline bool gve_is_gqi(struct gve_priv *priv)
+{
+	enum gve_queue_format fmt = priv->queue_format;
+
+	return fmt == GVE_GQI_RDA_FORMAT || fmt == GVE_GQI_QPL_FORMAT;
+}
+
+/* Query/set/clear the ADMIN_QUEUE_OK bit in priv->state_flags. */
+static inline bool gve_get_admin_queue_ok(struct gve_priv *priv)
+{
+	return rte_bit_relaxed_get32(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK,
+				     &priv->state_flags) != 0;
+}
+
+static inline void gve_set_admin_queue_ok(struct gve_priv *priv)
+{
+	rte_bit_relaxed_set32(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_admin_queue_ok(struct gve_priv *priv)
+{
+	rte_bit_relaxed_clear32(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+/* Query/set/clear the DEVICE_RESOURCES_OK bit in priv->state_flags. */
+static inline bool gve_get_device_resources_ok(struct gve_priv *priv)
+{
+	return rte_bit_relaxed_get32(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK,
+				     &priv->state_flags) != 0;
+}
+
+static inline void gve_set_device_resources_ok(struct gve_priv *priv)
+{
+	rte_bit_relaxed_set32(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK,
+			      &priv->state_flags);
+}
+
+static inline void gve_clear_device_resources_ok(struct gve_priv *priv)
+{
+	rte_bit_relaxed_clear32(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK,
+				&priv->state_flags);
+}
+
+/* Query/set/clear the DEVICE_RINGS_OK bit in priv->state_flags. */
+static inline bool gve_get_device_rings_ok(struct gve_priv *priv)
+{
+	return rte_bit_relaxed_get32(GVE_PRIV_FLAGS_DEVICE_RINGS_OK,
+				     &priv->state_flags) != 0;
+}
+
+static inline void gve_set_device_rings_ok(struct gve_priv *priv)
+{
+	rte_bit_relaxed_set32(GVE_PRIV_FLAGS_DEVICE_RINGS_OK,
+			      &priv->state_flags);
+}
+
+static inline void gve_clear_device_rings_ok(struct gve_priv *priv)
+{
+	rte_bit_relaxed_clear32(GVE_PRIV_FLAGS_DEVICE_RINGS_OK,
+				&priv->state_flags);
+}
+#endif /* _GVE_H_ */
@@ -5,6 +5,7 @@
* Copyright(C) 2022 Intel Corporation
*/
+#include "gve.h"
#include "gve_adminq.h"
#include "gve_register.h"
new file mode 100644
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Intel Corporation
+ */
+#include <linux/pci_regs.h>
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+/* Driver version string written to the device at init (see
+ * gve_write_version); format is "<prefix><version>\n".
+ */
+#define GVE_VERSION "1.3.0"
+#define GVE_VERSION_PREFIX "GVE-"
+
+const char gve_version_str[] = GVE_VERSION;
+static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+
+/* Report the driver version to the device one byte at a time through the
+ * driver-version register, terminated with a newline.
+ */
+static void
+gve_write_version(uint8_t *driver_version_register)
+{
+	const char *s;
+
+	for (s = gve_version_prefix; *s != '\0'; s++)
+		writeb(*s, driver_version_register);
+	for (s = gve_version_str; *s != '\0'; s++)
+		writeb(*s, driver_version_register);
+	writeb('\n', driver_version_register);
+}
+
+/* No device-level configuration is needed yet; accept unconditionally. */
+static int
+gve_dev_configure(__rte_unused struct rte_eth_dev *dev)
+{
+	return 0;
+}
+
+/* Mark the port started; queue bring-up is not implemented yet. */
+static int
+gve_dev_start(struct rte_eth_dev *dev)
+{
+	dev->data->dev_started = 1;
+	return 0;
+}
+
+/* Mark the port stopped and report the link as down. */
+static int
+gve_dev_stop(struct rte_eth_dev *dev)
+{
+	dev->data->dev_started = 0;
+	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
+	return 0;
+}
+
+/* Close the port, stopping it first if it is still running. */
+static int
+gve_dev_close(struct rte_eth_dev *dev)
+{
+	int err;
+
+	if (!dev->data->dev_started)
+		return 0;
+
+	err = gve_dev_stop(dev);
+	if (err != 0)
+		PMD_DRV_LOG(ERR, "Failed to stop dev.");
+
+	return err;
+}
+
+/* ethdev callback table; only basic lifecycle ops are implemented so far */
+static const struct eth_dev_ops gve_eth_dev_ops = {
+	.dev_configure        = gve_dev_configure,
+	.dev_start            = gve_dev_start,
+	.dev_stop             = gve_dev_stop,
+	.dev_close            = gve_dev_close,
+};
+
+/* Free the event-counter array memzone. The memzone pointer is cleared as
+ * well as the array pointer so a repeated teardown is a harmless no-op
+ * (rte_memzone_free(NULL) returns an error but does not touch memory;
+ * clearing prevents a double free of the zone).
+ */
+static void
+gve_free_counter_array(struct gve_priv *priv)
+{
+	rte_memzone_free(priv->cnt_array_mz);
+	priv->cnt_array_mz = NULL;
+	priv->cnt_array = NULL;
+}
+
+/* Free the interrupt-doorbell memzone; same idempotency rationale as
+ * gve_free_counter_array().
+ */
+static void
+gve_free_irq_db(struct gve_priv *priv)
+{
+	rte_memzone_free(priv->irq_dbs_mz);
+	priv->irq_dbs_mz = NULL;
+	priv->irq_dbs = NULL;
+}
+
+/* Release everything set up by gve_setup_device_resources(). The device is
+ * told to deconfigure first (only if configuration previously succeeded),
+ * and the backing memory is freed afterwards so the NIC never sees freed
+ * buffers.
+ */
+static void
+gve_teardown_device_resources(struct gve_priv *priv)
+{
+	int err;
+
+	/* Tell device its resources are being freed */
+	if (gve_get_device_resources_ok(priv)) {
+		err = gve_adminq_deconfigure_device_resources(priv);
+		if (err)
+			PMD_DRV_LOG(ERR, "Could not deconfigure device resources: err=%d\n", err);
+	}
+	gve_free_counter_array(priv);
+	gve_free_irq_db(priv);
+	gve_clear_device_resources_ok(priv);
+}
+
+/* Walk the PCI capability list of @pdev looking for capability id @cap.
+ * Returns the config-space offset of the capability, or 0 when it is not
+ * present or config reads fail.
+ */
+static uint8_t
+pci_dev_find_capability(struct rte_pci_device *pdev, int cap)
+{
+	uint16_t ent;
+	uint8_t pos;
+	int loops;
+
+	if (rte_pci_read_config(pdev, &pos, sizeof(pos),
+				PCI_CAPABILITY_LIST) != sizeof(pos))
+		return 0;
+
+	/* Bound the walk so a corrupted list cannot loop forever. */
+	for (loops = (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF;
+	     pos != 0 && loops > 0; loops--) {
+		uint8_t id;
+
+		if (rte_pci_read_config(pdev, &ent, sizeof(ent), pos) != sizeof(ent))
+			return 0;
+
+		id = ent & 0xff;
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+
+		pos = ent >> 8; /* next-capability pointer in the high byte */
+	}
+
+	return 0;
+}
+
+/* Return the number of MSI-X vectors the device supports, or 0 when the
+ * MSI-X capability is absent or unreadable.
+ */
+static int
+pci_dev_msix_vec_count(struct rte_pci_device *pdev)
+{
+	uint16_t control;
+	uint8_t msix_cap;
+
+	msix_cap = pci_dev_find_capability(pdev, PCI_CAP_ID_MSIX);
+	if (msix_cap == 0)
+		return 0;
+
+	if (rte_pci_read_config(pdev, &control, sizeof(control),
+				msix_cap + PCI_MSIX_FLAGS) != sizeof(control))
+		return 0;
+
+	/* The table-size field is encoded as N - 1. */
+	return (control & PCI_MSIX_FLAGS_QSIZE) + 1;
+}
+
+/* Allocate the DMA-able memory the device needs (event-counter array and
+ * interrupt doorbells) and hand their IOVAs to the device via the admin
+ * queue. On failure everything allocated here is released again via the
+ * goto-cleanup chain. Returns 0 or a negative errno.
+ */
+static int
+gve_setup_device_resources(struct gve_priv *priv)
+{
+	char z_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+	int err = 0;
+
+	/* Memzone name embeds the PCI device name so multiple ports coexist. */
+	snprintf(z_name, sizeof(z_name), "gve_%s_cnt_arr", priv->pci_dev->device.name);
+	mz = rte_memzone_reserve_aligned(z_name,
+					 priv->num_event_counters * sizeof(*priv->cnt_array),
+					 rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
+					 PAGE_SIZE);
+	if (mz == NULL) {
+		PMD_INIT_LOG(ERR, "Could not alloc memzone for count array");
+		return -ENOMEM;
+	}
+	priv->cnt_array = (rte_be32_t *)mz->addr;
+	priv->cnt_array_mz = mz;
+
+	snprintf(z_name, sizeof(z_name), "gve_%s_irqmz", priv->pci_dev->device.name);
+	mz = rte_memzone_reserve_aligned(z_name,
+					 sizeof(*priv->irq_dbs) * (priv->num_ntfy_blks),
+					 rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
+					 PAGE_SIZE);
+	if (mz == NULL) {
+		PMD_INIT_LOG(ERR, "Could not alloc memzone for irq_dbs");
+		err = -ENOMEM;
+		goto free_cnt_array;
+	}
+	priv->irq_dbs = (struct gve_irq_db *)mz->addr;
+	priv->irq_dbs_mz = mz;
+
+	/* Tell the device where the counters and doorbells live. */
+	err = gve_adminq_configure_device_resources(priv,
+						    priv->cnt_array_mz->iova,
+						    priv->num_event_counters,
+						    priv->irq_dbs_mz->iova,
+						    priv->num_ntfy_blks);
+	if (unlikely(err)) {
+		PMD_INIT_LOG(ERR, "Could not config device resources: err=%d", err);
+		goto free_irq_dbs;
+	}
+	return 0;
+
+free_irq_dbs:
+	gve_free_irq_db(priv);
+free_cnt_array:
+	gve_free_counter_array(priv);
+
+	return err;
+}
+
+/* Bring up the adapter: allocate the admin queue, query the device
+ * description, size the notification blocks from the available MSI-X
+ * vectors, and configure device resources. When @skip_describe_device is
+ * true (e.g. a reset path reusing earlier results) the describe/MSI-X
+ * steps are skipped. Returns 0 or a negative errno; on failure the admin
+ * queue is freed again.
+ */
+static int
+gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
+{
+	int num_ntfy;
+	int err;
+
+	/* Set up the adminq */
+	err = gve_adminq_alloc(priv);
+	if (err) {
+		PMD_INIT_LOG(ERR, "Failed to alloc admin queue: err=%d", err);
+		return err;
+	}
+
+	if (skip_describe_device)
+		goto setup_device;
+
+	/* Get the initial information we need from the device */
+	err = gve_adminq_describe_device(priv);
+	if (err) {
+		PMD_INIT_LOG(ERR, "Could not get device information: err=%d", err);
+		goto free_adminq;
+	}
+
+	num_ntfy = pci_dev_msix_vec_count(priv->pci_dev);
+	if (num_ntfy <= 0) {
+		PMD_DRV_LOG(ERR, "Could not count MSI-x vectors");
+		err = -EIO;
+		goto free_adminq;
+	} else if (num_ntfy < GVE_MIN_MSIX) {
+		PMD_DRV_LOG(ERR, "GVE needs at least %d MSI-x vectors, but only has %d",
+			    GVE_MIN_MSIX, num_ntfy);
+		err = -EINVAL;
+		goto free_adminq;
+	}
+
+	priv->num_registered_pages = 0;
+
+	/* gvnic has one Notification Block per MSI-x vector, except for the
+	 * management vector
+	 */
+	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; /* rounded down to even */
+	priv->mgmt_msix_idx = priv->num_ntfy_blks;
+
+	/* Notification blocks are split evenly between TX and RX. */
+	priv->max_nb_txq = RTE_MIN(priv->max_nb_txq, priv->num_ntfy_blks / 2);
+	priv->max_nb_rxq = RTE_MIN(priv->max_nb_rxq, priv->num_ntfy_blks / 2);
+
+	if (priv->default_num_queues > 0) {
+		priv->max_nb_txq = RTE_MIN(priv->default_num_queues, priv->max_nb_txq);
+		priv->max_nb_rxq = RTE_MIN(priv->default_num_queues, priv->max_nb_rxq);
+	}
+
+	PMD_DRV_LOG(INFO, "Max TX queues %d, Max RX queues %d",
+		    priv->max_nb_txq, priv->max_nb_rxq);
+
+setup_device:
+	err = gve_setup_device_resources(priv);
+	if (!err)
+		return 0;
+free_adminq:
+	gve_adminq_free(priv);
+	return err;
+}
+
+/* Full teardown: release device resources first (needs a live admin queue
+ * to send the deconfigure command), then free the admin queue itself.
+ */
+static void
+gve_teardown_priv_resources(struct gve_priv *priv)
+{
+	gve_teardown_device_resources(priv);
+	gve_adminq_free(priv);
+}
+
+/* ethdev init callback: map the register and doorbell BARs, report the
+ * driver version, read the queue limits, initialize the adapter, and set
+ * up the MAC address array. Returns 0 or a negative errno.
+ *
+ * Fixes vs. the previous revision:
+ *  - "&reg_bar" had been mangled into the mis-encoded "(R)_bar" token in
+ *    three places (an HTML-entity corruption of "&reg"), which does not
+ *    compile; restored the address-of expressions.
+ *  - On mac_addrs allocation failure the device resources set up by
+ *    gve_init_priv() were leaked; they are now torn down before returning.
+ */
+static int
+gve_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct gve_priv *priv = eth_dev->data->dev_private;
+	int max_tx_queues, max_rx_queues;
+	struct rte_pci_device *pci_dev;
+	struct gve_registers *reg_bar;
+	rte_be32_t *db_bar;
+	int err;
+
+	eth_dev->dev_ops = &gve_eth_dev_ops;
+
+	/* Only the primary process touches the hardware. */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
+
+	reg_bar = pci_dev->mem_resource[GVE_REG_BAR].addr;
+	if (!reg_bar) {
+		PMD_INIT_LOG(ERR, "Failed to map pci bar!\n");
+		return -ENOMEM;
+	}
+
+	db_bar = pci_dev->mem_resource[GVE_DB_BAR].addr;
+	if (!db_bar) {
+		PMD_INIT_LOG(ERR, "Failed to map doorbell bar!\n");
+		return -ENOMEM;
+	}
+
+	gve_write_version(&reg_bar->driver_version);
+	/* Get max queues to alloc etherdev */
+	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
+	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
+
+	priv->reg_bar0 = reg_bar;
+	priv->db_bar2 = db_bar;
+	priv->pci_dev = pci_dev;
+	priv->state_flags = 0x0;
+
+	priv->max_nb_txq = max_tx_queues;
+	priv->max_nb_rxq = max_rx_queues;
+
+	err = gve_init_priv(priv, false);
+	if (err)
+		return err;
+
+	eth_dev->data->mac_addrs = rte_zmalloc("gve_mac", sizeof(struct rte_ether_addr), 0);
+	if (!eth_dev->data->mac_addrs) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory to store mac address");
+		/* Undo gve_init_priv() so nothing leaks on this error path. */
+		gve_teardown_priv_resources(priv);
+		return -ENOMEM;
+	}
+	rte_ether_addr_copy(&priv->dev_addr, eth_dev->data->mac_addrs);
+
+	return 0;
+}
+
+/* ethdev uninit callback: release adapter resources and the MAC array.
+ *
+ * Fix vs. the previous revision: mac_addrs was set to NULL without being
+ * freed, which leaked the rte_zmalloc'd buffer (the ethdev layer in
+ * rte_eth_dev_release_port() only frees a non-NULL mac_addrs). Free it
+ * explicitly, then clear the pointer so the ethdev layer does not free it
+ * a second time.
+ */
+static int
+gve_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+	struct gve_priv *priv = eth_dev->data->dev_private;
+
+	gve_teardown_priv_resources(priv);
+
+	rte_free(eth_dev->data->mac_addrs);
+	eth_dev->data->mac_addrs = NULL;
+
+	return 0;
+}
+
+/* PCI probe entry point: allocate an ethdev with gve_priv private data. */
+static int
+gve_pci_probe(__rte_unused struct rte_pci_driver *pci_drv,
+	      struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_probe(pci_dev,
+					     sizeof(struct gve_priv),
+					     gve_dev_init);
+}
+
+/* PCI remove entry point: tear the ethdev down again. */
+static int
+gve_pci_remove(struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_remove(pci_dev, gve_dev_uninit);
+}
+
+/* PCI ids handled by this driver; zero device_id terminates the table */
+static const struct rte_pci_id pci_id_gve_map[] = {
+	{ RTE_PCI_DEVICE(GOOGLE_VENDOR_ID, GVE_DEV_ID) },
+	{ .device_id = 0 },
+};
+
+/* PCI driver registration: needs BAR mapping and supports LSC interrupts */
+static struct rte_pci_driver rte_gve_pmd = {
+	.id_table = pci_id_gve_map,
+	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+	.probe = gve_pci_probe,
+	.remove = gve_pci_remove,
+};
+
+/* Register the PMD with EAL and declare its kernel-module dependencies
+ * and log types.
+ */
+RTE_PMD_REGISTER_PCI(net_gve, rte_gve_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_gve, pci_id_gve_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_gve, "* igb_uio | vfio-pci");
+RTE_LOG_REGISTER_SUFFIX(gve_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(gve_logtype_driver, driver, NOTICE);
new file mode 100644
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(C) 2022 Intel Corporation
+
+# gVNIC relies on Linux/BSD PCI plumbing; skip the driver on Windows.
+if is_windows
+    build = false
+    reason = 'not supported on Windows'
+    subdir_done()
+endif
+
+sources = files(
+        'gve_adminq.c',
+        'gve_ethdev.c',
+)
new file mode 100644
@@ -0,0 +1,3 @@
+DPDK_22 {
+ local: *;
+};
@@ -23,6 +23,7 @@ drivers = [
'enic',
'failsafe',
'fm10k',
+ 'gve',
'hinic',
'hns3',
'i40e',