[dpdk-dev,PATCHv4,1/9] bonding: rss dynamic configuration

Message ID 1436981189-3320-2-git-send-email-tomaszx.kulasek@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Tomasz Kulasek July 15, 2015, 5:26 p.m. UTC
The bonding device implements independent management of RSS settings. It
stores its own copies of the settings, i.e. RETA, RSS hash function and RSS
key. This is required to ensure consistency.

1) The RSS hash function set for the bonding device is the maximal set of RSS
hash functions supported by all bonded devices. That means that, to have RSS
support for bonding, all slaves should be RSS-capable.

2) RSS key is propagated over the slaves "as is".

3) RETA for bonding is an internal table managed by the bonding API, and is
used as a pattern to set up slaves. Its size is the GCD of all slaves' RETA
sizes, so it can easily be used as a pattern providing the expected behavior,
even if the slaves' RETA sizes are different.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
---
 drivers/net/bonding/rte_eth_bond_api.c     |   28 ++++
 drivers/net/bonding/rte_eth_bond_pmd.c     |  205 ++++++++++++++++++++++++++--
 drivers/net/bonding/rte_eth_bond_private.h |   12 ++
 3 files changed, 231 insertions(+), 14 deletions(-)
  

Patch

diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index d810ec4..22eb575 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -303,6 +303,9 @@  rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	internals->rx_offload_capa = 0;
 	internals->tx_offload_capa = 0;
 
+	/* Initially allow to choose any offload type */
+	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+
 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
 	memset(internals->slaves, 0, sizeof(internals->slaves));
 
@@ -366,6 +369,11 @@  __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 
 	rte_eth_dev_info_get(slave_port_id, &dev_info);
 
+	/* We need to store each slave's reta_size to be able to synchronize RETA
+	 * for all slave devices even if their sizes are different.
+	 */
+	internals->slaves[internals->slave_count].reta_size = dev_info.reta_size;
+
 	if (internals->slave_count < 1) {
 		/* if MAC is not user defined then use MAC of first slave add to
 		 * bonded device */
@@ -379,9 +387,16 @@  __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		/* Make primary slave */
 		internals->primary_port = slave_port_id;
 
+		/* Inherit queues settings from first slave */
+		internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
+		internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
+
+		internals->reta_size = dev_info.reta_size;
+
 		/* Take the first dev's offload capabilities */
 		internals->rx_offload_capa = dev_info.rx_offload_capa;
 		internals->tx_offload_capa = dev_info.tx_offload_capa;
+		internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
 
 	} else {
 		/* Check slave link properties are supported if props are set,
@@ -400,8 +415,19 @@  __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		}
 		internals->rx_offload_capa &= dev_info.rx_offload_capa;
 		internals->tx_offload_capa &= dev_info.tx_offload_capa;
+		internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
+
+		/* RETA size is the GCD of all slaves' RETA sizes, so, if all sizes are
+		 * powers of 2, the smallest one is the GCD
+		 */
+		if (internals->reta_size > dev_info.reta_size)
+			internals->reta_size = dev_info.reta_size;
+
 	}
 
+	bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
+			internals->flow_type_rss_offloads;
+
 	internals->slave_count++;
 
 	/* Update all slave devices MACs*/
@@ -528,6 +554,8 @@  __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 	if (internals->slave_count == 0) {
 		internals->rx_offload_capa = 0;
 		internals->tx_offload_capa = 0;
+		internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+		internals->reta_size = 0;
 	}
 	return 0;
 }
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 989e878..cd23f42 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1310,6 +1310,23 @@  slave_configure(struct rte_eth_dev *bonded_eth_dev,
 	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
 
+	/* If RSS is enabled for bonding, try to enable it for slaves  */
+	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
+				!= 0) {
+			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
+					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
+					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+		} else {
+			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
+		}
+
+		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
+				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+		slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
+	}
+
 	/* Configure device */
 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
 			bonded_eth_dev->data->nb_rx_queues,
@@ -1361,6 +1378,30 @@  slave_configure(struct rte_eth_dev *bonded_eth_dev,
 		return -1;
 	}
 
+	/* If RSS is enabled for bonding, synchronize RETA */
+	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		int i;
+		struct bond_dev_private *internals;
+
+		internals = bonded_eth_dev->data->dev_private;
+
+		for (i = 0; i < internals->slave_count; i++) {
+			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
+				errval = rte_eth_dev_rss_reta_update(
+						slave_eth_dev->data->port_id,
+						&internals->reta_conf[0],
+						internals->slaves[i].reta_size);
+				if (errval != 0) {
+					RTE_LOG(WARNING, PMD,
+							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
+							" RSS Configuration for bonding may be inconsistent.\n",
+							slave_eth_dev->data->port_id, errval);
+				}
+				break;
+			}
+		}
+	}
+
 	/* If lsc interrupt is set, check initial slave's link status */
 	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
@@ -1578,6 +1619,9 @@  bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
 	dev_info->rx_offload_capa = internals->rx_offload_capa;
 	dev_info->tx_offload_capa = internals->tx_offload_capa;
+	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
+
+	dev_info->reta_size = internals->reta_size;
 }
 
 static int
@@ -1959,21 +2003,132 @@  bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 	}
 }
 
+static int
+bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
+		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+	unsigned i, j;
+	int result = 0;
+	int slave_reta_size;
+	unsigned reta_count;
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (reta_size != internals->reta_size)
+		return -EINVAL;
+
+	 /* Copy RETA table */
+	reta_count = reta_size / RTE_RETA_GROUP_SIZE;
+
+	for (i = 0; i < reta_count; i++) {
+		internals->reta_conf[i].mask = reta_conf[i].mask;
+		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+			if ((reta_conf[i].mask >> j) & 0x01)
+				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
+	}
+
+	/* Fill rest of array */
+	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
+		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
+				sizeof(internals->reta_conf[0]) * reta_count);
+
+	/* Propagate RETA over slaves */
+	for (i = 0; i < internals->slave_count; i++) {
+		slave_reta_size = internals->slaves[i].reta_size;
+		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
+				&internals->reta_conf[0], slave_reta_size);
+		if (result < 0)
+			return result;
+	}
+
+	return 0;
+}
+
+static int	/* 0 on success, -EINVAL if reta_size differs from the bond's */
+bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
+		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{	/* Report RETA entries from the bonded device's own copy (no slave is queried) */
+	int i, j;
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (reta_size != internals->reta_size)
+		return -EINVAL;
+
+	 /* Copy only the entries whose bit is set in the caller's group mask */
+	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
+		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+			if ((reta_conf[i].mask >> j) & 0x01)
+				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
+
+	return 0;
+}
+
+static int
+bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
+		struct rte_eth_rss_conf *rss_conf)
+{
+	int i, result = 0;
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_eth_rss_conf bond_rss_conf;
+
+	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
+
+	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
+
+	if (bond_rss_conf.rss_hf != 0)
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
+
+	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
+			sizeof(internals->rss_key)) {
+		if (bond_rss_conf.rss_key_len == 0)
+			bond_rss_conf.rss_key_len = 40;
+		internals->rss_key_len = bond_rss_conf.rss_key_len;
+		memcpy(internals->rss_key, bond_rss_conf.rss_key,
+				internals->rss_key_len);
+	}
+
+	for (i = 0; i < internals->slave_count; i++) {
+		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
+				&bond_rss_conf);
+		if (result < 0)
+			return result;
+	}
+
+	return 0;
+}
+
+static int	/* always returns 0 */
+bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
+		struct rte_eth_rss_conf *rss_conf)
+{	/* Report the RSS hash config held by the bonded device (no slave is queried) */
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	rss_conf->rss_key_len = internals->rss_key_len; /* overwrites caller's length */
+	if (rss_conf->rss_key)	/* the key is copied only when a buffer is supplied */
+		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
+	/* NOTE(review): caller's buffer capacity is not checked before the copy - confirm */
+	return 0;
+}
+
 struct eth_dev_ops default_dev_ops = {
-		.dev_start = bond_ethdev_start,
-		.dev_stop = bond_ethdev_stop,
-		.dev_close = bond_ethdev_close,
-		.dev_configure = bond_ethdev_configure,
-		.dev_infos_get = bond_ethdev_info,
-		.rx_queue_setup = bond_ethdev_rx_queue_setup,
-		.tx_queue_setup = bond_ethdev_tx_queue_setup,
-		.rx_queue_release = bond_ethdev_rx_queue_release,
-		.tx_queue_release = bond_ethdev_tx_queue_release,
-		.link_update = bond_ethdev_link_update,
-		.stats_get = bond_ethdev_stats_get,
-		.stats_reset = bond_ethdev_stats_reset,
-		.promiscuous_enable = bond_ethdev_promiscuous_enable,
-		.promiscuous_disable = bond_ethdev_promiscuous_disable
+		.dev_start            = bond_ethdev_start,
+		.dev_stop             = bond_ethdev_stop,
+		.dev_close            = bond_ethdev_close,
+		.dev_configure        = bond_ethdev_configure,
+		.dev_infos_get        = bond_ethdev_info,
+		.rx_queue_setup       = bond_ethdev_rx_queue_setup,
+		.tx_queue_setup       = bond_ethdev_tx_queue_setup,
+		.rx_queue_release     = bond_ethdev_rx_queue_release,
+		.tx_queue_release     = bond_ethdev_tx_queue_release,
+		.link_update          = bond_ethdev_link_update,
+		.stats_get            = bond_ethdev_stats_get,
+		.stats_reset          = bond_ethdev_stats_reset,
+		.promiscuous_enable   = bond_ethdev_promiscuous_enable,
+		.promiscuous_disable  = bond_ethdev_promiscuous_disable,
+		.reta_update          = bond_ethdev_rss_reta_update,
+		.reta_query           = bond_ethdev_rss_reta_query,
+		.rss_hash_update      = bond_ethdev_rss_hash_update,
+		.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
 };
 
 static int
@@ -2054,6 +2209,28 @@  bond_ethdev_configure(struct rte_eth_dev *dev)
 	int arg_count;
 	uint8_t port_id = dev - rte_eth_devices;
 
+	static const uint8_t default_rss_key[40] = {
+		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
+		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
+		0xBE, 0xAC, 0x01, 0xFA
+	};
+
+	unsigned i, j;
+
+	/* If RSS is enabled, fill table and key with default values */
+	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
+		memcpy(internals->rss_key, default_rss_key, 40);
+
+		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
+			internals->reta_conf[i].mask = ~0LL;
+			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+		}
+	}
+
 	/*
 	 * if no kvlist, it means that this bonded device has been created
 	 * through the bonding api.
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 45e5c65..98bb64d 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -103,6 +103,8 @@  struct bond_slave_details {
 	uint8_t last_link_status;
 	/**< Port Id of slave eth_dev */
 	struct ether_addr persisted_mac_addr;
+
+	uint16_t reta_size;
 };
 
 
@@ -155,6 +157,16 @@  struct bond_dev_private {
 	uint32_t rx_offload_capa;            /** Rx offload capability */
 	uint32_t tx_offload_capa;            /** Tx offload capability */
 
+	/** Bit mask of RSS offloads, the bit offset also means flow type */
+	uint64_t flow_type_rss_offloads;
+
+	uint16_t reta_size;
+	struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 /
+			RTE_RETA_GROUP_SIZE];
+
+	uint8_t rss_key[52];				/**< 52-byte hash key buffer. */
+	uint8_t rss_key_len;				/**< hash key length in bytes. */
+
 	struct rte_kvargs *kvlist;
 	uint8_t slave_update_idx;
 };