[dpdk-dev] ADD mode 5(tlb) to link bonding pmd

Message ID 1410948060-31098-1-git-send-email-danielx.t.mrzyglod@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Daniel Mrzyglod Sept. 17, 2014, 10:01 a.m. UTC
This patch set adds support of mode 5 to link bonding pmd

This patchset depend on  Declan Doherty patch set:
http://dpdk.org/ml/archives/dev/2014-September/005069.html

Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
---
 lib/librte_pmd_bond/rte_eth_bond.h         |   23 ++++
 lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
 lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  163 +++++++++++++++++++++++++++-
 lib/librte_pmd_bond/rte_eth_bond_private.h |    5 +-
 4 files changed, 189 insertions(+), 3 deletions(-)
  

Comments

De Lara Guarch, Pablo Nov. 6, 2014, 6:56 p.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Daniel Mrzyglod
> Sent: Wednesday, September 17, 2014 11:01 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] ADD mode 5(tlb) to link bonding pmd
> 
> This patch set adds support of mode 5 to link bonding pmd
> 
> This patchset depend on  Declan Doherty patch set:
> http://dpdk.org/ml/archives/dev/2014-September/005069.html
> 
> Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
> ---
>  lib/librte_pmd_bond/rte_eth_bond.h         |   23 ++++
>  lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
>  lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  163
> +++++++++++++++++++++++++++-
>  lib/librte_pmd_bond/rte_eth_bond_private.h |    5 +-
>  4 files changed, 189 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h
> b/lib/librte_pmd_bond/rte_eth_bond.h
> index bd59780..1bd76ce 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,29 @@ extern "C" {
>  /**< Broadcast (Mode 3).
>   * In this mode all transmitted packets will be transmitted on all available
>   * active slaves of the bonded. */
> +#define BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING
> 	(5)
> +/**< Broadcast (Mode 5)

Typo, should be Adaptive TLB (Mode 5).

> + * Adaptive transmit load balancing: channel bonding that
> + * does not require any special switch support.  The
> + * outgoing traffic is distributed according to the
> + * current load (computed relative to the speed) on each
> + * slave.  Incoming traffic is received by the current
> + * slave.  If the receiving slave fails, another slave
> + * takes over the MAC address of the failed receiving
> + * slave.*/
> +#define BONDING_MODE_ADAPTIVE_LOAD_BALANCING
> 	(6)
  

Patch

diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index bd59780..1bd76ce 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -75,6 +75,29 @@  extern "C" {
 /**< Broadcast (Mode 3).
  * In this mode all transmitted packets will be transmitted on all available
  * active slaves of the bonded. */
+#define BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING	(5)
+/**< Broadcast (Mode 5)
+ * Adaptive transmit load balancing: channel bonding that
+ * does not require any special switch support.  The
+ * outgoing traffic is distributed according to the
+ * current load (computed relative to the speed) on each
+ * slave.  Incoming traffic is received by the current
+ * slave.  If the receiving slave fails, another slave
+ * takes over the MAC address of the failed receiving
+ * slave.*/
+#define BONDING_MODE_ADAPTIVE_LOAD_BALANCING			(6)
+/**
+ * Adaptive load balancing: includes balance-tlb plus
+ * receive load balancing (rlb) for IPV4 traffic, and
+ * does not require any special switch support.  The
+ * receive load balancing is achieved by ARP negotiation.
+ * The bonding driver intercepts the ARP Replies sent by
+ * the local system on their way out and overwrites the
+ * source hardware address with the unique hardware
+ * address of one of the slaves in the bond such that
+ * different peers use different hardware addresses for
+ * the server. */
+
 
 /* Balance Mode Transmit Policies */
 #define BALANCE_XMIT_POLICY_LAYER2		(0)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index 11d9816..bb1cfae 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -170,6 +170,7 @@  bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
 	case BONDING_MODE_ACTIVE_BACKUP:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 		return 0;
 	default:
 		return -1;
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 38cc1ae..9c4c174 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -30,7 +30,7 @@ 
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-
+#include <stdlib.h>
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -40,10 +40,14 @@ 
 #include <rte_devargs.h>
 #include <rte_kvargs.h>
 #include <rte_dev.h>
+#include <rte_alarm.h>
+#include <rte_cycles.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
 
+#define REORDER_PERIOD_MS 10
+
 static uint16_t
 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -286,6 +290,141 @@  xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
 	return hash % slave_count;
 }
 
+struct bwg_slave {
+	uint64_t bwg_left_int;
+	uint64_t bwg_left_remainder;
+	uint8_t slave;
+};
+
+static int
+bandwidth_cmp(const void *a, const void *b)
+{
+	const struct bwg_slave *bwg_a = a;
+	const struct bwg_slave *bwg_b = b;
+	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
+	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
+			(int64_t)bwg_a->bwg_left_remainder;
+	if (diff > 0)
+		return 1;
+	else if (diff < 0)
+		return -1;
+	else if (diff2 > 0)
+		return 1;
+	else if (diff2 < 0)
+		return -1;
+	else
+		return 0;
+}
+
+static void
+bandwidth_left(int port_id, uint64_t load, uint8_t update_idx, struct bwg_slave *bwg_slave)
+{
+	struct rte_eth_link link_status;
+
+	rte_eth_link_get(port_id, &link_status);
+	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
+	if (link_bwg == 0)
+		return;
+	link_bwg = (link_bwg * (update_idx+1) * REORDER_PERIOD_MS);
+	bwg_slave->bwg_left_int = ((link_bwg - 1000*load)) / link_bwg;
+	bwg_slave->bwg_left_remainder = ((link_bwg - 1000*load)) % link_bwg;
+}
+
+static void
+bond_ethdev_update_tlb_slave_cb(void *arg)
+{
+	struct bond_dev_private *internals = arg;
+	struct rte_eth_stats slave_stats;
+	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
+	uint8_t slave_count;
+	uint64_t tx_bytes;
+	uint8_t update_stats = 0;
+	int8_t i;
+
+	internals->slave_update_idx++;
+
+
+	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
+		update_stats = 1;
+
+	for (i = 0; i < internals->active_slave_count; i++) {
+		rte_eth_stats_get(internals->active_slaves[i], &slave_stats);
+		tx_bytes = slave_stats.obytes -
+				internals->presisted_slaves_conf[i].last_obytes;
+		bandwidth_left(internals->active_slaves[i], tx_bytes,
+				internals->slave_update_idx, &bwg_array[i]);
+		bwg_array[i].slave = internals->active_slaves[i];
+
+		if (update_stats)
+			internals->presisted_slaves_conf[i].last_obytes += tx_bytes;
+	}
+
+	if (update_stats == 1)
+		internals->slave_update_idx = 0;
+
+	slave_count = i;
+	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
+	for (i = 0; i < slave_count; i++)
+		internals->active_slaves[i] = bwg_array[i].slave;
+
+	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
+			(struct bond_dev_private *)internals);
+}
+
+static uint16_t
+bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	struct rte_eth_dev *primary_port = &rte_eth_devices[internals->primary_port];
+	uint16_t num_tx_total = 0;
+	uint8_t i, j;
+
+	uint8_t num_of_slaves = internals->active_slave_count;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+
+	struct ether_hdr *ether_hdr;
+	struct ether_addr primary_slave_addr;
+	struct ether_addr active_slave_addr;
+
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves,
+				sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+
+	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
+
+	if (nb_pkts > 3) {
+		for (i = 0; i < 3; i++)
+			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
+	}
+
+	for (i = 0; i < num_of_slaves; i++) {
+		ether_addr_copy(&internals->presisted_slaves_conf[slaves[i]].mac_addr,
+				&active_slave_addr);
+
+		for (j = num_tx_total; j < nb_pkts; j++) {
+			if (j + 3 < nb_pkts)
+				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
+
+			ether_hdr = bufs[j]->pkt.data;
+			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
+				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
+		}
+
+		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+				bufs + num_tx_total, nb_pkts - num_tx_total);
+
+		if (num_tx_total == nb_pkts)
+			break;
+	}
+
+	return num_tx_total;
+}
+
 static uint16_t
 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 		uint16_t nb_pkts)
@@ -495,6 +634,7 @@  mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
 		}
 		break;
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		for (i = 0; i < internals->slave_count; i++) {
 			if (internals->slaves[i] == internals->current_primary_port) {
@@ -547,6 +687,10 @@  bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
 		break;
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
+		break;
 	default:
 		return -1;
 	}
@@ -742,7 +886,9 @@  bond_ethdev_start(struct rte_eth_dev *eth_dev)
 
 	if (internals->user_defined_primary_port)
 		bond_ethdev_primary_set(internals, internals->primary_port);
-
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		bond_ethdev_update_tlb_slave_cb(internals);
+	}
 	return 0;
 }
 
@@ -751,6 +897,10 @@  bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+		rte_delay_ms(REORDER_PERIOD_MS + 1);
+	}
 	internals->active_slave_count = 0;
 
 	eth_dev->data->dev_link.link_status = 0;
@@ -760,6 +910,13 @@  bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 static void
 bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
 {
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+		rte_delay_ms(REORDER_PERIOD_MS+1);
+	}
+	internals->active_slave_count = 0;
 }
 
 static int
@@ -938,6 +1095,7 @@  bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
 		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		rte_eth_promiscuous_enable(internals->current_primary_port);
 
@@ -962,6 +1120,7 @@  bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
 		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		rte_eth_promiscuous_disable(internals->current_primary_port);
 	}
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 60f1e8d..9cc7915 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -88,6 +88,7 @@  struct slave_conf {
 	/**< Port Id of slave eth_dev */
 	struct ether_addr mac_addr;
 	/**< Slave eth_dev original MAC address */
+	uint64_t last_obytes;
 };
 /** Bonded slave devices structure */
 struct bond_ethdev_slave_ports {
@@ -120,9 +121,11 @@  struct bond_dev_private {
 
 	uint8_t active_slaves[RTE_MAX_ETHPORTS];	/**< Active slave list */
 	uint8_t slaves[RTE_MAX_ETHPORTS];			/**< Slave list */
-
+	uint8_t slaves_order[RTE_MAX_ETHPORTS];
+	uint8_t slave_update_idx;
 	/** Persisted configuration of slaves */
 	struct slave_conf presisted_slaves_conf[RTE_MAX_ETHPORTS];
+
 };
 
 extern struct eth_dev_ops default_dev_ops;