[dpdk-dev,v2] bond: Add mode 4 support.

Message ID 1411996978-3123-1-git-send-email-pawelx.wodkowski@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Wodkowski, PawelX Sept. 29, 2014, 1:22 p.m. UTC
  This patch adds support for mode 4 of link bonding. It depends on Declan Doherty's
patches v3 and the rte alarms patch v2 or above.

The new version handles race issues with setting/cancelling callbacks,
fixes promiscuous mode setting in mode 4 and some other minor errors in the
mode 4 implementation.


Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
 lib/librte_ether/rte_ether.h               |    1 +
 lib/librte_pmd_bond/Makefile               |    1 +
 lib/librte_pmd_bond/rte_eth_bond.h         |    4 +
 lib/librte_pmd_bond/rte_eth_bond_api.c     |   82 ++++++---
 lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
 lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  261 +++++++++++++++++++++++++---
 lib/librte_pmd_bond/rte_eth_bond_private.h |   42 ++++-
 7 files changed, 346 insertions(+), 46 deletions(-)
  

Comments

Michal Jastrzebski Sept. 29, 2014, 1:51 p.m. UTC | #1
Please don't take this patch into account. Two files are missing.

Best regards
Michal


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pawel Wodkowski
> Sent: Monday, September 29, 2014 3:23 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
> 
> This patch adds support for mode 4 of link bonding. It depends on Declan
> Doherty's patches v3 and the rte alarms patch v2 or above.
> 
> The new version handles race issues with setting/cancelling callbacks,
> fixes promiscuous mode setting in mode 4 and some other minor errors in the
> mode 4 implementation.
> 
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---
>  lib/librte_ether/rte_ether.h               |    1 +
>  lib/librte_pmd_bond/Makefile               |    1 +
>  lib/librte_pmd_bond/rte_eth_bond.h         |    4 +
>  lib/librte_pmd_bond/rte_eth_bond_api.c     |   82 ++++++---
>  lib/librte_pmd_bond/rte_eth_bond_args.c    |    1 +
>  lib/librte_pmd_bond/rte_eth_bond_pmd.c     |  261 +++++++++++++++++++++++++---
>  lib/librte_pmd_bond/rte_eth_bond_private.h |   42 ++++-
>  7 files changed, 346 insertions(+), 46 deletions(-)
> 
> diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
> index 2e08f23..1a3711b 100644
> --- a/lib/librte_ether/rte_ether.h
> +++ b/lib/librte_ether/rte_ether.h
> @@ -293,6 +293,7 @@ struct vlan_hdr {
>  #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
>  #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
>  #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
> +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
> 
>  #ifdef __cplusplus
>  }
> diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
> index 953d75e..c2312c2 100644
> --- a/lib/librte_pmd_bond/Makefile
> +++ b/lib/librte_pmd_bond/Makefile
> @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
>  #
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
>  SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
> 
>  #
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h
> b/lib/librte_pmd_bond/rte_eth_bond.h
> index 6811c7b..b0223c2 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,10 @@ extern "C" {
>  /**< Broadcast (Mode 3).
>   * In this mode all transmitted packets will be transmitted on all available
>   * active slaves of the bonded. */
> +#define BONDING_MODE_8023AD				(4)
> +/**< 802.3AD (Mode 4).
> + * In this mode transmission and reception of packets is managed by LACP
> + * protocol specified in 802.3AD documentation. */
> 
>  /* Balance Mode Transmit Policies */
>  #define BALANCE_XMIT_POLICY_LAYER2		(0)
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c
> b/lib/librte_pmd_bond/rte_eth_bond_api.c
> index c690ceb..c547164 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_api.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
> @@ -31,6 +31,8 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
>   */
> 
> +#include <string.h>
> +
>  #include <rte_mbuf.h>
>  #include <rte_malloc.h>
>  #include <rte_ethdev.h>
> @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id)
>  	return 0;
>  }
> 
> +void
> +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
> +{
> +	struct bond_dev_private *internals = eth_dev->data->dev_private;
> +	uint8_t active_count = internals->active_slave_count;
> +
> +	internals->active_slaves[active_count] = port_id;
> +
> +	if (internals->mode == BONDING_MODE_8023AD)
> +		bond_mode_8023ad_slave_append(eth_dev);
> +
> +	internals->active_slave_count = active_count + 1;
> +}
> +
> +void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> +	uint8_t slave_pos)
> +{
> +	struct bond_dev_private *internals = eth_dev->data->dev_private;
> +	uint8_t active_count = internals->active_slave_count;
> +
> +	if (internals->mode == BONDING_MODE_8023AD)
> +		bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
> +
> +	active_count--;
> +
> +	/* If slave was not at the end of the list
> +	 * shift active slaves up active array list */
> +	if (slave_pos < active_count) {
> +		memmove(internals->active_slaves + slave_pos,
> +				internals->active_slaves + slave_pos + 1,
> +				(active_count - slave_pos) *
> +					sizeof(internals->active_slaves[0]));
> +	}
> +
> +	internals->active_slave_count = active_count;
> +}
> +
>  uint8_t
>  number_of_sockets(void)
>  {
> @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t
> mode, uint8_t socket_id)
>  	eth_dev->dev_ops = &default_dev_ops;
>  	eth_dev->pci_dev = pci_dev;
> 
> -	if (bond_ethdev_mode_set(eth_dev, mode)) {
> -		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
> -				 eth_dev->data->port_id, mode);
> -		goto err;
> -	}
> -
> +	internals->port_id = eth_dev->data->port_id;
> +	internals->mode = BONDING_MODE_INVALID;
>  	internals->current_primary_port = 0;
>  	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
>  	internals->user_defined_mac = 0;
> @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t
> mode, uint8_t socket_id)
>  	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
>  	memset(internals->slaves, 0, sizeof(internals->slaves));
> 
> +	if (bond_ethdev_mode_set(eth_dev, mode)) {
> +		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
> +				 eth_dev->data->port_id, mode);
> +		goto err;
> +	}
> +
>  	return eth_dev->data->port_id;
> 
>  err:
> @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t
> bonded_port_id, uint8_t slave_port_id)
>  		rte_eth_link_get_nowait(slave_port_id, &link_props);
> 
>  		 if (link_props.link_status == 1)
> -			internals->active_slaves[internals->active_slave_count++] =
> -					slave_port_id;
> +			activate_slave(bonded_eth_dev, slave_port_id);
>  	}
>  	return 0;
> 
>  }
> 
> -
>  int
>  rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
>  {
> @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id,
> uint8_t slave_port_id)
>  	return retval;
>  }
> 
> -
>  static int
>  __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t
> slave_port_id)
>  {
> +	struct rte_eth_dev *bonded_eth_dev;
>  	struct bond_dev_private *internals;
> 
> -	int i, slave_idx = -1;
> +	int i, slave_idx;
> 
>  	if (valid_slave_port_id(slave_port_id) != 0)
>  		return -1;
> 
> -	internals = rte_eth_devices[bonded_port_id].data->dev_private;
> +	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
> +	internals = bonded_eth_dev->data->dev_private;
> 
>  	/* first remove from active slave list */
> -	for (i = 0; i < internals->active_slave_count; i++) {
> -		if (internals->active_slaves[i] == slave_port_id)
> -			slave_idx = i;
> +	slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count,
> +			slave_port_id);
> 
> -		/* shift active slaves up active array list */
> -		if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
> -			internals->active_slaves[i] = internals->active_slaves[i+1];
> -	}
> -
> -	if (slave_idx >= 0)
> -		internals->active_slave_count--;
> +	if (slave_idx < internals->active_slave_count)
> +		deactivate_slave(bonded_eth_dev, slave_idx);
> 
>  	slave_idx = -1;
>  	/* now find in slave list */
> @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
> 
>  	return internals->current_primary_port;
>  }
> +
>  int
>  rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t
> len)
>  {
> @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t
> bonded_port_id)
>  	return internals->balance_xmit_policy;
>  }
> 
> -
>  int
>  rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t
> internal_ms)
>  {
> @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t
> bonded_port_id)
>  	return internals->link_down_delay_ms;
>  }
> 
> -
>  int
>  rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t
> delay_ms)
> 
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c
> b/lib/librte_pmd_bond/rte_eth_bond_args.c
> index bbbc69b..a0be0e6 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_args.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
> @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char
> *key __rte_unused,
>  	case BONDING_MODE_ACTIVE_BACKUP:
>  	case BONDING_MODE_BALANCE:
>  	case BONDING_MODE_BROADCAST:
> +	case BONDING_MODE_8023AD:
>  		return 0;
>  	default:
>  		RTE_BOND_LOG(ERR, "Invalid slave mode value (%s)
> specified", value);
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> index 6d0fb1b..13630d9 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> @@ -44,6 +44,7 @@
> 
>  #include "rte_eth_bond.h"
>  #include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
> 
>  static uint16_t
>  bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t
> nb_pkts)
> @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue,
>  			bufs, nb_pkts);
>  }
> 
> +static uint16_t
> +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> +		uint16_t nb_pkts)
> +{
> +	/* Cast to structure, containing bonded device's port id and queue id
> */
> +	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
> +	struct bond_dev_private *internals = bd_rx_q->dev_private;
> +	struct mode8023ad_data *mode4 = &internals->mode4;
> +	struct ether_addr bond_mac;
> +
> +	struct ether_hdr *hdr;
> +	struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow
> packet */
> +
> +	uint16_t num_rx_slave = 0;	/* Number of packet received on
> current slave */
> +	uint16_t num_rx_total = 0;	/* Total number of received packets
> */
> +
> +	uint8_t i, j;
> +
> +	rte_eth_macaddr_get(internals->port_id, &bond_mac);
> +
> +	for (i = 0; i < internals->active_slave_count && num_rx_total <
> nb_pkts; i++) {
> +		/* Read packets from this slave */
> +		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
> +				bd_rx_q->queue_id, pkts, nb_pkts + 1 -
> num_rx_total);
> +
> +		/* Separate slow protocol packets from other packets */
> +		for (j = 0; j < num_rx_slave; j++) {
> +			hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
> +
> +			uint16_t ether_type = rte_be_to_cpu_16(hdr-
> >ether_type);
> +			if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
> +				bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
> +				continue;
> +			}
> +
> +			/* Check if we can receive this packet. Also filter
> packets if
> +			 * bonding interface is not in promiscuous mode
> (slaves are always
> +			 * in promiscuous mode). */
> +			if (likely(ACTOR_STATE(&mode4->port_list[i],
> COLLECTING)) &&
> +					likely(internals->promiscuous_en ||
> +					is_same_ether_addr(&bond_mac,
> &hdr->d_addr))) {
> +				bufs[num_rx_total++] = pkts[j];
> +			} else
> +				rte_pktmbuf_free(pkts[j]);
> +		}
> +	}
> +
> +	return num_rx_total;
> +}
> +
>  static inline uint16_t
>  ether_hash(struct ether_hdr *eth_hdr)
>  {
> @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue,
> struct rte_mbuf **bufs,
>  }
> 
>  static uint16_t
> +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> +		uint16_t nb_pkts)
> +{
> +	struct bond_dev_private *internals;
> +	struct mode8023ad_data *mode4;
> +	struct bond_tx_queue *bd_tx_q;
> +
> +	uint8_t num_of_slaves;
> +	uint8_t slaves[RTE_MAX_ETHPORTS];
> +	 /* possitions in slaves, not ID */
> +	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
> +	uint8_t distributing_slaves_count;
> +
> +	uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
> +	uint16_t i, op_slave_idx;
> +
> +	/* Slow packets from 802.3AX state machines. */
> +	struct slow_protocol_msg *slow_msg;
> +
> +	/* Allocate one additional packet in case 8023AD mode.
> +	 * First element if not NULL is slow packet. */
> +	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
> +	/* Total amount of packets in slave_bufs */
> +	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
> +	/* Array of slow packets placed in each slave */
> +	uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
> +
> +	bd_tx_q = (struct bond_tx_queue *)queue;
> +	internals = bd_tx_q->dev_private;
> +	mode4 = &internals->mode4;
> +
> +	/* Copy slave list to protect against slave up/down changes during tx
> +	 * bursting */
> +	num_of_slaves = internals->active_slave_count;
> +	if (num_of_slaves < 1)
> +		return num_tx_total;
> +
> +	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
> num_of_slaves);
> +
> +	distributing_slaves_count = mode4->distibuting_slaves_count;
> +	memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
> +			sizeof(slaves[0]) * distributing_slaves_count);
> +
> +	for (i = 0; i < num_of_slaves; i++)
> +		slave_bufs[i][0] = NULL;
> +
> +	/* It is likely that tx ring will be empty. If it is not empty, it is
> +	 * likely that there will be only one frame. */
> +	while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
> +			rte_ring_dequeue(mode4->tx_ring, (void
> **)&slow_msg) != -ENOENT) {
> +		i = find_slave_by_id(slaves, num_of_slaves, slow_msg-
> >port_id);
> +
> +		/* Assign slow packet to slave or drop it if slave is not in
> active list
> +		 * (ex: link down). */
> +		if (likely(i < num_of_slaves)) {
> +			 /* If there is more than one slow packet to the same
> slave, send
> +			  * only latest, and drop previouse - tx burst was no
> called quick
> +			  * enough. */
> +			if (slave_bufs[i][0] != NULL)
> +				rte_pktmbuf_free(slave_bufs[i][0]);
> +
> +			slave_bufs[i][0] = slow_msg->pkt;
> +			slave_nb_pkts[i] = 1;
> +			slave_slow_packets[i] = 1;
> +		} else
> +			rte_pktmbuf_free(slow_msg->pkt);
> +
> +		rte_ring_enqueue(mode4->free_ring, slow_msg);
> +	}
> +
> +	if (likely(distributing_slaves_count > 0)) {
> +		/* Populate slaves mbuf with the packets which are to be
> sent on it */
> +		for (i = 0; i < nb_pkts; i++) {
> +			/* Select output slave using hash based on xmit
> policy */
> +			op_slave_idx = xmit_slave_hash(bufs[i],
> distributing_slaves_count,
> +					internals->balance_xmit_policy);
> +
> +			/* Populate slave mbuf arrays with mbufs for that
> slave. Use only
> +			 * slaves that are currently distributing. */
> +			uint8_t slave_offset =
> distributing_offsets[op_slave_idx];
> +			uint16_t pkt_pos = slave_nb_pkts[slave_offset];
> +			slave_nb_pkts[slave_offset]++;
> +
> +			slave_bufs[slave_offset][pkt_pos] = bufs[i];
> +		}
> +	}
> +
> +	/* Send packet burst on each slave device */
> +	for (i = 0; i < num_of_slaves; i++) {
> +		if (slave_nb_pkts[i] > 0) {
> +			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q-
> >queue_id,
> +					slave_bufs[i], slave_nb_pkts[i]);
> +
> +			/* if tx burst fails move packets to end of bufs */
> +			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
> +				uint16_t slave_tx_fail_count =
> slave_nb_pkts[i] - num_tx_slave;
> +
> +				/* Free slow packet if it exists and not send.
> */
> +				if (slave_slow_packets[i] != 0 &&
> num_tx_slave == 0) {
> +					rte_pktmbuf_free(slave_bufs[i][0]);
> +					slave_tx_fail_count--;
> +				}
> +
> +				tx_fail_total += slave_tx_fail_count;
> +				memcpy(bufs[nb_pkts - tx_fail_total],
> +					slave_bufs[i][num_tx_slave],
> +					slave_tx_fail_count);
> +			}
> +
> +			if (num_tx_slave > 0)
> +				num_tx_slave -= slave_slow_packets[i];
> +
> +			num_tx_total += num_tx_slave;
> +		}
> +	}
> +
> +	return num_tx_total;
> +}
> +
> +static uint16_t
>  bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
>  		uint16_t nb_pkts)
>  {
> @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link
> *bonded_dev_link,
>  }
> 
>  int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr
> *dst_mac_addr)
> +{
> +	struct ether_addr *mac_addr;
> +
> +	mac_addr = eth_dev->data->mac_addrs;
> +
> +	if (eth_dev == NULL) {
> +		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n",
> __func__);
> +		return -1;
> +	}
> +
> +	if (dst_mac_addr == NULL) {
> +		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n",
> __func__);
> +		return -1;
> +	}
> +
> +	ether_addr_copy(mac_addr, dst_mac_addr);
> +	return 0;
> +}
> +
> +int
>  mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr
> *new_mac_addr)
>  {
>  	struct ether_addr *mac_addr;
> @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct
> ether_addr *new_mac_addr)
>  	mac_addr = eth_dev->data->mac_addrs;
> 
>  	if (eth_dev == NULL) {
> -		RTE_BOND_LOG(ERR,  "NULL pointer eth_dev specified");
> +		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
>  		return -1;
>  	}
> 
> @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev
> *bonded_eth_dev)
>  			}
>  		}
>  		break;
> +	case BONDING_MODE_8023AD:
> +		break;
>  	case BONDING_MODE_ACTIVE_BACKUP:
>  	default:
>  		for (i = 0; i < internals->slave_count; i++) {
> @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev
> *eth_dev, int mode)
>  		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
>  		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
>  		break;
> +	case BONDING_MODE_8023AD:
> +		if (bond_mode_8023ad_init(eth_dev) != 0)
> +			return -1;
> +
> +		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
> +		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
> +		break;
>  	default:
>  		return -1;
>  	}
> @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
>  	if (internals->user_defined_primary_port)
>  		bond_ethdev_primary_set(internals, internals-
> >primary_port);
> 
> +	if (internals->mode == BONDING_MODE_8023AD)
> +		bond_mode_8023ad_start(eth_dev);
> 
>  	if (internals->link_status_polling_enabled)
>  		rte_eal_alarm_set(internals->link_status_polling_interval_ms
> * 1000,
> @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
>  {
>  	struct bond_dev_private *internals = eth_dev->data->dev_private;
> 
> +	if (internals->mode == BONDING_MODE_8023AD) {
> +		struct mode8023ad_data *data = &internals->mode4;
> +		struct slow_protocol_msg *msg;
> +
> +		bond_mode_8023ad_stop(eth_dev);
> +		data->distibuting_slaves_count = 0;
> +
> +		/* Discard all messages to/from mode 4 state machines */
> +		while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -
> ENOENT) {
> +			rte_pktmbuf_free(msg->pkt);
> +			rte_ring_enqueue(data->free_ring, msg);
> +		}
> +
> +		while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -
> ENOENT) {
> +			rte_pktmbuf_free(msg->pkt);
> +			rte_ring_enqueue(data->free_ring, msg);
> +		}
> +	}
> +
>  	internals->active_slave_count = 0;
>  	internals->link_status_polling_enabled = 0;
> 
> @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev
> *dev, uint16_t tx_queue_id,
>  					0, dev->pci_dev->numa_node);
> 
>  	if (bd_tx_q == NULL)
> -			return -1;
> +		return -1;
> 
>  	bd_tx_q->queue_id = tx_queue_id;
>  	bd_tx_q->dev_private = dev->data->dev_private;
> @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue)
>  	rte_free(queue);
>  }
> 
> -
>  static void
>  bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
>  {
> @@ -884,7 +1105,7 @@
> bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
> 
>  	/* If device is currently being configured then don't check slaves link
>  	 * status, wait until next period */
> -	if (rte_spinlock_trylock(&internals->lock)){
> +	if (rte_spinlock_trylock(&internals->lock)) {
>  		for (i = 0; i < internals->slave_count; i++) {
>  			if (internals->slaves[i].link_status_polling_enabled) {
>  				slave_ethdev = &rte_eth_devices[internals-
> >slaves[i].port_id];
> @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct
> rte_eth_dev *eth_dev)
>  		for (i = 0; i < internals->slave_count; i++)
>  			rte_eth_promiscuous_enable(internals-
> >slaves[i].port_id);
>  		break;
> +	/* In mode4 promiscus mode is managed when slave is
> added/removed */
> +	case BONDING_MODE_8023AD:
> +		break;
>  	/* Promiscuous mode is propagated only to primary slave */
>  	case BONDING_MODE_ACTIVE_BACKUP:
>  	default:
>  		rte_eth_promiscuous_enable(internals-
> >current_primary_port);
> -
>  	}
>  }
> 
> @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct
> rte_eth_dev *dev)
>  	int i;
> 
>  	internals->promiscuous_en = 0;
> -
> +
>  	switch (internals->mode) {
>  	/* Promiscuous mode is propagated to all slaves */
>  	case BONDING_MODE_ROUND_ROBIN:
> @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct
> rte_eth_dev *dev)
>  		for (i = 0; i < internals->slave_count; i++)
>  			rte_eth_promiscuous_disable(internals-
> >slaves[i].port_id);
>  		break;
> +	/* In mode4 promiscus mode is set managed when slave is
> added/removed */
> +	case BONDING_MODE_8023AD:
> +		break;
>  	/* Promiscuous mode is propagated only to primary slave */
>  	case BONDING_MODE_ACTIVE_BACKUP:
>  	default:
> @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id,
> enum rte_eth_event_type type,
>  	struct bond_dev_private *internals;
>  	struct rte_eth_link link;
> 
> -	int i, valid_slave = 0, active_pos = -1;
> +	int i, valid_slave = 0;
> +	uint8_t active_pos;
>  	uint8_t lsc_flag = 0;
> 
>  	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
> @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t
> port_id, enum rte_eth_event_type type,
>  		return;
> 
>  	/* Search for port in active port list */
> -	for (i = 0; i < internals->active_slave_count; i++) {
> -		if (port_id == internals->active_slaves[i]) {
> -			active_pos = i;
> -			break;
> -		}
> -	}
> +	active_pos = find_slave_by_id(internals->active_slaves,
> +			internals->active_slave_count, port_id);
> 
>  	rte_eth_link_get_nowait(port_id, &link);
>  	if (link.link_status) {
> -		if (active_pos >= 0)
> +		if (active_pos < internals->active_slave_count)
>  			return;
> 
>  		/* if no active slave ports then set this port to be primary
> port */
> @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t
> port_id, enum rte_eth_event_type type,
>  			link_properties_set(bonded_eth_dev,
>  					&(slave_eth_dev->data->dev_link));
>  		}
> -		internals->active_slaves[internals->active_slave_count++] =
> port_id;
> +
> +		activate_slave(bonded_eth_dev, port_id);
> 
>  		/* If user has defined the primary port then default to using
> it */
>  		if (internals->user_defined_primary_port &&
>  				internals->primary_port == port_id)
>  			bond_ethdev_primary_set(internals, port_id);
>  	} else {
> -		if (active_pos < 0)
> +		if (active_pos == internals->active_slave_count)
>  			return;
> 
>  		/* Remove from active slave list */
> -		for (i = active_pos; i < (internals->active_slave_count - 1); i++)
> -			internals->active_slaves[i] = internals-
> >active_slaves[i+1];
> -
> -		internals->active_slave_count--;
> +		deactivate_slave(bonded_eth_dev, active_pos);
> 
>  		/* No active slaves, change link status to down and reset
> other
>  		 * link properties */
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h
> b/lib/librte_pmd_bond/rte_eth_bond_private.h
> index 6db5144..77f7bb0 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_private.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
> @@ -42,6 +42,7 @@ extern "C" {
>  #include <rte_spinlock.h>
> 
>  #include "rte_eth_bond.h"
> +#include "rte_eth_bond_8023ad.h"
> 
>  #define PMD_BOND_SLAVE_PORT_KVARG			("slave")
>  #define PMD_BOND_PRIMARY_SLAVE_KVARG		("primary")
> @@ -60,6 +61,8 @@ extern "C" {
>  #define RTE_BOND_LOG(lvl, msg, ...) 		\
>  	RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__,
> ##__VA_ARGS__);
> 
> +#define BONDING_MODE_INVALID 0xFF
> +
>  extern const char *pmd_bond_init_valid_arguments[];
> 
>  extern const char *driver_name;
> @@ -89,7 +92,13 @@ struct bond_tx_queue {
>  	/**< Copy of TX configuration structure for queue */
>  };
> 
> -
> +/** Persisted Slave Configuration Structure */
> +struct slave_conf {
> +	uint8_t port_id;
> +	/**< Port Id of slave eth_dev */
> +	struct ether_addr mac_addr;
> +	/**< Slave eth_dev original MAC address */
> +};
>  /** Bonded slave devices structure */
>  struct bond_ethdev_slave_ports {
>  	uint8_t slaves[RTE_MAX_ETHPORTS];	/**< Slave port id array */
> @@ -124,7 +133,7 @@ struct bond_dev_private {
>  	uint8_t user_defined_mac;
>  	/**< Flag for whether MAC address is user defined or not */
>  	uint8_t promiscuous_en;
> -	/**< Enabled/disable promiscuous mode on slave devices */
> +	/**< Enabled/disable promiscuous mode on bonding device */
>  	uint8_t link_props_set;
>  	/**< flag to denote if the link properties are set */
> 
> @@ -143,6 +152,9 @@ struct bond_dev_private {
>  	uint8_t slave_count;			/**< Number of bonded
> slaves */
>  	struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
>  	/**< Arary of bonded slaves details */
> +
> +	struct mode8023ad_data mode4;
> +		/**< Mode 4 private data */
>  };
> 
>  extern struct eth_dev_ops default_dev_ops;
> @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops;
>  int
>  valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
> 
> +/* Search given slave array to find possition of given id.
> + * Return slave pos or slaves_count if not found. */
> +static inline uint8_t
> +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
> +	uint8_t slave_id ) {
> +
> +	uint8_t pos;
> +	for (pos = 0; pos < slaves_count; pos++) {
> +		if (slave_id == slaves[pos])
> +			break;
> +	}
> +
> +	return pos;
> +}
> +
>  int
>  valid_port_id(uint8_t port_id);
> 
> @@ -160,6 +187,14 @@ int
>  valid_slave_port_id(uint8_t port_id);
> 
>  void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> +	uint8_t slave_pos );
> +
> +void
> +activate_slave(struct rte_eth_dev *eth_dev,
> +	uint8_t port_id );
> +
> +void
>  link_properties_set(struct rte_eth_dev *bonded_eth_dev,
>  		struct rte_eth_link *slave_dev_link);
>  void
> @@ -173,6 +208,9 @@ int
>  mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr
> *new_mac_addr);
> 
>  int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr
> *dst_mac_addr);
> +
> +int
>  mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
> 
>  uint8_t
> --
> 1.7.9.5
  
Wodkowski, PawelX Sept. 30, 2014, 11:17 a.m. UTC | #2
Fixed patch version sent.

Pawel
  

Patch

diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
index 2e08f23..1a3711b 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_ether/rte_ether.h
@@ -293,6 +293,7 @@  struct vlan_hdr {
 #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
 #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
 #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
index 953d75e..c2312c2 100644
--- a/lib/librte_pmd_bond/Makefile
+++ b/lib/librte_pmd_bond/Makefile
@@ -44,6 +44,7 @@  CFLAGS += $(WERROR_FLAGS)
 #
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
 
 #
diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index 6811c7b..b0223c2 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -75,6 +75,10 @@  extern "C" {
 /**< Broadcast (Mode 3).
  * In this mode all transmitted packets will be transmitted on all available
  * active slaves of the bonded. */
+#define BONDING_MODE_8023AD				(4)
+/**< 802.3AD (Mode 4).
+ * In this mode transmission and reception of packets is managed by LACP
+ * protocol specified in 802.3AD documentation. */
 
 /* Balance Mode Transmit Policies */
 #define BALANCE_XMIT_POLICY_LAYER2		(0)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index c690ceb..c547164 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -31,6 +31,8 @@ 
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <string.h>
+
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -104,6 +106,44 @@  valid_slave_port_id(uint8_t port_id)
 	return 0;
 }
 
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+	struct bond_dev_private *internals = eth_dev->data->dev_private;
+	uint8_t active_count = internals->active_slave_count;
+
+	internals->active_slaves[active_count] = port_id;
+
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_slave_append(eth_dev);
+
+	internals->active_slave_count = active_count + 1;
+}
+
+void
+deactivate_slave(struct rte_eth_dev *eth_dev,
+	uint8_t slave_pos)
+{
+	struct bond_dev_private *internals = eth_dev->data->dev_private;
+	uint8_t active_count = internals->active_slave_count;
+
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
+
+	active_count--;
+
+	/* If slave was not at the end of the list
+	 * shift active slaves up active array list */
+	if (slave_pos < active_count) {
+		memmove(internals->active_slaves + slave_pos,
+				internals->active_slaves + slave_pos + 1,
+				(active_count - slave_pos) *
+					sizeof(internals->active_slaves[0]));
+	}
+
+	internals->active_slave_count = active_count;
+}
+
 uint8_t
 number_of_sockets(void)
 {
@@ -216,12 +256,8 @@  rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	eth_dev->dev_ops = &default_dev_ops;
 	eth_dev->pci_dev = pci_dev;
 
-	if (bond_ethdev_mode_set(eth_dev, mode)) {
-		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
-				 eth_dev->data->port_id, mode);
-		goto err;
-	}
-
+	internals->port_id = eth_dev->data->port_id;
+	internals->mode = BONDING_MODE_INVALID;
 	internals->current_primary_port = 0;
 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
 	internals->user_defined_mac = 0;
@@ -241,6 +277,12 @@  rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
 	memset(internals->slaves, 0, sizeof(internals->slaves));
 
+	if (bond_ethdev_mode_set(eth_dev, mode)) {
+		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
+				 eth_dev->data->port_id, mode);
+		goto err;
+	}
+
 	return eth_dev->data->port_id;
 
 err:
@@ -348,14 +390,12 @@  __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		rte_eth_link_get_nowait(slave_port_id, &link_props);
 
 		 if (link_props.link_status == 1)
-			internals->active_slaves[internals->active_slave_count++] =
-					slave_port_id;
+			activate_slave(bonded_eth_dev, slave_port_id);
 	}
 	return 0;
 
 }
 
-
 int
 rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
 {
@@ -380,31 +420,26 @@  rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
 	return retval;
 }
 
-
 static int
 __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 {
+	struct rte_eth_dev *bonded_eth_dev;
 	struct bond_dev_private *internals;
 
-	int i, slave_idx = -1;
+	int i, slave_idx;
 
 	if (valid_slave_port_id(slave_port_id) != 0)
 		return -1;
 
-	internals = rte_eth_devices[bonded_port_id].data->dev_private;
+	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+	internals = bonded_eth_dev->data->dev_private;
 
 	/* first remove from active slave list */
-	for (i = 0; i < internals->active_slave_count; i++) {
-		if (internals->active_slaves[i] == slave_port_id)
-			slave_idx = i;
+	slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count,
+			slave_port_id);
 
-		/* shift active slaves up active array list */
-		if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
-			internals->active_slaves[i] = internals->active_slaves[i+1];
-	}
-
-	if (slave_idx >= 0)
-		internals->active_slave_count--;
+	if (slave_idx < internals->active_slave_count)
+		deactivate_slave(bonded_eth_dev, slave_idx);
 
 	slave_idx = -1;
 	/* now find in slave list */
@@ -538,6 +573,7 @@  rte_eth_bond_primary_get(uint8_t bonded_port_id)
 
 	return internals->current_primary_port;
 }
+
 int
 rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
 {
@@ -673,7 +709,6 @@  rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
 	return internals->balance_xmit_policy;
 }
 
-
 int
 rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
 {
@@ -729,7 +764,6 @@  rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
 	return internals->link_down_delay_ms;
 }
 
-
 int
 rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
 
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index bbbc69b..a0be0e6 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -171,6 +171,7 @@  bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
 	case BONDING_MODE_ACTIVE_BACKUP:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_8023AD:
 		return 0;
 	default:
 		RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 6d0fb1b..13630d9 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -44,6 +44,7 @@ 
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad.h"
 
 static uint16_t
 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
@@ -168,6 +169,56 @@  bond_ethdev_tx_burst_active_backup(void *queue,
 			bufs, nb_pkts);
 }
 
+/* Mode 4 RX burst: polls the active slaves, hands slow protocol frames to
+ * bond_mode_8023ad_handle_slow_pkt() and stores the accepted frames in bufs.
+ * Returns the number of mbufs stored in bufs, never more than nb_pkts. */
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* Cast to structure, containing bonded device's port id and queue id */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	struct mode8023ad_data *mode4 = &internals->mode4;
+	struct ether_addr bond_mac;
+	struct ether_hdr *hdr;
+	/* The extra slot only keeps the VLA length non-zero when nb_pkts is 0 */
+	struct rte_mbuf *pkts[nb_pkts + 1];
+	uint16_t num_rx_slave = 0;	/* Number of packet received on current slave */
+	uint16_t num_rx_total = 0;	/* Total number of received packets */
+	uint16_t j;	/* 16 bit wide: a burst may exceed UINT8_MAX packets */
+	uint8_t i;
+
+	rte_eth_macaddr_get(internals->port_id, &bond_mac);
+
+	for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) {
+		/* Read at most the room left in bufs: all packets may be data packets */
+		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
+				bd_rx_q->queue_id, pkts, nb_pkts - num_rx_total);
+
+		/* Separate slow protocol packets from other packets */
+		for (j = 0; j < num_rx_slave; j++) {
+			hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
+			uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type);
+			if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
+				bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
+				continue;
+			}
+
+			/* Check if we can receive this packet. Also filter packets if
+			 * bonding interface is not in promiscuous mode (slaves are always
+			 * in promiscuous mode). */
+			if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) &&
+					likely(internals->promiscuous_en ||
+					is_same_ether_addr(&bond_mac, &hdr->d_addr))) {
+				bufs[num_rx_total++] = pkts[j];
+			} else
+				rte_pktmbuf_free(pkts[j]);
+		}
+	}
+
+	return num_rx_total;
+}
+
 static inline uint16_t
 ether_hash(struct ether_hdr *eth_hdr)
 {
@@ -350,6 +401,126 @@  bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 }
 
 static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* Mode 4 TX burst: sends pending slow protocol frames from mode4->tx_ring
+	 * and spreads bufs over the distributing slaves using xmit_slave_hash().
+	 * Returns the number of mbufs from bufs that were sent; the unsent ones
+	 * are copied to the end of bufs. */
+	struct bond_dev_private *internals;
+	struct mode8023ad_data *mode4;
+	struct bond_tx_queue *bd_tx_q;
+	uint8_t num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	/* positions in slaves, not ID */
+	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
+	uint8_t distributing_slaves_count;
+	uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
+	uint16_t i, op_slave_idx;
+	/* Slow packets from 802.3AX state machines. */
+	struct slow_protocol_msg *slow_msg;
+	/* Allocate one additional packet in case 8023AD mode.
+	 * First element if not NULL is slow packet. */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
+	/* Total amount of packets in slave_bufs */
+	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	/* Array of slow packets placed in each slave */
+	uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	internals = bd_tx_q->dev_private;
+	mode4 = &internals->mode4;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = internals->active_slave_count;
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+
+	distributing_slaves_count = mode4->distibuting_slaves_count;
+	memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
+			sizeof(distributing_offsets[0]) * distributing_slaves_count);
+
+	for (i = 0; i < num_of_slaves; i++)
+		slave_bufs[i][0] = NULL;
+
+	/* It is likely that tx ring will be empty. If it is not empty, it is
+	 * likely that there will be only one frame. */
+	while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
+			rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) {
+		i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id);
+
+		/* Assign slow packet to slave or drop it if slave is not in active list
+		 * (ex: link down). */
+		if (likely(i < num_of_slaves)) {
+			/* If there is more than one slow packet to the same slave, send
+			 * only the latest and drop the previous one - tx burst was not
+			 * called quickly enough. */
+			if (slave_bufs[i][0] != NULL)
+				rte_pktmbuf_free(slave_bufs[i][0]);
+
+			slave_bufs[i][0] = slow_msg->pkt;
+			slave_nb_pkts[i] = 1;
+			slave_slow_packets[i] = 1;
+		} else
+			rte_pktmbuf_free(slow_msg->pkt);
+
+		rte_ring_enqueue(mode4->free_ring, slow_msg);
+	}
+
+	if (likely(distributing_slaves_count > 0)) {
+		/* Populate slaves mbuf with the packets which are to be sent on it */
+		for (i = 0; i < nb_pkts; i++) {
+			/* Select output slave using hash based on xmit policy */
+			op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count,
+					internals->balance_xmit_policy);
+
+			/* Populate slave mbuf arrays with mbufs for that slave. Use only
+			 * slaves that are currently distributing. */
+			uint8_t slave_offset = distributing_offsets[op_slave_idx];
+			uint16_t pkt_pos = slave_nb_pkts[slave_offset];
+			slave_nb_pkts[slave_offset]++;
+
+			slave_bufs[slave_offset][pkt_pos] = bufs[i];
+		}
+	}
+
+	/* Send packet burst on each slave device */
+	for (i = 0; i < num_of_slaves; i++) {
+		if (slave_nb_pkts[i] > 0) {
+			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+					slave_bufs[i], slave_nb_pkts[i]);
+
+			/* if tx burst fails move packets to end of bufs */
+			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+				uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+
+				/* Free slow packet if it exists and was not sent. */
+				if (slave_slow_packets[i] != 0 && num_tx_slave == 0) {
+					rte_pktmbuf_free(slave_bufs[i][0]);
+					slave_tx_fail_count--;
+				}
+				/* Unsent mbufs are the tail of slave_bufs[i]; copy the pointers */
+				tx_fail_total += slave_tx_fail_count;
+				memcpy(&bufs[nb_pkts - tx_fail_total],
+					&slave_bufs[i][slave_nb_pkts[i] - slave_tx_fail_count],
+					slave_tx_fail_count * sizeof(bufs[0]));
+			}
+
+			if (num_tx_slave > 0)
+				num_tx_slave -= slave_slow_packets[i];
+
+			num_tx_total += num_tx_slave;
+		}
+	}
+
+	return num_tx_total;
+}
+
+static uint16_t
 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
 		uint16_t nb_pkts)
 {
@@ -448,6 +619,27 @@  link_properties_valid(struct rte_eth_link *bonded_dev_link,
 }
 
 int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
+{
+	/* Copy the MAC address at eth_dev->data->mac_addrs into dst_mac_addr.
+	 * Returns 0 on success, -1 when either pointer is NULL. */
+
+	/* Validate eth_dev before it is dereferenced below. */
+	if (eth_dev == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+		return -1;
+	}
+
+	if (dst_mac_addr == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+		return -1;
+	}
+
+	ether_addr_copy(eth_dev->data->mac_addrs, dst_mac_addr);
+	return 0;
+}
+
+int
 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
 {
 	struct ether_addr *mac_addr;
@@ -455,7 +647,7 @@  mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
 	mac_addr = eth_dev->data->mac_addrs;
 
 	if (eth_dev == NULL) {
-		RTE_BOND_LOG(ERR,  "NULL pointer eth_dev specified");
+		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
 		return -1;
 	}
 
@@ -494,6 +686,8 @@  mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
 			}
 		}
 		break;
+	case BONDING_MODE_8023AD:
+		break;
 	case BONDING_MODE_ACTIVE_BACKUP:
 	default:
 		for (i = 0; i < internals->slave_count; i++) {
@@ -544,6 +738,13 @@  bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
 		break;
+	case BONDING_MODE_8023AD:
+		if (bond_mode_8023ad_init(eth_dev) != 0)
+			return -1;
+
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+		break;
 	default:
 		return -1;
 	}
@@ -751,6 +952,8 @@  bond_ethdev_start(struct rte_eth_dev *eth_dev)
 	if (internals->user_defined_primary_port)
 		bond_ethdev_primary_set(internals, internals->primary_port);
 
+	if (internals->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_start(eth_dev);
 
 	if (internals->link_status_polling_enabled)
 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
@@ -765,6 +968,25 @@  bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 
+	if (internals->mode == BONDING_MODE_8023AD) {
+		struct mode8023ad_data *data = &internals->mode4;
+		struct slow_protocol_msg *msg;
+
+		bond_mode_8023ad_stop(eth_dev);
+		data->distibuting_slaves_count = 0;
+
+		/* Discard all messages to/from mode 4 state machines */
+		while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) {
+			rte_pktmbuf_free(msg->pkt);
+			rte_ring_enqueue(data->free_ring, msg);
+		}
+
+		while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) {
+			rte_pktmbuf_free(msg->pkt);
+			rte_ring_enqueue(data->free_ring, msg);
+		}
+	}
+
 	internals->active_slave_count = 0;
 	internals->link_status_polling_enabled = 0;
 
@@ -832,7 +1054,7 @@  bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 					0, dev->pci_dev->numa_node);
 
 	if (bd_tx_q == NULL)
-			return -1;
+		return -1;
 
 	bd_tx_q->queue_id = tx_queue_id;
 	bd_tx_q->dev_private = dev->data->dev_private;
@@ -863,7 +1085,6 @@  bond_ethdev_tx_queue_release(void *queue)
 	rte_free(queue);
 }
 
-
 static void
 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
 {
@@ -884,7 +1105,7 @@  bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
 
 	/* If device is currently being configured then don't check slaves link
 	 * status, wait until next period */
-	if (rte_spinlock_trylock(&internals->lock)){
+	if (rte_spinlock_trylock(&internals->lock)) {
 		for (i = 0; i < internals->slave_count; i++) {
 			if (internals->slaves[i].link_status_polling_enabled) {
 				slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
@@ -1002,11 +1223,13 @@  bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
 		for (i = 0; i < internals->slave_count; i++)
 			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
 		break;
+	/* In mode4 promiscuous mode is managed when slave is added/removed */
+	case BONDING_MODE_8023AD:
+		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
 	default:
 		rte_eth_promiscuous_enable(internals->current_primary_port);
-
 	}
 }
 
@@ -1017,7 +1240,7 @@  bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
 	int i;
 
 	internals->promiscuous_en = 0;
-
+	
 	switch (internals->mode) {
 	/* Promiscuous mode is propagated to all slaves */
 	case BONDING_MODE_ROUND_ROBIN:
@@ -1026,6 +1249,9 @@  bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
 		for (i = 0; i < internals->slave_count; i++)
 			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
 		break;
+	/* In mode4 promiscuous mode is managed when slave is added/removed */
+	case BONDING_MODE_8023AD:
+		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
 	default:
@@ -1051,7 +1277,8 @@  bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 	struct bond_dev_private *internals;
 	struct rte_eth_link link;
 
-	int i, valid_slave = 0, active_pos = -1;
+	int i, valid_slave = 0;
+	uint8_t active_pos;
 	uint8_t lsc_flag = 0;
 
 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
@@ -1081,16 +1308,12 @@  bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 		return;
 
 	/* Search for port in active port list */
-	for (i = 0; i < internals->active_slave_count; i++) {
-		if (port_id == internals->active_slaves[i]) {
-			active_pos = i;
-			break;
-		}
-	}
+	active_pos = find_slave_by_id(internals->active_slaves,
+			internals->active_slave_count, port_id);
 
 	rte_eth_link_get_nowait(port_id, &link);
 	if (link.link_status) {
-		if (active_pos >= 0)
+		if (active_pos < internals->active_slave_count)
 			return;
 
 		/* if no active slave ports then set this port to be primary port */
@@ -1104,21 +1327,19 @@  bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 			link_properties_set(bonded_eth_dev,
 					&(slave_eth_dev->data->dev_link));
 		}
-		internals->active_slaves[internals->active_slave_count++] = port_id;
+
+		activate_slave(bonded_eth_dev, port_id);
 
 		/* If user has defined the primary port then default to using it */
 		if (internals->user_defined_primary_port &&
 				internals->primary_port == port_id)
 			bond_ethdev_primary_set(internals, port_id);
 	} else {
-		if (active_pos < 0)
+		if (active_pos == internals->active_slave_count)
 			return;
 
 		/* Remove from active slave list */
-		for (i = active_pos; i < (internals->active_slave_count - 1); i++)
-			internals->active_slaves[i] = internals->active_slaves[i+1];
-
-		internals->active_slave_count--;
+		deactivate_slave(bonded_eth_dev, active_pos);
 
 		/* No active slaves, change link status to down and reset other
 		 * link properties */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 6db5144..77f7bb0 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -42,6 +42,7 @@  extern "C" {
 #include <rte_spinlock.h>
 
 #include "rte_eth_bond.h"
+#include "rte_eth_bond_8023ad.h"
 
 #define PMD_BOND_SLAVE_PORT_KVARG			("slave")
 #define PMD_BOND_PRIMARY_SLAVE_KVARG		("primary")
@@ -60,6 +61,8 @@  extern "C" {
 #define RTE_BOND_LOG(lvl, msg, ...) 		\
 	RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__);
 
+#define BONDING_MODE_INVALID 0xFF
+
 extern const char *pmd_bond_init_valid_arguments[];
 
 extern const char *driver_name;
@@ -89,7 +92,13 @@  struct bond_tx_queue {
 	/**< Copy of TX configuration structure for queue */
 };
 
-
+/** Persisted slave configuration: slave port id and its original MAC address */
+struct slave_conf {
+	uint8_t port_id;
+	/**< Port id of the slave eth_dev */
+	struct ether_addr mac_addr;
+	/**< Original MAC address of the slave eth_dev */
+};
 /** Bonded slave devices structure */
 struct bond_ethdev_slave_ports {
 	uint8_t slaves[RTE_MAX_ETHPORTS];	/**< Slave port id array */
@@ -124,7 +133,7 @@  struct bond_dev_private {
 	uint8_t user_defined_mac;
 	/**< Flag for whether MAC address is user defined or not */
 	uint8_t promiscuous_en;
-	/**< Enabled/disable promiscuous mode on slave devices */
+	/**< Enabled/disable promiscuous mode on bonding device */
 	uint8_t link_props_set;
 	/**< flag to denote if the link properties are set */
 
@@ -143,6 +152,9 @@  struct bond_dev_private {
 	uint8_t slave_count;			/**< Number of bonded slaves */
 	struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
 	/**< Arary of bonded slaves details */
+	
+	struct mode8023ad_data mode4;
+		/**< Mode 4 private data */
 };
 
 extern struct eth_dev_ops default_dev_ops;
@@ -150,6 +162,21 @@  extern struct eth_dev_ops default_dev_ops;
 int
 valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
 
+/* Linear search of a slave port id array for slave_id.
+ * Returns the position of slave_id, or slaves_count when it is absent. */
+static inline uint8_t
+find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
+	uint8_t slave_id)
+{
+	uint8_t idx = 0;
+
+	/* Stop at the first match; idx == slaves_count means "not found". */
+	while (idx < slaves_count && slaves[idx] != slave_id)
+		idx++;
+
+	return idx;
+}
+
 int
 valid_port_id(uint8_t port_id);
 
@@ -160,6 +187,14 @@  int
 valid_slave_port_id(uint8_t port_id);
 
 void
+deactivate_slave(struct rte_eth_dev *eth_dev,
+	uint8_t slave_pos );
+
+void
+activate_slave(struct rte_eth_dev *eth_dev,
+	uint8_t port_id );
+
+void
 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
 		struct rte_eth_link *slave_dev_link);
 void
@@ -173,6 +208,9 @@  int
 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
 
 int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
+
+int
 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
 
 uint8_t