@@ -50,6 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c
ifeq ($(CONFIG_RTE_MBUF_REFCNT),n)
$(info WARNING: Link Bonding Broadcast mode is disabled because it needs MBUF_REFCNT.)
@@ -101,6 +101,15 @@ extern "C" {
* This mode provides an adaptive transmit load balancing. It dynamically
* changes the transmitting slave, according to the computed load. Statistics
* are collected in 100ms intervals and scheduled every 10ms */
+#define BONDING_MODE_ALB (6)
+/**< Adaptive Load Balancing (Mode 6)
+ * This mode includes adaptive TLB and receive load balancing (RLB). In RLB the
+ * bonding driver intercepts ARP replies sent by the local system and overwrites
+ * their source MAC address, so that different peers send data to the server on
+ * different slave interfaces. When the local system sends an ARP request, the
+ * driver saves the IP information from it. When the ARP reply from that peer is
+ * received, its MAC is stored, one of the slave MACs is assigned, and an ARP
+ * reply is sent to that peer.
+ */
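+/*
+ * For illustration only, a minimal sketch of enabling this mode through the
+ * existing bonding API (port ids and the device name are placeholders):
+ *
+ *	int bond_port = rte_eth_bond_create("bond0", BONDING_MODE_ALB,
+ *			rte_socket_id());
+ *	rte_eth_bond_slave_add(bond_port, slave_port_id);
+ */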
/* Balance Mode Transmit Policies */
#define BALANCE_XMIT_POLICY_LAYER2 (0)
new file mode 100644
@@ -0,0 +1,256 @@
+#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_alb.h"
+
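+/*
+ * XOR-fold an arbitrary byte string into a single byte. For an IPv4
+ * address this maps each client into one of ALB_HASH_TABLE_SIZE (256)
+ * buckets of the client table.
+ */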
+static inline uint8_t
+simple_hash(uint8_t *hash_start, int hash_size)
+{
+ int i;
+ uint8_t hash;
+
+ hash = 0;
+ for (i = 0; i < hash_size; ++i)
+ hash ^= hash_start[i];
+
+ return hash;
+}
+
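+/*
+ * Pick the next slave in round-robin order over the active slaves, so
+ * that consecutive clients are spread across different slaves.
+ */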
+static uint8_t
+calculate_slave(struct bond_dev_private *internals)
+{
+ uint8_t idx;
+
+ idx = (internals->mode6.last_slave + 1) % internals->active_slave_count;
+ internals->mode6.last_slave = idx;
+ return internals->active_slaves[idx];
+}
+
+int
+bond_mode_alb_enable(struct rte_eth_dev *bond_dev)
+{
+ struct bond_dev_private *internals = bond_dev->data->dev_private;
+ struct client_data *hash_table = internals->mode6.client_table;
+
+ uint16_t element_size;
+ char mem_name[RTE_ETH_NAME_MAX_LEN];
+ int socket_id = bond_dev->pci_dev->numa_node;
+
+ /* Fill hash table with initial values */
+ memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE);
+ rte_spinlock_init(&internals->mode6.lock);
+ internals->mode6.last_slave = ALB_NULL_INDEX;
+ internals->mode6.ntt = 0;
+
+ /* Initialize memory pool for ARP packets to send */
+ if (internals->mode6.mempool == NULL) {
+ /*
+		 * 256 bytes covers the Ethernet header, the ARP header and
+		 * nested VLAN headers.
+ * The value is chosen to be cache aligned.
+ */
+ element_size = 256 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+ snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name);
+ internals->mode6.mempool = rte_mempool_create(mem_name,
+ 512 * RTE_MAX_ETHPORTS,
+ element_size,
+ RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
+ 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
+ NULL, rte_pktmbuf_init, NULL, socket_id, 0);
+
+ if (internals->mode6.mempool == NULL) {
+ RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n",
+ bond_dev->data->name);
+ rte_panic(
+ "Failed to alocate memory pool ('%s')\n" "for bond device '%s'\n",
+ mem_name, bond_dev->data->name);
+ }
+ }
+
+ return 0;
+}
+
+void
+bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
+		struct bond_dev_private *internals)
+{
+ struct arp_hdr *arp;
+
+ struct client_data *hash_table = internals->mode6.client_table;
+ struct client_data *client_info;
+
+ uint8_t hash_index;
+
+ arp = (struct arp_hdr *) ((char *) (eth_h + 1) + offset);
+
+ /* ARP Requests are forwarded to the application with no changes */
+ if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY))
+ return;
+
+ /* From now on, we analyze only ARP Reply packets */
+ hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip,
+ sizeof(arp->arp_data.arp_sip));
+ client_info = &hash_table[hash_index];
+
+ /*
+	 * We got a reply to an ARP request sent by the application. We need to
+	 * update the client table when the received data differs from what is
+	 * stored in the ALB table, and schedule an update packet to that client.
+ */
+ rte_spinlock_lock(&internals->mode6.lock);
+ if (client_info->in_use == 0 ||
+ client_info->app_ip != arp->arp_data.arp_tip ||
+ client_info->cli_ip != arp->arp_data.arp_sip ||
+ !is_same_ether_addr(&client_info->cli_mac, &arp->arp_data.arp_sha) ||
+ client_info->vlan_count != offset / sizeof(struct vlan_hdr) ||
+ memcmp(client_info->vlan, eth_h + 1, offset) != 0
+ ) {
+ client_info->in_use = 1;
+ client_info->app_ip = arp->arp_data.arp_tip;
+ client_info->cli_ip = arp->arp_data.arp_sip;
+ ether_addr_copy(&arp->arp_data.arp_sha, &client_info->cli_mac);
+ client_info->slave_idx = calculate_slave(internals);
+ rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
+ ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_tha);
+ memcpy(client_info->vlan, eth_h + 1, offset);
+ client_info->vlan_count = offset / sizeof(struct vlan_hdr);
+ }
+ internals->mode6.ntt = 1;
+ rte_spinlock_unlock(&internals->mode6.lock);
+}
+
+uint8_t
+bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
+ struct bond_dev_private *internals)
+{
+ struct arp_hdr *arp;
+
+ struct client_data *hash_table = internals->mode6.client_table;
+ struct client_data *client_info;
+
+ uint8_t hash_index;
+
+ struct ether_addr bonding_mac;
+
+ arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
+
+ /*
+	 * Traffic with a source MAC other than the bonding MAC should be sent
+	 * on the current primary port.
+ */
+ rte_eth_macaddr_get(internals->port_id, &bonding_mac);
+ if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) {
+ rte_eth_macaddr_get(internals->current_primary_port,
+ &arp->arp_data.arp_sha);
+ return internals->current_primary_port;
+ }
+
+ hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip,
+ sizeof(uint32_t));
+ client_info = &hash_table[hash_index];
+
+ rte_spinlock_lock(&internals->mode6.lock);
+ if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) {
+ if (client_info->in_use) {
+ if (client_info->app_ip == arp->arp_data.arp_sip &&
+ client_info->cli_ip == arp->arp_data.arp_tip) {
+ /* Entry is already assigned to this client */
+ if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) {
+ ether_addr_copy(&arp->arp_data.arp_tha,
+ &client_info->cli_mac);
+ }
+ rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
+ ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
+ memcpy(client_info->vlan, eth_h + 1, offset);
+ client_info->vlan_count = offset / sizeof(struct vlan_hdr);
+ rte_spinlock_unlock(&internals->mode6.lock);
+ return client_info->slave_idx;
+ }
+ }
+
+ /* Assign new slave to this client and update src mac in ARP */
+ client_info->in_use = 1;
+ client_info->ntt = 0;
+ client_info->app_ip = arp->arp_data.arp_sip;
+ ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac);
+ client_info->cli_ip = arp->arp_data.arp_tip;
+ client_info->slave_idx = calculate_slave(internals);
+ rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
+ ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
+ memcpy(client_info->vlan, eth_h + 1, offset);
+ client_info->vlan_count = offset / sizeof(struct vlan_hdr);
+ rte_spinlock_unlock(&internals->mode6.lock);
+ return client_info->slave_idx;
+ }
+
+ /* If packet is not ARP Reply, send it on current primary port. */
+ rte_spinlock_unlock(&internals->mode6.lock);
+ rte_eth_macaddr_get(internals->current_primary_port,
+ &arp->arp_data.arp_sha);
+ return internals->current_primary_port;
+}
+
+uint8_t
+bond_mode_alb_arp_upd(struct client_data *client_info,
+ struct rte_mbuf *pkt, struct bond_dev_private *internals)
+{
+ struct ether_hdr *eth_h;
+ struct arp_hdr *arp_h;
+ uint8_t slave_idx;
+
+ rte_spinlock_lock(&internals->mode6.lock);
+ eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+	ether_addr_copy(&client_info->app_mac, &eth_h->s_addr);
+	ether_addr_copy(&client_info->cli_mac, &eth_h->d_addr);
+	if (client_info->vlan_count > 0)
+		eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+	else
+		eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP);
+
+ arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr)
+ + client_info->vlan_count * sizeof(struct vlan_hdr));
+
+ memcpy(eth_h + 1, client_info->vlan,
+ client_info->vlan_count * sizeof(struct vlan_hdr));
+
+ ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha);
+ arp_h->arp_data.arp_sip = client_info->app_ip;
+ ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha);
+ arp_h->arp_data.arp_tip = client_info->cli_ip;
+
+ arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER);
+ arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ arp_h->arp_hln = ETHER_ADDR_LEN;
+ arp_h->arp_pln = sizeof(uint32_t);
+ arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
+
+ slave_idx = client_info->slave_idx;
+ rte_spinlock_unlock(&internals->mode6.lock);
+
+ return slave_idx;
+}
+
+void
+bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev)
+{
+ struct bond_dev_private *internals = bond_dev->data->dev_private;
+ struct client_data *client_info;
+
+ int i;
+
+	/* If active slave count is 0, it's pointless to refresh the ALB table */
+ if (internals->active_slave_count <= 0)
+ return;
+
+ rte_spinlock_lock(&internals->mode6.lock);
+ internals->mode6.last_slave = ALB_NULL_INDEX;
+
+ for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
+ client_info = &internals->mode6.client_table[i];
+ if (client_info->in_use) {
+ client_info->slave_idx = calculate_slave(internals);
+ rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
+ internals->mode6.ntt = 1;
+ }
+ }
+ rte_spinlock_unlock(&internals->mode6.lock);
+}
new file mode 100644
@@ -0,0 +1,109 @@
+#ifndef RTE_ETH_BOND_ALB_H_
+#define RTE_ETH_BOND_ALB_H_
+
+#include <rte_ether.h>
+#include <rte_arp.h>
+
+#define ALB_HASH_TABLE_SIZE 256
+#define ALB_NULL_INDEX 0xFFFFFFFF
+
+/** ARP data of a single client */
+struct client_data {
+	struct ether_addr app_mac;
+ /**< MAC address of application running DPDK */
+ uint32_t app_ip;
+ /**< IP address of application running DPDK */
+ struct ether_addr cli_mac;
+ /**< Client MAC address */
+ uint32_t cli_ip;
+ /**< Client IP address */
+
+ uint8_t slave_idx;
+	/**< Index of the slave used to communicate with that client */
+ uint8_t in_use;
+ /**< Flag indicating if entry in client table is currently used */
+ uint8_t ntt;
+ /**< Flag indicating if we need to send update to this client on next tx */
+
+ struct vlan_hdr vlan[2];
+ /**< Content of vlan headers */
+ uint8_t vlan_count;
+ /**< Number of nested vlan headers */
+};
+
+struct mode_alb_private {
+ struct client_data client_table[ALB_HASH_TABLE_SIZE];
+ /**< Hash table storing ARP data of every client connected */
+ struct rte_mempool *mempool;
+ /**< Mempool for creating ARP update packets */
+ uint8_t ntt;
+ /**< Flag indicating if we need to send update to any client on next tx */
+ uint32_t last_slave;
+	/**< Index of last used slave in active slaves list */
+ rte_spinlock_t lock;
+};
+
+/**
+ * ALB mode initialization.
+ *
+ * @param bond_dev Pointer to bonding device.
+ *
+ * @return
+ * Error code - 0 on success.
+ */
+int
+bond_mode_alb_enable(struct rte_eth_dev *bond_dev);
+
+/**
+ * Function handles ARP packet reception. If an ARP request is received, it is
+ * forwarded to the application without changes. If it is an ARP reply, the
+ * client table is updated.
+ *
+ * @param eth_h ETH header of received packet.
+ * @param offset VLAN header offset.
+ * @param internals Bonding data.
+ */
+void
+bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
+ struct bond_dev_private *internals);
+
+/**
+ * Function handles ARP packet transmission. It also decides on which slave to
+ * send the packet. An ARP request is sent on the primary slave. An ARP reply
+ * is sent on the slave stored in the client table for that connection; in
+ * that case the function also updates the client table entry.
+ *
+ * @param eth_h ETH header of transmitted packet.
+ * @param offset VLAN header offset.
+ * @param internals Bonding data.
+ *
+ * @return
+ * Index of slave on which packet should be sent.
+ */
+uint8_t
+bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
+ struct bond_dev_private *internals);
+
+/**
+ * Function fills packet with ARP data from client_info.
+ *
+ * @param client_info Data of client to which packet is sent.
+ * @param pkt Pointer to packet which is sent.
+ * @param internals Bonding data.
+ *
+ * @return
+ * Index of slave on which packet should be sent.
+ */
+uint8_t
+bond_mode_alb_arp_upd(struct client_data *client_info,
+ struct rte_mbuf *pkt, struct bond_dev_private *internals);
+
+/**
+ * Function updates slave indexes of active connections.
+ *
+ * @param bond_dev Pointer to bonded device struct.
+ */
+void
+bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev);
+
+#endif /* RTE_ETH_BOND_ALB_H_ */
@@ -111,15 +111,27 @@ void
activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
{
struct bond_dev_private *internals = eth_dev->data->dev_private;
+ uint8_t active_count = internals->active_slave_count;
if (internals->mode == BONDING_MODE_8023AD)
bond_mode_8023ad_activate_slave(eth_dev, port_id);
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING
+			|| internals->mode == BONDING_MODE_ALB)
+		internals->tlb_slaves_order[active_count] = port_id;
+
RTE_VERIFY(internals->active_slave_count <
(RTE_DIM(internals->active_slaves) - 1));
internals->active_slaves[internals->active_slave_count] = port_id;
internals->active_slave_count++;
+
+ if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING)
+ bond_tlb_activate_slave(internals);
+ if (internals->mode == BONDING_MODE_ALB)
+ bond_mode_alb_client_list_upd(eth_dev);
}
void
@@ -132,7 +144,9 @@ deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
if (internals->mode == BONDING_MODE_8023AD) {
bond_mode_8023ad_stop(eth_dev);
bond_mode_8023ad_deactivate_slave(eth_dev, port_id);
- }
+ } else if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING
+ || internals->mode == BONDING_MODE_ALB)
+ bond_tlb_disable(internals);
slave_pos = find_slave_by_id(internals->active_slaves, active_count,
port_id);
@@ -150,8 +164,16 @@ deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
RTE_VERIFY(active_count < RTE_DIM(internals->active_slaves));
internals->active_slave_count = active_count;
- if (eth_dev->data->dev_started && internals->mode == BONDING_MODE_8023AD)
- bond_mode_8023ad_start(eth_dev);
+ if (eth_dev->data->dev_started) {
+ if (internals->mode == BONDING_MODE_8023AD) {
+ bond_mode_8023ad_start(eth_dev);
+ } else if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+ bond_tlb_enable(internals);
+ } else if (internals->mode == BONDING_MODE_ALB) {
+ bond_tlb_enable(internals);
+ bond_mode_alb_client_list_upd(eth_dev);
+ }
+ }
}
uint8_t
@@ -175,6 +175,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
#endif
case BONDING_MODE_8023AD:
case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+ case BONDING_MODE_ALB:
return 0;
default:
RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
@@ -56,6 +56,26 @@
/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
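+/*
+ * Skip up to two nested VLAN headers following the Ethernet header.
+ * Returns the byte offset to the encapsulated protocol data and updates
+ * *proto to the inner EtherType.
+ */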
+static inline size_t
+get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
+{
+ size_t vlan_offset = 0;
+
+ if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
+ struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+ vlan_offset = sizeof(struct vlan_hdr);
+ *proto = vlan_hdr->eth_proto;
+
+ if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
+ vlan_hdr = vlan_hdr + 1;
+
+ *proto = vlan_hdr->eth_proto;
+ vlan_offset += sizeof(struct vlan_hdr);
+ }
+ }
+ return vlan_offset;
+}
+
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
@@ -173,6 +193,34 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
}
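+/*
+ * Mode 6 RX: receive a burst as usual, then hand any ARP frames to the
+ * ALB logic so that the client table stays up to date.
+ */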
static uint16_t
+bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+
+ struct ether_hdr *eth_h;
+
+ uint16_t ether_type, offset;
+ uint16_t nb_recv_pkts;
+
+ int i;
+
+ nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
+
+ for (i = 0; i < nb_recv_pkts; i++) {
+ eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
+ ether_type = eth_h->ether_type;
+		offset = get_vlan_offset(eth_h, &ether_type);
+
+ if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
+ bond_mode_alb_arp_recv(eth_h, offset, internals);
+ }
+ }
+
+ return nb_recv_pkts;
+}
+
+static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
{
@@ -281,26 +329,6 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr)
(word_src_addr[3] ^ word_dst_addr[3]);
}
-static inline size_t
-get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
-{
- size_t vlan_offset = 0;
-
- if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
- struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
- vlan_offset = sizeof(struct vlan_hdr);
- *proto = vlan_hdr->eth_proto;
-
- if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
- vlan_hdr = vlan_hdr + 1;
-
- *proto = vlan_hdr->eth_proto;
- vlan_offset += sizeof(struct vlan_hdr);
- }
- }
- return vlan_offset;
-}
-
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
@@ -396,6 +424,15 @@ struct bwg_slave {
uint8_t slave;
};
+void
+bond_tlb_activate_slave(struct bond_dev_private *internals) {
+ int i;
+
+ for (i = 0; i < internals->active_slave_count; i++) {
+ tlb_last_obytets[internals->active_slaves[i]] = 0;
+ }
+}
+
static int
bandwidth_cmp(const void *a, const void *b)
{
@@ -426,7 +463,7 @@ bandwidth_left(int port_id, uint64_t load, uint8_t update_idx,
uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
if (link_bwg == 0)
return;
- link_bwg = (link_bwg * (update_idx+1) * REORDER_PERIOD_MS);
+ link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
@@ -457,8 +494,9 @@ bond_ethdev_update_tlb_slave_cb(void *arg)
internals->slave_update_idx, &bwg_array[i]);
bwg_array[i].slave = slave_id;
- if (update_stats)
+ if (update_stats) {
tlb_last_obytets[slave_id] = slave_stats.obytes;
+ }
}
if (update_stats == 1)
@@ -467,7 +505,7 @@ bond_ethdev_update_tlb_slave_cb(void *arg)
slave_count = i;
qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
for (i = 0; i < slave_count; i++)
- internals->active_slaves[i] = bwg_array[i].slave;
+ internals->tlb_slaves_order[i] = bwg_array[i].slave;
rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
(struct bond_dev_private *)internals);
@@ -494,8 +532,8 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
if (num_of_slaves < 1)
return num_tx_total;
- memcpy(slaves, internals->active_slaves,
- sizeof(internals->active_slaves[0]) * num_of_slaves);
+ memcpy(slaves, internals->tlb_slaves_order,
+ sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
@@ -506,9 +544,7 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
}
for (i = 0; i < num_of_slaves; i++) {
- ether_addr_copy(&internals->slaves[slaves[i]].persisted_mac_addr,
- &active_slave_addr);
-
+ rte_eth_macaddr_get(slaves[i], &active_slave_addr);
for (j = num_tx_total; j < nb_pkts; j++) {
if (j + 3 < nb_pkts)
rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
@@ -528,6 +564,147 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return num_tx_total;
}
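+/*
+ * TLB slave reordering is driven by a periodic EAL alarm: enabling runs
+ * the callback once, which then re-arms itself; disabling cancels the
+ * pending alarm.
+ */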
+void
+bond_tlb_disable(struct bond_dev_private *internals)
+{
+ rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+}
+
+void
+bond_tlb_enable(struct bond_dev_private *internals)
+{
+ bond_ethdev_update_tlb_slave_cb(internals);
+}
+
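+/*
+ * Mode 6 TX: ARP packets are steered per client by the ALB table (with
+ * their source MAC rewritten to the chosen slave's address), pending
+ * client table updates are flushed as generated ARP replies, and all
+ * other traffic falls back to the TLB transmit policy.
+ */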
+static uint16_t
+bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+ struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+ struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+ struct ether_hdr *eth_h;
+ uint16_t ether_type, offset;
+
+ struct client_data *client_info;
+
+ /*
+	 * We create transmit buffers for every slave and one additional to send
+	 * through TLB. In the worst case every packet will be sent on one port.
+ */
+ struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
+ uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
+
+ /*
+	 * We create separate transmit buffers for update packets as they won't
+	 * be counted in num_tx_total.
+ */
+ struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
+ uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
+
+ struct rte_mbuf *upd_pkt;
+ size_t pkt_size;
+
+ uint16_t num_send, num_not_send = 0;
+ uint16_t num_tx_total = 0;
+ uint8_t slave_idx;
+
+ int i, j;
+
+	/* Search the tx buffer for ARP packets and forward them to the ALB logic */
+ for (i = 0; i < nb_pkts; i++) {
+ eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
+ ether_type = eth_h->ether_type;
+		offset = get_vlan_offset(eth_h, &ether_type);
+
+ if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
+ slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
+
+ /* Change src mac in eth header */
+			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
+
+ /* Add packet to slave tx buffer */
+ slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
+ slave_bufs_pkts[slave_idx]++;
+ } else {
+ /* If packet is not ARP, send it with TLB policy */
+ slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
+ bufs[i];
+ slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
+ }
+ }
+
+ /* Update connected client ARP tables */
+ if (internals->mode6.ntt) {
+ for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
+ client_info = &internals->mode6.client_table[i];
+
+ if (client_info->in_use) {
+ /* Allocate new packet to send ARP update on current slave */
+ upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
+ if (upd_pkt == NULL) {
+ RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
+ continue;
+ }
+ pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
+ + client_info->vlan_count * sizeof(struct vlan_hdr);
+ upd_pkt->data_len = pkt_size;
+ upd_pkt->pkt_len = pkt_size;
+
+ slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
+ internals);
+
+ /* Add packet to update tx buffer */
+ update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
+ update_bufs_pkts[slave_idx]++;
+ }
+ }
+ internals->mode6.ntt = 0;
+ }
+
+ /* Send ARP packets on proper slaves */
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (slave_bufs_pkts[i] > 0) {
+ num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
+ slave_bufs[i], slave_bufs_pkts[i]);
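+			/*
+			 * Copy packets the slave did not accept back into the
+			 * tail of bufs, so the caller can retry the unsent ones.
+			 */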
+ for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
+ bufs[nb_pkts - 1 - num_not_send - j] =
+						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
+ }
+
+ num_tx_total += num_send;
+ num_not_send += slave_bufs_pkts[i] - num_send;
+ }
+ }
+
+ /* Send update packets on proper slaves */
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (update_bufs_pkts[i] > 0) {
+ num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
+ update_bufs_pkts[i]);
+ for (j = num_send; j < update_bufs_pkts[i]; j++) {
+ rte_pktmbuf_free(update_bufs[i][j]);
+ }
+ }
+ }
+
+ /* Send non-ARP packets using tlb policy */
+ if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
+ num_send = bond_ethdev_tx_burst_tlb(queue,
+ slave_bufs[RTE_MAX_ETHPORTS],
+ slave_bufs_pkts[RTE_MAX_ETHPORTS]);
+
+		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
+			bufs[nb_pkts - 1 - num_not_send - j] =
+					slave_bufs[RTE_MAX_ETHPORTS]
+							[slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
+		}
+
+ num_tx_total += num_send;
+ num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
+ }
+
+ return num_tx_total;
+}
+
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
@@ -856,6 +1033,7 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
break;
case BONDING_MODE_ACTIVE_BACKUP:
case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+ case BONDING_MODE_ALB:
default:
for (i = 0; i < internals->slave_count; i++) {
if (internals->slaves[i].port_id ==
@@ -921,6 +1099,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
break;
+ case BONDING_MODE_ALB:
+ if (bond_mode_alb_enable(eth_dev) != 0)
+ return -1;
+
+ eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
+ eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
+ break;
default:
return -1;
}
@@ -1136,8 +1321,9 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
if (internals->mode == BONDING_MODE_8023AD)
bond_mode_8023ad_start(eth_dev);
- if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING)
- bond_ethdev_update_tlb_slave_cb(internals);
+ if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING ||
+ internals->mode == BONDING_MODE_ALB)
+ bond_tlb_enable(internals);
return 0;
}
@@ -1168,8 +1354,11 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
}
}
- if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
- rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+ if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING ||
+ internals->mode == BONDING_MODE_ALB) {
+ bond_tlb_disable(internals);
+ for (i = 0; i < internals->active_slave_count; i++)
+ tlb_last_obytets[internals->active_slaves[i]] = 0;
}
internals->active_slave_count = 0;
@@ -1366,8 +1555,12 @@ bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct bond_dev_private *internals = dev->data->dev_private;
struct rte_eth_stats slave_stats;
+
int i;
+ /* clear bonded stats before populating from slaves */
+ memset(stats, 0, sizeof(*stats));
+
for (i = 0; i < internals->slave_count; i++) {
rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
@@ -1422,6 +1615,7 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+ case BONDING_MODE_ALB:
default:
rte_eth_promiscuous_enable(internals->current_primary_port);
}
@@ -1451,6 +1645,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+ case BONDING_MODE_ALB:
default:
rte_eth_promiscuous_disable(internals->current_primary_port);
}
@@ -39,6 +39,7 @@
#include "rte_eth_bond.h"
#include "rte_eth_bond_8023ad_private.h"
+#include "rte_eth_bond_alb.h"
#define PMD_BOND_SLAVE_PORT_KVARG ("slave")
#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
@@ -148,6 +149,8 @@ struct bond_dev_private {
/**< Arary of bonded slaves details */
struct mode8023ad_private mode4;
+	uint8_t tlb_slaves_order[RTE_MAX_ETHPORTS]; /**< TLB active slaves send order */
+ struct mode_alb_private mode6;
uint32_t rx_offload_capa; /** Rx offload capability */
uint32_t tx_offload_capa; /** Tx offload capability */
@@ -272,4 +275,13 @@ int
bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused,
const char *value, void *extra_args);
+void
+bond_tlb_disable(struct bond_dev_private *internals);
+
+void
+bond_tlb_enable(struct bond_dev_private *internals);
+
+void
+bond_tlb_activate_slave(struct bond_dev_private *internals);
+
#endif