[v2] net/af_packet: allow changing fanout mode
Checks
Commit Message
This allows us to control the algorithm used to spread traffic between
sockets, adding more fine grained control. If the user does not
specify a fanout mode, the PMD driver will default to
PACKET_FANOUT_HASH.
Signed-off-by: Tudor Cornea <tudor.cornea@gmail.com>
---
v2:
* Renamed the patch
* Replaced packet_fanout argument with fanout_mode, which allows
more fine grained control
---
doc/guides/nics/af_packet.rst | 4 +-
drivers/net/af_packet/rte_eth_af_packet.c | 92 ++++++++++++++++++++---
2 files changed, 83 insertions(+), 13 deletions(-)
Comments
On Mon, 6 Jan 2025 17:35:08 +0200
Tudor Cornea <tudor.cornea@gmail.com> wrote:
> This allows us to control the algorithm used to spread traffic between
> sockets, adding more fine grained control. If the user does not
> specify a fanout mode, the PMD driver will default to
> PACKET_FANOUT_HASH.
>
> Signed-off-by: Tudor Cornea <tudor.cornea@gmail.com>
>
> ---
> v2:
> * Renamed the patch
> * Replaced packet_fanout argument with fanout_mode, which allows
> more fine grained control
> ---
> doc/guides/nics/af_packet.rst | 4 +-
> drivers/net/af_packet/rte_eth_af_packet.c | 92 ++++++++++++++++++++---
> 2 files changed, 83 insertions(+), 13 deletions(-)
>
> diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst
> index a343d3a961..3443f95004 100644
> --- a/doc/guides/nics/af_packet.rst
> +++ b/doc/guides/nics/af_packet.rst
> @@ -25,6 +25,8 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings.
> * ``qpairs`` - number of Rx and Tx queues (optional, default 1);
> * ``qdisc_bypass`` - set PACKET_QDISC_BYPASS option in AF_PACKET (optional,
> disabled by default);
> +* ``fanout_mode`` - set fanout algorithm. Possible choices: hash,lb,cpu,rollover,rnd,qm (optional,
> + default hash);
Use proper punctuation here, need space after comma.
A description of what the differences are or reference to af-packet man page would help.
https://www.man7.org/linux/man-pages/man7/packet.7.html
The user should not have to huting to find what "qm" means for example.
Should also address the earlier part in this document that is:
The PACKET_FANOUT_HASH behavior of AF_PACKET is used for frame reception.
> * ``blocksz`` - PACKET_MMAP block size (optional, default 4096);
> * ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple
> of 16B);
> @@ -64,7 +66,7 @@ framecnt=512):
>
> .. code-block:: console
>
> - --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0
> + --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash
>
> Features and Limitations
> ------------------------
> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
> index ceb8d9356a..8449975384 100644
> --- a/drivers/net/af_packet/rte_eth_af_packet.c
> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
> @@ -36,6 +36,7 @@
> #define ETH_AF_PACKET_FRAMESIZE_ARG "framesz"
> #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt"
> #define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass"
> +#define ETH_AF_PACKET_FANOUT_MODE_ARG "fanout_mode"
>
> #define DFLT_FRAME_SIZE (1 << 11)
> #define DFLT_FRAME_COUNT (1 << 9)
> @@ -96,6 +97,7 @@ static const char *valid_arguments[] = {
> ETH_AF_PACKET_FRAMESIZE_ARG,
> ETH_AF_PACKET_FRAMECOUNT_ARG,
> ETH_AF_PACKET_QDISC_BYPASS_ARG,
> + ETH_AF_PACKET_FANOUT_MODE_ARG,
> NULL
> };
>
> @@ -700,6 +702,61 @@ open_packet_iface(const char *key __rte_unused,
> return 0;
> }
>
> +#if defined(PACKET_FANOUT)
Since PACKET_FANOUT has existed since Linux 3.1, the #ifdef
for this can be removed now.
> +#define PACKET_FANOUT_INVALID -1
> +
> +static int
> +get_fanout_group_id(int if_index)
> +{
> + return (getpid() ^ if_index) & 0xffff;
> +}
> +
> +static int
> +get_fanout_mode(const char *fanout_mode)
> +{
> + int mode = PACKET_FANOUT_FLAG_DEFRAG;
> +
> +#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
No more #ifdefs please
> + mode |= PACKET_FANOUT_FLAG_ROLLOVER;
> +#endif
> +
> + if (!fanout_mode) {
> + /* Default */
> + mode |= PACKET_FANOUT_HASH;
> + } else if (!strcmp(fanout_mode, "hash")) {
> + mode |= PACKET_FANOUT_HASH;
> + } else if (!strcmp(fanout_mode, "lb")) {
> + mode |= PACKET_FANOUT_LB;
> + } else if (!strcmp(fanout_mode, "cpu")) {
> + mode |= PACKET_FANOUT_CPU;
> + } else if (!strcmp(fanout_mode, "rollover")) {
> + mode |= PACKET_FANOUT_ROLLOVER;
> + } else if (!strcmp(fanout_mode, "rnd")) {
> + mode |= PACKET_FANOUT_RND;
> + } else if (!strcmp(fanout_mode, "qm")) {
> + mode |= PACKET_FANOUT_QM;
> + } else {
> + /* Invalid Fanout Mode */
> + mode = PACKET_FANOUT_INVALID;
> + }
Maybe allow longer names like "load_balance" or "queue_map"
> +
> + return mode;
> +}
> +
> +static int
> +get_fanout(const char *fanout_mode, int if_index)
> +{
> + int group_id = get_fanout_group_id(if_index);
> + int mode = get_fanout_mode(fanout_mode);
> + int fanout = PACKET_FANOUT_INVALID;
> +
> + if (mode != PACKET_FANOUT_INVALID)
> + fanout = group_id | (mode << 16);
> +
> + return fanout;
> +}
This could be simplified as:
static int
get_fanout(const char *fanout_mode, int if_index)
{
int mode = get_fanout_mode(fanout_mode);
if (mode != PACKET_FANOUT_INVALID) {
return get_fanout_group_id(if_index) | (mode << 16);
else
return PACKOUT_FANOUT_INVALID;
}
> +#endif
> +
> static int
> rte_pmd_init_internals(struct rte_vdev_device *dev,
> const int sockfd,
> @@ -709,6 +766,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
> unsigned int framesize,
> unsigned int framecnt,
> unsigned int qdisc_bypass,
> + const char *fanout_mode,
> struct pmd_internals **internals,
> struct rte_eth_dev **eth_dev,
> struct rte_kvargs *kvlist)
> @@ -810,11 +868,12 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
> sockaddr.sll_ifindex = (*internals)->if_index;
>
> #if defined(PACKET_FANOUT)
> - fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
> - fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
> -#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
> - fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
> -#endif
> + fanout_arg = get_fanout(fanout_mode, (*internals)->if_index);
> +
> + if (fanout_arg == PACKET_FANOUT_INVALID) {
Looks better without blank line between get_fanout() and the if() but
that is totally a matter of personal taste.
> + PMD_LOG(ERR, "Invalid fanout mode: %s", fanout_mode);
> + goto error;
> + }
@@ -25,6 +25,8 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings.
* ``qpairs`` - number of Rx and Tx queues (optional, default 1);
* ``qdisc_bypass`` - set PACKET_QDISC_BYPASS option in AF_PACKET (optional,
disabled by default);
+* ``fanout_mode`` - set fanout algorithm. Possible choices: hash,lb,cpu,rollover,rnd,qm (optional,
+ default hash);
* ``blocksz`` - PACKET_MMAP block size (optional, default 4096);
* ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple
of 16B);
@@ -64,7 +66,7 @@ framecnt=512):
.. code-block:: console
- --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0
+ --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash
Features and Limitations
------------------------
@@ -36,6 +36,7 @@
#define ETH_AF_PACKET_FRAMESIZE_ARG "framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt"
#define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass"
+#define ETH_AF_PACKET_FANOUT_MODE_ARG "fanout_mode"
#define DFLT_FRAME_SIZE (1 << 11)
#define DFLT_FRAME_COUNT (1 << 9)
@@ -96,6 +97,7 @@ static const char *valid_arguments[] = {
ETH_AF_PACKET_FRAMESIZE_ARG,
ETH_AF_PACKET_FRAMECOUNT_ARG,
ETH_AF_PACKET_QDISC_BYPASS_ARG,
+ ETH_AF_PACKET_FANOUT_MODE_ARG,
NULL
};
@@ -700,6 +702,61 @@ open_packet_iface(const char *key __rte_unused,
return 0;
}
+#if defined(PACKET_FANOUT)
+#define PACKET_FANOUT_INVALID -1
+
+static int
+get_fanout_group_id(int if_index)
+{
+ return (getpid() ^ if_index) & 0xffff;
+}
+
+static int
+get_fanout_mode(const char *fanout_mode)
+{
+ int mode = PACKET_FANOUT_FLAG_DEFRAG;
+
+#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
+ mode |= PACKET_FANOUT_FLAG_ROLLOVER;
+#endif
+
+ if (!fanout_mode) {
+ /* Default */
+ mode |= PACKET_FANOUT_HASH;
+ } else if (!strcmp(fanout_mode, "hash")) {
+ mode |= PACKET_FANOUT_HASH;
+ } else if (!strcmp(fanout_mode, "lb")) {
+ mode |= PACKET_FANOUT_LB;
+ } else if (!strcmp(fanout_mode, "cpu")) {
+ mode |= PACKET_FANOUT_CPU;
+ } else if (!strcmp(fanout_mode, "rollover")) {
+ mode |= PACKET_FANOUT_ROLLOVER;
+ } else if (!strcmp(fanout_mode, "rnd")) {
+ mode |= PACKET_FANOUT_RND;
+ } else if (!strcmp(fanout_mode, "qm")) {
+ mode |= PACKET_FANOUT_QM;
+ } else {
+ /* Invalid Fanout Mode */
+ mode = PACKET_FANOUT_INVALID;
+ }
+
+ return mode;
+}
+
+static int
+get_fanout(const char *fanout_mode, int if_index)
+{
+ int group_id = get_fanout_group_id(if_index);
+ int mode = get_fanout_mode(fanout_mode);
+ int fanout = PACKET_FANOUT_INVALID;
+
+ if (mode != PACKET_FANOUT_INVALID)
+ fanout = group_id | (mode << 16);
+
+ return fanout;
+}
+#endif
+
static int
rte_pmd_init_internals(struct rte_vdev_device *dev,
const int sockfd,
@@ -709,6 +766,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
unsigned int framesize,
unsigned int framecnt,
unsigned int qdisc_bypass,
+ const char *fanout_mode,
struct pmd_internals **internals,
struct rte_eth_dev **eth_dev,
struct rte_kvargs *kvlist)
@@ -810,11 +868,12 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
sockaddr.sll_ifindex = (*internals)->if_index;
#if defined(PACKET_FANOUT)
- fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
- fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
-#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
- fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
-#endif
+ fanout_arg = get_fanout(fanout_mode, (*internals)->if_index);
+
+ if (fanout_arg == PACKET_FANOUT_INVALID) {
+ PMD_LOG(ERR, "Invalid fanout mode: %s", fanout_mode);
+ goto error;
+ }
#endif
for (q = 0; q < nb_queues; q++) {
@@ -927,13 +986,16 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
}
#if defined(PACKET_FANOUT)
- rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
- &fanout_arg, sizeof(fanout_arg));
- if (rc == -1) {
- PMD_LOG_ERRNO(ERR,
- "%s: could not set PACKET_FANOUT on AF_PACKET socket for %s",
- name, pair->value);
- goto error;
+ if (nb_queues > 1) {
+ rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
+ &fanout_arg, sizeof(fanout_arg));
+ if (rc == -1) {
+ PMD_LOG_ERRNO(ERR,
+ "%s: could not set PACKET_FANOUT "
+ "on AF_PACKET socket for %s",
+ name, pair->value);
+ goto error;
+ }
}
#endif
}
@@ -1003,6 +1065,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
unsigned int framecount = DFLT_FRAME_COUNT;
unsigned int qpairs = 1;
unsigned int qdisc_bypass = 1;
+ const char *fanout_mode = NULL;
/* do some parameter checking */
if (*sockfd < 0)
@@ -1065,6 +1128,10 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
}
continue;
}
+ if (strstr(pair->key, ETH_AF_PACKET_FANOUT_MODE_ARG) != NULL) {
+ fanout_mode = pair->value;
+ continue;
+ }
}
if (framesize > blocksize) {
@@ -1091,6 +1158,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
blocksize, blockcount,
framesize, framecount,
qdisc_bypass,
+ fanout_mode,
&internals, ð_dev,
kvlist) < 0)
return -1;