[v1] net/af_xdp: enable a sock path alongside use_cni
Checks
Commit Message
With the original 'use_cni' implementation, (using a
hardcoded socket rather than a configurable one),
if a single pod is requesting multiple net devices
and these devices are from different pools, then
the container attempts to mount all the netdev UDSes
in the pod as /tmp/afxdp.sock. Which means that at best
only 1 netdev will handshake correctly with the AF_XDP
DP. This patch addresses this by making the socket
parameter configurable alongside the 'use_cni' param.
Tested with the AF_XDP DP CNI PR 81.
Signed-off-by: Maryam Tahhan <mtahhan@redhat.com>
---
doc/guides/howto/af_xdp_cni.rst | 18 +++++++---
drivers/net/af_xdp/rte_eth_af_xdp.c | 56 +++++++++++++++++++----------
2 files changed, 51 insertions(+), 23 deletions(-)
Comments
Hi Maryam,
I have added some suggestion below.
Regrads,
Shibin
> -----Original Message-----
> From: Maryam Tahhan <mtahhan@redhat.com>
> Sent: Thursday, November 30, 2023 9:14 AM
> To: ferruh.yigit@amd.com; stephen@networkplumber.org;
> lihuisong@huawei.com; fengchengwen@huawei.com;
> liuyonglong@huawei.com
> Cc: dev@dpdk.org; Tahhan, Maryam <mtahhan@redhat.com>
> Subject: [v1] net/af_xdp: enable a sock path alongside use_cni
>
> With the original 'use_cni' implementation, (using a hardcoded socket rather
> than a configurable one), if a single pod is requesting multiple net devices
> and these devices are from different pools, then the container attempts to
> mount all the netdev UDSes in the pod as /tmp/afxdp.sock. Which means
> that at best only 1 netdev will handshake correctly with the AF_XDP DP. This
> patch addresses this by making the socket parameter configurable alongside
> the 'use_cni' param.
> Tested with the AF_XDP DP CNI PR 81.
>
> Signed-off-by: Maryam Tahhan <mtahhan@redhat.com>
> ---
> doc/guides/howto/af_xdp_cni.rst | 18 +++++++---
> drivers/net/af_xdp/rte_eth_af_xdp.c | 56 +++++++++++++++++++----------
> 2 files changed, 51 insertions(+), 23 deletions(-)
>
> diff --git a/doc/guides/howto/af_xdp_cni.rst
> b/doc/guides/howto/af_xdp_cni.rst index a1a6d5b99c..a2d90c665d 100644
> --- a/doc/guides/howto/af_xdp_cni.rst
> +++ b/doc/guides/howto/af_xdp_cni.rst
> @@ -38,9 +38,10 @@ The XSKMAP is a BPF map of AF_XDP sockets (XSK).
> The client can then proceed with creating an AF_XDP socket and inserting
> that socket into the XSKMAP pointed to by the descriptor.
>
> -The EAL vdev argument ``use_cni`` is used to indicate that the user wishes
> +The EAL vdev arguments ``use_cni`` and ``sock`` are used to indicate
> +that the user wishes
> to run the PMD in unprivileged mode and to receive the XSKMAP file
> descriptor from the CNI.
> +
> When this flag is set,
> the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag should be
> used when creating the socket @@ -49,7 +50,7 @@ Instead the loading is
> handled by the CNI.
>
> .. note::
>
> - The Unix Domain Socket file path appear in the end user is
> "/tmp/afxdp.sock".
> + The Unix Domain Socket file path appears to the end user at
> "/tmp/afxdp_dp/<netdev>/afxdp.sock".
>
>
> Prerequisites
> @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
> capabilities:
> add:
> - CAP_NET_RAW
> - - CAP_BPF
> resources:
> requests:
> hugepages-2Mi: 2Gi
> @@ -245,7 +245,17 @@ Howto run dpdk-testpmd with CNI plugin:
>
> kubectl exec -i <Pod name> --container <containers name> -- \
> /<Path>/dpdk-testpmd -l 0,1 --no-pci \
> - --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
> + --vdev=net_af_xdp0,use_cni=1,iface=<interface
> name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> + -- --no-mlockall --in-memory
> +
> +for multiple devices use:
> +
> + .. code-block:: console
> +
> + kubectl exec -i <Pod name> --container <containers name> -- \
> + /<Path>/dpdk-testpmd -l 0-2 --no-pci \
> + --vdev=net_af_xdp0,use_cni=1,iface=<interface
> name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> + --vdev=net_af_xdp1,use_cni=1,iface=<interface
> + name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> -- --no-mlockall --in-memory
>
> For further reference please use the `e2e`_ test case in `AF_XDP Plugin for
> Kubernetes`_ diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c
> b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 353c8688ec..f728dae2f9 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -88,7 +88,6 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype,
> NOTICE);
> #define UDS_MAX_CMD_LEN 64
> #define UDS_MAX_CMD_RESP 128
> #define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
> -#define UDS_SOCK "/tmp/afxdp.sock"
> #define UDS_CONNECT_MSG "/connect"
> #define UDS_HOST_OK_MSG "/host_ok"
> #define UDS_HOST_NAK_MSG "/host_nak"
> @@ -171,6 +170,7 @@ struct pmd_internals {
> bool custom_prog_configured;
> bool force_copy;
> bool use_cni;
> + char sock_path[PATH_MAX];
I would recommend using variable name as "uds_path".
> struct bpf_map *map;
>
> struct rte_ether_addr eth_addr;
> @@ -191,6 +191,7 @@ struct pmd_process_private {
> #define ETH_AF_XDP_BUDGET_ARG "busy_budget"
> #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
> #define ETH_AF_XDP_USE_CNI_ARG "use_cni"
> +#define ETH_AF_XDP_SOCK_ARG "sock"
To make it clear would recommend using "sock_path" and also ETH_AF_XDP_CNI_UDS_PATH_ARG or ETH_AF_XDP_SOCK_PATH_ARG.
>
> static const char * const valid_arguments[] = {
> ETH_AF_XDP_IFACE_ARG,
> @@ -201,6 +202,7 @@ static const char * const valid_arguments[] = {
> ETH_AF_XDP_BUDGET_ARG,
> ETH_AF_XDP_FORCE_COPY_ARG,
> ETH_AF_XDP_USE_CNI_ARG,
> + ETH_AF_XDP_SOCK_ARG,
> NULL
> };
>
> @@ -1351,7 +1353,7 @@ configure_preferred_busy_poll(struct
> pkt_rx_queue *rxq) }
>
> static int
> -init_uds_sock(struct sockaddr_un *server)
> +init_uds_sock(struct sockaddr_un *server, const char *sock_path)
> {
> int sock;
>
> @@ -1362,7 +1364,7 @@ init_uds_sock(struct sockaddr_un *server)
> }
>
> server->sun_family = AF_UNIX;
> - strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
> + strlcpy(server->sun_path, sock_path, sizeof(server->sun_path));
>
> if (connect(sock, (struct sockaddr *)server, sizeof(struct
> sockaddr_un)) < 0) {
> close(sock);
> @@ -1382,7 +1384,7 @@ struct msg_internal { };
>
> static int
> -send_msg(int sock, char *request, int *fd)
> +send_msg(int sock, char *request, int *fd, const char *sock_path)
> {
> int snd;
> struct iovec iov;
> @@ -1393,7 +1395,7 @@ send_msg(int sock, char *request, int *fd)
>
> memset(&dst, 0, sizeof(dst));
> dst.sun_family = AF_UNIX;
> - strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
> + strlcpy(dst.sun_path, sock_path, sizeof(dst.sun_path));
>
> /* Initialize message header structure */
> memset(&msgh, 0, sizeof(msgh));
> @@ -1471,7 +1473,7 @@ read_msg(int sock, char *response, struct
> sockaddr_un *s, int *fd)
>
> static int
> make_request_cni(int sock, struct sockaddr_un *server, char *request,
> - int *req_fd, char *response, int *out_fd)
> + int *req_fd, char *response, int *out_fd, const char
> *sock_path)
> {
> int rval;
>
> @@ -1483,7 +1485,7 @@ make_request_cni(int sock, struct sockaddr_un
> *server, char *request,
> if (req_fd == NULL)
> rval = write(sock, request, strlen(request));
> else
> - rval = send_msg(sock, request, req_fd);
> + rval = send_msg(sock, request, req_fd, sock_path);
>
> if (rval < 0) {
> AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); @@
> -1507,7 +1509,7 @@ check_response(char *response, char *exp_resp, long
> size) }
>
> static int
> -get_cni_fd(char *if_name)
> +get_cni_fd(char *if_name, const char *sock_path)
> {
> char request[UDS_MAX_CMD_LEN],
> response[UDS_MAX_CMD_RESP];
> char hostname[MAX_LONG_OPT_SZ],
> exp_resp[UDS_MAX_CMD_RESP]; @@ -1520,14 +1522,14 @@
> get_cni_fd(char *if_name)
> return -1;
>
> memset(&server, 0, sizeof(server));
> - sock = init_uds_sock(&server);
> + sock = init_uds_sock(&server, sock_path);
> if (sock < 0)
> return -1;
>
> /* Initiates handshake to CNI send: /connect,hostname */
> snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG,
> hostname);
> memset(response, 0, sizeof(response));
> - if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd) < 0) {
> + if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd,
> +sock_path) < 0) {
Why do we need to pass "sock_path" here as we have already connected the sock with sock_path in init_uds_sock()?
> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> goto err_close;
> }
> @@ -1541,7 +1543,7 @@ get_cni_fd(char *if_name)
> /* Request for "/version" */
> strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
> memset(response, 0, sizeof(response));
> - if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd) < 0) {
> + if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd,
> +sock_path) < 0) {
Same question as above.
> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> goto err_close;
> }
> @@ -1549,7 +1551,7 @@ get_cni_fd(char *if_name)
> /* Request for file descriptor for netdev name*/
> snprintf(request, sizeof(request), "%s,%s",
> UDS_XSK_MAP_FD_MSG, if_name);
> memset(response, 0, sizeof(response));
> - if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd) < 0) {
> + if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd,
> +sock_path) < 0) {
Same question as above.
> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> goto err_close;
> }
> @@ -1571,7 +1573,7 @@ get_cni_fd(char *if_name)
> /* Initiate close connection */
> strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
> memset(response, 0, sizeof(response));
> - if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd) < 0) {
> + if (make_request_cni(sock, &server, request, NULL, response,
> &out_fd,
> +sock_path) < 0) {
Same question as above.
> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> goto err_close;
> }
> @@ -1698,7 +1700,7 @@ xsk_configure(struct pmd_internals *internals,
> struct pkt_rx_queue *rxq,
> int err, fd, map_fd;
>
> /* get socket fd from CNI plugin */
> - map_fd = get_cni_fd(internals->if_name);
> + map_fd = get_cni_fd(internals->if_name, internals-
> >sock_path);
> if (map_fd < 0) {
> AF_XDP_LOG(ERR, "Failed to receive CNI plugin
> fd\n");
> goto out_xsk;
> @@ -2023,7 +2025,8 @@ xdp_get_channels_info(const char *if_name, int
> *max_queues, static int parse_parameters(struct rte_kvargs *kvlist, char
> *if_name, int *start_queue,
> int *queue_cnt, int *shared_umem, char *prog_path,
> - int *busy_budget, int *force_copy, int *use_cni)
> + int *busy_budget, int *force_copy, int *use_cni,
> + char *sock_path)
> {
> int ret;
>
> @@ -2069,6 +2072,11 @@ parse_parameters(struct rte_kvargs *kvlist, char
> *if_name, int *start_queue,
> if (ret < 0)
> goto free_kvlist;
>
> + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SOCK_ARG,
> + &parse_prog_arg, sock_path);
Parse_prog_arg does 2 things copy the sock_arg value and also check the access to the socket.
Checking access here has a chance of causing raise condition so I would recommend to skip this check here as this will be taken care in the init_uds_sock().
> + if (ret < 0)
> + goto free_kvlist;
> +
> free_kvlist:
> rte_kvargs_free(kvlist);
> return ret;
> @@ -2108,7 +2116,7 @@ static struct rte_eth_dev * init_internals(struct
> rte_vdev_device *dev, const char *if_name,
> int start_queue_idx, int queue_cnt, int shared_umem,
> const char *prog_path, int busy_budget, int force_copy,
> - int use_cni)
> + int use_cni, const char *sock_path)
> {
> const char *name = rte_vdev_device_name(dev);
> const unsigned int numa_node = dev->device.numa_node; @@ -
> 2138,6 +2146,7 @@ init_internals(struct rte_vdev_device *dev, const char
> *if_name,
> internals->shared_umem = shared_umem;
> internals->force_copy = force_copy;
> internals->use_cni = use_cni;
> + strlcpy(internals->sock_path, sock_path, PATH_MAX);
>
> if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
> &internals->combined_queue_cnt)) { @@ -
> 2328,6 +2337,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> int busy_budget = -1, ret;
> int force_copy = 0;
> int use_cni = 0;
> + char sock_path[PATH_MAX] = {'\0'};
> struct rte_eth_dev *eth_dev = NULL;
> const char *name = rte_vdev_device_name(dev);
>
> @@ -2370,7 +2380,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> *dev)
>
> if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
> &xsk_queue_cnt, &shared_umem, prog_path,
> - &busy_budget, &force_copy, &use_cni) < 0) {
> + &busy_budget, &force_copy, &use_cni,
> sock_path) < 0) {
> AF_XDP_LOG(ERR, "Invalid kvargs value\n");
> return -EINVAL;
> }
> @@ -2387,6 +2397,13 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> *dev)
> return -EINVAL;
> }
>
> + if (use_cni && !strnlen(sock_path, PATH_MAX)) {
> + AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' must
> also be provided\n",
> + ETH_AF_XDP_USE_CNI_ARG,
> ETH_AF_XDP_SOCK_ARG);
> + return -EINVAL;
> + }
> +
> +
> if (strlen(if_name) == 0) {
> AF_XDP_LOG(ERR, "Network interface must be
> specified\n");
> return -EINVAL;
> @@ -2410,7 +2427,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> *dev)
>
> eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
> xsk_queue_cnt, shared_umem, prog_path,
> - busy_budget, force_copy, use_cni);
> + busy_budget, force_copy, use_cni,
> sock_path);
> if (eth_dev == NULL) {
> AF_XDP_LOG(ERR, "Failed to init internals\n");
> return -1;
> @@ -2471,4 +2488,5 @@
> RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
> "xdp_prog=<string> "
> "busy_budget=<int> "
> "force_copy=<int> "
> - "use_cni=<int> ");
> + "use_cni=<int> "
> + "sock=<string> ");
> --
> 2.41.0
Hi Shibin
Thanks for the review. Comments inline.
BR
Maryam
On 30/11/2023 12:14, Koikkara Reeny, Shibin wrote:
> Hi Maryam,
>
> I have added some suggestion below.
>
> Regrads,
> Shibin
[snip]
> NOTICE);
> #define UDS_MAX_CMD_LEN 64
> #define UDS_MAX_CMD_RESP 128
> #define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
> -#define UDS_SOCK "/tmp/afxdp.sock"
> #define UDS_CONNECT_MSG "/connect"
> #define UDS_HOST_OK_MSG "/host_ok"
> #define UDS_HOST_NAK_MSG "/host_nak"
> @@ -171,6 +170,7 @@ struct pmd_internals {
> bool custom_prog_configured;
> bool force_copy;
> bool use_cni;
> + char sock_path[PATH_MAX];
> I would recommend using variable name as "uds_path".
No issues in renaming
>
>> struct bpf_map *map;
>>
>> struct rte_ether_addr eth_addr;
>> @@ -191,6 +191,7 @@ struct pmd_process_private {
>> #define ETH_AF_XDP_BUDGET_ARG "busy_budget"
>> #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
>> #define ETH_AF_XDP_USE_CNI_ARG "use_cni"
>> +#define ETH_AF_XDP_SOCK_ARG "sock"
> To make it clear would recommend using "sock_path" and also ETH_AF_XDP_CNI_UDS_PATH_ARG or ETH_AF_XDP_SOCK_PATH_ARG
No issues in renaming
> .
>
>> static const char * const valid_arguments[] = {
>> ETH_AF_XDP_IFACE_ARG,
>> @@ -201,6 +202,7 @@ static const char * const valid_arguments[] = {
>> ETH_AF_XDP_BUDGET_ARG,
>> ETH_AF_XDP_FORCE_COPY_ARG,
>> ETH_AF_XDP_USE_CNI_ARG,
>> + ETH_AF_XDP_SOCK_ARG,
>> NULL
>> };
>>
>> @@ -1351,7 +1353,7 @@ configure_preferred_busy_poll(struct
>> pkt_rx_queue *rxq) }
>>
>> static int
>> -init_uds_sock(struct sockaddr_un *server)
>> +init_uds_sock(struct sockaddr_un *server, const char *sock_path)
>> {
>> int sock;
>>
>> @@ -1362,7 +1364,7 @@ init_uds_sock(struct sockaddr_un *server)
>> }
>>
>> server->sun_family = AF_UNIX;
>> - strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
>> + strlcpy(server->sun_path, sock_path, sizeof(server->sun_path));
>>
>> if (connect(sock, (struct sockaddr *)server, sizeof(struct
>> sockaddr_un)) < 0) {
>> close(sock);
>> @@ -1382,7 +1384,7 @@ struct msg_internal { };
>>
>> static int
>> -send_msg(int sock, char *request, int *fd)
>> +send_msg(int sock, char *request, int *fd, const char *sock_path)
>> {
>> int snd;
>> struct iovec iov;
>> @@ -1393,7 +1395,7 @@ send_msg(int sock, char *request, int *fd)
>>
>> memset(&dst, 0, sizeof(dst));
>> dst.sun_family = AF_UNIX;
>> - strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
>> + strlcpy(dst.sun_path, sock_path, sizeof(dst.sun_path));
>>
>> /* Initialize message header structure */
>> memset(&msgh, 0, sizeof(msgh));
>> @@ -1471,7 +1473,7 @@ read_msg(int sock, char *response, struct
>> sockaddr_un *s, int *fd)
>>
>> static int
>> make_request_cni(int sock, struct sockaddr_un *server, char *request,
>> - int *req_fd, char *response, int *out_fd)
>> + int *req_fd, char *response, int *out_fd, const char
>> *sock_path)
>> {
>> int rval;
>>
>> @@ -1483,7 +1485,7 @@ make_request_cni(int sock, struct sockaddr_un
>> *server, char *request,
>> if (req_fd == NULL)
>> rval = write(sock, request, strlen(request));
>> else
>> - rval = send_msg(sock, request, req_fd);
>> + rval = send_msg(sock, request, req_fd, sock_path);
>>
>> if (rval < 0) {
>> AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); @@
>> -1507,7 +1509,7 @@ check_response(char *response, char *exp_resp, long
>> size) }
>>
>> static int
>> -get_cni_fd(char *if_name)
>> +get_cni_fd(char *if_name, const char *sock_path)
>> {
>> char request[UDS_MAX_CMD_LEN],
>> response[UDS_MAX_CMD_RESP];
>> char hostname[MAX_LONG_OPT_SZ],
>> exp_resp[UDS_MAX_CMD_RESP]; @@ -1520,14 +1522,14 @@
>> get_cni_fd(char *if_name)
>> return -1;
>>
>> memset(&server, 0, sizeof(server));
>> - sock = init_uds_sock(&server);
>> + sock = init_uds_sock(&server, sock_path);
>> if (sock < 0)
>> return -1;
>>
>> /* Initiates handshake to CNI send: /connect,hostname */
>> snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG,
>> hostname);
>> memset(response, 0, sizeof(response));
>> - if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd) < 0) {
>> + if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd,
>> +sock_path) < 0) {
> Why do we need to pass "sock_path" here as we have already connected the sock with sock_path in init_uds_sock()?
it's used in the send_msg() function insidemake_request_cni(). In the
original implementation of send_msg used a #defined value which is why
it wasn't needed before. so as far as I can tell send_msg still needs
that path. but I'm open to suggestions.
>
>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>> goto err_close;
>> }
>> @@ -1541,7 +1543,7 @@ get_cni_fd(char *if_name)
>> /* Request for "/version" */
>> strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
>> memset(response, 0, sizeof(response));
>> - if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd) < 0) {
>> + if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd,
>> +sock_path) < 0) {
> Same question as above.
Please see previous ans
>
>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>> goto err_close;
>> }
>> @@ -1549,7 +1551,7 @@ get_cni_fd(char *if_name)
>> /* Request for file descriptor for netdev name*/
>> snprintf(request, sizeof(request), "%s,%s",
>> UDS_XSK_MAP_FD_MSG, if_name);
>> memset(response, 0, sizeof(response));
>> - if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd) < 0) {
>> + if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd,
>> +sock_path) < 0) {
> Same question as above.
Please see previous ans
>
>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>> goto err_close;
>> }
>> @@ -1571,7 +1573,7 @@ get_cni_fd(char *if_name)
>> /* Initiate close connection */
>> strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
>> memset(response, 0, sizeof(response));
>> - if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd) < 0) {
>> + if (make_request_cni(sock, &server, request, NULL, response,
>> &out_fd,
>> +sock_path) < 0) {
> Same question as above.
Please see previous ans
>
>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>> goto err_close;
>> }
>> @@ -1698,7 +1700,7 @@ xsk_configure(struct pmd_internals *internals,
>> struct pkt_rx_queue *rxq,
>> int err, fd, map_fd;
>>
>> /* get socket fd from CNI plugin */
>> - map_fd = get_cni_fd(internals->if_name);
>> + map_fd = get_cni_fd(internals->if_name, internals-
>>> sock_path);
>> if (map_fd < 0) {
>> AF_XDP_LOG(ERR, "Failed to receive CNI plugin
>> fd\n");
>> goto out_xsk;
>> @@ -2023,7 +2025,8 @@ xdp_get_channels_info(const char *if_name, int
>> *max_queues, static int parse_parameters(struct rte_kvargs *kvlist, char
>> *if_name, int *start_queue,
>> int *queue_cnt, int *shared_umem, char *prog_path,
>> - int *busy_budget, int *force_copy, int *use_cni)
>> + int *busy_budget, int *force_copy, int *use_cni,
>> + char *sock_path)
>> {
>> int ret;
>>
>> @@ -2069,6 +2072,11 @@ parse_parameters(struct rte_kvargs *kvlist, char
>> *if_name, int *start_queue,
>> if (ret < 0)
>> goto free_kvlist;
>>
>> + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SOCK_ARG,
>> + &parse_prog_arg, sock_path);
> Parse_prog_arg does 2 things copy the sock_arg value and also check the access to the socket.
> Checking access here has a chance of causing raise condition so I would recommend to skip this check here as this will be taken care in the init_uds_sock().
If there's an issue here processing is abandoned. Is it not better to
catch any issue early on? I'm not sure I understand the race condition?
>
>> + if (ret < 0)
>> + goto free_kvlist;
>> +
>> free_kvlist:
>> rte_kvargs_free(kvlist);
>> return ret;
>> @@ -2108,7 +2116,7 @@ static struct rte_eth_dev * init_internals(struct
>> rte_vdev_device *dev, const char *if_name,
>> int start_queue_idx, int queue_cnt, int shared_umem,
>> const char *prog_path, int busy_budget, int force_copy,
>> - int use_cni)
>> + int use_cni, const char *sock_path)
>> {
>> const char *name = rte_vdev_device_name(dev);
>> const unsigned int numa_node = dev->device.numa_node; @@ -
>> 2138,6 +2146,7 @@ init_internals(struct rte_vdev_device *dev, const char
>> *if_name,
>> internals->shared_umem = shared_umem;
>> internals->force_copy = force_copy;
>> internals->use_cni = use_cni;
>> + strlcpy(internals->sock_path, sock_path, PATH_MAX);
>>
>> if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
>> &internals->combined_queue_cnt)) { @@ -
>> 2328,6 +2337,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>> int busy_budget = -1, ret;
>> int force_copy = 0;
>> int use_cni = 0;
>> + char sock_path[PATH_MAX] = {'\0'};
>> struct rte_eth_dev *eth_dev = NULL;
>> const char *name = rte_vdev_device_name(dev);
>>
>> @@ -2370,7 +2380,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
>> *dev)
>>
>> if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
>> &xsk_queue_cnt, &shared_umem, prog_path,
>> - &busy_budget, &force_copy, &use_cni) < 0) {
>> + &busy_budget, &force_copy, &use_cni,
>> sock_path) < 0) {
>> AF_XDP_LOG(ERR, "Invalid kvargs value\n");
>> return -EINVAL;
>> }
>> @@ -2387,6 +2397,13 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
>> *dev)
>> return -EINVAL;
>> }
>>
>> + if (use_cni && !strnlen(sock_path, PATH_MAX)) {
>> + AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' must
>> also be provided\n",
>> + ETH_AF_XDP_USE_CNI_ARG,
>> ETH_AF_XDP_SOCK_ARG);
>> + return -EINVAL;
>> + }
>> +
>> +
>> if (strlen(if_name) == 0) {
>> AF_XDP_LOG(ERR, "Network interface must be
>> specified\n");
>> return -EINVAL;
>> @@ -2410,7 +2427,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
>> *dev)
>>
>> eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
>> xsk_queue_cnt, shared_umem, prog_path,
>> - busy_budget, force_copy, use_cni);
>> + busy_budget, force_copy, use_cni,
>> sock_path);
>> if (eth_dev == NULL) {
>> AF_XDP_LOG(ERR, "Failed to init internals\n");
>> return -1;
>> @@ -2471,4 +2488,5 @@
>> RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
>> "xdp_prog=<string> "
>> "busy_budget=<int> "
>> "force_copy=<int> "
>> - "use_cni=<int> ");
>> + "use_cni=<int> "
>> + "sock=<string> ");
>> --
>> 2.41.0
Hi Maryam,
I have replied inline.
Regards,
Shibin
From: Maryam Tahhan <mtahhan@redhat.com>
Sent: Thursday, November 30, 2023 12:33 PM
To: Koikkara Reeny, Shibin <shibin.koikkara.reeny@intel.com>; ferruh.yigit@amd.com; stephen@networkplumber.org; lihuisong@huawei.com; fengchengwen@huawei.com; liuyonglong@huawei.com
Cc: dev@dpdk.org
Subject: Re: [v1] net/af_xdp: enable a sock path alongside use_cni
Hi Shibin
Thanks for the review. Comments inline.
BR
Maryam
On 30/11/2023 12:14, Koikkara Reeny, Shibin wrote:
Hi Maryam,
I have added some suggestion below.
Regrads,
Shibin
[snip]
NOTICE);
#define UDS_MAX_CMD_LEN 64
#define UDS_MAX_CMD_RESP 128
#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
-#define UDS_SOCK "/tmp/afxdp.sock"
#define UDS_CONNECT_MSG "/connect"
#define UDS_HOST_OK_MSG "/host_ok"
#define UDS_HOST_NAK_MSG "/host_nak"
@@ -171,6 +170,7 @@ struct pmd_internals {
bool custom_prog_configured;
bool force_copy;
bool use_cni;
+ char sock_path[PATH_MAX];
I would recommend using variable name as "uds_path".
No issues in renaming
struct bpf_map *map;
struct rte_ether_addr eth_addr;
@@ -191,6 +191,7 @@ struct pmd_process_private {
#define ETH_AF_XDP_BUDGET_ARG "busy_budget"
#define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
#define ETH_AF_XDP_USE_CNI_ARG "use_cni"
+#define ETH_AF_XDP_SOCK_ARG "sock"
To make it clear would recommend using "sock_path" and also ETH_AF_XDP_CNI_UDS_PATH_ARG or ETH_AF_XDP_SOCK_PATH_ARG
No issues in renaming
.
static const char * const valid_arguments[] = {
ETH_AF_XDP_IFACE_ARG,
@@ -201,6 +202,7 @@ static const char * const valid_arguments[] = {
ETH_AF_XDP_BUDGET_ARG,
ETH_AF_XDP_FORCE_COPY_ARG,
ETH_AF_XDP_USE_CNI_ARG,
+ ETH_AF_XDP_SOCK_ARG,
NULL
};
@@ -1351,7 +1353,7 @@ configure_preferred_busy_poll(struct
pkt_rx_queue *rxq) }
static int
-init_uds_sock(struct sockaddr_un *server)
+init_uds_sock(struct sockaddr_un *server, const char *sock_path)
{
int sock;
@@ -1362,7 +1364,7 @@ init_uds_sock(struct sockaddr_un *server)
}
server->sun_family = AF_UNIX;
- strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
+ strlcpy(server->sun_path, sock_path, sizeof(server->sun_path));
if (connect(sock, (struct sockaddr *)server, sizeof(struct
sockaddr_un)) < 0) {
close(sock);
@@ -1382,7 +1384,7 @@ struct msg_internal { };
static int
-send_msg(int sock, char *request, int *fd)
+send_msg(int sock, char *request, int *fd, const char *sock_path)
{
int snd;
struct iovec iov;
@@ -1393,7 +1395,7 @@ send_msg(int sock, char *request, int *fd)
memset(&dst, 0, sizeof(dst));
dst.sun_family = AF_UNIX;
- strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
+ strlcpy(dst.sun_path, sock_path, sizeof(dst.sun_path));
/* Initialize message header structure */
memset(&msgh, 0, sizeof(msgh));
@@ -1471,7 +1473,7 @@ read_msg(int sock, char *response, struct
sockaddr_un *s, int *fd)
static int
make_request_cni(int sock, struct sockaddr_un *server, char *request,
- int *req_fd, char *response, int *out_fd)
+ int *req_fd, char *response, int *out_fd, const char
*sock_path)
{
int rval;
@@ -1483,7 +1485,7 @@ make_request_cni(int sock, struct sockaddr_un
*server, char *request,
if (req_fd == NULL)
rval = write(sock, request, strlen(request));
else
- rval = send_msg(sock, request, req_fd);
+ rval = send_msg(sock, request, req_fd, sock_path);
if (rval < 0) {
AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); @@
-1507,7 +1509,7 @@ check_response(char *response, char *exp_resp, long
size) }
static int
-get_cni_fd(char *if_name)
+get_cni_fd(char *if_name, const char *sock_path)
{
char request[UDS_MAX_CMD_LEN],
response[UDS_MAX_CMD_RESP];
char hostname[MAX_LONG_OPT_SZ],
exp_resp[UDS_MAX_CMD_RESP]; @@ -1520,14 +1522,14 @@
get_cni_fd(char *if_name)
return -1;
memset(&server, 0, sizeof(server));
- sock = init_uds_sock(&server);
+ sock = init_uds_sock(&server, sock_path);
if (sock < 0)
return -1;
/* Initiates handshake to CNI send: /connect,hostname */
snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG,
hostname);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response,
&out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response,
&out_fd,
+sock_path) < 0) {
Why do we need to pass "sock_path" here as we have already connected the sock with sock_path in init_uds_sock()?
it's used in the send_msg() function inside make_request_cni(). In the original implementation of send_msg used a #defined value which is why it wasn't needed before. so as far as I can tell send_msg still needs that path. but I'm open to suggestions.
Sorry my mistake. You will need to pass the argument.
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
request);
goto err_close;
}
@@ -1541,7 +1543,7 @@ get_cni_fd(char *if_name)
/* Request for "/version" */
strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response,
&out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response,
&out_fd,
+sock_path) < 0) {
Same question as above.
Please see previous ans
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
request);
goto err_close;
}
@@ -1549,7 +1551,7 @@ get_cni_fd(char *if_name)
/* Request for file descriptor for netdev name*/
snprintf(request, sizeof(request), "%s,%s",
UDS_XSK_MAP_FD_MSG, if_name);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response,
&out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response,
&out_fd,
+sock_path) < 0) {
Same question as above.
Please see previous ans
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
request);
goto err_close;
}
@@ -1571,7 +1573,7 @@ get_cni_fd(char *if_name)
/* Initiate close connection */
strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response,
&out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response,
&out_fd,
+sock_path) < 0) {
Same question as above.
Please see previous ans
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
request);
goto err_close;
}
@@ -1698,7 +1700,7 @@ xsk_configure(struct pmd_internals *internals,
struct pkt_rx_queue *rxq,
int err, fd, map_fd;
/* get socket fd from CNI plugin */
- map_fd = get_cni_fd(internals->if_name);
+ map_fd = get_cni_fd(internals->if_name, internals-
sock_path);
if (map_fd < 0) {
AF_XDP_LOG(ERR, "Failed to receive CNI plugin
fd\n");
goto out_xsk;
@@ -2023,7 +2025,8 @@ xdp_get_channels_info(const char *if_name, int
*max_queues, static int parse_parameters(struct rte_kvargs *kvlist, char
*if_name, int *start_queue,
int *queue_cnt, int *shared_umem, char *prog_path,
- int *busy_budget, int *force_copy, int *use_cni)
+ int *busy_budget, int *force_copy, int *use_cni,
+ char *sock_path)
{
int ret;
@@ -2069,6 +2072,11 @@ parse_parameters(struct rte_kvargs *kvlist, char
*if_name, int *start_queue,
if (ret < 0)
goto free_kvlist;
+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SOCK_ARG,
+ &parse_prog_arg, sock_path);
Parse_prog_arg does 2 things copy the sock_arg value and also check the access to the socket.
Checking access here has a chance of causing raise condition so I would recommend to skip this check here as this will be taken care in the init_uds_sock().
If there's an issue here processing is abandoned. Is it not better to catch any issue early on? I'm not sure I understand the race condition?
Race condition if af_xdp application pod start by a bit of out of sync before the USD server thread start. So we can wait till the initialization process. It is a suggestion.
+ if (ret < 0)
+ goto free_kvlist;
+
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
@@ -2108,7 +2116,7 @@ static struct rte_eth_dev * init_internals(struct
rte_vdev_device *dev, const char *if_name,
int start_queue_idx, int queue_cnt, int shared_umem,
const char *prog_path, int busy_budget, int force_copy,
- int use_cni)
+ int use_cni, const char *sock_path)
{
const char *name = rte_vdev_device_name(dev);
const unsigned int numa_node = dev->device.numa_node; @@ -
2138,6 +2146,7 @@ init_internals(struct rte_vdev_device *dev, const char
*if_name,
internals->shared_umem = shared_umem;
internals->force_copy = force_copy;
internals->use_cni = use_cni;
+ strlcpy(internals->sock_path, sock_path, PATH_MAX);
if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
&internals->combined_queue_cnt)) { @@ -
2328,6 +2337,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
int busy_budget = -1, ret;
int force_copy = 0;
int use_cni = 0;
+ char sock_path[PATH_MAX] = {'\0'};
struct rte_eth_dev *eth_dev = NULL;
const char *name = rte_vdev_device_name(dev);
@@ -2370,7 +2380,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
*dev)
if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
&xsk_queue_cnt, &shared_umem, prog_path,
- &busy_budget, &force_copy, &use_cni) < 0) {
+ &busy_budget, &force_copy, &use_cni,
sock_path) < 0) {
AF_XDP_LOG(ERR, "Invalid kvargs value\n");
return -EINVAL;
}
@@ -2387,6 +2397,13 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
*dev)
return -EINVAL;
}
+ if (use_cni && !strnlen(sock_path, PATH_MAX)) {
+ AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' must
also be provided\n",
+ ETH_AF_XDP_USE_CNI_ARG,
ETH_AF_XDP_SOCK_ARG);
+ return -EINVAL;
+ }
+
+
if (strlen(if_name) == 0) {
AF_XDP_LOG(ERR, "Network interface must be
specified\n");
return -EINVAL;
@@ -2410,7 +2427,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
*dev)
eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
xsk_queue_cnt, shared_umem, prog_path,
- busy_budget, force_copy, use_cni);
+ busy_budget, force_copy, use_cni,
sock_path);
if (eth_dev == NULL) {
AF_XDP_LOG(ERR, "Failed to init internals\n");
return -1;
@@ -2471,4 +2488,5 @@
RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
"xdp_prog=<string> "
"busy_budget=<int> "
"force_copy=<int> "
- "use_cni=<int> ");
+ "use_cni=<int> "
+ "sock=<string> ");
--
2.41.0
Hi Maryam,
I have one more question.
Regards,
Shibin
> -----Original Message-----
> From: Koikkara Reeny, Shibin <shibin.koikkara.reeny@intel.com>
> Sent: Thursday, November 30, 2023 12:14 PM
> To: Tahhan, Maryam <mtahhan@redhat.com>; ferruh.yigit@amd.com;
> stephen@networkplumber.org; lihuisong@huawei.com;
> fengchengwen@huawei.com; liuyonglong@huawei.com
> Cc: dev@dpdk.org; Tahhan, Maryam <mtahhan@redhat.com>
> Subject: RE: [v1] net/af_xdp: enable a sock path alongside use_cni
>
> Hi Maryam,
>
> I have added some suggestion below.
>
> Regrads,
> Shibin
>
> > -----Original Message-----
> > From: Maryam Tahhan <mtahhan@redhat.com>
> > Sent: Thursday, November 30, 2023 9:14 AM
> > To: ferruh.yigit@amd.com; stephen@networkplumber.org;
> > lihuisong@huawei.com; fengchengwen@huawei.com;
> liuyonglong@huawei.com
> > Cc: dev@dpdk.org; Tahhan, Maryam <mtahhan@redhat.com>
> > Subject: [v1] net/af_xdp: enable a sock path alongside use_cni
> >
> > With the original 'use_cni' implementation, (using a hardcoded socket
> > rather than a configurable one), if a single pod is requesting
> > multiple net devices and these devices are from different pools, then
> > the container attempts to mount all the netdev UDSes in the pod as
> > /tmp/afxdp.sock. Which means that at best only 1 netdev will handshake
> > correctly with the AF_XDP DP. This patch addresses this by making the
> > socket parameter configurable alongside the 'use_cni' param.
> > Tested with the AF_XDP DP CNI PR 81.
> >
> > Signed-off-by: Maryam Tahhan <mtahhan@redhat.com>
> > ---
> > doc/guides/howto/af_xdp_cni.rst | 18 +++++++---
> > drivers/net/af_xdp/rte_eth_af_xdp.c | 56
> > +++++++++++++++++++----------
> > 2 files changed, 51 insertions(+), 23 deletions(-)
> >
> > diff --git a/doc/guides/howto/af_xdp_cni.rst
> > b/doc/guides/howto/af_xdp_cni.rst index a1a6d5b99c..a2d90c665d 100644
> > --- a/doc/guides/howto/af_xdp_cni.rst
> > +++ b/doc/guides/howto/af_xdp_cni.rst
> > @@ -38,9 +38,10 @@ The XSKMAP is a BPF map of AF_XDP sockets (XSK).
> > The client can then proceed with creating an AF_XDP socket and
> > inserting that socket into the XSKMAP pointed to by the descriptor.
> >
> > -The EAL vdev argument ``use_cni`` is used to indicate that the user
> > wishes
> > +The EAL vdev arguments ``use_cni`` and ``sock`` are used to indicate
> > +that the user wishes
> > to run the PMD in unprivileged mode and to receive the XSKMAP file
> > descriptor from the CNI.
> > +
> > When this flag is set,
> > the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag should be
> > used when creating the socket @@ -49,7 +50,7 @@ Instead the loading is
> > handled by the CNI.
> >
> > .. note::
> >
> > - The Unix Domain Socket file path appear in the end user is
> > "/tmp/afxdp.sock".
> > + The Unix Domain Socket file path appears to the end user at
> > "/tmp/afxdp_dp/<netdev>/afxdp.sock".
> >
> >
> > Prerequisites
> > @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
> > capabilities:
> > add:
> > - CAP_NET_RAW
> > - - CAP_BPF
Why the CAP_BPF is removed?
> > resources:
> > requests:
> > hugepages-2Mi: 2Gi
> > @@ -245,7 +245,17 @@ Howto run dpdk-testpmd with CNI plugin:
> >
> > kubectl exec -i <Pod name> --container <containers name> -- \
> > /<Path>/dpdk-testpmd -l 0,1 --no-pci \
> > - --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
> > + --vdev=net_af_xdp0,use_cni=1,iface=<interface
> > name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> > + -- --no-mlockall --in-memory
> > +
> > +for multiple devices use:
> > +
> > + .. code-block:: console
> > +
> > + kubectl exec -i <Pod name> --container <containers name> -- \
> > + /<Path>/dpdk-testpmd -l 0-2 --no-pci \
> > + --vdev=net_af_xdp0,use_cni=1,iface=<interface
> > name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> > + --vdev=net_af_xdp1,use_cni=1,iface=<interface
> > + name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> > -- --no-mlockall --in-memory
> >
> > For further reference please use the `e2e`_ test case in `AF_XDP
> > Plugin for Kubernetes`_ diff --git
> > a/drivers/net/af_xdp/rte_eth_af_xdp.c
> > b/drivers/net/af_xdp/rte_eth_af_xdp.c
> > index 353c8688ec..f728dae2f9 100644
> > --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> > +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> > @@ -88,7 +88,6 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype,
> > NOTICE);
> > #define UDS_MAX_CMD_LEN 64
> > #define UDS_MAX_CMD_RESP 128
> > #define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
> > -#define UDS_SOCK "/tmp/afxdp.sock"
> > #define UDS_CONNECT_MSG "/connect"
> > #define UDS_HOST_OK_MSG "/host_ok"
> > #define UDS_HOST_NAK_MSG "/host_nak"
> > @@ -171,6 +170,7 @@ struct pmd_internals {
> > bool custom_prog_configured;
> > bool force_copy;
> > bool use_cni;
> > + char sock_path[PATH_MAX];
> I would recommend using variable name as "uds_path".
>
> > struct bpf_map *map;
> >
> > struct rte_ether_addr eth_addr;
> > @@ -191,6 +191,7 @@ struct pmd_process_private {
> > #define ETH_AF_XDP_BUDGET_ARG
> "busy_budget"
> > #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
> > #define ETH_AF_XDP_USE_CNI_ARG "use_cni"
> > +#define ETH_AF_XDP_SOCK_ARG "sock"
> To make it clear would recommend using "sock_path" and also
> ETH_AF_XDP_CNI_UDS_PATH_ARG or ETH_AF_XDP_SOCK_PATH_ARG.
>
> >
> > static const char * const valid_arguments[] = {
> > ETH_AF_XDP_IFACE_ARG,
> > @@ -201,6 +202,7 @@ static const char * const valid_arguments[] = {
> > ETH_AF_XDP_BUDGET_ARG,
> > ETH_AF_XDP_FORCE_COPY_ARG,
> > ETH_AF_XDP_USE_CNI_ARG,
> > + ETH_AF_XDP_SOCK_ARG,
> > NULL
> > };
> >
> > @@ -1351,7 +1353,7 @@ configure_preferred_busy_poll(struct
> > pkt_rx_queue *rxq) }
> >
> > static int
> > -init_uds_sock(struct sockaddr_un *server)
> > +init_uds_sock(struct sockaddr_un *server, const char *sock_path)
> > {
> > int sock;
> >
> > @@ -1362,7 +1364,7 @@ init_uds_sock(struct sockaddr_un *server)
> > }
> >
> > server->sun_family = AF_UNIX;
> > - strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
> > + strlcpy(server->sun_path, sock_path, sizeof(server->sun_path));
> >
> > if (connect(sock, (struct sockaddr *)server, sizeof(struct
> > sockaddr_un)) < 0) {
> > close(sock);
> > @@ -1382,7 +1384,7 @@ struct msg_internal { };
> >
> > static int
> > -send_msg(int sock, char *request, int *fd)
> > +send_msg(int sock, char *request, int *fd, const char *sock_path)
> > {
> > int snd;
> > struct iovec iov;
> > @@ -1393,7 +1395,7 @@ send_msg(int sock, char *request, int *fd)
> >
> > memset(&dst, 0, sizeof(dst));
> > dst.sun_family = AF_UNIX;
> > - strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
> > + strlcpy(dst.sun_path, sock_path, sizeof(dst.sun_path));
> >
> > /* Initialize message header structure */
> > memset(&msgh, 0, sizeof(msgh));
> > @@ -1471,7 +1473,7 @@ read_msg(int sock, char *response, struct
> > sockaddr_un *s, int *fd)
> >
> > static int
> > make_request_cni(int sock, struct sockaddr_un *server, char *request,
> > - int *req_fd, char *response, int *out_fd)
> > + int *req_fd, char *response, int *out_fd, const char
> > *sock_path)
> > {
> > int rval;
> >
> > @@ -1483,7 +1485,7 @@ make_request_cni(int sock, struct sockaddr_un
> > *server, char *request,
> > if (req_fd == NULL)
> > rval = write(sock, request, strlen(request));
> > else
> > - rval = send_msg(sock, request, req_fd);
> > + rval = send_msg(sock, request, req_fd, sock_path);
> >
> > if (rval < 0) {
> > AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); @@
> > -1507,7 +1509,7 @@ check_response(char *response, char *exp_resp,
> long
> > size) }
> >
> > static int
> > -get_cni_fd(char *if_name)
> > +get_cni_fd(char *if_name, const char *sock_path)
> > {
> > char request[UDS_MAX_CMD_LEN],
> > response[UDS_MAX_CMD_RESP];
> > char hostname[MAX_LONG_OPT_SZ],
> > exp_resp[UDS_MAX_CMD_RESP]; @@ -1520,14 +1522,14 @@
> get_cni_fd(char
> > *if_name)
> > return -1;
> >
> > memset(&server, 0, sizeof(server));
> > - sock = init_uds_sock(&server);
> > + sock = init_uds_sock(&server, sock_path);
> > if (sock < 0)
> > return -1;
> >
> > /* Initiates handshake to CNI send: /connect,hostname */
> > snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG,
> > hostname);
> > memset(response, 0, sizeof(response));
> > - if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd) < 0) {
> > + if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd,
> > +sock_path) < 0) {
> Why do we need to pass "sock_path" here as we have already connected
> the sock with sock_path in init_uds_sock()?
>
> > AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> > goto err_close;
> > }
> > @@ -1541,7 +1543,7 @@ get_cni_fd(char *if_name)
> > /* Request for "/version" */
> > strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
> > memset(response, 0, sizeof(response));
> > - if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd) < 0) {
> > + if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd,
> > +sock_path) < 0) {
> Same question as above.
>
> > AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> > goto err_close;
> > }
> > @@ -1549,7 +1551,7 @@ get_cni_fd(char *if_name)
> > /* Request for file descriptor for netdev name*/
> > snprintf(request, sizeof(request), "%s,%s",
> UDS_XSK_MAP_FD_MSG,
> > if_name);
> > memset(response, 0, sizeof(response));
> > - if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd) < 0) {
> > + if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd,
> > +sock_path) < 0) {
>
> Same question as above.
>
> > AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> > goto err_close;
> > }
> > @@ -1571,7 +1573,7 @@ get_cni_fd(char *if_name)
> > /* Initiate close connection */
> > strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
> > memset(response, 0, sizeof(response));
> > - if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd) < 0) {
> > + if (make_request_cni(sock, &server, request, NULL, response,
> > &out_fd,
> > +sock_path) < 0) {
>
> Same question as above.
>
> > AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
> request);
> > goto err_close;
> > }
> > @@ -1698,7 +1700,7 @@ xsk_configure(struct pmd_internals *internals,
> > struct pkt_rx_queue *rxq,
> > int err, fd, map_fd;
> >
> > /* get socket fd from CNI plugin */
> > - map_fd = get_cni_fd(internals->if_name);
> > + map_fd = get_cni_fd(internals->if_name, internals-
> > >sock_path);
> > if (map_fd < 0) {
> > AF_XDP_LOG(ERR, "Failed to receive CNI plugin
> fd\n");
> > goto out_xsk;
> > @@ -2023,7 +2025,8 @@ xdp_get_channels_info(const char *if_name, int
> > *max_queues, static int parse_parameters(struct rte_kvargs *kvlist,
> > char *if_name, int *start_queue,
> > int *queue_cnt, int *shared_umem, char *prog_path,
> > - int *busy_budget, int *force_copy, int *use_cni)
> > + int *busy_budget, int *force_copy, int *use_cni,
> > + char *sock_path)
> > {
> > int ret;
> >
> > @@ -2069,6 +2072,11 @@ parse_parameters(struct rte_kvargs *kvlist,
> > char *if_name, int *start_queue,
> > if (ret < 0)
> > goto free_kvlist;
> >
> > + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SOCK_ARG,
> > + &parse_prog_arg, sock_path);
>
> Parse_prog_arg does 2 things copy the sock_arg value and also check the
> access to the socket.
> Checking access here has a chance of causing raise condition so I would
> recommend to skip this check here as this will be taken care in the
> init_uds_sock().
>
> > + if (ret < 0)
> > + goto free_kvlist;
> > +
> > free_kvlist:
> > rte_kvargs_free(kvlist);
> > return ret;
> > @@ -2108,7 +2116,7 @@ static struct rte_eth_dev *
> > init_internals(struct rte_vdev_device *dev, const char *if_name,
> > int start_queue_idx, int queue_cnt, int shared_umem,
> > const char *prog_path, int busy_budget, int force_copy,
> > - int use_cni)
> > + int use_cni, const char *sock_path)
> > {
> > const char *name = rte_vdev_device_name(dev);
> > const unsigned int numa_node = dev->device.numa_node; @@ -
> > 2138,6 +2146,7 @@ init_internals(struct rte_vdev_device *dev, const
> > char *if_name,
> > internals->shared_umem = shared_umem;
> > internals->force_copy = force_copy;
> > internals->use_cni = use_cni;
> > + strlcpy(internals->sock_path, sock_path, PATH_MAX);
> >
> > if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
> > &internals->combined_queue_cnt)) { @@ -
> > 2328,6 +2337,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> > int busy_budget = -1, ret;
> > int force_copy = 0;
> > int use_cni = 0;
> > + char sock_path[PATH_MAX] = {'\0'};
> > struct rte_eth_dev *eth_dev = NULL;
> > const char *name = rte_vdev_device_name(dev);
> >
> > @@ -2370,7 +2380,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> > *dev)
> >
> > if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
> > &xsk_queue_cnt, &shared_umem, prog_path,
> > - &busy_budget, &force_copy, &use_cni) < 0) {
> > + &busy_budget, &force_copy, &use_cni,
> > sock_path) < 0) {
> > AF_XDP_LOG(ERR, "Invalid kvargs value\n");
> > return -EINVAL;
> > }
> > @@ -2387,6 +2397,13 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> > *dev)
> > return -EINVAL;
> > }
> >
> > + if (use_cni && !strnlen(sock_path, PATH_MAX)) {
> > + AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' must
> > also be provided\n",
> > + ETH_AF_XDP_USE_CNI_ARG,
> > ETH_AF_XDP_SOCK_ARG);
> > + return -EINVAL;
> > + }
> > +
> > +
> > if (strlen(if_name) == 0) {
> > AF_XDP_LOG(ERR, "Network interface must be
> specified\n");
> > return -EINVAL;
> > @@ -2410,7 +2427,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
> > *dev)
> >
> > eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
> > xsk_queue_cnt, shared_umem, prog_path,
> > - busy_budget, force_copy, use_cni);
> > + busy_budget, force_copy, use_cni,
> > sock_path);
> > if (eth_dev == NULL) {
> > AF_XDP_LOG(ERR, "Failed to init internals\n");
> > return -1;
> > @@ -2471,4 +2488,5 @@
> > RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
> > "xdp_prog=<string> "
> > "busy_budget=<int> "
> > "force_copy=<int> "
> > - "use_cni=<int> ");
> > + "use_cni=<int> "
> > + "sock=<string> ");
> > --
> > 2.41.0
Hi Shibin
No problem.
Answer below.
BR
Maryam
On 30/11/2023 13:56, Koikkara Reeny, Shibin wrote:
> Hi Maryam,
>
> I have one more question.
>
> Regards,
> Shibin
>
>> -----Original Message-----
>> From: Koikkara Reeny, Shibin<shibin.koikkara.reeny@intel.com>
>> Sent: Thursday, November 30, 2023 12:14 PM
>> To: Tahhan, Maryam<mtahhan@redhat.com>;ferruh.yigit@amd.com;
>> stephen@networkplumber.org;lihuisong@huawei.com;
>> fengchengwen@huawei.com;liuyonglong@huawei.com
>> Cc:dev@dpdk.org; Tahhan, Maryam<mtahhan@redhat.com>
>> Subject: RE: [v1] net/af_xdp: enable a sock path alongside use_cni
>>
>> Hi Maryam,
>>
>> I have added some suggestion below.
>>
>> Regrads,
>> Shibin
[snip]
>>> Prerequisites
>>> @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
>>> capabilities:
>>> add:
>>> - CAP_NET_RAW
>>> - - CAP_BPF
> Why the CAP_BPF is removed?
Good question. It's removed because in our case CAP_BPF is only needed
when we want to load or unload the XDP program on the interface inside
the Pod. In our case the CNI is loading the xdp program on the interface
and then we are doing a handshake to get the xskmap file descriptor and
so we don't need the CAP_BPF.
You will find a detailed listing of the permissions used at different
stages when utilizing an XDP prog in this article
https://next.redhat.com/2023/07/18/using-ebpf-in-unprivileged-pods/
I'm currently also working on enabling pinned map sharing with the
af_xdp vdev eal arguments so we can integrate with bpfman for
centralized BPF program lifecycle management [currently under test].
[snip]
Hi Shibin
I will add a more detailed note re CAP_BPF in the documentation on the V2.
BR
Maryam
On 30/11/2023 13:56, Koikkara Reeny, Shibin wrote:
> Hi Maryam,
>
> I have one more question.
>
> Regards,
> Shibin
>
>> -----Original Message-----
>> From: Koikkara Reeny, Shibin <shibin.koikkara.reeny@intel.com>
>> Sent: Thursday, November 30, 2023 12:14 PM
>> To: Tahhan, Maryam <mtahhan@redhat.com>; ferruh.yigit@amd.com;
>> stephen@networkplumber.org; lihuisong@huawei.com;
>> fengchengwen@huawei.com; liuyonglong@huawei.com
>> Cc: dev@dpdk.org; Tahhan, Maryam <mtahhan@redhat.com>
>> Subject: RE: [v1] net/af_xdp: enable a sock path alongside use_cni
>>
>> Hi Maryam,
>>
>> I have added some suggestion below.
>>
>> Regrads,
>> Shibin
>>
>>> -----Original Message-----
>>> From: Maryam Tahhan <mtahhan@redhat.com>
>>> Sent: Thursday, November 30, 2023 9:14 AM
>>> To: ferruh.yigit@amd.com; stephen@networkplumber.org;
>>> lihuisong@huawei.com; fengchengwen@huawei.com;
>> liuyonglong@huawei.com
>>> Cc: dev@dpdk.org; Tahhan, Maryam <mtahhan@redhat.com>
>>> Subject: [v1] net/af_xdp: enable a sock path alongside use_cni
>>>
>>> With the original 'use_cni' implementation, (using a hardcoded socket
>>> rather than a configurable one), if a single pod is requesting
>>> multiple net devices and these devices are from different pools, then
>>> the container attempts to mount all the netdev UDSes in the pod as
>>> /tmp/afxdp.sock. Which means that at best only 1 netdev will handshake
>>> correctly with the AF_XDP DP. This patch addresses this by making the
>>> socket parameter configurable alongside the 'use_cni' param.
>>> Tested with the AF_XDP DP CNI PR 81.
>>>
>>> Signed-off-by: Maryam Tahhan <mtahhan@redhat.com>
>>> ---
>>> doc/guides/howto/af_xdp_cni.rst | 18 +++++++---
>>> drivers/net/af_xdp/rte_eth_af_xdp.c | 56
>>> +++++++++++++++++++----------
>>> 2 files changed, 51 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/doc/guides/howto/af_xdp_cni.rst
>>> b/doc/guides/howto/af_xdp_cni.rst index a1a6d5b99c..a2d90c665d 100644
>>> --- a/doc/guides/howto/af_xdp_cni.rst
>>> +++ b/doc/guides/howto/af_xdp_cni.rst
>>> @@ -38,9 +38,10 @@ The XSKMAP is a BPF map of AF_XDP sockets (XSK).
>>> The client can then proceed with creating an AF_XDP socket and
>>> inserting that socket into the XSKMAP pointed to by the descriptor.
>>>
>>> -The EAL vdev argument ``use_cni`` is used to indicate that the user
>>> wishes
>>> +The EAL vdev arguments ``use_cni`` and ``sock`` are used to indicate
>>> +that the user wishes
>>> to run the PMD in unprivileged mode and to receive the XSKMAP file
>>> descriptor from the CNI.
>>> +
>>> When this flag is set,
>>> the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag should be
>>> used when creating the socket @@ -49,7 +50,7 @@ Instead the loading is
>>> handled by the CNI.
>>>
>>> .. note::
>>>
>>> - The Unix Domain Socket file path appear in the end user is
>>> "/tmp/afxdp.sock".
>>> + The Unix Domain Socket file path appears to the end user at
>>> "/tmp/afxdp_dp/<netdev>/afxdp.sock".
>>>
>>>
>>> Prerequisites
>>> @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
>>> capabilities:
>>> add:
>>> - CAP_NET_RAW
>>> - - CAP_BPF
> Why the CAP_BPF is removed?
>
>>> resources:
>>> requests:
>>> hugepages-2Mi: 2Gi
>>> @@ -245,7 +245,17 @@ Howto run dpdk-testpmd with CNI plugin:
>>>
>>> kubectl exec -i <Pod name> --container <containers name> -- \
>>> /<Path>/dpdk-testpmd -l 0,1 --no-pci \
>>> - --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
>>> + --vdev=net_af_xdp0,use_cni=1,iface=<interface
>>> name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
>>> + -- --no-mlockall --in-memory
>>> +
>>> +for multiple devices use:
>>> +
>>> + .. code-block:: console
>>> +
>>> + kubectl exec -i <Pod name> --container <containers name> -- \
>>> + /<Path>/dpdk-testpmd -l 0-2 --no-pci \
>>> + --vdev=net_af_xdp0,use_cni=1,iface=<interface
>>> name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
>>> + --vdev=net_af_xdp1,use_cni=1,iface=<interface
>>> + name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
>>> -- --no-mlockall --in-memory
>>>
>>> For further reference please use the `e2e`_ test case in `AF_XDP
>>> Plugin for Kubernetes`_ diff --git
>>> a/drivers/net/af_xdp/rte_eth_af_xdp.c
>>> b/drivers/net/af_xdp/rte_eth_af_xdp.c
>>> index 353c8688ec..f728dae2f9 100644
>>> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
>>> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
>>> @@ -88,7 +88,6 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype,
>>> NOTICE);
>>> #define UDS_MAX_CMD_LEN 64
>>> #define UDS_MAX_CMD_RESP 128
>>> #define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
>>> -#define UDS_SOCK "/tmp/afxdp.sock"
>>> #define UDS_CONNECT_MSG "/connect"
>>> #define UDS_HOST_OK_MSG "/host_ok"
>>> #define UDS_HOST_NAK_MSG "/host_nak"
>>> @@ -171,6 +170,7 @@ struct pmd_internals {
>>> bool custom_prog_configured;
>>> bool force_copy;
>>> bool use_cni;
>>> + char sock_path[PATH_MAX];
>> I would recommend using variable name as "uds_path".
>>
>>> struct bpf_map *map;
>>>
>>> struct rte_ether_addr eth_addr;
>>> @@ -191,6 +191,7 @@ struct pmd_process_private {
>>> #define ETH_AF_XDP_BUDGET_ARG
>> "busy_budget"
>>> #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
>>> #define ETH_AF_XDP_USE_CNI_ARG "use_cni"
>>> +#define ETH_AF_XDP_SOCK_ARG "sock"
>> To make it clear would recommend using "sock_path" and also
>> ETH_AF_XDP_CNI_UDS_PATH_ARG or ETH_AF_XDP_SOCK_PATH_ARG.
>>
>>> static const char * const valid_arguments[] = {
>>> ETH_AF_XDP_IFACE_ARG,
>>> @@ -201,6 +202,7 @@ static const char * const valid_arguments[] = {
>>> ETH_AF_XDP_BUDGET_ARG,
>>> ETH_AF_XDP_FORCE_COPY_ARG,
>>> ETH_AF_XDP_USE_CNI_ARG,
>>> + ETH_AF_XDP_SOCK_ARG,
>>> NULL
>>> };
>>>
>>> @@ -1351,7 +1353,7 @@ configure_preferred_busy_poll(struct
>>> pkt_rx_queue *rxq) }
>>>
>>> static int
>>> -init_uds_sock(struct sockaddr_un *server)
>>> +init_uds_sock(struct sockaddr_un *server, const char *sock_path)
>>> {
>>> int sock;
>>>
>>> @@ -1362,7 +1364,7 @@ init_uds_sock(struct sockaddr_un *server)
>>> }
>>>
>>> server->sun_family = AF_UNIX;
>>> - strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
>>> + strlcpy(server->sun_path, sock_path, sizeof(server->sun_path));
>>>
>>> if (connect(sock, (struct sockaddr *)server, sizeof(struct
>>> sockaddr_un)) < 0) {
>>> close(sock);
>>> @@ -1382,7 +1384,7 @@ struct msg_internal { };
>>>
>>> static int
>>> -send_msg(int sock, char *request, int *fd)
>>> +send_msg(int sock, char *request, int *fd, const char *sock_path)
>>> {
>>> int snd;
>>> struct iovec iov;
>>> @@ -1393,7 +1395,7 @@ send_msg(int sock, char *request, int *fd)
>>>
>>> memset(&dst, 0, sizeof(dst));
>>> dst.sun_family = AF_UNIX;
>>> - strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
>>> + strlcpy(dst.sun_path, sock_path, sizeof(dst.sun_path));
>>>
>>> /* Initialize message header structure */
>>> memset(&msgh, 0, sizeof(msgh));
>>> @@ -1471,7 +1473,7 @@ read_msg(int sock, char *response, struct
>>> sockaddr_un *s, int *fd)
>>>
>>> static int
>>> make_request_cni(int sock, struct sockaddr_un *server, char *request,
>>> - int *req_fd, char *response, int *out_fd)
>>> + int *req_fd, char *response, int *out_fd, const char
>>> *sock_path)
>>> {
>>> int rval;
>>>
>>> @@ -1483,7 +1485,7 @@ make_request_cni(int sock, struct sockaddr_un
>>> *server, char *request,
>>> if (req_fd == NULL)
>>> rval = write(sock, request, strlen(request));
>>> else
>>> - rval = send_msg(sock, request, req_fd);
>>> + rval = send_msg(sock, request, req_fd, sock_path);
>>>
>>> if (rval < 0) {
>>> AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno)); @@
>>> -1507,7 +1509,7 @@ check_response(char *response, char *exp_resp,
>> long
>>> size) }
>>>
>>> static int
>>> -get_cni_fd(char *if_name)
>>> +get_cni_fd(char *if_name, const char *sock_path)
>>> {
>>> char request[UDS_MAX_CMD_LEN],
>>> response[UDS_MAX_CMD_RESP];
>>> char hostname[MAX_LONG_OPT_SZ],
>>> exp_resp[UDS_MAX_CMD_RESP]; @@ -1520,14 +1522,14 @@
>> get_cni_fd(char
>>> *if_name)
>>> return -1;
>>>
>>> memset(&server, 0, sizeof(server));
>>> - sock = init_uds_sock(&server);
>>> + sock = init_uds_sock(&server, sock_path);
>>> if (sock < 0)
>>> return -1;
>>>
>>> /* Initiates handshake to CNI send: /connect,hostname */
>>> snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG,
>>> hostname);
>>> memset(response, 0, sizeof(response));
>>> - if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd) < 0) {
>>> + if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd,
>>> +sock_path) < 0) {
>> Why do we need to pass "sock_path" here as we have already connected
>> the sock with sock_path in init_uds_sock()?
>>
>>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>>> goto err_close;
>>> }
>>> @@ -1541,7 +1543,7 @@ get_cni_fd(char *if_name)
>>> /* Request for "/version" */
>>> strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
>>> memset(response, 0, sizeof(response));
>>> - if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd) < 0) {
>>> + if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd,
>>> +sock_path) < 0) {
>> Same question as above.
>>
>>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>>> goto err_close;
>>> }
>>> @@ -1549,7 +1551,7 @@ get_cni_fd(char *if_name)
>>> /* Request for file descriptor for netdev name*/
>>> snprintf(request, sizeof(request), "%s,%s",
>> UDS_XSK_MAP_FD_MSG,
>>> if_name);
>>> memset(response, 0, sizeof(response));
>>> - if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd) < 0) {
>>> + if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd,
>>> +sock_path) < 0) {
>> Same question as above.
>>
>>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>>> goto err_close;
>>> }
>>> @@ -1571,7 +1573,7 @@ get_cni_fd(char *if_name)
>>> /* Initiate close connection */
>>> strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
>>> memset(response, 0, sizeof(response));
>>> - if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd) < 0) {
>>> + if (make_request_cni(sock, &server, request, NULL, response,
>>> &out_fd,
>>> +sock_path) < 0) {
>> Same question as above.
>>
>>> AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n",
>> request);
>>> goto err_close;
>>> }
>>> @@ -1698,7 +1700,7 @@ xsk_configure(struct pmd_internals *internals,
>>> struct pkt_rx_queue *rxq,
>>> int err, fd, map_fd;
>>>
>>> /* get socket fd from CNI plugin */
>>> - map_fd = get_cni_fd(internals->if_name);
>>> + map_fd = get_cni_fd(internals->if_name, internals-
>>>> sock_path);
>>> if (map_fd < 0) {
>>> AF_XDP_LOG(ERR, "Failed to receive CNI plugin
>> fd\n");
>>> goto out_xsk;
>>> @@ -2023,7 +2025,8 @@ xdp_get_channels_info(const char *if_name, int
>>> *max_queues, static int parse_parameters(struct rte_kvargs *kvlist,
>>> char *if_name, int *start_queue,
>>> int *queue_cnt, int *shared_umem, char *prog_path,
>>> - int *busy_budget, int *force_copy, int *use_cni)
>>> + int *busy_budget, int *force_copy, int *use_cni,
>>> + char *sock_path)
>>> {
>>> int ret;
>>>
>>> @@ -2069,6 +2072,11 @@ parse_parameters(struct rte_kvargs *kvlist,
>>> char *if_name, int *start_queue,
>>> if (ret < 0)
>>> goto free_kvlist;
>>>
>>> + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SOCK_ARG,
>>> + &parse_prog_arg, sock_path);
>> Parse_prog_arg does 2 things copy the sock_arg value and also check the
>> access to the socket.
>> Checking access here has a chance of causing raise condition so I would
>> recommend to skip this check here as this will be taken care in the
>> init_uds_sock().
>>
>>> + if (ret < 0)
>>> + goto free_kvlist;
>>> +
>>> free_kvlist:
>>> rte_kvargs_free(kvlist);
>>> return ret;
>>> @@ -2108,7 +2116,7 @@ static struct rte_eth_dev *
>>> init_internals(struct rte_vdev_device *dev, const char *if_name,
>>> int start_queue_idx, int queue_cnt, int shared_umem,
>>> const char *prog_path, int busy_budget, int force_copy,
>>> - int use_cni)
>>> + int use_cni, const char *sock_path)
>>> {
>>> const char *name = rte_vdev_device_name(dev);
>>> const unsigned int numa_node = dev->device.numa_node; @@ -
>>> 2138,6 +2146,7 @@ init_internals(struct rte_vdev_device *dev, const
>>> char *if_name,
>>> internals->shared_umem = shared_umem;
>>> internals->force_copy = force_copy;
>>> internals->use_cni = use_cni;
>>> + strlcpy(internals->sock_path, sock_path, PATH_MAX);
>>>
>>> if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
>>> &internals->combined_queue_cnt)) { @@ -
>>> 2328,6 +2337,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>>> int busy_budget = -1, ret;
>>> int force_copy = 0;
>>> int use_cni = 0;
>>> + char sock_path[PATH_MAX] = {'\0'};
>>> struct rte_eth_dev *eth_dev = NULL;
>>> const char *name = rte_vdev_device_name(dev);
>>>
>>> @@ -2370,7 +2380,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
>>> *dev)
>>>
>>> if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
>>> &xsk_queue_cnt, &shared_umem, prog_path,
>>> - &busy_budget, &force_copy, &use_cni) < 0) {
>>> + &busy_budget, &force_copy, &use_cni,
>>> sock_path) < 0) {
>>> AF_XDP_LOG(ERR, "Invalid kvargs value\n");
>>> return -EINVAL;
>>> }
>>> @@ -2387,6 +2397,13 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
>>> *dev)
>>> return -EINVAL;
>>> }
>>>
>>> + if (use_cni && !strnlen(sock_path, PATH_MAX)) {
>>> + AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' must
>>> also be provided\n",
>>> + ETH_AF_XDP_USE_CNI_ARG,
>>> ETH_AF_XDP_SOCK_ARG);
>>> + return -EINVAL;
>>> + }
>>> +
>>> +
>>> if (strlen(if_name) == 0) {
>>> AF_XDP_LOG(ERR, "Network interface must be
>> specified\n");
>>> return -EINVAL;
>>> @@ -2410,7 +2427,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device
>>> *dev)
>>>
>>> eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
>>> xsk_queue_cnt, shared_umem, prog_path,
>>> - busy_budget, force_copy, use_cni);
>>> + busy_budget, force_copy, use_cni,
>>> sock_path);
>>> if (eth_dev == NULL) {
>>> AF_XDP_LOG(ERR, "Failed to init internals\n");
>>> return -1;
>>> @@ -2471,4 +2488,5 @@
>>> RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
>>> "xdp_prog=<string> "
>>> "busy_budget=<int> "
>>> "force_copy=<int> "
>>> - "use_cni=<int> ");
>>> + "use_cni=<int> "
>>> + "sock=<string> ");
>>> --
>>> 2.41.0
Thanks for reply Maryam.
My reply is below.
Regards,
Shibin
From: Maryam Tahhan <mtahhan@redhat.com>
Sent: Thursday, November 30, 2023 2:17 PM
To: Koikkara Reeny, Shibin <shibin.koikkara.reeny@intel.com>; ferruh.yigit@amd.com; stephen@networkplumber.org; lihuisong@huawei.com; fengchengwen@huawei.com; liuyonglong@huawei.com
Cc: dev@dpdk.org
Subject: Re: [v1] net/af_xdp: enable a sock path alongside use_cni
Hi Shibin
No problem.
Answer below.
BR
Maryam
On 30/11/2023 13:56, Koikkara Reeny, Shibin wrote:
Hi Maryam,
I have one more question.
Regards,
Shibin
-----Original Message-----
From: Koikkara Reeny, Shibin <shibin.koikkara.reeny@intel.com><mailto:shibin.koikkara.reeny@intel.com>
Sent: Thursday, November 30, 2023 12:14 PM
To: Tahhan, Maryam <mtahhan@redhat.com><mailto:mtahhan@redhat.com>; ferruh.yigit@amd.com<mailto:ferruh.yigit@amd.com>;
stephen@networkplumber.org<mailto:stephen@networkplumber.org>; lihuisong@huawei.com<mailto:lihuisong@huawei.com>;
fengchengwen@huawei.com<mailto:fengchengwen@huawei.com>; liuyonglong@huawei.com<mailto:liuyonglong@huawei.com>
Cc: dev@dpdk.org<mailto:dev@dpdk.org>; Tahhan, Maryam <mtahhan@redhat.com><mailto:mtahhan@redhat.com>
Subject: RE: [v1] net/af_xdp: enable a sock path alongside use_cni
Hi Maryam,
I have added some suggestion below.
Regrads,
Shibin
[snip]
Prerequisites
@@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
capabilities:
add:
- CAP_NET_RAW
- - CAP_BPF
Why the CAP_BPF is removed?
Good question. It's removed because in our case CAP_BPF is only needed when we want to load or unload the XDP program on the interface inside the Pod. In our case the CNI is loading the xdp program on the interface and then we are doing a handshake to get the xskmap file descriptor and so we don't need the CAP_BPF.
You will find a detailed listing of the permissions used at different stages when utilizing an XDP prog in this article https://next.redhat.com/2023/07/18/using-ebpf-in-unprivileged-pods/
I'm currently also working on enabling pinned map sharing with the af_xdp vdev eal arguments so we can integrate with bpfman for centralized BPF program lifecycle management [currently under test].
Correct me if I am wrong, Don’t we still need the CAP_BPF for bpf operations?
If CAP_BPF is not need so do we need the CAP_NET_RAW also?
[snip]
[snip]
>
> Prerequisites
>
> @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
>
> capabilities:
>
> add:
>
> - CAP_NET_RAW
>
> - - CAP_BPF
>
> Why the CAP_BPF is removed?
>
> Good question. It's removed because in our case CAP_BPF is only needed
> when we want to load or unload the XDP program on the interface inside
> the Pod. In our case the CNI is loading the xdp program on the
> interface and then we are doing a handshake to get the xskmap file
> descriptor and so we don't need the CAP_BPF.
>
> You will find a detailed listing of the permissions used at different
> stages when utilizing an XDP prog in this article
> https://next.redhat.com/2023/07/18/using-ebpf-in-unprivileged-pods/
>
> I'm currently also working on enabling pinned map sharing with the
> af_xdp vdev eal arguments so we can integrate with bpfman for
> centralized BPF program lifecycle management [currently under test].
>
> Correct me if I am wrong, Don’t we still need the CAP_BPF for bpf
> operations?
>
The only BPF operation in the Pod (when using the AF_XDP CNI) is
interacting with the BPF maps via bpf_map_update_elem().
There's no BPF load/unload operations when you are using the AF_XDP DP
and CNI (it does this for you) similar to what bpfman is doing in [1].
To interact with BPF maps you don't need CAP_BPF since the 5.19 Kernel
please see [2], in addition to the previous links. In other words the
only time you should need cap_bpf to access a map is if your kernel is
<= 5.18. Which was the note I was referring to when I said I would
update the documentation.
I've tested this patch in pod on Fedora 38 Kernel version:
6.5.12-200.fc38.x86_64 and there you don't need CAP_BPF.
Additionally Ubuntu 22.04 LTS is now shipping with a 6.2 Kernel [3], so
you will not need it there either.
We don't want to give the pods more permissions than they need. CAP_BPF
is in IMO an elevated permission.
> If CAP_BPF is not need so do we need the CAP_NET_RAW also?
>
You need the CAP_NET_RAW to create the AF_XDP socket.
> [snip]
>
[1]
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=c8644cd0efe7
[2]
https://bpfd.dev/developer-guide/linux-capabilities/#removing-cap_bpf-from-bpfman-clients
[3]
https://tuxcare.com/blog/ubuntu-22-04-3-is-here-with-linux-kernel-6-2/#:~:text=Initially%20released%20on%20April%2021,Ubuntu%2023.04%20(Lunar%20Lobster).
Hello,
On Thu, Nov 30, 2023 at 10:13 AM Maryam Tahhan <mtahhan@redhat.com> wrote:
[snip]
> diff --git a/doc/guides/howto/af_xdp_cni.rst b/doc/guides/howto/af_xdp_cni.rst
> index a1a6d5b99c..a2d90c665d 100644
> --- a/doc/guides/howto/af_xdp_cni.rst
> +++ b/doc/guides/howto/af_xdp_cni.rst
> @@ -38,9 +38,10 @@ The XSKMAP is a BPF map of AF_XDP sockets (XSK).
> The client can then proceed with creating an AF_XDP socket
> and inserting that socket into the XSKMAP pointed to by the descriptor.
>
> -The EAL vdev argument ``use_cni`` is used to indicate that the user wishes
> +The EAL vdev arguments ``use_cni`` and ``sock`` are used to indicate that the user wishes
> to run the PMD in unprivileged mode and to receive the XSKMAP file descriptor
> from the CNI.
> +
> When this flag is set,
> the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag
> should be used when creating the socket
> @@ -49,7 +50,7 @@ Instead the loading is handled by the CNI.
>
> .. note::
>
> - The Unix Domain Socket file path appear in the end user is "/tmp/afxdp.sock".
> + The Unix Domain Socket file path appears to the end user at "/tmp/afxdp_dp/<netdev>/afxdp.sock".
>
>
> Prerequisites
> @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
> capabilities:
> add:
> - CAP_NET_RAW
> - - CAP_BPF
> resources:
> requests:
> hugepages-2Mi: 2Gi
> @@ -245,7 +245,17 @@ Howto run dpdk-testpmd with CNI plugin:
>
> kubectl exec -i <Pod name> --container <containers name> -- \
> /<Path>/dpdk-testpmd -l 0,1 --no-pci \
> - --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
> + --vdev=net_af_xdp0,use_cni=1,iface=<interface name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
> + -- --no-mlockall --in-memory
Quick look at the doc update.
- is this hunk related to $subject?
- --in-memory is not a testpmd level option, but an EAL one.
On 01/12/2023 10:26, David Marchand wrote:
> Hello,
>
> On Thu, Nov 30, 2023 at 10:13 AM Maryam Tahhan<mtahhan@redhat.com> wrote:
> [snip]
>> diff --git a/doc/guides/howto/af_xdp_cni.rst b/doc/guides/howto/af_xdp_cni.rst
>> index a1a6d5b99c..a2d90c665d 100644
>> --- a/doc/guides/howto/af_xdp_cni.rst
>> +++ b/doc/guides/howto/af_xdp_cni.rst
>> @@ -38,9 +38,10 @@ The XSKMAP is a BPF map of AF_XDP sockets (XSK).
>> The client can then proceed with creating an AF_XDP socket
>> and inserting that socket into the XSKMAP pointed to by the descriptor.
>>
>> -The EAL vdev argument ``use_cni`` is used to indicate that the user wishes
>> +The EAL vdev arguments ``use_cni`` and ``sock`` are used to indicate that the user wishes
>> to run the PMD in unprivileged mode and to receive the XSKMAP file descriptor
>> from the CNI.
>> +
>> When this flag is set,
>> the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag
>> should be used when creating the socket
>> @@ -49,7 +50,7 @@ Instead the loading is handled by the CNI.
>>
>> .. note::
>>
>> - The Unix Domain Socket file path appear in the end user is "/tmp/afxdp.sock".
>> + The Unix Domain Socket file path appears to the end user at "/tmp/afxdp_dp/<netdev>/afxdp.sock".
>>
>>
>> Prerequisites
>> @@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
>> capabilities:
>> add:
>> - CAP_NET_RAW
>> - - CAP_BPF
>> resources:
>> requests:
>> hugepages-2Mi: 2Gi
>> @@ -245,7 +245,17 @@ Howto run dpdk-testpmd with CNI plugin:
>>
>> kubectl exec -i <Pod name> --container <containers name> -- \
>> /<Path>/dpdk-testpmd -l 0,1 --no-pci \
>> - --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
>> + --vdev=net_af_xdp0,use_cni=1,iface=<interface name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
>> + -- --no-mlockall --in-memory
> Quick look at the doc update.
> - is this hunk related to $subject?
> - --in-memory is not a testpmd level option, but an EAL one.
>
>
Yeah - I actually will remove the `--no-mlockall --in-memory` in the v2
respin (it's a typo). I'm only interested in showing the multiple af_xdp
device (vdev) arguments. I think it's useful for anyone who is looking
for a quick reference on how to do it.
On 01/12/2023 10:31, Maryam Tahhan wrote:
> Yeah - I actually will remove the `--no-mlockall --in-memory` in the
> v2 respin (it's a typo). I'm only interested in showing the multiple
> af_xdp device (vdev) arguments. I think it's useful for anyone who is
> looking for a quick reference on how to do it
I might distill it to one example rather than the two I have there now.
From: Maryam Tahhan <mtahhan@redhat.com>
Sent: Friday, December 1, 2023 10:20 AM
To: Koikkara Reeny, Shibin <shibin.koikkara.reeny@intel.com>; ferruh.yigit@amd.com; stephen@networkplumber.org; lihuisong@huawei.com; fengchengwen@huawei.com; liuyonglong@huawei.com
Cc: dev@dpdk.org
Subject: Re: [v1] net/af_xdp: enable a sock path alongside use_cni
[snip]
Prerequisites
@@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
capabilities:
add:
- CAP_NET_RAW
- - CAP_BPF
Why the CAP_BPF is removed?
Good question. It's removed because in our case CAP_BPF is only needed when we want to load or unload the XDP program on the interface inside the Pod. In our case the CNI is loading the xdp program on the interface and then we are doing a handshake to get the xskmap file descriptor and so we don't need the CAP_BPF.
You will find a detailed listing of the permissions used at different stages when utilizing an XDP prog in this article https://next.redhat.com/2023/07/18/using-ebpf-in-unprivileged-pods/
I'm currently also working on enabling pinned map sharing with the af_xdp vdev eal arguments so we can integrate with bpfman for centralized BPF program lifecycle management [currently under test].
Correct me if I am wrong, Don’t we still need the CAP_BPF for bpf operations?
The only BPF operation in the Pod (when using the AF_XDP CNI) is interacting with the BPF maps via bpf_map_update_elem().
There's no BPF load/unload operations when you are using the AF_XDP DP and CNI (it does this for you) similar to what bpfman is doing in [1]. To interact with BPF maps you don't need CAP_BPF since the 5.19 Kernel please see [2], in addition to the previous links. In other words the only time you should need cap_bpf to access a map is if your kernel is <= 5.18. Which was the note I was referring to when I said I would update the documentation.
=======
Please add this also to the V2.
I've tested this patch in pod on Fedora 38 Kernel version: 6.5.12-200.fc38.x86_64 and there you don't need CAP_BPF.
Additionally Ubuntu 22.04 LTS is now shipping with a 6.2 Kernel [3], so you will not need it there either.
We don't want to give the pods more permissions than they need. CAP_BPF is in IMO an elevated permission.
If CAP_BPF is not need so do we need the CAP_NET_RAW also?
You need the CAP_NET_RAW to create the AF_XDP socket.
[snip]
[1] https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=c8644cd0efe7
[2] https://bpfd.dev/developer-guide/linux-capabilities/#removing-cap_bpf-from-bpfman-clients
[3] https://tuxcare.com/blog/ubuntu-22-04-3-is-here-with-linux-kernel-6-2/#:~:text=Initially%20released%20on%20April%2021,Ubuntu%2023.04%20(Lunar%20Lobster).
@@ -38,9 +38,10 @@ The XSKMAP is a BPF map of AF_XDP sockets (XSK).
The client can then proceed with creating an AF_XDP socket
and inserting that socket into the XSKMAP pointed to by the descriptor.
-The EAL vdev argument ``use_cni`` is used to indicate that the user wishes
+The EAL vdev arguments ``use_cni`` and ``sock`` are used to indicate that the user wishes
to run the PMD in unprivileged mode and to receive the XSKMAP file descriptor
from the CNI.
+
When this flag is set,
the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag
should be used when creating the socket
@@ -49,7 +50,7 @@ Instead the loading is handled by the CNI.
.. note::
- The Unix Domain Socket file path appear in the end user is "/tmp/afxdp.sock".
+ The Unix Domain Socket file path appears to the end user at "/tmp/afxdp_dp/<netdev>/afxdp.sock".
Prerequisites
@@ -224,7 +225,6 @@ Howto run dpdk-testpmd with CNI plugin:
capabilities:
add:
- CAP_NET_RAW
- - CAP_BPF
resources:
requests:
hugepages-2Mi: 2Gi
@@ -245,7 +245,17 @@ Howto run dpdk-testpmd with CNI plugin:
kubectl exec -i <Pod name> --container <containers name> -- \
/<Path>/dpdk-testpmd -l 0,1 --no-pci \
- --vdev=net_af_xdp0,use_cni=1,iface=<interface name> \
+ --vdev=net_af_xdp0,use_cni=1,iface=<interface name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
+ -- --no-mlockall --in-memory
+
+for multiple devices use:
+
+ .. code-block:: console
+
+ kubectl exec -i <Pod name> --container <containers name> -- \
+ /<Path>/dpdk-testpmd -l 0-2 --no-pci \
+ --vdev=net_af_xdp0,use_cni=1,iface=<interface name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
+ --vdev=net_af_xdp1,use_cni=1,iface=<interface name>,sock=/tmp/afxdp_dp/<interface name>/afxdp.sock \
-- --no-mlockall --in-memory
For further reference please use the `e2e`_ test case in `AF_XDP Plugin for Kubernetes`_
@@ -88,7 +88,6 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
#define UDS_MAX_CMD_LEN 64
#define UDS_MAX_CMD_RESP 128
#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd"
-#define UDS_SOCK "/tmp/afxdp.sock"
#define UDS_CONNECT_MSG "/connect"
#define UDS_HOST_OK_MSG "/host_ok"
#define UDS_HOST_NAK_MSG "/host_nak"
@@ -171,6 +170,7 @@ struct pmd_internals {
bool custom_prog_configured;
bool force_copy;
bool use_cni;
+ char sock_path[PATH_MAX];
struct bpf_map *map;
struct rte_ether_addr eth_addr;
@@ -191,6 +191,7 @@ struct pmd_process_private {
#define ETH_AF_XDP_BUDGET_ARG "busy_budget"
#define ETH_AF_XDP_FORCE_COPY_ARG "force_copy"
#define ETH_AF_XDP_USE_CNI_ARG "use_cni"
+#define ETH_AF_XDP_SOCK_ARG "sock"
static const char * const valid_arguments[] = {
ETH_AF_XDP_IFACE_ARG,
@@ -201,6 +202,7 @@ static const char * const valid_arguments[] = {
ETH_AF_XDP_BUDGET_ARG,
ETH_AF_XDP_FORCE_COPY_ARG,
ETH_AF_XDP_USE_CNI_ARG,
+ ETH_AF_XDP_SOCK_ARG,
NULL
};
@@ -1351,7 +1353,7 @@ configure_preferred_busy_poll(struct pkt_rx_queue *rxq)
}
static int
-init_uds_sock(struct sockaddr_un *server)
+init_uds_sock(struct sockaddr_un *server, const char *sock_path)
{
int sock;
@@ -1362,7 +1364,7 @@ init_uds_sock(struct sockaddr_un *server)
}
server->sun_family = AF_UNIX;
- strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path));
+ strlcpy(server->sun_path, sock_path, sizeof(server->sun_path));
if (connect(sock, (struct sockaddr *)server, sizeof(struct sockaddr_un)) < 0) {
close(sock);
@@ -1382,7 +1384,7 @@ struct msg_internal {
};
static int
-send_msg(int sock, char *request, int *fd)
+send_msg(int sock, char *request, int *fd, const char *sock_path)
{
int snd;
struct iovec iov;
@@ -1393,7 +1395,7 @@ send_msg(int sock, char *request, int *fd)
memset(&dst, 0, sizeof(dst));
dst.sun_family = AF_UNIX;
- strlcpy(dst.sun_path, UDS_SOCK, sizeof(dst.sun_path));
+ strlcpy(dst.sun_path, sock_path, sizeof(dst.sun_path));
/* Initialize message header structure */
memset(&msgh, 0, sizeof(msgh));
@@ -1471,7 +1473,7 @@ read_msg(int sock, char *response, struct sockaddr_un *s, int *fd)
static int
make_request_cni(int sock, struct sockaddr_un *server, char *request,
- int *req_fd, char *response, int *out_fd)
+ int *req_fd, char *response, int *out_fd, const char *sock_path)
{
int rval;
@@ -1483,7 +1485,7 @@ make_request_cni(int sock, struct sockaddr_un *server, char *request,
if (req_fd == NULL)
rval = write(sock, request, strlen(request));
else
- rval = send_msg(sock, request, req_fd);
+ rval = send_msg(sock, request, req_fd, sock_path);
if (rval < 0) {
AF_XDP_LOG(ERR, "Write error %s\n", strerror(errno));
@@ -1507,7 +1509,7 @@ check_response(char *response, char *exp_resp, long size)
}
static int
-get_cni_fd(char *if_name)
+get_cni_fd(char *if_name, const char *sock_path)
{
char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP];
char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP];
@@ -1520,14 +1522,14 @@ get_cni_fd(char *if_name)
return -1;
memset(&server, 0, sizeof(server));
- sock = init_uds_sock(&server);
+ sock = init_uds_sock(&server, sock_path);
if (sock < 0)
return -1;
/* Initiates handshake to CNI send: /connect,hostname */
snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response, &out_fd, sock_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1541,7 +1543,7 @@ get_cni_fd(char *if_name)
/* Request for "/version" */
strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response, &out_fd, sock_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1549,7 +1551,7 @@ get_cni_fd(char *if_name)
/* Request for file descriptor for netdev name*/
snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response, &out_fd, sock_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1571,7 +1573,7 @@ get_cni_fd(char *if_name)
/* Initiate close connection */
strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
memset(response, 0, sizeof(response));
- if (make_request_cni(sock, &server, request, NULL, response, &out_fd) < 0) {
+ if (make_request_cni(sock, &server, request, NULL, response, &out_fd, sock_path) < 0) {
AF_XDP_LOG(ERR, "Error in processing cmd [%s]\n", request);
goto err_close;
}
@@ -1698,7 +1700,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
int err, fd, map_fd;
/* get socket fd from CNI plugin */
- map_fd = get_cni_fd(internals->if_name);
+ map_fd = get_cni_fd(internals->if_name, internals->sock_path);
if (map_fd < 0) {
AF_XDP_LOG(ERR, "Failed to receive CNI plugin fd\n");
goto out_xsk;
@@ -2023,7 +2025,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
static int
parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
int *queue_cnt, int *shared_umem, char *prog_path,
- int *busy_budget, int *force_copy, int *use_cni)
+ int *busy_budget, int *force_copy, int *use_cni,
+ char *sock_path)
{
int ret;
@@ -2069,6 +2072,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
if (ret < 0)
goto free_kvlist;
+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SOCK_ARG,
+ &parse_prog_arg, sock_path);
+ if (ret < 0)
+ goto free_kvlist;
+
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
@@ -2108,7 +2116,7 @@ static struct rte_eth_dev *
init_internals(struct rte_vdev_device *dev, const char *if_name,
int start_queue_idx, int queue_cnt, int shared_umem,
const char *prog_path, int busy_budget, int force_copy,
- int use_cni)
+ int use_cni, const char *sock_path)
{
const char *name = rte_vdev_device_name(dev);
const unsigned int numa_node = dev->device.numa_node;
@@ -2138,6 +2146,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
internals->shared_umem = shared_umem;
internals->force_copy = force_copy;
internals->use_cni = use_cni;
+ strlcpy(internals->sock_path, sock_path, PATH_MAX);
if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
&internals->combined_queue_cnt)) {
@@ -2328,6 +2337,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
int busy_budget = -1, ret;
int force_copy = 0;
int use_cni = 0;
+ char sock_path[PATH_MAX] = {'\0'};
struct rte_eth_dev *eth_dev = NULL;
const char *name = rte_vdev_device_name(dev);
@@ -2370,7 +2380,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
&xsk_queue_cnt, &shared_umem, prog_path,
- &busy_budget, &force_copy, &use_cni) < 0) {
+ &busy_budget, &force_copy, &use_cni, sock_path) < 0) {
AF_XDP_LOG(ERR, "Invalid kvargs value\n");
return -EINVAL;
}
@@ -2387,6 +2397,13 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
return -EINVAL;
}
+ if (use_cni && !strnlen(sock_path, PATH_MAX)) {
+ AF_XDP_LOG(ERR, "When '%s' parameter is used, '%s' must also be provided\n",
+ ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_SOCK_ARG);
+ return -EINVAL;
+ }
+
+
if (strlen(if_name) == 0) {
AF_XDP_LOG(ERR, "Network interface must be specified\n");
return -EINVAL;
@@ -2410,7 +2427,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
xsk_queue_cnt, shared_umem, prog_path,
- busy_budget, force_copy, use_cni);
+ busy_budget, force_copy, use_cni, sock_path);
if (eth_dev == NULL) {
AF_XDP_LOG(ERR, "Failed to init internals\n");
return -1;
@@ -2471,4 +2488,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
"xdp_prog=<string> "
"busy_budget=<int> "
"force_copy=<int> "
- "use_cni=<int> ");
+ "use_cni=<int> "
+ "sock=<string> ");