[v2,3/6] lib/vhost: restrict pointer aliasing for packed vpmd
Checks
Commit Message
Restrict pointer aliasing to allow the compiler to vectorize loops
more aggressively.
With this patch, a 9.6% improvement is observed in throughput for
the packed virtio-net PVP case, and a 2.8% improvement in throughput
for the packed virtio-user PVP case. All performance data are measured
under 0.001% acceptable packet loss with 1 core on both vhost and
virtio side.
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
drivers/net/virtio/virtio_rxtx_simple_neon.c | 5 +++--
lib/librte_vhost/virtio_net.c | 14 +++++++-------
2 files changed, 10 insertions(+), 9 deletions(-)
Comments
On Mon, Jul 6, 2020 at 9:50 AM Joyce Kong <joyce.kong@arm.com> wrote:
>
> Restrict pointer aliasing to allow the compiler to vectorize loop
> more aggressively.
>
> With this patch, a 9.6% improvement is observed in throughput for
> the packed virtio-net PVP case, and a 2.8% improvement in throughput
> for the packed virtio-user PVP case. All performance data are measured
> under 0.001% acceptable packet loss with 1 core on both vhost and
> virtio side.
>
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> ---
> drivers/net/virtio/virtio_rxtx_simple_neon.c | 5 +++--
> lib/librte_vhost/virtio_net.c | 14 +++++++-------
> 2 files changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c b/drivers/net/virtio/virtio_rxtx_simple_neon.c
> index 5febfb0f5..31824a931 100644
> --- a/drivers/net/virtio/virtio_rxtx_simple_neon.c
> +++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c
> @@ -36,8 +36,9 @@
> * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
> */
> uint16_t
> -virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf
> - **__rte_restrict rx_pkts, uint16_t nb_pkts)
> +virtio_recv_pkts_vec(void *rx_queue,
> + struct rte_mbuf **__rte_restrict rx_pkts,
> + uint16_t nb_pkts)
> {
> struct virtnet_rx *rxvq = rx_queue;
> struct virtqueue *vq = rxvq->vq;
For the neon bits, I trust you.
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 751c1f373..e60358251 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -1133,8 +1133,8 @@ virtio_dev_rx_single_packed(struct virtio_net *dev,
>
> static __rte_noinline uint32_t
> virtio_dev_rx_packed(struct virtio_net *dev,
> - struct vhost_virtqueue *vq,
> - struct rte_mbuf **pkts,
> + struct vhost_virtqueue *__rte_restrict vq,
> + struct rte_mbuf **__rte_restrict pkts,
> uint32_t count)
> {
> uint32_t pkt_idx = 0;
But for the generic part, I'd like to get others opinion.
Added Zhihong and Adrian.
> @@ -1219,7 +1219,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>
> uint16_t
> rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
> - struct rte_mbuf **pkts, uint16_t count)
> + struct rte_mbuf **__rte_restrict pkts, uint16_t count)
> {
> struct virtio_net *dev = get_device(vid);
>
> @@ -2124,9 +2124,9 @@ free_zmbuf(struct vhost_virtqueue *vq)
>
> static __rte_noinline uint16_t
> virtio_dev_tx_packed_zmbuf(struct virtio_net *dev,
> - struct vhost_virtqueue *vq,
> + struct vhost_virtqueue *__rte_restrict vq,
> struct rte_mempool *mbuf_pool,
> - struct rte_mbuf **pkts,
> + struct rte_mbuf **__rte_restrict pkts,
> uint32_t count)
> {
> uint32_t pkt_idx = 0;
> @@ -2160,9 +2160,9 @@ virtio_dev_tx_packed_zmbuf(struct virtio_net *dev,
>
> static __rte_noinline uint16_t
> virtio_dev_tx_packed(struct virtio_net *dev,
> - struct vhost_virtqueue *vq,
> + struct vhost_virtqueue *__rte_restrict vq,
> struct rte_mempool *mbuf_pool,
> - struct rte_mbuf **pkts,
> + struct rte_mbuf **__rte_restrict pkts,
> uint32_t count)
> {
> uint32_t pkt_idx = 0;
> --
> 2.27.0
>
@@ -36,8 +36,9 @@
* - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
*/
uint16_t
-virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf
- **__rte_restrict rx_pkts, uint16_t nb_pkts)
+virtio_recv_pkts_vec(void *rx_queue,
+ struct rte_mbuf **__rte_restrict rx_pkts,
+ uint16_t nb_pkts)
{
struct virtnet_rx *rxvq = rx_queue;
struct virtqueue *vq = rxvq->vq;
@@ -1133,8 +1133,8 @@ virtio_dev_rx_single_packed(struct virtio_net *dev,
static __rte_noinline uint32_t
virtio_dev_rx_packed(struct virtio_net *dev,
- struct vhost_virtqueue *vq,
- struct rte_mbuf **pkts,
+ struct vhost_virtqueue *__rte_restrict vq,
+ struct rte_mbuf **__rte_restrict pkts,
uint32_t count)
{
uint32_t pkt_idx = 0;
@@ -1219,7 +1219,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
uint16_t
rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
- struct rte_mbuf **pkts, uint16_t count)
+ struct rte_mbuf **__rte_restrict pkts, uint16_t count)
{
struct virtio_net *dev = get_device(vid);
@@ -2124,9 +2124,9 @@ free_zmbuf(struct vhost_virtqueue *vq)
static __rte_noinline uint16_t
virtio_dev_tx_packed_zmbuf(struct virtio_net *dev,
- struct vhost_virtqueue *vq,
+ struct vhost_virtqueue *__rte_restrict vq,
struct rte_mempool *mbuf_pool,
- struct rte_mbuf **pkts,
+ struct rte_mbuf **__rte_restrict pkts,
uint32_t count)
{
uint32_t pkt_idx = 0;
@@ -2160,9 +2160,9 @@ virtio_dev_tx_packed_zmbuf(struct virtio_net *dev,
static __rte_noinline uint16_t
virtio_dev_tx_packed(struct virtio_net *dev,
- struct vhost_virtqueue *vq,
+ struct vhost_virtqueue *__rte_restrict vq,
struct rte_mempool *mbuf_pool,
- struct rte_mbuf **pkts,
+ struct rte_mbuf **__rte_restrict pkts,
uint32_t count)
{
uint32_t pkt_idx = 0;