@@ -46,7 +46,7 @@ vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
}
static void
-vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);
+vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq);
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
@@ -98,7 +98,7 @@ vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *
if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
vhost_user_iotlb_pending_remove_all(vq);
else
- vhost_user_iotlb_cache_random_evict(vq);
+ vhost_user_iotlb_cache_random_evict(dev, vq);
node = vhost_user_iotlb_pool_get(vq);
if (node == NULL) {
VHOST_LOG_CONFIG(dev->ifname, ERR,
@@ -142,14 +142,15 @@ vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
}
static void
-vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
+vhost_user_iotlb_cache_remove_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
struct vhost_iotlb_entry *node, *temp_node;
rte_rwlock_write_lock(&vq->iotlb_lock);
RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, false,
+ hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr));
TAILQ_REMOVE(&vq->iotlb_list, node, next);
vhost_user_iotlb_pool_put(vq, node);
}
@@ -160,9 +161,10 @@ vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
}
static void
-vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
+vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- struct vhost_iotlb_entry *node, *temp_node;
+ struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
+ uint64_t alignment, mask;
int entry_idx;
rte_rwlock_write_lock(&vq->iotlb_lock);
@@ -171,12 +173,26 @@ vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
if (!entry_idx) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ struct vhost_iotlb_entry *next_node;
+ alignment = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
+ mask = ~(alignment - 1);
+
+ /* Don't disable coredump if the previous node is in the same page */
+ if (prev_node == NULL ||
+ (node->uaddr & mask) != (prev_node->uaddr & mask)) {
+ next_node = RTE_TAILQ_NEXT(node, next);
+ /* Don't disable coredump if the next node is in the same page */
+ if (next_node == NULL ||
+ (node->uaddr & mask) != (next_node->uaddr & mask))
+ mem_set_dump((void *)(uintptr_t)node->uaddr, node->size,
+ false, alignment);
+ }
TAILQ_REMOVE(&vq->iotlb_list, node, next);
vhost_user_iotlb_pool_put(vq, node);
vq->iotlb_cache_nr--;
break;
}
+ prev_node = node;
entry_idx--;
}
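
The page-sharing checks above are the point of this hunk: madvise() operates on whole pages, so disabling coredump for an evicted entry's page would also drop neighbouring IOTLB entries that happen to live in the same (huge)page. A standalone sketch of the mask arithmetic, with hypothetical addresses and an assumed 2 MiB hugepage size (not vhost code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t alignment = 1ULL << 21;            /* assumed 2 MiB hugepage */
        uint64_t mask = ~(alignment - 1);
        uint64_t prev_uaddr = 0x7f0000200000ULL;    /* hypothetical HVAs */
        uint64_t node_uaddr = 0x7f0000201000ULL;

        /* Same page: keep it dumpable, the neighbouring entry still needs it. */
        if ((node_uaddr & mask) == (prev_uaddr & mask))
            printf("same page, skip MADV_DONTDUMP\n");
        else
            printf("different pages, safe to MADV_DONTDUMP\n");
        return 0;
    }
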
@@ -196,7 +212,7 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq
"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
vq->index);
if (!TAILQ_EMPTY(&vq->iotlb_list))
- vhost_user_iotlb_cache_random_evict(vq);
+ vhost_user_iotlb_cache_random_evict(dev, vq);
else
vhost_user_iotlb_pending_remove_all(vq);
new_node = vhost_user_iotlb_pool_get(vq);
@@ -224,14 +240,16 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq
vhost_user_iotlb_pool_put(vq, new_node);
goto unlock;
} else if (node->iova > new_node->iova) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
+ hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
TAILQ_INSERT_BEFORE(node, new_node, next);
vq->iotlb_cache_nr++;
goto unlock;
}
}
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
+ hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
vq->iotlb_cache_nr++;
@@ -243,10 +261,11 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq
}
void
-vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+vhost_user_iotlb_cache_remove(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t iova, uint64_t size)
{
- struct vhost_iotlb_entry *node, *temp_node;
+ struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
+ uint64_t alignment, mask;
if (unlikely(!size))
return;
@@ -259,11 +278,27 @@ vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
break;
if (iova < node->iova + node->size) {
- mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
+ struct vhost_iotlb_entry *next_node;
+ alignment = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
+ mask = ~(alignment - 1);
+
+ /* Don't disable coredump if the previous node is in the same page */
+ if (prev_node == NULL ||
+ (node->uaddr & mask) != (prev_node->uaddr & mask)) {
+ next_node = RTE_TAILQ_NEXT(node, next);
+ /* Don't disable coredump if the next node is in the same page */
+ if (next_node == NULL ||
+ (node->uaddr & mask) != (next_node->uaddr & mask))
+ mem_set_dump((void *)(uintptr_t)node->uaddr, node->size,
+ false, alignment);
+ }
+
TAILQ_REMOVE(&vq->iotlb_list, node, next);
vhost_user_iotlb_pool_put(vq, node);
vq->iotlb_cache_nr--;
- }
+ } else {
+ prev_node = node;
+ }
}
rte_rwlock_write_unlock(&vq->iotlb_lock);
@@ -312,9 +346,9 @@ vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
}
void
-vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
+vhost_user_iotlb_flush_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- vhost_user_iotlb_cache_remove_all(vq);
+ vhost_user_iotlb_cache_remove_all(dev, vq);
vhost_user_iotlb_pending_remove_all(vq);
}
@@ -329,7 +363,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
* The cache has already been initialized,
* just drop all cached and pending entries.
*/
- vhost_user_iotlb_flush_all(vq);
+ vhost_user_iotlb_flush_all(dev, vq);
rte_free(vq->iotlb_pool);
}
@@ -40,7 +40,7 @@ vhost_user_iotlb_wr_unlock(struct vhost_virtqueue *vq)
void vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t iova, uint64_t uaddr,
uint64_t size, uint8_t perm);
-void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+void vhost_user_iotlb_cache_remove(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t iova, uint64_t size);
uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
uint64_t *size, uint8_t perm);
@@ -50,8 +50,7 @@ void vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqu
uint64_t iova, uint8_t perm);
void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
uint64_t size, uint8_t perm);
-void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
+void vhost_user_iotlb_flush_all(struct virtio_net *dev, struct vhost_virtqueue *vq);
int vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq);
void vhost_user_iotlb_destroy(struct vhost_virtqueue *vq);
-
#endif /* _VHOST_IOTLB_H_ */
@@ -1009,14 +1009,6 @@ mbuf_is_consumed(struct rte_mbuf *m)
return true;
}
-static __rte_always_inline void
-mem_set_dump(__rte_unused void *ptr, __rte_unused size_t size, __rte_unused bool enable)
-{
-#ifdef MADV_DONTDUMP
- if (madvise(ptr, size, enable ? MADV_DODUMP : MADV_DONTDUMP) == -1) {
- rte_log(RTE_LOG_INFO, vhost_config_log_level,
- "VHOST_CONFIG: could not set coredump preference (%s).\n", strerror(errno));
- }
-#endif
-}
+uint64_t hua_to_alignment(struct rte_vhost_memory *mem, void *ptr);
+void mem_set_dump(void *ptr, size_t size, bool enable, uint64_t alignment);
#endif /* _VHOST_NET_CDEV_H_ */
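
The inline removed above passed the caller's address straight to madvise(), whose start address must be page-aligned; entry addresses inside hugepage-backed regions usually are not, so the call could fail outright (and, when it did succeed, it affected whole pages anyway). A standalone Linux demonstration of the alignment requirement (not vhost code):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long pagesz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, 2 * pagesz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
            return 1;

        /* Unaligned start address: fails with EINVAL on Linux. */
        if (madvise(p + 1, pagesz, MADV_DONTDUMP) == -1)
            printf("unaligned madvise: %s\n", strerror(errno));

        /* Page-aligned start address: succeeds. */
        if (madvise(p, pagesz, MADV_DONTDUMP) == 0)
            printf("aligned madvise: ok\n");

        munmap(p, 2 * pagesz);
        return 0;
    }
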
@@ -737,6 +737,40 @@ log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq)
return log_gpa;
}
+uint64_t
+hua_to_alignment(struct rte_vhost_memory *mem, void *ptr)
+{
+ struct rte_vhost_mem_region *r;
+ uint32_t i;
+ uintptr_t hua = (uintptr_t)ptr;
+
+ for (i = 0; i < mem->nregions; i++) {
+ r = &mem->regions[i];
+ if (hua >= r->host_user_addr &&
+ hua < r->host_user_addr + r->size) {
+ return get_blk_size(r->fd);
+ }
+ }
+
+ /* If region isn't found, don't align at all */
+ return 1;
+}
+
+void
+mem_set_dump(void *ptr, size_t size, bool enable, uint64_t pagesz)
+{
+#ifdef MADV_DONTDUMP
+ void *start = RTE_PTR_ALIGN_FLOOR(ptr, pagesz);
+ uintptr_t end = RTE_ALIGN_CEIL((uintptr_t)ptr + size, pagesz);
+ size_t len = end - (uintptr_t)start;
+
+ if (madvise(start, len, enable ? MADV_DODUMP : MADV_DONTDUMP) == -1) {
+ rte_log(RTE_LOG_INFO, vhost_config_log_level,
+ "VHOST_CONFIG: could not set coredump preference (%s).\n", strerror(errno));
+ }
+#endif
+}
+
static void
translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
{
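
The new mem_set_dump() widens the range to page boundaries before calling madvise(), using the backing page size reported by hua_to_alignment(). A worked example of the rounding, with hypothetical values and an assumed 2 MiB page size (not vhost code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t pagesz = 1ULL << 21;                /* assumed 2 MiB pages */
        uintptr_t ptr = 0x7f0000200000ULL + 0x1800;  /* hypothetical HVA */
        size_t size = 0x1000;

        /* Open-coded RTE_PTR_ALIGN_FLOOR / RTE_ALIGN_CEIL */
        uintptr_t start = ptr & ~(pagesz - 1);
        uintptr_t end = (ptr + size + pagesz - 1) & ~(pagesz - 1);

        /* Prints 0x7f0000200000 and 0x200000: one whole hugepage. */
        printf("madvise(%#lx, %#lx)\n", (unsigned long)start,
               (unsigned long)(end - start));
        return 0;
    }
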
@@ -767,6 +801,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->desc_packed, len, true,
+ hua_to_alignment(dev->mem, vq->desc_packed));
numa_realloc(&dev, &vq);
*pdev = dev;
*pvq = vq;
@@ -782,6 +818,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->driver_event, len, true,
+ hua_to_alignment(dev->mem, vq->driver_event));
len = sizeof(struct vring_packed_desc_event);
vq->device_event = (struct vring_packed_desc_event *)
(uintptr_t)ring_addr_to_vva(dev,
@@ -793,9 +831,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
- mem_set_dump(vq->desc_packed, len, true);
- mem_set_dump(vq->driver_event, len, true);
- mem_set_dump(vq->device_event, len, true);
+ mem_set_dump(vq->device_event, len, true,
+ hua_to_alignment(dev->mem, vq->device_event));
vq->access_ok = true;
return;
}
@@ -812,6 +849,7 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->desc, len, true, hua_to_alignment(dev->mem, vq->desc));
numa_realloc(&dev, &vq);
*pdev = dev;
*pvq = vq;
@@ -827,6 +865,7 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->avail, len, true, hua_to_alignment(dev->mem, vq->avail));
len = sizeof(struct vring_used) +
sizeof(struct vring_used_elem) * vq->size;
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
@@ -839,6 +878,8 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
return;
}
+ mem_set_dump(vq->used, len, true, hua_to_alignment(dev->mem, vq->used));
+
if (vq->last_used_idx != vq->used->idx) {
VHOST_LOG_CONFIG(dev->ifname, WARNING,
"last_used_idx (%u) and vq->used->idx (%u) mismatches;\n",
@@ -849,9 +890,6 @@ translate_ring_addresses(struct virtio_net **pdev, struct vhost_virtqueue **pvq)
"some packets maybe resent for Tx and dropped for Rx\n");
}
- mem_set_dump(vq->desc, len, true);
- mem_set_dump(vq->avail, len, true);
- mem_set_dump(vq->used, len, true);
vq->access_ok = true;
VHOST_LOG_CONFIG(dev->ifname, DEBUG, "mapped address desc: %p\n", vq->desc);
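
Moving each mem_set_dump() call next to its address translation, in both the packed-ring and split-ring paths above, is more than cosmetic: `len` is reassigned for every structure, so the old trailing calls all used whichever length happened to be assigned last. A trivial illustration of that reuse hazard, with stand-in values (not vhost code):

    #include <stdio.h>

    int main(void)
    {
        unsigned long len;

        len = 16 * 4096UL;  /* stand-in for the descriptor ring length */
        len = 4096UL;       /* reassigned, e.g. for the used ring */

        /* A deferred mem_set_dump(desc, len, ...) would see 4096 here,
         * not the descriptor ring length. */
        printf("len at the old call sites: %lu\n", len);
        return 0;
    }
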
@@ -1230,7 +1268,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
region->mmap_addr = mmap_addr;
region->mmap_size = mmap_size;
region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
- mem_set_dump(mmap_addr, mmap_size, false);
+ mem_set_dump(mmap_addr, mmap_size, false, alignment);
if (dev->async_copy) {
if (add_guest_pages(dev, region, alignment) < 0) {
@@ -1325,7 +1363,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
/* Flush IOTLB cache as previous HVAs are now invalid */
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
for (i = 0; i < dev->nr_vring; i++)
- vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+ vhost_user_iotlb_flush_all(dev, dev->virtqueue[i]);
/*
* If VQ 0 has already been allocated, try to allocate on the same
@@ -1504,6 +1542,7 @@ inflight_mem_alloc(struct virtio_net *dev, const char *name, size_t size, int *f
{
void *ptr;
int mfd = -1;
+ uint64_t alignment;
char fname[20] = "/tmp/memfd-XXXXXX";
*fd = -1;
@@ -1535,7 +1574,8 @@ inflight_mem_alloc(struct virtio_net *dev, const char *name, size_t size, int *f
return NULL;
}
- mem_set_dump(ptr, size, false);
+ alignment = get_blk_size(mfd);
+ mem_set_dump(ptr, size, false, alignment);
*fd = mfd;
return ptr;
}
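
get_blk_size() is the helper this file already uses to derive a mapping's page size from its file descriptor; reusing it for the memfd keeps the inflight area's rounding consistent with guest memory regions. For reference, a minimal fstat()-based sketch of such a helper (the in-tree version may differ in detail):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/stat.h>

    static uint64_t
    get_blk_size(int fd)
    {
        struct stat st;

        /* st_blksize reflects the backing page size for hugetlbfs/memfd. */
        if (fstat(fd, &st) == -1)
            return (uint64_t)-1;    /* callers must treat this as an error */
        return (uint64_t)st.st_blksize;
    }

    int main(void)
    {
        printf("block size of stdin: %" PRIu64 "\n", get_blk_size(0));
        return 0;
    }
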
@@ -1744,7 +1784,7 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev,
dev->inflight_info->fd = -1;
}
- mem_set_dump(addr, mmap_size, false);
+ mem_set_dump(addr, mmap_size, false, get_blk_size(fd));
dev->inflight_info->fd = fd;
dev->inflight_info->addr = addr;
dev->inflight_info->size = mmap_size;
@@ -2151,7 +2191,7 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
ctx->msg.size = sizeof(ctx->msg.payload.state);
ctx->fd_num = 0;
- vhost_user_iotlb_flush_all(vq);
+ vhost_user_iotlb_flush_all(dev, vq);
vring_invalidate(dev, vq);
@@ -2242,6 +2282,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
struct virtio_net *dev = *pdev;
int fd = ctx->fds[0];
uint64_t size, off;
+ uint64_t alignment;
void *addr;
uint32_t i;
@@ -2280,6 +2321,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
* fail when offset is not page size aligned.
*/
addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ alignment = get_blk_size(fd);
close(fd);
if (addr == MAP_FAILED) {
VHOST_LOG_CONFIG(dev->ifname, ERR, "mmap log base failed!\n");
@@ -2296,7 +2338,7 @@ vhost_user_set_log_base(struct virtio_net **pdev,
dev->log_addr = (uint64_t)(uintptr_t)addr;
dev->log_base = dev->log_addr + off;
dev->log_size = size;
- mem_set_dump(addr, size, false);
+ mem_set_dump(addr, size + off, false, alignment);
for (i = 0; i < dev->nr_vring; i++) {
struct vhost_virtqueue *vq = dev->virtqueue[i];
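
Two details in this hunk are easy to miss: the alignment is captured before the close(fd) that immediately follows the mmap(), because the lookup needs a live descriptor, and the DONTDUMP range now covers the full size + off mapping instead of stopping off bytes short of its end. A tiny demonstration of the descriptor-lifetime point (not vhost code):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        struct stat st;
        int fd = dup(0);    /* any valid descriptor */

        close(fd);
        if (fstat(fd, &st) == -1)
            printf("fstat after close: %s\n", strerror(errno)); /* EBADF */
        return 0;
    }
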
@@ -2618,7 +2660,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
if (!vq)
continue;
- vhost_user_iotlb_cache_remove(vq, imsg->iova,
+ vhost_user_iotlb_cache_remove(dev, vq, imsg->iova,
imsg->size);
if (is_vring_iotlb(dev, vq, imsg)) {