On 17-Aug-18 11:51 AM, Jeff Guo wrote:
> This patch adds req notifier processing to enable hotplug for vfio.
> When a device is hot-unplugged, the vfio kernel module sends a req
> notifier to request that user space release the allocated resources. It
> can use the bus failure mechanism to trigger the current hotplug
> procedure in user space. After that, the vfio kernel module will detect
> that the device has disappeared, and then release its kernel resources.
>
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
<snip>
> + * resources before resources be released in kernel, so it can directly
> + * call the bus failure handler to use the specific mechanism in
> + * user space to handle it.
> + */
> + ret = bus->memory_failure_handler(device);
> + if (ret) {
> + RTE_LOG(ERR, EAL, "Can not handle failure for "
> + "device (%s)\n", device->name);
> + return;
Return is not needed here :)
Otherwise, LGTM
Anatoly, thanks for your review, please see comment as below.
On 9/26/2018 8:28 PM, Burakov, Anatoly wrote:
> On 17-Aug-18 11:51 AM, Jeff Guo wrote:
>> This patch will add req notifier processing to enable hotplug for vfio.
>> When device is be hotplug-out, the vfio kernel module will sent req
>> notifier to request user space to release the allocated resources. It
>> could use the bus failure mechanism to trigger the currently hotplug
>> procedure of user space. After that, vfio kernel module will detect the
>> device disappear, and then release the kernel resource of the device.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>
> <snip>
>
>> + * resources before resources be released in kernel, so it can
>> directly
>> + * call the bus failure handler to use the specific mechanism in
>> + * user space to handle it.
>> + */
>> + ret = bus->memory_failure_handler(device);
>> + if (ret) {
>> + RTE_LOG(ERR, EAL, "Can not handle failure for "
>> + "device (%s)\n", device->name);
>> + return;
>
> Return is not needed here :)
>
> Otherwise, LGTM
>
yes, will modify it.
@@ -17,6 +17,8 @@
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
#include <rte_vfio.h>
+#include <rte_eal.h>
+#include <rte_bus.h>
#include "eal_filesystem.h"
@@ -277,6 +279,92 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
return -1;
}
+static void
+pci_vfio_req_handler(void *param)
+{
+ struct rte_bus *bus;
+ int ret;
+ struct rte_device *device = (struct rte_device *)param;
+
+ bus = rte_bus_find_by_device(device);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+ device->name);
+ return;
+ }
+
+ /**
+ * vfio kernel module request user space to release allocated
+ * resources before resources be released in kernel, so it can directly
+ * call the bus failure handler to use the specific mechanism in
+ * user space to handle it.
+ */
+ ret = bus->memory_failure_handler(device);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Can not handle failure for "
+ "device (%s)\n", device->name);
+ return;
+ }
+}
+
+/* enable notifier (only enable req now) */
+static int
+pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+ int ret;
+ int fd = -1;
+
+ /* set up an eventfd for req notifier */
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot set up eventfd, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ dev->req_notifier_handler.fd = fd;
+ dev->req_notifier_handler.type = RTE_INTR_HANDLE_VFIO_REQ;
+ dev->req_notifier_handler.vfio_dev_fd = vfio_dev_fd;
+ ret = rte_intr_callback_register(&dev->req_notifier_handler,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n");
+ return -1;
+ }
+
+ ret = rte_intr_enable(&dev->req_notifier_handler);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*disable notifier (only disable req now) */
+static int
+pci_vfio_disable_notifier(struct rte_pci_device *dev)
+{
+ int ret;
+
+ ret = rte_intr_callback_unregister(&dev->req_notifier_handler,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to unregister req notifier handler.\n");
+ return -1;
+ }
+
+ ret = rte_intr_enable(&dev->req_notifier_handler);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
static int
pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
{
@@ -430,6 +518,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+ dev->req_notifier_handler.fd = -1;
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -521,6 +610,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
goto err_vfio_res;
}
+ if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
+ RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
+ return -1;
+ }
+
TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
return 0;
@@ -546,6 +640,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+ dev->req_notifier_handler.fd = -1;
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -586,6 +681,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
/* we need save vfio_dev_fd, so it can be used during release */
dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+ dev->req_notifier_handler.vfio_dev_fd = vfio_dev_fd;
return 0;
err_vfio_dev_fd:
@@ -658,12 +754,20 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+ pci_vfio_disable_notifier(dev);
+
if (close(dev->intr_handle.fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
pci_addr);
return -1;
}
+ if (close(dev->req_notifier_handler.fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when closing req eventfd file "
+ "descriptor for %s\n", pci_addr);
+ return -1;
+ }
+
if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",
pci_addr);
@@ -446,6 +446,16 @@ pci_memory_failure_handler(struct rte_device *dev)
return -1;
switch (pdev->kdrv) {
+ case RTE_KDRV_VFIO:
+ /*
+ * The vfio kernel module guarantees that the PCI device will not
+ * be deleted until user space releases the resources, so there is
+ * no need to remap the mmio resource here; just directly notify
+ * the req event to user space to handle it.
+ */
+ rte_dev_event_callback_process(dev->name,
+ RTE_DEV_EVENT_REQ);
+ break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
case RTE_KDRV_NIC_UIO:
@@ -4424,6 +4424,7 @@ eth_dev_event_callback(const char *device_name, enum rte_dev_event_type type,
struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)arg;
switch (type) {
+ case RTE_DEV_EVENT_REQ:
case RTE_DEV_EVENT_REMOVE:
RTE_ETHDEV_LOG(INFO, "The device %s has been removed!\n",
device_name);