[dpdk-dev,v2] devargs: add blacklisting by linux interface name
Commit Message
If a system is using deterministic interface names, it may be easier in
some cases to use the interface name to blacklist an interface.
Signed-off-by: Chas Williams <3chas3@gmail.com>
---
app/test/test_devargs.c | 2 ++
lib/librte_eal/common/eal_common_devargs.c | 9 +++++++--
lib/librte_eal/common/eal_common_options.c | 2 +-
lib/librte_eal/common/eal_common_pci.c | 10 ++++++++--
lib/librte_eal/common/include/rte_devargs.h | 2 ++
lib/librte_eal/common/include/rte_pci.h | 1 +
lib/librte_eal/linuxapp/eal/eal_pci.c | 15 +++++++++++++++
7 files changed, 36 insertions(+), 5 deletions(-)
Comments
On Mon, 5 Oct 2015 11:26:08 -0400
Chas Williams <3chas3@gmail.com> wrote:
> diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> index 83e3c28..852c149 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -161,6 +161,7 @@ struct rte_pci_device {
> struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
> struct rte_intr_handle intr_handle; /**< Interrupt handle */
> struct rte_pci_driver *driver; /**< Associated driver */
> + char name[32];
Why not use IFNAMSIZ rather than magic constant here?
On Tue, 2015-10-06 at 08:35 +0100, Stephen Hemminger wrote:
> On Mon, 5 Oct 2015 11:26:08 -0400
> Chas Williams <3chas3@gmail.com> wrote:
>
> > diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> > index 83e3c28..852c149 100644
> > --- a/lib/librte_eal/common/include/rte_pci.h
> > +++ b/lib/librte_eal/common/include/rte_pci.h
> > @@ -161,6 +161,7 @@ struct rte_pci_device {
> > struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
> > struct rte_intr_handle intr_handle; /**< Interrupt handle */
> > struct rte_pci_driver *driver; /**< Associated driver */
> > + char name[32];
>
> Why not use IFNAMSIZ rather than magic constant here?
No particular reason. It just matches the virtual device name size.
I will change it.
Hi Chas,
On 10/05/2015 05:26 PM, Chas Williams wrote:
> If a system is using deterministic interface names, it may be easier in
> some cases to use the interface name to blacklist an interface.
>
> Signed-off-by: Chas Williams <3chas3@gmail.com>
> ---
> app/test/test_devargs.c | 2 ++
> lib/librte_eal/common/eal_common_devargs.c | 9 +++++++--
> lib/librte_eal/common/eal_common_options.c | 2 +-
> lib/librte_eal/common/eal_common_pci.c | 10 ++++++++--
> lib/librte_eal/common/include/rte_devargs.h | 2 ++
> lib/librte_eal/common/include/rte_pci.h | 1 +
> lib/librte_eal/linuxapp/eal/eal_pci.c | 15 +++++++++++++++
> 7 files changed, 36 insertions(+), 5 deletions(-)
>
> diff --git a/app/test/test_devargs.c b/app/test/test_devargs.c
> index f7fc59c..27855ff 100644
>
> [...]
>
> @@ -352,6 +354,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
> return -1;
> }
>
> + /* get network interface name */
> + snprintf(filename, sizeof(filename), "%s/net", dirname);
> + dir = opendir(filename);
> + if (dir) {
> + while ((e = readdir(dir)) != NULL) {
> + if (e->d_name[0] == '.')
> + continue;
> +
> + strncpy(dev->name, e->d_name, sizeof(dev->name));
> + }
> + closedir(dir);
> + }
> +
> if (!ret) {
> if (!strcmp(driver, "vfio-pci"))
> dev->kdrv = RTE_KDRV_VFIO;
>
For PCI devices that have several interfaces (I think it's the case for
some Mellanox boards), maybe we should not store the interface name?
Another small comment about the strncpy(): it's maybe safer to ensure
that dev->name is properly nul-terminated.
Regards,
Olivier
On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> Hi Chas,
>
> > @@ -352,6 +354,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
> > return -1;
> > }
> >
> > + /* get network interface name */
> > + snprintf(filename, sizeof(filename), "%s/net", dirname);
> > + dir = opendir(filename);
> > + if (dir) {
> > + while ((e = readdir(dir)) != NULL) {
> > + if (e->d_name[0] == '.')
> > + continue;
> > +
> > + strncpy(dev->name, e->d_name, sizeof(dev->name));
> > + }
> > + closedir(dir);
> > + }
> > +
> > if (!ret) {
> > if (!strcmp(driver, "vfio-pci"))
> > dev->kdrv = RTE_KDRV_VFIO;
> >
>
> For PCI devices that have several interfaces (I think it's the case for
> some Mellanox boards), maybe we should not store the interface name?
I am not sure what you mean here. If a device has multiple Ethernet
interfaces, then it should have a separate PCI device address space for
each interface (I don't know of any DPDK drivers that don't make this
assumption as well). If the device is multiprotocol, say Infiniband,
the device might have a net/ subdirectory, but it will be called something
like ib0 which you might want to blacklist for some reason.
> Another small comment about the strncpy(): it's maybe safer to ensure
> that dev->name is properly nul-terminated.
A good idea but it shouldn't happen in practice since dev.name will
be IFNAMSIZ. I will fix it in the next version.
2015-10-14 09:41, Charles Williams:
> On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> > For PCI devices that have several interfaces (I think it's the case for
> > some Mellanox boards), maybe we should not store the interface name?
>
> I am not sure what you mean here. If a device has multiple ethernet
> interfaces, then it should a have seperate PCI device address space for
> each interface (I dont know of any DPDK drivers that don't make this
> assumption as well).
mlx4 and cxgbe?
On Wed, 2015-11-04 at 23:40 +0100, Thomas Monjalon wrote:
> 2015-10-14 09:41, Charles Williams:
> > On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> > > For PCI devices that have several interfaces (I think it's the case for
> > > some Mellanox boards), maybe we should not store the interface name?
> >
> > I am not sure what you mean here. If a device has multiple ethernet
> > interfaces, then it should a have seperate PCI device address space for
> > each interface (I dont know of any DPDK drivers that don't make this
> > assumption as well).
>
> mlx4 and cxgbe?
OK, I see now. I don't know of a way to tell if a device has multiple
ports just from the pci vendor/device id without maintaining some
sort of table.
Do these devices have multiple interfaces listed in their
/sys/devices/.../net directory? If so, matching one of the listed
interfaces can just blacklist the whole device similar to blacklisting
by the device id.
On Thu, 05 Nov 2015 11:39:04 -0500
"Charles (Chas) Williams" <3chas3@gmail.com> wrote:
> On Wed, 2015-11-04 at 23:40 +0100, Thomas Monjalon wrote:
> > 2015-10-14 09:41, Charles Williams:
> > > On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> > > > For PCI devices that have several interfaces (I think it's the case for
> > > > some Mellanox boards), maybe we should not store the interface name?
> > >
> > > I am not sure what you mean here. If a device has multiple ethernet
> > > interfaces, then it should a have seperate PCI device address space for
> > > each interface (I dont know of any DPDK drivers that don't make this
> > > assumption as well).
> >
> > mlx4 and cxgbe?
>
> OK, I see now. I don't know of a way to tell if a device has multiple
> ports just from the pci vendor/device id without maintaining some
> sort of table.
>
> Do these devices have multiple interfaces listed in their
> /sys/devices/.../net diretory? If so, matching one of the listed
> interfaces can just blacklist the whole device similar to blacklisting
> by the device id.
Devices with multiple ports are supposed to report the port via /sys/class/net/xxx/portid
But you aren't going to be able to blacklist only one port of these devices.
The two drivers would be fighting over registers and IRQ management.
Plus kernel bind/unbind is by PCI id.
On Thu, 2015-11-05 at 11:23 -0800, Stephen Hemminger wrote:
> On Thu, 05 Nov 2015 11:39:04 -0500
> "Charles (Chas) Williams" <3chas3@gmail.com> wrote:
>
> > On Wed, 2015-11-04 at 23:40 +0100, Thomas Monjalon wrote:
> > > 2015-10-14 09:41, Charles Williams:
> > > > On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> > > > > For PCI devices that have several interfaces (I think it's the case for
> > > > > some Mellanox boards), maybe we should not store the interface name?
> > > >
> > > > I am not sure what you mean here. If a device has multiple ethernet
> > > > interfaces, then it should a have seperate PCI device address space for
> > > > each interface (I dont know of any DPDK drivers that don't make this
> > > > assumption as well).
> > >
> > > mlx4 and cxgbe?
> >
> > OK, I see now. I don't know of a way to tell if a device has multiple
> > ports just from the pci vendor/device id without maintaining some
> > sort of table.
> >
> > Do these devices have multiple interfaces listed in their
> > /sys/devices/.../net diretory? If so, matching one of the listed
> > interfaces can just blacklist the whole device similar to blacklisting
> > by the device id.
>
> Devices with multiple ports are supposed to report the port via /sys/class/net/xxx/portid
But I want to find the ports associated with the PCI devices.
> But you aren't going to be able to blacklist only one port of these devices.
> The two drivers would be fighting over registers and IRQ management.
> Plus kernel bind/unbind is by PCI id.
I understand that. Blacklisting an interface on a multi-port device
would be essentially the same as blacklisting by the PCI device id. You
can't split the PCI device. I just need to find the list of ports
associated with a single PCI device.
@@ -73,6 +73,8 @@ test_devargs(void)
goto fail;
if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, "0000:01:00.1") < 0)
goto fail;
+ if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, "eth0") < 0)
+ goto fail;
if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 2)
goto fail;
if (rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 2)
@@ -101,8 +101,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
case RTE_DEVTYPE_BLACKLISTED_PCI:
/* try to parse pci identifier */
if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 &&
- eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0)
- goto fail;
+ eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0) {
+ /* save as interface name instead */
+ ret = snprintf(devargs->pci.name,
+ sizeof(devargs->pci.name), "%s", buf);
+ if (ret < 0 || ret >= (int)sizeof(devargs->pci.name))
+ goto fail;
+ }
break;
case RTE_DEVTYPE_VIRTUAL:
@@ -889,7 +889,7 @@ eal_common_usage(void)
" -r RANKS Force number of memory ranks (don't detect)\n"
" -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
" Prevent EAL from using this PCI device. The argument\n"
- " format is <domain:bus:devid.func>.\n"
+ " format is <domain:bus:devid.func> or <name>.\n"
" -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n"
" Only use the specified PCI devices. The argument format\n"
" is <[domain:]bus:devid.func>. This option can be present\n"
@@ -93,8 +93,14 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
continue;
- if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
- return devargs;
+
+ if (devargs->pci.name[0] == '\0') {
+ if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
+ return devargs;
+ } else {
+ if (strcmp(dev->name, devargs->pci.name) == 0)
+ return devargs;
+ }
}
return NULL;
}
@@ -81,6 +81,8 @@ struct rte_devargs {
struct {
/** PCI location. */
struct rte_pci_addr addr;
+ /** Interface name. */
+ char name[32];
} pci;
/** Used if type is RTE_DEVTYPE_VIRTUAL. */
struct {
@@ -161,6 +161,7 @@ struct rte_pci_device {
struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
struct rte_intr_handle intr_handle; /**< Interrupt handle */
struct rte_pci_driver *driver; /**< Associated driver */
+ char name[32]; /**< Interface name (if any) */
uint16_t max_vfs; /**< sriov enable if not zero */
int numa_node; /**< NUMA node connection */
struct rte_devargs *devargs; /**< Device user arguments */
@@ -260,6 +260,8 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
unsigned long tmp;
struct rte_pci_device *dev;
char driver[PATH_MAX];
+ struct dirent *e;
+ DIR *dir;
int ret;
dev = malloc(sizeof(*dev));
@@ -352,6 +354,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
return -1;
}
+ /* get network interface name */
+ snprintf(filename, sizeof(filename), "%s/net", dirname);
+ dir = opendir(filename);
+ if (dir) {
+ while ((e = readdir(dir)) != NULL) {
+ if (e->d_name[0] == '.')
+ continue;
+
+ strncpy(dev->name, e->d_name, sizeof(dev->name));
+ }
+ closedir(dir);
+ }
+
if (!ret) {
if (!strcmp(driver, "vfio-pci"))
dev->kdrv = RTE_KDRV_VFIO;