From patchwork Thu Nov 15 15:47:14 2018
X-Patchwork-Submitter: "Burakov, Anatoly"
X-Patchwork-Id: 48128
From: Anatoly Burakov
To: dev@dpdk.org
Cc: john.mcnamara@intel.com, bruce.richardson@intel.com,
 pablo.de.lara.guarch@intel.com, david.hunt@intel.com,
 mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com
Date: Thu, 15 Nov 2018 15:47:14 +0000
Message-Id: <3b48e558478fdad1e9e6a8e58f723734be79ae21.1542291869.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [RFC v2 2/9] usertools/lib: add platform info library

Add a library that parses system information:

* NUMA nodes
* Cores and threads
* Mapping from NUMA node and core ID to thread IDs
* Hyperthreading support status
* RAM size
* Default hugepage size as reported by the kernel

This can then be used by other scripts.
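
As a rough illustration (not part of this patch), a consumer script could read
the information listed above through the PlatformInfo class added in the diff
below; the module and attribute names follow this patch, and the printed values
are examples only:

from DPDKConfigLib import PlatformInfo

# constructing the object triggers an update() that parses the system state
info = PlatformInfo.PlatformInfo()

print("NUMA nodes: %s" % info.numa_nodes)
print("Hyperthreading supported: %s" % info.hyperthreading_supported)
print("RAM size (kB): %d" % info.ram_size)
print("Default hugepage size (kB): %d" % info.default_hugepage_size)

# core_map is keyed by (numa_node, core_id) and maps to a list of thread ids
for (node, core), threads in sorted(info.core_map.items()):
    print("node %d, core %d -> threads %s" % (node, core, threads))
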
Signed-off-by: Anatoly Burakov --- usertools/DPDKConfigLib/PlatformInfo.py | 130 ++++++++++++++++++++++++ usertools/DPDKConfigLib/Util.py | 16 +++ 2 files changed, 146 insertions(+) create mode 100755 usertools/DPDKConfigLib/PlatformInfo.py create mode 100755 usertools/DPDKConfigLib/Util.py diff --git a/usertools/DPDKConfigLib/PlatformInfo.py b/usertools/DPDKConfigLib/PlatformInfo.py new file mode 100755 index 000000000..734d22026 --- /dev/null +++ b/usertools/DPDKConfigLib/PlatformInfo.py @@ -0,0 +1,130 @@ +#!/usr/bin/python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + + +from .Util import * +import re +import glob + +__SYSFS_CPU_INFO_PATH = "/sys/devices/system/cpu" +__SYSFS_IOMMU_CLASS_PATH = "/sys/class/iommu" +__KERNEL_HPSZ_PATH = "/sys/kernel/mm/hugepages/hugepages-*" + +try: + xrange # python 2 +except NameError: + xrange = range # python 3 + + +__CPU_FLAGS_TO_HP_SIZE = { + # x86 supports 2M and 1G pages + "pse": 2048, + "pdpe1gb": 1048576 +} +__IOMMU_CPU_FLAGS = ["vmx", "vms"] + +__HT_CPU_FLAGS = ["ht"] + + +def _parse_cpuinfo(pinfo): + core_info_list = [] + with open("/proc/cpuinfo") as f: + cur_core = {} + for line in f: + line = line.strip() + # if we've reached end of current core info, store it and clear it + if line == "": + core_info_list.append(cur_core) + cur_core = {} + continue + key, value = kv_split(line, ":") + cur_core[key] = value + # parse flags - they're the same for all CPU's so only parse the first one + flags = set(core_info_list[0]["flags"].split()) + for flag in flags: + if flag in __CPU_FLAGS_TO_HP_SIZE: + pinfo.hugepage_sizes_supported.append(__CPU_FLAGS_TO_HP_SIZE[flag]) + elif flag in __IOMMU_CPU_FLAGS: + pinfo.iommu_supported = True + elif flag in __HT_CPU_FLAGS: + pinfo.hyperthreading_supported = True + + # parse cores and sockets + numa_nodes = set() + core_map = {} + for core_dict in core_info_list: + thread_id = int(core_dict["processor"]) + core_id = int(core_dict["core id"]) + numa_node = int(core_dict["physical id"]) + + core_map.setdefault((numa_node, core_id), []).append(thread_id) + numa_nodes.add(numa_node) + + # now, populate PlatformInfo with our, well, info - convert to lists + pinfo.numa_nodes = list(numa_nodes) + pinfo.core_map = core_map + + +def _parse_meminfo(pinfo): + meminfo_data = {} + with open("/proc/meminfo") as f: + for line in f: + key, value = kv_split(line, ":") + meminfo_data[key] = value + + # regex used to capture kilobytes + r = re.compile("(\d+) kB") + + # total ram size + m = r.match(meminfo_data["MemTotal"]) + if not m: + raise RuntimeError("BUG: Bad regular expression") + pinfo.ram_size = int(m.group(1)) + + # hugepages may not be supported + if "Hugepagesize" in meminfo_data: + m = r.match(meminfo_data["Hugepagesize"]) + if not m: + raise RuntimeError("BUG: Bad regular expression") + pinfo.default_hugepage_size = int(m.group(1)) + + +def _find_enabled_hugepage_sizes(): + paths = glob.glob(__KERNEL_HPSZ_PATH) + r = re.compile("hugepages-(\d+)kB") + sizes = [] + for p in paths: + p = os.path.basename(p) + m = r.search(p) + if not m: + raise RuntimeError("BUG: Bad regular expression") + sizes.append(int(m.group(1))) + return sizes + + +def _iommu_is_enabled(): + pass + + +class PlatformInfo: + def __init__(self): + self.update() + + def reset(self): + self.numa_nodes = [] + self.hyperthreading_supported = False + self.core_map = {} # numa_node, core_id: [thread_id] + self.iommu_supported = False + self.iommu_mode = "" + self.bootloader_iommu_mode = "" + self.hugepage_sizes_supported 
= [] + self.hugepage_sizes_enabled = [] + self.default_hugepage_size = 0 + self.ram_size = 0 + + def update(self): + self.reset() + _parse_cpuinfo(self) + _parse_meminfo(self) + self.hugepage_sizes_enabled = _find_enabled_hugepage_sizes() diff --git a/usertools/DPDKConfigLib/Util.py b/usertools/DPDKConfigLib/Util.py new file mode 100755 index 000000000..42434e728 --- /dev/null +++ b/usertools/DPDKConfigLib/Util.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +# split line into key-value pair, cleaning up the values in the process +def kv_split(line, separator): + # just in case + line = line.strip() + + tokens = line.split(separator, 1) + key, value = None, None + if len(tokens) > 0: + key = tokens[0].strip() + if len(tokens) > 1: + value = tokens[1].strip() + return key, value From patchwork Thu Nov 15 15:47:15 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48126 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 327F54CC5; Thu, 15 Nov 2018 16:47:30 +0100 (CET) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 2F2B54CA2 for ; Thu, 15 Nov 2018 16:47:25 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:24 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="108832359" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga002.jf.intel.com with ESMTP; 15 Nov 2018 07:47:22 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlLDx024799; Thu, 15 Nov 2018 15:47:21 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlLhh028114; Thu, 15 Nov 2018 15:47:21 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlLvB028110; Thu, 15 Nov 2018 15:47:21 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:15 +0000 Message-Id: <097ace69a66250f46ed24e4f9dbd1102bc912f63.1542291869.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 3/9] usertools/cpu_layout: rewrite to use DPDKConfigLib X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Rewrite cpu layout script to use data provided by DPDKConfigLib script. This demonstrates an example of how external tools can get the same information from DPDKConfigLib. 
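
A minimal sketch (illustrative only) of the lookup pattern the rewritten
cpu_layout script relies on; the real script in the diff below additionally
formats the result as a per-socket table:

from DPDKConfigLib import PlatformInfo

info = PlatformInfo.PlatformInfo()

# sockets come straight from the library; core ids are derived from the
# (numa_node, core_id) keys of core_map, exactly as the diff below does
sockets = info.numa_nodes
cores = sorted(set(core for _, core in info.core_map))

for c in cores:
    for s in sockets:
        threads = info.core_map.get((s, c), [])
        print("socket %d, core %d: %s" % (s, c, threads))
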
Signed-off-by: Anatoly Burakov --- usertools/cpu_layout.py | 53 ++++++++++------------------------------- 1 file changed, 13 insertions(+), 40 deletions(-) diff --git a/usertools/cpu_layout.py b/usertools/cpu_layout.py index 6f129b1db..af950361c 100755 --- a/usertools/cpu_layout.py +++ b/usertools/cpu_layout.py @@ -1,73 +1,46 @@ #!/usr/bin/env python # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation +# Copyright(c) 2010-2018 Intel Corporation # Copyright(c) 2017 Cavium, Inc. All rights reserved. from __future__ import print_function -import sys -try: - xrange # Python 2 -except NameError: - xrange = range # Python 3 +from DPDKConfigLib import PlatformInfo -sockets = [] -cores = [] -core_map = {} +info = PlatformInfo.PlatformInfo() base_path = "/sys/devices/system/cpu" -fd = open("{}/kernel_max".format(base_path)) -max_cpus = int(fd.read()) -fd.close() -for cpu in xrange(max_cpus + 1): - try: - fd = open("{}/cpu{}/topology/core_id".format(base_path, cpu)) - except IOError: - continue - except: - break - core = int(fd.read()) - fd.close() - fd = open("{}/cpu{}/topology/physical_package_id".format(base_path, cpu)) - socket = int(fd.read()) - fd.close() - if core not in cores: - cores.append(core) - if socket not in sockets: - sockets.append(socket) - key = (socket, core) - if key not in core_map: - core_map[key] = [] - core_map[key].append(cpu) + +cores = sorted(set([core for _, core in info.core_map])) print(format("=" * (47 + len(base_path)))) print("Core and Socket Information (as reported by '{}')".format(base_path)) print("{}\n".format("=" * (47 + len(base_path)))) print("cores = ", cores) -print("sockets = ", sockets) +print("sockets = ", info.numa_nodes) print("") -max_processor_len = len(str(len(cores) * len(sockets) * 2 - 1)) -max_thread_count = len(list(core_map.values())[0]) +max_processor_len = len(str(len(cores) * len(info.numa_nodes) * 2 - 1)) +max_thread_count = len(info.core_map.values()[0]) max_core_map_len = (max_processor_len * max_thread_count) \ + len(", ") * (max_thread_count - 1) \ + len('[]') + len('Socket ') max_core_id_len = len(str(max(cores))) output = " ".ljust(max_core_id_len + len('Core ')) -for s in sockets: +for s in info.numa_nodes: output += " Socket %s" % str(s).ljust(max_core_map_len - len('Socket ')) print(output) output = " ".ljust(max_core_id_len + len('Core ')) -for s in sockets: +for s in info.numa_nodes: output += " --------".ljust(max_core_map_len) output += " " print(output) for c in cores: output = "Core %s" % str(c).ljust(max_core_id_len) - for s in sockets: - if (s,c) in core_map: - output += " " + str(core_map[(s, c)]).ljust(max_core_map_len) + for s in info.numa_nodes: + if (s, c) in info.core_map: + output += " " + str(info.core_map[(s, c)]).ljust(max_core_map_len) else: output += " " * (max_core_map_len + 1) print(output) From patchwork Thu Nov 15 15:47:16 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48124 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C0AEE4CB3; Thu, 15 Nov 2018 16:47:25 +0100 (CET) Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id C2D843238 for ; Thu, 15 Nov 2018 16:47:24 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: 
from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:23 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="281360508" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga006.fm.intel.com with ESMTP; 15 Nov 2018 07:47:22 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlLTh024802; Thu, 15 Nov 2018 15:47:21 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlLRK028121; Thu, 15 Nov 2018 15:47:21 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlL3t028117; Thu, 15 Nov 2018 15:47:21 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:16 +0000 Message-Id: <9e93f5ad10d291946ac15d27a907aaef73dc2086.1542291869.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 4/9] usertools/lib: support FreeBSD for platform info X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This enables FreeBSD support for PlatformInfo part of the DPDKConfigLib script, thereby enabling FreeBSD support for the cpu_layout script. Signed-off-by: Anatoly Burakov --- usertools/DPDKConfigLib/PlatformInfo.py | 81 ++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/usertools/DPDKConfigLib/PlatformInfo.py b/usertools/DPDKConfigLib/PlatformInfo.py index 734d22026..4ca507a37 100755 --- a/usertools/DPDKConfigLib/PlatformInfo.py +++ b/usertools/DPDKConfigLib/PlatformInfo.py @@ -6,6 +6,15 @@ from .Util import * import re import glob +import platform +import subprocess +import string +import xml.etree.ElementTree # for FreeBSD topology parsing +try: + import queue # python 3 +except ImportError: + import Queue as queue # python 2 + __SYSFS_CPU_INFO_PATH = "/sys/devices/system/cpu" __SYSFS_IOMMU_CLASS_PATH = "/sys/class/iommu" @@ -26,6 +35,9 @@ __HT_CPU_FLAGS = ["ht"] +__SYSCTL_TOPOLOGY_CMDLINE = ['sysctl', '-b', 'kern.sched.topology_spec'] +__SYSCTL_MEM_CMDLINE = ['sysctl', 'hw.realmem'] + def _parse_cpuinfo(pinfo): core_info_list = [] @@ -107,6 +119,56 @@ def _iommu_is_enabled(): pass +def _parse_sysctl_cpu_topology(pinfo): + output = subprocess.check_output(__SYSCTL_TOPOLOGY_CMDLINE) + + # output from sysctl contains null terminator, remove it + raw_xml = output[:-1] + tree = xml.etree.ElementTree.fromstring(raw_xml) + groups = queue.Queue() + + # put first group onto the queue + for e in tree.findall('group'): + groups.put(e) + + # per-level list of cores + levels = {} + + while not groups.empty(): + group = groups.get() + level = int(group.get('level')) + cpus = [int(cpu) for cpu in group.find('cpu').text.split(",")] + cur_value = levels.setdefault(level, []) + + # store discovered cpu's + cur_value.append(cpus) + levels[level] = cur_value + + children = group.find('children') + if children is not None: + for c in children.findall('group'): + groups.put(c) + + # find deepest level + max_level = 
max(levels.keys()) + + # for each group in the deepest level, take first CPU and make it physical + # core id + for cpus in levels[max_level]: + pinfo.core_map[pinfo.numa_nodes[0], cpus[0]] = cpus + + # also make note of hyperthreading + if len(cpus) > 1: + pinfo.hyperthreading_supported = True + + +def _parse_sysctl_ram_size(): + output = subprocess.check_output(__SYSCTL_MEM_CMDLINE) + _, mem_str = kv_split(output, ':') + mem_amount = int(mem_str) / 1024 # kilobytes + return mem_amount + + class PlatformInfo: def __init__(self): self.update() @@ -125,6 +187,19 @@ def reset(self): def update(self): self.reset() - _parse_cpuinfo(self) - _parse_meminfo(self) - self.hugepage_sizes_enabled = _find_enabled_hugepage_sizes() + system = platform.system() + if system == 'Linux': + _parse_cpuinfo(self) + _parse_meminfo(self) + self.hugepage_sizes_enabled = _find_enabled_hugepage_sizes() + elif system == 'FreeBSD': + # DPDK doesn't support NUMA on FreeBSD + self.numa_nodes = [0] + # find number of threads + _parse_sysctl_cpu_topology(self) + # find RAM size + self.ram_size = _parse_sysctl_ram_size() + # DPDK doesn't use hugepages on FreeBSD + self.hugepage_sizes_supported = [] + self.hugepage_sizes_enabled = [] + self.default_hugepage_size = 0 From patchwork Thu Nov 15 15:47:17 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48131 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 5F41D5B1E; Thu, 15 Nov 2018 16:47:40 +0100 (CET) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by dpdk.org (Postfix) with ESMTP id 6ACD74CAD for ; Thu, 15 Nov 2018 16:47:26 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga006.jf.intel.com ([10.7.209.51]) by fmsmga107.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:24 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="91384362" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga006.jf.intel.com with ESMTP; 15 Nov 2018 07:47:22 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlLTY024805; Thu, 15 Nov 2018 15:47:21 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlLEW028128; Thu, 15 Nov 2018 15:47:21 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlLGt028124; Thu, 15 Nov 2018 15:47:21 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:17 +0000 Message-Id: <9af4b611a659983a8a155a7d1d10b078b74c1d3d.1542291869.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 5/9] usertools/lib: add device information library X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This library is mostly copy-paste of 
devbind script, but with few additional bells and whistles, such as the ability to enumerate and create/destroy VF devices. Signed-off-by: Anatoly Burakov --- usertools/DPDKConfigLib/DevInfo.py | 424 +++++++++++++++++++++++++++++ usertools/DPDKConfigLib/DevUtil.py | 242 ++++++++++++++++ usertools/DPDKConfigLib/Util.py | 19 ++ 3 files changed, 685 insertions(+) create mode 100755 usertools/DPDKConfigLib/DevInfo.py create mode 100755 usertools/DPDKConfigLib/DevUtil.py diff --git a/usertools/DPDKConfigLib/DevInfo.py b/usertools/DPDKConfigLib/DevInfo.py new file mode 100755 index 000000000..52edae771 --- /dev/null +++ b/usertools/DPDKConfigLib/DevInfo.py @@ -0,0 +1,424 @@ +#!/usr/bin/python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2018 Intel Corporation +# Copyright(c) 2017 Cavium, Inc. All rights reserved. + +import glob +from .Util import * + +__DEFAULT_DPDK_DRIVERS = ["igb_uio", "vfio-pci", "uio_pci_generic"] +__dpdk_drivers = None # list of detected dpdk drivers +__devices = None # map from PCI address to device objects + +# The PCI base class for all devices +__network_class = {'Class': '02', 'Vendor': None, 'Device': None, + 'SVendor': None, 'SDevice': None} +__encryption_class = {'Class': '10', 'Vendor': None, 'Device': None, + 'SVendor': None, 'SDevice': None} +__intel_processor_class = {'Class': '0b', 'Vendor': '8086', 'Device': None, + 'SVendor': None, 'SDevice': None} +__cavium_sso = {'Class': '08', 'Vendor': '177d', 'Device': 'a04b,a04d', + 'SVendor': None, 'SDevice': None} +__cavium_fpa = {'Class': '08', 'Vendor': '177d', 'Device': 'a053', + 'SVendor': None, 'SDevice': None} +__cavium_pkx = {'Class': '08', 'Vendor': '177d', 'Device': 'a0dd,a049', + 'SVendor': None, 'SDevice': None} +__cavium_tim = {'Class': '08', 'Vendor': '177d', 'Device': 'a051', + 'SVendor': None, 'SDevice': None} +__cavium_zip = {'Class': '12', 'Vendor': '177d', 'Device': 'a037', + 'SVendor': None, 'SDevice': None} +__avp_vnic = {'Class': '05', 'Vendor': '1af4', 'Device': '1110', + 'SVendor': None, 'SDevice': None} + +# internal data, not supposed to be exposed, but available to local classes +_network_devices = [__network_class, __cavium_pkx, __avp_vnic] +_crypto_devices = [__encryption_class, __intel_processor_class] +_eventdev_devices = [__cavium_sso, __cavium_tim] +_mempool_devices = [__cavium_fpa] +_compress_devices = [__cavium_zip] + +__DRIVER_PATH_FMT = "/sys/bus/pci/drivers/%s/" +__DEVICE_PATH_FMT = "/sys/bus/pci/devices/%s/" + + +def _get_pci_speed_info(pci_addr): + data = subprocess.check_output(["lspci", "-vvs", pci_addr]).splitlines() + + # scan until we find capability structure + raw_data = {} + cur_key = "" + r = re.compile(r"Express \(v\d\) Endpoint") # PCI-E cap + found_pci_express_cap = False + for line in data: + key, value = kv_split(line, ":") + if not found_pci_express_cap: + if key != "Capabilities": + continue + # this is a capability structure - check if it's a PCI-E cap + m = r.search(value) + if not m: + continue # not a PCI-E cap + found_pci_express_cap = True + continue # start scanning for info + elif key == "Capabilities": + break # we've reached end of our PCI-E cap structure + if value is not None: + # this is a new key + cur_key = key + else: + value = key # this is continuation of previous key + raw_data[cur_key] = " ".join([raw_data.get(cur_key, ""), value]) + + # now, get our data out of there + result = { + "speed_supported": 0, + "width_supported": 0, + "speed_active": 0, + "width_active": 0 + } + speed_re = re.compile(r"Speed (\d+(\.\d+)?)GT/s") + 
width_re = re.compile(r"Width x(\d+)") + + val = raw_data.get("LnkCap", "") + speed_m = speed_re.search(val) + width_m = width_re.search(val) + # return empty + if speed_m: + result["speed_supported"] = float(speed_m.group(1)) + if width_m: + result["width_supported"] = int(width_m.group(1)) + + val = raw_data.get("LnkSta", "") + speed_m = speed_re.search(val) + width_m = width_re.search(val) + if speed_m: + result["speed_active"] = float(speed_m.group(1)) + if width_m: + result["width_active"] = int(width_m.group(1)) + return result + + +def _device_type_match(dev_dict, devices_type): + for i in range(len(devices_type)): + param_count = len( + [x for x in devices_type[i].values() if x is not None]) + match_count = 0 + if dev_dict["Class"][0:2] == devices_type[i]["Class"]: + match_count = match_count + 1 + for key in devices_type[i].keys(): + if key != 'Class' and devices_type[i][key]: + value_list = devices_type[i][key].split(',') + for value in value_list: + if value.strip() == dev_dict[key]: + match_count = match_count + 1 + # count must be the number of non None parameters to match + if match_count == param_count: + return True + return False + + +def _get_numa_node(addr): + path = get_device_path(addr, "numa_node") + if not os.path.isfile(path): + return 0 + val = int(read_file(path)) + return val if val >= 0 else 0 + + +def _basename_from_symlink(path): + if not os.path.islink(path): + raise ValueError("Invalid link: %s" % path) + return os.path.basename(os.path.realpath(path)) + + +def _get_pf_addr(pci_addr): + return _basename_from_symlink(get_device_path(pci_addr, "physfn")) + + +def _get_vf_addrs(pci_addr): + vf_path = get_device_path(pci_addr, "virtfn*") + return [_basename_from_symlink(path) for path in glob.glob(vf_path)] + + +def _get_total_vfs(pci_addr): + path = get_device_path(pci_addr, "sriov_totalvfs") + if not os.path.isfile(path): + return 0 + return int(read_file(path)) + + +# not allowed to use Enum because it's Python3.4+, so... 
+class DeviceType: + '''Device type identifier''' + DEVTYPE_UNKNOWN = -1 + DEVTYPE_NETWORK = 0 + DEVTYPE_CRYPTO = 1 + DEVTYPE_EVENT = 2 + DEVTYPE_MEMPOOL = 3 + DEVTYPE_COMPRESS = 4 + + +class DevInfo(object): + # map from lspci output to DevInfo attributes + __attr_map = { + 'Class': 'class_id', + 'Vendor': 'vendor_id', + 'Device': 'device_id', + 'SVendor': 'subsystem_vendor_id', + 'SDevice': 'subsystem_device_id', + 'Class_str': 'class_name', + 'Vendor_str': 'vendor_name', + 'Device_str': 'device_name', + 'SVendor_str': 'subsystem_vendor_name', + 'SDevice_str': 'subsystem_device_name', + 'Driver': 'active_driver' + } + + def __init__(self, pci_addr): + self.pci_addr = pci_addr # Slot + + # initialize all attributes + self.reset() + + # we know our PCI address at this point, so read lspci + self.update() + + def reset(self): + self.devtype = DeviceType.DEVTYPE_UNKNOWN # start with unknown type + self.class_id = "" # Class + self.vendor_id = "" # Vendor + self.device_id = "" # Device + self.subsystem_vendor_id = "" # SVendor + self.subsystem_device_id = "" # SDevice + self.class_name = "" # Class_str + self.vendor_name = "" # Vendor_str + self.device_name = "" # Device_str + self.subsystem_vendor_name = "" # SVendor_str + self.subsystem_device_name = "" # SDevice_str + self.kernel_drivers = [] # list of drivers in Module + self.active_driver = "" # Driver + self.available_drivers = [] + self.numa_node = -1 + self.is_virtual_function = False + self.virtual_functions = [] # list of VF pci addresses + self.physical_function = "" # PF PCI address if this is a VF + self.numvfs = 0 + self.totalvfs = 0 + self.pci_width_supported = 0 + self.pci_width_active = 0 + self.pci_speed_supported = 0 + self.pci_speed_active = 0 + + def update(self): + # clear everything + self.reset() + + lspci_info = subprocess.check_output(["lspci", "-vmmnnks", + self.pci_addr]).splitlines() + lspci_dict = {} + r = re.compile(r"\[[\da-f]{4}\]$") + + # parse lspci details + for line in lspci_info: + if len(line) == 0: + continue + name, value = line.decode().split("\t", 1) + name = name.strip(":") + has_id = r.search(value) is not None + if has_id: + namestr = name + "_str" + strvalue = value[:-7] # cut off hex value for _str value + value = value[-5:-1] # store hex value + lspci_dict[namestr] = strvalue + lspci_dict[name] = value + + # update object using map of lspci values to object attributes + for key, value in lspci_dict.items(): + if key in self.__attr_map: + setattr(self, self.__attr_map[key], value) + + # match device type + if _device_type_match(lspci_dict, _network_devices): + self.devtype = DeviceType.DEVTYPE_NETWORK + elif _device_type_match(lspci_dict, _crypto_devices): + self.devtype = DeviceType.DEVTYPE_CRYPTO + elif _device_type_match(lspci_dict, _eventdev_devices): + self.devtype = DeviceType.DEVTYPE_EVENT + elif _device_type_match(lspci_dict, _mempool_devices): + self.devtype = DeviceType.DEVTYPE_MEMPOOL + elif _device_type_match(lspci_dict, _compress_devices): + self.devtype = DeviceType.DEVTYPE_COMPRESS + + # special case - Module may have several drivers + if 'Module' in lspci_dict: + module_str = lspci_dict['Module'].split(',') + self.kernel_drivers = [d.strip() for d in module_str] + + # read NUMA node + self.numa_node = _get_numa_node(self.pci_addr) + + # check if device is a PF or a VF + try: + pf_addr = _get_pf_addr(self.pci_addr) + self.is_virtual_function = True + self.physical_function = pf_addr + except ValueError: + self.virtual_functions = _get_vf_addrs(self.pci_addr) + self.numvfs = 
len(self.virtual_functions) + self.totalvfs = _get_total_vfs(self.pci_addr) + + if not self.is_virtual_function: + speed_info = _get_pci_speed_info(self.pci_addr) + else: + speed_info = _get_pci_speed_info(self.physical_function) + + self.pci_width_active = speed_info["width_active"] + self.pci_width_supported = speed_info["width_supported"] + self.pci_speed_active = speed_info["speed_active"] + self.pci_speed_supported = speed_info["speed_supported"] + + # update available drivers + all_drivers = self.kernel_drivers + get_loaded_dpdk_drivers() + self.available_drivers = [driver for driver in all_drivers + if driver != self.active_driver] + + +# extends PCI device info with a few things unique to network devices +class NetworkDevInfo(DevInfo): + def __init__(self, pci_addr): + super(NetworkDevInfo, self).__init__(pci_addr) + + def reset(self): + super(NetworkDevInfo, self).reset() + self.interfaces = [] + self.ssh_interface = "" + self.active_interface = False + + def update(self): + # do regular update from lspci first + super(NetworkDevInfo, self).update() + + # now, update network-device-specific stuff + dirs = glob.glob(get_device_path(self.pci_addr, "net/*")) + self.interfaces = [os.path.basename(d) for d in dirs] + + # check what is the interface if any for an ssh connection if + # any to this host, so we can mark it. + route = subprocess.check_output(["ip", "-o", "route"]) + # filter out all lines for 169.254 routes + route = "\n".join(filter(lambda ln: not ln.startswith("169.254"), + route.decode().splitlines())) + rt_info = route.split() + for i in range(len(rt_info) - 1): + if rt_info[i] == "dev": + iface = rt_info[i + 1] + if iface in self.interfaces: + self.ssh_interface = iface + self.active_interface = True + break + + +def __update_device_list(): + global __devices + + __devices = {} + + non_network_devices = _crypto_devices + _mempool_devices +\ + _eventdev_devices + _compress_devices + + # first loop through and read details for all devices + # request machine readable format, with numeric IDs and String + dev_dict = {} + lspci_lines = subprocess.check_output(["lspci", "-Dvmmnk"]).splitlines() + for line in lspci_lines: + if line.strip() == "": + # we've completed reading this device, so parse it + pci_addr = dev_dict['Slot'] + if _device_type_match(dev_dict, _network_devices): + d = NetworkDevInfo(pci_addr) + __devices[pci_addr] = d + elif _device_type_match(dev_dict, non_network_devices): + d = DevInfo(pci_addr) + __devices[pci_addr] = d + else: + # unsupported device, ignore + pass + dev_dict = {} # clear the dictionary for next + continue + name, value = line.decode().split("\t", 1) + name = name.rstrip(":") + # Numeric IDs + dev_dict[name] = value + + +def __update_dpdk_driver_list(): + global __dpdk_drivers + + __dpdk_drivers = __DEFAULT_DPDK_DRIVERS[:] # make a copy + + # list of supported modules + mods = [{"Name": driver, "Found": False} for driver in __dpdk_drivers] + + # first check if module is loaded + try: + # Get list of sysfs modules (both built-in and dynamically loaded) + sysfs_path = '/sys/module/' + + # Get the list of directories in sysfs_path + sysfs_mods = [os.path.join(sysfs_path, o) for o + in os.listdir(sysfs_path) + if os.path.isdir(os.path.join(sysfs_path, o))] + + # Extract the last element of '/sys/module/abc' in the array + sysfs_mods = [a.split('/')[-1] for a in sysfs_mods] + + # special case for vfio_pci (module is named vfio-pci, + # but its .ko is named vfio_pci) + sysfs_mods = [a if a != 'vfio_pci' else 'vfio-pci' for a in sysfs_mods] + 
+ for mod in mods: + if mod["Name"] in sysfs_mods: + mod["Found"] = True + except: + pass + + # change DPDK driver list to only contain drivers that are loaded + __dpdk_drivers = [mod["Name"] for mod in mods if mod["Found"]] + + +# get a file/directory inside sysfs dir for a given PCI address +def get_device_path(pci_addr, fname): + return os.path.join(__DEVICE_PATH_FMT % pci_addr, fname) + + +# get a file/directory inside sysfs dir for a given driver +def get_driver_path(driver, fname): + return os.path.join(__DRIVER_PATH_FMT % driver, fname) + + +def get_loaded_dpdk_drivers(force_refresh=False): + '''Get list of loaded DPDK drivers''' + global __dpdk_drivers + + if __dpdk_drivers is not None and not force_refresh: + return __dpdk_drivers + + __update_dpdk_driver_list() + + return __dpdk_drivers + + +def get_supported_dpdk_drivers(): + return __DEFAULT_DPDK_DRIVERS + + +def get_devices(force_refresh=False): + '''Get list of detected devices''' + global __devices + + if __devices is not None and not force_refresh: + return __devices + + __update_device_list() + + return __devices diff --git a/usertools/DPDKConfigLib/DevUtil.py b/usertools/DPDKConfigLib/DevUtil.py new file mode 100755 index 000000000..17ee657f7 --- /dev/null +++ b/usertools/DPDKConfigLib/DevUtil.py @@ -0,0 +1,242 @@ +#!/usr/bin/python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2018 Intel Corporation +# Copyright(c) 2017 Cavium, Inc. All rights reserved. + +from .DevInfo import * +import errno + + +# check if we have support for driver_override by looking at sysfs and checking +# if any of the PCI device directories have driver_override file inside them +__have_override = len(glob.glob("/sys/bus/pci/devices/*/driver_override")) != 0 + + +# wrap custom exceptions, so that we can handle errors we expect, but still pass +# through any unexpected errors to the caller (which might indicate a bug) +class BindException(Exception): + def __init__(self, *args, **kwargs): + Exception.__init__(self, *args, **kwargs) + + +class UnbindException(Exception): + def __init__(self, *args, **kwargs): + Exception.__init__(self, *args, **kwargs) + + +# change num vfs for a given device +def __write_numvfs(dev, num_vfs): + path = get_device_path(dev.pci_addr, "sriov_numvfs") + append_file(path, num_vfs) + dev.update() + + +# unbind device from its driver +def __unbind_device(dev): + '''Unbind the device identified by "addr" from its current driver''' + addr = dev.pci_addr + + # For kernels >= 3.15 driver_override is used to bind a device to a driver. + # Before unbinding it, overwrite driver_override with empty string so that + # the device can be bound to any other driver. + if __have_override: + override_fname = get_device_path(dev.pci_addr, "driver_override") + try: + write_file(override_fname, "\00") + except IOError as e: + raise UnbindException("Couldn't overwrite 'driver_override' " + "for PCI device '%s': %s" % + (addr, e.strerror)) + + filename = get_driver_path(dev.active_driver, "unbind") + try: + append_file(filename, addr) + except IOError: + raise UnbindException("Couldn't unbind PCI device '%s'" % addr) + dev.update() + + +# bind device to a specified driver +def __bind_device_to_driver(dev, driver): + '''Bind the device given by "dev_id" to the driver "driver". 
If the device + is already bound to a different driver, it will be unbound first''' + addr = dev.pci_addr + + # For kernels >= 3.15 driver_override can be used to specify the driver + # for a device rather than relying on the driver to provide a positive + # match of the device. The existing process of looking up + # the vendor and device ID, adding them to the driver new_id, + # will erroneously bind other devices too which has the additional burden + # of unbinding those devices + if driver in get_loaded_dpdk_drivers(): + if __have_override: + override_fname = get_device_path(dev.pci_addr, "driver_override") + try: + write_file(override_fname, driver) + except IOError as e: + raise BindException("Couldn't write 'driver_override' for " + "PCI device '%s': %s" % (addr, e.strerror)) + # For kernels < 3.15 use new_id to add PCI id's to the driver + else: + newid_fname = get_driver_path(driver, "new_id") + try: + # Convert Device and Vendor Id to int to write to new_id + write_file(newid_fname, "%04x %04x" % (int(dev.vendor_id, 16), + int(dev.device_id, 16))) + except IOError as e: + # for some reason, closing new_id after adding a new PCI + # ID to new_id results in IOError (with errno set to + # ENODEV). however, if the device was successfully bound, we + # don't care for any errors and can safely ignore the + # error. + if e.errno != errno.ENODEV: + raise BindException("Couldn't write 'new_id' for PCI " + "device '%s': %s" % (addr, e.strerror)) + + print(get_driver_path(driver, "bind")) + bind_fname = get_driver_path(driver, "bind") + try: + append_file(bind_fname, addr) + except IOError as e: + dev.update() + print(driver) + print(dev.active_driver) + raise BindException("Couldn't bind PCI device '%s' to driver '%s': %s" % + (addr, driver, e.strerror)) + dev.update() + + +def set_num_vfs(dev, num_vfs): + if not isinstance(dev, DevInfo): + dev = get_devices()[dev] + if dev.is_virtual_function: + raise ValueError("Device '%s' is a virtual function" % dev.pci_addr) + if num_vfs > dev.totalvfs: + raise ValueError("Device '%s' has '%i' virtual functions," + "'%i' requested" % (dev.pci_addr, dev.totalvfs, + num_vfs)) + if dev.num_vfs == num_vfs: + return + __write_numvfs(dev, num_vfs) + dev.update() + + +def unbind(addrs, force_unbind=False): + '''Unbind device(s) from all drivers''' + # build a list if we were not provided a list + pci_dev_list = [] + try: + pci_dev_list.extend(addrs) + except AttributeError: + pci_dev_list.append(addrs) + + # ensure we are only working with DevInfo objects + filter_func = (lambda d: d.active_interface != "" and + (d.devtype != DeviceType.DEVTYPE_NETWORK or + not d.active_interface or not force_unbind)) + pci_dev_list = filter(filter_func, [a if isinstance(a, DevInfo) + else get_devices()[get_device_name(a)] + for a in pci_dev_list]) + for d in pci_dev_list: + __unbind_device(d) + + +# we are intentionally not providing a "simple" function to bind a single +# device due to complexities involved with using kernels < 3.15. 
instead, we're +# allowing to call this function with either one PCI address or a list of PCI +# addresses, or one DevInfo object, or a list of DevInfo objects, and will +# automatically do cleanup even if we fail to bind some devices +def bind(addrs, driver, force_unbind=False): + '''Bind device(s) to a specified driver''' + # build a list if we were not provided a list + pci_dev_list = [] + try: + pci_dev_list.extend(addrs) + except AttributeError: + pci_dev_list.append(addrs) + + # we want devices that aren't already bound to the driver we want, and are + # either not network devices, or aren't active network interfaces, unless we + # are in force-unbind mode + filter_func = (lambda d: d.active_driver != driver and + (d.devtype != DeviceType.DEVTYPE_NETWORK or + not d.active_interface or not force_unbind)) + # ensure we are working with DevInfo instances, and filter them out + pci_dev_list = list(filter(filter_func, + [a if isinstance(a, DevInfo) + else get_devices()[get_device_name(a)] + for a in pci_dev_list])) + if len(pci_dev_list) == 0: + # nothing to be done, bail out + return + ex = None + try: + for dev in pci_dev_list: + old_driver = dev.active_driver + if dev.active_driver != "": + __unbind_device(dev) + __bind_device_to_driver(dev, driver) + except UnbindException as e: + # no need to roll back anything, but still stop + ex = e + except BindException as e: + # roll back changes, stop and raise later + dev.update() + if old_driver != dev.active_driver: + try: + __bind_device_to_driver(dev, old_driver) + except BindException: + # ignore this one, nothing we can do about it + pass + ex = e + finally: + # we need to do this regardless of whether we succeeded or failed + + # For kernels < 3.15 when binding devices to a generic driver + # (i.e. one that doesn't have a PCI ID table) using new_id, some devices + # that are not bound to any other driver could be bound even if no one + # has asked them to. hence, we check the list of drivers again, and see + # if some of the previously-unbound devices were erroneously bound. + if not __have_override: + for dev in get_devices(): + # skip devices that were already (or supposed to be) bound + if dev in pci_dev_list or dev.active_driver != "": + continue + + # update information about this device + dev.update() + + # check if updated information indicates the device was bound + if dev.active_driver != "": + try: + __unbind_device(dev) + except UnbindException as e: + # if we already had an exception previously, don't throw + # this one, because we have a higher-priority one that + # we haven't thrown yet + if ex is not None: + break + raise e + # if we've failed somewhere during the bind process, raise that + if ex is not None: + raise ex + + +def get_device_name(name): + '''Take a device "name" - a string passed in by user to identify a NIC + device, and determine the device id - i.e. the domain:bus:slot.func - for + it, which can then be used to index into the devices array''' + + # check if it's already a suitable index + if name in get_devices(): + return name + # check if it's an index just missing the domain part + elif "0000:" + name in get_devices(): + return "0000:" + name + else: + # check if it's an interface name, e.g. 
eth1 + filter_func = (lambda i: i.devtype == DeviceType.DEVTYPE_NETWORK) + for dev in filter(filter_func, get_devices().values()): + if name in dev.interfaces: + return dev.pci_addr + return None diff --git a/usertools/DPDKConfigLib/Util.py b/usertools/DPDKConfigLib/Util.py index 42434e728..eb21cce15 100755 --- a/usertools/DPDKConfigLib/Util.py +++ b/usertools/DPDKConfigLib/Util.py @@ -2,6 +2,25 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Intel Corporation +# read entire file and return the result +def read_file(path): + with open(path, 'r') as f: + result = f.read().strip() + return result + + +# write value to file +def write_file(path, value): + with open(path, 'w') as f: + f.write(value) + + +# append value to file +def append_file(path, value): + with open(path, 'a') as f: + f.write(value) + + # split line into key-value pair, cleaning up the values in the process def kv_split(line, separator): # just in case From patchwork Thu Nov 15 15:47:18 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48132 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id ED6335F0D; Thu, 15 Nov 2018 16:47:41 +0100 (CET) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id B04223238 for ; Thu, 15 Nov 2018 16:47:27 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga001.jf.intel.com ([10.7.209.18]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:25 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="108528224" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga001.jf.intel.com with ESMTP; 15 Nov 2018 07:47:22 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlL2D024808; Thu, 15 Nov 2018 15:47:21 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlLEk028136; Thu, 15 Nov 2018 15:47:21 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlLsE028132; Thu, 15 Nov 2018 15:47:21 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:18 +0000 Message-Id: <087f92918853c99ef4163eb548fa046c8e536fdb.1542291869.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 6/9] usertools/devbind: switch to using DPDKConfigLib X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Make devbind use DPDKConfigLib library instead of having its own logic for binding/unbinding/enumerating devices and getting device information. All semantics and output are kept the same, only the internals changed. 
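
For illustration, the kind of DevInfo/DevUtil calls the reworked devbind script
builds on could look roughly like this; the PCI address is a hypothetical
placeholder, and the exception names come from DevUtil in the previous patch:

from DPDKConfigLib import DevInfo, DevUtil

# enumerate devices; get_devices() returns a dict keyed by PCI address
for addr, dev in DevInfo.get_devices().items():
    if dev.devtype == DevInfo.DeviceType.DEVTYPE_NETWORK:
        print(addr, dev.device_name, dev.active_driver, dev.interfaces)

# bind a (hypothetical) device to vfio-pci; DevUtil raises typed exceptions
# and rolls back partially completed binds internally
try:
    DevUtil.bind(["0000:01:00.0"], "vfio-pci")
except (DevUtil.BindException, DevUtil.UnbindException) as e:
    print("bind failed: %s" % e)
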
Signed-off-by: Anatoly Burakov --- usertools/dpdk-devbind.py | 533 ++++++-------------------------------- 1 file changed, 74 insertions(+), 459 deletions(-) diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py index 7d564634c..b1834c302 100755 --- a/usertools/dpdk-devbind.py +++ b/usertools/dpdk-devbind.py @@ -1,50 +1,18 @@ #! /usr/bin/env python # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2010-2014 Intel Corporation +# Copyright(c) 2010-2018 Intel Corporation # +from DPDKConfigLib import DevInfo, DevUtil import sys -import os import getopt -import subprocess -from os.path import exists, abspath, dirname, basename - -# The PCI base class for all devices -network_class = {'Class': '02', 'Vendor': None, 'Device': None, - 'SVendor': None, 'SDevice': None} -encryption_class = {'Class': '10', 'Vendor': None, 'Device': None, - 'SVendor': None, 'SDevice': None} -intel_processor_class = {'Class': '0b', 'Vendor': '8086', 'Device': None, - 'SVendor': None, 'SDevice': None} -cavium_sso = {'Class': '08', 'Vendor': '177d', 'Device': 'a04b,a04d', - 'SVendor': None, 'SDevice': None} -cavium_fpa = {'Class': '08', 'Vendor': '177d', 'Device': 'a053', - 'SVendor': None, 'SDevice': None} -cavium_pkx = {'Class': '08', 'Vendor': '177d', 'Device': 'a0dd,a049', - 'SVendor': None, 'SDevice': None} -cavium_tim = {'Class': '08', 'Vendor': '177d', 'Device': 'a051', - 'SVendor': None, 'SDevice': None} -cavium_zip = {'Class': '12', 'Vendor': '177d', 'Device': 'a037', - 'SVendor': None, 'SDevice': None} -avp_vnic = {'Class': '05', 'Vendor': '1af4', 'Device': '1110', - 'SVendor': None, 'SDevice': None} - -network_devices = [network_class, cavium_pkx, avp_vnic] -crypto_devices = [encryption_class, intel_processor_class] -eventdev_devices = [cavium_sso, cavium_tim] -mempool_devices = [cavium_fpa] -compress_devices = [cavium_zip] - -# global dict ethernet devices present. Dictionary indexed by PCI address. -# Each device within this is itself a dictionary of device properties -devices = {} -# list of supported DPDK drivers -dpdk_drivers = ["igb_uio", "vfio-pci", "uio_pci_generic"] +from os.path import basename # command-line arg flags b_flag = None status_flag = False force_flag = False +status_dev = "" args = [] @@ -119,394 +87,43 @@ def usage(): """ % locals()) # replace items from local variables -# This is roughly compatible with check_output function in subprocess module -# which is only available in python 2.7. 
-def check_output(args, stderr=None): - '''Run a command and capture its output''' - return subprocess.Popen(args, stdout=subprocess.PIPE, - stderr=stderr).communicate()[0] - - def check_modules(): '''Checks that igb_uio is loaded''' - global dpdk_drivers + loaded_drivers = DevUtil.get_loaded_dpdk_drivers() + supported_drivers = DevUtil.get_supported_dpdk_drivers() - # list of supported modules - mods = [{"Name": driver, "Found": False} for driver in dpdk_drivers] - - # first check if module is loaded - try: - # Get list of sysfs modules (both built-in and dynamically loaded) - sysfs_path = '/sys/module/' - - # Get the list of directories in sysfs_path - sysfs_mods = [os.path.join(sysfs_path, o) for o - in os.listdir(sysfs_path) - if os.path.isdir(os.path.join(sysfs_path, o))] - - # Extract the last element of '/sys/module/abc' in the array - sysfs_mods = [a.split('/')[-1] for a in sysfs_mods] - - # special case for vfio_pci (module is named vfio-pci, - # but its .ko is named vfio_pci) - sysfs_mods = [a if a != 'vfio_pci' else 'vfio-pci' for a in sysfs_mods] - - for mod in mods: - if mod["Name"] in sysfs_mods: - mod["Found"] = True - except: - pass + if b_flag in supported_drivers and b_flag not in loaded_drivers: + print("Error - %s driver is not loaded" % b_flag) + sys.exit(1) # check if we have at least one loaded module - if True not in [mod["Found"] for mod in mods] and b_flag is not None: - if b_flag in dpdk_drivers: - print("Error - no supported modules(DPDK driver) are loaded") - sys.exit(1) - else: - print("Warning - no supported modules(DPDK driver) are loaded") - - # change DPDK driver list to only contain drivers that are loaded - dpdk_drivers = [mod["Name"] for mod in mods if mod["Found"]] - - -def has_driver(dev_id): - '''return true if a device is assigned to a driver. False otherwise''' - return "Driver_str" in devices[dev_id] - - -def get_pci_device_details(dev_id, probe_lspci): - '''This function gets additional details for a PCI device''' - device = {} - - if probe_lspci: - extra_info = check_output(["lspci", "-vmmks", dev_id]).splitlines() - - # parse lspci details - for line in extra_info: - if len(line) == 0: - continue - name, value = line.decode().split("\t", 1) - name = name.strip(":") + "_str" - device[name] = value - # check for a unix interface name - device["Interface"] = "" - for base, dirs, _ in os.walk("/sys/bus/pci/devices/%s/" % dev_id): - if "net" in dirs: - device["Interface"] = \ - ",".join(os.listdir(os.path.join(base, "net"))) - break - # check if a port is used for ssh connection - device["Ssh_if"] = False - device["Active"] = "" - - return device - -def clear_data(): - '''This function clears any old data''' - devices = {} - -def get_device_details(devices_type): - '''This function populates the "devices" dictionary. The keys used are - the pci addresses (domain:bus:slot.func). 
The values are themselves - dictionaries - one for each NIC.''' - global devices - global dpdk_drivers - - # first loop through and read details for all devices - # request machine readable format, with numeric IDs and String - dev = {} - dev_lines = check_output(["lspci", "-Dvmmnnk"]).splitlines() - for dev_line in dev_lines: - if len(dev_line) == 0: - if device_type_match(dev, devices_type): - # Replace "Driver" with "Driver_str" to have consistency of - # of dictionary key names - if "Driver" in dev.keys(): - dev["Driver_str"] = dev.pop("Driver") - if "Module" in dev.keys(): - dev["Module_str"] = dev.pop("Module") - # use dict to make copy of dev - devices[dev["Slot"]] = dict(dev) - # Clear previous device's data - dev = {} - else: - name, value = dev_line.decode().split("\t", 1) - value_list = value.rsplit(' ', 1) - if len(value_list) > 1: - # String stored in _str - dev[name.rstrip(":") + '_str'] = value_list[0] - # Numeric IDs - dev[name.rstrip(":")] = value_list[len(value_list) - 1] \ - .rstrip("]").lstrip("[") - - if devices_type == network_devices: - # check what is the interface if any for an ssh connection if - # any to this host, so we can mark it later. - ssh_if = [] - route = check_output(["ip", "-o", "route"]) - # filter out all lines for 169.254 routes - route = "\n".join(filter(lambda ln: not ln.startswith("169.254"), - route.decode().splitlines())) - rt_info = route.split() - for i in range(len(rt_info) - 1): - if rt_info[i] == "dev": - ssh_if.append(rt_info[i+1]) - - # based on the basic info, get extended text details - for d in devices.keys(): - if not device_type_match(devices[d], devices_type): - continue - - # get additional info and add it to existing data - devices[d] = devices[d].copy() - # No need to probe lspci - devices[d].update(get_pci_device_details(d, False).items()) - - if devices_type == network_devices: - for _if in ssh_if: - if _if in devices[d]["Interface"].split(","): - devices[d]["Ssh_if"] = True - devices[d]["Active"] = "*Active*" - break - - # add igb_uio to list of supporting modules if needed - if "Module_str" in devices[d]: - for driver in dpdk_drivers: - if driver not in devices[d]["Module_str"]: - devices[d]["Module_str"] = \ - devices[d]["Module_str"] + ",%s" % driver - else: - devices[d]["Module_str"] = ",".join(dpdk_drivers) - - # make sure the driver and module strings do not have any duplicates - if has_driver(d): - modules = devices[d]["Module_str"].split(",") - if devices[d]["Driver_str"] in modules: - modules.remove(devices[d]["Driver_str"]) - devices[d]["Module_str"] = ",".join(modules) - - -def device_type_match(dev, devices_type): - for i in range(len(devices_type)): - param_count = len( - [x for x in devices_type[i].values() if x is not None]) - match_count = 0 - if dev["Class"][0:2] == devices_type[i]["Class"]: - match_count = match_count + 1 - for key in devices_type[i].keys(): - if key != 'Class' and devices_type[i][key]: - value_list = devices_type[i][key].split(',') - for value in value_list: - if value.strip(' ') == dev[key]: - match_count = match_count + 1 - # count must be the number of non None parameters to match - if match_count == param_count: - return True - return False - -def dev_id_from_dev_name(dev_name): - '''Take a device "name" - a string passed in by user to identify a NIC - device, and determine the device id - i.e. 
the domain:bus:slot.func - for - it, which can then be used to index into the devices array''' - - # check if it's already a suitable index - if dev_name in devices: - return dev_name - # check if it's an index just missing the domain part - elif "0000:" + dev_name in devices: - return "0000:" + dev_name - else: - # check if it's an interface name, e.g. eth1 - for d in devices.keys(): - if dev_name in devices[d]["Interface"].split(","): - return devices[d]["Slot"] - # if nothing else matches - error - print("Unknown device: %s. " - "Please specify device in \"bus:slot.func\" format" % dev_name) - sys.exit(1) - - -def unbind_one(dev_id, force): - '''Unbind the device identified by "dev_id" from its current driver''' - dev = devices[dev_id] - if not has_driver(dev_id): - print("%s %s %s is not currently managed by any driver\n" % - (dev["Slot"], dev["Device_str"], dev["Interface"])) - return - - # prevent us disconnecting ourselves - if dev["Ssh_if"] and not force: - print("Routing table indicates that interface %s is active. " - "Skipping unbind" % (dev_id)) - return - - # write to /sys to unbind - filename = "/sys/bus/pci/drivers/%s/unbind" % dev["Driver_str"] - try: - f = open(filename, "a") - except: - print("Error: unbind failed for %s - Cannot open %s" - % (dev_id, filename)) - sys.exit(1) - f.write(dev_id) - f.close() - - -def bind_one(dev_id, driver, force): - '''Bind the device given by "dev_id" to the driver "driver". If the device - is already bound to a different driver, it will be unbound first''' - dev = devices[dev_id] - saved_driver = None # used to rollback any unbind in case of failure - - # prevent disconnection of our ssh session - if dev["Ssh_if"] and not force: - print("Routing table indicates that interface %s is active. " - "Not modifying" % (dev_id)) - return - - # unbind any existing drivers we don't want - if has_driver(dev_id): - if dev["Driver_str"] == driver: - print("%s already bound to driver %s, skipping\n" - % (dev_id, driver)) - return - else: - saved_driver = dev["Driver_str"] - unbind_one(dev_id, force) - dev["Driver_str"] = "" # clear driver string - - # For kernels >= 3.15 driver_override can be used to specify the driver - # for a device rather than relying on the driver to provide a positive - # match of the device. 
The existing process of looking up - # the vendor and device ID, adding them to the driver new_id, - # will erroneously bind other devices too which has the additional burden - # of unbinding those devices - if driver in dpdk_drivers: - filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id - if os.path.exists(filename): - try: - f = open(filename, "w") - except: - print("Error: bind failed for %s - Cannot open %s" - % (dev_id, filename)) - return - try: - f.write("%s" % driver) - f.close() - except: - print("Error: bind failed for %s - Cannot write driver %s to " - "PCI ID " % (dev_id, driver)) - return - # For kernels < 3.15 use new_id to add PCI id's to the driver - else: - filename = "/sys/bus/pci/drivers/%s/new_id" % driver - try: - f = open(filename, "w") - except: - print("Error: bind failed for %s - Cannot open %s" - % (dev_id, filename)) - return - try: - # Convert Device and Vendor Id to int to write to new_id - f.write("%04x %04x" % (int(dev["Vendor"],16), - int(dev["Device"], 16))) - f.close() - except: - print("Error: bind failed for %s - Cannot write new PCI ID to " - "driver %s" % (dev_id, driver)) - return - - # do the bind by writing to /sys - filename = "/sys/bus/pci/drivers/%s/bind" % driver - try: - f = open(filename, "a") - except: - print("Error: bind failed for %s - Cannot open %s" - % (dev_id, filename)) - if saved_driver is not None: # restore any previous driver - bind_one(dev_id, saved_driver, force) - return - try: - f.write(dev_id) - f.close() - except: - # for some reason, closing dev_id after adding a new PCI ID to new_id - # results in IOError. however, if the device was successfully bound, - # we don't care for any errors and can safely ignore IOError - tmp = get_pci_device_details(dev_id, True) - if "Driver_str" in tmp and tmp["Driver_str"] == driver: - return - print("Error: bind failed for %s - Cannot bind to driver %s" - % (dev_id, driver)) - if saved_driver is not None: # restore any previous driver - bind_one(dev_id, saved_driver, force) - return - - # For kernels > 3.15 driver_override is used to bind a device to a driver. 
- # Before unbinding it, overwrite driver_override with empty string so that - # the device can be bound to any other driver - filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id - if os.path.exists(filename): - try: - f = open(filename, "w") - except: - print("Error: unbind failed for %s - Cannot open %s" - % (dev_id, filename)) - sys.exit(1) - try: - f.write("\00") - f.close() - except: - print("Error: unbind failed for %s - Cannot open %s" - % (dev_id, filename)) - sys.exit(1) + if len(loaded_drivers) == 0: + print("Warning - no supported modules(DPDK driver) are loaded") def unbind_all(dev_list, force=False): """Unbind method, takes a list of device locations""" - - if dev_list[0] == "dpdk": - for d in devices.keys(): - if "Driver_str" in devices[d]: - if devices[d]["Driver_str"] in dpdk_drivers: - unbind_one(devices[d]["Slot"], force) - return - - dev_list = map(dev_id_from_dev_name, dev_list) - for d in dev_list: - unbind_one(d, force) + try: + DevUtil.unbind(dev_list, force) + except DevUtil.UnbindException as e: + print("Unbind failed: %s" % e) + sys.exit(1) def bind_all(dev_list, driver, force=False): """Bind method, takes a list of device locations""" - global devices - dev_list = map(dev_id_from_dev_name, dev_list) + try: + DevUtil.bind(dev_list, driver, force) + except DevUtil.BindException as e: + print("Bind failed: %s" % e) + sys.exit(1) + except DevUtil.UnbindException as e: + print("Bind failed: %s" % e) + sys.exit(1) - for d in dev_list: - bind_one(d, driver, force) - # For kernels < 3.15 when binding devices to a generic driver - # (i.e. one that doesn't have a PCI ID table) using new_id, some devices - # that are not bound to any other driver could be bound even if no one has - # asked them to. hence, we check the list of drivers again, and see if - # some of the previously-unbound devices were erroneously bound. - if not os.path.exists("/sys/bus/pci/devices/%s/driver_override" % d): - for d in devices.keys(): - # skip devices that were already bound or that we know should be bound - if "Driver_str" in devices[d] or d in dev_list: - continue - - # update information about this device - devices[d] = dict(devices[d].items() + - get_pci_device_details(d, True).items()) - - # check if updated information indicates that the device was bound - if "Driver_str" in devices[d]: - unbind_one(d, force) - - -def display_devices(title, dev_list, extra_params=None): +def display_devices(title, dev_list): '''Displays to the user the details of a list of devices given in "dev_list". 
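Stepping back to the reworked bind_all()/unbind_all() above: script code now goes through the DevUtil module instead of touching sysfs directly. A hypothetical standalone use of that API might look like the following; the PCI address and driver name are placeholders, and the import path assumes DevUtil sits in DPDKConfigLib like the other modules in this series:

    import sys
    from DPDKConfigLib import DevUtil

    try:
        # bind one device to a DPDK-compatible driver, then give it back
        DevUtil.bind(["0000:02:00.0"], "igb_uio", False)
        DevUtil.unbind(["0000:02:00.0"], False)
    except (DevUtil.BindException, DevUtil.UnbindException) as e:
        print("Device rebind failed: %s" % e)
        sys.exit(1)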
The "extra_params" parameter, if given, should contain a string with %()s fields in it for replacement by the named fields in each @@ -518,62 +135,74 @@ def display_devices(title, dev_list, extra_params=None): strings.append("") else: for dev in dev_list: - if extra_params is not None: - strings.append("%s '%s %s' %s" % (dev["Slot"], - dev["Device_str"], - dev["Device"], - extra_params % dev)) + if dev.devtype == DevInfo.DeviceType.DEVTYPE_NETWORK: + extra_str = "if=%s drv=%s unused=%s%s" % \ + (",".join(dev.interfaces), dev.active_driver, + ",".join(dev.available_drivers), + " *Active*" if dev.active_interface else "") else: - strings.append("%s '%s'" % (dev["Slot"], dev["Device_str"])) + extra_str = "drv=%s unused=%s" % \ + (dev.active_driver, ",".join(dev.available_drivers)) + strings.append("%s '%s %s' %s" % (dev.pci_addr, + dev.device_name, + dev.device_id, + extra_str)) # sort before printing, so that the entries appear in PCI order strings.sort() print("\n".join(strings)) # print one per line -def show_device_status(devices_type, device_name): - global dpdk_drivers + +def show_device_status(device_list, device_type): kernel_drv = [] dpdk_drv = [] no_drv = [] + typenames = { + DevInfo.DeviceType.DEVTYPE_NETWORK: "Network", + DevInfo.DeviceType.DEVTYPE_CRYPTO: "Crypto", + DevInfo.DeviceType.DEVTYPE_EVENT: "Event", + DevInfo.DeviceType.DEVTYPE_MEMPOOL: "Mempool", + DevInfo.DeviceType.DEVTYPE_COMPRESS: "Compress" + } + typename = typenames[device_type] + # split our list of network devices into the three categories above - for d in devices.keys(): - if device_type_match(devices[d], devices_type): - if not has_driver(d): - no_drv.append(devices[d]) - continue - if devices[d]["Driver_str"] in dpdk_drivers: - dpdk_drv.append(devices[d]) + for d in device_list: + if d.devtype == device_type: + if d.active_driver == "": + no_drv.append(d) + elif d.active_driver in DevInfo.get_loaded_dpdk_drivers(): + dpdk_drv.append(d) else: - kernel_drv.append(devices[d]) + kernel_drv.append(d) # print each category separately, so we can clearly see what's used by DPDK - display_devices("%s devices using DPDK-compatible driver" % device_name, - dpdk_drv, "drv=%(Driver_str)s unused=%(Module_str)s") - display_devices("%s devices using kernel driver" % device_name, kernel_drv, - "if=%(Interface)s drv=%(Driver_str)s " - "unused=%(Module_str)s %(Active)s") - display_devices("Other %s devices" % device_name, no_drv, - "unused=%(Module_str)s") + display_devices("%s devices using DPDK-compatible driver" % typename, + dpdk_drv) + display_devices("%s devices using kernel driver" % typename, kernel_drv) + display_devices("Other %s devices" % typename, no_drv) + def show_status(): '''Function called when the script is passed the "--status" option. 
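The categorisation done by show_device_status() above comes entirely from the DevInfo module introduced earlier in this series. As a rough illustration of that API, and assuming DevInfo is importable from DPDKConfigLib, a caller could list network devices currently bound to DPDK-compatible drivers like this:

    from DPDKConfigLib import DevInfo

    devices = DevInfo.get_devices().values()
    dpdk_drivers = DevInfo.get_loaded_dpdk_drivers()

    # pick out network devices currently bound to a DPDK-compatible driver
    dpdk_bound = [d for d in devices
                  if d.devtype == DevInfo.DeviceType.DEVTYPE_NETWORK and
                  d.active_driver in dpdk_drivers]

    for dev in dpdk_bound:
        print("%s '%s %s' drv=%s" % (dev.pci_addr, dev.device_name,
                                     dev.device_id, dev.active_driver))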
Displays to the user what devices are bound to the igb_uio driver, the kernel driver or to no driver''' - if status_dev == "net" or status_dev == "all": - show_device_status(network_devices, "Network") - - if status_dev == "crypto" or status_dev == "all": - show_device_status(crypto_devices, "Crypto") - - if status_dev == "event" or status_dev == "all": - show_device_status(eventdev_devices, "Eventdev") - - if status_dev == "mempool" or status_dev == "all": - show_device_status(mempool_devices, "Mempool") - - if status_dev == "compress" or status_dev == "all": - show_device_status(compress_devices , "Compress") + devtypes = { + "net": DevInfo.DeviceType.DEVTYPE_NETWORK, + "crypto": DevInfo.DeviceType.DEVTYPE_CRYPTO, + "event": DevInfo.DeviceType.DEVTYPE_EVENT, + "mempool": DevInfo.DeviceType.DEVTYPE_MEMPOOL, + "compress": DevInfo.DeviceType.DEVTYPE_COMPRESS + } + device_list = DevInfo.get_devices().values() + + found_types = sorted([devtypes[t] for t in devtypes.keys() + if t == status_dev or status_dev == "all"]) + + for t in found_types: + devices = list(filter(lambda dev: dev.devtype == t, device_list)) + show_device_status(devices, t) def parse_args(): @@ -642,14 +271,6 @@ def do_arg_actions(): elif b_flag is not None: bind_all(args, b_flag, force_flag) if status_flag: - if b_flag is not None: - clear_data() - # refresh if we have changed anything - get_device_details(network_devices) - get_device_details(crypto_devices) - get_device_details(eventdev_devices) - get_device_details(mempool_devices) - get_device_details(compress_devices) show_status() @@ -657,12 +278,6 @@ def main(): '''program main function''' parse_args() check_modules() - clear_data() - get_device_details(network_devices) - get_device_details(crypto_devices) - get_device_details(eventdev_devices) - get_device_details(mempool_devices) - get_device_details(compress_devices) do_arg_actions() if __name__ == "__main__": From patchwork Thu Nov 15 15:47:19 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48130 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id CC7F658FE; Thu, 15 Nov 2018 16:47:38 +0100 (CET) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by dpdk.org (Postfix) with ESMTP id 8FFFE3238 for ; Thu, 15 Nov 2018 16:47:26 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga107.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:25 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="92312824" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga008.jf.intel.com with ESMTP; 15 Nov 2018 07:47:23 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlMcV024809; Thu, 15 Nov 2018 15:47:22 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlLgh028143; Thu, 15 Nov 2018 15:47:21 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlLwa028139; Thu, 15 Nov 2018 15:47:21 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, 
pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:19 +0000 Message-Id: <659b0b703272daba739ed8c821b2911778864de6.1542291869.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 7/9] usertools/lib: add hugepage information library X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add a library for getting hugepage information on Linux system. Supported functionality: - List active hugetlbfs mountpoints - Change hugetlbfs mountpoints - Supports both transient and persistent (fstab) mountpoints - Display/change number of allocated hugepages - Supports both total and per-NUMA node page counts Signed-off-by: Anatoly Burakov --- usertools/DPDKConfigLib/HugeUtil.py | 309 ++++++++++++++++++++++++++++ usertools/DPDKConfigLib/Util.py | 49 +++++ 2 files changed, 358 insertions(+) create mode 100755 usertools/DPDKConfigLib/HugeUtil.py diff --git a/usertools/DPDKConfigLib/HugeUtil.py b/usertools/DPDKConfigLib/HugeUtil.py new file mode 100755 index 000000000..79ed97bb7 --- /dev/null +++ b/usertools/DPDKConfigLib/HugeUtil.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + + +from .PlatformInfo import * +from .Util import * +import re +import os +import subprocess + +__KERNEL_NUMA_HP_PATH = \ + "/sys/devices/system/node/node%i/hugepages/hugepages-%ikB/" +__KERNEL_HP_PATH = "/sys/kernel/mm/hugepages/hugepages-%ikB/" +__NR_HP_FNAME = "nr_hugepages" +# check if we have systemd +_have_systemd = run(["which", "systemctl"]) + +# local copy of platform info +info = PlatformInfo() + + +def _find_runtime_hugetlbfs_mountpoints(): + mountpoints = {} + with open("/proc/mounts") as f: + for line in f: + if not _is_hugetlbfs_mount(line): + continue + line = line.strip() + _, path, _, options, _, _ = line.split() + + m = re.search(r"pagesize=(\d+\w)", options) + if m: + pagesz = human_readable_to_kilobytes(m.group(1)) + else: + # if no size specified, assume default hugepage size + pagesz = info.default_hugepage_size + if pagesz in mountpoints: + raise RuntimeError("Multiple mountpoints for same hugetlbfs") + mountpoints[pagesz] = path + return mountpoints + + +def _find_nr_hugepages(page_sz, node=None): + if node is not None: + path = os.path.join(__KERNEL_NUMA_HP_PATH % (node, page_sz), + __NR_HP_FNAME) + else: + path = os.path.join(__KERNEL_HP_PATH % (page_sz), __NR_HP_FNAME) + return int(read_file(path)) + + +def _write_nr_hugepages(page_sz, nr_pages, node=None): + if node is not None: + path = os.path.join(__KERNEL_NUMA_HP_PATH % (node, page_sz), + __NR_HP_FNAME) + else: + path = os.path.join(__KERNEL_HP_PATH % (page_sz), __NR_HP_FNAME) + write_file(path, str(nr_pages)) + + +def _is_hugetlbfs_mount(line): + # ignore comemnts + if line.strip().startswith("#"): + return False + tokens = line.split() + if len(tokens) != 6: + return False + return tokens[2] == "hugetlbfs" + + +def _update_fstab_hugetlbfs_mounts(mountpoints): + # remove all hugetlbfs mappings + with open("/etc/fstab") as f: + lines = f.readlines() + mount_idxs = [idx for idx, line in enumerate(lines) + if _is_hugetlbfs_mount(line)] + + # delete all lines with hugetlbfs mountpoints + for idx in 
reversed(sorted(mount_idxs)): + del lines[idx] + + # append new mountpoints + lines.extend(["hugetlbfs %s hugetlbfs pagesize=%s 0 0\n" % + (mountpoints[size], kilobytes_to_human_readable(size)) + for size in mountpoints.keys() if mountpoints[size] != ""]) + + # finally, write everything back + with open("/etc/fstab", "w") as f: + f.writelines(lines) + + +def _find_fstab_hugetlbfs_mounts(): + mountpoints = {} + with open("/etc/fstab") as f: + for line in f: + if not _is_hugetlbfs_mount(line): + continue + line = line.strip() + _, path, _, options, _, _ = line.split() + + m = re.search(r"pagesize=(\d+\w)", options) + if m: + pagesz = human_readable_to_kilobytes(m.group(1)) + else: + # if no size specified, assume default hugepage size + pagesz = info.default_hugepage_size + if pagesz in mountpoints: + raise RuntimeError("Multiple mountpoints for same hugetlbfs") + mountpoints[pagesz] = path + return mountpoints + + +def _find_systemd_hugetlbfs_mounts(): + # we find systemd mounts by virtue of them not being in fstab, so check each + units = [] + out = subprocess.check_output(["systemctl", "-t", "mount", "--all"], + stderr=None) + lines = out.decode("utf-8").splitlines() + for line in lines: + line = line.strip() + + tokens = line.split() + + if len(tokens) == 0: + continue + + # masked unit files are second token + if tokens[0].endswith(".mount"): + unit = tokens[0] + elif tokens[1].endswith(".mount"): + tokens = tokens[1:] + unit = tokens[0] + else: + continue # not a unit line + + # if this is inactive and masked, we don't care + load, active, sub = tokens[1:4] + if load == "masked" and active == "inactive": + continue + + units.append({"unit": unit, "load": load, "active": active, "sub": sub}) + + for unit_dict in units: + # status may return non-zero, but we don't care + try: + out = subprocess.check_output(["systemctl", "status", + unit_dict["unit"]], stderr=None) + except subprocess.CalledProcessError as e: + out = e.output + lines = out.decode("utf-8").splitlines() + for line in lines: + line = line.strip() + if line.startswith("What"): + unit_dict["fs"] = line.split()[1] + elif line.startswith("Where"): + unit_dict["path"] = line.split()[1] + + fstab_mountpoints = _find_fstab_hugetlbfs_mounts().values() + filter_func = (lambda x: x.get("fs", "") == "hugetlbfs" and + x.get("path", "") not in fstab_mountpoints) + return {u["unit"]: u["path"] for u in filter(filter_func, units)} + + +def _disable_systemd_hugetlbfs_mounts(): + mounts = _find_systemd_hugetlbfs_mounts() + for unit, path in mounts.keys(): + run(["systemctl", "stop", unit]) # unmount + run(["systemctl", "mask", unit]) # prevent this from ever running + + +class PersistentMountpointConfig: + def __init__(self): + self.update() + + def update(self): + self.reset() + self.mountpoints = _find_fstab_hugetlbfs_mounts() + for sz in info.hugepage_sizes_enabled: + self.mountpoints.setdefault(sz, "") + + def commit(self): + # check if we are trying to mount hugetlbfs of unsupported size + supported = set(info.hugepage_sizes_supported) + all_sizes = set(self.mountpoints.keys()) + if not all_sizes.issubset(supported): + diff = supported.difference(all_sizes) + raise ValueError("Unsupported hugepage sizes: %s" % + [kilobytes_to_human_readable(s) for s in diff]) + + if _have_systemd: + # dealing with fstab is easier, so disable all systemd mounts + _disable_systemd_hugetlbfs_mounts() + + _update_fstab_hugetlbfs_mounts(self.mountpoints) + + if _have_systemd: + run(["systemctl", "daemon-reload"]) + self.update() + + def reset(self): + 
self.mountpoints = {} # pagesz : path + + +class RuntimeMountpointConfig: + def __init__(self): + self.update() + + def update(self): + self.reset() + self.mountpoints = _find_runtime_hugetlbfs_mountpoints() + for sz in info.hugepage_sizes_enabled: + self.mountpoints.setdefault(sz, "") + + def commit(self): + # check if we are trying to mount hugetlbfs of unsupported size + supported = set(info.hugepage_sizes_supported) + all_sizes = set(self.mountpoints.keys()) + if not all_sizes.issubset(supported): + diff = supported.difference(all_sizes) + raise ValueError("Unsupported hugepage sizes: %s" % + [kilobytes_to_human_readable(s) for s in diff]) + + cur_mp = _find_runtime_hugetlbfs_mountpoints() + sizes = set(cur_mp.keys()).union(self.mountpoints.keys()) + + for size in sizes: + old = cur_mp.get(size, "") + new = self.mountpoints.get(size, "") + + is_unmount = old != "" and new == "" + is_mount = old == "" and new != "" + is_remount = old != "" and new != "" and old != new + + mount_param = ["-t", "hugetlbfs", "-o", + "pagesize=%sM" % (size / 1024)] + + if is_unmount: + run(["umount", old]) + elif is_mount: + mkpath(new) + run(["mount"] + mount_param + [new]) + elif is_remount: + mkpath(new) + run(["umount", old]) + run(["mount"] + mount_param + [new]) + + if _have_systemd: + run(["systemctl", "daemon-reload"]) + self.update() + + def reset(self): + self.mountpoints = {} # pagesz : path + + +class RuntimeHugepageConfig: + def __init__(self): + self.update() + + def update(self): + self.reset() + + hugepage_sizes = info.hugepage_sizes_enabled + if len(hugepage_sizes) == 0: + raise RuntimeError("Hugepages appear to be disabled") + self.total_nr_hugepages = \ + {page_sz: _find_nr_hugepages(page_sz) + for page_sz in hugepage_sizes} + for node in info.numa_nodes: + for page_sz in hugepage_sizes: + self.hugepages_per_node[node, page_sz] = \ + _find_nr_hugepages(page_sz, node) + + def commit(self): + # sanity checks + + # check if user has messed with hugepage sizes + supported_sizes = set(info.hugepage_sizes_supported) + keys = self.total_nr_hugepages.keys() + if set(keys) != set(supported_sizes): + diff = supported_sizes.difference(keys) + raise ValueError("Missing hugepage sizes: %s" % + [kilobytes_to_human_readable(s) for s in diff]) + + for d in self.hugepages_per_node: + keys = d.keys() + if set(keys) != set(supported_sizes): + diff = supported_sizes.difference(keys) + raise ValueError("Missing hugepage sizes: %s" % + [kilobytes_to_human_readable(s) for s in diff]) + + # check if all hugepage numbers add up + for size in supported_sizes: + total_hps = sum([self.hugepages_per_node[node, size] + for node in info.numa_nodes]) + if total_hps != self.total_nr_hugepages[size]: + raise ValueError("Total number of hugepages not equal to sum of" + "pages on all NUMA nodes") + + # now, commit our configuration + for size, value in self.total_nr_hugepages.items(): + _write_nr_hugepages(size, value) + for node, size, value in self.hugepages_per_node.items(): + _write_nr_hugepages(size, value, node) + self.update() + + def reset(self): + self.total_nr_hugepages = {} + self.hugepages_per_node = {} diff --git a/usertools/DPDKConfigLib/Util.py b/usertools/DPDKConfigLib/Util.py index eb21cce15..ba0c36537 100755 --- a/usertools/DPDKConfigLib/Util.py +++ b/usertools/DPDKConfigLib/Util.py @@ -2,6 +2,25 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Intel Corporation +import subprocess +import re +import os +import errno + +__PGSZ_UNITS = ['k', 'M', 'G', 'T', 'P'] + + +# equivalent to mkdir -p 
+def mkpath(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise e + + # read entire file and return the result def read_file(path): with open(path, 'r') as f: @@ -21,6 +40,36 @@ def append_file(path, value): f.write(value) +# run command while suppressing its output +def run(args): + try: + subprocess.check_output(args, stderr=None) + except subprocess.CalledProcessError: + return False + return True + + +def kilobytes_to_human_readable(value): + for unit in __PGSZ_UNITS: + if abs(value) < 1024: + cur_unit = unit + break + value /= 1024 + else: + raise ValueError("Value too large") + return "%i%s" % (value, cur_unit) + + +def human_readable_to_kilobytes(value): + m = re.match(r"(\d+)([%s])$" % ''.join(__PGSZ_UNITS), value) + if not m: + raise ValueError("Invalid value format: %s" % value) + ival = int(m.group(1)) + suffix = m.group(2) + pow = __PGSZ_UNITS.index(suffix) + return ival * (1024 ** pow) + + # split line into key-value pair, cleaning up the values in the process def kv_split(line, separator): # just in case From patchwork Thu Nov 15 15:47:20 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48127 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C8B2E4F94; Thu, 15 Nov 2018 16:47:32 +0100 (CET) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 52E574CAB for ; Thu, 15 Nov 2018 16:47:25 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:24 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="108355352" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga001.fm.intel.com with ESMTP; 15 Nov 2018 07:47:23 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlM4J024814; Thu, 15 Nov 2018 15:47:22 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlM7Z028150; Thu, 15 Nov 2018 15:47:22 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlMLX028146; Thu, 15 Nov 2018 15:47:22 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:20 +0000 Message-Id: <979a0e8fbbef524e2c07d80265c8cc440382a450.1542291869.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 8/9] usertools: add hugepage info script X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add a simple hugepage info script for demonstration purposes. 
It lists the following information: - Currently configured persistent hugetlbfs mountpoints (fstab) - Currently mounted hugetlbfs mountpoints - Total number of hugepages for each size - Per-NUMA node number of hugepages for each size Signed-off-by: Anatoly Burakov --- usertools/hugepage-info.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 usertools/hugepage-info.py diff --git a/usertools/hugepage-info.py b/usertools/hugepage-info.py new file mode 100755 index 000000000..bbea35e7b --- /dev/null +++ b/usertools/hugepage-info.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + + +from __future__ import print_function +from DPDKConfigLib import HugeUtil, PlatformInfo, Util + +pc = HugeUtil.PersistentMountpointConfig() +print("Persistent hugetlbfs mountpoints:") + +for sz, mp in pc.mountpoints.items(): + print("%s: %s" % (Util.kilobytes_to_human_readable(sz), mp)) + +rc = HugeUtil.RuntimeMountpointConfig() +print("Current hugetlbfs mountpoints:") + +for sz, mp in rc.mountpoints.items(): + print("%s: %s" % (Util.kilobytes_to_human_readable(sz), mp)) + +info = PlatformInfo.PlatformInfo() +rhc = HugeUtil.RuntimeHugepageConfig() +print("Current hugepage configuration:") +for sz, nr in rhc.total_nr_hugepages.items(): + print("%s: %s" % (Util.kilobytes_to_human_readable(sz), nr)) + +print("Current per-NUMA node configuration:") +for node in info.numa_nodes: + for pagesz in info.hugepage_sizes_supported: + print("[Node %s] %s: %s" % (node, + Util.kilobytes_to_human_readable(pagesz), + rhc.hugepages_per_node[node, pagesz])) From patchwork Thu Nov 15 15:47:21 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 48129 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 342685587; Thu, 15 Nov 2018 16:47:36 +0100 (CET) Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by dpdk.org (Postfix) with ESMTP id 96C883238 for ; Thu, 15 Nov 2018 16:47:25 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Nov 2018 07:47:24 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,236,1539673200"; d="scan'208";a="281360514" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga006.fm.intel.com with ESMTP; 15 Nov 2018 07:47:22 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wAFFlM17024815; Thu, 15 Nov 2018 15:47:22 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wAFFlMeD028157; Thu, 15 Nov 2018 15:47:22 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wAFFlMVO028153; Thu, 15 Nov 2018 15:47:22 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: john.mcnamara@intel.com, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com, david.hunt@intel.com, mohammad.abdul.awal@intel.com, thomas@monjalon.net, ferruh.yigit@intel.com Date: Thu, 15 Nov 2018 15:47:21 +0000 Message-Id: <58e36e0a9bc7f9cb3d5a288feed1230984e72ee6.1542291869.git.anatoly.burakov@intel.com> X-Mailer: 
git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 9/9] usertools/lib: add GRUB utility library for hugepage config X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This library is highly experimental and can kill kittens, but its main purpose is to automatically set up GRUB command-line to allocate a given number of hugepages at boot time. It works in a similar way HugeUtil library does, but instead of committing changes to fstab or runtime configuration, it commits its changes to GRUB default command-line and updates all GRUB entries afterwards. I got it to a state where it's safe to use on my system, but see the part above about killing kittens - you have been warned :) No example scripts will currently be provided. Signed-off-by: Anatoly Burakov --- usertools/DPDKConfigLib/GrubHugeUtil.py | 175 ++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100755 usertools/DPDKConfigLib/GrubHugeUtil.py diff --git a/usertools/DPDKConfigLib/GrubHugeUtil.py b/usertools/DPDKConfigLib/GrubHugeUtil.py new file mode 100755 index 000000000..4b8e349b8 --- /dev/null +++ b/usertools/DPDKConfigLib/GrubHugeUtil.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + + +from .PlatformInfo import * +from .Util import * +import re +import os + +__KERNEL_TRANSPARENT_HP = "/sys/kernel/mm/transparent_hugepage/enabled" +_GRUB_CMDLINE_PARAM_NAME = "GRUB_CMDLINE_LINUX_DEFAULT" + +# local copy of platform info +info = PlatformInfo() + +def _find_linux_default_cmdline(): + with open("/etc/default/grub") as f: + for line in f: + line = line.strip() + if line.startswith(_GRUB_CMDLINE_PARAM_NAME): + return line + else: + raise RuntimeError("Invalid GRUB default configuration format") + + +def _parse_linux_default_cmdline(line): + # get value to the right of equals sign, strip whitespace and quotes, + # split into separate keys and make a list of values + _, cmdline = kv_split(line, "=") + # remove quotes + if cmdline[0] == cmdline[-1] == '"': + cmdline = cmdline[1:-1] + + return [kv_split(v, "=") for v in cmdline.split()] + + +def _generate_linux_default_cmdline(cmdline): + lines = [] + cmdline_idx = -1 + with open("/etc/default/grub") as f: + for idx, line in enumerate(f): + line = line.strip() + lines.extend([line]) + if line.startswith(_GRUB_CMDLINE_PARAM_NAME): + cmdline_idx = idx + if cmdline_idx == -1: + raise RuntimeError("Invalid GRUB default configuration format") + + # write the lines back, replacing one we want + with open("/etc/default/grub", "w") as f: + for idx, line in enumerate(lines): + if idx == cmdline_idx: + line = cmdline + f.write(line + "\n") + + +def _find_transparent_hugepage(): + if not os.path.exists(__KERNEL_TRANSPARENT_HP): + return None + value = read_file(__KERNEL_TRANSPARENT_HP) + m = re.search(r"\[([a-z]+)\]", value) + if not m: + raise RuntimeError("BUG: Bad regular expression") + return m.group(1) + + +class GrubHugepageConfig: + def __init__(self): + self.update() + + def update(self): + self.reset() + + hugepage_sizes = info.hugepage_sizes_supported + if len(hugepage_sizes) == 0: + raise RuntimeError("Hugepages appear to be unsupported") + cmdline = _find_linux_default_cmdline() + values = _parse_linux_default_cmdline(cmdline) + + # parse values in the list + 
self.default_hugepagesz = info.default_hugepage_size + self.transparent_hugepage = _find_transparent_hugepage() + sizes = [] + nrs = [] + for k, v in values: + if k == "default_hugepagesz": + self.default_hugepagesz = human_readable_to_kilobytes(v) + elif k == "transparent_hugepage": + self.transparent_hugepage = v + elif k == "hugepagesz": + sizes.append(human_readable_to_kilobytes(v)) + elif k == "hugepages": + nrs.append(v) + if len(sizes) != len(nrs): + raise RuntimeError("GRUB hugepage configuration is wrong") + detected_hugepages = dict(zip(sizes, map(int, nrs))) + self.nr_hugepages = {size: detected_hugepages.get(size, 0) + for size in hugepage_sizes} + + def commit(self): + # perform sanity checks - we can't afford invalid data making it into + # bootloader config, as that might render user's machine unbootable, so + # tread really really carefully + + # first, check if user didn't add any unexpected hugepage sizes + configured_sizes = set(self.nr_hugepages.keys()) + supported_sizes = set(info.hugepage_sizes_supported) + + if configured_sizes != supported_sizes: + diff = configured_sizes.difference(supported_sizes) + raise ValueError("Unsupported hugepage sizes: %s" % + [kilobytes_to_human_readable(s) for s in diff]) + + # check if default hugepage is one of the supported ones + if self.default_hugepagesz is not None and\ + self.default_hugepagesz not in configured_sizes: + s = kilobytes_to_human_readable(self.default_hugepagesz) + raise ValueError("Unsupported default hugepage size: %s" % s) + + # transparent hugepages support was added in recent kernels, so check + # if user is trying to set this + if _find_transparent_hugepage() is None and \ + self.transparent_hugepage is not None: + raise ValueError("Transparent hugepages are not supported") + + # OK, parameters look to be valid - let's roll + + # read and parse current cmdline + cmdline = _find_linux_default_cmdline() + + values = _parse_linux_default_cmdline(cmdline) + + # clear out old data + klist = ["transparent_hugepage", "default_hugepagesz", + "hugepage", "hugepagesz"] + # iterate over a copy so that we could delete items + for k, v in values[:]: + if k in klist: + values.remove((k, v)) + + # generate new cmdline + cmdline = " ".join([("%s=%s" % (k, v)) if v is not None else k + for k, v in values]) + + # now, populate cmdline with new data + new_items = [] + for sz, nr in self.nr_hugepages.items(): + sz = kilobytes_to_human_readable(sz) + new_items.append("hugepagesz=%s hugepages=%i" % (sz, nr)) + if self.default_hugepagesz is not None: + new_items.append("default_hugepagesz=%s" % + kilobytes_to_human_readable(self.default_hugepagesz)) + if self.transparent_hugepage is not None: + new_items.append("transparent_hugepage=%s" % self.transparent_hugepage) + + cmdline = "%s %s" % (cmdline, " ".join(new_items)) + + # strip any extraneous whitespace we may have added + cmdline = re.sub(r"\s\s+", " ", cmdline).strip() + + # now, put everything back together + cmdline = '%s="%s"' % (_GRUB_CMDLINE_PARAM_NAME, cmdline) + + # write it to config + _generate_linux_default_cmdline(cmdline) + + # finally, update GRUB + if not run(["update-grub"]): + raise RuntimeError("Failed to update GRUB") + self.update() + + def reset(self): + self.nr_hugepages = {} # pagesz: number + self.default_hugepagesz = None + self.transparent_hugepage = None
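Since no example script is provided for this library, the following is a purely hypothetical sketch of how GrubHugepageConfig is intended to be driven. Page sizes are in kilobytes as elsewhere in DPDKConfigLib, the requested page counts are arbitrary, and commit() really does rewrite /etc/default/grub and run update-grub, so the same warning as above applies:

    from DPDKConfigLib import GrubHugeUtil, Util

    cfg = GrubHugeUtil.GrubHugepageConfig()

    # request four 1G pages and 512 2M pages at next boot (illustrative numbers)
    cfg.nr_hugepages[1048576] = 4
    cfg.nr_hugepages[2048] = 512
    cfg.default_hugepagesz = 1048576        # make 1G the kernel default

    for sz, nr in cfg.nr_hugepages.items():
        print("requesting %i pages of size %s" %
              (nr, Util.kilobytes_to_human_readable(sz)))

    cfg.commit()    # rewrites GRUB_CMDLINE_LINUX_DEFAULT and runs update-grub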