get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/105533/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 105533,
    "url": "http://patchwork.dpdk.org/api/patches/105533/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20211230143744.3550098-5-dkozlyuk@nvidia.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20211230143744.3550098-5-dkozlyuk@nvidia.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20211230143744.3550098-5-dkozlyuk@nvidia.com",
    "date": "2021-12-30T14:37:42",
    "name": "[RFC,4/6] eal/linux: allow hugepage file reuse",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "4e69d3ebdd81d5e7d90915cff843a9831971be50",
    "submitter": {
        "id": 2248,
        "url": "http://patchwork.dpdk.org/api/people/2248/?format=api",
        "name": "Dmitry Kozlyuk",
        "email": "dkozlyuk@nvidia.com"
    },
    "delegate": {
        "id": 24651,
        "url": "http://patchwork.dpdk.org/api/users/24651/?format=api",
        "username": "dmarchand",
        "first_name": "David",
        "last_name": "Marchand",
        "email": "david.marchand@redhat.com"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20211230143744.3550098-5-dkozlyuk@nvidia.com/mbox/",
    "series": [
        {
            "id": 21042,
            "url": "http://patchwork.dpdk.org/api/series/21042/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=21042",
            "date": "2021-12-30T14:37:38",
            "name": "Fast restart with many hugepages",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/21042/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/105533/comments/",
    "check": "warning",
    "checks": "http://patchwork.dpdk.org/api/patches/105533/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 9F640A04A5;\n\tThu, 30 Dec 2021 15:38:36 +0100 (CET)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 2E56741174;\n\tThu, 30 Dec 2021 15:38:17 +0100 (CET)",
            "from NAM10-BN7-obe.outbound.protection.outlook.com\n (mail-bn7nam10on2079.outbound.protection.outlook.com [40.107.92.79])\n by mails.dpdk.org (Postfix) with ESMTP id 0C84B41148\n for <dev@dpdk.org>; Thu, 30 Dec 2021 15:38:13 +0100 (CET)",
            "from CO2PR18CA0061.namprd18.prod.outlook.com (2603:10b6:104:2::29)\n by DM6PR12MB3210.namprd12.prod.outlook.com (2603:10b6:5:185::10) with\n Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.4823.19; Thu, 30 Dec\n 2021 14:38:10 +0000",
            "from CO1NAM11FT053.eop-nam11.prod.protection.outlook.com\n (2603:10b6:104:2:cafe::73) by CO2PR18CA0061.outlook.office365.com\n (2603:10b6:104:2::29) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.4844.14 via Frontend\n Transport; Thu, 30 Dec 2021 14:38:10 +0000",
            "from mail.nvidia.com (12.22.5.236) by\n CO1NAM11FT053.mail.protection.outlook.com (10.13.175.63) with Microsoft SMTP\n Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384) id\n 15.20.4844.14 via Frontend Transport; Thu, 30 Dec 2021 14:38:10 +0000",
            "from rnnvmail201.nvidia.com (10.129.68.8) by DRHQMAIL109.nvidia.com\n (10.27.9.19) with Microsoft SMTP Server (TLS) id 15.0.1497.18;\n Thu, 30 Dec 2021 14:38:08 +0000",
            "from nvidia.com (172.20.187.6) by rnnvmail201.nvidia.com\n (10.129.68.8) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384) id 15.2.986.9; Thu, 30 Dec 2021\n 06:38:06 -0800"
        ],
        "ARC-Seal": "i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none;\n b=R2yneNHPSRcqbKN8/okvXGnhIdZjvfRhJ7NEcCEOfSpjTLVGY9+5TvZXzB1p9bmJgL8oYjpB0uPMOjO/GvhMmu2BCF6V2BP1/4kB5M9V72Q1oaVM7CwJI5TrJUAn1cuUfHpneziyyAzdKXueiql3QfJnvhzlEySeOFmLI5zB9ZPAaOnYTWYuKOuFM6zDKwa2y/UVDxuv70Os/MSIGdmY+D0Iun/5GrYSGDP3uL4k/EdzO3KjJHV8tbXMOddnnmxe6nJMU1DOt0KavMPl+Cgewgn4nHhqF4mpLf5YqnU0tOnyxyfa2g0tBzVBnUWBf8g65f6KzCNCvh8ExoyBOmuqDQ==",
        "ARC-Message-Signature": "i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;\n s=arcselector9901;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;\n bh=eQVaTsSne+Qcod8p9FmCKdIBumlNwY94+Fc+J4zCyfM=;\n b=MVL/Vv74FJrhBavq/gzemnSrsGrNx4cfoTIzvbj4P0KEpPOCR+0BZGjTVP7XBTB15erWlEq2UH6bhWHQ7w4Dvk9T1f67eK7YnGUhd2OYkQAKMvdnyW+eStoc3fi/ZGLIT5bEWIopuJeRyGP6zHZ4tcHG89ZipeGAYjBtIakzjas/A1q6zlIWXF6M6ntZF5TTBGxrzghBxhB4X5p7I3pWLjHb6ZY3UMlzJzP4zlKDAjHVXxXGlNZiG+rz5mQ4p6jY7Ya7LZOfR5FhnaDsZNG391+O9gDzRHriw43JnjqI6bea/rHIba3QnskbiRW/bkQZNzGDrbhjWb8+DdMKc0v4pg==",
        "ARC-Authentication-Results": "i=1; mx.microsoft.com 1; spf=pass (sender ip is\n 12.22.5.236) smtp.rcpttodomain=intel.com smtp.mailfrom=nvidia.com; dmarc=pass\n (p=reject sp=reject pct=100) action=none header.from=nvidia.com; dkim=none\n (message not signed); arc=none",
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com;\n s=selector2;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;\n bh=eQVaTsSne+Qcod8p9FmCKdIBumlNwY94+Fc+J4zCyfM=;\n b=dhKvDYXB0TJnyEcaoSRysuUjwuMSOxsukRnFok9xFf8wVCqtdchHo43mAkLEmTr/iJkBhmgs0UdtB4AtYqhByVwAv5IPzwaRiCcv+Hg9xcfDmQvBcW4rK1nJWGMCrUy7iXOHqZKCqKfi7lEH8mj3AM8VtxsxFdnd/9VNFE+E/I6ghRim9c2oKZrYZN1HGlBesCqUFygUDo/6oLpuemV+pTKAdgzYmFVlmg16HiLJVB1sZIg2qbmL7wULFBFHhKjw+puqr0YAuJtPHAstuUCbJIZai6Ka/3I7p5GyqMgvkwydqBvbS8zA1v38jUnvD883+zmojCB7rvKl/dIoCbaz1A==",
        "X-MS-Exchange-Authentication-Results": "spf=pass (sender IP is 12.22.5.236)\n smtp.mailfrom=nvidia.com; dkim=none (message not signed)\n header.d=none;dmarc=pass action=none header.from=nvidia.com;",
        "Received-SPF": "Pass (protection.outlook.com: domain of nvidia.com designates\n 12.22.5.236 as permitted sender) receiver=protection.outlook.com;\n client-ip=12.22.5.236; helo=mail.nvidia.com;",
        "From": "Dmitry Kozlyuk <dkozlyuk@nvidia.com>",
        "To": "<dev@dpdk.org>",
        "CC": "Anatoly Burakov <anatoly.burakov@intel.com>",
        "Subject": "[RFC PATCH 4/6] eal/linux: allow hugepage file reuse",
        "Date": "Thu, 30 Dec 2021 16:37:42 +0200",
        "Message-ID": "<20211230143744.3550098-5-dkozlyuk@nvidia.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20211230143744.3550098-1-dkozlyuk@nvidia.com>",
        "References": "<20211230143744.3550098-1-dkozlyuk@nvidia.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Originating-IP": "[172.20.187.6]",
        "X-ClientProxiedBy": "HQMAIL105.nvidia.com (172.20.187.12) To\n rnnvmail201.nvidia.com (10.129.68.8)",
        "X-EOPAttributedMessage": "0",
        "X-MS-PublicTrafficType": "Email",
        "X-MS-Office365-Filtering-Correlation-Id": "21990e4b-43a2-4a65-ec97-08d9cba200be",
        "X-MS-TrafficTypeDiagnostic": "DM6PR12MB3210:EE_",
        "X-Microsoft-Antispam-PRVS": "\n <DM6PR12MB32107C5A3B5D8489E950EE00B9459@DM6PR12MB3210.namprd12.prod.outlook.com>",
        "X-MS-Oob-TLC-OOBClassifiers": "OLM:6790;",
        "X-MS-Exchange-SenderADCheck": "1",
        "X-MS-Exchange-AntiSpam-Relay": "0",
        "X-Microsoft-Antispam": "BCL:0;",
        "X-Microsoft-Antispam-Message-Info": "\n wFTlkvoS7xI7HeJrnS5eRmjFBMVpfT4RoK1GvngqZDhYovx+lTdUwNyLmeQXS1cQZa23gp7obifaKq0Iu8+2ORq5zrx68bQQRM9y0xuO1UqBD0gLdcrYFSkcVgNPM6CHYGarqg123rMYq10u4joM6wLN7abr7zEmsn1wE/7+MybDOas1oCMa7iGa0q74fsStf68k1ZC3+22rofJ8vYAUYgYKxXAe2/pEbC92jPNpjvzFRHnxrT9jkqU95d/+qMt0s+izLS0YSt8JfKlPMa4ogoDIp23P4lwt1C+xehFYW0vOz+uCG+tANg96vccB5dd7P6arDmJ/wM9QAg5uXZqX+SVcE53NHy3uNvf34tOy44lPEdJzCEpAsLj9sP0ADA5hoPgytaeZOxClNVmpxCWAe5fLK4t3GeXsCm5i7oPs//3bXYWvrzmDo76lyDGelxMCu6Dt8M/WKmNXfllok7w/5YF3W9q0ipC3HCTgaq/E37OA6wldxi9AlvhhCB4bshzVEX4VuG7V0cV3xN6lWwmtdbgjT6OBnvAsxDoVYv1b5vjTKBuM/2Nr0UZUFf+jlU2gciIXaUZQB3AJTCTkRBHhhnw9OgBHM6JM4VzhiOFRcHSX85Zt010xf6HuD5Gft/pulkk09KEzbC+mLa0fJSr7TokDQGyet82hSd3FUXd5qi722wkGmF8zdYWf831OCuVBoc8xgHLkGVlgV7mlf4oeyiZoFO12opivOdtc4NbfLzn1DqUlo7hz3rcA/7vCpli0TyZvhXmE5Eb66bcDCoU6+ASd/ouLYAuqHFnMyLXpPps=",
        "X-Forefront-Antispam-Report": "CIP:12.22.5.236; CTRY:US; LANG:en; SCL:1; SRV:;\n IPV:CAL; SFV:NSPM; H:mail.nvidia.com; PTR:InfoNoRecords; CAT:NONE;\n SFS:(4636009)(40470700002)(36840700001)(46966006)(8936002)(4326008)(30864003)(70586007)(70206006)(7696005)(8676002)(47076005)(82310400004)(426003)(26005)(16526019)(36860700001)(336012)(186003)(36756003)(86362001)(81166007)(356005)(6286002)(1076003)(6666004)(2906002)(316002)(83380400001)(2616005)(40460700001)(55016003)(508600001)(5660300002)(6916009)(36900700001);\n DIR:OUT; SFP:1101;",
        "X-OriginatorOrg": "Nvidia.com",
        "X-MS-Exchange-CrossTenant-OriginalArrivalTime": "30 Dec 2021 14:38:10.6038 (UTC)",
        "X-MS-Exchange-CrossTenant-Network-Message-Id": "\n 21990e4b-43a2-4a65-ec97-08d9cba200be",
        "X-MS-Exchange-CrossTenant-Id": "43083d15-7273-40c1-b7db-39efd9ccc17a",
        "X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp": "\n TenantId=43083d15-7273-40c1-b7db-39efd9ccc17a; Ip=[12.22.5.236];\n Helo=[mail.nvidia.com]",
        "X-MS-Exchange-CrossTenant-AuthSource": "\n CO1NAM11FT053.eop-nam11.prod.protection.outlook.com",
        "X-MS-Exchange-CrossTenant-AuthAs": "Anonymous",
        "X-MS-Exchange-CrossTenant-FromEntityHeader": "HybridOnPrem",
        "X-MS-Exchange-Transport-CrossTenantHeadersStamped": "DM6PR12MB3210",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Linux EAL ensured that mapped hugepages are clean\nby always mapping from newly created files:\nexisting hugepage backing files were always removed.\nIn this case, the kernel clears the page to prevent data leaks,\nbecause the mapped memory may contain leftover data\nfrom the previous process that was using this memory.\nClearing takes the bulk of the time spent in mmap(2),\nincreasing EAL initialization time.\n\nIntroduce a mode to keep existing files and reuse them\nin order to speed up initial memory allocation in EAL.\nHugepages mapped from such files may contain data\nleft by the previous process that used this memory,\nso RTE_MEMSEG_FLAG_DIRTY is set for their segments.\nIf multiple hugepages are mapped from the same file:\n1. When fallocate(2) is used, all memory mapped from this file\n   is considered dirty, because it is unknown\n   which parts of the file are holes.\n2. When ftruncate(3) is used, memory mapped from this file\n   is considered dirty unless the file is extended\n   to create a new mapping, which implies clean memory.\n\nSigned-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>\n---\n lib/eal/common/eal_internal_cfg.h |   2 +\n lib/eal/linux/eal_hugepage_info.c |  59 +++++++----\n lib/eal/linux/eal_memalloc.c      | 157 ++++++++++++++++++------------\n 3 files changed, 140 insertions(+), 78 deletions(-)",
    "diff": "diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h\nindex b5e6942578..3685aa7c52 100644\n--- a/lib/eal/common/eal_internal_cfg.h\n+++ b/lib/eal/common/eal_internal_cfg.h\n@@ -44,6 +44,8 @@ struct simd_bitwidth {\n struct hugepage_file_discipline {\n \t/** Unlink files before mapping them to leave no trace in hugetlbfs. */\n \tbool unlink_before_mapping;\n+\t/** Reuse existing files, never delete or re-create them. */\n+\tbool keep_existing;\n };\n \n /**\ndiff --git a/lib/eal/linux/eal_hugepage_info.c b/lib/eal/linux/eal_hugepage_info.c\nindex 9fb0e968db..55debdedf0 100644\n--- a/lib/eal/linux/eal_hugepage_info.c\n+++ b/lib/eal/linux/eal_hugepage_info.c\n@@ -84,7 +84,7 @@ static int get_hp_sysfs_value(const char *subdir, const char *file, unsigned lon\n /* this function is only called from eal_hugepage_info_init which itself\n  * is only called from a primary process */\n static uint32_t\n-get_num_hugepages(const char *subdir, size_t sz)\n+get_num_hugepages(const char *subdir, size_t sz, unsigned int reusable_pages)\n {\n \tunsigned long resv_pages, num_pages, over_pages, surplus_pages;\n \tconst char *nr_hp_file = \"free_hugepages\";\n@@ -116,7 +116,7 @@ get_num_hugepages(const char *subdir, size_t sz)\n \telse\n \t\tover_pages = 0;\n \n-\tif (num_pages == 0 && over_pages == 0)\n+\tif (num_pages == 0 && over_pages == 0 && reusable_pages)\n \t\tRTE_LOG(WARNING, EAL, \"No available %zu kB hugepages reported\\n\",\n \t\t\t\tsz >> 10);\n \n@@ -124,6 +124,10 @@ get_num_hugepages(const char *subdir, size_t sz)\n \tif (num_pages < over_pages) /* overflow */\n \t\tnum_pages = UINT32_MAX;\n \n+\tnum_pages += reusable_pages;\n+\tif (num_pages < reusable_pages) /* overflow */\n+\t\tnum_pages = UINT32_MAX;\n+\n \t/* we want to return a uint32_t and more than this looks suspicious\n \t * anyway ... */\n \tif (num_pages > UINT32_MAX)\n@@ -298,12 +302,12 @@ get_hugepage_dir(uint64_t hugepage_sz, char *hugedir, int len)\n }\n \n /*\n- * Clear the hugepage directory of whatever hugepage files\n- * there are. Checks if the file is locked (i.e.\n- * if it's in use by another DPDK process).\n+ * Search the hugepage directory for whatever hugepage files there are.\n+ * Check if the file is in use by another DPDK process.\n+ * If not, either remove it, or keep and count the page as reusable.\n  */\n static int\n-clear_hugedir(const char * hugedir)\n+clear_hugedir(const char *hugedir, bool keep, unsigned int *reusable_pages)\n {\n \tDIR *dir;\n \tstruct dirent *dirent;\n@@ -346,8 +350,12 @@ clear_hugedir(const char * hugedir)\n \t\tlck_result = flock(fd, LOCK_EX | LOCK_NB);\n \n \t\t/* if lock succeeds, remove the file */\n-\t\tif (lck_result != -1)\n-\t\t\tunlinkat(dir_fd, dirent->d_name, 0);\n+\t\tif (lck_result != -1) {\n+\t\t\tif (keep)\n+\t\t\t\t(*reusable_pages)++;\n+\t\t\telse\n+\t\t\t\tunlinkat(dir_fd, dirent->d_name, 0);\n+\t\t}\n \t\tclose (fd);\n \t\tdirent = readdir(dir);\n \t}\n@@ -375,7 +383,8 @@ compare_hpi(const void *a, const void *b)\n }\n \n static void\n-calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)\n+calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent,\n+\t\tunsigned int reusable_pages)\n {\n \tuint64_t total_pages = 0;\n \tunsigned int i;\n@@ -388,8 +397,15 @@ calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)\n \t * in one socket and sorting them later\n \t */\n \ttotal_pages = 0;\n-\t/* we also don't want to do this for legacy init */\n-\tif (!internal_conf->legacy_mem)\n+\n+\t/*\n+\t * We also don't want to do this for legacy init.\n+\t * When there are hugepage files to reuse it is unknown\n+\t * what NUMA node the pages are on.\n+\t * This could be determined by mapping,\n+\t * but it is precisely what hugepage file reuse is trying to avoid.\n+\t */\n+\tif (!internal_conf->legacy_mem && reusable_pages == 0)\n \t\tfor (i = 0; i < rte_socket_count(); i++) {\n \t\t\tint socket = rte_socket_id_by_idx(i);\n \t\t\tunsigned int num_pages =\n@@ -405,7 +421,7 @@ calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)\n \t */\n \tif (total_pages == 0) {\n \t\thpi->num_pages[0] = get_num_hugepages(dirent->d_name,\n-\t\t\t\thpi->hugepage_sz);\n+\t\t\t\thpi->hugepage_sz, reusable_pages);\n \n #ifndef RTE_ARCH_64\n \t\t/* for 32-bit systems, limit number of hugepages to\n@@ -421,6 +437,7 @@ hugepage_info_init(void)\n {\tconst char dirent_start_text[] = \"hugepages-\";\n \tconst size_t dirent_start_len = sizeof(dirent_start_text) - 1;\n \tunsigned int i, num_sizes = 0;\n+\tunsigned int reusable_pages;\n \tDIR *dir;\n \tstruct dirent *dirent;\n \tstruct internal_config *internal_conf =\n@@ -454,7 +471,7 @@ hugepage_info_init(void)\n \t\t\tuint32_t num_pages;\n \n \t\t\tnum_pages = get_num_hugepages(dirent->d_name,\n-\t\t\t\t\thpi->hugepage_sz);\n+\t\t\t\t\thpi->hugepage_sz, 0);\n \t\t\tif (num_pages > 0)\n \t\t\t\tRTE_LOG(NOTICE, EAL,\n \t\t\t\t\t\"%\" PRIu32 \" hugepages of size \"\n@@ -473,7 +490,7 @@ hugepage_info_init(void)\n \t\t\t\t\t\"hugepages of size %\" PRIu64 \" bytes \"\n \t\t\t\t\t\"will be allocated anonymously\\n\",\n \t\t\t\t\thpi->hugepage_sz);\n-\t\t\t\tcalc_num_pages(hpi, dirent);\n+\t\t\t\tcalc_num_pages(hpi, dirent, 0);\n \t\t\t\tnum_sizes++;\n \t\t\t}\n #endif\n@@ -489,11 +506,17 @@ hugepage_info_init(void)\n \t\t\t\t\"Failed to lock hugepage directory!\\n\");\n \t\t\tbreak;\n \t\t}\n-\t\t/* clear out the hugepages dir from unused pages */\n-\t\tif (clear_hugedir(hpi->hugedir) == -1)\n-\t\t\tbreak;\n \n-\t\tcalc_num_pages(hpi, dirent);\n+\t\t/*\n+\t\t * Check for existing hugepage files and either remove them\n+\t\t * or count how many of them can be reused.\n+\t\t */\n+\t\treusable_pages = 0;\n+\t\tif (clear_hugedir(hpi->hugedir,\n+\t\t\t\tinternal_conf->hugepage_file.keep_existing,\n+\t\t\t\t&reusable_pages) == -1)\n+\t\t\tbreak;\n+\t\tcalc_num_pages(hpi, dirent, reusable_pages);\n \n \t\tnum_sizes++;\n \t}\ndiff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c\nindex abbe605e49..cbd7c9cbee 100644\n--- a/lib/eal/linux/eal_memalloc.c\n+++ b/lib/eal/linux/eal_memalloc.c\n@@ -287,12 +287,19 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,\n \n static int\n get_seg_fd(char *path, int buflen, struct hugepage_info *hi,\n-\t\tunsigned int list_idx, unsigned int seg_idx)\n+\t\tunsigned int list_idx, unsigned int seg_idx,\n+\t\tbool *dirty)\n {\n \tint fd;\n+\tint *out_fd;\n+\tstruct stat st;\n+\tint ret;\n \tconst struct internal_config *internal_conf =\n \t\teal_get_internal_configuration();\n \n+\tif (dirty != NULL)\n+\t\t*dirty = false;\n+\n \t/* for in-memory mode, we only make it here when we're sure we support\n \t * memfd, and this is a special case.\n \t */\n@@ -300,66 +307,68 @@ get_seg_fd(char *path, int buflen, struct hugepage_info *hi,\n \t\treturn get_seg_memfd(hi, list_idx, seg_idx);\n \n \tif (internal_conf->single_file_segments) {\n-\t\t/* create a hugepage file path */\n+\t\tout_fd = &fd_list[list_idx].memseg_list_fd;\n \t\teal_get_hugefile_path(path, buflen, hi->hugedir, list_idx);\n-\n-\t\tfd = fd_list[list_idx].memseg_list_fd;\n-\n-\t\tif (fd < 0) {\n-\t\t\tfd = open(path, O_CREAT | O_RDWR, 0600);\n-\t\t\tif (fd < 0) {\n-\t\t\t\tRTE_LOG(ERR, EAL, \"%s(): open failed: %s\\n\",\n-\t\t\t\t\t__func__, strerror(errno));\n-\t\t\t\treturn -1;\n-\t\t\t}\n-\t\t\t/* take out a read lock and keep it indefinitely */\n-\t\t\tif (lock(fd, LOCK_SH) < 0) {\n-\t\t\t\tRTE_LOG(ERR, EAL, \"%s(): lock failed: %s\\n\",\n-\t\t\t\t\t__func__, strerror(errno));\n-\t\t\t\tclose(fd);\n-\t\t\t\treturn -1;\n-\t\t\t}\n-\t\t\tfd_list[list_idx].memseg_list_fd = fd;\n-\t\t}\n \t} else {\n-\t\t/* create a hugepage file path */\n+\t\tout_fd = &fd_list[list_idx].fds[seg_idx];\n \t\teal_get_hugefile_path(path, buflen, hi->hugedir,\n \t\t\t\tlist_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);\n+\t}\n+\tfd = *out_fd;\n+\tif (fd >= 0)\n+\t\treturn fd;\n \n-\t\tfd = fd_list[list_idx].fds[seg_idx];\n-\n-\t\tif (fd < 0) {\n-\t\t\t/* A primary process is the only one creating these\n-\t\t\t * files. If there is a leftover that was not cleaned\n-\t\t\t * by clear_hugedir(), we must *now* make sure to drop\n-\t\t\t * the file or we will remap old stuff while the rest\n-\t\t\t * of the code is built on the assumption that a new\n-\t\t\t * page is clean.\n-\t\t\t */\n-\t\t\tif (rte_eal_process_type() == RTE_PROC_PRIMARY &&\n-\t\t\t\t\tunlink(path) == -1 &&\n-\t\t\t\t\terrno != ENOENT) {\n+\t/*\n+\t * The kernel clears a hugepage only when it is mapped\n+\t * from a particular file for the first time.\n+\t * If the file already exists, mapped will be the old\n+\t * content of the hugepages. If the memory manager\n+\t * assumes all mapped pages to be clean,\n+\t * the file must be removed and created anew.\n+\t * Otherwise the primary caller must be notified\n+\t * that mapped pages will be dirty (secondary callers\n+\t * receive the segment state from the primary one).\n+\t * When multiple hugepages are mapped from the same file,\n+\t * whether they will be dirty depends on the part that is mapped.\n+\t *\n+\t * There is no TOCTOU between stat() and unlink()/open()\n+\t * because the hugepage directory is locked.\n+\t */\n+\tif (!internal_conf->single_file_segments) {\n+\t\tret = stat(path, &st);\n+\t\tif (ret < 0 && errno != ENOENT) {\n+\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): stat() for '%s' failed: %s\\n\",\n+\t\t\t\t__func__, path, strerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n+\t\tif (rte_eal_process_type() == RTE_PROC_PRIMARY && ret == 0) {\n+\t\t\tif (internal_conf->hugepage_file.keep_existing &&\n+\t\t\t\t\tdirty != NULL) {\n+\t\t\t\t*dirty = true;\n+\t\t\t/* coverity[toctou] */\n+\t\t\t} else if (unlink(path) < 0) {\n \t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): could not remove '%s': %s\\n\",\n \t\t\t\t\t__func__, path, strerror(errno));\n \t\t\t\treturn -1;\n \t\t\t}\n-\n-\t\t\tfd = open(path, O_CREAT | O_RDWR, 0600);\n-\t\t\tif (fd < 0) {\n-\t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): open failed: %s\\n\",\n-\t\t\t\t\t__func__, strerror(errno));\n-\t\t\t\treturn -1;\n-\t\t\t}\n-\t\t\t/* take out a read lock */\n-\t\t\tif (lock(fd, LOCK_SH) < 0) {\n-\t\t\t\tRTE_LOG(ERR, EAL, \"%s(): lock failed: %s\\n\",\n-\t\t\t\t\t__func__, strerror(errno));\n-\t\t\t\tclose(fd);\n-\t\t\t\treturn -1;\n-\t\t\t}\n-\t\t\tfd_list[list_idx].fds[seg_idx] = fd;\n \t\t}\n \t}\n+\n+\t/* coverity[toctou] */\n+\tfd = open(path, O_CREAT | O_RDWR, 0600);\n+\tif (fd < 0) {\n+\t\tRTE_LOG(DEBUG, EAL, \"%s(): open failed: %s\\n\",\n+\t\t\t__func__, strerror(errno));\n+\t\treturn -1;\n+\t}\n+\t/* take out a read lock */\n+\tif (lock(fd, LOCK_SH) < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): lock failed: %s\\n\",\n+\t\t\t__func__, strerror(errno));\n+\t\tclose(fd);\n+\t\treturn -1;\n+\t}\n+\t*out_fd = fd;\n \treturn fd;\n }\n \n@@ -385,8 +394,10 @@ resize_hugefile_in_memory(int fd, uint64_t fa_offset,\n \n static int\n resize_hugefile_in_filesystem(int fd, uint64_t fa_offset, uint64_t page_sz,\n-\t\tbool grow)\n+\t\tbool grow, bool *dirty)\n {\n+\tconst struct internal_config *internal_conf =\n+\t\t\teal_get_internal_configuration();\n \tbool again = false;\n \n \tdo {\n@@ -405,6 +416,8 @@ resize_hugefile_in_filesystem(int fd, uint64_t fa_offset, uint64_t page_sz,\n \t\t\tuint64_t cur_size = get_file_size(fd);\n \n \t\t\t/* fallocate isn't supported, fall back to ftruncate */\n+\t\t\tif (dirty != NULL)\n+\t\t\t\t*dirty = new_size <= cur_size;\n \t\t\tif (new_size > cur_size &&\n \t\t\t\t\tftruncate(fd, new_size) < 0) {\n \t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): ftruncate() failed: %s\\n\",\n@@ -447,8 +460,17 @@ resize_hugefile_in_filesystem(int fd, uint64_t fa_offset, uint64_t page_sz,\n \t\t\t\t\t\tstrerror(errno));\n \t\t\t\t\treturn -1;\n \t\t\t\t}\n-\t\t\t} else\n+\t\t\t} else {\n \t\t\t\tfallocate_supported = 1;\n+\t\t\t\t/*\n+\t\t\t\t * It is unknown which portions of an existing\n+\t\t\t\t * hugepage file were allocated previously,\n+\t\t\t\t * so all pages within the file are considered\n+\t\t\t\t * dirty, unless the file is a fresh one.\n+\t\t\t\t */\n+\t\t\t\tif (dirty != NULL)\n+\t\t\t\t\t*dirty = internal_conf->hugepage_file.keep_existing;\n+\t\t\t}\n \t\t}\n \t} while (again);\n \n@@ -475,7 +497,8 @@ close_hugefile(int fd, char *path, int list_idx)\n }\n \n static int\n-resize_hugefile(int fd, uint64_t fa_offset, uint64_t page_sz, bool grow)\n+resize_hugefile(int fd, uint64_t fa_offset, uint64_t page_sz, bool grow,\n+\t\tbool *dirty)\n {\n \t/* in-memory mode is a special case, because we can be sure that\n \t * fallocate() is supported.\n@@ -483,12 +506,15 @@ resize_hugefile(int fd, uint64_t fa_offset, uint64_t page_sz, bool grow)\n \tconst struct internal_config *internal_conf =\n \t\teal_get_internal_configuration();\n \n-\tif (internal_conf->in_memory)\n+\tif (internal_conf->in_memory) {\n+\t\tif (dirty != NULL)\n+\t\t\t*dirty = false;\n \t\treturn resize_hugefile_in_memory(fd, fa_offset,\n \t\t\t\tpage_sz, grow);\n+\t}\n \n \treturn resize_hugefile_in_filesystem(fd, fa_offset, page_sz,\n-\t\t\t\tgrow);\n+\t\t\tgrow, dirty);\n }\n \n static int\n@@ -505,6 +531,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,\n \tchar path[PATH_MAX];\n \tint ret = 0;\n \tint fd;\n+\tbool dirty;\n \tsize_t alloc_sz;\n \tint flags;\n \tvoid *new_addr;\n@@ -534,6 +561,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,\n \n \t\tpagesz_flag = pagesz_flags(alloc_sz);\n \t\tfd = -1;\n+\t\tdirty = false;\n \t\tmmap_flags = in_memory_flags | pagesz_flag;\n \n \t\t/* single-file segments codepath will never be active\n@@ -544,7 +572,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,\n \t\tmap_offset = 0;\n \t} else {\n \t\t/* takes out a read lock on segment or segment list */\n-\t\tfd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);\n+\t\tfd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx,\n+\t\t\t\t&dirty);\n \t\tif (fd < 0) {\n \t\t\tRTE_LOG(ERR, EAL, \"Couldn't get fd on hugepage file\\n\");\n \t\t\treturn -1;\n@@ -552,7 +581,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,\n \n \t\tif (internal_conf->single_file_segments) {\n \t\t\tmap_offset = seg_idx * alloc_sz;\n-\t\t\tret = resize_hugefile(fd, map_offset, alloc_sz, true);\n+\t\t\tret = resize_hugefile(fd, map_offset, alloc_sz, true,\n+\t\t\t\t\t&dirty);\n \t\t\tif (ret < 0)\n \t\t\t\tgoto resized;\n \n@@ -662,6 +692,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,\n \tms->nrank = rte_memory_get_nrank();\n \tms->iova = iova;\n \tms->socket_id = socket_id;\n+\tms->flags = dirty ? RTE_MEMSEG_FLAG_DIRTY : 0;\n \n \treturn 0;\n \n@@ -689,7 +720,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,\n \t\treturn -1;\n \n \tif (internal_conf->single_file_segments) {\n-\t\tresize_hugefile(fd, map_offset, alloc_sz, false);\n+\t\tresize_hugefile(fd, map_offset, alloc_sz, false, NULL);\n \t\t/* ignore failure, can't make it any worse */\n \n \t\t/* if refcount is at zero, close the file */\n@@ -739,13 +770,13 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,\n \t * segment and thus drop the lock on original fd, but hugepage dir is\n \t * now locked so we can take out another one without races.\n \t */\n-\tfd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);\n+\tfd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx, NULL);\n \tif (fd < 0)\n \t\treturn -1;\n \n \tif (internal_conf->single_file_segments) {\n \t\tmap_offset = seg_idx * ms->len;\n-\t\tif (resize_hugefile(fd, map_offset, ms->len, false))\n+\t\tif (resize_hugefile(fd, map_offset, ms->len, false, NULL))\n \t\t\treturn -1;\n \n \t\tif (--(fd_list[list_idx].count) == 0)\n@@ -1743,6 +1774,12 @@ eal_memalloc_init(void)\n \t\t\tRTE_LOG(ERR, EAL, \"Using anonymous memory is not supported\\n\");\n \t\t\treturn -1;\n \t\t}\n+\t\t/* safety net, should be impossible to configure */\n+\t\tif (internal_conf->hugepage_file.unlink_before_mapping &&\n+\t\t\t\tinternal_conf->hugepage_file.keep_existing) {\n+\t\t\tRTE_LOG(ERR, EAL, \"Unable both to keep existing hugepage files and to unlink them.\\n\");\n+\t\t\treturn -1;\n+\t\t}\n \t}\n \n \t/* initialize all of the fd lists */\n",
    "prefixes": [
        "RFC",
        "4/6"
    ]
}