[v2,8/9] eal: add lcore iterators

Message ID 20200619162244.8239-9-david.marchand@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Register non-EAL threads as lcore |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

David Marchand June 19, 2020, 4:22 p.m. UTC
  Add a helper to iterate over all lcores.
The iterator callback is read-only with respect to the lcores list.

Implement a dump function on top of this for debugging.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- introduced lcore iterators and implemented rte_lcore_dump;
  this iterator mechanism can then be used outside of EAL.

---
 lib/librte_eal/common/eal_common_lcore.c  | 77 ++++++++++++++++++++---
 lib/librte_eal/common/eal_common_thread.c | 16 +++--
 lib/librte_eal/common/eal_thread.h        | 13 +++-
 lib/librte_eal/freebsd/eal.c              |  2 +-
 lib/librte_eal/freebsd/eal_thread.c       |  2 +-
 lib/librte_eal/include/rte_lcore.h        | 47 +++++++++++++-
 lib/librte_eal/linux/eal.c                |  2 +-
 lib/librte_eal/linux/eal_thread.c         |  2 +-
 lib/librte_eal/rte_eal_version.map        |  2 +
 9 files changed, 140 insertions(+), 23 deletions(-)
  

Comments

Stephen Hemminger June 20, 2020, 2:21 a.m. UTC | #1
On Fri, 19 Jun 2020 18:22:43 +0200
David Marchand <david.marchand@redhat.com> wrote:

> +	rte_rwlock_read_lock(&lcore_lock);

I see you converted a spin lock to a reader/writer lock.
Are you sure this is a good idea? Although conceptually faster,
the implementation on most CPUs is slower than a simple spin lock.

https://www.kernel.org/doc/htmldocs/kernel-locking/Efficiency.html

	If your code divides neatly along reader/writer lines (as our
	cache code does), and the lock is held by readers for significant
	lengths of time, using these locks can help. They are slightly slower
	than the normal locks though, so in practice rwlock_t is not usually
	worthwhile.
  

Patch

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5140026b6c..2524356a88 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -12,7 +12,7 @@ 
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
-#include <rte_spinlock.h>
+#include <rte_rwlock.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -222,7 +222,7 @@  rte_socket_id_by_idx(unsigned int idx)
 	return config->numa_nodes[idx];
 }
 
-static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_rwlock_t lcore_lock = RTE_RWLOCK_INITIALIZER;
 struct lcore_callback {
 	TAILQ_ENTRY(lcore_callback) next;
 	char *name;
@@ -271,7 +271,7 @@  rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 	callback->init = init;
 	callback->uninit = uninit;
 	callback->arg = arg;
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->init == NULL)
 		goto no_init;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -297,7 +297,7 @@  rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 		callback->name, callback->init == NULL ? "NO " : "",
 		callback->uninit == NULL ? "NO " : "");
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return callback;
 }
 
@@ -308,7 +308,7 @@  rte_lcore_callback_unregister(void *handle)
 	struct lcore_callback *callback = handle;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->uninit == NULL)
 		goto no_uninit;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -318,7 +318,7 @@  rte_lcore_callback_unregister(void *handle)
 	}
 no_uninit:
 	TAILQ_REMOVE(&lcore_callbacks, callback, next);
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
 		callback->name, callback->arg);
 	free(callback->name);
@@ -333,7 +333,7 @@  eal_lcore_non_eal_allocate(void)
 	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
 			continue;
@@ -365,7 +365,7 @@  eal_lcore_non_eal_allocate(void)
 		goto out;
 	}
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return lcore_id;
 }
 
@@ -375,7 +375,7 @@  eal_lcore_non_eal_release(unsigned int lcore_id)
 	struct rte_config *cfg = rte_eal_get_configuration();
 	struct lcore_callback *callback;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
 		goto out;
 	TAILQ_FOREACH(callback, &lcore_callbacks, next)
@@ -383,5 +383,62 @@  eal_lcore_non_eal_release(unsigned int lcore_id)
 	cfg->lcore_role[lcore_id] = ROLE_OFF;
 	cfg->lcore_count--;
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
+}
+
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+	int ret = 0;
+
+	rte_rwlock_read_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		ret = cb(lcore_id, arg);
+		if (ret != 0)
+			break;
+	}
+	rte_rwlock_read_unlock(&lcore_lock);
+	return ret;
+}
+
+static int
+lcore_dump_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	const char *role;
+	FILE *f = arg;
+	int ret;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+
+	ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
+		sizeof(cpuset));
+	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+		rte_lcore_to_socket_id(lcore_id), role, cpuset,
+		ret == 0 ? "" : "...");
+	return 0;
+}
+
+void
+rte_lcore_dump(FILE *f)
+{
+	rte_lcore_iterate(lcore_dump_cb, f);
 }
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 338194832b..5dc0b12f42 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -104,17 +104,14 @@  rte_thread_get_affinity(rte_cpuset_t *cpusetp)
 }
 
 int
-eal_thread_dump_affinity(char *str, unsigned size)
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
 {
-	rte_cpuset_t cpuset;
 	unsigned cpu;
 	int ret;
 	unsigned int out = 0;
 
-	rte_thread_get_affinity(&cpuset);
-
 	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
-		if (!CPU_ISSET(cpu, &cpuset))
+		if (!CPU_ISSET(cpu, cpuset))
 			continue;
 
 		ret = snprintf(str + out,
@@ -137,6 +134,15 @@  eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size)
+{
+	rte_cpuset_t cpuset;
+
+	rte_thread_get_affinity(&cpuset);
+	return eal_thread_dump_affinity(&cpuset, str, size);
+}
+
 void
 rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 {
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index 4ecd8fd53a..13ec252e01 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -47,13 +47,15 @@  unsigned eal_cpu_socket_id(unsigned cpu_id);
 #define RTE_CPU_AFFINITY_STR_LEN            256
 
 /**
- * Dump the current pthread cpuset.
+ * Dump the cpuset as a human readable string.
  * This function is private to EAL.
  *
  * Note:
  *   If the dump size is greater than the size of given buffer,
  *   the string will be truncated and with '\0' at the end.
  *
+ * @param cpuset
+ *   The CPU affinity object to dump.
  * @param str
  *   The string buffer the cpuset will dump to.
  * @param size
@@ -62,6 +64,13 @@  unsigned eal_cpu_socket_id(unsigned cpu_id);
  *   0 for success, -1 if truncation happens.
  */
 int
-eal_thread_dump_affinity(char *str, unsigned size);
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
+
+/**
+ * Dump the current thread cpuset.
+ * This is a wrapper on eal_thread_dump_affinity().
+ */
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size);
 
 #endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index b5ea11df16..69a6f7d8c4 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -886,7 +886,7 @@  rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		rte_config.master_lcore, thread_id, cpuset,
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index c1fb8eb2d8..b1a3619f51 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -92,7 +92,7 @@  eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 27b29a1f87..4dee7cbcd7 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -261,8 +261,8 @@  typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
  * If this step succeeds, the callbacks are put in the lcore callbacks list
  * that will get called for each lcore allocation/release.
  *
- * Note: callbacks execution is serialised under a lock protecting the lcores
- * and callbacks list.
+ * Note: callbacks execution is serialised under a write lock protecting the
+ * lcores and callbacks list.
  *
  * @param name
  *   A name serving as a small description for this callback.
@@ -297,6 +297,49 @@  __rte_experimental
 void
 rte_lcore_callback_unregister(void *handle);
 
+/**
+ * Callback prototype for iterating over lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer coming from the caller.
+ * @return
+ *   - 0 lets the iteration continue.
+ *   - !0 makes the iteration stop.
+ */
+typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Iterate on all active lcores (ROLE_RTE, ROLE_SERVICE and ROLE_NON_EAL).
+ * No modification on the lcore states is allowed in the callback.
+ *
+ * Note: as opposed to init/uninit callbacks, iteration callbacks can be
+ * invoked in parallel as they are run under a read lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param cb
+ *   The callback that gets passed each lcore.
+ * @param arg
+ *   An opaque pointer passed to cb.
+ * @return
+ *   Same return code as the callback last invocation (see rte_lcore_iterate_cb
+ *   description).
+ */
+__rte_experimental
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
+
+/**
+ * List all lcores.
+ *
+ * @param f
+ *   The output stream where the dump should be sent.
+ */
+__rte_experimental
+void
+rte_lcore_dump(FILE *f);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 8638376b8a..2f0efd7cd3 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1214,7 +1214,7 @@  rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index 07aec0c44d..22d9bc8c01 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -92,7 +92,7 @@  eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index c3e762c1d9..3aeb5b11ab 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -397,6 +397,8 @@  EXPERIMENTAL {
 	# added in 20.08
 	rte_lcore_callback_register;
 	rte_lcore_callback_unregister;
+	rte_lcore_dump;
+	rte_lcore_iterate;
 	rte_thread_register;
 	rte_thread_unregister;
 };