[dpdk-dev,15/17] libte_acl: introduce max_size into rte_acl_config.

Message ID 1418580659-12595-16-git-send-email-konstantin.ananyev@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Ananyev, Konstantin Dec. 14, 2014, 6:10 p.m. UTC
If at build phase we don't make any trie splitting,
then temporary build structures and resulting RT structure might be
much bigger than current.
From other side - having just one trie instead of multiple can speedup
search quite significantly.
From my measurements on rule-sets with ~10K rules:
RT table up to 8 times bigger, classify() up to 80% faster
than current implementation. 
To make it possible for the user to decide about performance/space trade-off -
new parameter for build config structure (max_size) is introduced.
Setting it to the value greater than zero, instructs  rte_acl_build() to:
- make sure that size of RT table wouldn't exceed given value.
- attempt to minimise number of tries in the table.
Setting it to zero maintains current behaviour.
That introduces a minor change in the public API, but I think the possible
performance gain is too big to ignore it.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 app/test-acl/main.c       |  33 ++++++++----
 examples/l3fwd-acl/main.c |   3 +-
 lib/librte_acl/acl.h      |   2 +-
 lib/librte_acl/acl_bld.c  | 134 +++++++++++++++++++++++++++++-----------------
 lib/librte_acl/acl_gen.c  |  22 +++++---
 lib/librte_acl/rte_acl.c  |   1 +
 lib/librte_acl/rte_acl.h  |   2 +
 7 files changed, 131 insertions(+), 66 deletions(-)
  

Patch

diff --git a/app/test-acl/main.c b/app/test-acl/main.c
index 52f43c6..5e8db04 100644
--- a/app/test-acl/main.c
+++ b/app/test-acl/main.c
@@ -85,6 +85,7 @@ 
 #define	OPT_SEARCH_ALG		"alg"
 #define	OPT_BLD_CATEGORIES	"bldcat"
 #define	OPT_RUN_CATEGORIES	"runcat"
+#define	OPT_MAX_SIZE		"maxsize"
 #define	OPT_ITER_NUM		"iter"
 #define	OPT_VERBOSE		"verbose"
 #define	OPT_IPV6		"ipv6"
@@ -126,6 +127,7 @@  static struct {
 	const char         *prgname;
 	const char         *rule_file;
 	const char         *trace_file;
+	size_t              max_size;
 	uint32_t            bld_categories;
 	uint32_t            run_categories;
 	uint32_t            nb_rules;
@@ -780,6 +782,8 @@  acx_init(void)
 	FILE *f;
 	struct rte_acl_config cfg;
 
+	memset(&cfg, 0, sizeof(cfg));
+
 	/* setup ACL build config. */
 	if (config.ipv6) {
 		cfg.num_fields = RTE_DIM(ipv6_defs);
@@ -789,6 +793,7 @@  acx_init(void)
 		memcpy(&cfg.defs, ipv4_defs, sizeof(ipv4_defs));
 	}
 	cfg.num_categories = config.bld_categories;
+	cfg.max_size = config.max_size;
 
 	/* setup ACL creation parameters. */
 	prm.rule_size = RTE_ACL_RULE_SZ(cfg.num_fields);
@@ -899,8 +904,8 @@  search_ip5tuples(__attribute__((unused)) void *arg)
 	return 0;
 }
 
-static uint32_t
-get_uint32_opt(const char *opt, const char *name, uint32_t min, uint32_t max)
+static unsigned long
+get_ulong_opt(const char *opt, const char *name, size_t min, size_t max)
 {
 	unsigned long val;
 	char *end;
@@ -964,6 +969,9 @@  print_usage(const char *prgname)
 			"=<number of categories to run with> "
 			"should be either 1 or multiple of %zu, "
 			"but not greater then %u]\n"
+		"[--" OPT_MAX_SIZE
+			"=<size limit (in bytes) for runtime ACL strucutures> "
+			"leave 0 for default behaviour]\n"
 		"[--" OPT_ITER_NUM "=<number of iterations to perform>]\n"
 		"[--" OPT_VERBOSE "=<verbose level>]\n"
 		"[--" OPT_SEARCH_ALG "=%s]\n"
@@ -984,6 +992,7 @@  dump_config(FILE *f)
 	fprintf(f, "%s:%u\n", OPT_TRACE_STEP, config.trace_step);
 	fprintf(f, "%s:%u\n", OPT_BLD_CATEGORIES, config.bld_categories);
 	fprintf(f, "%s:%u\n", OPT_RUN_CATEGORIES, config.run_categories);
+	fprintf(f, "%s:%zu\n", OPT_MAX_SIZE, config.max_size);
 	fprintf(f, "%s:%u\n", OPT_ITER_NUM, config.iter_num);
 	fprintf(f, "%s:%u\n", OPT_VERBOSE, config.verbose);
 	fprintf(f, "%s:%u(%s)\n", OPT_SEARCH_ALG, config.alg.alg,
@@ -1010,6 +1019,7 @@  get_input_opts(int argc, char **argv)
 		{OPT_TRACE_FILE, 1, 0, 0},
 		{OPT_TRACE_NUM, 1, 0, 0},
 		{OPT_RULE_NUM, 1, 0, 0},
+		{OPT_MAX_SIZE, 1, 0, 0},
 		{OPT_TRACE_STEP, 1, 0, 0},
 		{OPT_BLD_CATEGORIES, 1, 0, 0},
 		{OPT_RUN_CATEGORIES, 1, 0, 0},
@@ -1034,29 +1044,32 @@  get_input_opts(int argc, char **argv)
 		} else if (strcmp(lgopts[opt_idx].name, OPT_TRACE_FILE) == 0) {
 			config.trace_file = optarg;
 		} else if (strcmp(lgopts[opt_idx].name, OPT_RULE_NUM) == 0) {
-			config.nb_rules = get_uint32_opt(optarg,
+			config.nb_rules = get_ulong_opt(optarg,
 				lgopts[opt_idx].name, 1, RTE_ACL_MAX_INDEX + 1);
+		} else if (strcmp(lgopts[opt_idx].name, OPT_MAX_SIZE) == 0) {
+			config.max_size = get_ulong_opt(optarg,
+				lgopts[opt_idx].name, 0, SIZE_MAX);
 		} else if (strcmp(lgopts[opt_idx].name, OPT_TRACE_NUM) == 0) {
-			config.nb_traces = get_uint32_opt(optarg,
+			config.nb_traces = get_ulong_opt(optarg,
 				lgopts[opt_idx].name, 1, UINT32_MAX);
 		} else if (strcmp(lgopts[opt_idx].name, OPT_TRACE_STEP) == 0) {
-			config.trace_step = get_uint32_opt(optarg,
+			config.trace_step = get_ulong_opt(optarg,
 				lgopts[opt_idx].name, 1, TRACE_STEP_MAX);
 		} else if (strcmp(lgopts[opt_idx].name,
 				OPT_BLD_CATEGORIES) == 0) {
-			config.bld_categories = get_uint32_opt(optarg,
+			config.bld_categories = get_ulong_opt(optarg,
 				lgopts[opt_idx].name, 1,
 				RTE_ACL_MAX_CATEGORIES);
 		} else if (strcmp(lgopts[opt_idx].name,
 				OPT_RUN_CATEGORIES) == 0) {
-			config.run_categories = get_uint32_opt(optarg,
+			config.run_categories = get_ulong_opt(optarg,
 				lgopts[opt_idx].name, 1,
 				RTE_ACL_MAX_CATEGORIES);
 		} else if (strcmp(lgopts[opt_idx].name, OPT_ITER_NUM) == 0) {
-			config.iter_num = get_uint32_opt(optarg,
-				lgopts[opt_idx].name, 1, UINT16_MAX);
+			config.iter_num = get_ulong_opt(optarg,
+				lgopts[opt_idx].name, 1, INT32_MAX);
 		} else if (strcmp(lgopts[opt_idx].name, OPT_VERBOSE) == 0) {
-			config.verbose = get_uint32_opt(optarg,
+			config.verbose = get_ulong_opt(optarg,
 				lgopts[opt_idx].name, DUMP_NONE, DUMP_MAX);
 		} else if (strcmp(lgopts[opt_idx].name,
 				OPT_SEARCH_ALG) == 0) {
diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
index 4487c95..4f0e543 100644
--- a/examples/l3fwd-acl/main.c
+++ b/examples/l3fwd-acl/main.c
@@ -1178,8 +1178,9 @@  setup_acl(struct rte_acl_rule *route_base,
 			rte_exit(EXIT_FAILURE, "add rules failed\n");
 
 	/* Perform builds */
-	acl_build_param.num_categories = DEFAULT_MAX_CATEGORIES;
+	memset(&acl_build_param, 0, sizeof(acl_build_param));
 
+	acl_build_param.num_categories = DEFAULT_MAX_CATEGORIES;
 	acl_build_param.num_fields = dim;
 	memcpy(&acl_build_param.defs, ipv6 ? ipv6_defs : ipv4_defs,
 		ipv6 ? sizeof(ipv6_defs) : sizeof(ipv4_defs));
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index d33d7ad..61b849a 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -180,7 +180,7 @@  struct rte_acl_ctx {
 
 int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
 	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
-	uint32_t num_categories, uint32_t data_index_sz);
+	uint32_t num_categories, uint32_t data_index_sz, size_t max_size);
 
 typedef int (*rte_acl_classify_t)
 (const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t);
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index 1fd59ee..1fe79fb 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -41,10 +41,9 @@ 
 /* number of pointers per alloc */
 #define ACL_PTR_ALLOC	32
 
-/* variable for dividing rule sets */
-#define NODE_MAX	2500
-#define NODE_PERCENTAGE	(0.40)
-#define RULE_PERCENTAGE	(0.40)
+/* macros for dividing rule sets heuristics */
+#define NODE_MAX	0x4000
+#define NODE_MIN	0x800
 
 /* TALLY are statistics per field */
 enum {
@@ -97,6 +96,7 @@  struct acl_build_context {
 	const struct rte_acl_ctx *acx;
 	struct rte_acl_build_rule *build_rules;
 	struct rte_acl_config     cfg;
+	int32_t                   node_max;
 	uint32_t                  node;
 	uint32_t                  num_nodes;
 	uint32_t                  category_mask;
@@ -1447,7 +1447,7 @@  build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head,
 			return NULL;
 
 		node_count = context->num_nodes - node_count;
-		if (node_count > NODE_MAX) {
+		if (node_count > context->node_max) {
 			*last = prev;
 			return trie;
 		}
@@ -1628,6 +1628,9 @@  rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2)
 	return 0;
 }
 
+/*
+ * Sort list of rules based on the rules wildness.
+ */
 static struct rte_acl_build_rule *
 sort_rules(struct rte_acl_build_rule *head)
 {
@@ -1636,21 +1639,22 @@  sort_rules(struct rte_acl_build_rule *head)
 
 	new_head = NULL;
 	while (head != NULL) {
+
+		/* remove element from the head of the old list. */
 		r = head;
 		head = r->next;
 		r->next = NULL;
-		if (new_head == NULL) {
-			new_head = r;
-		} else {
-			for (p = &new_head;
-					(l = *p) != NULL &&
-					rule_cmp_wildness(l, r) >= 0;
-					p = &l->next)
-				;
-
-			r->next = *p;
-			*p = r;
-		}
+
+		/* walk through new sorted list to find a proper place. */
+		for (p = &new_head;
+				(l = *p) != NULL &&
+				rule_cmp_wildness(l, r) >= 0;
+				p = &l->next)
+			;
+
+		/* insert element into the new sorted list. */
+		r->next = *p;
+		*p = r;
 	}
 
 	return new_head;
@@ -1789,9 +1793,11 @@  acl_build_log(const struct acl_build_context *ctx)
 	uint32_t n;
 
 	RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+		"node limit for tree split: %u\n"
 		"nodes created: %u\n"
 		"memory consumed: %zu\n",
 		ctx->acx->name,
+		ctx->node_max,
 		ctx->num_nodes,
 		ctx->pool.alloc);
 
@@ -1868,11 +1874,48 @@  acl_set_data_indexes(struct rte_acl_ctx *ctx)
 	}
 }
 
+/*
+ * Internal routine, performs 'build' phase of trie generation:
+ * - setups build context.
+ * - analizes given set of rules.
+ * - builds internal tree(s).
+ */
+static int
+acl_bld(struct acl_build_context *bcx, struct rte_acl_ctx *ctx,
+	const struct rte_acl_config *cfg, uint32_t node_max)
+{
+	int32_t rc;
+
+	/* setup build context. */
+	memset(bcx, 0, sizeof(*bcx));
+	bcx->acx = ctx;
+	bcx->pool.alignment = ACL_POOL_ALIGN;
+	bcx->pool.min_alloc = ACL_POOL_ALLOC_MIN;
+	bcx->cfg = *cfg;
+	bcx->category_mask = LEN2MASK(bcx->cfg.num_categories);
+	bcx->node_max = node_max;
+
+	/* Create a build rules copy. */
+	rc = acl_build_rules(bcx);
+	if (rc != 0)
+		return rc;
+
+	/* No rules to build for that context+config */
+	if (bcx->build_rules == NULL) {
+		rc = -EINVAL;
+	} else {
+		/* build internal trie representation. */
+		rc = acl_build_tries(bcx, bcx->build_rules);
+	}
+	return rc;
+}
 
 int
 rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
 {
-	int rc;
+	int32_t rc;
+	uint32_t n;
+	size_t max_size;
 	struct acl_build_context bcx;
 
 	if (ctx == NULL || cfg == NULL || cfg->num_categories == 0 ||
@@ -1881,44 +1924,39 @@  rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
 
 	acl_build_reset(ctx);
 
-	memset(&bcx, 0, sizeof(bcx));
-	bcx.acx = ctx;
-	bcx.pool.alignment = ACL_POOL_ALIGN;
-	bcx.pool.min_alloc = ACL_POOL_ALLOC_MIN;
-	bcx.cfg = *cfg;
-	bcx.category_mask = LEN2MASK(bcx.cfg.num_categories);
-
-
-	/* Create a build rules copy. */
-	rc = acl_build_rules(&bcx);
-	if (rc != 0)
-		return rc;
+	if (cfg->max_size == 0) {
+		n = NODE_MIN;
+		max_size = SIZE_MAX;
+	} else {
+		n = NODE_MAX;
+		max_size = cfg->max_size;
+	}
 
-	/* No rules to build for that context+config */
-	if (bcx.build_rules == NULL) {
-		rc = -EINVAL;
+	for (rc = -ERANGE; n >= NODE_MIN && rc == -ERANGE; n /= 2) {
 
-	/* build internal trie representation. */
-	} else if ((rc = acl_build_tries(&bcx, bcx.build_rules)) == 0) {
+		/* perform build phase. */
+		rc = acl_bld(&bcx, ctx, cfg, n);
 
-		/* allocate and fill run-time  structures. */
-		rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries,
+		if (rc == 0) {
+			/* allocate and fill run-time  structures. */
+			rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries,
 				bcx.num_tries, bcx.cfg.num_categories,
 				RTE_ACL_MAX_FIELDS * RTE_DIM(bcx.tries) *
-				sizeof(ctx->data_indexes[0]));
-		if (rc == 0) {
+				sizeof(ctx->data_indexes[0]), max_size);
+			if (rc == 0) {
+				/* set data indexes. */
+				acl_set_data_indexes(ctx);
 
-			/* set data indexes. */
-			acl_set_data_indexes(ctx);
-
-			/* copy in build config. */
-			ctx->config = *cfg;
+				/* copy in build config. */
+				ctx->config = *cfg;
+			}
 		}
-	}
 
-	acl_build_log(&bcx);
+		acl_build_log(&bcx);
+
+		/* cleanup after build. */
+		tb_free_pool(&bcx.pool);
+	}
 
-	/* cleanup after build. */
-	tb_free_pool(&bcx.pool);
 	return rc;
 }
diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c
index d3def66..ea557ab 100644
--- a/lib/librte_acl/acl_gen.c
+++ b/lib/librte_acl/acl_gen.c
@@ -32,7 +32,6 @@ 
  */
 
 #include <rte_acl.h>
-#include "acl_vect.h"
 #include "acl.h"
 
 #define	QRANGE_MIN	((uint8_t)INT8_MIN)
@@ -63,7 +62,8 @@  struct rte_acl_indices {
 static void
 acl_gen_log_stats(const struct rte_acl_ctx *ctx,
 	const struct acl_node_counters *counts,
-	const struct rte_acl_indices *indices)
+	const struct rte_acl_indices *indices,
+	size_t max_size)
 {
 	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
 		"runtime memory footprint on socket %d:\n"
@@ -71,7 +71,8 @@  acl_gen_log_stats(const struct rte_acl_ctx *ctx,
 		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
 		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
 		"match nodes/bytes used: %d/%zu\n"
-		"total: %zu bytes\n",
+		"total: %zu bytes\n"
+		"max limit: %zu bytes\n",
 		ctx->name, ctx->socket_id,
 		counts->single, counts->single * sizeof(uint64_t),
 		counts->quad, counts->quad_vectors,
@@ -80,7 +81,8 @@  acl_gen_log_stats(const struct rte_acl_ctx *ctx,
 		indices->dfa_index * sizeof(uint64_t),
 		counts->match,
 		counts->match * sizeof(struct rte_acl_match_results),
-		ctx->mem_sz);
+		ctx->mem_sz,
+		max_size);
 }
 
 static uint64_t
@@ -474,7 +476,7 @@  acl_calc_counts_indices(struct acl_node_counters *counts,
 int
 rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
 	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
-	uint32_t num_categories, uint32_t data_index_sz)
+	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
 {
 	void *mem;
 	size_t total_size;
@@ -496,6 +498,14 @@  rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
 		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
 		XMM_SIZE;
 
+	if (total_size > max_size) {
+		RTE_LOG(DEBUG, ACL,
+			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
+			"bytes required: %zu, allowed: %zu\n",
+			ctx->name, total_size, max_size);
+		return -ERANGE;
+	}
+
 	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
 			ctx->socket_id);
 	if (mem == NULL) {
@@ -546,6 +556,6 @@  rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
 	ctx->trans_table = node_array;
 	memcpy(ctx->trie, trie, sizeof(ctx->trie));
 
-	acl_gen_log_stats(ctx, &counts, &indices);
+	acl_gen_log_stats(ctx, &counts, &indices, max_size);
 	return 0;
 }
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 2fa51cb..1667178 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -519,6 +519,7 @@  rte_acl_ipv4vlan_build(struct rte_acl_ctx *ctx,
 	if (ctx == NULL || layout == NULL)
 		return -EINVAL;
 
+	memset(&cfg, 0, sizeof(cfg));
 	acl_ipv4vlan_config(&cfg, layout, num_categories);
 	return rte_acl_build(ctx, &cfg);
 }
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index 652a234..30aea03 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -94,6 +94,8 @@  struct rte_acl_config {
 	uint32_t num_fields;     /**< Number of field definitions. */
 	struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS];
 	/**< array of field definitions. */
+	size_t max_size;
+	/**< max memory limit for internal run-time structures. */
 };
 
 /**