[5/5] app/testpmd: add error recovery usage demo

Message ID 20230301030610.49468-6-fengchengwen@huawei.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers
Series fix race-condition of proactive error handling mode |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing fail Testing issues

Commit Message

fengchengwen March 1, 2023, 3:06 a.m. UTC
  This patch adds an error recovery usage demo which will:
1. stop packet forwarding when the RTE_ETH_EVENT_ERR_RECOVERING event
   is received.
2. restart packet forwarding when the RTE_ETH_EVENT_RECOVERY_SUCCESS
   event is received.
3. report the ports that failed to recover and need to be removed when
   the RTE_ETH_EVENT_RECOVERY_FAILED event is received.

In addition, a message is printed asking the user not to execute any
command while error recovery is in progress.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 app/test-pmd/testpmd.c | 80 ++++++++++++++++++++++++++++++++++++++++++
 app/test-pmd/testpmd.h |  4 ++-
 2 files changed, 83 insertions(+), 1 deletion(-)
  

Comments

Konstantin Ananyev March 2, 2023, 1:01 p.m. UTC | #1
> 
> This patch adds error recovery usage demo which will:
> 1. stop packet forwarding when the RTE_ETH_EVENT_ERR_RECOVERING event
>    is received.
> 2. restart packet forwarding when the RTE_ETH_EVENT_RECOVERY_SUCCESS
>    event is received.
> 3. prompt the ports that fail to recovery and need to be removed when
>    the RTE_ETH_EVENT_RECOVERY_FAILED event is received.
> 
> In addition, a message is added to the printed information, requiring
> no command to be executed during the error recovery.
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> ---
>  app/test-pmd/testpmd.c | 80 ++++++++++++++++++++++++++++++++++++++++++
>  app/test-pmd/testpmd.h |  4 ++-
>  2 files changed, 83 insertions(+), 1 deletion(-)
> 
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 0c14325b8d..fdc3ae604b 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -3823,6 +3823,77 @@ rmv_port_callback(void *arg)
>  		start_packet_forwarding(0);
>  }
> 
> +static int need_start_when_recovery_over;
> +
> +static bool
> +has_port_in_err_recovering(void)
> +{
> +	struct rte_port *port;
> +	portid_t pid;
> +
> +	RTE_ETH_FOREACH_DEV(pid) {
> +		port = &ports[pid];
> +		if (port->err_recovering)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +static void
> +err_recovering_callback(portid_t port_id)
> +{
> +	if (!has_port_in_err_recovering())
> +		printf("Please stop executing any commands until recovery result events are received!\n");
> +
> +	ports[port_id].err_recovering = 1;
> +	ports[port_id].recover_failed = 0;
> +
> +	/* To simplify implementation, stop forwarding regardless of whether the port is used. */
> +	if (!test_done) {
> +		printf("Stop packet forwarding because some ports are in error recovering!\n");
> +		stop_packet_forwarding();
> +		need_start_when_recovery_over = 1;
> +	}
> +}

One thought I have - should we somehow stop the user from attempting to restart RX/TX while
recovery is in progress?
But that is probably overkill, and just documenting what is happening is enough....
Do we need to update the testpmd UG with a short description?
Apart from that, LGTM:
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>

> +
> +static void
> +recover_success_callback(portid_t port_id)
> +{
> +	ports[port_id].err_recovering = 0;
> +	if (has_port_in_err_recovering())
> +		return;
> +
> +	if (need_start_when_recovery_over) {
> +		printf("Recovery success! Restart packet forwarding!\n");
> +		start_packet_forwarding(0);
> +		need_start_when_recovery_over = 0;
> +	} else {
> +		printf("Recovery success!\n");
> +	}
> +}
> +
> +static void
> +recover_failed_callback(portid_t port_id)
> +{
> +	struct rte_port *port;
> +	portid_t pid;
> +
> +	ports[port_id].err_recovering = 0;
> +	ports[port_id].recover_failed = 1;
> +	if (has_port_in_err_recovering())
> +		return;
> +
> +	need_start_when_recovery_over = 0;
> +	printf("The ports:");
> +	RTE_ETH_FOREACH_DEV(pid) {
> +		port = &ports[pid];
> +		if (port->recover_failed)
> +			printf(" %u", pid);
> +	}
> +	printf(" recovery failed! Please remove them!\n");
> +}
> +
>  /* This function is used by the interrupt thread */
>  static int
>  eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
> @@ -3878,6 +3949,15 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
>  		}
>  		break;
>  	}
> +	case RTE_ETH_EVENT_ERR_RECOVERING:
> +		err_recovering_callback(port_id);
> +		break;
> +	case RTE_ETH_EVENT_RECOVERY_SUCCESS:
> +		recover_success_callback(port_id);
> +		break;
> +	case RTE_ETH_EVENT_RECOVERY_FAILED:
> +		recover_failed_callback(port_id);
> +		break;
>  	default:
>  		break;
>  	}
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index 329a6378a1..1bbf82a96c 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -323,7 +323,9 @@ struct rte_port {
>  	uint8_t                 slave_flag : 1, /**< bonding slave port */
>  				bond_flag : 1, /**< port is bond device */
>  				fwd_mac_swap : 1, /**< swap packet MAC before forward */
> -				update_conf : 1; /**< need to update bonding device configuration */
> +				update_conf : 1, /**< need to update bonding device configuration */
> +				err_recovering : 1, /**< port is in error recovering */
> +				recover_failed : 1; /**< port recover failed */
>  	struct port_template    *pattern_templ_list; /**< Pattern templates. */
>  	struct port_template    *actions_templ_list; /**< Actions templates. */
>  	struct port_table       *table_list; /**< Flow tables. */
> --
> 2.17.1
  
fengchengwen March 3, 2023, 1:49 a.m. UTC | #2
On 2023/3/2 21:01, Konstantin Ananyev wrote:
> 
> 
>>
>> This patch adds error recovery usage demo which will:
>> 1. stop packet forwarding when the RTE_ETH_EVENT_ERR_RECOVERING event
>>    is received.
>> 2. restart packet forwarding when the RTE_ETH_EVENT_RECOVERY_SUCCESS
>>    event is received.
>> 3. prompt the ports that fail to recovery and need to be removed when
>>    the RTE_ETH_EVENT_RECOVERY_FAILED event is received.
>>
>> In addition, a message is added to the printed information, requiring
>> no command to be executed during the error recovery.
>>
>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>> ---
>>  app/test-pmd/testpmd.c | 80 ++++++++++++++++++++++++++++++++++++++++++
>>  app/test-pmd/testpmd.h |  4 ++-
>>  2 files changed, 83 insertions(+), 1 deletion(-)
>>
>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
>> index 0c14325b8d..fdc3ae604b 100644
>> --- a/app/test-pmd/testpmd.c
>> +++ b/app/test-pmd/testpmd.c
>> @@ -3823,6 +3823,77 @@ rmv_port_callback(void *arg)
>>  		start_packet_forwarding(0);
>>  }
>>
>> +static int need_start_when_recovery_over;
>> +
>> +static bool
>> +has_port_in_err_recovering(void)
>> +{
>> +	struct rte_port *port;
>> +	portid_t pid;
>> +
>> +	RTE_ETH_FOREACH_DEV(pid) {
>> +		port = &ports[pid];
>> +		if (port->err_recovering)
>> +			return true;
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +static void
>> +err_recovering_callback(portid_t port_id)
>> +{
>> +	if (!has_port_in_err_recovering())
>> +		printf("Please stop executing any commands until recovery result events are received!\n");
>> +
>> +	ports[port_id].err_recovering = 1;
>> +	ports[port_id].recover_failed = 0;
>> +
>> +	/* To simplify implementation, stop forwarding regardless of whether the port is used. */
>> +	if (!test_done) {
>> +		printf("Stop packet forwarding because some ports are in error recovering!\n");
>> +		stop_packet_forwarding();
>> +		need_start_when_recovery_over = 1;
>> +	}
>> +}
> 
> One thought I have - should we somehow stop user to attempt restart RX/TX while recovery
> in progress?
> But probably it is an overkill, and just documenting what is happening is enough....

Yes, testpmd is already complicated.
In addition, only a few PMDs support this feature, and it is not commonly invoked.
So I think showing the above prompt is enough.

> Do we need to update testpmd UG with some short description?

It would be better to update the UG, but since this isn't triggered by a command, I don't know which chapter to put it in.

@Ferruh could you provide some advice?

> Apart from that, LGTM:
> Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> 
>> +
>> +static void
>> +recover_success_callback(portid_t port_id)
>> +{
>> +	ports[port_id].err_recovering = 0;
>> +	if (has_port_in_err_recovering())
>> +		return;
>> +
>> +	if (need_start_when_recovery_over) {
>> +		printf("Recovery success! Restart packet forwarding!\n");
>> +		start_packet_forwarding(0);
>> +		need_start_when_recovery_over = 0;
>> +	} else {
>> +		printf("Recovery success!\n");
>> +	}
>> +}
>> +
>> +static void
>> +recover_failed_callback(portid_t port_id)
>> +{
>> +	struct rte_port *port;
>> +	portid_t pid;
>> +
>> +	ports[port_id].err_recovering = 0;
>> +	ports[port_id].recover_failed = 1;
>> +	if (has_port_in_err_recovering())
>> +		return;
>> +
>> +	need_start_when_recovery_over = 0;
>> +	printf("The ports:");
>> +	RTE_ETH_FOREACH_DEV(pid) {
>> +		port = &ports[pid];
>> +		if (port->recover_failed)
>> +			printf(" %u", pid);
>> +	}
>> +	printf(" recovery failed! Please remove them!\n");
>> +}
>> +
>>  /* This function is used by the interrupt thread */
>>  static int
>>  eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
>> @@ -3878,6 +3949,15 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
>>  		}
>>  		break;
>>  	}
>> +	case RTE_ETH_EVENT_ERR_RECOVERING:
>> +		err_recovering_callback(port_id);
>> +		break;
>> +	case RTE_ETH_EVENT_RECOVERY_SUCCESS:
>> +		recover_success_callback(port_id);
>> +		break;
>> +	case RTE_ETH_EVENT_RECOVERY_FAILED:
>> +		recover_failed_callback(port_id);
>> +		break;
>>  	default:
>>  		break;
>>  	}
>> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
>> index 329a6378a1..1bbf82a96c 100644
>> --- a/app/test-pmd/testpmd.h
>> +++ b/app/test-pmd/testpmd.h
>> @@ -323,7 +323,9 @@ struct rte_port {
>>  	uint8_t                 slave_flag : 1, /**< bonding slave port */
>>  				bond_flag : 1, /**< port is bond device */
>>  				fwd_mac_swap : 1, /**< swap packet MAC before forward */
>> -				update_conf : 1; /**< need to update bonding device configuration */
>> +				update_conf : 1, /**< need to update bonding device configuration */
>> +				err_recovering : 1, /**< port is in error recovering */
>> +				recover_failed : 1; /**< port recover failed */
>>  	struct port_template    *pattern_templ_list; /**< Pattern templates. */
>>  	struct port_template    *actions_templ_list; /**< Actions templates. */
>>  	struct port_table       *table_list; /**< Flow tables. */
>> --
>> 2.17.1
> 
> .
>
  
Ferruh Yigit March 3, 2023, 4:59 p.m. UTC | #3
On 3/3/2023 1:49 AM, fengchengwen wrote:
> On 2023/3/2 21:01, Konstantin Ananyev wrote:
>>
>>
>>>
>>> This patch adds error recovery usage demo which will:
>>> 1. stop packet forwarding when the RTE_ETH_EVENT_ERR_RECOVERING event
>>>    is received.
>>> 2. restart packet forwarding when the RTE_ETH_EVENT_RECOVERY_SUCCESS
>>>    event is received.
>>> 3. prompt the ports that fail to recovery and need to be removed when
>>>    the RTE_ETH_EVENT_RECOVERY_FAILED event is received.
>>>
>>> In addition, a message is added to the printed information, requiring
>>> no command to be executed during the error recovery.
>>>
>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>>> ---
>>>  app/test-pmd/testpmd.c | 80 ++++++++++++++++++++++++++++++++++++++++++
>>>  app/test-pmd/testpmd.h |  4 ++-
>>>  2 files changed, 83 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
>>> index 0c14325b8d..fdc3ae604b 100644
>>> --- a/app/test-pmd/testpmd.c
>>> +++ b/app/test-pmd/testpmd.c
>>> @@ -3823,6 +3823,77 @@ rmv_port_callback(void *arg)
>>>  		start_packet_forwarding(0);
>>>  }
>>>
>>> +static int need_start_when_recovery_over;
>>> +
>>> +static bool
>>> +has_port_in_err_recovering(void)
>>> +{
>>> +	struct rte_port *port;
>>> +	portid_t pid;
>>> +
>>> +	RTE_ETH_FOREACH_DEV(pid) {
>>> +		port = &ports[pid];
>>> +		if (port->err_recovering)
>>> +			return true;
>>> +	}
>>> +
>>> +	return false;
>>> +}
>>> +
>>> +static void
>>> +err_recovering_callback(portid_t port_id)
>>> +{
>>> +	if (!has_port_in_err_recovering())
>>> +		printf("Please stop executing any commands until recovery result events are received!\n");
>>> +
>>> +	ports[port_id].err_recovering = 1;
>>> +	ports[port_id].recover_failed = 0;
>>> +
>>> +	/* To simplify implementation, stop forwarding regardless of whether the port is used. */
>>> +	if (!test_done) {
>>> +		printf("Stop packet forwarding because some ports are in error recovering!\n");
>>> +		stop_packet_forwarding();
>>> +		need_start_when_recovery_over = 1;
>>> +	}
>>> +}
>>
>> One thought I have - should we somehow stop user to attempt restart RX/TX while recovery
>> in progress?
>> But probably it is an overkill, and just documenting what is happening is enough....
> 
> Yes, the testpmd is already complicated.
> In addition, considering that only a few PMDs support and are not commonly invoking.
> So I thinking show above such promote is enough.
> 
>> Do we need to update testpmd UG with some short description?
> 
> It's better to update UG, but it wasn't triggered by command, I don't know which chapter to put it in.
> 
> @Ferruh could you provide some advise ?
> 

I think it is better to extract event handling to a new .c file, something
like 'event.c', and make the handling of the various events optional,
controlled by a testpmd parameter.

Right now, by default, all events are just printed (unless explicitly
masked on the command line with --mask-event); that is very
basic, and I think sufficient as the default behavior.

And in documentation, it would be nice to have a section like "event
handling" and document what option enables which event handling, how it
is used and what is the expected behavior, etc...


btw, overall I agree with implementing recovery event handling in testpmd; it is
good to give some examples of how to handle these events in an application. I am
just not sure about enabling it by default.

>> Apart from that, LGTM:
>> Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>
>>
>>> +
>>> +static void
>>> +recover_success_callback(portid_t port_id)
>>> +{
>>> +	ports[port_id].err_recovering = 0;
>>> +	if (has_port_in_err_recovering())
>>> +		return;
>>> +
>>> +	if (need_start_when_recovery_over) {
>>> +		printf("Recovery success! Restart packet forwarding!\n");
>>> +		start_packet_forwarding(0);
>>> +		need_start_when_recovery_over = 0;
>>> +	} else {
>>> +		printf("Recovery success!\n");
>>> +	}
>>> +}
>>> +
>>> +static void
>>> +recover_failed_callback(portid_t port_id)
>>> +{
>>> +	struct rte_port *port;
>>> +	portid_t pid;
>>> +
>>> +	ports[port_id].err_recovering = 0;
>>> +	ports[port_id].recover_failed = 1;
>>> +	if (has_port_in_err_recovering())
>>> +		return;
>>> +
>>> +	need_start_when_recovery_over = 0;
>>> +	printf("The ports:");
>>> +	RTE_ETH_FOREACH_DEV(pid) {
>>> +		port = &ports[pid];
>>> +		if (port->recover_failed)
>>> +			printf(" %u", pid);
>>> +	}
>>> +	printf(" recovery failed! Please remove them!\n");
>>> +}
>>> +
>>>  /* This function is used by the interrupt thread */
>>>  static int
>>>  eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
>>> @@ -3878,6 +3949,15 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
>>>  		}
>>>  		break;
>>>  	}
>>> +	case RTE_ETH_EVENT_ERR_RECOVERING:
>>> +		err_recovering_callback(port_id);
>>> +		break;
>>> +	case RTE_ETH_EVENT_RECOVERY_SUCCESS:
>>> +		recover_success_callback(port_id);
>>> +		break;
>>> +	case RTE_ETH_EVENT_RECOVERY_FAILED:
>>> +		recover_failed_callback(port_id);
>>> +		break;
>>>  	default:
>>>  		break;
>>>  	}
>>> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
>>> index 329a6378a1..1bbf82a96c 100644
>>> --- a/app/test-pmd/testpmd.h
>>> +++ b/app/test-pmd/testpmd.h
>>> @@ -323,7 +323,9 @@ struct rte_port {
>>>  	uint8_t                 slave_flag : 1, /**< bonding slave port */
>>>  				bond_flag : 1, /**< port is bond device */
>>>  				fwd_mac_swap : 1, /**< swap packet MAC before forward */
>>> -				update_conf : 1; /**< need to update bonding device configuration */
>>> +				update_conf : 1, /**< need to update bonding device configuration */
>>> +				err_recovering : 1, /**< port is in error recovering */
>>> +				recover_failed : 1; /**< port recover failed */
>>>  	struct port_template    *pattern_templ_list; /**< Pattern templates. */
>>>  	struct port_template    *actions_templ_list; /**< Actions templates. */
>>>  	struct port_table       *table_list; /**< Flow tables. */
>>> --
>>> 2.17.1
>>
>> .
>>
  

Patch

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 0c14325b8d..fdc3ae604b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -3823,6 +3823,77 @@  rmv_port_callback(void *arg)
 		start_packet_forwarding(0);
 }
 
+static int need_start_when_recovery_over;
+
+static bool
+has_port_in_err_recovering(void)
+{
+	struct rte_port *port;
+	portid_t pid;
+
+	RTE_ETH_FOREACH_DEV(pid) {
+		port = &ports[pid];
+		if (port->err_recovering)
+			return true;
+	}
+
+	return false;
+}
+
+static void
+err_recovering_callback(portid_t port_id)
+{
+	if (!has_port_in_err_recovering())
+		printf("Please stop executing any commands until recovery result events are received!\n");
+
+	ports[port_id].err_recovering = 1;
+	ports[port_id].recover_failed = 0;
+
+	/* To simplify implementation, stop forwarding regardless of whether the port is used. */
+	if (!test_done) {
+		printf("Stop packet forwarding because some ports are in error recovering!\n");
+		stop_packet_forwarding();
+		need_start_when_recovery_over = 1;
+	}
+}
+
+static void
+recover_success_callback(portid_t port_id)
+{
+	ports[port_id].err_recovering = 0;
+	if (has_port_in_err_recovering())
+		return;
+
+	if (need_start_when_recovery_over) {
+		printf("Recovery success! Restart packet forwarding!\n");
+		start_packet_forwarding(0);
+		need_start_when_recovery_over = 0;
+	} else {
+		printf("Recovery success!\n");
+	}
+}
+
+static void
+recover_failed_callback(portid_t port_id)
+{
+	struct rte_port *port;
+	portid_t pid;
+
+	ports[port_id].err_recovering = 0;
+	ports[port_id].recover_failed = 1;
+	if (has_port_in_err_recovering())
+		return;
+
+	need_start_when_recovery_over = 0;
+	printf("The ports:");
+	RTE_ETH_FOREACH_DEV(pid) {
+		port = &ports[pid];
+		if (port->recover_failed)
+			printf(" %u", pid);
+	}
+	printf(" recovery failed! Please remove them!\n");
+}
+
 /* This function is used by the interrupt thread */
 static int
 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
@@ -3878,6 +3949,15 @@  eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
 		}
 		break;
 	}
+	case RTE_ETH_EVENT_ERR_RECOVERING:
+		err_recovering_callback(port_id);
+		break;
+	case RTE_ETH_EVENT_RECOVERY_SUCCESS:
+		recover_success_callback(port_id);
+		break;
+	case RTE_ETH_EVENT_RECOVERY_FAILED:
+		recover_failed_callback(port_id);
+		break;
 	default:
 		break;
 	}
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 329a6378a1..1bbf82a96c 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -323,7 +323,9 @@  struct rte_port {
 	uint8_t                 slave_flag : 1, /**< bonding slave port */
 				bond_flag : 1, /**< port is bond device */
 				fwd_mac_swap : 1, /**< swap packet MAC before forward */
-				update_conf : 1; /**< need to update bonding device configuration */
+				update_conf : 1, /**< need to update bonding device configuration */
+				err_recovering : 1, /**< port is in error recovering */
+				recover_failed : 1; /**< port recover failed */
 	struct port_template    *pattern_templ_list; /**< Pattern templates. */
 	struct port_template    *actions_templ_list; /**< Actions templates. */
 	struct port_table       *table_list; /**< Flow tables. */