[v6] net/ice: fix crash on closing representor ports

Message ID 20231107101223.225726-1-mingjinx.ye@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series [v6] net/ice: fix crash on closing representor ports |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/github-robot: build success github build: passed

Commit Message

Mingjin Ye Nov. 7, 2023, 10:12 a.m. UTC
  The data resource in struct rte_eth_dev is cleared and points to NULL
when the DCF port is closed.

If the DCF representor port is closed after the DCF port is closed,
a segmentation fault occurs because the representor port accesses the
data resource released by the DCF port.

This patch fixes this issue by synchronizing the state of DCF ports and
representor ports to the peer in real time when their state changes.

Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
Cc: stable@dpdk.org

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: Reformat code to remove unneeded fixlines.
---
v3: New solution.
---
v4: Optimize v2 patch.
---
v5: optimization.
---
v6: Optimize and resolve conflicts.
---
 drivers/net/ice/ice_dcf_ethdev.c         | 30 ++++++++++++--
 drivers/net/ice/ice_dcf_ethdev.h         |  3 ++
 drivers/net/ice/ice_dcf_vf_representor.c | 51 ++++++++++++++++++++++--
 3 files changed, 78 insertions(+), 6 deletions(-)
  

Comments

Qi Zhang Nov. 7, 2023, 12:18 p.m. UTC | #1
> -----Original Message-----
> From: Ye, MingjinX <mingjinx.ye@intel.com>
> Sent: Tuesday, November 7, 2023 6:12 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Ye, MingjinX
> <mingjinx.ye@intel.com>; stable@dpdk.org; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: [PATCH v6] net/ice: fix crash on closing representor ports
> 
> The data resource in struct rte_eth_dev is cleared and points to NULL when
> the DCF port is closed.
> 
> If the DCF representor port is closed after the DCF port is closed, a
> segmentation fault occurs because the representor port accesses the data
> resource released by the DCF port.
> 
> This patch fixes this issue by synchronizing the state of DCF ports and
> representor ports to the peer in real time when their state changes.
> 
> Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")

The Fixes line still does not make sense; the issue should already have existed before the above patch.

> Cc: stable@dpdk.org
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
> v2: Reformat code to remove unneeded fixlines.
> ---
> v3: New solution.
> ---
> v4: Optimize v2 patch.
> ---
> v5: optimization.
> ---
> v6: Optimize and resolve conflicts.
> ---
>  drivers/net/ice/ice_dcf_ethdev.c         | 30 ++++++++++++--
>  drivers/net/ice/ice_dcf_ethdev.h         |  3 ++
>  drivers/net/ice/ice_dcf_vf_representor.c | 51 ++++++++++++++++++++++--
>  3 files changed, 78 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
> index 29699c2c32..5d845bba31 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -1618,6 +1618,26 @@ ice_dcf_free_repr_info(struct ice_dcf_adapter
> *dcf_adapter)
>  	}
>  }
> 
> +int
> +ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter *dcf_adapter,
> +				uint16_t vf_id)
> +{
> +	struct ice_dcf_repr_info *vf_rep_info;
> +
> +	if (dcf_adapter->num_reprs >= vf_id) {
> +		PMD_DRV_LOG(ERR, "Invalid VF id: %d", vf_id);
> +		return -1;
> +	}
> +
> +	if (!dcf_adapter->repr_infos)
> +		return 0;
> +
> +	vf_rep_info = &dcf_adapter->repr_infos[vf_id];
> +	vf_rep_info->vf_rep_eth_dev = NULL;
> +
> +	return 0;
> +}
> +
>  static int
>  ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)  { @@ -1641,11
> +1661,10 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
> 
> +	ice_dcf_vf_repr_notify_all(adapter, false);
>  	(void)ice_dcf_dev_stop(dev);
> 
>  	ice_free_queues(dev);
> -
> -	ice_dcf_free_repr_info(adapter);
>  	ice_dcf_uninit_parent_adapter(dev);
>  	ice_dcf_uninit_hw(dev, &adapter->real_hw);
> 
> @@ -1835,7 +1854,7 @@ ice_dcf_dev_reset(struct rte_eth_dev *dev)
>  		ice_dcf_reset_hw(dev, hw);
>  	}
> 
> -	ret = ice_dcf_dev_uninit(dev);
> +	ret = ice_dcf_dev_close(dev);
>  	if (ret)
>  		return ret;
> 
> @@ -1940,12 +1959,17 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
>  	ice_dcf_stats_reset(eth_dev);
> 
>  	dcf_config_promisc(adapter, false, false);
> +	ice_dcf_vf_repr_notify_all(adapter, true);
> +
>  	return 0;
>  }
> 
>  static int
>  ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev)  {
> +	struct ice_dcf_adapter *adapter = eth_dev->data->dev_private;
> +
> +	ice_dcf_free_repr_info(adapter);
>  	ice_dcf_dev_close(eth_dev);
> 
>  	return 0;
> diff --git a/drivers/net/ice/ice_dcf_ethdev.h
> b/drivers/net/ice/ice_dcf_ethdev.h
> index 4baaec4b8b..6dcbaac5eb 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.h
> +++ b/drivers/net/ice/ice_dcf_ethdev.h
> @@ -60,6 +60,7 @@ struct ice_dcf_vf_repr {
>  	struct rte_ether_addr mac_addr;
>  	uint16_t switch_domain_id;
>  	uint16_t vf_id;
> +	bool dcf_valid;
> 
>  	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN
> */  }; @@ -80,6 +81,8 @@ int ice_dcf_vf_repr_init(struct rte_eth_dev
> *vf_rep_eth_dev, void *init_param);  int ice_dcf_vf_repr_uninit(struct
> rte_eth_dev *vf_rep_eth_dev);  int ice_dcf_vf_repr_init_vlan(struct
> rte_eth_dev *vf_rep_eth_dev);  void ice_dcf_vf_repr_stop_all(struct
> ice_dcf_adapter *dcf_adapter);
> +void ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter,
> +bool valid); int ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter
> +*dcf_adapter, uint16_t vf_id);
>  bool ice_dcf_adminq_need_retry(struct ice_adapter *ad);
> 
>  #endif /* _ICE_DCF_ETHDEV_H_ */
> diff --git a/drivers/net/ice/ice_dcf_vf_representor.c
> b/drivers/net/ice/ice_dcf_vf_representor.c
> index b9fcfc80ad..00dc322b30 100644
> --- a/drivers/net/ice/ice_dcf_vf_representor.c
> +++ b/drivers/net/ice/ice_dcf_vf_representor.c
> @@ -50,9 +50,30 @@ ice_dcf_vf_repr_dev_stop(struct rte_eth_dev *dev)
>  	return 0;
>  }
> 
> +static int
> +ice_dcf_vf_repr_set_dcf_valid(struct rte_eth_dev *dev, bool valid) {
> +	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
> +
> +	repr->dcf_valid = valid;
> +
> +	return 0;
> +}
> +
>  static int
>  ice_dcf_vf_repr_dev_close(struct rte_eth_dev *dev)  {
> +	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
> +	struct ice_dcf_adapter *dcf_adapter;
> +	int err;
> +
> +	if (repr->dcf_valid) {
> +		dcf_adapter = repr->dcf_eth_dev->data->dev_private;
> +		err = ice_dcf_handle_vf_repr_close(dcf_adapter, repr->vf_id);
> +		if (err)
> +			PMD_DRV_LOG(ERR, "VF representor invalid");
> +	}
> +
>  	return ice_dcf_vf_repr_uninit(dev);
>  }
> 
> @@ -111,14 +132,15 @@ ice_dcf_vf_repr_link_update(__rte_unused struct
> rte_eth_dev *ethdev,  static __rte_always_inline struct ice_dcf_hw *
> ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)  {
> -	struct ice_dcf_adapter *dcf_adapter =
> -			repr->dcf_eth_dev->data->dev_private;
> +	struct ice_dcf_adapter *dcf_adapter;
> 
> -	if (!dcf_adapter) {
> +	if (!repr->dcf_valid) {
>  		PMD_DRV_LOG(ERR, "DCF for VF representor has been
> released\n");
>  		return NULL;
>  	}
> 
> +	dcf_adapter = repr->dcf_eth_dev->data->dev_private;
> +
>  	return &dcf_adapter->real_hw;
>  }
> 
> @@ -414,6 +436,7 @@ ice_dcf_vf_repr_init(struct rte_eth_dev
> *vf_rep_eth_dev, void *init_param)
>  	repr->dcf_eth_dev = param->dcf_eth_dev;
>  	repr->switch_domain_id = param->switch_domain_id;
>  	repr->vf_id = param->vf_id;
> +	repr->dcf_valid = true;
>  	repr->outer_vlan_info.port_vlan_ena = false;
>  	repr->outer_vlan_info.stripping_ena = false;
>  	repr->outer_vlan_info.tpid = RTE_ETHER_TYPE_VLAN; @@ -488,3
> +511,25 @@ ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter)
>  			vf_rep_eth_dev->data->dev_started = 0;
>  	}
>  }
> +
> +void
> +ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter, bool
> +valid) {
> +	uint16_t vf_id;
> +	int err;
> +	struct rte_eth_dev *vf_rep_eth_dev;
> +
> +	if (!dcf_adapter->repr_infos)
> +		return;
> +
> +	for (vf_id = 0; vf_id < dcf_adapter->real_hw.num_vfs; vf_id++) {
> +		vf_rep_eth_dev = dcf_adapter-
> >repr_infos[vf_id].vf_rep_eth_dev;
> +
> +		if (!vf_rep_eth_dev)
> +			continue;
> +
> +		err = ice_dcf_vf_repr_set_dcf_valid(vf_rep_eth_dev, valid);

It would be better to rename ice_dcf_vf_repr_set_dcf_valid to ice_dcf_vf_repr_notify_one, to make ice_dcf_vf_repr_notify_all more readable.

Also, it is not necessary to check the return value: it is an internal function, not an API. You can add the check when it becomes necessary.



> +		if (err)
> +			PMD_DRV_LOG(ERR, "Failed to notify VF
> representor: %d", vf_id);
> +	}
> +}
> --
> 2.25.1
  

Patch

diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 29699c2c32..5d845bba31 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1618,6 +1618,26 @@  ice_dcf_free_repr_info(struct ice_dcf_adapter *dcf_adapter)
 	}
 }
 
+int
+ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter *dcf_adapter,
+				uint16_t vf_id)
+{
+	struct ice_dcf_repr_info *vf_rep_info;
+
+	if (dcf_adapter->num_reprs >= vf_id) {
+		PMD_DRV_LOG(ERR, "Invalid VF id: %d", vf_id);
+		return -1;
+	}
+
+	if (!dcf_adapter->repr_infos)
+		return 0;
+
+	vf_rep_info = &dcf_adapter->repr_infos[vf_id];
+	vf_rep_info->vf_rep_eth_dev = NULL;
+
+	return 0;
+}
+
 static int
 ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)
 {
@@ -1641,11 +1661,10 @@  ice_dcf_dev_close(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	ice_dcf_vf_repr_notify_all(adapter, false);
 	(void)ice_dcf_dev_stop(dev);
 
 	ice_free_queues(dev);
-
-	ice_dcf_free_repr_info(adapter);
 	ice_dcf_uninit_parent_adapter(dev);
 	ice_dcf_uninit_hw(dev, &adapter->real_hw);
 
@@ -1835,7 +1854,7 @@  ice_dcf_dev_reset(struct rte_eth_dev *dev)
 		ice_dcf_reset_hw(dev, hw);
 	}
 
-	ret = ice_dcf_dev_uninit(dev);
+	ret = ice_dcf_dev_close(dev);
 	if (ret)
 		return ret;
 
@@ -1940,12 +1959,17 @@  ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
 	ice_dcf_stats_reset(eth_dev);
 
 	dcf_config_promisc(adapter, false, false);
+	ice_dcf_vf_repr_notify_all(adapter, true);
+
 	return 0;
 }
 
 static int
 ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev)
 {
+	struct ice_dcf_adapter *adapter = eth_dev->data->dev_private;
+
+	ice_dcf_free_repr_info(adapter);
 	ice_dcf_dev_close(eth_dev);
 
 	return 0;
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index 4baaec4b8b..6dcbaac5eb 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -60,6 +60,7 @@  struct ice_dcf_vf_repr {
 	struct rte_ether_addr mac_addr;
 	uint16_t switch_domain_id;
 	uint16_t vf_id;
+	bool dcf_valid;
 
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
@@ -80,6 +81,8 @@  int ice_dcf_vf_repr_init(struct rte_eth_dev *vf_rep_eth_dev, void *init_param);
 int ice_dcf_vf_repr_uninit(struct rte_eth_dev *vf_rep_eth_dev);
 int ice_dcf_vf_repr_init_vlan(struct rte_eth_dev *vf_rep_eth_dev);
 void ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter);
+void ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter, bool valid);
+int ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter *dcf_adapter, uint16_t vf_id);
 bool ice_dcf_adminq_need_retry(struct ice_adapter *ad);
 
 #endif /* _ICE_DCF_ETHDEV_H_ */
diff --git a/drivers/net/ice/ice_dcf_vf_representor.c b/drivers/net/ice/ice_dcf_vf_representor.c
index b9fcfc80ad..00dc322b30 100644
--- a/drivers/net/ice/ice_dcf_vf_representor.c
+++ b/drivers/net/ice/ice_dcf_vf_representor.c
@@ -50,9 +50,30 @@  ice_dcf_vf_repr_dev_stop(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static int
+ice_dcf_vf_repr_set_dcf_valid(struct rte_eth_dev *dev, bool valid)
+{
+	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
+
+	repr->dcf_valid = valid;
+
+	return 0;
+}
+
 static int
 ice_dcf_vf_repr_dev_close(struct rte_eth_dev *dev)
 {
+	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
+	struct ice_dcf_adapter *dcf_adapter;
+	int err;
+
+	if (repr->dcf_valid) {
+		dcf_adapter = repr->dcf_eth_dev->data->dev_private;
+		err = ice_dcf_handle_vf_repr_close(dcf_adapter, repr->vf_id);
+		if (err)
+			PMD_DRV_LOG(ERR, "VF representor invalid");
+	}
+
 	return ice_dcf_vf_repr_uninit(dev);
 }
 
@@ -111,14 +132,15 @@  ice_dcf_vf_repr_link_update(__rte_unused struct rte_eth_dev *ethdev,
 static __rte_always_inline struct ice_dcf_hw *
 ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)
 {
-	struct ice_dcf_adapter *dcf_adapter =
-			repr->dcf_eth_dev->data->dev_private;
+	struct ice_dcf_adapter *dcf_adapter;
 
-	if (!dcf_adapter) {
+	if (!repr->dcf_valid) {
 		PMD_DRV_LOG(ERR, "DCF for VF representor has been released\n");
 		return NULL;
 	}
 
+	dcf_adapter = repr->dcf_eth_dev->data->dev_private;
+
 	return &dcf_adapter->real_hw;
 }
 
@@ -414,6 +436,7 @@  ice_dcf_vf_repr_init(struct rte_eth_dev *vf_rep_eth_dev, void *init_param)
 	repr->dcf_eth_dev = param->dcf_eth_dev;
 	repr->switch_domain_id = param->switch_domain_id;
 	repr->vf_id = param->vf_id;
+	repr->dcf_valid = true;
 	repr->outer_vlan_info.port_vlan_ena = false;
 	repr->outer_vlan_info.stripping_ena = false;
 	repr->outer_vlan_info.tpid = RTE_ETHER_TYPE_VLAN;
@@ -488,3 +511,25 @@  ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter)
 			vf_rep_eth_dev->data->dev_started = 0;
 	}
 }
+
+void
+ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter, bool valid)
+{
+	uint16_t vf_id;
+	int err;
+	struct rte_eth_dev *vf_rep_eth_dev;
+
+	if (!dcf_adapter->repr_infos)
+		return;
+
+	for (vf_id = 0; vf_id < dcf_adapter->real_hw.num_vfs; vf_id++) {
+		vf_rep_eth_dev = dcf_adapter->repr_infos[vf_id].vf_rep_eth_dev;
+
+		if (!vf_rep_eth_dev)
+			continue;
+
+		err = ice_dcf_vf_repr_set_dcf_valid(vf_rep_eth_dev, valid);
+		if (err)
+			PMD_DRV_LOG(ERR, "Failed to notify VF representor: %d", vf_id);
+	}
+}