[v5] net/ice: fix crash on closing representor ports

Message ID 20231106100001.40213-1-mingjinx.ye@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series [v5] net/ice: fix crash on closing representor ports |

Checks

Context Check Description
ci/loongarch-compilation warning apply patch failure
ci/checkpatch success coding style OK
ci/github-robot: build success github build: passed

Commit Message

Mingjin Ye Nov. 6, 2023, 10 a.m. UTC
  The data resource in struct rte_eth_dev is cleared and points to NULL
when the DCF port is closed.

If the DCF representor port is closed after the DCF port is closed,
a segmentation fault occurs because the representor port accesses the
data resource released by the DCF port.

This patch fixes this issue by synchronizing the state of DCF ports and
representor ports to the peer in real time when their state changes.

Fixes: 5674465a32c8 ("net/ice: add DCF VLAN handling")
Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
Fixes: c7e1a1a3bfeb ("net/ice: refactor DCF VLAN handling")
Fixes: 1a86f4dbdf42 ("net/ice: support DCF device reset")
Cc: stable@dpdk.org

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: Reformat code to remove unneeded fixlines.
---
v3: New solution.
---
v4: Optimize v2 patch.
---
v5: optimization.
---
 drivers/net/ice/ice_dcf_ethdev.c         | 30 ++++++++++++--
 drivers/net/ice/ice_dcf_ethdev.h         |  3 ++
 drivers/net/ice/ice_dcf_vf_representor.c | 50 ++++++++++++++++++++++--
 3 files changed, 77 insertions(+), 6 deletions(-)
  

Comments

Qi Zhang Nov. 6, 2023, 12:05 p.m. UTC | #1
> -----Original Message-----
> From: Ye, MingjinX <mingjinx.ye@intel.com>
> Sent: Monday, November 6, 2023 6:00 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Ye, MingjinX <mingjinx.ye@intel.com>;
> stable@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH v5] net/ice: fix crash on closing representor ports
> 
> The data resource in struct rte_eth_dev is cleared and points to NULL when
> the DCF port is closed.
> 
> If the DCF representor port is closed after the DCF port is closed, a
> segmentation fault occurs because the representor port accesses the data
> resource released by the DCF port.
> 
> This patch fixes this issue by synchronizing the state of DCF ports and
> representor ports to the peer in real time when their state changes.
> 
> Fixes: 5674465a32c8 ("net/ice: add DCF VLAN handling")
> Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
> Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
> Fixes: c7e1a1a3bfeb ("net/ice: refactor DCF VLAN handling")
> Fixes: 1a86f4dbdf42 ("net/ice: support DCF device reset")

These fixlines does not make sense to me, I believe the issue comes from when we enabled port representor for DCF, 
A patch expose the issue does not imply it cause the issue.

> Cc: stable@dpdk.org
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
> v2: Reformat code to remove unneeded fixlines.
> ---
> v3: New solution.
> ---
> v4: Optimize v2 patch.
> ---
> v5: optimization.
> ---
>  drivers/net/ice/ice_dcf_ethdev.c         | 30 ++++++++++++--
>  drivers/net/ice/ice_dcf_ethdev.h         |  3 ++
>  drivers/net/ice/ice_dcf_vf_representor.c | 50 ++++++++++++++++++++++--
>  3 files changed, 77 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
> index 065ec728c2..eea24ee3a9 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -1618,6 +1618,26 @@ ice_dcf_free_repr_info(struct ice_dcf_adapter
> *dcf_adapter)
>  	}
>  }
> 
> +int
> +ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter *dcf_adapter,
> +				uint16_t vf_id)
> +{
> +	struct ice_dcf_repr_info *vf_rep_info;
> +
> +	if (dcf_adapter->num_reprs >= vf_id) {
> +		PMD_DRV_LOG(ERR, "Invalid VF id: %d", vf_id);
> +		return -1;
> +	}
> +
> +	if (!dcf_adapter->repr_infos)
> +		return 0;
> +
> +	vf_rep_info = &dcf_adapter->repr_infos[vf_id];
> +	vf_rep_info->vf_rep_eth_dev = NULL;
> +
> +	return 0;
> +}
> +
>  static int
>  ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)  { @@ -1641,11
> +1661,10 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
> 
> +	ice_dcf_vf_repr_notify_all(adapter, false);
>  	(void)ice_dcf_dev_stop(dev);
> 
>  	ice_free_queues(dev);
> -
> -	ice_dcf_free_repr_info(adapter);
>  	ice_dcf_uninit_parent_adapter(dev);
>  	ice_dcf_uninit_hw(dev, &adapter->real_hw);
> 
> @@ -1835,7 +1854,7 @@ ice_dcf_dev_reset(struct rte_eth_dev *dev)
>  		ice_dcf_reset_hw(dev, hw);
>  	}
> 
> -	ret = ice_dcf_dev_uninit(dev);
> +	ret = ice_dcf_dev_close(dev);
>  	if (ret)
>  		return ret;
> 
> @@ -1938,12 +1957,17 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
>  	}
> 
>  	dcf_config_promisc(adapter, false, false);
> +	ice_dcf_vf_repr_notify_all(adapter, true);
> +
>  	return 0;
>  }
> 
>  static int
>  ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev)  {
> +	struct ice_dcf_adapter *adapter = eth_dev->data->dev_private;
> +
> +	ice_dcf_free_repr_info(adapter);
>  	ice_dcf_dev_close(eth_dev);
> 
>  	return 0;
> diff --git a/drivers/net/ice/ice_dcf_ethdev.h
> b/drivers/net/ice/ice_dcf_ethdev.h
> index 4baaec4b8b..6dcbaac5eb 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.h
> +++ b/drivers/net/ice/ice_dcf_ethdev.h
> @@ -60,6 +60,7 @@ struct ice_dcf_vf_repr {
>  	struct rte_ether_addr mac_addr;
>  	uint16_t switch_domain_id;
>  	uint16_t vf_id;
> +	bool dcf_valid;
> 
>  	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN
> */  }; @@ -80,6 +81,8 @@ int ice_dcf_vf_repr_init(struct rte_eth_dev
> *vf_rep_eth_dev, void *init_param);  int ice_dcf_vf_repr_uninit(struct
> rte_eth_dev *vf_rep_eth_dev);  int ice_dcf_vf_repr_init_vlan(struct
> rte_eth_dev *vf_rep_eth_dev);  void ice_dcf_vf_repr_stop_all(struct
> ice_dcf_adapter *dcf_adapter);
> +void ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter,
> +bool valid); int ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter
> +*dcf_adapter, uint16_t vf_id);
>  bool ice_dcf_adminq_need_retry(struct ice_adapter *ad);
> 
>  #endif /* _ICE_DCF_ETHDEV_H_ */
> diff --git a/drivers/net/ice/ice_dcf_vf_representor.c
> b/drivers/net/ice/ice_dcf_vf_representor.c
> index b9fcfc80ad..6c342798ac 100644
> --- a/drivers/net/ice/ice_dcf_vf_representor.c
> +++ b/drivers/net/ice/ice_dcf_vf_representor.c
> @@ -50,9 +50,32 @@ ice_dcf_vf_repr_dev_stop(struct rte_eth_dev *dev)
>  	return 0;
>  }
> 
> +static bool

This function return Boolean, but it never be checked.

> +ice_dcf_vf_repr_set_dcf_valid(struct rte_eth_dev *dev, bool valid) {
> +	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
> +
> +	if (!repr)
> +		return false;

Is this check necessary? If repr = null, means the representor port already be closed, the function will not be called for that port.

> +
> +	repr->dcf_valid = valid;
> +
> +	return true;
> +}
> +
>  static int
>  ice_dcf_vf_repr_dev_close(struct rte_eth_dev *dev)  {
> +	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
> +	struct ice_dcf_adapter *dcf_adapter;
> +
> +	if (repr->dcf_valid) {
> +		dcf_adapter = repr->dcf_eth_dev->data->dev_private;
> +		ice_dcf_handle_vf_repr_close(dcf_adapter, repr->vf_id);
> +		/* Commitment to not access DCF port resources */
> +		repr->dcf_valid = false;

This is not necessary, as you already called ice_dcf_handle_vf_repr_close to notify dcf.
And the repr will be freed in ice_dcf_vf_repr_uninit anyway.


> +	}
> +
>  	return ice_dcf_vf_repr_uninit(dev);
>  }
> 
> @@ -111,14 +134,15 @@ ice_dcf_vf_repr_link_update(__rte_unused struct
> rte_eth_dev *ethdev,  static __rte_always_inline struct ice_dcf_hw *
> ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)  {
> -	struct ice_dcf_adapter *dcf_adapter =
> -			repr->dcf_eth_dev->data->dev_private;
> +	struct ice_dcf_adapter *dcf_adapter;
> 
> -	if (!dcf_adapter) {
> +	if (!repr->dcf_valid) {
>  		PMD_DRV_LOG(ERR, "DCF for VF representor has been
> released\n");
>  		return NULL;
>  	}
> 
> +	dcf_adapter = repr->dcf_eth_dev->data->dev_private;
> +
>  	return &dcf_adapter->real_hw;
>  }
> 
> @@ -414,6 +438,7 @@ ice_dcf_vf_repr_init(struct rte_eth_dev
> *vf_rep_eth_dev, void *init_param)
>  	repr->dcf_eth_dev = param->dcf_eth_dev;
>  	repr->switch_domain_id = param->switch_domain_id;
>  	repr->vf_id = param->vf_id;
> +	repr->dcf_valid = true;
>  	repr->outer_vlan_info.port_vlan_ena = false;
>  	repr->outer_vlan_info.stripping_ena = false;
>  	repr->outer_vlan_info.tpid = RTE_ETHER_TYPE_VLAN; @@ -488,3
> +513,22 @@ ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter)
>  			vf_rep_eth_dev->data->dev_started = 0;
>  	}
>  }
> +
> +void
> +ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter, bool
> +valid) {
> +	uint16_t vf_id;
> +	struct rte_eth_dev *vf_rep_eth_dev;
> +
> +	if (!dcf_adapter->repr_infos)
> +		return;
> +
> +	for (vf_id = 0; vf_id < dcf_adapter->real_hw.num_vfs; vf_id++) {
> +		vf_rep_eth_dev = dcf_adapter-
> >repr_infos[vf_id].vf_rep_eth_dev;
> +
> +		if (!vf_rep_eth_dev)
> +			continue;
> +
> +		ice_dcf_vf_repr_set_dcf_valid(vf_rep_eth_dev, valid);
> +	}
> +}
> --
> 2.25.1
  

Patch

diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 065ec728c2..eea24ee3a9 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1618,6 +1618,26 @@  ice_dcf_free_repr_info(struct ice_dcf_adapter *dcf_adapter)
 	}
 }
 
+int
+ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter *dcf_adapter,
+				uint16_t vf_id)
+{
+	struct ice_dcf_repr_info *vf_rep_info;
+
+	if (dcf_adapter->num_reprs >= vf_id) {
+		PMD_DRV_LOG(ERR, "Invalid VF id: %d", vf_id);
+		return -1;
+	}
+
+	if (!dcf_adapter->repr_infos)
+		return 0;
+
+	vf_rep_info = &dcf_adapter->repr_infos[vf_id];
+	vf_rep_info->vf_rep_eth_dev = NULL;
+
+	return 0;
+}
+
 static int
 ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)
 {
@@ -1641,11 +1661,10 @@  ice_dcf_dev_close(struct rte_eth_dev *dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	ice_dcf_vf_repr_notify_all(adapter, false);
 	(void)ice_dcf_dev_stop(dev);
 
 	ice_free_queues(dev);
-
-	ice_dcf_free_repr_info(adapter);
 	ice_dcf_uninit_parent_adapter(dev);
 	ice_dcf_uninit_hw(dev, &adapter->real_hw);
 
@@ -1835,7 +1854,7 @@  ice_dcf_dev_reset(struct rte_eth_dev *dev)
 		ice_dcf_reset_hw(dev, hw);
 	}
 
-	ret = ice_dcf_dev_uninit(dev);
+	ret = ice_dcf_dev_close(dev);
 	if (ret)
 		return ret;
 
@@ -1938,12 +1957,17 @@  ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
 	}
 
 	dcf_config_promisc(adapter, false, false);
+	ice_dcf_vf_repr_notify_all(adapter, true);
+
 	return 0;
 }
 
 static int
 ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev)
 {
+	struct ice_dcf_adapter *adapter = eth_dev->data->dev_private;
+
+	ice_dcf_free_repr_info(adapter);
 	ice_dcf_dev_close(eth_dev);
 
 	return 0;
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index 4baaec4b8b..6dcbaac5eb 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -60,6 +60,7 @@  struct ice_dcf_vf_repr {
 	struct rte_ether_addr mac_addr;
 	uint16_t switch_domain_id;
 	uint16_t vf_id;
+	bool dcf_valid;
 
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
@@ -80,6 +81,8 @@  int ice_dcf_vf_repr_init(struct rte_eth_dev *vf_rep_eth_dev, void *init_param);
 int ice_dcf_vf_repr_uninit(struct rte_eth_dev *vf_rep_eth_dev);
 int ice_dcf_vf_repr_init_vlan(struct rte_eth_dev *vf_rep_eth_dev);
 void ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter);
+void ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter, bool valid);
+int ice_dcf_handle_vf_repr_close(struct ice_dcf_adapter *dcf_adapter, uint16_t vf_id);
 bool ice_dcf_adminq_need_retry(struct ice_adapter *ad);
 
 #endif /* _ICE_DCF_ETHDEV_H_ */
diff --git a/drivers/net/ice/ice_dcf_vf_representor.c b/drivers/net/ice/ice_dcf_vf_representor.c
index b9fcfc80ad..6c342798ac 100644
--- a/drivers/net/ice/ice_dcf_vf_representor.c
+++ b/drivers/net/ice/ice_dcf_vf_representor.c
@@ -50,9 +50,32 @@  ice_dcf_vf_repr_dev_stop(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static bool
+ice_dcf_vf_repr_set_dcf_valid(struct rte_eth_dev *dev, bool valid)
+{
+	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
+
+	if (!repr)
+		return false;
+
+	repr->dcf_valid = valid;
+
+	return true;
+}
+
 static int
 ice_dcf_vf_repr_dev_close(struct rte_eth_dev *dev)
 {
+	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
+	struct ice_dcf_adapter *dcf_adapter;
+
+	if (repr->dcf_valid) {
+		dcf_adapter = repr->dcf_eth_dev->data->dev_private;
+		ice_dcf_handle_vf_repr_close(dcf_adapter, repr->vf_id);
+		/* Commitment to not access DCF port resources */
+		repr->dcf_valid = false;
+	}
+
 	return ice_dcf_vf_repr_uninit(dev);
 }
 
@@ -111,14 +134,15 @@  ice_dcf_vf_repr_link_update(__rte_unused struct rte_eth_dev *ethdev,
 static __rte_always_inline struct ice_dcf_hw *
 ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)
 {
-	struct ice_dcf_adapter *dcf_adapter =
-			repr->dcf_eth_dev->data->dev_private;
+	struct ice_dcf_adapter *dcf_adapter;
 
-	if (!dcf_adapter) {
+	if (!repr->dcf_valid) {
 		PMD_DRV_LOG(ERR, "DCF for VF representor has been released\n");
 		return NULL;
 	}
 
+	dcf_adapter = repr->dcf_eth_dev->data->dev_private;
+
 	return &dcf_adapter->real_hw;
 }
 
@@ -414,6 +438,7 @@  ice_dcf_vf_repr_init(struct rte_eth_dev *vf_rep_eth_dev, void *init_param)
 	repr->dcf_eth_dev = param->dcf_eth_dev;
 	repr->switch_domain_id = param->switch_domain_id;
 	repr->vf_id = param->vf_id;
+	repr->dcf_valid = true;
 	repr->outer_vlan_info.port_vlan_ena = false;
 	repr->outer_vlan_info.stripping_ena = false;
 	repr->outer_vlan_info.tpid = RTE_ETHER_TYPE_VLAN;
@@ -488,3 +513,22 @@  ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter)
 			vf_rep_eth_dev->data->dev_started = 0;
 	}
 }
+
+void
+ice_dcf_vf_repr_notify_all(struct ice_dcf_adapter *dcf_adapter, bool valid)
+{
+	uint16_t vf_id;
+	struct rte_eth_dev *vf_rep_eth_dev;
+
+	if (!dcf_adapter->repr_infos)
+		return;
+
+	for (vf_id = 0; vf_id < dcf_adapter->real_hw.num_vfs; vf_id++) {
+		vf_rep_eth_dev = dcf_adapter->repr_infos[vf_id].vf_rep_eth_dev;
+
+		if (!vf_rep_eth_dev)
+			continue;
+
+		ice_dcf_vf_repr_set_dcf_valid(vf_rep_eth_dev, valid);
+	}
+}