[2/2] net/octeontx2: support read clock API

Message ID 1564239400-2919-2-git-send-email-hkalra@marvell.com (mailing list archive)
State Superseded, archived
Headers
Series [1/2] net/octeontx2: fix ptp performance issue |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Harman Kalra July 27, 2019, 2:57 p.m. UTC
  This patch implements the read clock API, whose purpose is to return
raw clock ticks. Using this API, the real-time ticks spent in
processing a packet can be determined as:
    <read_clock val at any time> - mbuf->timestamp

Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
 drivers/common/octeontx2/otx2_mbox.h |  2 +
 drivers/net/octeontx2/otx2_ethdev.c  | 86 ++++++++++++++++++++++++++++
 drivers/net/octeontx2/otx2_ethdev.h  |  4 ++
 drivers/net/octeontx2/otx2_ptp.c     | 30 ++++++++++
 4 files changed, 122 insertions(+)
  

Comments

Jerin Jacob Kollanukkaran July 27, 2019, 3:48 p.m. UTC | #1
> -----Original Message-----
> From: Harman Kalra <hkalra@marvell.com>
> Sent: Saturday, July 27, 2019 8:27 PM
> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Nithin Kumar
> Dabilpuram <ndabilpuram@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>
> Cc: dev@dpdk.org; Harman Kalra <hkalra@marvell.com>
> Subject: [PATCH 2/2] net/octeontx2: support read clock API
> 
> This patch implements read clock api whose purpose is to return raw clock
> ticks. Using this API real time ticks spent in processing a packet can be known:
>     <read_clock val at any time> - mbuf->timestamp

Add more details.

> 
> Signed-off-by: Harman Kalra <hkalra@marvell.com>
> ---
>  drivers/common/octeontx2/otx2_mbox.h |  2 +
> drivers/net/octeontx2/otx2_ethdev.c  | 86
> ++++++++++++++++++++++++++++  drivers/net/octeontx2/otx2_ethdev.h
> |  4 ++
>  drivers/net/octeontx2/otx2_ptp.c     | 30 ++++++++++
>  4 files changed, 122 insertions(+)
> 
> diff --git a/drivers/common/octeontx2/otx2_mbox.h
> b/drivers/common/octeontx2/otx2_mbox.h
> index c0bb676b2..b2c59c86e 100644
> --- a/drivers/common/octeontx2/otx2_mbox.h
> +++ b/drivers/common/octeontx2/otx2_mbox.h
> @@ -1354,11 +1354,13 @@ struct ptp_req {
>  	struct mbox_msghdr hdr;
>  	uint8_t __otx2_io op;
>  	int64_t __otx2_io scaled_ppm;
> +	uint8_t __otx2_io is_pmu;
>  };
> 
>  struct ptp_rsp {
>  	struct mbox_msghdr hdr;
>  	uint64_t __otx2_io clk;
> +	uint64_t __otx2_io tsc;
>  };
> 
>  struct get_hw_cap_rsp {
> diff --git a/drivers/net/octeontx2/otx2_ethdev.c
> b/drivers/net/octeontx2/otx2_ethdev.c
> index 595c8003a..799e67480 100644
> --- a/drivers/net/octeontx2/otx2_ethdev.c
> +++ b/drivers/net/octeontx2/otx2_ethdev.c
> @@ -521,6 +521,17 @@ otx2_nix_rx_queue_setup(struct rte_eth_dev
> *eth_dev, uint16_t rq,
> 
>  	eth_dev->data->rx_queues[rq] = rxq;
>  	eth_dev->data->rx_queue_state[rq] =
> RTE_ETH_QUEUE_STATE_STOPPED;
> +
> +	/* Calculating delta and freq mult between PTP HI clock and rdtsc.
> +	 * These are needed for deriving PTP HI clock value from tsc counter.
> +	 */
> +	if ((dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) ||
> +	    otx2_ethdev_is_ptp_en(dev)) {
> +		rc = otx2_nix_raw_clock_rdtsc_conv(dev);

rdtsc is an x86 instruction. Change to tmc wherever rdtsc is used.

> +		if (rc)
> +			otx2_err("Failed to calculate delta and freq mult");
> +	}
> +
>  	return 0;

Make it "return rc" to avoid returning 0 in case of the above failure.



> 
>  free_rxq:
> @@ -1186,6 +1197,79 @@ nix_set_nop_rxtx_function(struct rte_eth_dev
> *eth_dev)
>  	rte_mb();
>  }
> 
> +static int
> +nix_read_raw_clock(struct otx2_eth_dev *dev, uint64_t *clock, uint64_t
> *tsc,
> +		   uint8_t is_pmu)
> +{
> +	struct otx2_mbox *mbox = dev->mbox;
> +	struct ptp_req *req;
> +	struct ptp_rsp *rsp;
> +	int rc = 0;

No need to init with 0

> +
> +	req = otx2_mbox_alloc_msg_ptp_op(mbox);
> +	req->op = PTP_OP_GET_CLOCK;
> +	req->is_pmu = is_pmu;
> +	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
> +	if (rc)
> +		goto done;

s/done/error

> +
> +	*clock = rsp->clk;
If (clock)

> +	*tsc = rsp->tsc;

If (tsc)

> +
> +done:
> +	return rc;
> +}
> +
> +/* This function calculates two parameters "clk_freq_mult" and
> + * "clk_delta" which is useful in deriving PTP HI clock from
> + * rdtsc value.
s/rdtsc/tmc

> + */
> +int
> +otx2_nix_raw_clock_rdtsc_conv(struct otx2_eth_dev *dev) {

s/rdtsc/tmc

> +	uint64_t ticks_base = 0, ticks = 0, t_freq = 0, tsc = 0;


Init with 0 only if it is absolutely necessary


> +	uint8_t retval = 0, val;

# It should be int
# Change retval to rc for consistency

> +
> +	/* Calculating the frequency at which PTP HI clock is running */
> +	retval = nix_read_raw_clock(dev, &ticks_base, &tsc, false);
> +	if (retval != 0) {

If (rc)

> +		otx2_err("Failed to read the raw clock value: %d", retval);
> +		goto done;
> +	}
> +
> +	rte_delay_ms(100);
> +
> +	retval = nix_read_raw_clock(dev, &ticks, &tsc, false);
> +	if (retval != 0) {

If (rc)

> +		otx2_err("Failed to read the raw clock value: %d", retval);
> +		goto done;
> +	}
> +
> +	t_freq = (ticks - ticks_base) * 10;
> +
> +	/* Calculating the freq multiplier viz the ratio between the
> +	 * frequency at which PTP HI clock works and rdtsc clock runs
> +	 */
> +	dev->clk_freq_mult =
> +		(double)pow(10, floor(log10(t_freq))) / rte_get_timer_hz();
> +
> +	val = false;
> +#ifdef RTE_ARM_EAL_RDTSC_USE_PMU
> +	val = true;
> +#endif
> +	retval = nix_read_raw_clock(dev, &ticks, &tsc, val);
> +	if (retval != 0) {


If (rc)

> +		otx2_err("Failed to read the raw clock value: %d", retval);
> +		goto done;
> +	}
> +
> +	/* Calculating delta between PTP HI clock and rdtsc */
> +	dev->clk_delta = ((uint64_t)(ticks / dev->clk_freq_mult) - tsc);
> +
> +done:
> +	return retval;
> +}
> +
>  static int
>  otx2_nix_configure(struct rte_eth_dev *eth_dev)  { @@ -1363,6 +1447,7
> @@ otx2_nix_configure(struct rte_eth_dev *eth_dev)
>  	dev->configured = 1;
>  	dev->configured_nb_rx_qs = data->nb_rx_queues;
>  	dev->configured_nb_tx_qs = data->nb_tx_queues;
> +

Unrelated change

>  	return 0;
> 
>  cq_fini:
> @@ -1649,6 +1734,7 @@ static const struct eth_dev_ops otx2_eth_dev_ops
> = {
>  	.vlan_pvid_set		  = otx2_nix_vlan_pvid_set,
>  	.rx_queue_intr_enable	  = otx2_nix_rx_queue_intr_enable,
>  	.rx_queue_intr_disable	  = otx2_nix_rx_queue_intr_disable,
> +	.read_clock		  = otx2_nix_read_clock,
>  };
> 
>  static inline int
> diff --git a/drivers/net/octeontx2/otx2_ethdev.h
> b/drivers/net/octeontx2/otx2_ethdev.h
> index 863d4877f..a2bd0ffcf 100644
> --- a/drivers/net/octeontx2/otx2_ethdev.h
> +++ b/drivers/net/octeontx2/otx2_ethdev.h
> @@ -300,6 +300,8 @@ struct otx2_eth_dev {
>  	struct rte_timecounter  systime_tc;
>  	struct rte_timecounter  rx_tstamp_tc;
>  	struct rte_timecounter  tx_tstamp_tc;
> +	double clk_freq_mult;
> +	uint64_t clk_delta;
>  } __rte_cache_aligned;
> 
>  struct otx2_eth_txq {
> @@ -527,5 +529,7 @@ int otx2_nix_timesync_write_time(struct
> rte_eth_dev *eth_dev,  int otx2_nix_timesync_read_time(struct
> rte_eth_dev *eth_dev,
>  				struct timespec *ts);
>  int otx2_eth_dev_ptp_info_update(struct otx2_dev *dev, bool ptp_en);
> +int otx2_nix_read_clock(struct rte_eth_dev *eth_dev, uint64_t *time);
> +int otx2_nix_raw_clock_rdtsc_conv(struct otx2_eth_dev *dev);

s/rdtsc/tsc

> 
>  #endif /* __OTX2_ETHDEV_H__ */
> diff --git a/drivers/net/octeontx2/otx2_ptp.c
> b/drivers/net/octeontx2/otx2_ptp.c
> index 0186c629a..3f54cfeaf 100644
> --- a/drivers/net/octeontx2/otx2_ptp.c
> +++ b/drivers/net/octeontx2/otx2_ptp.c
> @@ -224,6 +224,13 @@ otx2_nix_timesync_adjust_time(struct rte_eth_dev
> *eth_dev, int64_t delta)
>  		rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
>  		if (rc)
>  			return rc;
> +		/* Since the frequency of PTP comp register is tuned, delta
> and
> +		 * freq mult calculation for deriving PTP_HI from rdtsc should
> +		 * be done again.
> +		 */
> +		rc = otx2_nix_raw_clock_rdtsc_conv(dev);
s/rdtsc/tsc
> +		if (rc)
> +			otx2_err("Failed to calculate delta and freq mult");
>  	}
>  	dev->systime_tc.nsec += delta;
>  	dev->rx_tstamp_tc.nsec += delta;
> @@ -271,3 +278,26 @@ otx2_nix_timesync_read_time(struct rte_eth_dev
> *eth_dev, struct timespec *ts)
> 
>  	return 0;
>  }
> +
> +
> +int
> +otx2_nix_read_clock(struct rte_eth_dev *eth_dev, uint64_t *clock) {
> +	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
> +
> +	if (!otx2_ethdev_is_ptp_en(dev)) {
> +		otx2_err("PTP should be enabled.");
> +		return -EINVAL;
> +	}
> +
> +	/* This API returns the raw PTP HI clock value. Since LFs doesn't
> +	 * have direct access to PTP registers and it requires mbox msg
> +	 * to AF for this value. In fastpath reading this value for every
> +	 * packet (which involes mbox call) becomes very expensive, hence
> +	 * we should be able to derive PTP HI clock value from rdtsc by
> +	 * using freq_mult and clk_delta calculated during configure stage.
> +	 */
> +	*clock = (rte_rdtsc() + dev->clk_delta) * dev->clk_freq_mult;

s/rte_rdtsc /rte_get_tsc_cycles/g

> +
> +	return 0;
> +}
> --
> 2.18.0
  

Patch

diff --git a/drivers/common/octeontx2/otx2_mbox.h b/drivers/common/octeontx2/otx2_mbox.h
index c0bb676b2..b2c59c86e 100644
--- a/drivers/common/octeontx2/otx2_mbox.h
+++ b/drivers/common/octeontx2/otx2_mbox.h
@@ -1354,11 +1354,13 @@  struct ptp_req {
 	struct mbox_msghdr hdr;
 	uint8_t __otx2_io op;
 	int64_t __otx2_io scaled_ppm;
+	uint8_t __otx2_io is_pmu;
 };
 
 struct ptp_rsp {
 	struct mbox_msghdr hdr;
 	uint64_t __otx2_io clk;
+	uint64_t __otx2_io tsc;
 };
 
 struct get_hw_cap_rsp {
diff --git a/drivers/net/octeontx2/otx2_ethdev.c b/drivers/net/octeontx2/otx2_ethdev.c
index 595c8003a..799e67480 100644
--- a/drivers/net/octeontx2/otx2_ethdev.c
+++ b/drivers/net/octeontx2/otx2_ethdev.c
@@ -521,6 +521,17 @@  otx2_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t rq,
 
 	eth_dev->data->rx_queues[rq] = rxq;
 	eth_dev->data->rx_queue_state[rq] = RTE_ETH_QUEUE_STATE_STOPPED;
+
+	/* Calculating delta and freq mult between PTP HI clock and rdtsc.
+	 * These are needed for deriving PTP HI clock value from tsc counter.
+	 */
+	if ((dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) ||
+	    otx2_ethdev_is_ptp_en(dev)) {
+		rc = otx2_nix_raw_clock_rdtsc_conv(dev);
+		if (rc)
+			otx2_err("Failed to calculate delta and freq mult");
+	}
+
 	return 0;
 
 free_rxq:
@@ -1186,6 +1197,79 @@  nix_set_nop_rxtx_function(struct rte_eth_dev *eth_dev)
 	rte_mb();
 }
 
+static int
+nix_read_raw_clock(struct otx2_eth_dev *dev, uint64_t *clock, uint64_t *tsc,
+		   uint8_t is_pmu)
+{
+	struct otx2_mbox *mbox = dev->mbox;
+	struct ptp_req *req;
+	struct ptp_rsp *rsp;
+	int rc = 0;
+
+	req = otx2_mbox_alloc_msg_ptp_op(mbox);
+	req->op = PTP_OP_GET_CLOCK;
+	req->is_pmu = is_pmu;
+	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
+	if (rc)
+		goto done;
+
+	*clock = rsp->clk;
+	*tsc = rsp->tsc;
+
+done:
+	return rc;
+}
+
+/* This function calculates two parameters "clk_freq_mult" and
+ * "clk_delta" which is useful in deriving PTP HI clock from
+ * rdtsc value.
+ */
+int
+otx2_nix_raw_clock_rdtsc_conv(struct otx2_eth_dev *dev)
+{
+	uint64_t ticks_base = 0, ticks = 0, t_freq = 0, tsc = 0;
+	uint8_t retval = 0, val;
+
+	/* Calculating the frequency at which PTP HI clock is running */
+	retval = nix_read_raw_clock(dev, &ticks_base, &tsc, false);
+	if (retval != 0) {
+		otx2_err("Failed to read the raw clock value: %d", retval);
+		goto done;
+	}
+
+	rte_delay_ms(100);
+
+	retval = nix_read_raw_clock(dev, &ticks, &tsc, false);
+	if (retval != 0) {
+		otx2_err("Failed to read the raw clock value: %d", retval);
+		goto done;
+	}
+
+	t_freq = (ticks - ticks_base) * 10;
+
+	/* Calculating the freq multiplier viz the ratio between the
+	 * frequency at which PTP HI clock works and rdtsc clock runs
+	 */
+	dev->clk_freq_mult =
+		(double)pow(10, floor(log10(t_freq))) / rte_get_timer_hz();
+
+	val = false;
+#ifdef RTE_ARM_EAL_RDTSC_USE_PMU
+	val = true;
+#endif
+	retval = nix_read_raw_clock(dev, &ticks, &tsc, val);
+	if (retval != 0) {
+		otx2_err("Failed to read the raw clock value: %d", retval);
+		goto done;
+	}
+
+	/* Calculating delta between PTP HI clock and rdtsc */
+	dev->clk_delta = ((uint64_t)(ticks / dev->clk_freq_mult) - tsc);
+
+done:
+	return retval;
+}
+
 static int
 otx2_nix_configure(struct rte_eth_dev *eth_dev)
 {
@@ -1363,6 +1447,7 @@  otx2_nix_configure(struct rte_eth_dev *eth_dev)
 	dev->configured = 1;
 	dev->configured_nb_rx_qs = data->nb_rx_queues;
 	dev->configured_nb_tx_qs = data->nb_tx_queues;
+
 	return 0;
 
 cq_fini:
@@ -1649,6 +1734,7 @@  static const struct eth_dev_ops otx2_eth_dev_ops = {
 	.vlan_pvid_set		  = otx2_nix_vlan_pvid_set,
 	.rx_queue_intr_enable	  = otx2_nix_rx_queue_intr_enable,
 	.rx_queue_intr_disable	  = otx2_nix_rx_queue_intr_disable,
+	.read_clock		  = otx2_nix_read_clock,
 };
 
 static inline int
diff --git a/drivers/net/octeontx2/otx2_ethdev.h b/drivers/net/octeontx2/otx2_ethdev.h
index 863d4877f..a2bd0ffcf 100644
--- a/drivers/net/octeontx2/otx2_ethdev.h
+++ b/drivers/net/octeontx2/otx2_ethdev.h
@@ -300,6 +300,8 @@  struct otx2_eth_dev {
 	struct rte_timecounter  systime_tc;
 	struct rte_timecounter  rx_tstamp_tc;
 	struct rte_timecounter  tx_tstamp_tc;
+	double clk_freq_mult;
+	uint64_t clk_delta;
 } __rte_cache_aligned;
 
 struct otx2_eth_txq {
@@ -527,5 +529,7 @@  int otx2_nix_timesync_write_time(struct rte_eth_dev *eth_dev,
 int otx2_nix_timesync_read_time(struct rte_eth_dev *eth_dev,
 				struct timespec *ts);
 int otx2_eth_dev_ptp_info_update(struct otx2_dev *dev, bool ptp_en);
+int otx2_nix_read_clock(struct rte_eth_dev *eth_dev, uint64_t *time);
+int otx2_nix_raw_clock_rdtsc_conv(struct otx2_eth_dev *dev);
 
 #endif /* __OTX2_ETHDEV_H__ */
diff --git a/drivers/net/octeontx2/otx2_ptp.c b/drivers/net/octeontx2/otx2_ptp.c
index 0186c629a..3f54cfeaf 100644
--- a/drivers/net/octeontx2/otx2_ptp.c
+++ b/drivers/net/octeontx2/otx2_ptp.c
@@ -224,6 +224,13 @@  otx2_nix_timesync_adjust_time(struct rte_eth_dev *eth_dev, int64_t delta)
 		rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
 		if (rc)
 			return rc;
+		/* Since the frequency of PTP comp register is tuned, delta and
+		 * freq mult calculation for deriving PTP_HI from rdtsc should
+		 * be done again.
+		 */
+		rc = otx2_nix_raw_clock_rdtsc_conv(dev);
+		if (rc)
+			otx2_err("Failed to calculate delta and freq mult");
 	}
 	dev->systime_tc.nsec += delta;
 	dev->rx_tstamp_tc.nsec += delta;
@@ -271,3 +278,26 @@  otx2_nix_timesync_read_time(struct rte_eth_dev *eth_dev, struct timespec *ts)
 
 	return 0;
 }
+
+
+int
+otx2_nix_read_clock(struct rte_eth_dev *eth_dev, uint64_t *clock)
+{
+	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);
+
+	if (!otx2_ethdev_is_ptp_en(dev)) {
+		otx2_err("PTP should be enabled.");
+		return -EINVAL;
+	}
+
+	/* This API returns the raw PTP HI clock value. Since LFs doesn't
+	 * have direct access to PTP registers and it requires mbox msg
+	 * to AF for this value. In fastpath reading this value for every
+	 * packet (which involes mbox call) becomes very expensive, hence
+	 * we should be able to derive PTP HI clock value from rdtsc by
+	 * using freq_mult and clk_delta calculated during configure stage.
+	 */
+	*clock = (rte_rdtsc() + dev->clk_delta) * dev->clk_freq_mult;
+
+	return 0;
+}