[v2,14/14] net/idpf: add support for timestamp offload

Message ID: 20220905105828.3190335-15-junfeng.guo@intel.com (mailing list archive)
State: Changes Requested, archived
Delegated to: Andrew Rybchenko
Series: add support for idpf PMD in DPDK

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues
ci/iol-x86_64-unit-testing fail Testing issues
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing fail Testing issues
ci/iol-aarch64-compile-testing fail Testing issues
ci/github-robot: build fail github build: failed
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Junfeng Guo Sept. 5, 2022, 10:58 a.m. UTC
  Add support for timestamp offload.

Signed-off-by: Wenjing Qiao <wenjing.qiao@intel.com>
Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
---
 drivers/net/idpf/idpf_ethdev.h |  3 ++
 drivers/net/idpf/idpf_rxtx.c   | 79 ++++++++++++++++++++++++++++++
 drivers/net/idpf/idpf_rxtx.h   | 89 +++++++++++++++++++++++++++++++++-
 3 files changed, 170 insertions(+), 1 deletion(-)
  

Comments

Andrew Rybchenko Oct. 3, 2022, 2:22 p.m. UTC | #1
On 9/5/22 13:58, Junfeng Guo wrote:
> Add support for timestamp offload.
> 
> Signed-off-by: Wenjing Qiao <wenjing.qiao@intel.com>
> Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>

[snip]

> +/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
> +static inline uint64_t
> +idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
> +			    uint32_t flag, uint32_t in_timestamp)
> +{
> +/* TODO: timestamp for ACC */
> +#ifdef RTE_ARCH_ARM64
> +	return 0;
> +#endif /* RTE_ARCH_ARM64 */
> +
> +#ifdef RTE_ARCH_X86_64
> +	const uint64_t mask = 0xFFFFFFFF;
> +	uint32_t hi, lo, lo2, delta;
> +	uint64_t ns;
> +
> +	if (flag) {
> +		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
> +		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
> +			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
> +		lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> +		hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
> +		/*
> +		 * On a typical system, the delta between lo and lo2 is ~1000ns,
> +		 * so 10000 seems a large-enough but not overly-big guard band.
> +		 */
> +		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
> +			lo2 = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> +		else
> +			lo2 = lo;
> +
> +		if (lo2 < lo) {
> +			lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> +			hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
> +		}
> +
> +		ad->time_hw = ((uint64_t)hi << 32) | lo;
> +	}
> +
> +	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
> +	if (delta > (mask / 2)) {
> +		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
> +		ns = ad->time_hw - delta;
> +	} else {
> +		ns = ad->time_hw + delta;
> +	}
> +
> +	return ns;
> +#endif /* RTE_ARCH_X86_64 */

Conditional compilation is rather strange above.
Will it break the build on architectures that are
neither x86-64 nor ARM64?
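
For illustration (an editor's sketch, not code from the thread): on a target
that defines neither RTE_ARCH_X86_64 nor RTE_ARCH_ARM64, both blocks drop out
and the preprocessor reduces the helper to a uint64_t function with an empty
body, so control falls off the end. That is undefined behaviour if the
function is ever called, and a hard build error under -Werror=return-type:

static inline uint64_t
idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
			    uint32_t flag, uint32_t in_timestamp)
{
/* TODO: timestamp for ACC */
}	/* warning: control reaches end of non-void function */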

> +}
>   
> +#endif /* _IDPF_RXTX_H_ */
  
Wenjun Wu Oct. 10, 2022, 7:56 a.m. UTC | #2
> -----Original Message-----
> From: Junfeng Guo <junfeng.guo@intel.com>
> Sent: Monday, September 5, 2022 6:58 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Jingjing
> <jingjing.wu@intel.com>; Xing, Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Wang, Xiao W <xiao.w.wang@intel.com>; Guo, Junfeng
> <junfeng.guo@intel.com>; Qiao, Wenjing <wenjing.qiao@intel.com>
> Subject: [PATCH v2 14/14] net/idpf: add support for timestamp offload
> 
> Add support for timestamp offload.
> 
> Signed-off-by: Wenjing Qiao <wenjing.qiao@intel.com>
> Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
> ---
>  drivers/net/idpf/idpf_ethdev.h |  3 ++
>  drivers/net/idpf/idpf_rxtx.c   | 79 ++++++++++++++++++++++++++++++
>  drivers/net/idpf/idpf_rxtx.h   | 89 +++++++++++++++++++++++++++++++++-
>  3 files changed, 170 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h
> index a32d5758ac..968e0e3cbf 100644
> --- a/drivers/net/idpf/idpf_ethdev.h
> +++ b/drivers/net/idpf/idpf_ethdev.h
> @@ -184,6 +184,9 @@ struct idpf_adapter {
>  	bool tx_vec_allowed;
>  	bool rx_use_avx512;
>  	bool tx_use_avx512;
> +
> +	/* For PTP */
> +	uint64_t time_hw;
>  };
> 
>  TAILQ_HEAD(idpf_adapter_list, idpf_adapter);
> diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
> index e31d202646..b0037eca08 100644
> --- a/drivers/net/idpf/idpf_rxtx.c
> +++ b/drivers/net/idpf/idpf_rxtx.c
> @@ -10,6 +10,8 @@
>  #include "idpf_rxtx.h"
>  #include "idpf_rxtx_vec_common.h"
> 
> +static int idpf_timestamp_dynfield_offset = -1;
> +
>  const uint32_t *
>  idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
>  {
> @@ -965,6 +967,24 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
>  						 socket_id, tx_conf);
>  }
> 
> +static int
> +idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
> +{
> +	int err;
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> +		/* Register mbuf field and flag for Rx timestamp */
> +		err = rte_mbuf_dyn_rx_timestamp_register(
> +				&idpf_timestamp_dynfield_offset,
> +				&idpf_timestamp_dynflag);
> +		if (err) {
> +			PMD_DRV_LOG(ERR,
> +				"Cannot register mbuf field/flag for timestamp");
> +			return -EINVAL;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int
>  idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
>  {
> @@ -992,6 +1012,10 @@ idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
>  		rxd = &((volatile struct virtchnl2_singleq_rx_buf_desc *)(rxq->rx_ring))[i];
>  		rxd->pkt_addr = dma_addr;
>  		rxd->hdr_addr = 0;
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +		rxd->rsvd1 = 0;
> +		rxd->rsvd2 = 0;
> +#endif

It seems that IDPF does not add any configuration related to descriptor length.
The descriptor length is always the default 32B, so the conditional compilation
is not necessary.
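
In other words (a sketch of the suggestion above, assuming rsvd1 and rsvd2
are always present because the driver only ever programs the default 32B
descriptor layout), the zeroing could simply be made unconditional:

		rxd->pkt_addr = dma_addr;
		rxd->hdr_addr = 0;
		rxd->rsvd1 = 0;
		rxd->rsvd2 = 0;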

> 
>  		rxq->sw_ring[i] = mbuf;
>  	}
> @@ -1057,6 +1081,13 @@ idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
>  		return -EINVAL;
>  	}
> 
> +	err = idpf_register_ts_mbuf(rxq);
> +	if (err) {
> +		PMD_DRV_LOG(ERR, "fail to register timestamp mbuf %u",
> +					rx_queue_id);
> +		return -EIO;
> +	}
> +
>  	if (!rxq->bufq1) {
>  		/* Single queue */
>  		err = idpf_alloc_single_rxq_mbufs(rxq);
> @@ -1441,6 +1472,12 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	nb_rx = 0;
>  	rxq = (struct idpf_rx_queue *)rx_queue;
> 
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +	uint64_t ts_ns;
> +	struct iecm_hw *hw = &rxq->adapter->hw;
> +	struct idpf_adapter *ad = rxq->adapter;
> +#endif
> +
>  	if (unlikely(!rxq) || unlikely(!rxq->q_started))
>  		return nb_rx;
> 

Ditto.

> @@ -1451,6 +1488,11 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	       (volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *)rxq->rx_ring;
>  	ptype_tbl = rxq->adapter->ptype_tbl;
> 
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +		rxq->hw_register_set = 1;
> +#endif
> +

Ditto.

>  	while (nb_rx < nb_pkts) {
>  		rx_desc = &rx_desc_ring[rx_id];
> 
> @@ -1507,6 +1549,19 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		status_err0_qw1 = rx_desc->status_err0_qw1;
>  		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
>  		pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +		if (idpf_timestamp_dynflag > 0) {
> +			/* timestamp */
> +			ts_ns = idpf_tstamp_convert_32b_64b(hw, ad,
> +							    rxq->hw_register_set,
> +							    rte_le_to_cpu_32(rx_desc->ts_high));
> +			rxq->hw_register_set = 0;
> +			*RTE_MBUF_DYNFIELD(rxm,
> +					   idpf_timestamp_dynfield_offset,
> +					   rte_mbuf_timestamp_t *) = ts_ns;
> +			rxm->ol_flags |= idpf_timestamp_dynflag;
> +		}
> +#endif

Ditto.

>  		rxm->ol_flags |= pkt_flags;
> 
>  		rx_pkts[nb_rx++] = rxm;
> @@ -1778,6 +1833,10 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	nb_hold = 0;
>  	rxq = rx_queue;
> 
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +	uint64_t ts_ns;
> +#endif

Ditto.

> +
>  	if (unlikely(!rxq) || unlikely(!rxq->q_started))
>  		return nb_rx;
> 
> @@ -1785,6 +1844,13 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	rx_ring = rxq->rx_ring;
>  	ptype_tbl = rxq->adapter->ptype_tbl;
> 
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +	struct iecm_hw *hw = &rxq->adapter->hw;
> +	struct idpf_adapter *ad = rxq->adapter;
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +		rxq->hw_register_set = 1;
> +#endif

Ditto.

> +
>  	while (nb_rx < nb_pkts) {
>  		rxdp = &rx_ring[rx_id];
>  		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
> @@ -1841,6 +1907,19 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		rxm->packet_type =
>  			ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) &
>  				VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];
> +#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
> +		if (idpf_timestamp_dynflag > 0) {
> +			/* timestamp */
> +			ts_ns = idpf_tstamp_convert_32b_64b(hw, ad,
> +							    rxq->hw_register_set,
> +							    rte_le_to_cpu_32(rxdp->flex_nic_wb.flex_ts.ts_high));
> +			rxq->hw_register_set = 0;
> +			*RTE_MBUF_DYNFIELD(rxm,
> +					   idpf_timestamp_dynfield_offset,
> +					   rte_mbuf_timestamp_t *) = ts_ns;
> +			rxm->ol_flags |= idpf_timestamp_dynflag;
> +		}
> +#endif

Ditto.

>  		rx_pkts[nb_rx++] = rxm;
>  	}
>  	rxq->rx_tail = rx_id;
> diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
> index decd0a98c2..6fcb441143 100644
> --- a/drivers/net/idpf/idpf_rxtx.h
> +++ b/drivers/net/idpf/idpf_rxtx.h
> @@ -15,6 +15,41 @@
>  #include "base/virtchnl2_lan_desc.h"
>  #include "idpf_ethdev.h"
> 
> +/* MTS */
> +#define GLTSYN_CMD_SYNC_0_0	(PF_TIMESYNC_BASE + 0x0)
> +#define PF_GLTSYN_SHTIME_0_0	(PF_TIMESYNC_BASE + 0x4)
> +#define PF_GLTSYN_SHTIME_L_0	(PF_TIMESYNC_BASE + 0x8)
> +#define PF_GLTSYN_SHTIME_H_0	(PF_TIMESYNC_BASE + 0xC)
> +#define GLTSYN_ART_L_0		(PF_TIMESYNC_BASE + 0x10)
> +#define GLTSYN_ART_H_0		(PF_TIMESYNC_BASE + 0x14)
> +#define PF_GLTSYN_SHTIME_0_1	(PF_TIMESYNC_BASE + 0x24)
> +#define PF_GLTSYN_SHTIME_L_1	(PF_TIMESYNC_BASE + 0x28)
> +#define PF_GLTSYN_SHTIME_H_1	(PF_TIMESYNC_BASE + 0x2C)
> +#define PF_GLTSYN_SHTIME_0_2	(PF_TIMESYNC_BASE + 0x44)
> +#define PF_GLTSYN_SHTIME_L_2	(PF_TIMESYNC_BASE + 0x48)
> +#define PF_GLTSYN_SHTIME_H_2	(PF_TIMESYNC_BASE + 0x4C)
> +#define PF_GLTSYN_SHTIME_0_3	(PF_TIMESYNC_BASE + 0x64)
> +#define PF_GLTSYN_SHTIME_L_3	(PF_TIMESYNC_BASE + 0x68)
> +#define PF_GLTSYN_SHTIME_H_3	(PF_TIMESYNC_BASE + 0x6C)
> +
> +#define PF_TIMESYNC_BAR4_BASE	0x0E400000
> +#define GLTSYN_ENA		(PF_TIMESYNC_BAR4_BASE + 0x90)
> +#define GLTSYN_CMD		(PF_TIMESYNC_BAR4_BASE + 0x94)
> +#define GLTSYC_TIME_L		(PF_TIMESYNC_BAR4_BASE + 0x104)
> +#define GLTSYC_TIME_H		(PF_TIMESYNC_BAR4_BASE + 0x108)
> +
> +#define GLTSYN_CMD_SYNC_0_4	(PF_TIMESYNC_BAR4_BASE + 0x110)
> +#define PF_GLTSYN_SHTIME_L_4	(PF_TIMESYNC_BAR4_BASE + 0x118)
> +#define PF_GLTSYN_SHTIME_H_4	(PF_TIMESYNC_BAR4_BASE + 0x11C)
> +#define GLTSYN_INCVAL_L		(PF_TIMESYNC_BAR4_BASE + 0x150)
> +#define GLTSYN_INCVAL_H		(PF_TIMESYNC_BAR4_BASE + 0x154)
> +#define GLTSYN_SHADJ_L		(PF_TIMESYNC_BAR4_BASE + 0x158)
> +#define GLTSYN_SHADJ_H		(PF_TIMESYNC_BAR4_BASE + 0x15C)
> +
> +#define GLTSYN_CMD_SYNC_0_5	(PF_TIMESYNC_BAR4_BASE + 0x130)
> +#define PF_GLTSYN_SHTIME_L_5	(PF_TIMESYNC_BAR4_BASE + 0x138)
> +#define PF_GLTSYN_SHTIME_H_5	(PF_TIMESYNC_BAR4_BASE + 0x13C)
> +
>  /* In QLEN must be whole number of 32 descriptors. */
>  #define IDPF_ALIGN_RING_DESC	32
>  #define IDPF_MIN_RING_DESC	32
> @@ -66,6 +101,8 @@
>  	(sizeof(struct virtchnl2_ptype) + \
>  	(((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * sizeof((p)->proto_id[0])))
> 
> +extern uint64_t idpf_timestamp_dynflag;
> +
>  struct idpf_rx_queue {
>  	struct idpf_adapter *adapter;	/* the adapter this queue belongs to */
>  	struct rte_mempool *mp;		/* mbuf pool to populate Rx ring */
> @@ -231,5 +268,55 @@ void idpf_set_tx_function(struct rte_eth_dev *dev);
> 
>  const uint32_t *idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev);
> 
> -#endif /* _IDPF_RXTX_H_ */
> +#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND  10000
> +/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
> +static inline uint64_t
> +idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
> +			    uint32_t flag, uint32_t in_timestamp)
> +{
> +/* TODO: timestamp for ACC */
> +#ifdef RTE_ARCH_ARM64
> +	return 0;
> +#endif /* RTE_ARCH_ARM64 */
> +
> +#ifdef RTE_ARCH_X86_64
> +	const uint64_t mask = 0xFFFFFFFF;
> +	uint32_t hi, lo, lo2, delta;
> +	uint64_t ns;
> +
> +	if (flag) {
> +		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
> +		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
> +			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
> +		lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> +		hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
> +		/*
> +		 * On a typical system, the delta between lo and lo2 is ~1000ns,
> +		 * so 10000 seems a large-enough but not overly-big guard band.
> +		 */
> +		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
> +			lo2 = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> +		else
> +			lo2 = lo;
> +
> +		if (lo2 < lo) {
> +			lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> +			hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
> +		}
> +
> +		ad->time_hw = ((uint64_t)hi << 32) | lo;
> +	}
> +
> +	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
> +	if (delta > (mask / 2)) {
> +		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
> +		ns = ad->time_hw - delta;
> +	} else {
> +		ns = ad->time_hw + delta;
> +	}
> +
> +	return ns;
> +#endif /* RTE_ARCH_X86_64 */
> +}
> 
> +#endif /* _IDPF_RXTX_H_ */
> --
> 2.25.1
  
Junfeng Guo Oct. 14, 2022, 9:19 a.m. UTC | #3
> -----Original Message-----
> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Sent: Monday, October 3, 2022 22:22
> To: Guo, Junfeng <junfeng.guo@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing,
> Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Wang, Xiao W <xiao.w.wang@intel.com>; Qiao,
> Wenjing <wenjing.qiao@intel.com>
> Subject: Re: [PATCH v2 14/14] net/idpf: add support for timestamp
> offload
> 
> On 9/5/22 13:58, Junfeng Guo wrote:
> > Add support for timestamp offload.
> >
> > Signed-off-by: Wenjing Qiao <wenjing.qiao@intel.com>
> > Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
> 
> [snip]
> 
> > +/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
> > +static inline uint64_t
> > +idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
> > +			    uint32_t flag, uint32_t in_timestamp)
> > +{
> > +/* TODO: timestamp for ACC */
> > +#ifdef RTE_ARCH_ARM64
> > +	return 0;
> > +#endif /* RTE_ARCH_ARM64 */
> > +
> > +#ifdef RTE_ARCH_X86_64
> > +	const uint64_t mask = 0xFFFFFFFF;
> > +	uint32_t hi, lo, lo2, delta;
> > +	uint64_t ns;
> > +
> > +	if (flag) {
> > +		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
> > +		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
> > +			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
> > +		lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> > +		hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
> > +		/*
> > +		 * On a typical system, the delta between lo and lo2 is ~1000ns,
> > +		 * so 10000 seems a large-enough but not overly-big guard band.
> > +		 */
> > +		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
> > +			lo2 = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> > +		else
> > +			lo2 = lo;
> > +
> > +		if (lo2 < lo) {
> > +			lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
> > +			hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
> > +		}
> > +
> > +		ad->time_hw = ((uint64_t)hi << 32) | lo;
> > +	}
> > +
> > +	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
> > +	if (delta > (mask / 2)) {
> > +		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
> > +		ns = ad->time_hw - delta;
> > +	} else {
> > +		ns = ad->time_hw + delta;
> > +	}
> > +
> > +	return ns;
> > +#endif /* RTE_ARCH_X86_64 */
> 
> Conditional compilation is rather strange above.
> Will it break the build on architectures that are
> neither x86-64 nor ARM64?

Actually, the timestamp feature is currently only supported on the x86_64
architecture; on the others it will just return 0. We will update
this part in the coming version. Thanks!
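
One possible restructuring along those lines (a sketch only, not necessarily
the eventual v3 code; RTE_SET_USED comes from rte_common.h) keeps the x86-64
body intact and gives every other architecture an explicit return path:

static inline uint64_t
idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
			    uint32_t flag, uint32_t in_timestamp)
{
#ifdef RTE_ARCH_X86_64
	const uint64_t mask = 0xFFFFFFFF;
	uint32_t hi, lo, lo2, delta;
	uint64_t ns;

	if (flag) {
		/* Latch the current PHC time into the shadow registers. */
		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
		lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
		hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
			lo2 = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
		else
			lo2 = lo;
		if (lo2 < lo) {
			/* The low word wrapped between the two reads. */
			lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
			hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
		}
		ad->time_hw = ((uint64_t)hi << 32) | lo;
	}

	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
	if (delta > (mask / 2)) {
		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
		ns = ad->time_hw - delta;
	} else {
		ns = ad->time_hw + delta;
	}

	return ns;
#else
	/* TODO: timestamp for ACC; not implemented on other architectures. */
	RTE_SET_USED(hw);
	RTE_SET_USED(ad);
	RTE_SET_USED(flag);
	RTE_SET_USED(in_timestamp);
	return 0;
#endif /* RTE_ARCH_X86_64 */
}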

> 
> > +}
> >
> > +#endif /* _IDPF_RXTX_H_ */
  

Patch

diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h
index a32d5758ac..968e0e3cbf 100644
--- a/drivers/net/idpf/idpf_ethdev.h
+++ b/drivers/net/idpf/idpf_ethdev.h
@@ -184,6 +184,9 @@  struct idpf_adapter {
 	bool tx_vec_allowed;
 	bool rx_use_avx512;
 	bool tx_use_avx512;
+
+	/* For PTP */
+	uint64_t time_hw;
 };
 
 TAILQ_HEAD(idpf_adapter_list, idpf_adapter);
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index e31d202646..b0037eca08 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -10,6 +10,8 @@ 
 #include "idpf_rxtx.h"
 #include "idpf_rxtx_vec_common.h"
 
+static int idpf_timestamp_dynfield_offset = -1;
+
 const uint32_t *
 idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
 {
@@ -965,6 +967,24 @@  idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 						 socket_id, tx_conf);
 }
 
+static int
+idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
+{
+	int err;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		/* Register mbuf field and flag for Rx timestamp */
+		err = rte_mbuf_dyn_rx_timestamp_register(
+				&idpf_timestamp_dynfield_offset,
+				&idpf_timestamp_dynflag);
+		if (err) {
+			PMD_DRV_LOG(ERR,
+				"Cannot register mbuf field/flag for timestamp");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 static int
 idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 {
@@ -992,6 +1012,10 @@  idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 		rxd = &((volatile struct virtchnl2_singleq_rx_buf_desc *)(rxq->rx_ring))[i];
 		rxd->pkt_addr = dma_addr;
 		rxd->hdr_addr = 0;
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+		rxd->rsvd1 = 0;
+		rxd->rsvd2 = 0;
+#endif
 
 		rxq->sw_ring[i] = mbuf;
 	}
@@ -1057,6 +1081,13 @@  idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		return -EINVAL;
 	}
 
+	err = idpf_register_ts_mbuf(rxq);
+	if (err) {
+		PMD_DRV_LOG(ERR, "fail to register timestamp mbuf %u",
+					rx_queue_id);
+		return -EIO;
+	}
+
 	if (!rxq->bufq1) {
 		/* Single queue */
 		err = idpf_alloc_single_rxq_mbufs(rxq);
@@ -1441,6 +1472,12 @@  idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_rx = 0;
 	rxq = (struct idpf_rx_queue *)rx_queue;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	uint64_t ts_ns;
+	struct iecm_hw *hw = &rxq->adapter->hw;
+	struct idpf_adapter *ad = rxq->adapter;
+#endif
+
 	if (unlikely(!rxq) || unlikely(!rxq->q_started))
 		return nb_rx;
 
@@ -1451,6 +1488,11 @@  idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	       (volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *)rxq->rx_ring;
 	ptype_tbl = rxq->adapter->ptype_tbl;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+#endif
+
 	while (nb_rx < nb_pkts) {
 		rx_desc = &rx_desc_ring[rx_id];
 
@@ -1507,6 +1549,19 @@  idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		status_err0_qw1 = rx_desc->status_err0_qw1;
 		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
 		pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+		if (idpf_timestamp_dynflag > 0) {
+			/* timestamp */
+			ts_ns = idpf_tstamp_convert_32b_64b(hw, ad,
+							    rxq->hw_register_set,
+							    rte_le_to_cpu_32(rx_desc->ts_high));
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+					   idpf_timestamp_dynfield_offset,
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= idpf_timestamp_dynflag;
+		}
+#endif
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1778,6 +1833,10 @@  idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	uint64_t ts_ns;
+#endif
+
 	if (unlikely(!rxq) || unlikely(!rxq->q_started))
 		return nb_rx;
 
@@ -1785,6 +1844,13 @@  idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rx_ring = rxq->rx_ring;
 	ptype_tbl = rxq->adapter->ptype_tbl;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	struct iecm_hw *hw = &rxq->adapter->hw;
+	struct idpf_adapter *ad = rxq->adapter;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+#endif
+
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
@@ -1841,6 +1907,19 @@  idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxm->packet_type =
 			ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) &
 				VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+		if (idpf_timestamp_dynflag > 0) {
+			/* timestamp */
+			ts_ns = idpf_tstamp_convert_32b_64b(hw, ad,
+							    rxq->hw_register_set,
+							    rte_le_to_cpu_32(rxdp->flex_nic_wb.flex_ts.ts_high));
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+					   idpf_timestamp_dynfield_offset,
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= idpf_timestamp_dynflag;
+		}
+#endif
 		rx_pkts[nb_rx++] = rxm;
 	}
 	rxq->rx_tail = rx_id;
diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
index decd0a98c2..6fcb441143 100644
--- a/drivers/net/idpf/idpf_rxtx.h
+++ b/drivers/net/idpf/idpf_rxtx.h
@@ -15,6 +15,41 @@ 
 #include "base/virtchnl2_lan_desc.h"
 #include "idpf_ethdev.h"
 
+/* MTS */
+#define GLTSYN_CMD_SYNC_0_0	(PF_TIMESYNC_BASE + 0x0)
+#define PF_GLTSYN_SHTIME_0_0	(PF_TIMESYNC_BASE + 0x4)
+#define PF_GLTSYN_SHTIME_L_0	(PF_TIMESYNC_BASE + 0x8)
+#define PF_GLTSYN_SHTIME_H_0	(PF_TIMESYNC_BASE + 0xC)
+#define GLTSYN_ART_L_0		(PF_TIMESYNC_BASE + 0x10)
+#define GLTSYN_ART_H_0		(PF_TIMESYNC_BASE + 0x14)
+#define PF_GLTSYN_SHTIME_0_1	(PF_TIMESYNC_BASE + 0x24)
+#define PF_GLTSYN_SHTIME_L_1	(PF_TIMESYNC_BASE + 0x28)
+#define PF_GLTSYN_SHTIME_H_1	(PF_TIMESYNC_BASE + 0x2C)
+#define PF_GLTSYN_SHTIME_0_2	(PF_TIMESYNC_BASE + 0x44)
+#define PF_GLTSYN_SHTIME_L_2	(PF_TIMESYNC_BASE + 0x48)
+#define PF_GLTSYN_SHTIME_H_2	(PF_TIMESYNC_BASE + 0x4C)
+#define PF_GLTSYN_SHTIME_0_3	(PF_TIMESYNC_BASE + 0x64)
+#define PF_GLTSYN_SHTIME_L_3	(PF_TIMESYNC_BASE + 0x68)
+#define PF_GLTSYN_SHTIME_H_3	(PF_TIMESYNC_BASE + 0x6C)
+
+#define PF_TIMESYNC_BAR4_BASE	0x0E400000
+#define GLTSYN_ENA		(PF_TIMESYNC_BAR4_BASE + 0x90)
+#define GLTSYN_CMD		(PF_TIMESYNC_BAR4_BASE + 0x94)
+#define GLTSYC_TIME_L		(PF_TIMESYNC_BAR4_BASE + 0x104)
+#define GLTSYC_TIME_H		(PF_TIMESYNC_BAR4_BASE + 0x108)
+
+#define GLTSYN_CMD_SYNC_0_4	(PF_TIMESYNC_BAR4_BASE + 0x110)
+#define PF_GLTSYN_SHTIME_L_4	(PF_TIMESYNC_BAR4_BASE + 0x118)
+#define PF_GLTSYN_SHTIME_H_4	(PF_TIMESYNC_BAR4_BASE + 0x11C)
+#define GLTSYN_INCVAL_L		(PF_TIMESYNC_BAR4_BASE + 0x150)
+#define GLTSYN_INCVAL_H		(PF_TIMESYNC_BAR4_BASE + 0x154)
+#define GLTSYN_SHADJ_L		(PF_TIMESYNC_BAR4_BASE + 0x158)
+#define GLTSYN_SHADJ_H		(PF_TIMESYNC_BAR4_BASE + 0x15C)
+
+#define GLTSYN_CMD_SYNC_0_5	(PF_TIMESYNC_BAR4_BASE + 0x130)
+#define PF_GLTSYN_SHTIME_L_5	(PF_TIMESYNC_BAR4_BASE + 0x138)
+#define PF_GLTSYN_SHTIME_H_5	(PF_TIMESYNC_BAR4_BASE + 0x13C)
+
 /* In QLEN must be whole number of 32 descriptors. */
 #define IDPF_ALIGN_RING_DESC	32
 #define IDPF_MIN_RING_DESC	32
@@ -66,6 +101,8 @@ 
 	(sizeof(struct virtchnl2_ptype) + \
 	(((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * sizeof((p)->proto_id[0])))
 
+extern uint64_t idpf_timestamp_dynflag;
+
 struct idpf_rx_queue {
 	struct idpf_adapter *adapter;	/* the adapter this queue belongs to */
 	struct rte_mempool *mp;		/* mbuf pool to populate Rx ring */
@@ -231,5 +268,55 @@  void idpf_set_tx_function(struct rte_eth_dev *dev);
 
 const uint32_t *idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev);
 
-#endif /* _IDPF_RXTX_H_ */
+#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND  10000
+/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
+static inline uint64_t
+idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
+			    uint32_t flag, uint32_t in_timestamp)
+{
+/* TODO: timestamp for ACC */
+#ifdef RTE_ARCH_ARM64
+	return 0;
+#endif /* RTE_ARCH_ARM64 */
+
+#ifdef RTE_ARCH_X86_64
+	const uint64_t mask = 0xFFFFFFFF;
+	uint32_t hi, lo, lo2, delta;
+	uint64_t ns;
+
+	if (flag) {
+		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
+		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
+			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
+		lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+		hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
+		/*
+		 * On a typical system, the delta between lo and lo2 is ~1000ns,
+		 * so 10000 seems a large-enough but not overly-big guard band.
+		 */
+		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
+			lo2 = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+		else
+			lo2 = lo;
+
+		if (lo2 < lo) {
+			lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+			hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
+		}
+
+		ad->time_hw = ((uint64_t)hi << 32) | lo;
+	}
+
+	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
+	if (delta > (mask / 2)) {
+		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
+		ns = ad->time_hw - delta;
+	} else {
+		ns = ad->time_hw + delta;
+	}
+
+	return ns;
+#endif /* RTE_ARCH_X86_64 */
+}
 
+#endif /* _IDPF_RXTX_H_ */
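
For intuition, the delta logic in the helper above picks the 64-bit value
nearest to the cached ad->time_hw whose low 32 bits equal the descriptor
timestamp. A standalone sketch with hypothetical values (extend_32b_ts is
an illustrative name, not part of the patch):

static inline uint64_t
extend_32b_ts(uint64_t time_hw, uint32_t ts32)
{
	const uint64_t mask = 0xFFFFFFFF;
	uint32_t delta = ts32 - (uint32_t)(time_hw & mask);

	if (delta > (mask / 2))
		/* ts32 lags the cached time, possibly across a 32-bit wrap. */
		return time_hw - ((uint32_t)(time_hw & mask) - ts32);
	/* ts32 is at or slightly ahead of the cached time. */
	return time_hw + delta;
}

/*
 * extend_32b_ts(0x100000100, 0x00000300) == 0x100000300
 *	(sample 0x200 ns ahead of the cached time)
 * extend_32b_ts(0x100000100, 0xFFFFFF00) == 0x0FFFFFF00
 *	(sample 0x200 ns behind the cached time, across the 32-bit wrap)
 */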