net/e1000: support launchtime feature

Message ID 20231217202040.478959-1-chuanyu.xue@uconn.edu (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Series net/e1000: support launchtime feature

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Functional success Functional Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS

Commit Message

Chuanyu Xue Dec. 17, 2023, 8:20 p.m. UTC
  Enable time-based scheduled Tx of packets based on the
RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP flag. The launch time defines the
packet transmission time, based on the PTP clock at the MAC layer, and
is written into the advanced transmit descriptor.

Signed-off-by: Chuanyu Xue <chuanyu.xue@uconn.edu>
---
 drivers/net/e1000/base/e1000_regs.h |  1 +
 drivers/net/e1000/e1000_ethdev.h    |  3 ++
 drivers/net/e1000/igb_ethdev.c      | 28 ++++++++++++++++++
 drivers/net/e1000/igb_rxtx.c        | 44 ++++++++++++++++++++++++-----
 4 files changed, 69 insertions(+), 7 deletions(-)
  

Comments

Simei Su Dec. 20, 2023, 6:29 a.m. UTC | #1
Hi Chuanyu,

> -----Original Message-----
> From: Chuanyu Xue <chuanyu.xue@uconn.edu>
> Sent: Monday, December 18, 2023 4:21 AM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Xing, Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Chuanyu Xue <chuanyu.xue@uconn.edu>
> Subject: [PATCH] net/e1000: support launchtime feature
> 
> Enable time-based scheduled Tx of packets based on the
> RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP flag. The launch time defines
> the packet transmission time, based on the PTP clock at the MAC layer,
> and is written into the advanced transmit descriptor.
> 
> Signed-off-by: Chuanyu Xue <chuanyu.xue@uconn.edu>
> ---
>  drivers/net/e1000/base/e1000_regs.h |  1 +
>  drivers/net/e1000/e1000_ethdev.h    |  3 ++
>  drivers/net/e1000/igb_ethdev.c      | 28 ++++++++++++++++++
>  drivers/net/e1000/igb_rxtx.c        | 44 ++++++++++++++++++++++++-----
>  4 files changed, 69 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/e1000/base/e1000_regs.h
> b/drivers/net/e1000/base/e1000_regs.h
> index d44de59c29..092d9d71e6 100644
> --- a/drivers/net/e1000/base/e1000_regs.h
> +++ b/drivers/net/e1000/base/e1000_regs.h
> @@ -162,6 +162,7 @@
> 
>  /* QAV Tx mode control register */
>  #define E1000_I210_TQAVCTRL	0x3570
> +#define E1000_I210_LAUNCH_OS0 0x3578

What does this register mean?

> 
>  /* QAV Tx mode control register bitfields masks */
>  /* QAV enable */
> diff --git a/drivers/net/e1000/e1000_ethdev.h
> b/drivers/net/e1000/e1000_ethdev.h
> index 718a9746ed..174f7aaf52 100644
> --- a/drivers/net/e1000/e1000_ethdev.h
> +++ b/drivers/net/e1000/e1000_ethdev.h
> @@ -382,6 +382,9 @@ extern struct igb_rss_filter_list igb_filter_rss_list;
>  TAILQ_HEAD(igb_flow_mem_list, igb_flow_mem);
>  extern struct igb_flow_mem_list igb_flow_list;
> 
> +extern uint64_t igb_tx_timestamp_dynflag;
> +extern int igb_tx_timestamp_dynfield_offset;
> +
>  extern const struct rte_flow_ops igb_flow_ops;
> 
>  /*
> diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
> index 8858f975f8..4d3d8ae30a 100644
> --- a/drivers/net/e1000/igb_ethdev.c
> +++ b/drivers/net/e1000/igb_ethdev.c
> @@ -223,6 +223,7 @@ static int igb_timesync_read_time(struct rte_eth_dev
> *dev,
>  				  struct timespec *timestamp);
>  static int igb_timesync_write_time(struct rte_eth_dev *dev,
>  				   const struct timespec *timestamp);
> +static int eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock);
>  static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
>  					uint16_t queue_id);
>  static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
> @@ -313,6 +314,9 @@ static const struct rte_pci_id pci_id_igbvf_map[] = {
>  	{ .vendor_id = 0, /* sentinel */ },
>  };
> 
> +uint64_t igb_tx_timestamp_dynflag;
> +int igb_tx_timestamp_dynfield_offset = -1;
> +
>  static const struct rte_eth_desc_lim rx_desc_lim = {
>  	.nb_max = E1000_MAX_RING_DESC,
>  	.nb_min = E1000_MIN_RING_DESC,
> @@ -389,6 +393,7 @@ static const struct eth_dev_ops eth_igb_ops = {
>  	.timesync_adjust_time = igb_timesync_adjust_time,
>  	.timesync_read_time   = igb_timesync_read_time,
>  	.timesync_write_time  = igb_timesync_write_time,
> +	.read_clock		      = eth_igb_read_clock,
>  };
> 
>  /*
> @@ -1198,6 +1203,7 @@ eth_igb_start(struct rte_eth_dev *dev)
>  	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
>  	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
>  	int ret, mask;
> +	uint32_t tqavctrl;
>  	uint32_t intr_vector = 0;
>  	uint32_t ctrl_ext;
>  	uint32_t *speeds;
> @@ -1281,6 +1287,15 @@ eth_igb_start(struct rte_eth_dev *dev)
>  		return ret;
>  	}
> 
> +	if (igb_tx_timestamp_dynflag > 0) {
> +		tqavctrl = E1000_READ_REG(hw, E1000_I210_TQAVCTRL);
> +		tqavctrl |= E1000_TQAVCTRL_MODE;
> +		tqavctrl |= E1000_TQAVCTRL_FETCH_ARB; /* Fetch the queue most empty, no Round Robin */
> +		tqavctrl |= E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE; /* Enable launch time */

In the kernel driver, "E1000_TQAVCTRL_DATATRANTIM (BIT(9))" and
"E1000_TQAVCTRL_FETCHTIME_DELTA (0xFFFF << 16)" are set; does it have some
other intention here?

> +		E1000_WRITE_REG(hw, E1000_I210_TQAVCTRL, tqavctrl);
> +		E1000_WRITE_REG(hw, E1000_I210_LAUNCH_OS0, 1ULL << 31); /* Set launch offset to default */
> +	}
> +
>  	e1000_clear_hw_cntrs_base_generic(hw);
> 
>  	/*
> @@ -4882,6 +4897,19 @@ igb_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
>  	return  0;
>  }
> 
> +static int
> +eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
> +{
> +	uint64_t systime_cycles;
> +	struct e1000_adapter *adapter = dev->data->dev_private;
> +
> +	systime_cycles = igb_read_systime_cyclecounter(dev);
> +	uint64_t ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);

Do you also run "ptp timesync" when testing this launchtime feature?

> +	*clock = ns;
> +
> +	return 0;
> +}
> +
>  static int
>  eth_igb_get_reg_length(struct rte_eth_dev *dev __rte_unused)
>  {
> diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
> index 448c4b7d9d..e5da8e250d 100644
> --- a/drivers/net/e1000/igb_rxtx.c
> +++ b/drivers/net/e1000/igb_rxtx.c
> @@ -212,6 +212,9 @@ struct igb_tx_queue {
>  #define IGB_TSO_MAX_HDRLEN			(512)
>  #define IGB_TSO_MAX_MSS				(9216)
> 
> +/* Macro to compensate latency in launch time offloading */
> +#define E1000_I210_LT_LATENCY		0x41F9

What does this value depend on?

> +
> 
>  /*********************************************************************
>   *
>   *  TX function
> @@ -244,12 +247,13 @@ check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
>  static inline void
>  igbe_set_xmit_ctx(struct igb_tx_queue* txq,
>  		volatile struct e1000_adv_tx_context_desc *ctx_txd,
> -		uint64_t ol_flags, union igb_tx_offload tx_offload)
> +		uint64_t ol_flags, union igb_tx_offload tx_offload, uint64_t txtime)
>  {
>  	uint32_t type_tucmd_mlhl;
>  	uint32_t mss_l4len_idx;
>  	uint32_t ctx_idx, ctx_curr;
>  	uint32_t vlan_macip_lens;
> +	uint32_t launch_time;
>  	union igb_tx_offload tx_offload_mask;
> 
>  	ctx_curr = txq->ctx_curr;
> @@ -312,16 +316,25 @@ igbe_set_xmit_ctx(struct igb_tx_queue* txq,
>  		}
>  	}
> 
> -	txq->ctx_cache[ctx_curr].flags = ol_flags;
> -	txq->ctx_cache[ctx_curr].tx_offload.data =
> -		tx_offload_mask.data & tx_offload.data;
> -	txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
> +	if (!txtime) {
> +		txq->ctx_cache[ctx_curr].flags = ol_flags;
> +		txq->ctx_cache[ctx_curr].tx_offload.data =
> +			tx_offload_mask.data & tx_offload.data;
> +		txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
> +	}
> 
>  	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
>  	vlan_macip_lens = (uint32_t)tx_offload.data;
>  	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
>  	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
>  	ctx_txd->u.seqnum_seed = 0;
> +
> +	if (txtime) {
> +		launch_time = (txtime - E1000_I210_LT_LATENCY) % NSEC_PER_SEC;
> +		ctx_txd->u.launch_time = rte_cpu_to_le_32(launch_time / 32);
> +	} else {
> +		ctx_txd->u.launch_time = 0;
> +	}
>  }
> 
>  /*
> @@ -400,6 +413,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint32_t new_ctx = 0;
>  	uint32_t ctx = 0;
>  	union igb_tx_offload tx_offload = {0};
> +	uint64_t ts;
> 
>  	txq = tx_queue;
>  	sw_ring = txq->sw_ring;
> @@ -552,7 +566,12 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  					txe->mbuf = NULL;
>  				}
> 
> -				igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
> +				if (igb_tx_timestamp_dynflag > 0) {
> +					ts = *RTE_MBUF_DYNFIELD(tx_pkt, igb_tx_timestamp_dynfield_offset, uint64_t *);
> +					igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload, ts);
> +				} else {
> +					igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload, 0);
> +				}
> 
>  				txe->last_id = tx_last;
>  				tx_id = txe->next_id;
> @@ -1464,7 +1483,8 @@ igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
>  			  RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
>  			  RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
>  			  RTE_ETH_TX_OFFLOAD_TCP_TSO     |
> -			  RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
> +			  RTE_ETH_TX_OFFLOAD_MULTI_SEGS  |
> +			  RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
> 
>  	return tx_offload_capa;
>  }
> @@ -2579,9 +2599,11 @@ eth_igb_tx_init(struct rte_eth_dev *dev)
>  {
>  	struct e1000_hw     *hw;
>  	struct igb_tx_queue *txq;
> +	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
>  	uint32_t tctl;
>  	uint32_t txdctl;
>  	uint16_t i;
> +	int err;
> 
>  	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> 
> @@ -2612,6 +2634,14 @@ eth_igb_tx_init(struct rte_eth_dev *dev)
>  		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
>  	}
> 
> +	if (offloads & RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) {
> +		err = rte_mbuf_dyn_tx_timestamp_register(
> +			&igb_tx_timestamp_dynfield_offset,
> +			&igb_tx_timestamp_dynflag);
> +		if (err)
> +			PMD_DRV_LOG(ERR, "Failed to register tx timestamp dynamic field");
> +	}
> +
>  	/* Program the Transmit Control Register. */
>  	tctl = E1000_READ_REG(hw, E1000_TCTL);
>  	tctl &= ~E1000_TCTL_CT;
> --
> 2.25.1
  
Chuanyu Xue Dec. 22, 2023, 3:03 a.m. UTC | #2
Hi Simei,
Thank you so much for your review.

>> 
>>  /* QAV Tx mode control register */
>>  #define E1000_I210_TQAVCTRL	0x3570
>> +#define E1000_I210_LAUNCH_OS0 0x3578
>
>What does this register mean?
>

"LAUNCH_OS0" is defined as LaunchOffset register, which sets the base time
for launchtime. Based on i210 datasheet V3.7 Sec 7.2.2.2.3, the actual launch
time is computed as 32 * (LaunchOffset + LaunchTime). In this context, the
register is used to set the LaunchOffset later as 0. 
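
To make the encoding concrete, here is a minimal sketch of the computation
this implies (the helper itself is hypothetical; the names and the 32 ns
granularity mirror the patch, ignoring the latency compensation):

    /* With LaunchOffset = 0, the NIC launches at 32 ns * <descriptor field>,
     * so the field is the target time within the current second divided by
     * 32, as igbe_set_xmit_ctx() computes in this patch. */
    static inline uint32_t
    i210_launch_time_field(uint64_t txtime_ns)
    {
        uint64_t in_second = txtime_ns % NSEC_PER_SEC; /* field spans < 1 s */
        return (uint32_t)(in_second / 32);             /* 32 ns granularity */
    }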

>> 
>> +	if (igb_tx_timestamp_dynflag > 0) {
>> +		tqavctrl = E1000_READ_REG(hw, E1000_I210_TQAVCTRL);
>> +		tqavctrl |= E1000_TQAVCTRL_MODE;
>> +		tqavctrl |= E1000_TQAVCTRL_FETCH_ARB; /* Fetch the queue most empty, no Round Robin */
>> +		tqavctrl |= E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE; /* Enable launch time */
>
> In the kernel driver, "E1000_TQAVCTRL_DATATRANTIM (BIT(9))" and
> "E1000_TQAVCTRL_FETCHTIME_DELTA (0xFFFF << 16)" are set; does it have some
> other intention here?

"E1000_TQAVCTRL_DATATRANTIM" is same as "E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE"

"E1000_TQAVCTRL_FETCHTIME_DELTA" maximizes the data fetch time.
If "E1000_TQAVCTRL_FETCH_ARB" is set, there is no need to set this field,
because the arbitrary fetching prioritizes the most empty queue, regardless
of the fetch time. (referring Sec 7.2.7.5) 

I have also tested aligning with the kernel driver settings using (0xFFFF << 16) 
and omitting 'E1000_TQAVCTRL_FETCH_ARB', the launchtime feature also worked
as expected. However, the arbitrary fetch mode seems more suitable 
as DPDK lacks an interface to set fetch delay, unlike in the kernel which can 
be configured (e.g., through 'Delta' in ETF Qdisc). Any suggestions here?
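
For reference, here is a sketch of the kernel-style alternative settings
discussed above (the FETCHTIME_DELTA mask is taken from your comment; the
exact bit layout is an assumption on my side):

    /* Kernel-style alternative: fixed fetch-time delta instead of
     * arbitrary (most-empty-queue) fetch arbitration. */
    tqavctrl = E1000_READ_REG(hw, E1000_I210_TQAVCTRL);
    tqavctrl |= E1000_TQAVCTRL_MODE;
    tqavctrl |= E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE; /* aka DATATRANTIM, BIT(9) */
    tqavctrl |= 0xFFFF << 16; /* FETCHTIME_DELTA: maximize the fetch time */
    E1000_WRITE_REG(hw, E1000_I210_TQAVCTRL, tqavctrl);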

>> +static int
>> +eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
>> +{
>> +	uint64_t systime_cycles;
>> +	struct e1000_adapter *adapter = dev->data->dev_private;
>> +
>> +	systime_cycles = igb_read_systime_cyclecounter(dev);
>> +	uint64_t ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);
>
>Do you also run "ptp timesync" when testing this launchtime feature?
>

I used the `rte_eth_timesync_enable` function during the test. I am not familiar 
with the `ptp timesync` in DPDK. If you are referring to something
else, could you please guide me on how to test it?

>> 
>> +/* Macro to compensate latency in launch time offloading*/
>> +#define E1000_I210_LT_LATENCY		0x41F9
>
>What does this value depend on? 
>

Through my tests, I observed a constant latency between the launch time
and the actual Tx time measured by the `rte_eth_timesync_read_tx_timestamp` function.
I didn't find a description of this latency in the datasheet.

In my tests, the latency appears to depend on the data rate and to be
independent of packet size and throughput. The latency changed slightly between
experiments, but within each experiment it remained constant for all Tx packets.
I also tested this latency consistently on two different NICs (I210 GE-1T-X1, I210 X1-V2).

Here are some measurement results (in ns):

+-----------+---------------+---------------+---------------+---------------+---------------+
| Data Rate | Measurement 1 | Measurement 2 | Measurement 3 | Measurement 4 | Measurement 5 |
+-----------+---------------+---------------+---------------+---------------+---------------+
| 10M       | 14400         | 14544         | 14384         | 14896         | 14336         |
+-----------+---------------+---------------+---------------+---------------+---------------+
| 100M      | 31016         | 31016         | 31000         | 31000         | 31048         |
+-----------+---------------+---------------+---------------+---------------+---------------+
| 1G        | 16880         | 16880         | 16880         | 16880         | 16880         |
+-----------+---------------+---------------+---------------+---------------+---------------+
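
For reference on how the compensation constant relates to these numbers:

    /* E1000_I210_LT_LATENCY = 0x41F9 = 16889 ns, within ~10 ns of the
     * constant 16880 ns latency measured at 1G above. */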

Any suggestions here? Is it supposed to be embedded directly here or left to the 
application level to compensate? I can fix it accordingly.

- Chuanyu
  
Simei Su Dec. 26, 2023, 9:33 a.m. UTC | #3
Hi Chuanyu,

> -----Original Message-----
> From: Chuanyu Xue <chuanyu.xue@uconn.edu>
> Sent: Friday, December 22, 2023 11:04 AM
> To: Su, Simei <simei.su@intel.com>
> Cc: Xing, Beilei <beilei.xing@intel.com>; chuanyu.xue@uconn.edu;
> dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Lu, Wenzhuo
> <wenzhuo.lu@intel.com>
> Subject: RE: [PATCH] net/e1000: support launchtime feature
> 
> Hi Simei,
> Thank you so much for your review.
> 
> >>
> >>  /* QAV Tx mode control register */
> >>  #define E1000_I210_TQAVCTRL	0x3570
> >> +#define E1000_I210_LAUNCH_OS0 0x3578
> >
> >What does this register mean?
> >
> 
> "LAUNCH_OS0" is defined as LaunchOffset register, which sets the base time
> for launchtime. Based on i210 datasheet V3.7 Sec 7.2.2.2.3, the actual launch
> time is computed as 32 * (LaunchOffset + LaunchTime). In this context, the
> register is used to set the LaunchOffset later as 0.

OK, got it. Thanks for your explanation.

> 
> >>
> >> +	if (igb_tx_timestamp_dynflag > 0) {
> >> +		tqavctrl = E1000_READ_REG(hw, E1000_I210_TQAVCTRL);
> >> +		tqavctrl |= E1000_TQAVCTRL_MODE;
> >> +		tqavctrl |= E1000_TQAVCTRL_FETCH_ARB; /* Fetch the queue most empty, no Round Robin */
> >> +		tqavctrl |= E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE; /* Enable launch time */
> >
> > In the kernel driver, "E1000_TQAVCTRL_DATATRANTIM (BIT(9))" and
> > "E1000_TQAVCTRL_FETCHTIME_DELTA (0xFFFF << 16)" are set; does it have
> > some other intention here?
> 
> "E1000_TQAVCTRL_DATATRANTIM" is same as
> "E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE"

Yes, these two values are the same.

> 
> "E1000_TQAVCTRL_FETCHTIME_DELTA" maximizes the data fetch time.
> If "E1000_TQAVCTRL_FETCH_ARB" is set, there is no need to set this field,
> because the arbitrary fetching prioritizes the most empty queue, regardless of
> the fetch time. (referring Sec 7.2.7.5)
> 
> I have also tested aligning with the kernel driver settings using (0xFFFF << 16)
> and omitting 'E1000_TQAVCTRL_FETCH_ARB', the launchtime feature also
> worked as expected. However, the arbitrary fetch mode seems more suitable
> as DPDK lacks an interface to set fetch delay, unlike in the kernel which can be
> configured (e.g., through 'Delta' in ETF Qdisc). Any suggestions here?

Yes, DPDK doesn't have an interface to set the delay. I agree with your approach.

> 
> >> +static int
> >> +eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
> >> +{
> >> +	uint64_t systime_cycles;
> >> +	struct e1000_adapter *adapter = dev->data->dev_private;
> >> +
> >> +	systime_cycles = igb_read_systime_cyclecounter(dev);
> >> +	uint64_t ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);
> >
> >Do you also run "ptp timesync" when testing this launchtime feature?
> >
> 
> I used the `rte_eth_timesync_enable` function during the test. I am not familiar
> with the `ptp timesync` in DPDK. If you are referring to something else, could
> you please guide me on how to test it?

Do you use your own application or a DPDK application to test this launchtime feature,
for example, dpdk testpmd?

> 
> >>
> >> +/* Macro to compensate latency in launch time offloading*/
> >> +#define E1000_I210_LT_LATENCY		0x41F9
> >
> >What does this value depend on?
> >
> 
> Through my tests, I observed a constant latency between the launch time and
> the actual Tx time measured by the `rte_eth_timesync_read_tx_timestamp`
> function.
> I didn't find a description of this latency in the datasheet.
> 
> In my tests, the latency appears to depend on the data rate and to be independent
> of packet size and throughput. The latency changed slightly between
> experiments, but within each experiment it remained constant for all Tx
> packets.

OK, got it.

> I also tested this latency consistently on two different NICs (I210 GE-1T-X1,
> I210 X1-V2).
> 
> Here are some measurement results (in ns):
> 
> +-----------+---------------+---------------+---------------+---------------+---------------+
> | Data Rate | Measurement 1 | Measurement 2 | Measurement 3 | Measurement 4 | Measurement 5 |
> +-----------+---------------+---------------+---------------+---------------+---------------+
> | 10M       | 14400         | 14544         | 14384         | 14896         | 14336         |
> +-----------+---------------+---------------+---------------+---------------+---------------+
> | 100M      | 31016         | 31016         | 31000         | 31000         | 31048         |
> +-----------+---------------+---------------+---------------+---------------+---------------+
> | 1G        | 16880         | 16880         | 16880         | 16880         | 16880         |
> +-----------+---------------+---------------+---------------+---------------+---------------+
> 
> Any suggestions here? Is it supposed to be embedded directly here or left to
> the application level to compensate? I can fix it accordingly.

I think it can be put here directly just as you do.

Thanks,
Simei

> 
> - Chuanyu
  
Chuanyu Xue Dec. 29, 2023, 9:29 p.m. UTC | #4
>> 
>> >> +static int
>> >> +eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
>> >> +{
>> >> +	uint64_t systime_cycles;
>> >> +	struct e1000_adapter *adapter = dev->data->dev_private;
>> >> +
>> >> +	systime_cycles = igb_read_systime_cyclecounter(dev);
>> >> +	uint64_t ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);
>> >
>> >Do you also run "ptp timesync" when testing this launchtime feature?
>> >
>> 
>> I used the `rte_eth_timesync_enable` function during the test. I am not familiar
>> with the `ptp timesync` in DPDK. If you are referring to something else, could
>> you please guide me on how to test it?
>
>Do you use your own application or a DPDK application to test this launchtime feature,
>for example, dpdk testpmd?

Yes, I used my own application to test it. The benefit of the launch time feature
in bounded delay and jitter is significant compared with when it is disabled.

Specifically, my app periodically calls `rte_eth_tx_burst` on the talker with the
`rte_dynfield_timestamp` field set, and checks on the listener whether the receive
time in the NIC hardware timestamp is as expected. Talker and listener are directly
connected by an RJ45 cable, both with i210 NICs installed. The feature works
perfectly in my test.
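
For concreteness, here is a minimal sketch of the talker loop (not my actual
app; the queue id and the 1 ms offset are illustrative):

    #include <rte_ethdev.h>
    #include <rte_mbuf_dyn.h>

    /* Filled once at init by rte_mbuf_dyn_tx_timestamp_register(). */
    static int ts_offset;
    static uint64_t ts_flag;

    /* Schedule one packet 1 ms after the NIC clock "now". Assumes the port
     * was started with RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP enabled and
     * rte_eth_timesync_enable(port_id) was called. */
    static void
    send_scheduled(uint16_t port_id, struct rte_mbuf *pkt)
    {
        uint64_t now_ns;

        rte_eth_read_clock(port_id, &now_ns);  /* PTP time via .read_clock */
        *RTE_MBUF_DYNFIELD(pkt, ts_offset, uint64_t *) = now_ns + 1000000;
        pkt->ol_flags |= ts_flag;              /* mark the timestamp valid */
        rte_eth_tx_burst(port_id, 0, &pkt, 1);
    }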

I also tested it with testpmd using the `txtimes` config. However, there seems to be
an issue in testpmd: the tx_only mode sends packets as fast as possible, resulting
in an increasing gap between the current time and the scheduled transmission time.
Based on the i210 datasheet Sec 7.2.2.2.3, the launch time should be within
(current_time, current_time + 0.5 s), so most Tx packets are not scheduled.
I got similar test results with the DPDK igc driver, which already implements the
launch time feature.

The following is how I tried to test with testpmd. Please let me know if I did something wrong.

	sudo ./dpdk-testpmd -- -i --forward-mode=txonly
	
	testpmd> port stop 0
	testpmd> set burst 1
	testpmd> set txtimes 100000000,0
	testpmd> port config 0 tx_offload send_on_timestamp on
	testpmd> port start 0
	testpmd> start

>
>> +-----------+---------------+---------------+---------------+---------------+---------------+
>> | 1G        | 16880         | 16880         | 16880         | 16880         | 16880         |
>> +-----------+---------------+---------------+---------------+---------------+---------------+
>> 
>> Any suggestions here? Is it supposed to be embedded directly here or left to
>> the application level to compensate? I can fix it accordingly.
>
>I think it can be put here directly just as you do.

Got it. I will keep this delay compensation here and revise it in the next patch version.
  
Simei Su Jan. 3, 2024, 2:26 a.m. UTC | #5
Hi Chuanyu,

> -----Original Message-----
> From: Chuanyu Xue <chuanyu.xue@uconn.edu>
> Sent: Saturday, December 30, 2023 5:30 AM
> To: Su, Simei <simei.su@intel.com>
> Cc: Xing, Beilei <beilei.xing@intel.com>; chuanyu.xue@uconn.edu;
> dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Lu, Wenzhuo
> <wenzhuo.lu@intel.com>
> Subject: RE: [PATCH] net/e1000: support launchtime feature
> 
> >>
> >> >> +static int
> >> >> +eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
> >> >> +{
> >> >> +	uint64_t systime_cycles;
> >> >> +	struct e1000_adapter *adapter = dev->data->dev_private;
> >> >> +
> >> >> +	systime_cycles = igb_read_systime_cyclecounter(dev);
> >> >> +	uint64_t ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);
> >> >
> >> >Do you also run "ptp timesync" when testing this launchtime feature?
> >> >
> >>
> >> I used the `rte_eth_timesync_enable` function during the test. I am not
> >> familiar with the `ptp timesync` in DPDK. If you are referring to
> >> something else, could you please guide me on how to test it?
> >
> >Do you use your own application or a DPDK application to test this
> >launchtime feature, for example, dpdk testpmd?
> 
> Yes, I used my own application to test it. The benefit of the launch time feature in
> bounded delay and jitter is significant compared with when it is disabled.
> 
> Specifically, my app periodically calls `rte_eth_tx_burst` on the talker with the
> `rte_dynfield_timestamp` field set, and checks on the listener whether the receive
> time in the NIC hardware timestamp is as expected. Talker and listener
> are directly connected by an RJ45 cable, both with i210 NICs installed. The
> feature works perfectly in my test.

OK, it sounds good.

> 
> I also tested it with testpmd using the `txtimes` config. However, there seems to be an
> issue in testpmd: the tx_only mode sends packets as fast as
> possible, resulting in an increasing gap between the current time and the
> scheduled transmission time.
> Based on the i210 datasheet Sec 7.2.2.2.3, the launch time should be within
> (current_time, current_time + 0.5 s), so most Tx packets are not
> scheduled.
> I got similar test results with the DPDK igc driver, which already implements
> the launch time feature.
> 
> The following is how I tried to test with testpmd. Please let me know if I did
> something wrong.
> 
> 	sudo ./dpdk-testpmd -- -i --forward-mode=txonly
> 
> 	testpmd> port stop 0
> 	testpmd> set burst 1
> 	testpmd> set txtimes 100000000,0
> 	testpmd> port config 0 tx_offload send_on_timestamp on
> 	testpmd> port start 0
> 	testpmd> start

When testing the launch time feature with the igc driver, first, a code change is needed in txonly.c:
pkt->ol_flags |= RTE_MBUF_F_TX_IEEE1588_TMST; (this flag should be added to forward PTP packets with a hardware Tx timestamp)

# ./build/app/dpdk-testpmd -a 0000:81:00.0 -c f -n 4 -- -i --tx-offloads=0x200000
testpmd> set burst 1
testpmd> set fwd txonly
testpmd> set txtimes 1000000,0
testpmd> start

On receiver side (with tcpdump):
# tcpdump -Q in -ttt -ni ens25f3 --time-stamp-precision=nano -j adapter_unsynced -c 32

Thanks,
Simei

> 
> >
> >> +-----------+---------------+---------------+---------------+---------------+---------------+
> >> | 1G        | 16880         | 16880         | 16880         | 16880         | 16880         |
> >> +-----------+---------------+---------------+---------------+---------------+---------------+
> >>
> >> Any suggestions here? Is it supposed to be embedded directly here or
> >> left to the application level to compensate? I can fix it accordingly.
> >
> >I think it can be put here directly just as you do.
> 
> Got it. I will keep this delay compensation here and revise it in the next patch
> version.
  
Chuanyu Xue Jan. 3, 2024, 9:52 p.m. UTC | #6
Hi, Simei

Thank you for your guidance on how to test this feature.

>> The following is how I tried to test with testpmd. Please let me know if I did
>> something wrong.
>> 
>>       sudo ./dpdk-testpmd -- -i --forward-mode=txonly
>> 
>>       testpmd> port stop 0
>>       testpmd> set burst 1
>>       testpmd> set txtimes 100000000,0
>>       testpmd> port config 0 tx_offload send_on_timestamp on
>>       testpmd> port start 0
>>       testpmd> start
>
>When testing the launch time feature with the igc driver, first, a code change
>is needed in txonly.c:
>pkt->ol_flags |= RTE_MBUF_F_TX_IEEE1588_TMST; (this flag should be added to
>forward PTP packets with a hardware Tx timestamp)
>
># ./build/app/dpdk-testpmd -a 0000:81:00.0 -c f -n 4 -- -i 
>--tx-offloads=0x200000
>testpmd> set burst 1
>testpmd> set fwd txonly
>testpmd> set txtimes 1000000,0
>testpmd> start
>
>On receiver side (with tcpdump):
># tcpdump -Q in -ttt -ni ens25f3 --time-stamp-precision=nano -j 
>adapter_unsynced -c 32

Now dpdk-testpmd works well with this patch after I added the flag in txonly.c
as you mentioned.

It is worth noting that I also added `rte_eth_timesync_enable(pi);` in the
function `tx_only_begin` in txonly.c to enable the PTP clock. Otherwise, all
scheduled Tx packets are dropped.
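
Here is a sketch of the two txonly.c changes described above (the exact
placement is an assumption; this is not the literal diff):

    /* In pkt_burst_prepare(), when building each packet: */
    pkt->ol_flags |= RTE_MBUF_F_TX_IEEE1588_TMST; /* request HW Tx timestamping */

    /* In tx_only_begin(), before transmission starts: */
    rte_eth_timesync_enable(pi); /* start the PTP clock so launch times are valid */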

The following are the measurement results on the listener. I used the same
configuration as you mentioned for dpdk-testpmd on the talker.

    ➜  ~ sudo tcpdump -Q in -ttt -ni enp1s0 --time-stamp-precision=nano -j adapter_unsynced -c 32

    tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
    listening on enp1s0, link-type EN10MB (Ethernet), snapshot length 262144 bytes


     00:00:00.000000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000008 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     ....
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000008 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
     00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
    32 packets captured
    118 packets received by filter
    0 packets dropped by kernel

The above test is based on the patch v2 with an Intel i210 NIC.

- Chuanyu
  
Simei Su Jan. 4, 2024, 2:56 a.m. UTC | #7
Hi Chuanyu,

> -----Original Message-----
> From: Chuanyu Xue <chuanyu.xue@uconn.edu>
> Sent: Thursday, January 4, 2024 5:52 AM
> To: Su, Simei <simei.su@intel.com>
> Cc: Xing, Beilei <beilei.xing@intel.com>; chuanyu.xue@uconn.edu;
> dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Lu, Wenzhuo
> <wenzhuo.lu@intel.com>
> Subject: RE: [PATCH] net/e1000: support launchtime feature
> 
> Hi, Simei
> 
> Thank you for your guidance on how to test this feature.
> 
> >> The following is how I tried to test with testpmd. Please let me know if I
> >> did something wrong.
> >>
> >>       sudo ./dpdk-testpmd -- -i --forward-mode=txonly
> >>
> >>       testpmd> port stop 0
> >>       testpmd> set burst 1
> >>       testpmd> set txtimes 100000000,0
> >>       testpmd> port config 0 tx_offload send_on_timestamp on
> >>       testpmd> port start 0
> >>       testpmd> start
> >
> >When testing the launch time feature with the igc driver, first, a code
> >change is needed in txonly.c:
> >pkt->ol_flags |= RTE_MBUF_F_TX_IEEE1588_TMST; (this flag should be
> >added to forward PTP packets with a hardware Tx timestamp)
> >
> ># ./build/app/dpdk-testpmd -a 0000:81:00.0 -c f -n 4 -- -i
> >--tx-offloads=0x200000
> >testpmd> set burst 1
> >testpmd> set fwd txonly
> >testpmd> set txtimes 1000000,0
> >testpmd> start
> >
> >On receiver side (with tcpdump):
> ># tcpdump -Q in -ttt -ni ens25f3 --time-stamp-precision=nano -j
> >adapter_unsynced -c 32
> 
> Now dpdk-testpmd works well with this patch after I added the flag in txonly.c as
> you mentioned.

OK, good.

> 
> It is worth noting that I also added `rte_eth_timesync_enable(pi);` in the
> function `tx_only_begin` in txonly.c to enable the PTP clock. Otherwise, all
> scheduled Tx packets are dropped.

Yes, got it.

> 
> The following are the measurement results on the listener. I used the same
> configuration as you mentioned for dpdk-testpmd on the talker.
> 
>     ➜  ~ sudo tcpdump -Q in -ttt -ni enp1s0 --time-stamp-precision=nano -j
> adapter_unsynced -c 32
> 
>     tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
>     listening on enp1s0, link-type EN10MB (Ethernet), snapshot length
> 262144 bytes
> 
> 
>      00:00:00.000000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000008 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      ....
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000008 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>      00:00:00.001000000 IP 198.18.0.1.9 > 198.18.0.2.9: UDP, length 22
>     32 packets captured
>     118 packets received by filter
>     0 packets dropped by kernel
> 
> The above test is based on the patch v2 with an Intel i210 NIC.

OK. I will review the v2 patch.

Thanks,
Simei 

> 
> - Chuanyu
  

Patch

diff --git a/drivers/net/e1000/base/e1000_regs.h b/drivers/net/e1000/base/e1000_regs.h
index d44de59c29..092d9d71e6 100644
--- a/drivers/net/e1000/base/e1000_regs.h
+++ b/drivers/net/e1000/base/e1000_regs.h
@@ -162,6 +162,7 @@ 
 
 /* QAV Tx mode control register */
 #define E1000_I210_TQAVCTRL	0x3570
+#define E1000_I210_LAUNCH_OS0 0x3578
 
 /* QAV Tx mode control register bitfields masks */
 /* QAV enable */
diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 718a9746ed..174f7aaf52 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -382,6 +382,9 @@  extern struct igb_rss_filter_list igb_filter_rss_list;
 TAILQ_HEAD(igb_flow_mem_list, igb_flow_mem);
 extern struct igb_flow_mem_list igb_flow_list;
 
+extern uint64_t igb_tx_timestamp_dynflag;
+extern int igb_tx_timestamp_dynfield_offset;
+
 extern const struct rte_flow_ops igb_flow_ops;
 
 /*
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 8858f975f8..4d3d8ae30a 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -223,6 +223,7 @@  static int igb_timesync_read_time(struct rte_eth_dev *dev,
 				  struct timespec *timestamp);
 static int igb_timesync_write_time(struct rte_eth_dev *dev,
 				   const struct timespec *timestamp);
+static int eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock);
 static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
 					uint16_t queue_id);
 static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
@@ -313,6 +314,9 @@  static const struct rte_pci_id pci_id_igbvf_map[] = {
 	{ .vendor_id = 0, /* sentinel */ },
 };
 
+uint64_t igb_tx_timestamp_dynflag;
+int igb_tx_timestamp_dynfield_offset = -1;
+
 static const struct rte_eth_desc_lim rx_desc_lim = {
 	.nb_max = E1000_MAX_RING_DESC,
 	.nb_min = E1000_MIN_RING_DESC,
@@ -389,6 +393,7 @@  static const struct eth_dev_ops eth_igb_ops = {
 	.timesync_adjust_time = igb_timesync_adjust_time,
 	.timesync_read_time   = igb_timesync_read_time,
 	.timesync_write_time  = igb_timesync_write_time,
+	.read_clock		      = eth_igb_read_clock,
 };
 
 /*
@@ -1198,6 +1203,7 @@  eth_igb_start(struct rte_eth_dev *dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
 	int ret, mask;
+	uint32_t tqavctrl;
 	uint32_t intr_vector = 0;
 	uint32_t ctrl_ext;
 	uint32_t *speeds;
@@ -1281,6 +1287,15 @@  eth_igb_start(struct rte_eth_dev *dev)
 		return ret;
 	}
 
+	if (igb_tx_timestamp_dynflag > 0) {
+		tqavctrl = E1000_READ_REG(hw, E1000_I210_TQAVCTRL);
+		tqavctrl |= E1000_TQAVCTRL_MODE;
+		tqavctrl |= E1000_TQAVCTRL_FETCH_ARB; /* Fetch the queue most empty, no Round Robin */
+		tqavctrl |= E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE; /* Enable launch time */
+		E1000_WRITE_REG(hw, E1000_I210_TQAVCTRL, tqavctrl);
+		E1000_WRITE_REG(hw, E1000_I210_LAUNCH_OS0, 1ULL << 31); /* Set launch offset to default */
+	}
+
 	e1000_clear_hw_cntrs_base_generic(hw);
 
 	/*
@@ -4882,6 +4897,19 @@  igb_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
 	return  0;
 }
 
+static int
+eth_igb_read_clock(__rte_unused struct rte_eth_dev *dev, uint64_t *clock)
+{
+	uint64_t systime_cycles;
+	struct e1000_adapter *adapter = dev->data->dev_private;
+
+	systime_cycles = igb_read_systime_cyclecounter(dev);
+	uint64_t ns = rte_timecounter_update(&adapter->systime_tc, systime_cycles);
+	*clock = ns;
+
+	return 0;
+}
+
 static int
 eth_igb_get_reg_length(struct rte_eth_dev *dev __rte_unused)
 {
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 448c4b7d9d..e5da8e250d 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -212,6 +212,9 @@  struct igb_tx_queue {
 #define IGB_TSO_MAX_HDRLEN			(512)
 #define IGB_TSO_MAX_MSS				(9216)
 
+/* Macro to compensate latency in launch time offloading */
+#define E1000_I210_LT_LATENCY		0x41F9
+
 /*********************************************************************
  *
  *  TX function
@@ -244,12 +247,13 @@  check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
 static inline void
 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
 		volatile struct e1000_adv_tx_context_desc *ctx_txd,
-		uint64_t ol_flags, union igb_tx_offload tx_offload)
+		uint64_t ol_flags, union igb_tx_offload tx_offload, uint64_t txtime)
 {
 	uint32_t type_tucmd_mlhl;
 	uint32_t mss_l4len_idx;
 	uint32_t ctx_idx, ctx_curr;
 	uint32_t vlan_macip_lens;
+	uint32_t launch_time;
 	union igb_tx_offload tx_offload_mask;
 
 	ctx_curr = txq->ctx_curr;
@@ -312,16 +316,25 @@  igbe_set_xmit_ctx(struct igb_tx_queue* txq,
 		}
 	}
 
-	txq->ctx_cache[ctx_curr].flags = ol_flags;
-	txq->ctx_cache[ctx_curr].tx_offload.data =
-		tx_offload_mask.data & tx_offload.data;
-	txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
+	if (!txtime) {
+		txq->ctx_cache[ctx_curr].flags = ol_flags;
+		txq->ctx_cache[ctx_curr].tx_offload.data =
+			tx_offload_mask.data & tx_offload.data;
+		txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
+	}
 
 	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
 	vlan_macip_lens = (uint32_t)tx_offload.data;
 	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
 	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
 	ctx_txd->u.seqnum_seed = 0;
+
+	if (txtime) {
+		launch_time = (txtime - E1000_I210_LT_LATENCY) % NSEC_PER_SEC;
+		ctx_txd->u.launch_time = rte_cpu_to_le_32(launch_time / 32);
+	} else {
+		ctx_txd->u.launch_time = 0;
+	}
 }
 
 /*
@@ -400,6 +413,7 @@  eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint32_t new_ctx = 0;
 	uint32_t ctx = 0;
 	union igb_tx_offload tx_offload = {0};
+	uint64_t ts;
 
 	txq = tx_queue;
 	sw_ring = txq->sw_ring;
@@ -552,7 +566,12 @@  eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 					txe->mbuf = NULL;
 				}
 
-				igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
+				if (igb_tx_timestamp_dynflag > 0) {
+					ts = *RTE_MBUF_DYNFIELD(tx_pkt, igb_tx_timestamp_dynfield_offset, uint64_t *);
+					igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload, ts);
+				} else {
+					igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload, 0);
+				}
 
 				txe->last_id = tx_last;
 				tx_id = txe->next_id;
@@ -1464,7 +1483,8 @@  igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
 			  RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
 			  RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
 			  RTE_ETH_TX_OFFLOAD_TCP_TSO     |
-			  RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+			  RTE_ETH_TX_OFFLOAD_MULTI_SEGS  |
+			  RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
 
 	return tx_offload_capa;
 }
@@ -2579,9 +2599,11 @@  eth_igb_tx_init(struct rte_eth_dev *dev)
 {
 	struct e1000_hw     *hw;
 	struct igb_tx_queue *txq;
+	uint64_t offloads = dev->data->dev_conf.txmode.offloads;
 	uint32_t tctl;
 	uint32_t txdctl;
 	uint16_t i;
+	int err;
 
 	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -2612,6 +2634,14 @@  eth_igb_tx_init(struct rte_eth_dev *dev)
 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
 	}
 
+	if (offloads & RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) {
+		err = rte_mbuf_dyn_tx_timestamp_register(
+			&igb_tx_timestamp_dynfield_offset,
+			&igb_tx_timestamp_dynflag);
+		if (err)
+			PMD_DRV_LOG(ERR, "Failed to register tx timestamp dynamic field");
+	}
+
 	/* Program the Transmit Control Register. */
 	tctl = E1000_READ_REG(hw, E1000_TCTL);
 	tctl &= ~E1000_TCTL_CT;