[dpdk-dev,RFC,04/14] mbuf: replace data pointer by an offset

Message ID 1407789890-17355-5-git-send-email-bruce.richardson@intel.com (mailing list archive)
State RFC, archived
Headers

Commit Message

Bruce Richardson Aug. 11, 2014, 8:44 p.m. UTC
From: Olivier Matz <olivier.matz@6wind.com>

Original patch:
 The mbuf structure already contains a pointer to the beginning of the
 buffer (m->buf_addr). It is not needed to use 8 bytes again to store
 another pointer to the beginning of the data.

 Using a 16 bits unsigned integer is enough as we know that a mbuf is
 never longer than 64KB. We gain 6 bytes in the structure thanks to
 this modification.

 Signed-off-by: Olivier Matz <olivier.matz@6wind.com>

This version:
* Updated original patch to apply to latest on mainline.
* Disabled vector PMD in config as it relies heavily on the mbuf layout

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 app/test-pmd/csumonly.c                     |  2 +-
 app/test-pmd/flowgen.c                      |  2 +-
 app/test-pmd/icmpecho.c                     |  2 +-
 app/test-pmd/macfwd-retry.c                 |  2 +-
 app/test-pmd/macfwd.c                       |  2 +-
 app/test-pmd/macswap.c                      |  2 +-
 app/test-pmd/rxonly.c                       |  2 +-
 app/test-pmd/testpmd.c                      |  2 +-
 app/test-pmd/txonly.c                       |  7 +++--
 app/test/packet_burst_generator.c           |  7 +++--
 app/test/test_mbuf.c                        |  6 ++--
 app/test/test_table_acl.c                   |  7 +++--
 app/test/test_table_pipeline.c              |  8 ++----
 config/common_linuxapp                      |  2 +-
 examples/exception_path/main.c              |  3 +-
 examples/vhost/main.c                       | 39 ++++++++++++++-----------
 examples/vhost_xen/main.c                   |  2 +-
 lib/librte_ip_frag/rte_ipv4_fragmentation.c |  6 ++--
 lib/librte_ip_frag/rte_ipv6_fragmentation.c |  6 ++--
 lib/librte_mbuf/rte_mbuf.c                  |  6 ++--
 lib/librte_mbuf/rte_mbuf.h                  | 44 +++++++++++++----------------
 lib/librte_pmd_bond/rte_eth_bond_pmd.c      |  4 +--
 lib/librte_pmd_e1000/em_rxtx.c              | 12 ++++----
 lib/librte_pmd_e1000/igb_rxtx.c             | 13 ++++-----
 lib/librte_pmd_i40e/i40e_rxtx.c             | 17 ++++++-----
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c           | 13 +++++----
 lib/librte_pmd_ixgbe/ixgbe_rxtx.h           |  3 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c       |  1 -
 lib/librte_pmd_virtio/virtio_rxtx.c         |  4 +--
 lib/librte_pmd_virtio/virtqueue.h           |  3 +-
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c       |  5 ++--
 31 files changed, 115 insertions(+), 119 deletions(-)
  

Comments

Olivier Matz Aug. 12, 2014, 8:55 a.m. UTC | #1
Hi Bruce,

On 08/11/2014 10:44 PM, Bruce Richardson wrote:
> From: Olivier Matz <olivier.matz@6wind.com>
>
> Original patch:
>   The mbuf structure already contains a pointer to the beginning of the
>   buffer (m->buf_addr). It is not needed to use 8 bytes again to store
>   another pointer to the beginning of the data.
>
>   Using a 16 bits unsigned integer is enough as we know that a mbuf is
>   never longer than 64KB. We gain 6 bytes in the structure thanks to
>   this modification.
>
>   Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>
> This version:
> * Updated original patch to apply to latest on mainline.
> * Disabled vector PMD in config as it relies heavily on the mbuf layout
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>

Remaining references shown by:

git grep 
"\(pkt->data[^_]\)\|\(mb->data[^_]\)\|\(m->data[^_]\)\|\(mbuf->data[^_]\)"

In:
app/test-pmd/ieee1588fwd.c
examples/vhost_xen/main.c
lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
lib/librte_pmd_pcap/rte_eth_pcap.c
lib/librte_pmd_xenvirt/rte_eth_xenvirt.c

> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -140,6 +140,13 @@ struct rte_mbuf {
>  	void *buf_addr;           /**< Virtual address of segment buffer. */
>  	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
>  	uint16_t buf_len;         /**< Length of segment buffer. */
> +
> +	/* valid for any segment */
> +	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
> +	uint16_t data_off;
> +	uint16_t data_len;        /**< Amount of data in segment buffer. */
> +	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
> +
>  #ifdef RTE_MBUF_REFCNT
>  	/**
>  	 * 16-bit Reference counter.
> @@ -156,18 +163,12 @@ struct rte_mbuf {
>  #else
>  	uint16_t refcnt_reserved;     /**< Do not use this field */
>  #endif
> -	uint16_t reserved;             /**< Unused field. Required for padding. */
> -	uint16_t ol_flags;            /**< Offload features. */
> -
> -	/* valid for any segment */
> -	struct rte_mbuf *next;  /**< Next segment of scattered packet. */
> -	void* data;             /**< Start address of data in segment buffer. */
> -	uint16_t data_len;      /**< Amount of data in segment buffer. */
>
>  	/* these fields are valid for first segment only */
>  	uint8_t nb_segs;        /**< Number of segments. */
>  	uint8_t in_port;        /**< Input port. */
> -	uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
> +	uint16_t ol_flags;            /**< Offload features. */
> +	uint16_t reserved;             /**< Unused field. Required for padding. */

I think we should try to keep comments aligned if possible.

>
>  	/* offload features, valid for first segment only */
>  	union rte_vlan_macip vlan_macip;
> @@ -185,7 +186,7 @@ struct rte_mbuf {
>  		uint16_t metadata16[0];
>  		uint32_t metadata32[0];
>  		uint64_t metadata64[0];
> -	};
> +	} __rte_cache_aligned;
>  } __rte_cache_aligned;
>

In my initial patch, there was a "reserved2" field at the end of the
rte_mbuf structure to keep its size to 64 bytes. This is not really
required because of the __rte_cache_aligned, but I wonder if it's a
problem to have metadata not starting on a cache line. There can be
some conflicts if some part of the code use *(uint32 *)(m + 1)
and other part of code m->metadata32[0].

By the way (that's completely off-topic), but I don't really see why
having this metadata at the end of mbuf structure is useful.

> @@ -1523,7 +1523,8 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>   		}
>
>   		/* Prefetch data of first segment, if configured to do so. */
> -		rte_packet_prefetch(first_seg->data);
> +		rte_packet_prefetch((char *)first_seg->buf_addr +
> +			first_seg->data_off);
>

It seems there is a trailing whitespace here after the "+" (seen by
"git am").


Regards,
Olivier
  

Patch

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 655b6d8..6a3aaf0 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -263,7 +263,7 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 		pkt_ol_flags = mb->ol_flags;
 		ol_flags = (uint16_t) (pkt_ol_flags & (~PKT_TX_L4_MASK));
 
-		eth_hdr = (struct ether_hdr *) mb->data;
+		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 		eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
 		if (eth_type == ETHER_TYPE_VLAN) {
 			/* Only allow single VLAN label here */
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 17dbf83..d69b2b8 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -175,7 +175,7 @@  pkt_burst_flow_gen(struct fwd_stream *fs)
 		pkt->next = NULL;
 
 		/* Initialize Ethernet header. */
-		eth_hdr = (struct ether_hdr *)pkt->data;
+		eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 		ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
 		ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
 		eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
index 4a277b8..7fd4b6d 100644
--- a/app/test-pmd/icmpecho.c
+++ b/app/test-pmd/icmpecho.c
@@ -330,7 +330,7 @@  reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
 	nb_replies = 0;
 	for (i = 0; i < nb_rx; i++) {
 		pkt = pkts_burst[i];
-		eth_h = (struct ether_hdr *) pkt->data;
+		eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 		eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
 		l2_len = sizeof(struct ether_hdr);
 		if (verbose_level > 0) {
diff --git a/app/test-pmd/macfwd-retry.c b/app/test-pmd/macfwd-retry.c
index 5122983..83da26f 100644
--- a/app/test-pmd/macfwd-retry.c
+++ b/app/test-pmd/macfwd-retry.c
@@ -119,7 +119,7 @@  pkt_burst_mac_retry_forward(struct fwd_stream *fs)
 	fs->rx_packets += nb_rx;
 	for (i = 0; i < nb_rx; i++) {
 		mb = pkts_burst[i];
-		eth_hdr = (struct ether_hdr *) mb->data;
+		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
 				&eth_hdr->d_addr);
 		ether_addr_copy(&ports[fs->tx_port].eth_addr,
diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
index 999c8e3..674c189 100644
--- a/app/test-pmd/macfwd.c
+++ b/app/test-pmd/macfwd.c
@@ -110,7 +110,7 @@  pkt_burst_mac_forward(struct fwd_stream *fs)
 	txp = &ports[fs->tx_port];
 	for (i = 0; i < nb_rx; i++) {
 		mb = pkts_burst[i];
-		eth_hdr = (struct ether_hdr *) mb->data;
+		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 		ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
 				&eth_hdr->d_addr);
 		ether_addr_copy(&ports[fs->tx_port].eth_addr,
diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
index 731f487..d274b36 100644
--- a/app/test-pmd/macswap.c
+++ b/app/test-pmd/macswap.c
@@ -110,7 +110,7 @@  pkt_burst_mac_swap(struct fwd_stream *fs)
 	txp = &ports[fs->tx_port];
 	for (i = 0; i < nb_rx; i++) {
 		mb = pkts_burst[i];
-		eth_hdr = (struct ether_hdr *) mb->data;
+		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 
 		/* Swap dest and src mac addresses. */
 		ether_addr_copy(&eth_hdr->d_addr, &addr);
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index c34a5e1..b477076 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -149,7 +149,7 @@  pkt_burst_receive(struct fwd_stream *fs)
 			rte_pktmbuf_free(mb);
 			continue;
 		}
-		eth_hdr = (struct ether_hdr *) mb->data;
+		eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
 		eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
 		ol_flags = mb->ol_flags;
 		print_ether_addr("  src=", &eth_hdr->s_addr);
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 81272a0..16de06c 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -404,7 +404,7 @@  testpmd_mbuf_ctor(struct rte_mempool *mp,
 			mb_ctor_arg->seg_buf_offset);
 	mb->buf_len      = mb_ctor_arg->seg_buf_size;
 	mb->ol_flags     = 0;
-	mb->data         = (char *) mb->buf_addr + RTE_PKTMBUF_HEADROOM;
+	mb->data_off     = RTE_PKTMBUF_HEADROOM;
 	mb->nb_segs      = 1;
 	mb->vlan_macip.data = 0;
 	mb->hash.rss     = 0;
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 1b2f661..95e46a2 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -111,13 +111,13 @@  copy_buf_to_pkt_segs(void* buf, unsigned len, struct rte_mbuf *pkt,
 		seg = seg->next;
 	}
 	copy_len = seg->data_len - offset;
-	seg_buf = ((char *) seg->data + offset);
+	seg_buf = (rte_pktmbuf_mtod(seg, char *) + offset);
 	while (len > copy_len) {
 		rte_memcpy(seg_buf, buf, (size_t) copy_len);
 		len -= copy_len;
 		buf = ((char*) buf + copy_len);
 		seg = seg->next;
-		seg_buf = seg->data;
+		seg_buf = rte_pktmbuf_mtod(seg, char *);
 	}
 	rte_memcpy(seg_buf, buf, (size_t) len);
 }
@@ -126,7 +126,8 @@  static inline void
 copy_buf_to_pkt(void* buf, unsigned len, struct rte_mbuf *pkt, unsigned offset)
 {
 	if (offset + len <= pkt->data_len) {
-		rte_memcpy(((char *) pkt->data + offset), buf, (size_t) len);
+		rte_memcpy((rte_pktmbuf_mtod(pkt, char *) + offset),
+			buf, (size_t) len);
 		return;
 	}
 	copy_buf_to_pkt_segs(buf, len, pkt, offset);
diff --git a/app/test/packet_burst_generator.c b/app/test/packet_burst_generator.c
index 8740348..c9e6c8b 100644
--- a/app/test/packet_burst_generator.c
+++ b/app/test/packet_burst_generator.c
@@ -59,13 +59,13 @@  copy_buf_to_pkt_segs(void *buf, unsigned len, struct rte_mbuf *pkt,
 		seg = seg->next;
 	}
 	copy_len = seg->data_len - offset;
-	seg_buf = ((char *) seg->data + offset);
+	seg_buf = rte_pktmbuf_mtod(seg, char *) + offset;
 	while (len > copy_len) {
 		rte_memcpy(seg_buf, buf, (size_t) copy_len);
 		len -= copy_len;
 		buf = ((char *) buf + copy_len);
 		seg = seg->next;
-		seg_buf = seg->data;
+		seg_buf = rte_pktmbuf_mtod(seg, void *);
 	}
 	rte_memcpy(seg_buf, buf, (size_t) len);
 }
@@ -74,7 +74,8 @@  static inline void
 copy_buf_to_pkt(void *buf, unsigned len, struct rte_mbuf *pkt, unsigned offset)
 {
 	if (offset + len <= pkt->data_len) {
-		rte_memcpy(((char *) pkt->data + offset), buf, (size_t) len);
+		rte_memcpy(rte_pktmbuf_mtod(pkt, char *) + offset,
+				buf, (size_t) len);
 		return;
 	}
 	copy_buf_to_pkt_segs(buf, len, pkt, offset);
diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index fd26d6f..31ef9fa 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -432,7 +432,7 @@  test_pktmbuf_pool_ptr(void)
 			printf("rte_pktmbuf_alloc() failed (%u)\n", i);
 			ret = -1;
 		}
-		m[i]->data = RTE_PTR_ADD(m[i]->data, 64);
+		m[i]->data_off += 64;
 	}
 
 	/* free them */
@@ -451,8 +451,8 @@  test_pktmbuf_pool_ptr(void)
 			printf("rte_pktmbuf_alloc() failed (%u)\n", i);
 			ret = -1;
 		}
-		if (m[i]->data != RTE_PTR_ADD(m[i]->buf_addr, RTE_PKTMBUF_HEADROOM)) {
-			printf ("data pointer not set properly\n");
+		if (m[i]->data_off != RTE_PKTMBUF_HEADROOM) {
+			printf ("invalid data_off\n");
 			ret = -1;
 		}
 	}
diff --git a/app/test/test_table_acl.c b/app/test/test_table_acl.c
index 70e2e68..e68585f 100644
--- a/app/test/test_table_acl.c
+++ b/app/test/test_table_acl.c
@@ -515,7 +515,7 @@  test_pipeline_single_filter(int expected_count)
 			struct rte_mbuf *mbuf;
 
 			mbuf = rte_pktmbuf_alloc(pool);
-			memset(mbuf->data, 0x00,
+			memset(rte_pktmbuf_mtod(mbuf, char *), 0x00,
 				sizeof(struct ipv4_5tuple));
 
 			five_tuple.proto = j;
@@ -524,7 +524,7 @@  test_pipeline_single_filter(int expected_count)
 			five_tuple.port_src = rte_bswap16(100 + j);
 			five_tuple.port_dst = rte_bswap16(200 + j);
 
-			memcpy(mbuf->data, &five_tuple,
+			memcpy(rte_pktmbuf_mtod(mbuf, char *), &five_tuple,
 				sizeof(struct ipv4_5tuple));
 			RTE_LOG(INFO, PIPELINE, "%s: Enqueue onto ring %d\n",
 				__func__, i);
@@ -551,7 +551,8 @@  test_pipeline_single_filter(int expected_count)
 			printf("Got %d object(s) from ring %d!\n", ret, i);
 			for (j = 0; j < ret; j++) {
 				mbuf = (struct rte_mbuf *)objs[j];
-				rte_hexdump(stdout, "mbuf", mbuf->data, 64);
+				rte_hexdump(stdout, "mbuf",
+					rte_pktmbuf_mtod(mbuf, char *), 64);
 				rte_pktmbuf_free(mbuf);
 			}
 			tx_count += ret;
diff --git a/app/test/test_table_pipeline.c b/app/test/test_table_pipeline.c
index c5368ae..f6a5b6f 100644
--- a/app/test/test_table_pipeline.c
+++ b/app/test/test_table_pipeline.c
@@ -45,11 +45,6 @@ 
 #include "test_table.h"
 #include "test_table_pipeline.h"
 
-#define RTE_CBUF_UINT8_PTR(cbuf, offset)			\
-	(&cbuf->data[offset])
-#define RTE_CBUF_UINT32_PTR(cbuf, offset)			\
-	(&cbuf->data32[offset/sizeof(uint32_t)])
-
 #if 0
 
 static rte_pipeline_port_out_action_handler port_action_0x00
@@ -504,7 +499,8 @@  test_pipeline_single_filter(int test_type, int expected_count)
 			printf("Got %d object(s) from ring %d!\n", ret, i);
 			for (j = 0; j < ret; j++) {
 				mbuf = (struct rte_mbuf *)objs[j];
-				rte_hexdump(stdout, "Object:", mbuf->data,
+				rte_hexdump(stdout, "Object:",
+					rte_pktmbuf_mtod(mbuf, char *),
 					mbuf->data_len);
 				rte_pktmbuf_free(mbuf);
 			}
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 5bee910..b140af7 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -191,7 +191,7 @@  CONFIG_RTE_LIBRTE_IXGBE_DEBUG_DRIVER=n
 CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n
 CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y
 CONFIG_RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP=n
-CONFIG_RTE_IXGBE_INC_VECTOR=y
+CONFIG_RTE_IXGBE_INC_VECTOR=n
 CONFIG_RTE_IXGBE_RX_OLFLAGS_ENABLE=y
 
 #
diff --git a/examples/exception_path/main.c b/examples/exception_path/main.c
index 5045ef8..f286bf2 100644
--- a/examples/exception_path/main.c
+++ b/examples/exception_path/main.c
@@ -302,7 +302,8 @@  main_loop(__attribute__((unused)) void *arg)
 			if (m == NULL)
 				continue;
 
-			ret = read(tap_fd, m->data, MAX_PACKET_SZ);
+			ret = read(tap_fd, rte_pktmbuf_mtod(m, void *),
+				MAX_PACKET_SZ);
 			lcore_stats[lcore_id].rx++;
 			if (unlikely(ret < 0)) {
 				FATAL_ERROR("Reading from %s interface failed",
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index afbab31..6ccdc24 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1027,7 +1027,9 @@  virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
 		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
 		/* Copy mbuf data to buffer */
-		rte_memcpy((void *)(uintptr_t)buff_addr, (const void*)buff->data, rte_pktmbuf_data_len(buff));
+		rte_memcpy((void *)(uintptr_t)buff_addr,
+			rte_pktmbuf_mtod(buff, const void *),
+			rte_pktmbuf_data_len(buff));
 
 		res_cur_idx++;
 		packet_success++;
@@ -1089,7 +1091,7 @@  link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)
 	int i, ret;
 
 	/* Learn MAC address of guest device from packet */
-	pkt_hdr = (struct ether_hdr *)m->data;
+	pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 
 	dev_ll = ll_root_used;
 
@@ -1176,7 +1178,7 @@  virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
 	struct ether_hdr *pkt_hdr;
 	uint64_t ret = 0;
 
-	pkt_hdr = (struct ether_hdr *)m->data;
+	pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 
 	/*get the used devices list*/
 	dev_ll = ll_root_used;
@@ -1235,7 +1237,7 @@  virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *
 	unsigned len, ret, offset = 0;
 	const uint16_t lcore_id = rte_lcore_id();
 	struct virtio_net_data_ll *dev_ll = ll_root_used;
-	struct ether_hdr *pkt_hdr = (struct ether_hdr *)m->data;
+	struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 
 	/*check if destination is local VM*/
 	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(dev, m) == 0))
@@ -1292,18 +1294,21 @@  virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *
 	mbuf->pkt_len = mbuf->data_len;
 
 	/* Copy ethernet header to mbuf. */
-	rte_memcpy((void*)mbuf->data, (const void*)m->data, ETH_HLEN);
+	rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+		rte_pktmbuf_mtod(m, const void *),
+		ETH_HLEN);
 
 
 	/* Setup vlan header. Bytes need to be re-ordered for network with htons()*/
-	vlan_hdr = (struct vlan_ethhdr *) mbuf->data;
+	vlan_hdr = rte_pktmbuf_mtod(mbuf, struct vlan_ethhdr *);
 	vlan_hdr->h_vlan_encapsulated_proto = vlan_hdr->h_vlan_proto;
 	vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
 	vlan_hdr->h_vlan_TCI = htons(vlan_tag);
 
 	/* Copy the remaining packet contents to the mbuf. */
-	rte_memcpy((void*) ((uint8_t*)mbuf->data + VLAN_ETH_HLEN),
-		(const void*) ((uint8_t*)m->data + ETH_HLEN), (m->data_len - ETH_HLEN));
+	rte_memcpy((void *)(rte_pktmbuf_mtod(mbuf, uint8_t *) + VLAN_ETH_HLEN),
+		(const void *)(rte_pktmbuf_mtod(m, uint8_t *) + ETH_HLEN),
+		(m->data_len - ETH_HLEN));
 	tx_q->m_table[len] = mbuf;
 	len++;
 	if (enable_stats) {
@@ -1394,7 +1399,7 @@  virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)
 
 		/* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */
 		m.data_len = desc->len;
-		m.data = (void*)(uintptr_t)buff_addr;
+		m.data_off = 0;
 
 		PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
 
@@ -1713,7 +1718,7 @@  attach_rxmbuf_zcp(struct virtio_net *dev)
 	}
 
 	mbuf->buf_addr = (void *)(uintptr_t)(buff_addr - RTE_PKTMBUF_HEADROOM);
-	mbuf->data = (void *)(uintptr_t)(buff_addr);
+	mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 	mbuf->buf_physaddr = phys_addr - RTE_PKTMBUF_HEADROOM;
 	mbuf->data_len = desc->len;
 	MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;
@@ -1750,7 +1755,7 @@  static inline void pktmbuf_detach_zcp(struct rte_mbuf *m)
 
 	buf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
 			RTE_PKTMBUF_HEADROOM : m->buf_len;
-	m->data = (char *) m->buf_addr + buf_ofs;
+	m->data_off = buf_ofs;
 
 	m->data_len = 0;
 }
@@ -1984,7 +1989,7 @@  virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
 	unsigned len, ret, offset = 0;
 	struct vpool *vpool;
 	struct virtio_net_data_ll *dev_ll = ll_root_used;
-	struct ether_hdr *pkt_hdr = (struct ether_hdr *)m->data;
+	struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 	uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh];
 
 	/*Add packet to the port tx queue*/
@@ -2061,11 +2066,11 @@  virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
 	mbuf->pkt_len = mbuf->data_len;
 	if (unlikely(need_copy)) {
 		/* Copy the packet contents to the mbuf. */
-		rte_memcpy((void *)((uint8_t *)mbuf->data),
-			(const void *) ((uint8_t *)m->data),
+		rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+			rte_pktmbuf_mtod(m, void *),
 			m->data_len);
 	} else {
-		mbuf->data = m->data;
+		mbuf->data_off = m->data_off;
 		mbuf->buf_physaddr = m->buf_physaddr;
 		mbuf->buf_addr = m->buf_addr;
 	}
@@ -2199,8 +2204,8 @@  virtio_dev_tx_zcp(struct virtio_net *dev)
 		m.data_len = desc->len;
 		m.nb_segs = 1;
 		m.next = NULL;
-		m.data = (void *)(uintptr_t)buff_addr;
-		m.buf_addr = m.data;
+		m.data_off = 0;
+		m.buf_addr = (void *)(uintptr_t)buff_addr;
 		m.buf_physaddr = phys_addr;
 
 		/*
diff --git a/examples/vhost_xen/main.c b/examples/vhost_xen/main.c
index 8162cd8..9a4c7c0 100644
--- a/examples/vhost_xen/main.c
+++ b/examples/vhost_xen/main.c
@@ -981,7 +981,7 @@  virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)
 
 		/* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */
 		m.data_len = desc->len;
-		m.data = (void*)(uintptr_t)buff_addr;
+		m.data_off = 0;
 		m.nb_segs = 1;
 
 		virtio_tx_route(dev, &m, mbuf_pool, 0);
diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
index 0b10310..cb09d92 100644
--- a/lib/librte_ip_frag/rte_ipv4_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
@@ -109,7 +109,7 @@  rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 	/* Fragment size should be a multiply of 8. */
 	IP_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0);
 
-	in_hdr = (struct ipv4_hdr *) pkt_in->data;
+	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *);
 	flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset);
 
 	/* If Don't Fragment flag is set */
@@ -165,7 +165,7 @@  rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 			if (len > (in_seg->data_len - in_seg_data_pos)) {
 				len = in_seg->data_len - in_seg_data_pos;
 			}
-			out_seg->data = (char*) in_seg->data + (uint16_t)in_seg_data_pos;
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
 			out_seg->data_len = (uint16_t)len;
 			out_pkt->pkt_len = (uint16_t)(len +
 			    out_pkt->pkt_len);
@@ -188,7 +188,7 @@  rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 
 		/* Build the IP header */
 
-		out_hdr = (struct ipv4_hdr*) out_pkt->data;
+		out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv4_hdr *);
 
 		__fill_ipv4hdr_frag(out_hdr, in_hdr,
 		    (uint16_t)out_pkt->pkt_len,
diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
index e007662..4ffcc7c 100644
--- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
@@ -125,7 +125,7 @@  rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 	    (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv6_hdr))))
 		return (-EINVAL);
 
-	in_hdr = (struct ipv6_hdr *) pkt_in->data;
+	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv6_hdr *);
 
 	in_seg = pkt_in;
 	in_seg_data_pos = sizeof(struct ipv6_hdr);
@@ -171,7 +171,7 @@  rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 			if (len > (in_seg->data_len - in_seg_data_pos)) {
 				len = in_seg->data_len - in_seg_data_pos;
 			}
-			out_seg->data = (char *) in_seg->data + (uint16_t) in_seg_data_pos;
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
 			out_seg->data_len = (uint16_t)len;
 			out_pkt->pkt_len = (uint16_t)(len +
 			    out_pkt->pkt_len);
@@ -196,7 +196,7 @@  rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 
 		/* Build the IP header */
 
-		out_hdr = (struct ipv6_hdr *) out_pkt->data;
+		out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv6_hdr *);
 
 		__fill_ipv6hdr_frag(out_hdr, in_hdr,
 		    (uint16_t) out_pkt->pkt_len - sizeof(struct ipv6_hdr),
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index cc62401..2e1dbdf 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -104,7 +104,7 @@  rte_pktmbuf_init(struct rte_mempool *mp,
 	m->buf_len = (uint16_t)buf_len;
 
 	/* keep some headroom between start of buffer and data */
-	m->data = (char*) m->buf_addr + RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
+	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
 
 	/* init some constant fields */
 	m->pool = mp;
@@ -170,12 +170,12 @@  rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 		__rte_mbuf_sanity_check(m, 0);
 
 		fprintf(f, "  segment at 0x%p, data=0x%p, data_len=%u\n",
-		       m, m->data, (unsigned)m->data_len);
+			m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len);
 		len = dump_len;
 		if (len > m->data_len)
 			len = m->data_len;
 		if (len != 0)
-			rte_hexdump(f, NULL, m->data, len);
+			rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len);
 		dump_len -= len;
 		m = m->next;
 		nb_segs --;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index a2127c1..0cd6928 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -140,6 +140,13 @@  struct rte_mbuf {
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
 	uint16_t buf_len;         /**< Length of segment buffer. */
+
+	/* valid for any segment */
+	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
+	uint16_t data_off;
+	uint16_t data_len;        /**< Amount of data in segment buffer. */
+	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
+
 #ifdef RTE_MBUF_REFCNT
 	/**
 	 * 16-bit Reference counter.
@@ -156,18 +163,12 @@  struct rte_mbuf {
 #else
 	uint16_t refcnt_reserved;     /**< Do not use this field */
 #endif
-	uint16_t reserved;             /**< Unused field. Required for padding. */
-	uint16_t ol_flags;            /**< Offload features. */
-
-	/* valid for any segment */
-	struct rte_mbuf *next;  /**< Next segment of scattered packet. */
-	void* data;             /**< Start address of data in segment buffer. */
-	uint16_t data_len;      /**< Amount of data in segment buffer. */
 
 	/* these fields are valid for first segment only */
 	uint8_t nb_segs;        /**< Number of segments. */
 	uint8_t in_port;        /**< Input port. */
-	uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
+	uint16_t ol_flags;            /**< Offload features. */
+	uint16_t reserved;             /**< Unused field. Required for padding. */
 
 	/* offload features, valid for first segment only */
 	union rte_vlan_macip vlan_macip;
@@ -185,7 +186,7 @@  struct rte_mbuf {
 		uint16_t metadata16[0];
 		uint32_t metadata32[0];
 		uint64_t metadata64[0];
-	};
+	} __rte_cache_aligned;
 } __rte_cache_aligned;
 
 #define RTE_MBUF_METADATA_UINT8(mbuf, offset)              \
@@ -468,8 +469,6 @@  void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg);
  */
 static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 {
-	uint32_t buf_ofs;
-
 	m->next = NULL;
 	m->pkt_len = 0;
 	m->vlan_macip.data = 0;
@@ -477,9 +476,8 @@  static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 	m->in_port = 0xff;
 
 	m->ol_flags = 0;
-	buf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
+	m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
 			RTE_PKTMBUF_HEADROOM : m->buf_len;
-	m->data = (char*) m->buf_addr + buf_ofs;
 
 	m->data_len = 0;
 	__rte_mbuf_sanity_check(m, 1);
@@ -536,7 +534,7 @@  static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
 	mi->buf_len = md->buf_len;
 
 	mi->next = md->next;
-	mi->data = md->data;
+	mi->data_off = md->data_off;
 	mi->data_len = md->data_len;
 	mi->in_port = md->in_port;
 	mi->vlan_macip = md->vlan_macip;
@@ -565,16 +563,14 @@  static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 {
 	const struct rte_mempool *mp = m->pool;
 	void *buf = RTE_MBUF_TO_BADDR(m);
-	uint32_t buf_ofs;
 	uint32_t buf_len = mp->elt_size - sizeof(*m);
 	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof (*m);
 
 	m->buf_addr = buf;
 	m->buf_len = (uint16_t)buf_len;
 
-	buf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
+	m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
 			RTE_PKTMBUF_HEADROOM : m->buf_len;
-	m->data = (char*) m->buf_addr + buf_ofs;
 
 	m->data_len = 0;
 }
@@ -738,7 +734,7 @@  static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v)
 static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m)
 {
 	__rte_mbuf_sanity_check(m, 1);
-	return (uint16_t) ((char*) m->data - (char*) m->buf_addr);
+	return m->data_off;
 }
 
 /**
@@ -786,7 +782,7 @@  static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m)
  * @param t
  *   The type to cast the result into.
  */
-#define rte_pktmbuf_mtod(m, t) ((t)((m)->data))
+#define rte_pktmbuf_mtod(m, t) ((t)((char *)(m)->buf_addr + (m)->data_off))
 
 /**
  * A macro that returns the length of the packet.
@@ -831,11 +827,11 @@  static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m,
 	if (unlikely(len > rte_pktmbuf_headroom(m)))
 		return NULL;
 
-	m->data = (char*) m->data - len;
+	m->data_off -= len;
 	m->data_len = (uint16_t)(m->data_len + len);
 	m->pkt_len  = (m->pkt_len + len);
 
-	return (char*) m->data;
+	return (char*) m->buf_addr + m->data_off;
 }
 
 /**
@@ -864,7 +860,7 @@  static inline char *rte_pktmbuf_append(struct rte_mbuf *m, uint16_t len)
 	if (unlikely(len > rte_pktmbuf_tailroom(m_last)))
 		return NULL;
 
-	tail = (char*) m_last->data + m_last->data_len;
+	tail = (char*) m_last->buf_addr + m_last->data_off + m_last->data_len;
 	m_last->data_len = (uint16_t)(m_last->data_len + len);
 	m->pkt_len  = (m->pkt_len + len);
 	return (char*) tail;
@@ -892,9 +888,9 @@  static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len)
 		return NULL;
 
 	m->data_len = (uint16_t)(m->data_len - len);
-	m->data = ((char*) m->data + len);
+	m->data_off += len;
 	m->pkt_len  = (m->pkt_len - len);
-	return (char*) m->data;
+	return (char *)m->buf_addr + m->data_off;
 }
 
 /**
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 5979ce5..506a448 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -198,14 +198,14 @@  xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
 
 	switch (policy) {
 	case BALANCE_XMIT_POLICY_LAYER2:
-		eth_hdr = (struct ether_hdr *)buf->data;
+		eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
 
 		hash = ether_hash(eth_hdr);
 		hash ^= hash >> 8;
 		return hash % slave_count;
 
 	case BALANCE_XMIT_POLICY_LAYER23:
-		eth_hdr = (struct ether_hdr *)buf->data;
+		eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
 
 		if (buf->ol_flags & PKT_RX_VLAN_PKT)
 			eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
diff --git a/lib/librte_pmd_e1000/em_rxtx.c b/lib/librte_pmd_e1000/em_rxtx.c
index 058e1bd..66e8544 100644
--- a/lib/librte_pmd_e1000/em_rxtx.c
+++ b/lib/librte_pmd_e1000/em_rxtx.c
@@ -90,8 +90,7 @@  rte_rxmbuf_alloc(struct rte_mempool *mp)
 }
 
 #define RTE_MBUF_DATA_DMA_ADDR(mb)             \
-	(uint64_t) ((mb)->buf_physaddr +       \
-	(uint64_t) ((char *)((mb)->data) - (char *)(mb)->buf_addr))
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
 
 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
@@ -771,8 +770,8 @@  eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.length) -
 				rxq->crc_len);
-		rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
-		rte_packet_prefetch(rxm->data);
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
 		rxm->nb_segs = 1;
 		rxm->next = NULL;
 		rxm->pkt_len = pkt_len;
@@ -941,7 +940,7 @@  eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		data_len = rte_le_to_cpu_16(rxd.length);
 		rxm->data_len = data_len;
-		rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
 		/*
 		 * If this is the first buffer of the received packet,
@@ -1013,7 +1012,8 @@  eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxm->vlan_macip.f.vlan_tci = rte_le_to_cpu_16(rxd.special);
 
 		/* Prefetch data of first segment, if configured to do so. */
-		rte_packet_prefetch(first_seg->data);
+		rte_packet_prefetch((char *)first_seg->buf_addr +
+			first_seg->data_off);
 
 		/*
 		 * Store the mbuf address into the next entry of the array
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index 99bb9d9..e94eb8d 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -95,9 +95,7 @@  rte_rxmbuf_alloc(struct rte_mempool *mp)
 }
 
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	(uint64_t) ((mb)->buf_physaddr +		   \
-			(uint64_t) ((char *)((mb)->data) -     \
-				(char *)(mb)->buf_addr))
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
 
 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
@@ -753,8 +751,8 @@  eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
 				      rxq->crc_len);
-		rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
-		rte_packet_prefetch(rxm->data);
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
 		rxm->nb_segs = 1;
 		rxm->next = NULL;
 		rxm->pkt_len = pkt_len;
@@ -930,7 +928,7 @@  eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
 		rxm->data_len = data_len;
-		rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
 		/*
 		 * If this is the first buffer of the received packet,
@@ -1012,7 +1010,8 @@  eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		first_seg->ol_flags = pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
-		rte_packet_prefetch(first_seg->data);
+		rte_packet_prefetch((char *)first_seg->buf_addr +
+			first_seg->data_off);
 
 		/*
 		 * Store the mbuf address into the next entry of the array
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index faf6095..fd5a784 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -78,9 +78,7 @@ 
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
 
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	((uint64_t)((mb)->buf_physaddr + \
-	(uint64_t)((char *)((mb)->data) - \
-	(char *)(mb)->buf_addr)))
+	((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
 
 static const struct rte_memzone *
 i40e_ring_dma_zone_reserve(struct rte_eth_dev *dev,
@@ -688,7 +686,7 @@  i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
-		mb->data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
+		mb->data_off = RTE_PKTMBUF_HEADROOM;
 		mb->nb_segs = 1;
 		mb->in_port = rxq->port_id;
 		dma_addr = rte_cpu_to_le_64(\
@@ -845,8 +843,8 @@  i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rx_packet_len = ((qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
 
-		rxm->data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
-		rte_prefetch0(rxm->data);
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
 		rxm->nb_segs = 1;
 		rxm->next = NULL;
 		rxm->pkt_len = rx_packet_len;
@@ -949,7 +947,7 @@  i40e_recv_scattered_pkts(void *rx_queue,
 		rx_packet_len = (qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 					I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
 		rxm->data_len = rx_packet_len;
-		rxm->data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
 		/**
 		 * If this is the first buffer of the received packet, set the
@@ -1018,7 +1016,8 @@  i40e_recv_scattered_pkts(void *rx_queue,
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
 		/* Prefetch data of first segment, if configured to do so. */
-		rte_prefetch0(first_seg->data);
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+			first_seg->data_off));
 		rx_pkts[nb_rx++] = first_seg;
 		first_seg = NULL;
 	}
@@ -2020,7 +2019,7 @@  i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
 
 		rte_mbuf_refcnt_set(mbuf, 1);
 		mbuf->next = NULL;
-		mbuf->data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
+		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 		mbuf->nb_segs = 1;
 		mbuf->in_port = rxq->port_id;
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index c95e117..5d9451c 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -996,7 +996,7 @@  ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
 		mb = rxep[i].mbuf;
 		rte_mbuf_refcnt_set(mb, 1);
 		mb->next = NULL;
-		mb->data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
+		mb->data_off = RTE_PKTMBUF_HEADROOM;
 		mb->nb_segs = 1;
 		mb->in_port = rxq->port_id;
 
@@ -1247,8 +1247,8 @@  ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
 				      rxq->crc_len);
-		rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
-		rte_packet_prefetch(rxm->data);
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
 		rxm->nb_segs = 1;
 		rxm->next = NULL;
 		rxm->pkt_len = pkt_len;
@@ -1431,7 +1431,7 @@  ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		 */
 		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
 		rxm->data_len = data_len;
-		rxm->data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
 		/*
 		 * If this is the first buffer of the received packet,
@@ -1523,7 +1523,8 @@  ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		}
 
 		/* Prefetch data of first segment, if configured to do so. */
-		rte_packet_prefetch(first_seg->data);
+		rte_packet_prefetch((char *)first_seg->buf_addr + 
+			first_seg->data_off);
 
 		/*
 		 * Store the mbuf address into the next entry of the array
@@ -3213,7 +3214,7 @@  ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 
 		rte_mbuf_refcnt_set(mbuf, 1);
 		mbuf->next = NULL;
-		mbuf->data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
+		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 		mbuf->nb_segs = 1;
 		mbuf->in_port = rxq->port_id;
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
index 4c9cb74..8d9476d 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
@@ -47,8 +47,7 @@ 
 #endif
 
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	(uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->data) - \
-	(char *)(mb)->buf_addr))
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
 
 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index f9d29bf..dd02cf5 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -61,7 +61,6 @@  static struct rte_mbuf mb_def = {
 	.in_port = 0,
 
 	.next = NULL,
-	.data = NULL,
 };
 
 static inline void
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
index 07bc7b2..c154631 100644
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c
@@ -118,7 +118,7 @@  virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
 		}
 
 		rte_prefetch0(cookie);
-		rte_packet_prefetch(cookie->data);
+		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
 		rx_pkts[i]  = cookie;
 		vq->vq_used_cons_idx++;
 		vq_ring_free_chain(vq, desc_idx);
@@ -470,7 +470,7 @@  virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		}
 
 		rxm->in_port = rxvq->port_id;
-		rxm->data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
 		rxm->nb_segs = 1;
 		rxm->next = NULL;
 		rxm->pkt_len  = (uint32_t)(len[i]
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
index d777feb..fdee054 100644
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ b/lib/librte_pmd_virtio/virtqueue.h
@@ -59,8 +59,7 @@ 
 #define VIRTQUEUE_MAX_NAME_SZ 32
 
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	(uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->data) - \
-	(char *)(mb)->buf_addr))
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
 
 #define VTNET_SQ_RQ_QUEUE_IDX 0
 #define VTNET_SQ_TQ_QUEUE_IDX 1
diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
index 7dd5a98..e033a1f 100644
--- a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
+++ b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
@@ -79,8 +79,7 @@ 
 
 
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
-	(uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->data) - \
-	(char *)(mb)->buf_addr))
+	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
 
 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
 	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
@@ -564,7 +563,7 @@  vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->data_len = (uint16_t)rcd->len;
 			rxm->in_port = rxq->port_id;
 			rxm->vlan_macip.f.vlan_tci = 0;
-			rxm->data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+			rxm->data_off = RTE_PKTMBUF_HEADROOM;
 
 			rx_pkts[nb_rx++] = rxm;