summaryrefslogtreecommitdiff
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r--include/linux/skbuff.h198
1 files changed, 123 insertions, 75 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a59d9343c25b..85ab7d72b54c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -20,6 +20,8 @@
#include <linux/time.h>
#include <linux/bug.h>
#include <linux/cache.h>
+#include <linux/rbtree.h>
+#include <linux/socket.h>
#include <linux/atomic.h>
#include <asm/types.h>
@@ -148,6 +150,8 @@
struct net_device;
struct scatterlist;
struct pipe_inode_info;
+struct iov_iter;
+struct napi_struct;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
@@ -341,7 +345,6 @@ enum {
SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */
SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */
SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */
- SKB_FCLONE_FREE, /* this companion fclone skb is available */
};
enum {
@@ -370,8 +373,7 @@ enum {
SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
- SKB_GSO_MPLS = 1 << 12,
-
+ SKB_GSO_TUNNEL_REMCSUM = 1 << 12,
};
#if BITS_PER_LONG > 32
@@ -440,6 +442,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @next: Next buffer in list
* @prev: Previous buffer in list
* @tstamp: Time we arrived/left
+ * @rbnode: RB tree node, alternative to next/prev for netem/tcp
* @sk: Socket we are owned by
* @dev: Device we arrived on/are leaving by
* @cb: Control buffer. Free for use by every layer. Put private vars here
@@ -504,15 +507,19 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
*/
struct sk_buff {
- /* These two members must be first. */
- struct sk_buff *next;
- struct sk_buff *prev;
-
union {
- ktime_t tstamp;
- struct skb_mstamp skb_mstamp;
+ struct {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ union {
+ ktime_t tstamp;
+ struct skb_mstamp skb_mstamp;
+ };
+ };
+ struct rb_node rbnode; /* used in netem & tcp stack */
};
-
struct sock *sk;
struct net_device *dev;
@@ -557,7 +564,9 @@ struct sk_buff {
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
+ /* private: */
__u32 headers_start[0];
+ /* public: */
/* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
@@ -595,7 +604,8 @@ struct sk_buff {
#endif
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
- /* 4 or 6 bit hole */
+ __u8 remcsum_offload:1;
+ /* 3 or 5 bit hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -642,7 +652,9 @@ struct sk_buff {
__u16 network_header;
__u16 mac_header;
+ /* private: */
__u32 headers_end[0];
+ /* public: */
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail;
@@ -662,6 +674,7 @@ struct sk_buff {
#define SKB_ALLOC_FCLONE 0x01
#define SKB_ALLOC_RX 0x02
+#define SKB_ALLOC_NAPI 0x04
/* Returns true if the skb was allocated from PFMEMALLOC reserves */
static inline bool skb_pfmemalloc(const struct sk_buff *skb)
@@ -706,9 +719,6 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
skb->_skb_refdst = (unsigned long)dst;
}
-void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
- bool force);
-
/**
* skb_dst_set_noref - sets skb dst, hopefully, without taking reference
* @skb: buffer
@@ -721,24 +731,8 @@ void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
*/
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
- __skb_dst_set_noref(skb, dst, false);
-}
-
-/**
- * skb_dst_set_noref_force - sets skb dst, without taking reference
- * @skb: buffer
- * @dst: dst entry
- *
- * Sets skb dst, assuming a reference was not taken on dst.
- * No reference is taken and no dst_release will be called. While for
- * cached dsts deferred reclaim is a basic feature, for entries that are
- * not cached it is caller's job to guarantee that last dst_release for
- * provided dst happens when nobody uses it, eg. after a RCU grace period.
- */
-static inline void skb_dst_set_noref_force(struct sk_buff *skb,
- struct dst_entry *dst)
-{
- __skb_dst_set_noref(skb, dst, true);
+ WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
/**
@@ -795,15 +789,19 @@ struct sk_buff_fclones {
* @skb: buffer
*
* Returns true is skb is a fast clone, and its clone is not freed.
+ * Some drivers call skb_orphan() in their ndo_start_xmit(),
+ * so we also check that this didn't happen.
*/
-static inline bool skb_fclone_busy(const struct sk_buff *skb)
+static inline bool skb_fclone_busy(const struct sock *sk,
+ const struct sk_buff *skb)
{
const struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
return skb->fclone == SKB_FCLONE_ORIG &&
- fclones->skb2.fclone == SKB_FCLONE_CLONE;
+ atomic_read(&fclones->fclone_ref) > 1 &&
+ fclones->skb2.sk == sk;
}
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
@@ -2168,47 +2166,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
}
+void *napi_alloc_frag(unsigned int fragsz);
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length, gfp_t gfp_mask);
+static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length)
+{
+ return __napi_alloc_skb(napi, length, GFP_ATOMIC);
+}
+
/**
- * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data
- * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
- * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
- * @order: size of the allocation
+ * __dev_alloc_pages - allocate page for network Rx
+ * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
+ * @order: size of the allocation
*
- * Allocate a new page.
+ * Allocate a new page.
*
- * %NULL is returned if there is no free memory.
+ * %NULL is returned if there is no free memory.
*/
-static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
- struct sk_buff *skb,
- unsigned int order)
-{
- struct page *page;
-
- gfp_mask |= __GFP_COLD;
-
- if (!(gfp_mask & __GFP_NOMEMALLOC))
- gfp_mask |= __GFP_MEMALLOC;
+static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
+ unsigned int order)
+{
+ /* This piece of code contains several assumptions.
+ * 1. This is for device Rx, therefore a cold page is preferred.
+ * 2. The expectation is the user wants a compound page.
+ * 3. If requesting an order 0 page it will not be compound
+ * due to the check to see if order has a value in prep_new_page
+ * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
+ * code in gfp_to_alloc_flags that should be enforcing this.
+ */
+ gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC;
- page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
- if (skb && page && page->pfmemalloc)
- skb->pfmemalloc = true;
+ return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+}
- return page;
+static inline struct page *dev_alloc_pages(unsigned int order)
+{
+ return __dev_alloc_pages(GFP_ATOMIC, order);
}
/**
- * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
- * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
- * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ * __dev_alloc_page - allocate a page for network Rx
+ * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
*
- * Allocate a new page.
+ * Allocate a new page.
*
- * %NULL is returned if there is no free memory.
+ * %NULL is returned if there is no free memory.
*/
-static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
- struct sk_buff *skb)
+static inline struct page *__dev_alloc_page(gfp_t gfp_mask)
+{
+ return __dev_alloc_pages(gfp_mask, 0);
+}
+
+static inline struct page *dev_alloc_page(void)
{
- return __skb_alloc_pages(gfp_mask, skb, 0);
+ return __dev_alloc_page(GFP_ATOMIC);
}
/**
@@ -2440,7 +2452,6 @@ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
* is untouched. Otherwise it is extended. Returns zero on
* success. The skb is freed on error.
*/
-
static inline int skb_padto(struct sk_buff *skb, unsigned int len)
{
unsigned int size = skb->len;
@@ -2449,6 +2460,29 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len)
return skb_pad(skb, len - size);
}
+/**
+ * skb_put_padto - increase size and pad an skbuff up to a minimal size
+ * @skb: buffer to pad
+ * @len: minimal length
+ *
+ * Pads up a buffer to ensure the trailing bytes exist and are
+ * blanked. If the buffer already contains sufficient data it
+ * is untouched. Otherwise it is extended. Returns zero on
+ * success. The skb is freed on error.
+ */
+static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+{
+ unsigned int size = skb->len;
+
+ if (unlikely(size < len)) {
+ len -= size;
+ if (skb_pad(skb, len))
+ return -ENOMEM;
+ __skb_put(skb, len);
+ }
+ return 0;
+}
+
static inline int skb_add_data(struct sk_buff *skb,
char __user *from, int copy)
{
@@ -2621,18 +2655,18 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int *err);
unsigned int datagram_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
-int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
- struct iovec *to, int size);
-int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
- struct iovec *iov);
-int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
- const struct iovec *from, int from_offset,
- int len);
-int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
- int offset, size_t count);
-int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
- const struct iovec *to, int to_offset,
- int size);
+int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
+ struct iov_iter *to, int size);
+static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
+ struct msghdr *msg, int size)
+{
+ return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size);
+}
+int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
+ struct msghdr *msg);
+int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
+ struct iov_iter *from, int len);
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
@@ -2653,6 +2687,20 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
+int skb_ensure_writable(struct sk_buff *skb, int write_len);
+int skb_vlan_pop(struct sk_buff *skb);
+int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+
+static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
+{
+ /* XXX: stripping const */
+ return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len);
+}
+
+static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
+{
+ return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
+}
struct skb_checksum_ops {
__wsum (*update)(const void *mem, int len, __wsum wsum);