summaryrefslogtreecommitdiff
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r--include/linux/skbuff.h363
1 files changed, 271 insertions, 92 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11c270551d25..6c8b6f604e76 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -28,7 +28,6 @@
#include <linux/textsearch.h>
#include <net/checksum.h>
#include <linux/rcupdate.h>
-#include <linux/dmaengine.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/netdev_features.h>
@@ -47,11 +46,29 @@
*
* The hardware you're dealing with doesn't calculate the full checksum
* (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
- * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
- * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
- * undefined in this case though. It is a bad option, but, unfortunately,
- * nowadays most vendors do this. Apparently with the secret goal to sell
- * you new devices, when you will add new protocol to your host, f.e. IPv6 8)
+ * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
+ * if their checksums are okay. skb->csum is still undefined in this case
+ * though. It is a bad option, but, unfortunately, nowadays most vendors do
+ * this. Apparently with the secret goal to sell you new devices, when you
+ * will add new protocol to your host, f.e. IPv6 8)
+ *
+ * CHECKSUM_UNNECESSARY is applicable to following protocols:
+ * TCP: IPv6 and IPv4.
+ * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
+ * zero UDP checksum for either IPv4 or IPv6, the networking stack
+ * may perform further validation in this case.
+ * GRE: only if the checksum is present in the header.
+ * SCTP: indicates the CRC in SCTP header has been validated.
+ *
+ * skb->csum_level indicates the number of consecutive checksums found in
+ * the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
+ * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
+ * and a device is able to verify the checksums for UDP (possibly zero),
+ * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
+ * two. If the device were only able to verify the UDP checksum and not
+ * GRE, either because it doesn't support GRE checksum of because GRE
+ * checksum is bad, skb->csum_level would be set to zero (TCP checksum is
+ * not considered in this case).
*
* CHECKSUM_COMPLETE:
*
@@ -112,6 +129,9 @@
#define CHECKSUM_COMPLETE 2
#define CHECKSUM_PARTIAL 3
+/* Maximum value in skb->csum_level */
+#define SKB_MAX_CSUM_LEVEL 3
+
#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
#define SKB_WITH_OVERHEAD(X) \
((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -135,7 +155,7 @@ struct nf_conntrack {
};
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
atomic_t use;
unsigned int mask;
@@ -318,9 +338,10 @@ struct skb_shared_info {
enum {
- SKB_FCLONE_UNAVAILABLE,
- SKB_FCLONE_ORIG,
- SKB_FCLONE_CLONE,
+ SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */
+ SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */
+ SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */
+ SKB_FCLONE_FREE, /* this companion fclone skb is available */
};
enum {
@@ -452,6 +473,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @tc_verd: traffic control verdict
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
+ * @xmit_more: More SKBs are pending for this queue
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -460,8 +482,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
- * @dma_cookie: a cookie to one of several possible DMA operations
- * done by skb DMA functions
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
* @mark: Generic packet mark
@@ -505,87 +525,101 @@ struct sk_buff {
char cb[48] __aligned(8);
unsigned long _skb_refdst;
+ void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ struct nf_conntrack *nfct;
+#endif
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ struct nf_bridge_info *nf_bridge;
+#endif
unsigned int len,
data_len;
__u16 mac_len,
hdr_len;
- union {
- __wsum csum;
- struct {
- __u16 csum_start;
- __u16 csum_offset;
- };
- };
- __u32 priority;
+
+ /* Following fields are _not_ copied in __copy_skb_header()
+ * Note that queue_mapping is here mostly to fill a hole.
+ */
kmemcheck_bitfield_begin(flags1);
- __u8 ignore_df:1,
- cloned:1,
- ip_summed:2,
+ __u16 queue_mapping;
+ __u8 cloned:1,
nohdr:1,
- nfctinfo:3;
- __u8 pkt_type:3,
fclone:2,
- ipvs_property:1,
peeked:1,
- nf_trace:1;
+ head_frag:1,
+ xmit_more:1;
+ /* one bit hole */
kmemcheck_bitfield_end(flags1);
- __be16 protocol;
- void (*destructor)(struct sk_buff *skb);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- struct nf_conntrack *nfct;
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- struct nf_bridge_info *nf_bridge;
-#endif
-
- int skb_iif;
-
- __u32 hash;
-
- __be16 vlan_proto;
- __u16 vlan_tci;
+ /* fields enclosed in headers_start/headers_end are copied
+ * using a single memcpy() in __copy_skb_header()
+ */
+ /* private: */
+ __u32 headers_start[0];
+ /* public: */
-#ifdef CONFIG_NET_SCHED
- __u16 tc_index; /* traffic control index */
-#ifdef CONFIG_NET_CLS_ACT
- __u16 tc_verd; /* traffic control verdict */
-#endif
+/* if you move pkt_type around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX (7 << 5)
+#else
+#define PKT_TYPE_MAX 7
#endif
+#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
- __u16 queue_mapping;
- kmemcheck_bitfield_begin(flags2);
-#ifdef CONFIG_IPV6_NDISC_NODETYPE
- __u8 ndisc_nodetype:2;
-#endif
+ __u8 __pkt_type_offset[0];
+ __u8 pkt_type:3;
__u8 pfmemalloc:1;
+ __u8 ignore_df:1;
+ __u8 nfctinfo:3;
+
+ __u8 nf_trace:1;
+ __u8 ip_summed:2;
__u8 ooo_okay:1;
__u8 l4_hash:1;
__u8 sw_hash:1;
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
+
__u8 no_fcs:1;
- __u8 head_frag:1;
- /* Encapsulation protocol and NIC drivers should use
- * this flag to indicate to each other if the skb contains
- * encapsulated packet or not and maybe use the inner packet
- * headers if needed
- */
+ /* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
__u8 csum_complete_sw:1;
- /* 2/4 bit hole (depending on ndisc_nodetype presence) */
- kmemcheck_bitfield_end(flags2);
+ __u8 csum_level:2;
+ __u8 csum_bad:1;
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ __u8 ndisc_nodetype:2;
+#endif
+ __u8 ipvs_property:1;
+ __u8 inner_protocol_type:1;
+ /* 4 or 6 bit hole */
+
+#ifdef CONFIG_NET_SCHED
+ __u16 tc_index; /* traffic control index */
+#ifdef CONFIG_NET_CLS_ACT
+ __u16 tc_verd; /* traffic control verdict */
+#endif
+#endif
-#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
union {
- unsigned int napi_id;
- dma_cookie_t dma_cookie;
+ __wsum csum;
+ struct {
+ __u16 csum_start;
+ __u16 csum_offset;
+ };
};
+ __u32 priority;
+ int skb_iif;
+ __u32 hash;
+ __be16 vlan_proto;
+ __u16 vlan_tci;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned int napi_id;
#endif
#ifdef CONFIG_NETWORK_SECMARK
__u32 secmark;
@@ -596,13 +630,24 @@ struct sk_buff {
__u32 reserved_tailroom;
};
- __be16 inner_protocol;
+ union {
+ __be16 inner_protocol;
+ __u8 inner_ipproto;
+ };
+
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
+
+ __be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
+
+ /* private: */
+ __u32 headers_end[0];
+ /* public: */
+
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail;
sk_buff_data_t end;
@@ -734,6 +779,41 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
}
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+ unsigned long data_len,
+ int max_page_order,
+ int *errcode,
+ gfp_t gfp_mask);
+
+/* Layout of fast clones : [skb1][skb2][fclone_ref] */
+struct sk_buff_fclones {
+ struct sk_buff skb1;
+
+ struct sk_buff skb2;
+
+ atomic_t fclone_ref;
+};
+
+/**
+ * skb_fclone_busy - check if fclone is busy
+ * @skb: buffer
+ *
+ * Returns true is skb is a fast clone, and its clone is not freed.
+ * Some drivers call skb_orphan() in their ndo_start_xmit(),
+ * so we also check that this didnt happen.
+ */
+static inline bool skb_fclone_busy(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct sk_buff_fclones *fclones;
+
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+ return skb->fclone == SKB_FCLONE_ORIG &&
+ fclones->skb2.fclone == SKB_FCLONE_CLONE &&
+ fclones->skb2.sk == sk;
+}
+
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
gfp_t priority)
{
@@ -1042,6 +1122,7 @@ static inline int skb_header_cloned(const struct sk_buff *skb)
* Drop a reference to the header part of the buffer. This is done
* by acquiring a payload reference. You must not read from the header
* part of skb->data after this.
+ * Note : Check if you can use __skb_header_release() instead.
*/
static inline void skb_header_release(struct sk_buff *skb)
{
@@ -1051,6 +1132,20 @@ static inline void skb_header_release(struct sk_buff *skb)
}
/**
+ * __skb_header_release - release reference to header
+ * @skb: buffer to operate on
+ *
+ * Variant of skb_header_release() assuming skb is private to caller.
+ * We can avoid one atomic operation.
+ */
+static inline void __skb_header_release(struct sk_buff *skb)
+{
+ skb->nohdr = 1;
+ atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT));
+}
+
+
+/**
* skb_shared - is the buffer shared
* @skb: buffer to check
*
@@ -1116,7 +1211,12 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
might_sleep_if(pri & __GFP_WAIT);
if (skb_cloned(skb)) {
struct sk_buff *nskb = skb_copy(skb, pri);
- kfree_skb(skb); /* Free our shared copy */
+
+ /* Free our shared copy */
+ if (likely(nskb))
+ consume_skb(skb);
+ else
+ kfree_skb(skb);
skb = nskb;
}
return skb;
@@ -1675,6 +1775,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
skb->tail += len;
}
+#define ENCAP_TYPE_ETHER 0
+#define ENCAP_TYPE_IPPROTO 1
+
+static inline void skb_set_inner_protocol(struct sk_buff *skb,
+ __be16 protocol)
+{
+ skb->inner_protocol = protocol;
+ skb->inner_protocol_type = ENCAP_TYPE_ETHER;
+}
+
+static inline void skb_set_inner_ipproto(struct sk_buff *skb,
+ __u8 ipproto)
+{
+ skb->inner_ipproto = ipproto;
+ skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
+}
+
static inline void skb_reset_inner_headers(struct sk_buff *skb)
{
skb->inner_mac_header = skb->mac_header;
@@ -1860,18 +1977,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
return pskb_may_pull(skb, skb_network_offset(skb) + len);
}
-static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
-{
- /* Only continue with checksum unnecessary if device indicated
- * it is valid across encapsulation (skb->encapsulation was set).
- */
- if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
- skb->ip_summed = CHECKSUM_NONE;
-
- skb->encapsulation = 0;
- skb->csum_valid = 0;
-}
-
/*
* CPUs often take a performance hit when accessing unaligned memory
* locations. The actual performance hit varies, it can be small if the
@@ -2555,6 +2660,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
void skb_scrub_packet(struct sk_buff *skb, bool xnet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
struct skb_checksum_ops {
__wsum (*update)(const void *mem, int len, __wsum wsum);
@@ -2566,20 +2672,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
__wsum csum);
-static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
- int len, void *buffer)
+static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset,
+ int len, void *data, int hlen, void *buffer)
{
- int hlen = skb_headlen(skb);
-
if (hlen - offset >= len)
- return skb->data + offset;
+ return data + offset;
- if (skb_copy_bits(skb, offset, buffer, len) < 0)
+ if (!skb ||
+ skb_copy_bits(skb, offset, buffer, len) < 0)
return NULL;
return buffer;
}
+static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+ int len, void *buffer)
+{
+ return __skb_header_pointer(skb, offset, len, skb->data,
+ skb_headlen(skb), buffer);
+}
+
/**
* skb_needs_linearize - check if we need to linearize a given skb
* depending on the given device features.
@@ -2670,6 +2782,8 @@ static inline ktime_t net_invalid_timestamp(void)
return ktime_set(0, 0);
}
+struct sk_buff *skb_clone_sk(struct sk_buff *skb);
+
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
void skb_clone_tx_timestamp(struct sk_buff *skb);
@@ -2785,6 +2899,42 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
0 : __skb_checksum_complete(skb);
}
+static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
+{
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level == 0)
+ skb->ip_summed = CHECKSUM_NONE;
+ else
+ skb->csum_level--;
+ }
+}
+
+static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
+{
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+ skb->csum_level++;
+ } else if (skb->ip_summed == CHECKSUM_NONE) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = 0;
+ }
+}
+
+static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
+{
+ /* Mark current checksum as bad (typically called from GRO
+ * path). In the case that ip_summed is CHECKSUM_NONE
+ * this must be the first checksum encountered in the packet.
+ * When ip_summed is CHECKSUM_UNNECESSARY, this is the first
+ * checksum after the last one validated. For UDP, a zero
+ * checksum can not be marked as bad.
+ */
+
+ if (skb->ip_summed == CHECKSUM_NONE ||
+ skb->ip_summed == CHECKSUM_UNNECESSARY)
+ skb->csum_bad = 1;
+}
+
/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
@@ -2796,6 +2946,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
{
if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
skb->csum_valid = 1;
+ __skb_decr_checksum_unnecessary(skb);
return false;
}
@@ -2825,6 +2976,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
skb->csum_valid = 1;
return 0;
}
+ } else if (skb->csum_bad) {
+ /* ip_summed == CHECKSUM_NONE in this case */
+ return 1;
}
skb->csum = psum;
@@ -2882,6 +3036,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
#define skb_checksum_simple_validate(skb) \
__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
+static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
+{
+ return (skb->ip_summed == CHECKSUM_NONE &&
+ skb->csum_valid && !skb->csum_bad);
+}
+
+static inline void __skb_checksum_convert(struct sk_buff *skb,
+ __sum16 check, __wsum pseudo)
+{
+ skb->csum = ~pseudo;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+}
+
+#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \
+do { \
+ if (__skb_checksum_convert_check(skb)) \
+ __skb_checksum_convert(skb, check, \
+ compute_pseudo(skb, proto)); \
+} while (0)
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
@@ -2895,7 +3069,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -2913,7 +3087,7 @@ static inline void nf_reset(struct sk_buff *skb)
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
#endif
@@ -2927,19 +3101,22 @@ static inline void nf_reset_trace(struct sk_buff *skb)
}
/* Note: This doesn't put any conntrack and bridge info in dst. */
-static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
+static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
+ bool copy)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
dst->nfct = src->nfct;
nf_conntrack_get(src->nfct);
- dst->nfctinfo = src->nfctinfo;
+ if (copy)
+ dst->nfctinfo = src->nfctinfo;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dst->nf_bridge = src->nf_bridge;
nf_bridge_get(src->nf_bridge);
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
- dst->nf_trace = src->nf_trace;
+ if (copy)
+ dst->nf_trace = src->nf_trace;
#endif
}
@@ -2948,10 +3125,10 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(dst->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(dst->nf_bridge);
#endif
- __nf_copy(dst, src);
+ __nf_copy(dst, src, true);
}
#ifdef CONFIG_NETWORK_SECMARK
@@ -3136,7 +3313,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
-u32 __skb_get_poff(const struct sk_buff *skb);
+u32 skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+ const struct flow_keys *keys, int hlen);
/**
* skb_head_is_locked - Determine if the skb->head is locked down