diff options
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 354 |
1 files changed, 262 insertions, 92 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index abde271c18ae..a59d9343c25b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include <linux/textsearch.h> #include <net/checksum.h> #include <linux/rcupdate.h> -#include <linux/dmaengine.h> #include <linux/hrtimer.h> #include <linux/dma-mapping.h> #include <linux/netdev_features.h> @@ -47,11 +46,29 @@ * * The hardware you're dealing with doesn't calculate the full checksum * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will - * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still - * undefined in this case though. It is a bad option, but, unfortunately, - * nowadays most vendors do this. Apparently with the secret goal to sell - * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY + * if their checksums are okay. skb->csum is still undefined in this case + * though. It is a bad option, but, unfortunately, nowadays most vendors do + * this. Apparently with the secret goal to sell you new devices, when you + * will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_UNNECESSARY is applicable to following protocols: + * TCP: IPv6 and IPv4. + * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * zero UDP checksum for either IPv4 or IPv6, the networking stack + * may perform further validation in this case. + * GRE: only if the checksum is present in the header. + * SCTP: indicates the CRC in SCTP header has been validated. + * + * skb->csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. + * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet + * and a device is able to verify the checksums for UDP (possibly zero), + * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * two. If the device were only able to verify the UDP checksum and not + * GRE, either because it doesn't support GRE checksum of because GRE + * checksum is bad, skb->csum_level would be set to zero (TCP checksum is + * not considered in this case). * * CHECKSUM_COMPLETE: * @@ -112,6 +129,9 @@ #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 +/* Maximum value in skb->csum_level */ +#define SKB_MAX_CSUM_LEVEL 3 + #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -135,7 +155,7 @@ struct nf_conntrack { }; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; unsigned int mask; @@ -318,9 +338,10 @@ struct skb_shared_info { enum { - SKB_FCLONE_UNAVAILABLE, - SKB_FCLONE_ORIG, - SKB_FCLONE_CLONE, + SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ + SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ + SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ + SKB_FCLONE_FREE, /* this companion fclone skb is available */ }; enum { @@ -452,6 +473,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @xmit_more: More SKBs are pending for this queue * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -460,8 +482,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS - * @dma_cookie: a cookie to one of several possible DMA operations - * done by skb DMA functions * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -505,87 +525,99 @@ struct sk_buff { char cb[48] __aligned(8); unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_XFRM struct sec_path *sp; #endif +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct nf_conntrack *nfct; +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct nf_bridge_info *nf_bridge; +#endif unsigned int len, data_len; __u16 mac_len, hdr_len; - union { - __wsum csum; - struct { - __u16 csum_start; - __u16 csum_offset; - }; - }; - __u32 priority; + + /* Following fields are _not_ copied in __copy_skb_header() + * Note that queue_mapping is here mostly to fill a hole. + */ kmemcheck_bitfield_begin(flags1); - __u8 ignore_df:1, - cloned:1, - ip_summed:2, + __u16 queue_mapping; + __u8 cloned:1, nohdr:1, - nfctinfo:3; - __u8 pkt_type:3, fclone:2, - ipvs_property:1, peeked:1, - nf_trace:1; + head_frag:1, + xmit_more:1; + /* one bit hole */ kmemcheck_bitfield_end(flags1); - __be16 protocol; - void (*destructor)(struct sk_buff *skb); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - struct nf_bridge_info *nf_bridge; -#endif - - int skb_iif; - - __u32 hash; - - __be16 vlan_proto; - __u16 vlan_tci; + /* fields enclosed in headers_start/headers_end are copied + * using a single memcpy() in __copy_skb_header() + */ + __u32 headers_start[0]; -#ifdef CONFIG_NET_SCHED - __u16 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u16 tc_verd; /* traffic control verdict */ -#endif +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 #endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) - __u16 queue_mapping; - kmemcheck_bitfield_begin(flags2); -#ifdef CONFIG_IPV6_NDISC_NODETYPE - __u8 ndisc_nodetype:2; -#endif + __u8 __pkt_type_offset[0]; + __u8 pkt_type:3; __u8 pfmemalloc:1; + __u8 ignore_df:1; + __u8 nfctinfo:3; + + __u8 nf_trace:1; + __u8 ip_summed:2; __u8 ooo_okay:1; __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; + __u8 no_fcs:1; - __u8 head_frag:1; - /* Encapsulation protocol and NIC drivers should use - * this flag to indicate to each other if the skb contains - * encapsulated packet or not and maybe use the inner packet - * headers if needed - */ + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; __u8 csum_complete_sw:1; - /* 2/4 bit hole (depending on ndisc_nodetype presence) */ - kmemcheck_bitfield_end(flags2); + __u8 csum_level:2; + __u8 csum_bad:1; + +#ifdef CONFIG_IPV6_NDISC_NODETYPE + __u8 ndisc_nodetype:2; +#endif + __u8 ipvs_property:1; + __u8 inner_protocol_type:1; + /* 4 or 6 bit hole */ + +#ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +#ifdef CONFIG_NET_CLS_ACT + __u16 tc_verd; /* traffic control verdict */ +#endif +#endif -#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL union { - unsigned int napi_id; - dma_cookie_t dma_cookie; + __wsum csum; + struct { + __u16 csum_start; + __u16 csum_offset; + }; }; + __u32 priority; + int skb_iif; + __u32 hash; + __be16 vlan_proto; + __u16 vlan_tci; +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; @@ -596,13 +628,22 @@ struct sk_buff { __u32 reserved_tailroom; }; - __be16 inner_protocol; + union { + __be16 inner_protocol; + __u8 inner_ipproto; + }; + __u16 inner_transport_header; __u16 inner_network_header; __u16 inner_mac_header; + + __be16 protocol; __u16 transport_header; __u16 network_header; __u16 mac_header; + + __u32 headers_end[0]; + /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; sk_buff_data_t end; @@ -734,6 +775,37 @@ static inline struct sk_buff *alloc_skb(unsigned int size, return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } +struct sk_buff *alloc_skb_with_frags(unsigned long header_len, + unsigned long data_len, + int max_page_order, + int *errcode, + gfp_t gfp_mask); + +/* Layout of fast clones : [skb1][skb2][fclone_ref] */ +struct sk_buff_fclones { + struct sk_buff skb1; + + struct sk_buff skb2; + + atomic_t fclone_ref; +}; + +/** + * skb_fclone_busy - check if fclone is busy + * @skb: buffer + * + * Returns true is skb is a fast clone, and its clone is not freed. + */ +static inline bool skb_fclone_busy(const struct sk_buff *skb) +{ + const struct sk_buff_fclones *fclones; + + fclones = container_of(skb, struct sk_buff_fclones, skb1); + + return skb->fclone == SKB_FCLONE_ORIG && + fclones->skb2.fclone == SKB_FCLONE_CLONE; +} + static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { @@ -1042,6 +1114,7 @@ static inline int skb_header_cloned(const struct sk_buff *skb) * Drop a reference to the header part of the buffer. This is done * by acquiring a payload reference. You must not read from the header * part of skb->data after this. + * Note : Check if you can use __skb_header_release() instead. */ static inline void skb_header_release(struct sk_buff *skb) { @@ -1051,6 +1124,20 @@ static inline void skb_header_release(struct sk_buff *skb) } /** + * __skb_header_release - release reference to header + * @skb: buffer to operate on + * + * Variant of skb_header_release() assuming skb is private to caller. + * We can avoid one atomic operation. + */ +static inline void __skb_header_release(struct sk_buff *skb) +{ + skb->nohdr = 1; + atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT)); +} + + +/** * skb_shared - is the buffer shared * @skb: buffer to check * @@ -1116,7 +1203,12 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ + + /* Free our shared copy */ + if (likely(nskb)) + consume_skb(skb); + else + kfree_skb(skb); skb = nskb; } return skb; @@ -1675,6 +1767,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +#define ENCAP_TYPE_ETHER 0 +#define ENCAP_TYPE_IPPROTO 1 + +static inline void skb_set_inner_protocol(struct sk_buff *skb, + __be16 protocol) +{ + skb->inner_protocol = protocol; + skb->inner_protocol_type = ENCAP_TYPE_ETHER; +} + +static inline void skb_set_inner_ipproto(struct sk_buff *skb, + __u8 ipproto) +{ + skb->inner_ipproto = ipproto; + skb->inner_protocol_type = ENCAP_TYPE_IPPROTO; +} + static inline void skb_reset_inner_headers(struct sk_buff *skb) { skb->inner_mac_header = skb->mac_header; @@ -1860,18 +1969,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) return pskb_may_pull(skb, skb_network_offset(skb) + len); } -static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb) -{ - /* Only continue with checksum unnecessary if device indicated - * it is valid across encapsulation (skb->encapsulation was set). - */ - if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation) - skb->ip_summed = CHECKSUM_NONE; - - skb->encapsulation = 0; - skb->csum_valid = 0; -} - /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the @@ -2567,20 +2664,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { - int hlen = skb_headlen(skb); - if (hlen - offset >= len) - return skb->data + offset; + return data + offset; - if (skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || + skb_copy_bits(skb, offset, buffer, len) < 0) return NULL; return buffer; } +static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return __skb_header_pointer(skb, offset, len, skb->data, + skb_headlen(skb), buffer); +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. @@ -2671,6 +2774,8 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +struct sk_buff *skb_clone_sk(struct sk_buff *skb); + #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); @@ -2786,6 +2891,42 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level == 0) + skb->ip_summed = CHECKSUM_NONE; + else + skb->csum_level--; + } +} + +static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else if (skb->ip_summed == CHECKSUM_NONE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + +static inline void __skb_mark_checksum_bad(struct sk_buff *skb) +{ + /* Mark current checksum as bad (typically called from GRO + * path). In the case that ip_summed is CHECKSUM_NONE + * this must be the first checksum encountered in the packet. + * When ip_summed is CHECKSUM_UNNECESSARY, this is the first + * checksum after the last one validated. For UDP, a zero + * checksum can not be marked as bad. + */ + + if (skb->ip_summed == CHECKSUM_NONE || + skb->ip_summed == CHECKSUM_UNNECESSARY) + skb->csum_bad = 1; +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise @@ -2797,6 +2938,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; + __skb_decr_checksum_unnecessary(skb); return false; } @@ -2826,6 +2968,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, skb->csum_valid = 1; return 0; } + } else if (skb->csum_bad) { + /* ip_summed == CHECKSUM_NONE in this case */ + return 1; } skb->csum = psum; @@ -2883,6 +3028,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) +static inline bool __skb_checksum_convert_check(struct sk_buff *skb) +{ + return (skb->ip_summed == CHECKSUM_NONE && + skb->csum_valid && !skb->csum_bad); +} + +static inline void __skb_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + skb->csum = ~pseudo; + skb->ip_summed = CHECKSUM_COMPLETE; +} + +#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_checksum_convert_check(skb)) \ + __skb_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) @@ -2896,7 +3061,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) @@ -2914,7 +3079,7 @@ static inline void nf_reset(struct sk_buff *skb) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; #endif @@ -2928,19 +3093,22 @@ static inline void nf_reset_trace(struct sk_buff *skb) } /* Note: This doesn't put any conntrack and bridge info in dst. */ -static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) +static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, + bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) dst->nfct = src->nfct; nf_conntrack_get(src->nfct); - dst->nfctinfo = src->nfctinfo; + if (copy) + dst->nfctinfo = src->nfctinfo; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) - dst->nf_trace = src->nf_trace; + if (copy) + dst->nf_trace = src->nf_trace; #endif } @@ -2949,10 +3117,10 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); #endif - __nf_copy(dst, src); + __nf_copy(dst, src, true); } #ifdef CONFIG_NETWORK_SECMARK @@ -3137,7 +3305,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); -u32 __skb_get_poff(const struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /** * skb_head_is_locked - Determine if the skb->head is locked down |