diff options
author | David S. Miller <davem@davemloft.net> | 2018-03-25 23:46:05 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-25 23:46:05 +0300 |
commit | 74b4bed972acf967a65d2aaf29c13b429c8cb927 (patch) | |
tree | 363db69617b1f877f43821037fb803d56790c461 | |
parent | 0374016579d0ef5d86655e48645ccc05678e0cf5 (diff) | |
parent | 76db8087c4c991dcd17f5ea8ac0eafd0696ab450 (diff) | |
download | linux-74b4bed972acf967a65d2aaf29c13b429c8cb927.tar.xz |
Merge branch 'net-permit-skb_segment-on-head_frag-frag_list-skb'
Yonghong Song says:
====================
net: permit skb_segment on head_frag frag_list skb
One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.
...
3665 while (pos < offset + len) {
3666 if (i >= nfrags) {
3667 BUG_ON(skb_headlen(list_skb));
...
The triggering input skb has the following properties:
list_skb = skb->frag_list;
skb->nfrags != NULL && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.
Patch #1 provides a simple solution to avoid BUG_ON. If
list_skb->head_frag is true, its page-backed frag will
be processed before the list_skb->frags.
Patch #2 provides a test case in test_bpf module which
constructs a skb and calls skb_segment() directly. The test
case is able to trigger the BUG_ON without Patch #1.
The patch has been tested in the following setup:
ipv6_host <-> nat_server <-> ipv4_host
where nat_server has a bpf program doing ipv4<->ipv6
translation and forwarding through clsact hook
bpf_skb_change_proto.
Changelog:
v5 -> v6:
. Added back missed BUG_ON(!nfrags) for zero
skb_headlen(skb) case, plus a couple of
cosmetic changes, from Alexander.
v4 -> v5:
. Replace local variable head_frag with
a static inline function skb_head_frag_to_page_desc
which gets the head_frag on-demand. This makes
code more readable and also does not increase
the stack size, from Alexander.
. Remove the "if(nfrags)" guard for skb_orphan_frags
and skb_zerocopy_clone as I found that they can
handle zero-frag skb (with non-zero skb_headlen(skb))
properly.
. Properly release segment list from skb_segment()
in the test, from Eric.
v3 -> v4:
. Remove dynamic memory allocation and use rewinding
for both index and frag to remove one branch in fast path,
from Alexander.
. Fix a bunch of issues in test_bpf skb_segment() test,
including proper way to allocate skb, proper function
argument for skb_add_rx_frag and not freeint skb, etc.,
from Eric.
v2 -> v3:
. Use starting frag index -1 (instead of 0) to
special process head_frag before other frags in the skb,
from Alexander Duyck.
v1 -> v2:
. Removed never-hit BUG_ON, spotted by Linyu Yuan.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | lib/test_bpf.c | 93 | ||||
-rw-r--r-- | net/core/skbuff.c | 27 |
2 files changed, 113 insertions, 7 deletions
diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3e9335493fe4..b2badf6b23cd 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id) return test_id < test_range[0] || test_id > test_range[1]; } +static __init struct sk_buff *build_test_skb(void) +{ + u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN; + struct sk_buff *skb[2]; + struct page *page[2]; + int i, data_size = 8; + + for (i = 0; i < 2; i++) { + page[i] = alloc_page(GFP_KERNEL); + if (!page[i]) { + if (i == 0) + goto err_page0; + else + goto err_page1; + } + + /* this will set skb[i]->head_frag */ + skb[i] = dev_alloc_skb(headroom + data_size); + if (!skb[i]) { + if (i == 0) + goto err_skb0; + else + goto err_skb1; + } + + skb_reserve(skb[i], headroom); + skb_put(skb[i], data_size); + skb[i]->protocol = htons(ETH_P_IP); + skb_reset_network_header(skb[i]); + skb_set_mac_header(skb[i], -ETH_HLEN); + + skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64); + // skb_headlen(skb[i]): 8, skb[i]->head_frag = 1 + } + + /* setup shinfo */ + skb_shinfo(skb[0])->gso_size = 1448; + skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb[0])->gso_segs = 0; + skb_shinfo(skb[0])->frag_list = skb[1]; + + /* adjust skb[0]'s len */ + skb[0]->len += skb[1]->len; + skb[0]->data_len += skb[1]->data_len; + skb[0]->truesize += skb[1]->truesize; + + return skb[0]; + +err_skb1: + __free_page(page[1]); +err_page1: + kfree_skb(skb[0]); +err_skb0: + __free_page(page[0]); +err_page0: + return NULL; +} + +static __init int test_skb_segment(void) +{ + netdev_features_t features; + struct sk_buff *skb, *segs; + int ret = -1; + + features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM; + features |= NETIF_F_RXCSUM; + skb = build_test_skb(); + if (!skb) { + pr_info("%s: failed to build_test_skb", __func__); + goto done; + } + + segs = skb_segment(skb, features); + if (segs) { + kfree_skb_list(segs); + ret = 0; + pr_info("%s: success in skb_segment!", __func__); + } else { + pr_info("%s: failed in skb_segment!", __func__); + } + kfree_skb(skb); +done: + return ret; +} + static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; @@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void) return ret; ret = test_bpf(); - destroy_bpf_tests(); - return ret; + if (ret) + return ret; + + return test_skb_segment(); } static void __exit test_bpf_exit(void) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46cb22215ff4..b5c75d4fcf37 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL_GPL(skb_pull_rcsum); +static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) +{ + skb_frag_t head_frag; + struct page *page; + + page = virt_to_head_page(frag_skb->head); + head_frag.page.p = page; + head_frag.page_offset = frag_skb->data - + (unsigned char *)page_address(page); + head_frag.size = skb_headlen(frag_skb); + return head_frag; +} + /** * skb_segment - Perform protocol segmentation on skb. * @head_skb: buffer to segment @@ -3664,15 +3677,19 @@ normal: while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(list_skb)); - i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; + if (!skb_headlen(list_skb)) { + BUG_ON(!nfrags); + } else { + BUG_ON(!list_skb->head_frag); - BUG_ON(!nfrags); - + /* to make room for head_frag. */ + i--; + frag--; + } if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) @@ -3689,7 +3706,7 @@ normal: goto err; } - *nskb_frag = *frag; + *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); |