diff options
Diffstat (limited to 'net')
132 files changed, 1046 insertions, 906 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd5..25cfd8a4bc36 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -32,7 +32,7 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/uio.h> #include <net/9p/9p.h> diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 10d2bdce686e..465cc24b41e5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1656,7 +1656,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ddp->deh_dport = usat->sat_port; ddp->deh_sport = at->src_port; - SOCK_DEBUG(sk, "SK %p: Copy user data (%Zd bytes).\n", sk, len); + SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len); err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) { @@ -1720,7 +1720,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) */ aarp_send_ddp(dev, skb, &usat->sat_addr, NULL); } - SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len); + SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len); out: release_sock(sk); diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922d307b..9613381f5db0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -13,7 +13,7 @@ #include <linux/errno.h> /* error codes */ #include <linux/capability.h> #include <linux/mm.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/time.h> /* struct timeval */ #include <linux/skbuff.h> #include <linux/bitops.h> diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 3b3b1a292ec8..a190800572bd 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -451,7 +451,7 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, return; } if (end_of_tlvs - tlvs != 0) - pr_info("(%s) ignoring %Zd bytes of trailing TLV garbage\n", + pr_info("(%s) ignoring %zd bytes of trailing TLV garbage\n", dev->name, end_of_tlvs - tlvs); } diff --git a/net/atm/svc.c b/net/atm/svc.c index 878563a8354d..db9794ec61d8 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -10,7 +10,7 @@ #include <linux/kernel.h> /* printk */ #include <linux/skbuff.h> #include <linux/wait.h> -#include <linux/sched.h> /* jiffies and HZ */ +#include <linux/sched/signal.h> #include <linux/fcntl.h> /* O_NONBLOCK */ #include <linux/init.h> #include <linux/atm.h> /* ATM stuff */ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 90fcf5fc2e0a..a8e42cedf1db 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -20,7 +20,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cfb2faba46de..69e1f7d362a8 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -27,6 +27,8 @@ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/stringify.h> +#include <linux/sched/signal.h> + #include <asm/ioctls.h> #include <net/bluetooth/bluetooth.h> diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 46ac686c8911..bb308224099c 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -26,7 +26,7 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/fcntl.h> diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1015d9c8d97d..b5faff458d8b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/sched/signal.h> + #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 48f9471e7c85..f64d6566021f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -851,7 +851,7 @@ static int hci_sock_release(struct socket *sock) if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { - /* When releasing an user channel exclusive access, + /* When releasing a user channel exclusive access, * call hci_dev_do_close directly instead of calling * hci_dev_close to ensure the exclusive access will * be released and the controller brought back down. @@ -1172,7 +1172,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, /* In case the transport is already up and * running, clear the error here. * - * This can happen when opening an user + * This can happen when opening a user * channel and HCI_AUTO_OFF grace period * is still active. */ @@ -1190,7 +1190,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * this socket will transition from a raw socket into - * an user channel socket. For a clean transition, send + * a user channel socket. For a clean transition, send * the close notification first. */ skb = create_monitor_ctrl_close(sk); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a8ba752732c9..f307b145ea54 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/export.h> +#include <linux/sched/signal.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7511df72347f..aa1a814ceddc 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -27,6 +27,7 @@ #include <linux/export.h> #include <linux/debugfs.h> +#include <linux/sched/signal.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3125ce670c2f..e4e9a2da1e7e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/sched/signal.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0f4034934d56..0b5dd607444c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -19,6 +19,7 @@ #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/times.h> +#include <linux/sched/signal.h> #include "br_private.h" diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 05e8946ccc03..79aee759aba5 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -17,6 +17,7 @@ #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> +#include <linux/sched/signal.h> #include "br_private.h" diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 9024283d2bca..279527f8b1fe 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -187,7 +187,7 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - pr_info("wrong size: %d against expected %d, rounded to %Zd\n", + pr_info("wrong size: %d against expected %d, rounded to %zd\n", em->match_size, expected_length, EBT_ALIGN(expected_length)); return -EINVAL; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 92cbbd2afddb..adcad344c843 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -9,7 +9,7 @@ #include <linux/fs.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/list.h> diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c index 50f040fdb2a9..b9233b990399 100644 --- a/net/ceph/cls_lock_client.c +++ b/net/ceph/cls_lock_client.c @@ -69,8 +69,8 @@ int ceph_cls_lock(struct ceph_osd_client *osdc, dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n", __func__, lock_name, type, cookie, tag, desc, flags); ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock", - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - lock_op_page, lock_op_buf_size, NULL, NULL); + CEPH_OSD_FLAG_WRITE, lock_op_page, + lock_op_buf_size, NULL, NULL); dout("%s: status %d\n", __func__, ret); __free_page(lock_op_page); @@ -117,8 +117,8 @@ int ceph_cls_unlock(struct ceph_osd_client *osdc, dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie); ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock", - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - unlock_op_page, unlock_op_buf_size, NULL, NULL); + CEPH_OSD_FLAG_WRITE, unlock_op_page, + unlock_op_buf_size, NULL, NULL); dout("%s: status %d\n", __func__, ret); __free_page(unlock_op_page); @@ -170,8 +170,8 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc, dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name, cookie, ENTITY_NAME(*locker)); ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock", - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - break_op_page, break_op_buf_size, NULL, NULL); + CEPH_OSD_FLAG_WRITE, break_op_page, + break_op_buf_size, NULL, NULL); dout("%s: status %d\n", __func__, ret); __free_page(break_op_page); @@ -278,7 +278,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc, int get_info_op_buf_size; int name_len = strlen(lock_name); struct page *get_info_op_page, *reply_page; - size_t reply_len; + size_t reply_len = PAGE_SIZE; void *p, *end; int ret; diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 80d7c3a97cb8..5bf94c04f645 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -45,7 +45,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p) void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) { - kfree(b->h.perm); kfree(b->h.items); kfree(b); } @@ -54,14 +53,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b) { kfree(b->item_weights); kfree(b->sum_weights); - kfree(b->h.perm); kfree(b->h.items); kfree(b); } void crush_destroy_bucket_tree(struct crush_bucket_tree *b) { - kfree(b->h.perm); kfree(b->h.items); kfree(b->node_weights); kfree(b); @@ -71,7 +68,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b) { kfree(b->straws); kfree(b->item_weights); - kfree(b->h.perm); kfree(b->h.items); kfree(b); } @@ -79,7 +75,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b) void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b) { kfree(b->item_weights); - kfree(b->h.perm); kfree(b->h.items); kfree(b); } diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 130ab407c5ec..b5cd8c21bfdf 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -54,7 +54,6 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size return -1; } - /* * bucket choose methods * @@ -72,59 +71,60 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size * Since this is expensive, we optimize for the r=0 case, which * captures the vast majority of calls. */ -static int bucket_perm_choose(struct crush_bucket *bucket, +static int bucket_perm_choose(const struct crush_bucket *bucket, + struct crush_work_bucket *work, int x, int r) { unsigned int pr = r % bucket->size; unsigned int i, s; /* start a new permutation if @x has changed */ - if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) { + if (work->perm_x != (__u32)x || work->perm_n == 0) { dprintk("bucket %d new x=%d\n", bucket->id, x); - bucket->perm_x = x; + work->perm_x = x; /* optimize common r=0 case */ if (pr == 0) { s = crush_hash32_3(bucket->hash, x, bucket->id, 0) % bucket->size; - bucket->perm[0] = s; - bucket->perm_n = 0xffff; /* magic value, see below */ + work->perm[0] = s; + work->perm_n = 0xffff; /* magic value, see below */ goto out; } for (i = 0; i < bucket->size; i++) - bucket->perm[i] = i; - bucket->perm_n = 0; - } else if (bucket->perm_n == 0xffff) { + work->perm[i] = i; + work->perm_n = 0; + } else if (work->perm_n == 0xffff) { /* clean up after the r=0 case above */ for (i = 1; i < bucket->size; i++) - bucket->perm[i] = i; - bucket->perm[bucket->perm[0]] = 0; - bucket->perm_n = 1; + work->perm[i] = i; + work->perm[work->perm[0]] = 0; + work->perm_n = 1; } /* calculate permutation up to pr */ - for (i = 0; i < bucket->perm_n; i++) - dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]); - while (bucket->perm_n <= pr) { - unsigned int p = bucket->perm_n; + for (i = 0; i < work->perm_n; i++) + dprintk(" perm_choose have %d: %d\n", i, work->perm[i]); + while (work->perm_n <= pr) { + unsigned int p = work->perm_n; /* no point in swapping the final entry */ if (p < bucket->size - 1) { i = crush_hash32_3(bucket->hash, x, bucket->id, p) % (bucket->size - p); if (i) { - unsigned int t = bucket->perm[p + i]; - bucket->perm[p + i] = bucket->perm[p]; - bucket->perm[p] = t; + unsigned int t = work->perm[p + i]; + work->perm[p + i] = work->perm[p]; + work->perm[p] = t; } dprintk(" perm_choose swap %d with %d\n", p, p+i); } - bucket->perm_n++; + work->perm_n++; } for (i = 0; i < bucket->size; i++) - dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]); + dprintk(" perm_choose %d: %d\n", i, work->perm[i]); - s = bucket->perm[pr]; + s = work->perm[pr]; out: dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id, bucket->size, x, r, pr, s); @@ -132,14 +132,14 @@ out: } /* uniform */ -static int bucket_uniform_choose(struct crush_bucket_uniform *bucket, - int x, int r) +static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket, + struct crush_work_bucket *work, int x, int r) { - return bucket_perm_choose(&bucket->h, x, r); + return bucket_perm_choose(&bucket->h, work, x, r); } /* list */ -static int bucket_list_choose(struct crush_bucket_list *bucket, +static int bucket_list_choose(const struct crush_bucket_list *bucket, int x, int r) { int i; @@ -155,8 +155,9 @@ static int bucket_list_choose(struct crush_bucket_list *bucket, w *= bucket->sum_weights[i]; w = w >> 16; /*dprintk(" scaled %llx\n", w);*/ - if (w < bucket->item_weights[i]) + if (w < bucket->item_weights[i]) { return bucket->h.items[i]; + } } dprintk("bad list sums for bucket %d\n", bucket->h.id); @@ -192,7 +193,7 @@ static int terminal(int x) return x & 1; } -static int bucket_tree_choose(struct crush_bucket_tree *bucket, +static int bucket_tree_choose(const struct crush_bucket_tree *bucket, int x, int r) { int n; @@ -224,7 +225,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket, /* straw */ -static int bucket_straw_choose(struct crush_bucket_straw *bucket, +static int bucket_straw_choose(const struct crush_bucket_straw *bucket, int x, int r) { __u32 i; @@ -301,7 +302,7 @@ static __u64 crush_ln(unsigned int xin) * */ -static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, +static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket, int x, int r) { unsigned int i, high = 0; @@ -344,37 +345,42 @@ static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, high_draw = draw; } } + return bucket->h.items[high]; } -static int crush_bucket_choose(struct crush_bucket *in, int x, int r) +static int crush_bucket_choose(const struct crush_bucket *in, + struct crush_work_bucket *work, + int x, int r) { dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); BUG_ON(in->size == 0); switch (in->alg) { case CRUSH_BUCKET_UNIFORM: - return bucket_uniform_choose((struct crush_bucket_uniform *)in, - x, r); + return bucket_uniform_choose( + (const struct crush_bucket_uniform *)in, + work, x, r); case CRUSH_BUCKET_LIST: - return bucket_list_choose((struct crush_bucket_list *)in, + return bucket_list_choose((const struct crush_bucket_list *)in, x, r); case CRUSH_BUCKET_TREE: - return bucket_tree_choose((struct crush_bucket_tree *)in, + return bucket_tree_choose((const struct crush_bucket_tree *)in, x, r); case CRUSH_BUCKET_STRAW: - return bucket_straw_choose((struct crush_bucket_straw *)in, - x, r); + return bucket_straw_choose( + (const struct crush_bucket_straw *)in, + x, r); case CRUSH_BUCKET_STRAW2: - return bucket_straw2_choose((struct crush_bucket_straw2 *)in, - x, r); + return bucket_straw2_choose( + (const struct crush_bucket_straw2 *)in, + x, r); default: dprintk("unknown bucket %d alg %d\n", in->id, in->alg); return in->items[0]; } } - /* * true if device is marked "out" (failed, fully offloaded) * of the cluster @@ -416,7 +422,8 @@ static int is_out(const struct crush_map *map, * @parent_r: r value passed from the parent */ static int crush_choose_firstn(const struct crush_map *map, - struct crush_bucket *bucket, + struct crush_work *work, + const struct crush_bucket *bucket, const __u32 *weight, int weight_max, int x, int numrep, int type, int *out, int outpos, @@ -434,7 +441,7 @@ static int crush_choose_firstn(const struct crush_map *map, int rep; unsigned int ftotal, flocal; int retry_descent, retry_bucket, skip_rep; - struct crush_bucket *in = bucket; + const struct crush_bucket *in = bucket; int r; int i; int item = 0; @@ -473,9 +480,13 @@ static int crush_choose_firstn(const struct crush_map *map, if (local_fallback_retries > 0 && flocal >= (in->size>>1) && flocal > local_fallback_retries) - item = bucket_perm_choose(in, x, r); + item = bucket_perm_choose( + in, work->work[-1-in->id], + x, r); else - item = crush_bucket_choose(in, x, r); + item = crush_bucket_choose( + in, work->work[-1-in->id], + x, r); if (item >= map->max_devices) { dprintk(" bad item %d\n", item); skip_rep = 1; @@ -518,19 +529,21 @@ static int crush_choose_firstn(const struct crush_map *map, sub_r = r >> (vary_r-1); else sub_r = 0; - if (crush_choose_firstn(map, - map->buckets[-1-item], - weight, weight_max, - x, stable ? 1 : outpos+1, 0, - out2, outpos, count, - recurse_tries, 0, - local_retries, - local_fallback_retries, - 0, - vary_r, - stable, - NULL, - sub_r) <= outpos) + if (crush_choose_firstn( + map, + work, + map->buckets[-1-item], + weight, weight_max, + x, stable ? 1 : outpos+1, 0, + out2, outpos, count, + recurse_tries, 0, + local_retries, + local_fallback_retries, + 0, + vary_r, + stable, + NULL, + sub_r) <= outpos) /* didn't get leaf */ reject = 1; } else { @@ -539,14 +552,12 @@ static int crush_choose_firstn(const struct crush_map *map, } } - if (!reject) { + if (!reject && !collide) { /* out? */ if (itemtype == 0) reject = is_out(map, weight, weight_max, item, x); - else - reject = 0; } reject: @@ -600,7 +611,8 @@ reject: * */ static void crush_choose_indep(const struct crush_map *map, - struct crush_bucket *bucket, + struct crush_work *work, + const struct crush_bucket *bucket, const __u32 *weight, int weight_max, int x, int left, int numrep, int type, int *out, int outpos, @@ -610,7 +622,7 @@ static void crush_choose_indep(const struct crush_map *map, int *out2, int parent_r) { - struct crush_bucket *in = bucket; + const struct crush_bucket *in = bucket; int endpos = outpos + left; int rep; unsigned int ftotal; @@ -678,7 +690,9 @@ static void crush_choose_indep(const struct crush_map *map, break; } - item = crush_bucket_choose(in, x, r); + item = crush_bucket_choose( + in, work->work[-1-in->id], + x, r); if (item >= map->max_devices) { dprintk(" bad item %d\n", item); out[rep] = CRUSH_ITEM_NONE; @@ -724,13 +738,15 @@ static void crush_choose_indep(const struct crush_map *map, if (recurse_to_leaf) { if (item < 0) { - crush_choose_indep(map, - map->buckets[-1-item], - weight, weight_max, - x, 1, numrep, 0, - out2, rep, - recurse_tries, 0, - 0, NULL, r); + crush_choose_indep( + map, + work, + map->buckets[-1-item], + weight, weight_max, + x, 1, numrep, 0, + out2, rep, + recurse_tries, 0, + 0, NULL, r); if (out2[rep] == CRUSH_ITEM_NONE) { /* placed nothing; no leaf */ break; @@ -781,6 +797,53 @@ static void crush_choose_indep(const struct crush_map *map, #endif } + +/* + * This takes a chunk of memory and sets it up to be a shiny new + * working area for a CRUSH placement computation. It must be called + * on any newly allocated memory before passing it in to + * crush_do_rule. It may be used repeatedly after that, so long as the + * map has not changed. If the map /has/ changed, you must make sure + * the working size is no smaller than what was allocated and re-run + * crush_init_workspace. + * + * If you do retain the working space between calls to crush, make it + * thread-local. + */ +void crush_init_workspace(const struct crush_map *map, void *v) +{ + struct crush_work *w = v; + __s32 b; + + /* + * We work by moving through the available space and setting + * values and pointers as we go. + * + * It's a bit like Forth's use of the 'allot' word since we + * set the pointer first and then reserve the space for it to + * point to by incrementing the point. + */ + v += sizeof(struct crush_work *); + w->work = v; + v += map->max_buckets * sizeof(struct crush_work_bucket *); + for (b = 0; b < map->max_buckets; ++b) { + if (!map->buckets[b]) + continue; + + w->work[b] = v; + switch (map->buckets[b]->alg) { + default: + v += sizeof(struct crush_work_bucket); + break; + } + w->work[b]->perm_x = 0; + w->work[b]->perm_n = 0; + w->work[b]->perm = v; + v += map->buckets[b]->size * sizeof(__u32); + } + BUG_ON(v - (void *)w != map->working_size); +} + /** * crush_do_rule - calculate a mapping with the given input and rule * @map: the crush_map @@ -790,24 +853,25 @@ static void crush_choose_indep(const struct crush_map *map, * @result_max: maximum result size * @weight: weight vector (for map leaves) * @weight_max: size of weight vector - * @scratch: scratch vector for private use; must be >= 3 * result_max + * @cwin: pointer to at least crush_work_size() bytes of memory */ int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weight, int weight_max, - int *scratch) + void *cwin) { int result_len; - int *a = scratch; - int *b = scratch + result_max; - int *c = scratch + result_max*2; + struct crush_work *cw = cwin; + int *a = cwin + map->working_size; + int *b = a + result_max; + int *c = b + result_max; + int *w = a; + int *o = b; int recurse_to_leaf; - int *w; int wsize = 0; - int *o; int osize; int *tmp; - struct crush_rule *rule; + const struct crush_rule *rule; __u32 step; int i, j; int numrep; @@ -835,12 +899,10 @@ int crush_do_rule(const struct crush_map *map, rule = map->rules[ruleno]; result_len = 0; - w = a; - o = b; for (step = 0; step < rule->len; step++) { int firstn = 0; - struct crush_rule_step *curstep = &rule->steps[step]; + const struct crush_rule_step *curstep = &rule->steps[step]; switch (curstep->op) { case CRUSH_RULE_TAKE: @@ -936,6 +998,7 @@ int crush_do_rule(const struct crush_map *map, recurse_tries = choose_tries; osize += crush_choose_firstn( map, + cw, map->buckets[bno], weight, weight_max, x, numrep, @@ -956,6 +1019,7 @@ int crush_do_rule(const struct crush_map *map, numrep : (result_max-osize)); crush_choose_indep( map, + cw, map->buckets[bno], weight, weight_max, x, out_size, numrep, @@ -997,5 +1061,6 @@ int crush_do_rule(const struct crush_map *map, break; } } + return result_len; } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 292e33bd916e..46008d5ac504 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -3,10 +3,12 @@ #include <linux/err.h> #include <linux/scatterlist.h> +#include <linux/sched.h> #include <linux/slab.h> #include <crypto/aes.h> #include <crypto/skcipher.h> #include <linux/key-type.h> +#include <linux/sched/mm.h> #include <keys/ceph-type.h> #include <keys/user-type.h> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index bad3d4ae43f6..38dcf1eb427d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -520,7 +520,8 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; - r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); + r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; @@ -529,17 +530,20 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int page_offset, size_t length) { - void *kaddr; - int ret; + struct bio_vec bvec = { + .bv_page = page, + .bv_offset = page_offset, + .bv_len = length + }; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; BUG_ON(page_offset + length > PAGE_SIZE); - - kaddr = kmap(page); - BUG_ON(!kaddr); - ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length); - kunmap(page); - - return ret; + iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); + r = sock_recvmsg(sock, &msg, msg.msg_flags); + if (r == -EAGAIN) + r = 0; + return r; } /* @@ -579,18 +583,28 @@ static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, bool more) { + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + struct bio_vec bvec; int ret; - struct kvec iov; /* sendpage cannot properly handle pages with page_count == 0, * we need to fallback to sendmsg if that's the case */ if (page_count(page) >= 1) return __ceph_tcp_sendpage(sock, page, offset, size, more); - iov.iov_base = kmap(page) + offset; - iov.iov_len = size; - ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more); - kunmap(page); + bvec.bv_page = page; + bvec.bv_offset = offset; + bvec.bv_len = size; + + if (more) + msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ + + iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); + ret = sock_sendmsg(sock, &msg); + if (ret == -EAGAIN) + ret = 0; return ret; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f3378ba1a828..b65bbf9f45eb 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -460,7 +460,6 @@ static void request_init(struct ceph_osd_request *req) kref_init(&req->r_kref); init_completion(&req->r_completion); - init_completion(&req->r_done_completion); RB_CLEAR_NODE(&req->r_node); RB_CLEAR_NODE(&req->r_mc_node); INIT_LIST_HEAD(&req->r_unsafe_item); @@ -672,7 +671,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, BUG_ON(length > previous); op->extent.length = length; - op->indata_len -= previous - length; + if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) + op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); @@ -1636,7 +1636,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) bool need_send = false; bool promoted = false; - WARN_ON(req->r_tid || req->r_got_reply); + WARN_ON(req->r_tid); dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); again: @@ -1704,17 +1704,10 @@ promote: static void account_request(struct ceph_osd_request *req) { - unsigned int mask = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; + WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); + WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); - if (req->r_flags & CEPH_OSD_FLAG_READ) { - WARN_ON(req->r_flags & mask); - req->r_flags |= CEPH_OSD_FLAG_ACK; - } else if (req->r_flags & CEPH_OSD_FLAG_WRITE) - WARN_ON(!(req->r_flags & mask)); - else - WARN_ON(1); - - WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask); + req->r_flags |= CEPH_OSD_FLAG_ONDISK; atomic_inc(&req->r_osdc->num_requests); } @@ -1749,15 +1742,15 @@ static void finish_request(struct ceph_osd_request *req) static void __complete_request(struct ceph_osd_request *req) { - if (req->r_callback) + if (req->r_callback) { + dout("%s req %p tid %llu cb %pf result %d\n", __func__, req, + req->r_tid, req->r_callback, req->r_result); req->r_callback(req); - else - complete_all(&req->r_completion); + } } /* - * Note that this is open-coded in handle_reply(), which has to deal - * with ack vs commit, dup acks, etc. + * This is open-coded in handle_reply(). */ static void complete_request(struct ceph_osd_request *req, int err) { @@ -1766,7 +1759,7 @@ static void complete_request(struct ceph_osd_request *req, int err) req->r_result = err; finish_request(req); __complete_request(req); - complete_all(&req->r_done_completion); + complete_all(&req->r_completion); ceph_osdc_put_request(req); } @@ -1792,7 +1785,7 @@ static void cancel_request(struct ceph_osd_request *req) cancel_map_check(req); finish_request(req); - complete_all(&req->r_done_completion); + complete_all(&req->r_completion); ceph_osdc_put_request(req); } @@ -2169,7 +2162,6 @@ static void linger_commit_cb(struct ceph_osd_request *req) mutex_lock(&lreq->lock); dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, lreq->linger_id, req->r_result); - WARN_ON(!__linger_registered(lreq)); linger_reg_commit_complete(lreq, req->r_result); lreq->committed = true; @@ -2785,31 +2777,8 @@ e_inval: } /* - * We are done with @req if - * - @m is a safe reply, or - * - @m is an unsafe reply and we didn't want a safe one - */ -static bool done_request(const struct ceph_osd_request *req, - const struct MOSDOpReply *m) -{ - return (m->result < 0 || - (m->flags & CEPH_OSD_FLAG_ONDISK) || - !(req->r_flags & CEPH_OSD_FLAG_ONDISK)); -} - -/* - * handle osd op reply. either call the callback if it is specified, - * or do the completion to wake up the waiting thread. - * - * ->r_unsafe_callback is set? yes no - * - * first reply is OK (needed r_cb/r_completion, r_cb/r_completion, - * any or needed/got safe) r_done_completion r_done_completion - * - * first reply is unsafe r_unsafe_cb(true) (nothing) - * - * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion, - * r_done_completion r_done_completion + * Handle MOSDOpReply. Set ->r_result and call the callback if it is + * specified. */ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) { @@ -2818,7 +2787,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) struct MOSDOpReply m; u64 tid = le64_to_cpu(msg->hdr.tid); u32 data_len = 0; - bool already_acked; int ret; int i; @@ -2897,50 +2865,22 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) le32_to_cpu(msg->hdr.data_len), req->r_tid); goto fail_request; } - dout("%s req %p tid %llu acked %d result %d data_len %u\n", __func__, - req, req->r_tid, req->r_got_reply, m.result, data_len); - - already_acked = req->r_got_reply; - if (!already_acked) { - req->r_result = m.result ?: data_len; - req->r_replay_version = m.replay_version; /* struct */ - req->r_got_reply = true; - } else if (!(m.flags & CEPH_OSD_FLAG_ONDISK)) { - dout("req %p tid %llu dup ack\n", req, req->r_tid); - goto out_unlock_session; - } - - if (done_request(req, &m)) { - finish_request(req); - if (req->r_linger) { - WARN_ON(req->r_unsafe_callback); - dout("req %p tid %llu cb (locked)\n", req, req->r_tid); - __complete_request(req); - } - } + dout("%s req %p tid %llu result %d data_len %u\n", __func__, + req, req->r_tid, m.result, data_len); + /* + * Since we only ever request ONDISK, we should only ever get + * one (type of) reply back. + */ + WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK)); + req->r_result = m.result ?: data_len; + finish_request(req); mutex_unlock(&osd->lock); up_read(&osdc->lock); - if (done_request(req, &m)) { - if (already_acked && req->r_unsafe_callback) { - dout("req %p tid %llu safe-cb\n", req, req->r_tid); - req->r_unsafe_callback(req, false); - } else if (!req->r_linger) { - dout("req %p tid %llu cb\n", req, req->r_tid); - __complete_request(req); - } - complete_all(&req->r_done_completion); - ceph_osdc_put_request(req); - } else { - if (req->r_unsafe_callback) { - dout("req %p tid %llu unsafe-cb\n", req, req->r_tid); - req->r_unsafe_callback(req, true); - } else { - WARN_ON(1); - } - } - + __complete_request(req); + complete_all(&req->r_completion); + ceph_osdc_put_request(req); return; fail_request: @@ -3540,7 +3480,7 @@ again: up_read(&osdc->lock); dout("%s waiting on req %p tid %llu last_tid %llu\n", __func__, req, req->r_tid, last_tid); - wait_for_completion(&req->r_done_completion); + wait_for_completion(&req->r_completion); ceph_osdc_put_request(req); goto again; } @@ -3599,7 +3539,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); - lreq->t.flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + lreq->t.flags = CEPH_OSD_FLAG_WRITE; lreq->mtime = CURRENT_TIME; lreq->reg_req = alloc_linger_request(lreq); @@ -3657,7 +3597,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + req->r_flags = CEPH_OSD_FLAG_WRITE; req->r_mtime = CURRENT_TIME; osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); @@ -4022,7 +3962,7 @@ EXPORT_SYMBOL(ceph_osdc_maybe_request_map); * Execute an OSD class method on an object. * * @flags: CEPH_OSD_FLAG_* - * @resp_len: out param for reply length + * @resp_len: in/out param for reply length */ int ceph_osdc_call(struct ceph_osd_client *osdc, struct ceph_object_id *oid, @@ -4035,6 +3975,9 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, struct ceph_osd_request *req; int ret; + if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE)) + return -E2BIG; + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); if (!req) return -ENOMEM; @@ -4053,7 +3996,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, 0, false, false); if (resp_page) osd_req_op_cls_response_data_pages(req, 0, &resp_page, - PAGE_SIZE, 0, false, false); + *resp_len, 0, false, false); ceph_osdc_start_request(osdc, req, false); ret = ceph_osdc_wait_request(osdc, req); @@ -4220,8 +4163,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, int page_align = off & ~PAGE_MASK; req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, true); if (IS_ERR(req)) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index d2436880b305..6824c0ec8373 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -153,6 +153,32 @@ bad: return -EINVAL; } +static void crush_finalize(struct crush_map *c) +{ + __s32 b; + + /* Space for the array of pointers to per-bucket workspace */ + c->working_size = sizeof(struct crush_work) + + c->max_buckets * sizeof(struct crush_work_bucket *); + + for (b = 0; b < c->max_buckets; b++) { + if (!c->buckets[b]) + continue; + + switch (c->buckets[b]->alg) { + default: + /* + * The base case, permutation variables and + * the pointer to the permutation array. + */ + c->working_size += sizeof(struct crush_work_bucket); + break; + } + /* Every bucket has a permutation array. */ + c->working_size += c->buckets[b]->size * sizeof(__u32); + } +} + static struct crush_map *crush_decode(void *pbyval, void *end) { struct crush_map *c; @@ -246,10 +272,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end) b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS); if (b->items == NULL) goto badmem; - b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS); - if (b->perm == NULL) - goto badmem; - b->perm_n = 0; ceph_decode_need(p, end, b->size*sizeof(u32), bad); for (j = 0; j < b->size; j++) @@ -368,6 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable chooseleaf_stable = %d\n", c->chooseleaf_stable); + crush_finalize(c); + done: dout("crush_decode success\n"); return c; @@ -719,7 +743,7 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) map->pool_max = -1; map->pg_temp = RB_ROOT; map->primary_temp = RB_ROOT; - mutex_init(&map->crush_scratch_mutex); + mutex_init(&map->crush_workspace_mutex); return map; } @@ -753,6 +777,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kfree(map->osd_weight); kfree(map->osd_addr); kfree(map->osd_primary_affinity); + kfree(map->crush_workspace); kfree(map); } @@ -808,6 +833,31 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) return 0; } +static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) +{ + void *workspace; + size_t work_size; + + if (IS_ERR(crush)) + return PTR_ERR(crush); + + work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); + dout("%s work_size %zu bytes\n", __func__, work_size); + workspace = kmalloc(work_size, GFP_NOIO); + if (!workspace) { + crush_destroy(crush); + return -ENOMEM; + } + crush_init_workspace(crush, workspace); + + if (map->crush) + crush_destroy(map->crush); + kfree(map->crush_workspace); + map->crush = crush; + map->crush_workspace = workspace; + return 0; +} + #define OSDMAP_WRAPPER_COMPAT_VER 7 #define OSDMAP_CLIENT_DATA_COMPAT_VER 1 @@ -1214,13 +1264,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) /* crush */ ceph_decode_32_safe(p, end, len, e_inval); - map->crush = crush_decode(*p, min(*p + len, end)); - if (IS_ERR(map->crush)) { - err = PTR_ERR(map->crush); - map->crush = NULL; + err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end))); + if (err) goto bad; - } - *p += len; /* ignore the rest */ *p = end; @@ -1375,7 +1421,6 @@ e_inval: struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map) { - struct crush_map *newcrush = NULL; struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; @@ -1414,12 +1459,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new crush? */ ceph_decode_32_safe(p, end, len, e_inval); if (len > 0) { - newcrush = crush_decode(*p, min(*p+len, end)); - if (IS_ERR(newcrush)) { - err = PTR_ERR(newcrush); - newcrush = NULL; + err = osdmap_set_crush(map, + crush_decode(*p, min(*p + len, end))); + if (err) goto bad; - } *p += len; } @@ -1439,12 +1482,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, map->epoch++; map->modified = modified; - if (newcrush) { - if (map->crush) - crush_destroy(map->crush); - map->crush = newcrush; - newcrush = NULL; - } /* new_pools */ err = decode_new_pools(p, end, map); @@ -1505,8 +1542,6 @@ bad: print_hex_dump(KERN_DEBUG, "osdmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); - if (newcrush) - crush_destroy(newcrush); return ERR_PTR(err); } @@ -1942,10 +1977,10 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, BUG_ON(result_max > CEPH_PG_MAX_SIZE); - mutex_lock(&map->crush_scratch_mutex); + mutex_lock(&map->crush_workspace_mutex); r = crush_do_rule(map->crush, ruleno, x, result, result_max, - weight, weight_max, map->crush_scratch_ary); - mutex_unlock(&map->crush_scratch_mutex); + weight, weight_max, map->crush_workspace); + mutex_unlock(&map->crush_workspace_mutex); return r; } @@ -1978,8 +2013,14 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap, return; } - len = do_crush(osdmap, ruleno, pps, raw->osds, - min_t(int, pi->size, ARRAY_SIZE(raw->osds)), + if (pi->size > ARRAY_SIZE(raw->osds)) { + pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n", + pi->id, pi->crush_ruleset, pi->type, pi->size, + ARRAY_SIZE(raw->osds)); + return; + } + + len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size, osdmap->osd_weight, osdmap->max_osd); if (len < 0) { pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c index 154683f5f14c..705414e78ae0 100644 --- a/net/ceph/snapshot.c +++ b/net/ceph/snapshot.c @@ -18,8 +18,6 @@ * 02110-1301, USA. */ -#include <stddef.h> - #include <linux/types.h> #include <linux/export.h> #include <linux/ceph/libceph.h> diff --git a/net/core/ethtool.c b/net/core/ethtool.c index be7bab1adcde..aecb2c7241b6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -24,7 +24,7 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/rtnetlink.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/net.h> /* diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b0c04cf4851d..3945821e9c1f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -15,6 +15,7 @@ #include <net/switchdev.h> #include <linux/if_arp.h> #include <linux/slab.h> +#include <linux/sched/signal.h> #include <linux/nsproxy.h> #include <net/sock.h> #include <net/net_namespace.h> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3c4bbec39713..652468ff65b7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -16,6 +16,8 @@ #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/net_namespace.h> +#include <linux/sched/task.h> + #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 11fce17274f6..6ae56037bb13 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -12,6 +12,8 @@ #include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fdtable.h> +#include <linux/sched/task.h> + #include <net/cls_cgroup.h> #include <net/sock.h> diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 756637dc7a57..0f9275ee5595 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -20,6 +20,8 @@ #include <linux/cgroup.h> #include <linux/rcupdate.h> #include <linux/atomic.h> +#include <linux/sched/task.h> + #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> diff --git a/net/core/scm.c b/net/core/scm.c index b6d83686e149..b1ff8a441748 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -14,6 +14,7 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/user.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/stat.h> diff --git a/net/core/stream.c b/net/core/stream.c index f575bcf64af2..20231dbb1da0 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -13,6 +13,7 @@ */ #include <linux/module.h> +#include <linux/sched/signal.h> #include <linux/net.h> #include <linux/signal.h> #include <linux/tcp.h> diff --git a/net/dccp/output.c b/net/dccp/output.c index b66c84db0766..91a15b3c4915 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/slab.h> +#include <linux/sched/signal.h> #include <net/inet_sock.h> #include <net/sock.h> diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a90ed67027b0..e6e79eda9763 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab..af781010753b 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -37,8 +37,10 @@ #include <linux/module.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/dns_resolver.h> #include <linux/err.h> + #include <keys/dns_resolver-type.h> #include <keys/user-type.h> @@ -70,7 +72,7 @@ int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time64_t *_expiry) { struct key *rkey; - const struct user_key_payload *upayload; + struct user_key_payload *upayload; const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; @@ -141,7 +143,7 @@ int dns_query(const char *type, const char *name, size_t namelen, if (ret) goto put; - upayload = user_key_payload(rkey); + upayload = user_key_payload_locked(rkey); len = upayload->datalen; ret = -ENOMEM; diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index e0bd013a1e5e..eedba7670b51 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -279,7 +279,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) pr_debug("name = %s, mtu = %u\n", dev->name, mtu); if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); + pr_debug("size = %zu, mtu = %u\n", size, mtu); err = -EMSGSIZE; goto out_dev; } @@ -645,7 +645,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) pr_debug("name = %s, mtu = %u\n", dev->name, mtu); if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); + pr_debug("size = %zu, mtu = %u\n", size, mtu); err = -EMSGSIZE; goto out_dev; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5d367b7ff542..cebedd545e5e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/socket.h> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7db2ad2e82d3..b39a791f6756 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, int ret, no_addr; struct fib_result res; struct flowi4 fl4; - struct net *net; + struct net *net = dev_net(dev); bool dev_match; fl4.flowi4_oif = 0; @@ -332,6 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; + fl4.flowi4_uid = sock_net_uid(net, NULL); no_addr = idev->ifa_list == NULL; @@ -339,13 +340,12 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, trace_fib_validate_source(dev, &fl4); - net = dev_net(dev); if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; - if (!rpf && !fib_num_tclassid_users(dev_net(dev)) && + if (!rpf && !fib_num_tclassid_users(net) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) goto last_resort; fib_combine_itag(itag, &res); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d8cea210af0e..2f0d8233950f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2388,7 +2388,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" - " %Zd bytes, size of tnode: %Zd bytes.\n", + " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index beacd028848c..c0317c940bcd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2596,7 +2596,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, - "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", + "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fcfd071f4705..bc1486f2c064 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -235,7 +235,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) } if ((unsigned int) *len < sizeof(struct sockaddr_in)) { - pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n", + pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", *len, sizeof(struct sockaddr_in)); return -EINVAL; } diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index f6f713376e6e..2f3895ddc275 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -69,7 +69,7 @@ static void dump_arp_packet(struct nf_log_buf *m, ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); if (ap == NULL) { - nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]", + nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); return; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb494a5050f7..8471dd116771 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1876,6 +1876,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_uid = sock_net_uid(net, NULL); err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) @@ -2008,6 +2009,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 35b280361cb2..50a0f3e51d5b 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -27,6 +27,8 @@ #include <linux/kernel.h> #include <linux/random.h> #include <linux/module.h> +#include <linux/sched/clock.h> + #include <net/tcp.h> #define HYSTART_ACK_TRAIN 1 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a2025f5bf2c..77362b88a661 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -43,6 +43,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c795fee372c4..644ba59fbd9d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -693,6 +693,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; + if (u->i_key) + u->i_flags |= GRE_KEY; + if (u->o_key) + u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index babaf3ec2742..6ba6c900ebcf 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1666,6 +1666,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; @@ -1677,9 +1681,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: - if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; @@ -1815,6 +1816,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 055c51b80f5d..97c724224da7 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -64,7 +64,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", ntohs(ih->payload_len) + sizeof(struct ipv6hdr), (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, ih->hop_limit, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ab254041dab7..81adc29a448d 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -46,6 +46,7 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/slab.h> +#include <linux/sched/signal.h> #include <linux/init.h> #include <linux/net.h> #include <linux/irda.h> diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 817b1b186aff..f6061c4bb0a8 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -32,7 +32,7 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/slab.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/seq_file.h> #include <linux/termios.h> #include <linux/tty.h> diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 1215693fdd22..7025dcb853d0 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,8 +13,9 @@ * 2) as a control channel (write commands, read events) */ -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/slab.h> + #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ @@ -51,7 +52,7 @@ irnet_ctrl_write(irnet_socket * ap, char * next; /* Next command to process */ int length; /* Length of current command */ - DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count); + DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count); /* Check for overflow... */ DABORT(count >= IRNET_MAX_COMMAND, -ENOMEM, @@ -66,7 +67,7 @@ irnet_ctrl_write(irnet_socket * ap, /* Safe terminate the string */ command[count] = '\0'; - DEBUG(CTRL_INFO, "Command line received is ``%s'' (%Zd).\n", + DEBUG(CTRL_INFO, "Command line received is ``%s'' (%zd).\n", command, count); /* Check every commands in the command line */ @@ -285,7 +286,7 @@ irnet_ctrl_read(irnet_socket * ap, char event[75]; ssize_t ret = 0; - DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count); + DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count); #ifdef INITIAL_DISCOVERY /* Check if we have read the log */ @@ -328,7 +329,7 @@ irnet_ctrl_read(irnet_socket * ap, if(ret != 0) { /* No, return the error code */ - DEXIT(CTRL_TRACE, " - ret %Zd\n", ret); + DEXIT(CTRL_TRACE, " - ret %zd\n", ret); return ret; } @@ -568,7 +569,7 @@ dev_irnet_write(struct file * file, { irnet_socket * ap = file->private_data; - DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", + DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n", file, ap, count); DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n"); @@ -592,7 +593,7 @@ dev_irnet_read(struct file * file, { irnet_socket * ap = file->private_data; - DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", + DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n", file, ap, count); DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n"); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 13190b38f22e..89bbde1081ce 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -17,7 +17,7 @@ #include <linux/list.h> #include <linux/errno.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/init.h> diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a646f3481240..309062f3debe 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -24,6 +24,8 @@ #include <linux/uaccess.h> #include <linux/workqueue.h> #include <linux/syscalls.h> +#include <linux/sched/signal.h> + #include <net/kcm.h> #include <net/netns/generic.h> #include <net/sock.h> diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b58000efee73..8adab6335ced 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1058,10 +1058,10 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Debug */ if (session->send_seq) - l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %Zd bytes, ns=%u\n", + l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %zd bytes, ns=%u\n", session->name, data_len, session->ns - 1); else - l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %Zd bytes\n", + l2tp_dbg(session, L2TP_MSG_DATA, "%s: send %zd bytes\n", session->name, data_len); if (session->debug & L2TP_MSG_DATA) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index c59712057dc8..d25038cfd64e 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -388,7 +388,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) drop: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); - return -1; + return 0; } /* Userspace will call sendmsg() on the tunnel socket to send L2TP diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5e9296382420..06186d608a27 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -26,6 +26,8 @@ #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/sched/signal.h> + #include <net/llc.h> #include <net/llc_sap.h> #include <net/llc_pdu.h> diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c28b0af9c1f2..6e7b6a07b7d5 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -681,7 +681,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + /* NULL SSID */ /* Channel Switch Announcement */ 2 + sizeof(struct ieee80211_channel_sw_ie) + - /* Mesh Channel Swith Parameters */ + /* Mesh Channel Switch Parameters */ 2 + sizeof(struct ieee80211_mesh_chansw_params_ie) + 2 + 8 + /* supported rates */ 2 + 3; /* DS params */ diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index fcba70e57073..953d71e784a9 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -9,6 +9,8 @@ #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/random.h> +#include <linux/rculist.h> + #include "ieee80211_i.h" #include "rate.h" #include "mesh.h" diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a3af6e1bfd98..0dd7c351002d 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -462,9 +462,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, unsigned long flags; spin_lock_irqsave(&local->ack_status_lock, flags); - skb = idr_find(&local->ack_status_frames, info->ack_frame_id); - if (skb) - idr_remove(&local->ack_status_frames, info->ack_frame_id); + skb = idr_remove(&local->ack_status_frames, info->ack_frame_id); spin_unlock_irqrestore(&local->ack_status_lock, flags); if (!skb) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 6a3e1c2181d3..1e1c9b20bab7 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -18,6 +18,8 @@ #include <linux/bug.h> #include <linux/completion.h> #include <linux/ieee802154.h> +#include <linux/rculist.h> + #include <crypto/aead.h> #include <crypto/skcipher.h> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 096a45103f14..e6a2753dff9e 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1429,7 +1429,7 @@ int __init ip_vs_conn_init(void) "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); - IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n", + IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 6be5c538b71e..75f798f8e83b 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -163,7 +163,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) return -ENOMEM; svc->sched_data = s; - IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " + IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); @@ -183,7 +183,7 @@ static void ip_vs_dh_done_svc(struct ip_vs_service *svc) /* release the table itself */ kfree_rcu(s, rcu_head); - IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", + IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index cccf4d637412..5824927cf8e0 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -356,7 +356,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) return -ENOMEM; svc->sched_data = tbl; - IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " + IP_VS_DBG(6, "LBLC hash table (memory=%zdbytes) allocated for " "current service\n", sizeof(*tbl)); /* @@ -393,7 +393,7 @@ static void ip_vs_lblc_done_svc(struct ip_vs_service *svc) /* release the table itself */ kfree_rcu(tbl, rcu_head); - IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", + IP_VS_DBG(6, "LBLC hash table (memory=%zdbytes) released\n", sizeof(*tbl)); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 796d70e47ddd..703f11877bee 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -519,7 +519,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) return -ENOMEM; svc->sched_data = tbl; - IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " + IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) allocated for " "current service\n", sizeof(*tbl)); /* @@ -556,7 +556,7 @@ static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc) /* release the table itself */ kfree_rcu(tbl, rcu_head); - IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", + IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) released\n", sizeof(*tbl)); } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 1e373a5e44e3..16aaac6eedc9 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -239,7 +239,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) return -ENOMEM; svc->sched_data = s; - IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " + IP_VS_DBG(6, "SH hash table (memory=%zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); @@ -259,7 +259,7 @@ static void ip_vs_sh_done_svc(struct ip_vs_service *svc) /* release the table itself */ kfree_rcu(s, rcu_head); - IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", + IP_VS_DBG(6, "SH hash table (memory=%zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 9350530c16c1..b03c28084f81 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1791,7 +1791,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, u16 mtu, min_mtu; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); - IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", + IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); if (!ipvs->sync_state) { diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e19a69787d99..4b2e1fb28bb4 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -410,7 +410,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *next; unsigned int h; - int ret = 1; + int ret = 0; if (!master_help) { ret = -ESHUTDOWN; @@ -460,14 +460,14 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, spin_lock_bh(&nf_conntrack_expect_lock); ret = __nf_ct_expect_check(expect); - if (ret <= 0) + if (ret < 0) goto out; nf_ct_expect_insert(expect); spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); - return ret; + return 0; out: spin_unlock_bh(&nf_conntrack_expect_lock); return ret; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e3ed20060878..4aecef4a89fb 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -300,7 +300,7 @@ static int find_pattern(const char *data, size_t dlen, { size_t i = plen; - pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); + pr_debug("find_pattern `%s': dlen = %zu\n", pattern, dlen); if (dlen <= plen) { /* Short packet: try for partial? */ diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3b79f34b5095..de8782345c86 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -48,7 +48,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, if (helper == NULL) return NF_DROP; - /* This is an user-space helper not yet configured, skip. */ + /* This is a user-space helper not yet configured, skip. */ if ((helper->flags & (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == NF_CT_HELPER_F_USERSPACE) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index c6b8022c0e47..bf548a7a71ec 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -528,6 +528,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (!nft_ct_tmpl_alloc_pcpu()) return -ENOMEM; nft_ct_pcpu_template_refcnt++; + len = sizeof(u16); break; #endif default: diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 97f9649bcc7e..152d226552c1 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -258,7 +258,7 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); - priv->bitmap_size = nft_bitmap_total_size(set->klen); + priv->bitmap_size = nft_bitmap_size(set->klen); return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 016db6be94b9..14857afc9937 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -667,7 +667,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; - /* compat_xt_entry match has less strict aligment requirements, + /* compat_xt_entry match has less strict alignment requirements, * otherwise they are identical. In case of padding differences * we need to add compat version of xt_check_entry_match. */ diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 16477df45b3b..3d705c688a27 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -13,6 +13,8 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/cred.h> + #include <net/sock.h> #include <net/inet_sock.h> #include <linux/netfilter/x_tables.h> diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ed212ffc1d9d..4bbf4526b885 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -17,7 +17,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b9edf5fae6ae..879885b31cce 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/nfc.h> +#include <linux/sched/signal.h> #include "nfc.h" #include "llcp.h" diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 8bad5624a27a..222bedcd9575 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -23,6 +23,7 @@ */ #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/socket.h> #include <net/sock.h> diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ffd5f2297584..a6c8da3ee893 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -27,6 +27,8 @@ #include <linux/kernel.h> #include <linux/net.h> #include <linux/poll.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include <net/tcp_states.h> diff --git a/net/rds/ib.c b/net/rds/ib.c index 8d70884d7bb6..91fe46f1e4cc 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -111,8 +111,7 @@ static void rds_ib_dev_free(struct work_struct *work) kfree(i_ipaddr); } - if (rds_ibdev->vector_load) - kfree(rds_ibdev->vector_load); + kfree(rds_ibdev->vector_load); kfree(rds_ibdev); } diff --git a/net/rds/ib.h b/net/rds/ib.h index 540458928f3c..ec550626e221 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -136,7 +136,7 @@ struct rds_ib_connection { struct rds_ib_work_ring i_send_ring; struct rm_data_op *i_data_op; struct rds_header *i_send_hdrs; - u64 i_send_hdrs_dma; + dma_addr_t i_send_hdrs_dma; struct rds_ib_send_work *i_sends; atomic_t i_signaled_sends; @@ -146,7 +146,7 @@ struct rds_ib_connection { struct rds_ib_incoming *i_ibinc; u32 i_recv_data_rem; struct rds_header *i_recv_hdrs; - u64 i_recv_hdrs_dma; + dma_addr_t i_recv_hdrs_dma; struct rds_ib_recv_work *i_recvs; u64 i_ack_recv; /* last ACK received */ struct rds_ib_refill_cache i_cache_incs; @@ -164,7 +164,7 @@ struct rds_ib_connection { struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; - u64 i_ack_dma; + dma_addr_t i_ack_dma; unsigned long i_ack_queued; /* Flow control related information @@ -235,7 +235,7 @@ struct rds_ib_device { int *vector_load; }; -#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device) +#define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent) #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 1c754f4acbe5..24c086db4511 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -45,7 +45,6 @@ struct rds_ib_fmr { struct ib_fmr *fmr; - u64 *dma; }; enum rds_ib_fr_state { diff --git a/net/rds/page.c b/net/rds/page.c index e2b5a5832d3d..7cc57e098ddb 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -45,35 +45,6 @@ struct rds_page_remainder { static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); -/* - * returns 0 on success or -errno on failure. - * - * We don't have to worry about flush_dcache_page() as this only works - * with private pages. If, say, we were to do directed receive to pinned - * user pages we'd have to worry more about cache coherence. (Though - * the flush_dcache_page() in get_user_pages() would probably be enough). - */ -int rds_page_copy_user(struct page *page, unsigned long offset, - void __user *ptr, unsigned long bytes, - int to_user) -{ - unsigned long ret; - void *addr; - - addr = kmap(page); - if (to_user) { - rds_stats_add(s_copy_to_user, bytes); - ret = copy_to_user(ptr, addr + offset, bytes); - } else { - rds_stats_add(s_copy_from_user, bytes); - ret = copy_from_user(addr + offset, ptr, bytes); - } - kunmap(page); - - return ret ? -EFAULT : 0; -} -EXPORT_SYMBOL_GPL(rds_page_copy_user); - /** * rds_page_remainder_alloc - build up regions of a message. * diff --git a/net/rds/rds.h b/net/rds/rds.h index 07fff73dd4f3..966d2ee1f107 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -798,13 +798,6 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr) /* page.c */ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp); -int rds_page_copy_user(struct page *page, unsigned long offset, - void __user *ptr, unsigned long bytes, - int to_user); -#define rds_page_copy_to_user(page, offset, ptr, bytes) \ - rds_page_copy_user(page, offset, ptr, bytes, 1) -#define rds_page_copy_from_user(page, offset, ptr, bytes) \ - rds_page_copy_user(page, offset, ptr, bytes, 0) void rds_page_exit(void); /* recv.c */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 57bb52361e0f..5438f6725092 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -641,12 +641,12 @@ static int rds_tcp_init(void) ret = register_netdevice_notifier(&rds_tcp_dev_notifier); if (ret) { pr_warn("could not register rds_tcp_dev_notifier\n"); - goto out; + goto out_slab; } ret = register_pernet_subsys(&rds_tcp_net_ops); if (ret) - goto out_slab; + goto out_notifier; ret = rds_tcp_recv_init(); if (ret) @@ -664,9 +664,10 @@ out_recv: rds_tcp_recv_exit(); out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); -out_slab: +out_notifier: if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) pr_warn("could not unregister rds_tcp_dev_notifier\n"); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9ad301c46b88..b8a1df2c9785 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -20,7 +20,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/string.h> diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 40a1ef2adeb4..c3be03e8d098 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -76,6 +76,8 @@ #include <linux/slab.h> #include <linux/idr.h> #include <linux/timer.h> +#include <linux/sched/signal.h> + #include "ar-internal.h" __read_mostly unsigned int rxrpc_max_client_connections = 1000; diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 18c737a61d80..0a4e28477ad9 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -1065,7 +1065,7 @@ static long rxrpc_read(const struct key *key, switch (token->security_index) { case RXRPC_SECURITY_RXKAD: - toksize += 8 * 4; /* viceid, kvno, key*2, begin, + toksize += 9 * 4; /* viceid, kvno, key*2 + len, begin, * end, primary, tktlen */ toksize += RND(token->kad->ticket_len); break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index c29362d50a92..28274a3c9831 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -14,6 +14,8 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <linux/export.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -320,8 +322,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, /* Barriers against rxrpc_input_data(). */ hard_ack = call->rx_hard_ack; - top = smp_load_acquire(&call->rx_top); - for (seq = hard_ack + 1; before_eq(seq, top); seq++) { + seq = hard_ack + 1; + while (top = smp_load_acquire(&call->rx_top), + before_eq(seq, top) + ) { ix = seq & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; if (!skb) { @@ -394,6 +398,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, ret = 1; goto out; } + + seq++; } out: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0a6ef217aa8a..19b36c60fb4c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,6 +15,8 @@ #include <linux/gfp.h> #include <linux/skbuff.h> #include <linux/export.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f219ff325ed4..b70aa57319ea 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -613,8 +613,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, goto err_mod; } - err = nla_memdup_cookie(a, tb); - if (err < 0) { + if (nla_memdup_cookie(a, tb) < 0) { + err = -ENOMEM; tcf_hash_release(a, bind); goto err_mod; } @@ -859,10 +859,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops); - if (err < 0) + if (err <= 0) goto out_module_put; - if (err == 0) - goto noflush_out; nla_nest_end(skb, nest); @@ -879,7 +877,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, out_module_put: module_put(ops->owner); err_out: -noflush_out: kfree_skb(skb); return err; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 41c80b6c3906..ae7e4f5b348b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -63,6 +63,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/random.h> diff --git a/net/sctp/output.c b/net/sctp/output.c index 85406d5f8f41..71ce6b945dcb 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -177,7 +177,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, { sctp_xmit_t retval; - pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__, + pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8227bbbd077a..1b6d4574d2b0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -199,6 +199,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; + union sctp_addr laddr; int error = 0; rcu_read_lock(); @@ -220,7 +221,10 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, !(copy_flags & SCTP_ADDR6_PEERSUPP))) continue; - if (sctp_bind_addr_state(bp, &addr->a) != -1) + laddr = addr->a; + /* also works for setting ipv6 address port */ + laddr.v4.sin_port = htons(bp->port); + if (sctp_bind_addr_state(bp, &laddr) != -1) continue; error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a), diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b5321486fbed..6f0a9be50f50 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,7 @@ #include <linux/kernel.h> #include <linux/wait.h> #include <linux/time.h> +#include <linux/sched/signal.h> #include <linux/ip.h> #include <linux/capability.h> #include <linux/fcntl.h> @@ -4862,6 +4863,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!asoc) return -EINVAL; + /* If there is a thread waiting on more sndbuf space for + * sending on this asoc, it cannot be peeled. + */ + if (waitqueue_active(&asoc->wait)) + return -EBUSY; + /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ @@ -7599,8 +7606,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); - if (sk != asoc->base.sk) - goto do_error; lock_sock(sk); *timeo_p = current_timeo; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5b63ceb3bf37..3379668af368 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -643,9 +643,7 @@ void sctp_transport_reset(struct sctp_transport *t) t->srtt = 0; t->rttvar = 0; - /* Reset these additional varibles so that we have a clean - * slate. - */ + /* Reset these additional variables so that we have a clean slate. */ t->partial_bytes_acked = 0; t->flight_size = 0; t->error_count = 0; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5d4208ad029e..85837ab90e89 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -27,6 +27,8 @@ #include <linux/inetdevice.h> #include <linux/workqueue.h> #include <linux/in.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include <net/tcp.h> #include <net/smc.h> diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index cc6b6f8651eb..e41f594a1e1d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -11,6 +11,8 @@ #include <linux/in.h> #include <linux/if_ether.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include <net/tcp.h> diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 03dfcc6b7661..67a71d170bed 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -9,6 +9,8 @@ */ #include <linux/workqueue.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include "smc.h" diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 5d1878732f46..c4ef9a4ec569 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -11,6 +11,8 @@ #include <linux/net.h> #include <linux/rcupdate.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include "smc.h" diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 6e73b28915ea..69a0013dd25c 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -15,6 +15,8 @@ #include <linux/net.h> #include <linux/rcupdate.h> #include <linux/workqueue.h> +#include <linux/sched/signal.h> + #include <net/sock.h> #include "smc.h" diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2bff63a73cf8..d2623b9f23d6 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/sched.h> +#include <linux/cred.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/errno.h> @@ -464,8 +465,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Note that the cred_unused list must be time-ordered. */ if (time_in_range(cred->cr_expire, expired, jiffies) && - test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + freed = SHRINK_STOP; break; + } list_del_init(&cred->cr_lru); number_cred_unused--; @@ -520,7 +523,7 @@ static unsigned long rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + return number_cred_unused * sysctl_vfs_cache_pressure / 100; } static void @@ -646,9 +649,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_auth = auth; cred->cr_ops = ops; cred->cr_expire = jiffies; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - cred->cr_magic = RPCAUTH_CRED_MAGIC; -#endif cred->cr_uid = acred->uid; } EXPORT_SYMBOL_GPL(rpcauth_init_cred); @@ -876,8 +876,12 @@ int __init rpcauth_init_module(void) err = rpc_init_generic_auth(); if (err < 0) goto out2; - register_shrinker(&rpc_cred_shrinker); + err = register_shrinker(&rpc_cred_shrinker); + if (err < 0) + goto out3; return 0; +out3: + rpc_destroy_generic_auth(); out2: rpc_destroy_authunix(); out1: diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cdeb1d814833..4f16953e4954 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -763,7 +763,7 @@ err_put_ctx: err: kfree(buf); out: - dprintk("RPC: %s returning %Zd\n", __func__, err); + dprintk("RPC: %s returning %zd\n", __func__, err); return err; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 153082598522..a54a7a3d28f5 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = seconds_since_boot(); - set_bit(CACHE_NEGATIVE, &rsci->h.flags); + /* Delete the entry from the cache_list and call cache_put */ + sunrpc_cache_unhash(sn->rsc_cache, &rsci->h); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; svc_putnl(resv, RPC_SUCCESS); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 4d17376b2acb..5f3d527dff65 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -139,7 +139,4 @@ struct rpc_cred null_cred = { .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - .cr_magic = RPCAUTH_CRED_MAGIC, -#endif }; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 306fc0f54596..82337e1ec9cd 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -14,12 +14,10 @@ #include <linux/sunrpc/auth.h> #include <linux/user_namespace.h> -#define NFS_NGROUPS 16 - struct unx_cred { struct rpc_cred uc_base; kgid_t uc_gid; - kgid_t uc_gids[NFS_NGROUPS]; + kgid_t uc_gids[UNX_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -82,13 +80,13 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; + if (groups > UNX_NGROUPS) + groups = UNX_NGROUPS; cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) cred->uc_gids[i] = acred->group_info->gid[i]; - if (i < NFS_NGROUPS) + if (i < UNX_NGROUPS) cred->uc_gids[i] = INVALID_GID; return &cred->uc_base; @@ -132,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; + if (groups > UNX_NGROUPS) + groups = UNX_NGROUPS; for (i = 0; i < groups ; i++) if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) return 0; - if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups])) + if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups])) return 0; return 1; } @@ -166,7 +164,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); hold = p++; - for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++) + for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++) *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); *hold = htonl(p - hold - 1); /* gid array length */ *base = htonl((p - base - 1) << 2); /* cred length */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f39e3e11f9aa..79d55d949d9a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) cache_purge(cd); spin_lock(&cache_list_lock); write_lock(&cd->hash_lock); - if (cd->entries) { - write_unlock(&cd->hash_lock); - spin_unlock(&cache_list_lock); - goto out; - } if (current_detail == cd) current_detail = NULL; list_del_init(&cd->others); @@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); } - return; -out: - printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name); } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); @@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { - time_t now = seconds_since_boot(); - if (detail->flush_time >= now) - now = detail->flush_time + 1; - /* 'now' is the maximum value any 'last_refresh' can have */ - detail->flush_time = now; - detail->nextcheck = seconds_since_boot(); - cache_flush(); + struct cache_head *ch = NULL; + struct hlist_head *head = NULL; + struct hlist_node *tmp = NULL; + int i = 0; + + write_lock(&detail->hash_lock); + if (!detail->entries) { + write_unlock(&detail->hash_lock); + return; + } + + dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name); + for (i = 0; i < detail->hash_size; i++) { + head = &detail->hash_table[i]; + hlist_for_each_entry_safe(ch, tmp, head, cache_list) { + hlist_del_init(&ch->cache_list); + detail->entries--; + + set_bit(CACHE_CLEANED, &ch->flags); + write_unlock(&detail->hash_lock); + cache_fresh_unlocked(ch, detail); + cache_put(ch, detail); + write_lock(&detail->hash_lock); + } + } + write_unlock(&detail->hash_lock); } EXPORT_SYMBOL_GPL(cache_purge); @@ -717,7 +728,7 @@ void cache_clean_deferred(void *owner) /* * communicate with user-space * - * We have a magic /proc file - /proc/sunrpc/<cachename>/channel. + * We have a magic /proc file - /proc/net/rpc/<cachename>/channel. * On read, you get a full request, or block. * On write, an update request is processed. * Poll works if anything to read, and always allows write. @@ -1272,7 +1283,7 @@ EXPORT_SYMBOL_GPL(qword_get); /* - * support /proc/sunrpc/cache/$CACHENAME/content + * support /proc/net/rpc/$CACHENAME/content * as a seqfile. * We call ->cache_show passing NULL for the item to * get a header, then pass each real item in the cache @@ -1427,20 +1438,11 @@ static ssize_t read_flush(struct file *file, char __user *buf, struct cache_detail *cd) { char tbuf[22]; - unsigned long p = *ppos; size_t len; - snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time)); - len = strlen(tbuf); - if (p >= len) - return 0; - len -= p; - if (len > count) - len = count; - if (copy_to_user(buf, (void*)(tbuf+p), len)) - return -EFAULT; - *ppos += len; - return len; + len = snprintf(tbuf, sizeof(tbuf), "%lu\n", + convert_to_wallclock(cd->flush_time)); + return simple_read_from_buffer(buf, count, ppos, tbuf, len); } static ssize_t write_flush(struct file *file, const char __user *buf, @@ -1600,21 +1602,12 @@ static const struct file_operations cache_flush_operations_procfs = { .llseek = no_llseek, }; -static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net) +static void remove_cache_proc_entries(struct cache_detail *cd) { - struct sunrpc_net *sn; - - if (cd->u.procfs.proc_ent == NULL) - return; - if (cd->u.procfs.flush_ent) - remove_proc_entry("flush", cd->u.procfs.proc_ent); - if (cd->u.procfs.channel_ent) - remove_proc_entry("channel", cd->u.procfs.proc_ent); - if (cd->u.procfs.content_ent) - remove_proc_entry("content", cd->u.procfs.proc_ent); - cd->u.procfs.proc_ent = NULL; - sn = net_generic(net, sunrpc_net_id); - remove_proc_entry(cd->name, sn->proc_net_rpc); + if (cd->procfs) { + proc_remove(cd->procfs); + cd->procfs = NULL; + } } #ifdef CONFIG_PROC_FS @@ -1624,38 +1617,30 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) struct sunrpc_net *sn; sn = net_generic(net, sunrpc_net_id); - cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc); - if (cd->u.procfs.proc_ent == NULL) + cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); + if (cd->procfs == NULL) goto out_nomem; - cd->u.procfs.channel_ent = NULL; - cd->u.procfs.content_ent = NULL; p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &cache_flush_operations_procfs, cd); - cd->u.procfs.flush_ent = p; + cd->procfs, &cache_flush_operations_procfs, cd); if (p == NULL) goto out_nomem; if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &cache_file_operations_procfs, cd); - cd->u.procfs.channel_ent = p; + cd->procfs, &cache_file_operations_procfs, cd); if (p == NULL) goto out_nomem; } if (cd->cache_show) { p = proc_create_data("content", S_IFREG|S_IRUSR, - cd->u.procfs.proc_ent, - &content_file_operations_procfs, cd); - cd->u.procfs.content_ent = p; + cd->procfs, &content_file_operations_procfs, cd); if (p == NULL) goto out_nomem; } return 0; out_nomem: - remove_cache_proc_entries(cd, net); + remove_cache_proc_entries(cd); return -ENOMEM; } #else /* CONFIG_PROC_FS */ @@ -1684,7 +1669,7 @@ EXPORT_SYMBOL_GPL(cache_register_net); void cache_unregister_net(struct cache_detail *cd, struct net *net) { - remove_cache_proc_entries(cd, net); + remove_cache_proc_entries(cd); sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(cache_unregister_net); @@ -1843,15 +1828,29 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); if (IS_ERR(dir)) return PTR_ERR(dir); - cd->u.pipefs.dir = dir; + cd->pipefs = dir; return 0; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { - rpc_remove_cache_dir(cd->u.pipefs.dir); - cd->u.pipefs.dir = NULL; + if (cd->pipefs) { + rpc_remove_cache_dir(cd->pipefs); + cd->pipefs = NULL; + } } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); +void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) +{ + write_lock(&cd->hash_lock); + if (!hlist_unhashed(&h->cache_list)){ + hlist_del_init(&h->cache_list); + cd->entries--; + write_unlock(&cd->hash_lock); + cache_put(h, cd); + } else + write_unlock(&cd->hash_lock); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unhash); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1dc9f3bac099..52da3ce54bb5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1453,21 +1453,6 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt) EXPORT_SYMBOL_GPL(rpc_max_bc_payload); /** - * rpc_get_timeout - Get timeout for transport in units of HZ - * @clnt: RPC client to query - */ -unsigned long rpc_get_timeout(struct rpc_clnt *clnt) -{ - unsigned long ret; - - rcu_read_lock(); - ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval; - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(rpc_get_timeout); - -/** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind * @@ -2699,6 +2684,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; + unsigned long connect_timeout; unsigned long reconnect_timeout; unsigned char resvport; int ret = 0; @@ -2711,6 +2697,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, return -EAGAIN; } resvport = xprt->resvport; + connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; rcu_read_unlock(); @@ -2720,7 +2707,10 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, goto out_put_switch; } xprt->resvport = resvport; - xprt->max_reconnect_timeout = reconnect_timeout; + if (xprt->ops->set_connect_timeout != NULL) + xprt->ops->set_connect_timeout(xprt, + connect_timeout, + reconnect_timeout); rpc_xprt_switch_set_roundrobin(xps); if (setup) { @@ -2737,26 +2727,39 @@ out_put_switch: } EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt); +struct connect_timeout_data { + unsigned long connect_timeout; + unsigned long reconnect_timeout; +}; + static int -rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt, +rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { - unsigned long timeout = *((unsigned long *)data); + struct connect_timeout_data *timeo = data; - if (timeout < xprt->max_reconnect_timeout) - xprt->max_reconnect_timeout = timeout; + if (xprt->ops->set_connect_timeout) + xprt->ops->set_connect_timeout(xprt, + timeo->connect_timeout, + timeo->reconnect_timeout); return 0; } void -rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) +rpc_set_connect_timeout(struct rpc_clnt *clnt, + unsigned long connect_timeout, + unsigned long reconnect_timeout) { + struct connect_timeout_data timeout = { + .connect_timeout = connect_timeout, + .reconnect_timeout = reconnect_timeout, + }; rpc_clnt_iterate_for_each_xprt(clnt, - rpc_xprt_cap_max_reconnect_timeout, - &timeo); + rpc_xprt_set_connect_timeout, + &timeout); } -EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); +EXPORT_SYMBOL_GPL(rpc_set_connect_timeout); void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) { diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index e7b4d93566df..c8fd0b6c1618 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -16,11 +16,6 @@ static struct dentry *rpc_xprt_dir; unsigned int rpc_inject_disconnect; -struct rpc_clnt_iter { - struct rpc_clnt *clnt; - loff_t pos; -}; - static int tasks_show(struct seq_file *f, void *v) { @@ -47,12 +42,10 @@ static void * tasks_start(struct seq_file *f, loff_t *ppos) __acquires(&clnt->cl_lock) { - struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = f->private; loff_t pos = *ppos; - struct rpc_clnt *clnt = iter->clnt; struct rpc_task *task; - iter->pos = pos + 1; spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) if (pos-- == 0) @@ -63,12 +56,10 @@ tasks_start(struct seq_file *f, loff_t *ppos) static void * tasks_next(struct seq_file *f, void *v, loff_t *pos) { - struct rpc_clnt_iter *iter = f->private; - struct rpc_clnt *clnt = iter->clnt; + struct rpc_clnt *clnt = f->private; struct rpc_task *task = v; struct list_head *next = task->tk_task.next; - ++iter->pos; ++*pos; /* If there's another task on list, return it */ @@ -81,9 +72,7 @@ static void tasks_stop(struct seq_file *f, void *v) __releases(&clnt->cl_lock) { - struct rpc_clnt_iter *iter = f->private; - struct rpc_clnt *clnt = iter->clnt; - + struct rpc_clnt *clnt = f->private; spin_unlock(&clnt->cl_lock); } @@ -96,17 +85,13 @@ static const struct seq_operations tasks_seq_operations = { static int tasks_open(struct inode *inode, struct file *filp) { - int ret = seq_open_private(filp, &tasks_seq_operations, - sizeof(struct rpc_clnt_iter)); - + int ret = seq_open(filp, &tasks_seq_operations); if (!ret) { struct seq_file *seq = filp->private_data; - struct rpc_clnt_iter *iter = seq->private; - - iter->clnt = inode->i_private; + struct rpc_clnt *clnt = seq->private = inode->i_private; - if (!atomic_inc_not_zero(&iter->clnt->cl_count)) { - seq_release_private(inode, filp); + if (!atomic_inc_not_zero(&clnt->cl_count)) { + seq_release(inode, filp); ret = -EINVAL; } } @@ -118,10 +103,10 @@ static int tasks_release(struct inode *inode, struct file *filp) { struct seq_file *seq = filp->private_data; - struct rpc_clnt_iter *iter = seq->private; + struct rpc_clnt *clnt = seq->private; - rpc_release_client(iter->clnt); - return seq_release_private(inode, filp); + rpc_release_client(clnt); + return seq_release(inode, filp); } static const struct file_operations tasks_fops = { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 75f290bddca1..a08aeb56b8e4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -11,7 +11,7 @@ */ #include <linux/linkage.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/net.h> #include <linux/in.h> @@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv) for (i = 0; i < progp->pg_nvers; i++) { if (progp->pg_vers[i] == NULL) continue; - if (progp->pg_vers[i]->vs_hidden == 0) + if (!progp->pg_vers[i]->vs_hidden) return 1; } } @@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net, if (vers->vs_hidden) continue; + /* + * Don't register a UDP port if we need congestion + * control. + */ + if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP) + continue; + error = __svc_register(net, progp->pg_name, progp->pg_prog, i, family, proto, port); @@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) !(versp = progp->pg_vers[vers])) goto err_bad_vers; + /* + * Some protocol versions (namely NFSv4) require some form of + * congestion control. (See RFC 7530 section 3.1 paragraph 2) + * In other words, UDP is not allowed. We mark those when setting + * up the svc_xprt, and verify that here. + * + * The spec is not very clear about what error should be returned + * when someone tries to access a server that is listening on UDP + * for lower versions. RPC_PROG_MISMATCH seems to be the closest + * fit. + */ + if (versp->vs_need_cong_ctrl && + !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) + goto err_bad_vers; + procp = versp->vs_proc + proc; if (proc >= versp->vs_nproc || !procp->pc_func) goto err_bad_proc; @@ -1260,7 +1282,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) return 0; err_short_len: - svc_printk(rqstp, "short len %Zd, dropping request\n", + svc_printk(rqstp, "short len %zd, dropping request\n", argv->iov_len); goto close; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 64af4f034de6..f81eaa8e0888 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -403,7 +403,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt) /**************************************************************************** * auth.unix.gid cache * simple cache to map a UID to a list of GIDs - * because AUTH_UNIX aka AUTH_SYS has a max of 16 + * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS */ #define GID_HASHBITS 8 #define GID_HASHMAX (1<<GID_HASHBITS) @@ -810,7 +810,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ slen = svc_getnl(argv); /* gids length */ - if (slen > 16 || (len -= (slen + 2)*4) < 0) + if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index de066acdb34e..8931e33b6541 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -278,7 +278,7 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) rqstp->rq_respages[0], tailoff); out: - dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", + dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n", svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); @@ -346,7 +346,7 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, if (len == buflen) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", + dprintk("svc: socket %p recvfrom(%p, %zu) = %d\n", svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 7f1071e103ca..1f7082144e01 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1518,3 +1518,37 @@ out: } EXPORT_SYMBOL_GPL(xdr_process_buf); +/** + * xdr_stream_decode_string_dup - Decode and duplicate variable length string + * @xdr: pointer to xdr_stream + * @str: location to store pointer to string + * @maxlen: maximum acceptable string length + * @gfp_flags: GFP mask to use + * + * Return values: + * On success, returns length of NUL-terminated string stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the string would exceed @maxlen + * %-ENOMEM on memory allocation failure + */ +ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, + size_t maxlen, gfp_t gfp_flags) +{ + void *p; + ssize_t ret; + + ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen); + if (ret > 0) { + char *s = kmalloc(ret + 1, gfp_flags); + if (s != NULL) { + memcpy(s, p, ret); + s[ret] = '\0'; + *str = s; + return strlen(s); + } + ret = -ENOMEM; + } + *str = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9a6be030ca7d..b530a2852ba8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -897,13 +897,11 @@ static void xprt_timer(struct rpc_task *task) return; dprintk("RPC: %5u xprt_timer\n", task->tk_pid); - spin_lock_bh(&xprt->transport_lock); if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(xprt, task); } else task->tk_status = 0; - spin_unlock_bh(&xprt->transport_lock); } /** diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 1ebb09e1ac4f..59e64025ed96 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -310,10 +310,7 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct rpcrdma_mw *mw; while (!list_empty(&req->rl_registered)) { - mw = list_first_entry(&req->rl_registered, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - + mw = rpcrdma_pop_mw(&req->rl_registered); if (sync) fmr_op_recover_mr(mw); else diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 47bed5333c7f..f81dd93176c0 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -466,8 +466,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) struct ib_send_wr *first, **prev, *last, *bad_wr; struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; + struct rpcrdma_mw *mw; int count, rc; dprintk("RPC: %s: req %p\n", __func__, req); @@ -534,10 +534,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * them to the free MW list. */ unmap: - list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { + while (!list_empty(&req->rl_registered)) { + mw = rpcrdma_pop_mw(&req->rl_registered); dprintk("RPC: %s: DMA unmapping frmr %p\n", __func__, &mw->frmr); - list_del_init(&mw->mw_list); ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); rpcrdma_put_mw(r_xprt, mw); @@ -571,10 +571,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct rpcrdma_mw *mw; while (!list_empty(&req->rl_registered)) { - mw = list_first_entry(&req->rl_registered, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - + mw = rpcrdma_pop_mw(&req->rl_registered); if (sync) frwr_op_recover_mr(mw); else diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c52e0f2ffe52..a044be2d6ad7 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) /* The client can send a request inline as long as the RPCRDMA header * plus the RPC call fit under the transport's inline limit. If the * combined call message size exceeds that limit, the client must use - * the read chunk list for this operation. + * a Read chunk for this operation. + * + * A Read chunk is also required if sending the RPC call inline would + * exceed this device's max_sge limit. */ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct xdr_buf *xdr = &rqst->rq_snd_buf; + unsigned int count, remaining, offset; + + if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) + return false; + + if (xdr->page_len) { + remaining = xdr->page_len; + offset = xdr->page_base & ~PAGE_MASK; + count = 0; + while (remaining) { + remaining -= min_t(unsigned int, + PAGE_SIZE - offset, remaining); + offset = 0; + if (++count > r_xprt->rx_ia.ri_max_send_sges) + return false; + } + } - return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; + return true; } /* The client can't know how large the actual reply will be. Thus it @@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) */ static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, - bool reminv_expected) +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, + unsigned int pos, enum rpcrdma_chunktype type, + struct rpcrdma_mr_seg *seg) { int len, n, p, page_base; struct page **ppages; @@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (len && n == RPCRDMA_MAX_SEGS) goto out_overflow; - /* When encoding the read list, the tail is always sent inline */ - if (type == rpcrdma_readch) + /* When encoding a Read chunk, the tail iovec contains an + * XDR pad and may be omitted. + */ + if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) return n; - /* When encoding the Write list, some servers need to see an extra - * segment for odd-length Write chunks. The upper layer provides - * space in the tail iovec for this purpose. + /* When encoding a Write chunk, some servers need to see an + * extra segment for non-XDR-aligned Write chunks. The upper + * layer provides space in the tail iovec that may be used + * for this purpose. */ - if (type == rpcrdma_writech && reminv_expected) + if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) return n; if (xdrbuf->tail[0].iov_len) { - /* the rpcrdma protocol allows us to omit any trailing - * xdr pad bytes, saving the server an RDMA operation. */ - if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) - return n; n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); if (n == RPCRDMA_MAX_SEGS) goto out_overflow; @@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, + rtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -302,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, false, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); *iptr++ = xdr_one; /* item present */ @@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -371,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, true, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); iptr = xdr_encode_rdma_segment(iptr, mw); @@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -437,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, true, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); iptr = xdr_encode_rdma_segment(iptr, mw); @@ -741,13 +759,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) iptr = headerp->rm_body.rm_chunks; iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", @@ -758,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, &rqst->rq_snd_buf, rtype)) { iptr = ERR_PTR(-EIO); - goto out_unmap; + goto out_err; } return 0; -out_unmap: - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); +out_err: + pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", + PTR_ERR(iptr)); + r_xprt->rx_stats.failed_marshal_count++; return PTR_ERR(iptr); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index cb1e48e54eb1..ff1df40f0d26 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; + __be32 *p; int rc; /* Space in the send buffer for an RPC/RDMA header is reserved * via xprt->tsh_size. */ - headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = rpcrdma_version; - headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); - headerp->rm_type = rdma_msg; - headerp->rm_body.rm_chunks[0] = xdr_zero; - headerp->rm_body.rm_chunks[1] = xdr_zero; - headerp->rm_body.rm_chunks[2] = xdr_zero; + p = rqst->rq_buffer; + *p++ = rqst->rq_xid; + *p++ = rpcrdma_version; + *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); + *p++ = rdma_msg; + *p++ = xdr_zero; + *p++ = xdr_zero; + *p = xdr_zero; #ifdef SVCRDMA_BACKCHANNEL_DEBUG pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 0ba9887f3e22..1c4aabf0f657 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2016 Oracle. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -47,102 +48,43 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -/* - * Decodes a read chunk list. The expected format is as follows: - * descrim : xdr_one - * position : __be32 offset into XDR stream - * handle : __be32 RKEY - * . . . - * end-of-list: xdr_zero - */ -static __be32 *decode_read_list(__be32 *va, __be32 *vaend) +static __be32 *xdr_check_read_list(__be32 *p, __be32 *end) { - struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; + __be32 *next; - while (ch->rc_discrim != xdr_zero) { - if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > - (unsigned long)vaend) { - dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); + while (*p++ != xdr_zero) { + next = p + rpcrdma_readchunk_maxsz - 1; + if (next > end) return NULL; - } - ch++; + p = next; } - return &ch->rc_position; + return p; } -/* - * Decodes a write chunk list. The expected format is as follows: - * descrim : xdr_one - * nchunks : <count> - * handle : __be32 RKEY ---+ - * length : __be32 <len of segment> | - * offset : remove va + <count> - * . . . | - * ---+ - */ -static __be32 *decode_write_list(__be32 *va, __be32 *vaend) +static __be32 *xdr_check_write_list(__be32 *p, __be32 *end) { - unsigned long start, end; - int nchunks; - - struct rpcrdma_write_array *ary = - (struct rpcrdma_write_array *)va; + __be32 *next; - /* Check for not write-array */ - if (ary->wc_discrim == xdr_zero) - return &ary->wc_nchunks; - - if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > - (unsigned long)vaend) { - dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); - return NULL; - } - nchunks = be32_to_cpu(ary->wc_nchunks); - - start = (unsigned long)&ary->wc_array[0]; - end = (unsigned long)vaend; - if (nchunks < 0 || - nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || - (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { - dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", - ary, nchunks, vaend); - return NULL; + while (*p++ != xdr_zero) { + next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; + if (next > end) + return NULL; + p = next; } - /* - * rs_length is the 2nd 4B field in wc_target and taking its - * address skips the list terminator - */ - return &ary->wc_array[nchunks].wc_target.rs_length; + return p; } -static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) +static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end) { - unsigned long start, end; - int nchunks; - struct rpcrdma_write_array *ary = - (struct rpcrdma_write_array *)va; - - /* Check for no reply-array */ - if (ary->wc_discrim == xdr_zero) - return &ary->wc_nchunks; - - if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > - (unsigned long)vaend) { - dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); - return NULL; - } - nchunks = be32_to_cpu(ary->wc_nchunks); - - start = (unsigned long)&ary->wc_array[0]; - end = (unsigned long)vaend; - if (nchunks < 0 || - nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || - (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { - dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", - ary, nchunks, vaend); - return NULL; + __be32 *next; + + if (*p++ != xdr_zero) { + next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; + if (next > end) + return NULL; + p = next; } - return (__be32 *)&ary->wc_array[nchunks]; + return p; } /** @@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) */ int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) { - struct rpcrdma_msg *rmsgp; - __be32 *va, *vaend; - unsigned int len; - u32 hdr_len; + __be32 *p, *end, *rdma_argp; + unsigned int hdr_len; /* Verify that there's enough bytes for header + something */ - if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) { - dprintk("svcrdma: header too short = %d\n", - rq_arg->len); - return -EINVAL; - } + if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) + goto out_short; - rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base; - if (rmsgp->rm_vers != rpcrdma_version) { - dprintk("%s: bad version %u\n", __func__, - be32_to_cpu(rmsgp->rm_vers)); - return -EPROTONOSUPPORT; - } + rdma_argp = rq_arg->head[0].iov_base; + if (*(rdma_argp + 1) != rpcrdma_version) + goto out_version; - switch (be32_to_cpu(rmsgp->rm_type)) { - case RDMA_MSG: - case RDMA_NOMSG: + switch (*(rdma_argp + 3)) { + case rdma_msg: + case rdma_nomsg: break; - case RDMA_DONE: - /* Just drop it */ - dprintk("svcrdma: dropping RDMA_DONE message\n"); - return 0; - - case RDMA_ERROR: - /* Possible if this is a backchannel reply. - * XXX: We should cancel this XID, though. - */ - dprintk("svcrdma: dropping RDMA_ERROR message\n"); - return 0; - - case RDMA_MSGP: - /* Pull in the extra for the padded case, bump our pointer */ - rmsgp->rm_body.rm_padded.rm_align = - be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align); - rmsgp->rm_body.rm_padded.rm_thresh = - be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh); - - va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; - rq_arg->head[0].iov_base = va; - len = (u32)((unsigned long)va - (unsigned long)rmsgp); - rq_arg->head[0].iov_len -= len; - if (len > rq_arg->len) - return -EINVAL; - return len; - default: - dprintk("svcrdma: bad rdma procedure (%u)\n", - be32_to_cpu(rmsgp->rm_type)); - return -EINVAL; - } + case rdma_done: + goto out_drop; - /* The chunk list may contain either a read chunk list or a write - * chunk list and a reply chunk list. - */ - va = &rmsgp->rm_body.rm_chunks[0]; - vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len); - va = decode_read_list(va, vaend); - if (!va) { - dprintk("svcrdma: failed to decode read list\n"); - return -EINVAL; - } - va = decode_write_list(va, vaend); - if (!va) { - dprintk("svcrdma: failed to decode write list\n"); - return -EINVAL; - } - va = decode_reply_array(va, vaend); - if (!va) { - dprintk("svcrdma: failed to decode reply chunk\n"); - return -EINVAL; + case rdma_error: + goto out_drop; + + default: + goto out_proc; } - rq_arg->head[0].iov_base = va; - hdr_len = (unsigned long)va - (unsigned long)rmsgp; + end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len); + p = xdr_check_read_list(rdma_argp + 4, end); + if (!p) + goto out_inval; + p = xdr_check_write_list(p, end); + if (!p) + goto out_inval; + p = xdr_check_reply_chunk(p, end); + if (!p) + goto out_inval; + if (p > end) + goto out_inval; + + rq_arg->head[0].iov_base = p; + hdr_len = (unsigned long)p - (unsigned long)rdma_argp; rq_arg->head[0].iov_len -= hdr_len; return hdr_len; + +out_short: + dprintk("svcrdma: header too short = %d\n", rq_arg->len); + return -EINVAL; + +out_version: + dprintk("svcrdma: bad xprt version: %u\n", + be32_to_cpup(rdma_argp + 1)); + return -EPROTONOSUPPORT; + +out_drop: + dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n"); + return 0; + +out_proc: + dprintk("svcrdma: bad rdma procedure (%u)\n", + be32_to_cpup(rdma_argp + 3)); + return -EINVAL; + +out_inval: + dprintk("svcrdma: failed to parse transport header\n"); + return -EINVAL; } int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, @@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, *va++ = rmsgp->rm_xid; *va++ = rmsgp->rm_vers; - *va++ = cpu_to_be32(xprt->sc_max_requests); + *va++ = xprt->sc_fc_credits; *va++ = rdma_error; *va++ = cpu_to_be32(err); if (err == ERR_VERS) { @@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, return (int)((unsigned long)va - (unsigned long)startp); } -int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) +/** + * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header + * @rdma_resp: buffer containing Reply transport header + * + * Returns length of transport header, in bytes. + */ +unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp) { - struct rpcrdma_write_array *wr_ary; + unsigned int nsegs; + __be32 *p; - /* There is no read-list in a reply */ + p = rdma_resp; - /* skip write list */ - wr_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[1]; - if (wr_ary->wc_discrim) - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]. - wc_target.rs_length; - else - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_nchunks; - - /* skip reply array */ - if (wr_ary->wc_discrim) - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]; - else - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_nchunks; - - return (unsigned long) wr_ary - (unsigned long) rmsgp; + /* RPC-over-RDMA V1 replies never have a Read list. */ + p += rpcrdma_fixed_maxsz + 1; + + /* Skip Write list. */ + while (*p++ != xdr_zero) { + nsegs = be32_to_cpup(p++); + p += nsegs * rpcrdma_segment_maxsz; + } + + /* Skip Reply chunk. */ + if (*p++ != xdr_zero) { + nsegs = be32_to_cpup(p++); + p += nsegs * rpcrdma_segment_maxsz; + } + + return (unsigned long)p - (unsigned long)rdma_resp; } void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) @@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, seg->rs_offset = rs_offset; seg->rs_length = cpu_to_be32(write_len); } - -void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, - struct rpcrdma_msg *rdma_argp, - struct rpcrdma_msg *rdma_resp, - enum rpcrdma_proc rdma_type) -{ - rdma_resp->rm_xid = rdma_argp->rm_xid; - rdma_resp->rm_vers = rdma_argp->rm_vers; - rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests); - rdma_resp->rm_type = cpu_to_be32(rdma_type); - - /* Encode <nul> chunks lists */ - rdma_resp->rm_body.rm_chunks[0] = xdr_zero; - rdma_resp->rm_body.rm_chunks[1] = xdr_zero; - rdma_resp->rm_body.rm_chunks[2] = xdr_zero; -} diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 172b537f8cfc..f7b2daf72a86 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) dprintk("svcrdma: rqstp=%p\n", rqstp); - spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); + spin_lock(&rdma_xprt->sc_rq_dto_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { - ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); - spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); + ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); + spin_unlock(&rdma_xprt->sc_rq_dto_lock); rdma_read_complete(rqstp, ctxt); goto complete; } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { - ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); } else { atomic_inc(&rdma_stat_rq_starve); clear_bit(XPT_DATA, &xprt->xpt_flags); ctxt = NULL; } - spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); + spin_unlock(&rdma_xprt->sc_rq_dto_lock); if (!ctxt) { /* This is the EAGAIN path. The svc_recv routine will * return -EAGAIN, the nfsd thread will go to call into diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index ad4d286a83c5..515221b16d09 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; - ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); + ctxt->sge[0].length = + svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp); ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, ctxt->sge[0].length, DMA_TO_DEVICE); @@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) struct rpcrdma_msg *rdma_argp; struct rpcrdma_msg *rdma_resp; struct rpcrdma_write_array *wr_ary, *rp_ary; - enum rpcrdma_proc reply_type; int ret; int inline_bytes; struct page *res_page; struct svc_rdma_req_map *vec; u32 inv_rkey; + __be32 *p; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); @@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!res_page) goto err0; rdma_resp = page_address(res_page); - if (rp_ary) - reply_type = RDMA_NOMSG; - else - reply_type = RDMA_MSG; - svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, - rdma_resp, reply_type); + + p = &rdma_resp->rm_xid; + *p++ = rdma_argp->rm_xid; + *p++ = rdma_argp->rm_vers; + *p++ = rdma->sc_fc_credits; + *p++ = rp_ary ? rdma_nomsg : rdma_msg; + + /* Start with empty chunks */ + *p++ = xdr_zero; + *p++ = xdr_zero; + *p = xdr_zero; /* Send any write-chunk data and build resp write-list */ if (wr_ary) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 39652d390a9c..c13a5c35ce14 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, ctxt = kmalloc(sizeof(*ctxt), flags); if (ctxt) { ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->free); - INIT_LIST_HEAD(&ctxt->dto_q); + INIT_LIST_HEAD(&ctxt->list); } return ctxt; } @@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) dprintk("svcrdma: No memory for RDMA ctxt\n"); return false; } - list_add(&ctxt->free, &xprt->sc_ctxts); + list_add(&ctxt->list, &xprt->sc_ctxts); } return true; } @@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; - spin_lock_bh(&xprt->sc_ctxt_lock); + spin_lock(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used++; if (list_empty(&xprt->sc_ctxts)) goto out_empty; ctxt = list_first_entry(&xprt->sc_ctxts, - struct svc_rdma_op_ctxt, free); - list_del_init(&ctxt->free); - spin_unlock_bh(&xprt->sc_ctxt_lock); + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); + spin_unlock(&xprt->sc_ctxt_lock); out: ctxt->count = 0; @@ -209,15 +208,15 @@ out_empty: /* Either pre-allocation missed the mark, or send * queue accounting is broken. */ - spin_unlock_bh(&xprt->sc_ctxt_lock); + spin_unlock(&xprt->sc_ctxt_lock); ctxt = alloc_ctxt(xprt, GFP_NOIO); if (ctxt) goto out; - spin_lock_bh(&xprt->sc_ctxt_lock); + spin_lock(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used--; - spin_unlock_bh(&xprt->sc_ctxt_lock); + spin_unlock(&xprt->sc_ctxt_lock); WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); return NULL; } @@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - spin_lock_bh(&xprt->sc_ctxt_lock); + spin_lock(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used--; - list_add(&ctxt->free, &xprt->sc_ctxts); - spin_unlock_bh(&xprt->sc_ctxt_lock); + list_add(&ctxt->list, &xprt->sc_ctxts); + spin_unlock(&xprt->sc_ctxt_lock); } static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) @@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) struct svc_rdma_op_ctxt *ctxt; ctxt = list_first_entry(&xprt->sc_ctxts, - struct svc_rdma_op_ctxt, free); - list_del(&ctxt->free); + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); kfree(ctxt); } } @@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* All wc fields are now known to be valid */ ctxt->byte_len = wc->byte_len; spin_lock(&xprt->sc_rq_dto_lock); - list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); + list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q); spin_unlock(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); @@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc) read_hdr = ctxt->read_hdr; spin_lock(&xprt->sc_rq_dto_lock); - list_add_tail(&read_hdr->dto_q, + list_add_tail(&read_hdr->list, &xprt->sc_read_complete_q); spin_unlock(&xprt->sc_rq_dto_lock); @@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, return NULL; svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); - INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); @@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, spin_lock_init(&cma_xprt->sc_ctxt_lock); spin_lock_init(&cma_xprt->sc_map_lock); + /* + * Note that this implies that the underlying transport support + * has some form of congestion control (see RFC 7530 section 3.1 + * paragraph 2). For now, we assume that all supported RDMA + * transports are suitable here. + */ + set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); + if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); @@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) { struct svc_rdma_fastreg_mr *frmr = NULL; - spin_lock_bh(&rdma->sc_frmr_q_lock); + spin_lock(&rdma->sc_frmr_q_lock); if (!list_empty(&rdma->sc_frmr_q)) { frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); frmr->sg_nents = 0; } - spin_unlock_bh(&rdma->sc_frmr_q_lock); + spin_unlock(&rdma->sc_frmr_q_lock); if (frmr) return frmr; @@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { ib_dma_unmap_sg(rdma->sc_cm_id->device, frmr->sg, frmr->sg_nents, frmr->direction); - spin_lock_bh(&rdma->sc_frmr_q_lock); + spin_lock(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); - spin_unlock_bh(&rdma->sc_frmr_q_lock); + spin_unlock(&rdma->sc_frmr_q_lock); } } @@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, svcrdma_max_requests); + newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, svcrdma_max_bc_requests); newxprt->sc_rq_depth = newxprt->sc_max_requests + @@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) goto errout; } newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, - 0, IB_POLL_SOFTIRQ); + 0, IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_sq_cq)) { dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, - 0, IB_POLL_SOFTIRQ); + 0, IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_rq_cq)) { dprintk("svcrdma: error creating RQ CQ for connect request\n"); goto errout; @@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work) */ while (!list_empty(&rdma->sc_read_complete_q)) { struct svc_rdma_op_ctxt *ctxt; - ctxt = list_entry(rdma->sc_read_complete_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma->sc_read_complete_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); svc_rdma_put_context(ctxt, 1); } /* Destroy queued, but not processed recv completions */ while (!list_empty(&rdma->sc_rq_dto_q)) { struct svc_rdma_op_ctxt *ctxt; - ctxt = list_entry(rdma->sc_rq_dto_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma->sc_rq_dto_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); svc_rdma_put_context(ctxt, 1); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 534c178d2a7e..c717f5410776 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 1; + int xprt_rdma_pad_optimize = 0; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -709,10 +709,6 @@ xprt_rdma_send_request(struct rpc_task *task) return 0; failed_marshal: - dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", - __func__, rc); - if (rc == -EIO) - r_xprt->rx_stats.failed_marshal_count++; if (rc != -ENOTCONN) return rc; drop_connection: diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 11d07748f699..81cd31acf690 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -54,6 +54,7 @@ #include <linux/sunrpc/svc_rdma.h> #include <asm/bitops.h> #include <linux/module.h> /* try_module_get()/module_put() */ +#include <rdma/ib_cm.h> #include "xprt_rdma.h" @@ -208,6 +209,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, /* Default settings for RPC-over-RDMA Version One */ r_xprt->rx_ia.ri_reminv_expected = false; + r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; @@ -215,6 +217,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { r_xprt->rx_ia.ri_reminv_expected = true; + r_xprt->rx_ia.ri_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } @@ -277,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) connstate = -ENETDOWN; goto connected; case RDMA_CM_EVENT_REJECTED: +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n", + sap, rpc_get_port(sap), ia->ri_device->name, + rdma_reject_msg(id, event->status)); +#endif connstate = -ECONNREFUSED; + if (event->status == IB_CM_REJ_STALE_CONN) + connstate = -EAGAIN; goto connected; case RDMA_CM_EVENT_DISCONNECTED: connstate = -ECONNABORTED; @@ -486,18 +496,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) + struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; - unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { - dprintk("RPC: %s: insufficient sge's available\n", - __func__); + max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } + ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", @@ -522,7 +533,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; + ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; @@ -640,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) int rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { + struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, + rx_ia); struct rdma_cm_id *id, *old; + struct sockaddr *sap; + unsigned int extras; int rc = 0; - int retry_count = 0; if (ep->rep_connected != 0) { - struct rpcrdma_xprt *xprt; retry: dprintk("RPC: %s: reconnecting...\n", __func__); rpcrdma_ep_disconnect(ep, ia); - xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - id = rpcrdma_create_id(xprt, ia, - (struct sockaddr *)&xprt->rx_data.addr); + sap = (struct sockaddr *)&r_xprt->rx_data.addr; + id = rpcrdma_create_id(r_xprt, ia, sap); if (IS_ERR(id)) { rc = -EHOSTUNREACH; goto out; @@ -708,51 +720,18 @@ retry: } wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); - - /* - * Check state. A non-peer reject indicates no listener - * (ECONNREFUSED), which may be a transient state. All - * others indicate a transport condition which has already - * undergone a best-effort. - */ - if (ep->rep_connected == -ECONNREFUSED && - ++retry_count <= RDMA_CONNECT_RETRY_MAX) { - dprintk("RPC: %s: non-peer_reject, retry\n", __func__); - goto retry; - } if (ep->rep_connected <= 0) { - /* Sometimes, the only way to reliably connect to remote - * CMs is to use same nonzero values for ORD and IRD. */ - if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && - (ep->rep_remote_cma.responder_resources == 0 || - ep->rep_remote_cma.initiator_depth != - ep->rep_remote_cma.responder_resources)) { - if (ep->rep_remote_cma.responder_resources == 0) - ep->rep_remote_cma.responder_resources = 1; - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; + if (ep->rep_connected == -EAGAIN) goto retry; - } rc = ep->rep_connected; - } else { - struct rpcrdma_xprt *r_xprt; - unsigned int extras; - - dprintk("RPC: %s: connected\n", __func__); - - r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - extras = r_xprt->rx_buf.rb_bc_srv_max_requests; - - if (extras) { - rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); - if (rc) { - pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", - __func__, rc); - rc = 0; - } - } + goto out; } + dprintk("RPC: %s: connected\n", __func__); + extras = r_xprt->rx_buf.rb_bc_srv_max_requests; + if (extras) + rpcrdma_ep_post_extra_recv(r_xprt, extras); + out: if (rc) ep->rep_connected = rc; @@ -797,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) spin_lock(&buf->rb_recovery_lock); while (!list_empty(&buf->rb_stale_mrs)) { - mw = list_first_entry(&buf->rb_stale_mrs, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); + mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); dprintk("RPC: %s: recovering MR %p\n", __func__, mw); @@ -817,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_recovery_lock); - list_add(&mw->mw_list, &buf->rb_stale_mrs); + rpcrdma_push_mw(mw, &buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); schedule_delayed_work(&buf->rb_recovery_worker, 0); @@ -1093,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mw *mw = NULL; spin_lock(&buf->rb_mwlock); - if (!list_empty(&buf->rb_mws)) { - mw = list_first_entry(&buf->rb_mws, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - } + if (!list_empty(&buf->rb_mws)) + mw = rpcrdma_pop_mw(&buf->rb_mws); spin_unlock(&buf->rb_mwlock); if (!mw) @@ -1120,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_mwlock); - list_add_tail(&mw->mw_list, &buf->rb_mws); + rpcrdma_push_mw(mw, &buf->rb_mws); spin_unlock(&buf->rb_mwlock); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index e35efd4ac1e4..171a35116de9 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -74,7 +74,9 @@ struct rpcrdma_ia { unsigned int ri_max_frmr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; + unsigned int ri_max_send_sges; bool ri_reminv_expected; + bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; @@ -303,15 +305,19 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ char *mr_offset; /* kva if no page, else offset */ }; -/* Reserve enough Send SGEs to send a maximum size inline request: +/* The Send SGE array is provisioned to send a maximum size + * inline request: * - RPC-over-RDMA header * - xdr_buf head iovec - * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages + * - RPCRDMA_MAX_INLINE bytes, in pages * - xdr_buf tail iovec + * + * The actual number of array elements consumed by each RPC + * depends on the device's max_sge limit. */ enum { - RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, - RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, + RPCRDMA_MIN_SEND_SGES = 3, + RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT, RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, }; @@ -348,6 +354,22 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) return rqst->rq_xprtdata; } +static inline void +rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) +{ + list_add_tail(&mw->mw_list, list); +} + +static inline struct rpcrdma_mw * +rpcrdma_pop_mw(struct list_head *list) +{ + struct rpcrdma_mw *mw; + + mw = list_first_entry(list, struct rpcrdma_mw, mw_list); + list_del(&mw->mw_list); + return mw; +} + /* * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for * inline requests/replies, and client/server credits. diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index af392d9b9cec..16aff8ddc16f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -52,6 +52,8 @@ #include "sunrpc.h" static void xs_close(struct rpc_xprt *xprt); +static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, + struct socket *sock); /* * xprtsock tunables @@ -666,6 +668,9 @@ static int xs_tcp_send_request(struct rpc_task *task) if (task->tk_flags & RPC_TASK_SENT) zerocopy = false; + if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) + xs_tcp_set_socket_timeouts(xprt, transport->sock); + /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ @@ -1188,7 +1193,7 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r char *p; len = sizeof(transport->tcp_xid) - transport->tcp_offset; - dprintk("RPC: reading XID (%Zu bytes)\n", len); + dprintk("RPC: reading XID (%zu bytes)\n", len); p = ((char *) &transport->tcp_xid) + transport->tcp_offset; used = xdr_skb_read_bits(desc, p, len); transport->tcp_offset += used; @@ -1219,7 +1224,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, */ offset = transport->tcp_offset - sizeof(transport->tcp_xid); len = sizeof(transport->tcp_calldir) - offset; - dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); + dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", len); p = ((char *) &transport->tcp_calldir) + offset; used = xdr_skb_read_bits(desc, p, len); transport->tcp_offset += used; @@ -1310,7 +1315,7 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, return; } - dprintk("RPC: XID %08x read %Zd bytes\n", + dprintk("RPC: XID %08x read %zd bytes\n", ntohl(transport->tcp_xid), r); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, " "tcp_reclen = %u\n", xprt, transport->tcp_copied, @@ -1456,7 +1461,7 @@ static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_s desc->count -= len; desc->offset += len; transport->tcp_offset += len; - dprintk("RPC: discarded %Zu bytes\n", len); + dprintk("RPC: discarded %zu bytes\n", len); xs_tcp_check_fraghdr(transport); } @@ -1734,7 +1739,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t */ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) { + spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); + spin_unlock_bh(&xprt->transport_lock); } static unsigned short xs_get_random_port(void) @@ -2235,6 +2242,66 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) xs_reset_transport(transport); } +static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + unsigned int keepidle; + unsigned int keepcnt; + unsigned int opt_on = 1; + unsigned int timeo; + + spin_lock_bh(&xprt->transport_lock); + keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); + keepcnt = xprt->timeout->to_retries + 1; + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); + spin_unlock_bh(&xprt->transport_lock); + + /* TCP Keepalive options */ + kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&opt_on, sizeof(opt_on)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, + (char *)&keepcnt, sizeof(keepcnt)); + + /* TCP user timeout (see RFC5482) */ + kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&timeo, sizeof(timeo)); +} + +static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, + unsigned long connect_timeout, + unsigned long reconnect_timeout) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_timeout to; + unsigned long initval; + + spin_lock_bh(&xprt->transport_lock); + if (reconnect_timeout < xprt->max_reconnect_timeout) + xprt->max_reconnect_timeout = reconnect_timeout; + if (connect_timeout < xprt->connect_timeout) { + memcpy(&to, xprt->timeout, sizeof(to)); + initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1); + /* Arbitrary lower limit */ + if (initval < XS_TCP_INIT_REEST_TO << 1) + initval = XS_TCP_INIT_REEST_TO << 1; + to.to_initval = initval; + to.to_maxval = initval; + memcpy(&transport->tcp_timeout, &to, + sizeof(transport->tcp_timeout)); + xprt->timeout = &transport->tcp_timeout; + xprt->connect_timeout = connect_timeout; + } + set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); + spin_unlock_bh(&xprt->transport_lock); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2242,22 +2309,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; - unsigned int keepidle = xprt->timeout->to_initval / HZ; - unsigned int keepcnt = xprt->timeout->to_retries + 1; - unsigned int opt_on = 1; - unsigned int timeo; unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC; - /* TCP Keepalive options */ - kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&opt_on, sizeof(opt_on)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); - /* Avoid temporary address, they are bad for long-lived * connections such as NFS mounts. * RFC4941, section 3.6 suggests that: @@ -2268,11 +2321,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES, (char *)&addr_pref, sizeof(addr_pref)); - /* TCP user timeout (see RFC5482) */ - timeo = jiffies_to_msecs(xprt->timeout->to_initval) * - (xprt->timeout->to_retries + 1); - kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&timeo, sizeof(timeo)); + xs_tcp_set_socket_timeouts(xprt, sock); write_lock_bh(&sk->sk_callback_lock); @@ -2721,6 +2770,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_tcp_shutdown, .destroy = xs_destroy, + .set_connect_timeout = xs_tcp_set_connect_timeout, .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, @@ -3007,6 +3057,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->timeout = &xs_tcp_default_timeout; xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + xprt->connect_timeout = xprt->timeout->to_initval * + (xprt->timeout->to_retries + 1); INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); @@ -3209,7 +3261,9 @@ static int param_set_uint_minmax(const char *val, if (!val) return -EINVAL; ret = kstrtouint(val, 0, &num); - if (ret == -EINVAL || num < min || num > max) + if (ret) + return ret; + if (num < min || num > max) return -EINVAL; *((unsigned int *)kp->arg) = num; return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index e9295fa3a554..4512e83652b1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1505,19 +1505,21 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { struct sk_buff_head xmitq; struct tipc_node *n; - struct tipc_msg *hdr = buf_msg(skb); - int usr = msg_user(hdr); + struct tipc_msg *hdr; int bearer_id = b->identity; struct tipc_link_entry *le; - u16 bc_ack = msg_bcast_ack(hdr); u32 self = tipc_own_addr(net); - int rc = 0; + int usr, rc = 0; + u16 bc_ack; __skb_queue_head_init(&xmitq); - /* Ensure message is well-formed */ + /* Ensure message is well-formed before touching the header */ if (unlikely(!tipc_msg_validate(skb))) goto discard; + hdr = buf_msg(skb); + usr = msg_user(hdr); + bc_ack = msg_bcast_ack(hdr); /* Handle arrival of discovery or broadcast packet */ if (unlikely(msg_non_seq(hdr))) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6b09a778cc71..43e4045e72bc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,6 +35,8 @@ */ #include <linux/rhashtable.h> +#include <linux/sched/signal.h> + #include "core.h" #include "name_table.h" #include "node.h" diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d18b9f910f..ee37b390260a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/signal.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/stat.h> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 8a398b3fb532..9192ead66751 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,6 +90,7 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/miscdevice.h> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 6788264acc63..9d24c0e958b1 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 849c4ad0411e..8d592a45b597 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -9,6 +9,7 @@ */ #include <linux/spinlock.h> #include <linux/module.h> +#include <linux/sched/signal.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/virtio.h> diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 079c883aa96e..fd28a49dbe8f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -41,7 +41,7 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/kernel.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/net.h> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5f3e87866438..0806dccdf507 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2836,14 +2836,8 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst) return mtu ? : dst_mtu(dst->path); } -static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) -{ - return dst->path->ops->neigh_lookup(dst, skb, daddr); -} - -static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) +static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, + const void *daddr) { const struct dst_entry *path = dst->path; @@ -2857,6 +2851,25 @@ static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; } + return daddr; +} + +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + const struct dst_entry *path = dst->path; + + if (!skb) + daddr = xfrm_get_dst_nexthop(dst, daddr); + return path->ops->neigh_lookup(path, skb, daddr); +} + +static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + const struct dst_entry *path = dst->path; + + daddr = xfrm_get_dst_nexthop(dst, daddr); path->ops->confirm_neigh(path, daddr); } |