diff options
Diffstat (limited to 'net')
172 files changed, 4109 insertions, 1721 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index dd43a8289b0d..5bf5f227dbe0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -60,9 +60,9 @@ static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); /** - * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount - * @v9ses: existing v9fs session information + * parse_options - parse mount options into client structure + * @opts: options string passed from mount + * @clnt: existing v9fs client information * * Return 0 upon success, -ERRNO upon failure */ @@ -117,9 +117,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) } } - if (!clnt->trans_mod) - clnt->trans_mod = v9fs_get_default_trans(); - kfree(options); return ret; } @@ -235,7 +232,7 @@ EXPORT_SYMBOL(p9_tag_lookup); /** * p9_tag_init - setup tags structure and contents - * @tags: tags structure from the client struct + * @c: v9fs client struct * * This initializes the tags structure for each client instance. * @@ -261,7 +258,7 @@ error: /** * p9_tag_cleanup - cleans up tags structure and reclaims resources - * @tags: tags structure from the client struct + * @c: v9fs client struct * * This frees resources associated with the tags structure * @@ -414,14 +411,9 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (c->dotu) err = -ecode; - if (!err) { + if (!err || !IS_ERR_VALUE(err)) err = p9_errstr2errno(ename, strlen(ename)); - /* string match failed */ - if (!err) - err = -ESERVERFAULT; - } - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); kfree(ename); @@ -433,8 +425,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) /** * p9_client_flush - flush (cancel) a request - * c: client state - * req: request to cancel + * @c: client state + * @oldreq: request to cancel * * This sents a flush for a particular requests and links * the flush request to the original request. The current @@ -689,6 +681,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err < 0) goto error; + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, @@ -1098,7 +1093,6 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (data) { memmove(data, dataptr, count); - data += count; } if (udata) { @@ -1192,9 +1186,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); if (err) { - ret = ERR_PTR(err); p9pdu_dump(1, req->rc); - goto free_and_error; + p9_free_req(clnt, req); + goto error; } P9_DPRINTK(P9_DEBUG_9P, @@ -1211,8 +1205,6 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) p9_free_req(clnt, req); return ret; -free_and_error: - p9_free_req(clnt, req); error: kfree(ret); return ERR_PTR(err); diff --git a/net/9p/error.c b/net/9p/error.c index fdebe4314062..52518512a93e 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -239,7 +239,7 @@ int p9_errstr2errno(char *errstr, int len) errstr[len] = 0; printk(KERN_ERR "%s: server reported unknown error %s\n", __func__, errstr); - errno = 1; + errno = ESERVERFAULT; } return -errno; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a2a1814c7a8d..8d934dd7fd54 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -119,8 +119,8 @@ struct p9_poll_wait { * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer + * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads - * @poll_waddr: ???? * @pt: poll state * @rq: current read work * @wq: current write work @@ -700,9 +700,9 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) } /** - * parse_options - parse mount options into session structure - * @options: options string passed from mount - * @opts: transport-specific structure to parse options into + * parse_opts - parse mount options into p9_fd_opts structure + * @params: options string passed from mount + * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ @@ -735,12 +735,14 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - r = match_int(&args[0], &option); - if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - continue; + if (token != Opt_err) { + r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } } switch (token) { case Opt_port: diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ac4990041ebb..65cb29db03f8 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -67,14 +67,15 @@ * @pd: Protection Domain pointer * @qp: Queue Pair pointer * @cq: Completion Queue pointer + * @dm_mr: DMA Memory Region pointer * @lkey: The local access only memory region key * @timeout: Number of uSecs to wait for connection management events * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. + * @rq_count: Count of requests in the Receive Queue. * @addr: The remote peer's address * @req_lock: Protects the active request list - * @send_wait: Wait list when the SQ fills up * @cm_done: Completion event for connection management tracking */ struct p9_trans_rdma { @@ -154,9 +155,9 @@ static match_table_t tokens = { }; /** - * parse_options - parse mount options into session structure - * @options: options string passed from mount - * @opts: transport-specific structure to parse options into + * parse_opts - parse mount options into rdma options structure + * @params: options string passed from mount + * @opts: rdma transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index a49484e67e1d..9bf0b737aa51 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -57,11 +57,9 @@ static int chan_index; * @initialized: whether the channel is initialized * @inuse: whether the channel is in use * @lock: protects multiple elements within this structure + * @client: client instance * @vdev: virtio dev associated with this channel * @vq: virtio queue associated with this channel - * @tagpool: accounting for tag ids (and request slots) - * @reqs: array of request slots - * @max_tag: current number of request_slots allocated * @sg: scatter gather list which is used to pack a request (protected?) * * We keep all per-channel information in a structure. @@ -92,7 +90,7 @@ static unsigned int rest_of_page(void *data) /** * p9_virtio_close - reclaim resources of a channel - * @trans: transport state + * @client: client instance * * This reclaims a channel by freeing its resources and * reseting its inuse flag. @@ -181,9 +179,8 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) /** * p9_virtio_request - issue a request - * @t: transport state - * @tc: &p9_fcall request to transmit - * @rc: &p9_fcall to put reponse into + * @client: client instance issuing the request + * @req: request to be issued * */ diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index fd8e0847b254..80caad1a31a5 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -204,8 +204,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) "%02X %d\n", s->sk_type, ntohs(at->src_net), at->src_node, at->src_port, ntohs(at->dest_net), at->dest_node, at->dest_port, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); out: return 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b603cbacdc58..875eda5dbad7 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -54,6 +54,7 @@ #include <linux/capability.h> #include <linux/module.h> #include <linux/if_arp.h> +#include <linux/smp_lock.h> #include <linux/termios.h> /* For TIOCOUTQ/INQ */ #include <net/datalink.h> #include <net/psnap.h> @@ -162,8 +163,7 @@ static void atalk_destroy_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; add_timer(&sk->sk_timer); } else @@ -175,8 +175,7 @@ static inline void atalk_destroy_socket(struct sock *sk) atalk_remove_socket(sk); skb_queue_purge(&sk->sk_receive_queue); - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { setup_timer(&sk->sk_timer, atalk_destroy_timer, (unsigned long)sk); sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; @@ -1239,6 +1238,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, return -ENOBUFS; *uaddr_len = sizeof(struct sockaddr_at); + memset(&sat.sat_zero, 0, sizeof(sat.sat_zero)); if (peer) { if (sk->sk_state != TCP_ESTABLISHED) @@ -1750,8 +1750,7 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { /* Protocol layer */ case TIOCOUTQ: { - long amount = sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc); + long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; diff --git a/net/atm/common.c b/net/atm/common.c index d34edbe754c8..8c4d843eb17f 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -62,15 +62,15 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) { + if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", - atomic_read(&sk->sk_wmem_alloc), size, + sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); return NULL; } - while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); - pr_debug("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), - skb->truesize); + while (!(skb = alloc_skb(size, GFP_KERNEL))) + schedule(); + pr_debug("AlTx %d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; } @@ -92,7 +92,7 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (vcc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -145,7 +145,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ - atomic_set(&sk->sk_wmem_alloc, 0); + atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_rmem_alloc, 0); vcc->push = NULL; vcc->pop = NULL; @@ -594,7 +594,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 76ed3c8d26e6..4da8892ced5f 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -63,8 +63,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg error = -EINVAL; goto done; } - error = put_user(sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc), + error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), (int __user *) argp) ? -EFAULT : 0; goto done; case SIOCINQ: diff --git a/net/atm/proc.c b/net/atm/proc.c index e7b3b273907d..38de5ff61ecd 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -204,8 +204,8 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc) seq_printf(seq, "%3d", sk->sk_family); } seq_printf(seq, " %04lx %5d %7d/%7d %7d/%7d [%d]\n", vcc->flags, sk->sk_err, - atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf, - atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf, + sk_wmem_alloc_get(sk), sk->sk_sndbuf, + sk_rmem_alloc_get(sk), sk->sk_rcvbuf, atomic_read(&sk->sk_refcnt)); } diff --git a/net/atm/raw.c b/net/atm/raw.c index b0a2d8cb6744..cbfcc71a17b1 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -33,7 +33,7 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) struct sock *sk = sk_atm(vcc); pr_debug("APopR (%d) %d -= %d\n", vcc->vci, - atomic_read(&sk->sk_wmem_alloc), skb->truesize); + sk_wmem_alloc_get(sk), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd9d06f291dc..da0f64f82b57 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -330,8 +330,7 @@ void ax25_destroy_socket(ax25_cb *ax25) } if (ax25->sk != NULL) { - if (atomic_read(&ax25->sk->sk_wmem_alloc) || - atomic_read(&ax25->sk->sk_rmem_alloc)) { + if (sk_has_allocations(ax25->sk)) { /* Defer: outstanding buffers */ setup_timer(&ax25->dtimer, ax25_destroy_timer, (unsigned long)ax25); @@ -1691,7 +1690,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; res = put_user(amount, (int __user *)argp); @@ -1781,8 +1781,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ); ax25_info.n2count = ax25->n2count; ax25_info.state = ax25->state; - ax25_info.rcv_q = atomic_read(&sk->sk_rmem_alloc); - ax25_info.snd_q = atomic_read(&sk->sk_wmem_alloc); + ax25_info.rcv_q = sk_wmem_alloc_get(sk); + ax25_info.snd_q = sk_rmem_alloc_get(sk); ax25_info.vs = ax25->vs; ax25_info.vr = ax25->vr; ax25_info.va = ax25->va; @@ -1922,8 +1922,8 @@ static int ax25_info_show(struct seq_file *seq, void *v) if (ax25->sk != NULL) { seq_printf(seq, " %d %d %lu\n", - atomic_read(&ax25->sk->sk_wmem_alloc), - atomic_read(&ax25->sk->sk_rmem_alloc), + sk_wmem_alloc_get(ax25->sk), + sk_rmem_alloc_get(ax25->sk), sock_i_ino(ax25->sk)); } else { seq_puts(seq, " * * *\n"); diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 5f1d2107a1dd..de56d3983de0 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -437,8 +437,7 @@ free: int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { - skb->sk = NULL; /* Initially we don't know who it's for */ - skb->destructor = NULL; /* Who initializes this, dammit?! */ + skb_orphan(skb); if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 02b9baa1930b..0250e0600150 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -337,7 +337,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *) arg); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e50566ebf9f9..94b3388c188b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2080,28 +2080,41 @@ static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); /* ---- Initialization ---- */ static int __init rfcomm_init(void) { + int ret; + l2cap_load(); hci_register_cb(&rfcomm_cb); rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); if (IS_ERR(rfcomm_thread)) { - hci_unregister_cb(&rfcomm_cb); - return PTR_ERR(rfcomm_thread); + ret = PTR_ERR(rfcomm_thread); + goto out_thread; } if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) BT_ERR("Failed to create RFCOMM info file"); - rfcomm_init_sockets(); + ret = rfcomm_init_ttys(); + if (ret) + goto out_tty; -#ifdef CONFIG_BT_RFCOMM_TTY - rfcomm_init_ttys(); -#endif + ret = rfcomm_init_sockets(); + if (ret) + goto out_sock; BT_INFO("RFCOMM ver %s", VERSION); return 0; + +out_sock: + rfcomm_cleanup_ttys(); +out_tty: + kthread_stop(rfcomm_thread); +out_thread: + hci_unregister_cb(&rfcomm_cb); + + return ret; } static void __exit rfcomm_exit(void) @@ -2112,9 +2125,7 @@ static void __exit rfcomm_exit(void) kthread_stop(rfcomm_thread); -#ifdef CONFIG_BT_RFCOMM_TTY rfcomm_cleanup_ttys(); -#endif rfcomm_cleanup_sockets(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7f482784e9f7..0b85e8116859 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1132,7 +1132,7 @@ error: return err; } -void __exit rfcomm_cleanup_sockets(void) +void rfcomm_cleanup_sockets(void) { class_remove_file(bt_class, &class_attr_rfcomm); diff --git a/net/bridge/br.c b/net/bridge/br.c index 9aac5213105a..e1241c76239a 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -93,7 +93,7 @@ static void __exit br_deinit(void) unregister_pernet_subsys(&br_net_ops); - synchronize_net(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ br_netfilter_fini(); #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8a96672e2c5c..eb404dc3ed6e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -424,7 +424,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err2: br_fdb_delete_by_port(br, p, 1); err1: - kobject_del(&p->kobj); + kobject_put(&p->kobj); err0: dev_set_promiscuity(dev, -1); put_back: diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d7f32643ae..72720c710351 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -75,6 +75,7 @@ static __initdata const char banner[] = KERN_INFO MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); +MODULE_ALIAS("can-proto-2"); /* easy access to can_frame payload */ static inline u64 GET_U64(const struct can_frame *cp) @@ -1469,6 +1470,9 @@ static int bcm_release(struct socket *sock) bo->ifindex = 0; } + sock_orphan(sk); + sock->sk = NULL; + release_sock(sk); sock_put(sk); diff --git a/net/can/raw.c b/net/can/raw.c index 6aa154e806ae..db3152df7d2b 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -62,6 +62,7 @@ static __initdata const char banner[] = MODULE_DESCRIPTION("PF_CAN raw protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>"); +MODULE_ALIAS("can-proto-1"); #define MASK_ALL 0 @@ -306,6 +307,9 @@ static int raw_release(struct socket *sock) ro->bound = 0; ro->count = 0; + sock_orphan(sk); + sock->sk = NULL; + release_sock(sk); sock_put(sk); @@ -397,6 +401,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr, if (peer) return -EOPNOTSUPP; + memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; diff --git a/net/core/datagram.c b/net/core/datagram.c index 58abee1f1df1..b0fe69211eef 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -712,7 +712,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ diff --git a/net/core/dev.c b/net/core/dev.c index 576a61574a93..278d489aad3b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1031,7 +1031,7 @@ void dev_load(struct net *net, const char *name) dev = __dev_get_by_name(net, name); read_unlock(&dev_base_lock); - if (!dev && capable(CAP_SYS_MODULE)) + if (!dev && capable(CAP_NET_ADMIN)) request_module("%s", name); } @@ -2310,8 +2310,6 @@ ncls: if (!skb) goto out; - skb_orphan(skb); - type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { @@ -2825,9 +2823,11 @@ static void net_rx_action(struct softirq_action *h) * move the instance around on the list at-will. */ if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) - __napi_complete(n); - else + if (unlikely(napi_disable_pending(n))) { + local_irq_enable(); + napi_complete(n); + local_irq_disable(); + } else list_move_tail(&n->poll_list, list); } @@ -3461,10 +3461,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc_count > 0 && !dev->uc_promisc) { + if (dev->uc.count > 0 && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc_count == 0 && dev->uc_promisc) { + } else if (dev->uc.count == 0 && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } @@ -3483,9 +3483,8 @@ void dev_set_rx_mode(struct net_device *dev) /* hw addresses list handling functions */ -static int __hw_addr_add(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; int alloc_size; @@ -3493,7 +3492,7 @@ static int __hw_addr_add(struct list_head *list, int *delta, if (addr_len > MAX_ADDR_LEN) return -EINVAL; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && ha->type == addr_type) { ha->refcount++; @@ -3512,9 +3511,8 @@ static int __hw_addr_add(struct list_head *list, int *delta, ha->type = addr_type; ha->refcount = 1; ha->synced = false; - list_add_tail_rcu(&ha->list, list); - if (delta) - (*delta)++; + list_add_tail_rcu(&ha->list, &list->list); + list->count++; return 0; } @@ -3526,120 +3524,121 @@ static void ha_rcu_free(struct rcu_head *head) kfree(ha); } -static int __hw_addr_del(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && (ha->type == addr_type || !addr_type)) { if (--ha->refcount) return 0; list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); - if (delta) - (*delta)--; + list->count--; return 0; } } return -ENOENT; } -static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { int err; struct netdev_hw_addr *ha, *ha2; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, to_delta, ha->addr, - addr_len, type); + err = __hw_addr_add(to_list, ha->addr, addr_len, type); if (err) goto unroll; } return 0; unroll: - list_for_each_entry(ha2, from_list, list) { + list_for_each_entry(ha2, &from_list->list, list) { if (ha2 == ha) break; type = addr_type ? addr_type : ha2->type; - __hw_addr_del(to_list, to_delta, ha2->addr, - addr_len, type); + __hw_addr_del(to_list, ha2->addr, addr_len, type); } return err; } -static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, addr_type); } } -static int __hw_addr_sync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->synced) { - err = __hw_addr_add(to_list, to_delta, ha->addr, + err = __hw_addr_add(to_list, ha->addr, addr_len, ha->type); if (err) break; ha->synced = true; ha->refcount++; } else if (ha->refcount == 1) { - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, ha->type); - __hw_addr_del(from_list, from_delta, ha->addr, - addr_len, ha->type); + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } return err; } -static void __hw_addr_unsync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->synced) { - __hw_addr_del(to_list, to_delta, ha->addr, + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); ha->synced = false; - __hw_addr_del(from_list, from_delta, ha->addr, + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } } - -static void __hw_addr_flush(struct list_head *list) +static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, list, list) { + list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); } + list->count = 0; +} + +static void __hw_addr_init(struct netdev_hw_addr_list *list) +{ + INIT_LIST_HEAD(&list->list); + list->count = 0; } /* Device addresses handling functions */ @@ -3648,7 +3647,7 @@ static void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->dev_addr_list); + __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } @@ -3660,16 +3659,16 @@ static int dev_addr_init(struct net_device *dev) /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->dev_addr_list); + __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr), + err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ - ha = list_first_entry(&dev->dev_addr_list, + ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } @@ -3694,8 +3693,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr, ASSERT_RTNL(); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len, - addr_type); + err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3725,11 +3723,12 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr, * We can not remove the first address from the list because * dev->dev_addr points to that. */ - ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list); + ha = list_first_entry(&dev->dev_addrs.list, + struct netdev_hw_addr, list); if (ha->addr == dev->dev_addr && ha->refcount == 1) return -ENOENT; - err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len, + err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); @@ -3757,8 +3756,7 @@ int dev_addr_add_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); @@ -3784,15 +3782,14 @@ int dev_addr_del_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); return 0; } EXPORT_SYMBOL(dev_addr_del_multiple); -/* unicast and multicast addresses handling functions */ +/* multicast addresses handling functions */ int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int glbl) @@ -3868,10 +3865,12 @@ int dev_unicast_delete(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + netif_addr_lock_bh(dev); + err = __hw_addr_del(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -3892,10 +3891,12 @@ int dev_unicast_add(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + netif_addr_lock_bh(dev); + err = __hw_addr_add(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3952,7 +3953,8 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, * @from: source device * * Add newly added addresses to the destination device and release - * addresses that have no users left. + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. @@ -3961,15 +3963,14 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - ASSERT_RTNL(); - if (to->addr_len != from->addr_len) return -EINVAL; - err = __hw_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + netif_addr_lock_bh(to); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3985,30 +3986,28 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - ASSERT_RTNL(); - if (to->addr_len != from->addr_len) return; - __hw_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + netif_addr_lock_bh(from); + netif_addr_lock(to); + __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); static void dev_unicast_flush(struct net_device *dev) { - /* rtnl_mutex must be held here */ - - __hw_addr_flush(&dev->uc_list); - dev->uc_count = 0; + netif_addr_lock_bh(dev); + __hw_addr_flush(&dev->uc); + netif_addr_unlock_bh(dev); } static void dev_unicast_init(struct net_device *dev) { - /* rtnl_mutex must be held here */ - - INIT_LIST_HEAD(&dev->uc_list); + __hw_addr_init(&dev->uc); } diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 78e5bfc454ae..493775f4f2f1 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -81,7 +81,7 @@ struct gen_estimator { struct list_head list; - struct gnet_stats_basic *bstats; + struct gnet_stats_basic_packed *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; int ewma_log; @@ -165,7 +165,7 @@ static void gen_add_node(struct gen_estimator *est) } static -struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, +struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { struct rb_node *p = est_root.rb_node; @@ -202,7 +202,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, * * NOTE: Called under rtnl_mutex */ -int gen_new_estimator(struct gnet_stats_basic *bstats, +int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) @@ -262,7 +262,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * NOTE: Called under rtnl_mutex */ -void gen_kill_estimator(struct gnet_stats_basic *bstats, +void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; @@ -292,7 +292,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic *bstats, +int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -308,7 +308,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * * Returns true if estimator is active, and false if not. */ -bool gen_estimator_active(const struct gnet_stats_basic *bstats, +bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { ASSERT_RTNL(); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index c3d0ffeac243..8569310268ab 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -106,16 +106,21 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b) +gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) { if (d->compat_tc_stats) { d->tc_stats.bytes = b->bytes; d->tc_stats.packets = b->packets; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b)); + if (d->tail) { + struct gnet_stats_basic sb; + memset(&sb, 0, sizeof(sb)); + sb.bytes = b->bytes; + sb.packets = b->packets; + return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); + } return 0; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b7292a2719dc..197283072cc8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -488,7 +488,7 @@ int net_assign_generic(struct net *net, int id, void *data) */ ng->len = id; - memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); rcu_assign_pointer(net->gen, ng); call_rcu(&old_ng->rcu, net_generic_release); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9675f312830d..1b76eb11deb4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -319,6 +319,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) udelay(USEC_PER_POLL); } + + WARN_ONCE(!irqs_disabled(), + "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", + dev->name, ops->ndo_start_xmit); + local_irq_restore(flags); } @@ -740,7 +745,7 @@ int netpoll_setup(struct netpoll *np) np->name); break; } - cond_resched(); + msleep(1); } /* If carrier appears to come up instantly, we don't diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c93435b0347..9e0597d189b0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -204,6 +204,10 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; kmemcheck_annotate_bitfield(skb, flags1); kmemcheck_annotate_bitfield(skb, flags2); +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif + /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); @@ -665,7 +669,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; - new->mac_header += offset; + if (skb_mac_header_was_set(new)) + new->mac_header += offset; #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -847,7 +852,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->tail += off; skb->transport_header += off; skb->network_header += off; - skb->mac_header += off; + if (skb_mac_header_was_set(skb)) + skb->mac_header += off; skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; @@ -939,7 +945,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; - n->mac_header += off; + if (skb_mac_header_was_set(skb)) + n->mac_header += off; #endif return n; diff --git a/net/core/sock.c b/net/core/sock.c index b0ba569bc973..76334228ed1c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -631,7 +631,7 @@ set_rcvbuf: case SO_TIMESTAMPING: if (val & ~SOF_TIMESTAMPING_MASK) { - ret = EINVAL; + ret = -EINVAL; break; } sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, @@ -919,13 +919,19 @@ static inline void sock_lock_init(struct sock *sk) af_family_keys + sk->sk_family); } +/* + * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, + * even temporarly, because of RCU lookups. sk_node should also be left as is. + */ static void sock_copy(struct sock *nsk, const struct sock *osk) { #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif - - memcpy(nsk, osk, osk->sk_prot->obj_size); + BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != + sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); + memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, + osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); @@ -939,8 +945,23 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, struct kmem_cache *slab; slab = prot->slab; - if (slab != NULL) - sk = kmem_cache_alloc(slab, priority); + if (slab != NULL) { + sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); + if (!sk) + return sk; + if (priority & __GFP_ZERO) { + /* + * caches using SLAB_DESTROY_BY_RCU should let + * sk_node.next un-modified. Special care is taken + * when initializing object to zero. + */ + if (offsetof(struct sock, sk_node.next) != 0) + memset(sk, 0, offsetof(struct sock, sk_node.next)); + memset(&sk->sk_node.pprev, 0, + prot->obj_size - offsetof(struct sock, + sk_node.pprev)); + } + } else sk = kmalloc(prot->obj_size, priority); @@ -1004,6 +1025,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); sock_net_set(sk, get_net(net)); + atomic_set(&sk->sk_wmem_alloc, 1); } return sk; @@ -1125,6 +1147,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) + */ + smp_wmb(); atomic_set(&newsk->sk_refcnt, 2); /* @@ -1715,7 +1742,7 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -1723,7 +1750,7 @@ static void sock_def_wakeup(struct sock *sk) static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); @@ -1732,7 +1759,7 @@ static void sock_def_error_report(struct sock *sk) static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -1747,7 +1774,7 @@ static void sock_def_write_space(struct sock *sk) * progress. --DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); @@ -1840,8 +1867,12 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) + */ + smp_wmb(); atomic_set(&sk->sk_refcnt, 1); - atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 05ea7440d9e5..3e70faab2989 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -85,7 +85,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) } static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); diff --git a/net/dccp/output.c b/net/dccp/output.c index c0e88c16d088..c96119fda688 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -196,7 +196,7 @@ void dccp_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 314a1b5c033c..1bca9205104e 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -311,7 +311,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); @@ -1066,7 +1066,7 @@ static int __init dccp_init(void) (dccp_hashinfo.ehash_size - 1)) dccp_hashinfo.ehash_size--; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) - __get_free_pages(GFP_ATOMIC, ehash_order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { @@ -1091,7 +1091,7 @@ static int __init dccp_init(void) bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC, bhash_order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { @@ -1159,6 +1159,7 @@ static void __exit dccp_fini(void) kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); + percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a5e3a593e472..77d40289653c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1240,7 +1240,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return val; case TIOCOUTQ: - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *)arg); @@ -2413,6 +2413,8 @@ static void __exit decnet_exit(void) proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); + + rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ } module_exit(decnet_exit); #endif diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c index 4e4d8b5ad03d..efe661a9def4 100644 --- a/net/dsa/mv88e6xxx.c +++ b/net/dsa/mv88e6xxx.c @@ -418,7 +418,7 @@ static int mv88e6xxx_stats_wait(struct dsa_switch *ds) int i; for (i = 0; i < 10; i++) { - ret = REG_READ(REG_GLOBAL2, 0x1d); + ret = REG_READ(REG_GLOBAL, 0x1d); if ((ret & 0x8000) == 0) return 0; } diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8121bf0029e3..f0bbc57926cd 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -520,6 +520,7 @@ static int econet_getname(struct socket *sock, struct sockaddr *uaddr, if (peer) return -EOPNOTSUPP; + memset(sec, 0, sizeof(*sec)); mutex_lock(&econet_mutex); sk = sock->sk; @@ -540,8 +541,7 @@ static void econet_destroy_timer(unsigned long data) { struct sock *sk=(struct sock *)data; - if (!atomic_read(&sk->sk_wmem_alloc) && - !atomic_read(&sk->sk_rmem_alloc)) { + if (!sk_has_allocations(sk)) { sk_free(sk); return; } @@ -579,8 +579,7 @@ static int econet_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); - if (atomic_read(&sk->sk_rmem_alloc) || - atomic_read(&sk->sk_wmem_alloc)) { + if (sk_has_allocations(sk)) { sk->sk_timer.data = (unsigned long)sk; sk->sk_timer.expires = jiffies + HZ; sk->sk_timer.function = econet_destroy_timer; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 882a927cefae..af661805b9fa 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -39,14 +39,6 @@ #include "af802154.h" -#define DBG_DUMP(data, len) { \ - int i; \ - pr_debug("function: %s: data: len %d:\n", __func__, len); \ - for (i = 0; i < len; i++) {\ - pr_debug("%02x: %02x\n", i, (data)[i]); \ - } \ -} - /* * Utility function for families */ @@ -144,7 +136,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, unsigned int cmd) { struct ifreq ifr; - int ret = -EINVAL; + int ret = -ENOIOCTLCMD; struct net_device *dev; if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) @@ -154,8 +146,10 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - if (dev->type == ARPHRD_IEEE802154 || - dev->type == ARPHRD_IEEE802154_PHY) + + if ((dev->type == ARPHRD_IEEE802154 || + dev->type == ARPHRD_IEEE802154_PHY) && + dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -302,10 +296,12 @@ static struct net_proto_family ieee802154_family_ops = { static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - DBG_DUMP(skb->data, skb->len); if (!netif_running(dev)) return -ENODEV; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); +#endif if (!net_eq(dev_net(dev), &init_net)) goto drop; diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1779677aed46..ba8b214dda8f 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -126,7 +126,8 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } @@ -376,6 +377,18 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) return ret; } +static int dgram_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user optlen) +{ + return -EOPNOTSUPP; +} + struct proto ieee802154_dgram_prot = { .name = "IEEE-802.15.4-MAC", .owner = THIS_MODULE, @@ -390,5 +403,7 @@ struct proto ieee802154_dgram_prot = { .connect = dgram_connect, .disconnect = dgram_disconnect, .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, }; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 105ad10876af..27eda9fdf3c2 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -276,6 +276,9 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) else return NULL; + if (!dev) + return NULL; + if (dev->type != ARPHRD_IEEE802154) { dev_put(dev); return NULL; @@ -521,3 +524,6 @@ static void __exit ieee802154_nl_exit(void) } module_exit(ieee802154_nl_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ieee 802.15.4 configuration interface"); + diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index fca44d59f97e..9315977c4c61 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -238,6 +238,18 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) read_unlock(&raw_lock); } +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user optlen) +{ + return -EOPNOTSUPP; +} + struct proto ieee802154_raw_prot = { .name = "IEEE-802.15.4-RAW", .owner = THIS_MODULE, @@ -250,5 +262,7 @@ struct proto ieee802154_raw_prot = { .unhash = raw_unhash, .connect = raw_connect, .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, }; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8a3881e28aca..090e9991ac2a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -801,11 +801,8 @@ static int arp_process(struct sk_buff *skb) * cache. */ - /* - * Special case: IPv4 duplicate address detection packet (RFC2131) - * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) - */ - if (sip == 0 || tip == sip) { + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) @@ -1307,7 +1304,9 @@ static void arp_format_neigh_entry(struct seq_file *seq, hbuffer[k++] = hex_asc_lo(n->ha[j]); hbuffer[k++] = ':'; } - hbuffer[--k] = 0; + if (k != 0) + --k; + hbuffer[k] = 0; #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) } #endif diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d1a39b1277d6..63c2fa7b68c4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -316,8 +316,8 @@ static inline void check_tnode(const struct tnode *tn) static const int halve_threshold = 25; static const int inflate_threshold = 50; -static const int halve_threshold_root = 8; -static const int inflate_threshold_root = 15; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) @@ -391,13 +391,8 @@ static inline void tnode_free(struct tnode *tn) static void tnode_free_safe(struct tnode *tn) { BUG_ON(IS_LEAF(tn)); - - if (node_parent((struct node *) tn)) { - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - } else { - tnode_free(tn); - } + tn->tnode_free = tnode_free_head; + tnode_free_head = tn; } static void tnode_free_flush(void) @@ -1009,7 +1004,7 @@ fib_find_node(struct trie *t, u32 key) return NULL; } -static struct node *trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; t_key cindex, key; @@ -1026,6 +1021,9 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) (struct node *)tn, wasfull); tp = node_parent((struct node *) tn); + if (!tp) + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); if (!tp) break; @@ -1033,12 +1031,13 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) } /* Handle last (top) tnode */ - if (IS_TNODE(tn)) { + if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - tnode_free_flush(); - } - return (struct node *)tn; + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); + + return; } /* only used from updater-side */ @@ -1186,7 +1185,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) /* Rebalance the trie */ - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); done: return fa_head; } @@ -1605,7 +1604,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); } else rcu_assign_pointer(t->trie, NULL); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b0b273503e2a..a706a47f4dbb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -156,10 +156,10 @@ static int inet_csk_diag_fill(struct sock *sk, r->idiag_inode = sock_i_ino(sk); if (minfo) { - minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc); + minfo->idiag_rmem = sk_rmem_alloc_get(sk); minfo->idiag_wmem = sk->sk_wmem_queued; minfo->idiag_fmem = sk->sk_forward_alloc; - minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); + minfo->idiag_tmem = sk_wmem_alloc_get(sk); } handler->idiag_get_info(sk, r, info); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 44e2a3d2359a..82c11dd10a62 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,10 +735,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } tos = tiph->tos; - if (tos&1) { + if (tos == 1) { + tos = 0; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; - tos &= ~1; } { @@ -951,7 +951,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) addend += 4; } dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len - addend; + mtu -= dev->hard_header_len + addend; if (mtu < 68) mtu = 68; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 490ce20faf38..db46b4b5b2b9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -440,6 +440,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 247026282669..7ffcd96fe591 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -813,6 +813,8 @@ int ip_append_data(struct sock *sk, inet->cork.addr = ipc->addr; } rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; /* * We steal reference to this route, caller should not release it */ @@ -1243,7 +1245,6 @@ int ip_push_pending_frames(struct sock *sk) skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; skb->truesize += tmp_skb->truesize; - __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 155c008626c8..09172a65d9b6 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -191,7 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, ct, ctinfo); /* Tell TCP window tracking about seq change */ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo)); + ct, CTINFO2DIR(ctinfo), + (int)rep_len - (int)match_len); nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } @@ -377,6 +378,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; + s16 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; @@ -390,15 +392,18 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + seqoff = this_way->offset_after; else - newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + seqoff = this_way->offset_before; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + ackoff = other_way->offset_after; else - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); @@ -413,7 +418,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); return 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3dc9171a272f..2979f14bb188 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -799,7 +799,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: { @@ -935,8 +936,8 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cd76b3cb7092..278f46f5011b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,35 @@ restart: now = jiffies; if (!rt_caching(dev_net(rt->u.dst.dev))) { - rt_drop(rt); - return 0; + /* + * If we're not caching, just tell the caller we + * were successful and don't touch the route. The + * caller hold the sole reference to the cache entry, and + * it will be released when the caller is done with it. + * If we drop it here, the callers have no way to resolve routes + * when we're not caching. Instead, just point *rp at rt, so + * the caller gets a single use out of the route + * Note that we do rt_free on this new route entry, so that + * once its refcount hits zero, we are still able to reap it + * (Thanks Alexey) + * Note also the rt_free uses call_rcu. We don't actually + * need rcu protection here, this is just our path to get + * on the route gc list. + */ + + if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { + int err = arp_bind_neighbour(&rt->u.dst); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table failure & not caching routes.\n"); + rt_drop(rt); + return err; + } + } + + rt_free(rt); + goto skip_hashing; } rthp = &rt_hash_table[hash].chain; @@ -1203,7 +1230,8 @@ restart: #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; - printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); + printk(KERN_DEBUG "rt_cache @%02x: %pI4", + hash, &rt->rt_dst); for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); @@ -1217,6 +1245,8 @@ restart: rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); + +skip_hashing: if (rp) *rp = rt; else diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17b89c523f9d..91145244ea63 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -339,7 +339,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); @@ -903,13 +903,17 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, iov++; while (seglen > 0) { - int copy; + int copy = 0; + int max = size_goal; skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; + } - if (!tcp_send_head(sk) || - (copy = size_goal - skb->len) <= 0) { - + if (copy <= 0) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -930,6 +934,7 @@ new_segment: skb_entail(sk, skb); copy = size_goal; + max = size_goal; } /* Try to append data to the end of skb. */ @@ -1028,7 +1033,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < size_goal || (flags & MSG_OOB)) + if (skb->len < max || (flags & MSG_OOB)) continue; if (forced_push(tp)) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index e92beb9e55e0..6428b342b164 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -116,7 +116,7 @@ int tcp_set_default_congestion_control(const char *name) spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); #ifdef CONFIG_MODULES - if (!ca && capable(CAP_SYS_MODULE)) { + if (!ca && capable(CAP_NET_ADMIN)) { spin_unlock(&tcp_cong_list_lock); request_module("tcp_%s", name); @@ -246,7 +246,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) #ifdef CONFIG_MODULES /* not found attempt to autoload module */ - if (!ca && capable(CAP_SYS_MODULE)) { + if (!ca && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); request_module("tcp_%s", name); rcu_read_lock(); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a1ca2698c88..6d88219c5e22 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1160,6 +1160,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .md5_lookup = tcp_v4_reqsk_md5_lookup, + .calc_md5_hash = tcp_v4_md5_hash_skb, }; #endif @@ -1373,7 +1374,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); if (newkey != NULL) - tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, + tcp_v4_md5_do_add(newsk, newinet->daddr, newkey, key->keylen); newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43bbba7926ee..f8d67ccc64f3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -128,7 +128,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, goto kill_with_rst; /* Dup ACK? */ - if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || + if (!th->ack || + !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { inet_twsk_put(tw); return TCP_TW_SUCCESS; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 416fc4c2e7eb..bd62712848fa 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -725,7 +725,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -2260,7 +2261,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { - tp->af_specific->calc_md5_hash(md5_hash_location, + tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, md5, NULL, req, skb); } #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8f4158d7c9a6..80e3812837ad 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -840,7 +840,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } @@ -1721,8 +1722,8 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 60d918c96a4f..0071ee6f441f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -136,7 +136,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; fl->fl_ip_sport = ports[!!reverse]; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8c1e86afbbf5..43b3c9f89c12 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1916,8 +1916,32 @@ ok: update_lft = 1; else if (stored_lft <= MIN_VALID_LIFETIME) { /* valid_lft <= stored_lft is always true */ - /* XXX: IPsec */ - update_lft = 0; + /* + * RFC 4862 Section 5.5.3e: + * "Note that the preferred lifetime of + * the corresponding address is always + * reset to the Preferred Lifetime in + * the received Prefix Information + * option, regardless of whether the + * valid lifetime is also reset or + * ignored." + * + * So if the preferred lifetime in + * this advertisement is different + * than what we have stored, but the + * valid lifetime is invalid, just + * reset prefered_lft. + * + * We must set the valid lifetime + * to the stored lifetime since we'll + * be updating the timestamp below, + * else we'll set it back to the + * minumum. + */ + if (prefered_lft != ifp->prefered_lft) { + valid_lft = stored_lft; + update_lft = 1; + } } else { valid_lft = MIN_VALID_LIFETIME; if (valid_lft < prefered_lft) @@ -3085,7 +3109,7 @@ restart: spin_unlock(&ifp->lock); continue; } else if (age >= ifp->prefered_lft) { - /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ + /* jiffies - ifp->tstamp > age >= ifp->prefered_lft */ int deprecate = 0; if (!(ifp->flags&IFA_F_DEPRECATED)) { @@ -3362,7 +3386,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - ifa->tstamp)/HZ; - preferred -= tval; + if (preferred > tval) + preferred -= tval; + else + preferred = 0; if (valid != INFINITY_LIFE_TIME) valid -= tval; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 85b3d0036afd..45f9a2a42d56 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -306,8 +306,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && - chk_addr_ret != RTN_BROADCAST) + chk_addr_ret != RTN_BROADCAST) { + err = -EADDRNOTAVAIL; goto out; + } } else { if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; @@ -1284,6 +1286,8 @@ static void __exit inet6_exit(void) proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(inet6_exit); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 52449f7a1b71..86f42a288c4b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -405,7 +405,7 @@ out: } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c2f250150db1..678bb95b1525 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -354,7 +354,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 36dff8807183..eab62a7a8f06 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -117,7 +117,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) /* * Slightly more convenient version of icmpv6_send. */ -void icmpv6_param_prob(struct sk_buff *skb, int code, int pos) +void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); kfree_skb(skb); @@ -161,7 +161,7 @@ static int is_ineligible(struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline int icmpv6_xrlim_allow(struct sock *sk, int type, +static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi *fl) { struct dst_entry *dst; @@ -305,7 +305,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct net_device *dev) { struct net *net = dev_net(skb->dev); @@ -590,7 +590,7 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) +static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_protocol *ipprot; int inner_offset; @@ -643,7 +643,7 @@ static int icmpv6_rcv(struct sk_buff *skb) struct in6_addr *saddr, *daddr; struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; - int type; + u8 type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -914,7 +914,7 @@ static const struct icmp6_err { }, }; -int icmpv6_err_convert(int type, int code, int *err) +int icmpv6_err_convert(u8 type, u8 code, int *err) { int fatal = 0; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c3a07d75b5f5..6d6a4277c677 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -139,6 +139,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt rcu_read_unlock(); + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7c76e3d18215..87f8419a68fd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1484,7 +1484,6 @@ int ip6_push_pending_frames(struct sock *sk) skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; skb->truesize += tmp_skb->truesize; - __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 404d16a97d5c..51f410e7775a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -394,13 +394,13 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, - int *type, int *code, int *msg, __u32 *info, int offset) + u8 *type, u8 *code, int *msg, __u32 *info, int offset) { struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; struct ip6_tnl *t; int rel_msg = 0; - int rel_type = ICMPV6_DEST_UNREACH; - int rel_code = ICMPV6_ADDR_UNREACH; + u8 rel_type = ICMPV6_DEST_UNREACH; + u8 rel_code = ICMPV6_ADDR_UNREACH; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -488,11 +488,11 @@ out: static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; - int rel_type = type; - int rel_code = code; + u8 rel_type = type; + u8 rel_code = code; __u32 rel_info = ntohl(info); int err; struct sk_buff *skb2; @@ -586,11 +586,11 @@ out: static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; - int rel_type = type; - int rel_code = code; + u8 rel_type = type; + u8 rel_code = code; __u32 rel_info = ntohl(info); int err; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3a0b3be7ece5..79c172f1ff01 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -51,7 +51,7 @@ #include <linux/mutex.h> static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f995e19c87a9..f797e8c6f3b3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -54,7 +54,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) return data + padlen; } -static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) +static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 36a090d87a3d..d6c3c1c34b2d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -310,7 +310,7 @@ out: static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -343,7 +343,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, } void raw6_icmp_error(struct sk_buff *skb, int nexthdr, - int type, int code, int inner_offset, __be32 info) + u8 type, u8 code, int inner_offset, __be32 info) { struct sock *sk; int hash; @@ -1130,7 +1130,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) switch(cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: @@ -1236,8 +1237,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 658293ea05ba..1473ee0a1f51 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1865,7 +1865,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { int type; struct dst_entry *dst = skb_dst(skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 68e52308e552..98b7327d0949 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1018,6 +1018,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 53b6a4192b16..d849dd53b788 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -317,7 +317,7 @@ failure: } static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); @@ -896,6 +896,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .md5_lookup = tcp_v6_reqsk_md5_lookup, + .calc_md5_hash = tcp_v6_md5_hash_skb, }; #endif @@ -1441,7 +1442,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); if (newkey != NULL) - tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr, + tcp_v6_md5_do_add(newsk, &newnp->daddr, newkey, key->keylen); } #endif diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 669f280989c3..633ad789effc 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -124,7 +124,7 @@ drop: } static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fc333d854728..33b59bd92c4d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -312,7 +312,7 @@ csum_copy_err: } void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info, + u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) { struct ipv6_pinfo *np; @@ -346,8 +346,8 @@ out: } static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, int type, - int code, int offset, __be32 info ) + struct inet6_skb_parm *opt, u8 type, + u8 code, int offset, __be32 info ) { __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } @@ -1061,8 +1061,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 23779208c334..6bb303471e20 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -9,7 +9,7 @@ extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct udp_table *); + u8 , u8 , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index ba162a824585..4818c48688f2 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -20,7 +20,7 @@ static int udplitev6_rcv(struct sk_buff *skb) static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index b4b16a43f277..3a3c677bc0f2 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -157,7 +157,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); - while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) { + while (nh + offset + 1 < skb->data || + pskb_may_pull(skb, nh + offset + 1 - skb->data)) { nh = skb_network_header(skb); exthdr = (struct ipv6_opt_hdr *)(nh + offset); @@ -177,7 +178,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: - if (!onlyproto && pskb_may_pull(skb, nh + offset + 4 - skb->data)) { + if (!onlyproto && (nh + offset + 4 < skb->data || + pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; fl->fl_ip_sport = ports[!!reverse]; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 80193db224d9..81a95c00e503 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -262,7 +262,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { /* xfrm6_tunnel native err handling */ switch (type) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 1627050e29fd..f1118d92a191 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -41,6 +41,7 @@ #include <linux/netdevice.h> #include <linux/uio.h> #include <linux/skbuff.h> +#include <linux/smp_lock.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/string.h> @@ -1835,7 +1836,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; rc = put_user(amount, (int __user *)argp); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 5ed97ad0e2e3..576178482f89 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -280,8 +280,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) } seq_printf(seq, "%08X %08X %02X %03d\n", - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); out: return 0; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 3eb5bcc75f99..50b43c57d5d8 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -45,6 +45,7 @@ #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/smp_lock.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/init.h> @@ -714,6 +715,7 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); + memset(&saddr, 0, sizeof(saddr)); if (peer) { if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; @@ -913,9 +915,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - /* Wow ! What is that ? Jean II */ - skb->sk = NULL; - skb->destructor = NULL; kfree_skb(skb); sk->sk_ack_backlog--; @@ -1762,7 +1761,8 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; if (put_user(amount, (unsigned int __user *)arg)) diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 67c99d20857f..7ba96618660e 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -196,6 +196,7 @@ static int ircomm_lmp_data_request(struct ircomm_cb *self, /* Don't forget to refcount it - see ircomm_tty_do_softint() */ skb_get(skb); + skb_orphan(skb); skb->destructor = ircomm_lmp_flow_control; if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) { diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index bccf4d0059f0..b001c361ad30 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,7 +241,6 @@ #include <linux/module.h> #include <linux/kernel.h> -#include <linux/smp_lock.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/proc_fs.h> diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6d8ae03c14f5..68cbcb19cbd8 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,6 +13,7 @@ * 2) as a control channel (write commands, read events) */ +#include <linux/smp_lock.h> #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/irda/irttp.c b/net/irda/irttp.c index ecf4eb2717cb..9cb79f95bf63 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1453,6 +1453,7 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance) } /* Dup */ memcpy(new, orig, sizeof(struct tsap_cb)); + spin_lock_init(&new->lock); /* We don't need the old instance any more */ spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 656cbd195825..49c15b48408e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -54,6 +54,38 @@ static const u8 iprm_shutdown[8] = #define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ #define CB_TRGCLS_LEN (TRGCLS_SIZE) +#define __iucv_sock_wait(sk, condition, timeo, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + long __timeo = timeo; \ + ret = 0; \ + while (!(condition)) { \ + prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + if (!__timeo) { \ + ret = -EAGAIN; \ + break; \ + } \ + if (signal_pending(current)) { \ + ret = sock_intr_errno(__timeo); \ + break; \ + } \ + release_sock(sk); \ + __timeo = schedule_timeout(__timeo); \ + lock_sock(sk); \ + ret = sock_error(sk); \ + if (ret) \ + break; \ + } \ + finish_wait(sk->sk_sleep, &__wait); \ +} while (0) + +#define iucv_sock_wait(sk, condition, timeo) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __iucv_sock_wait(sk, condition, timeo, __ret); \ + __ret; \ +}) static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -238,6 +270,48 @@ static inline size_t iucv_msg_length(struct iucv_message *msg) return msg->length; } +/** + * iucv_sock_in_state() - check for specific states + * @sk: sock structure + * @state: first iucv sk state + * @state: second iucv sk state + * + * Returns true if the socket in either in the first or second state. + */ +static int iucv_sock_in_state(struct sock *sk, int state, int state2) +{ + return (sk->sk_state == state || sk->sk_state == state2); +} + +/** + * iucv_below_msglim() - function to check if messages can be sent + * @sk: sock structure + * + * Returns true if the send queue length is lower than the message limit. + * Always returns true if the socket is not connected (no iucv path for + * checking the message limit). + */ +static inline int iucv_below_msglim(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (sk->sk_state != IUCV_CONNECTED) + return 1; + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); +} + +/** + * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit + */ +static void iucv_sock_wake_msglim(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + if (sk_has_sleeper(sk)) + wake_up_interruptible_all(sk->sk_sleep); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + read_unlock(&sk->sk_callback_lock); +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -329,7 +403,9 @@ static void iucv_sock_close(struct sock *sk) timeo = sk->sk_lingertime; else timeo = IUCV_DISCONN_TIMEOUT; - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); + err = iucv_sock_wait(sk, + iucv_sock_in_state(sk, IUCV_CLOSED, 0), + timeo); } case IUCV_CLOSING: /* fall through */ @@ -510,39 +586,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo) -{ - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - add_wait_queue(sk->sk_sleep, &wait); - while (sk->sk_state != state && sk->sk_state != state2) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) { - err = -EAGAIN; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return err; -} - /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -687,8 +730,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, } if (sk->sk_state != IUCV_CONNECTED) { - err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, - sock_sndtimeo(sk, flags & O_NONBLOCK)); + err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, + IUCV_DISCONN), + sock_sndtimeo(sk, flags & O_NONBLOCK)); } if (sk->sk_state == IUCV_DISCONN) { @@ -842,9 +886,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_message txmsg; struct cmsghdr *cmsg; int cmsg_done; + long timeo; char user_id[9]; char appl_id[9]; int err; + int noblock = msg->msg_flags & MSG_DONTWAIT; err = sock_error(sk); if (err) @@ -864,108 +910,119 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED) { - /* initialize defaults */ - cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; + /* Return if the socket is not in connected state */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ENOTCONN; + goto out; + } - /* iterate over control messages */ - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; - cmsg = CMSG_NXTHDR(msg, cmsg)) { + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; - if (!CMSG_OK(msg, cmsg)) { - err = -EINVAL; - goto out; - } + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto out; + } + + if (cmsg->cmsg_level != SOL_IUCV) + continue; - if (cmsg->cmsg_level != SOL_IUCV) - continue; + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; + } + cmsg_done |= cmsg->cmsg_type; - if (cmsg->cmsg_type & cmsg_done) { + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { err = -EINVAL; goto out; } - cmsg_done |= cmsg->cmsg_type; - - switch (cmsg->cmsg_type) { - case SCM_IUCV_TRGCLS: - if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { - err = -EINVAL; - goto out; - } - /* set iucv message target class */ - memcpy(&txmsg.class, - (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); - break; + break; - default: - err = -EINVAL; - goto out; - break; - } + default: + err = -EINVAL; + goto out; + break; } + } - /* allocate one skb for each iucv message: - * this is fine for SOCK_SEQPACKET (unless we want to support - * segmented records using the MSG_EOR flag), but - * for SOCK_STREAM we might want to improve it in future */ - if (!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) - goto out; + /* allocate one skb for each iucv message: + * this is fine for SOCK_SEQPACKET (unless we want to support + * segmented records using the MSG_EOR flag), but + * for SOCK_STREAM we might want to improve it in future */ + skb = sock_alloc_send_skb(sk, len, noblock, &err); + if (!skb) + goto out; + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + err = -EFAULT; + goto fail; + } - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { - err = -EFAULT; - goto fail; - } + /* wait if outstanding messages for iucv path has reached */ + timeo = sock_sndtimeo(sk, noblock); + err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo); + if (err) + goto fail; - /* increment and save iucv message tag for msg_completion cbk */ - txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); - skb_queue_tail(&iucv->send_skb_q, skb); + /* return -ECONNRESET if the socket is no longer connected */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ECONNRESET; + goto fail; + } - if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) - && skb->len <= 7) { - err = iucv_send_iprm(iucv->path, &txmsg, skb); + /* increment and save iucv message tag for msg_completion cbk */ + txmsg.tag = iucv->send_tag++; + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + skb_queue_tail(&iucv->send_skb_q, skb); - /* on success: there is no message_complete callback - * for an IPRMDATA msg; remove skb from send queue */ - if (err == 0) { - skb_unlink(skb, &iucv->send_skb_q); - kfree_skb(skb); - } + if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) + && skb->len <= 7) { + err = iucv_send_iprm(iucv->path, &txmsg, skb); - /* this error should never happen since the - * IUCV_IPRMDATA path flag is set... sever path */ - if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); - skb_unlink(skb, &iucv->send_skb_q); - err = -EPIPE; - goto fail; - } - } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, - (void *) skb->data, skb->len); - if (err) { - if (err == 3) { - user_id[8] = 0; - memcpy(user_id, iucv->dst_user_id, 8); - appl_id[8] = 0; - memcpy(appl_id, iucv->dst_name, 8); - pr_err("Application %s on z/VM guest %s" - " exceeds message limit\n", - user_id, appl_id); - } + /* on success: there is no message_complete callback + * for an IPRMDATA msg; remove skb from send queue */ + if (err == 0) { + skb_unlink(skb, &iucv->send_skb_q); + kfree_skb(skb); + } + + /* this error should never happen since the + * IUCV_IPRMDATA path flag is set... sever path */ + if (err == 0x15) { + iucv_path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } - - } else { - err = -ENOTCONN; - goto out; + } else + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) { + user_id[8] = 0; + memcpy(user_id, iucv->dst_user_id, 8); + appl_id[8] = 0; + memcpy(appl_id, iucv->dst_name, 8); + pr_err("Application %s on z/VM guest %s" + " exceeds message limit\n", + appl_id, user_id); + err = -EAGAIN; + } else + err = -EPIPE; + skb_unlink(skb, &iucv->send_skb_q); + goto fail; } release_sock(sk); @@ -1199,7 +1256,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); @@ -1581,7 +1638,11 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); - kfree_skb(this); + if (this) { + kfree_skb(this); + /* wake up any process waiting for sending */ + iucv_sock_wake_msglim(sk); + } } BUG_ON(!this); diff --git a/net/key/af_key.c b/net/key/af_key.c index 643c1be2d02e..dba9abd27f90 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3662,8 +3662,8 @@ static int pfkey_seq_show(struct seq_file *f, void *v) seq_printf(f ,"%p %-6d %-6u %-6u %-6u %-6lu\n", s, atomic_read(&s->sk_refcnt), - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), sock_i_uid(s), sock_i_ino(s) ); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9208cf5f2bd5..c45eee1c0e8d 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -914,6 +914,7 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct llc_sock *llc = llc_sk(sk); int rc = 0; + memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index d208b3396d94..f97be471fe2e 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -134,8 +134,8 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, - atomic_read(&sk->sk_wmem_alloc), - atomic_read(&sk->sk_rmem_alloc) - llc->copied_seq, + sk_wmem_alloc_get(sk), + sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, llc->link); diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index ba2643a43c73..7836ee928983 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -83,6 +83,7 @@ endmenu config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" depends on MAC80211 && EXPERIMENTAL + depends on BROKEN ---help--- This options enables support of Draft 802.11s mesh networking. The implementation is based on Draft 1.08 of the Mesh Networking diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9e5762ad307d..a24e59816b93 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -381,6 +381,14 @@ static void ieee80211_agg_splice_packets(struct ieee80211_local *local, &local->hw, queue, IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)) + return; + + if (WARN(!sta->ampdu_mlme.tid_tx[tid], + "TID %d gone but expected when splicing aggregates from" + "the pending queue\n", tid)) + return; + if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* mark queue as pending, it is stopped already */ diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 11c72311f35b..6c439cd5ccea 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -163,6 +163,29 @@ static const struct file_operations noack_ops = { .open = mac80211_open_file_generic }; +static ssize_t queues_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + unsigned long flags; + char buf[IEEE80211_MAX_QUEUES * 20]; + int q, res = 0; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (q = 0; q < local->hw.queues; q++) + res += sprintf(buf + res, "%02d: %#.8lx/%d\n", q, + local->queue_stop_reasons[q], + __netif_subqueue_stopped(local->mdev, q)); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + return simple_read_from_buffer(user_buf, count, ppos, buf, res); +} + +static const struct file_operations queues_ops = { + .read = queues_read, + .open = mac80211_open_file_generic +}; + /* statistics stuff */ #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ @@ -298,6 +321,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(tsf); + DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); DEBUGFS_ADD(noack); @@ -350,6 +374,7 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(wep_iv); DEBUGFS_DEL(tsf); + DEBUGFS_DEL(queues); DEBUGFS_DEL(reset); DEBUGFS_DEL(noack); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4dbc28964196..68eb5052179a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -783,6 +783,7 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *wep_iv; struct dentry *tsf; + struct dentry *queues; struct dentry *reset; struct dentry *noack; struct dentry *statistics; @@ -1100,7 +1101,6 @@ void ieee802_11_parse_elems(u8 *start, size_t len, u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, struct ieee802_11_elems *elems, u64 filter, u32 crc); -int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ce267565e180..659a42d529e3 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -67,6 +67,8 @@ static DECLARE_WORK(todo_work, key_todo); * * @key: key to add to do item for * @flag: todo flag(s) + * + * Must be called with IRQs or softirqs disabled. */ static void add_todo(struct ieee80211_key *key, u32 flag) { @@ -140,9 +142,9 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, &sdata->vif, sta, &key->conf); if (!ret) { - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); } if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) @@ -164,12 +166,12 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) if (!key || !key->local->ops->set_key) return; - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); return; } - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); sta = get_sta_for_key(key); sdata = key->sdata; @@ -188,9 +190,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) wiphy_name(key->local->hw.wiphy), key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); } static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, @@ -437,14 +439,14 @@ void ieee80211_key_link(struct ieee80211_key *key, __ieee80211_key_replace(sdata, sta, old_key, key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - /* free old key later */ add_todo(old_key, KEY_FLAG_TODO_DELETE); add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); if (netif_running(sdata->dev)) add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); + + spin_unlock_irqrestore(&sdata->local->key_lock, flags); } static void __ieee80211_key_free(struct ieee80211_key *key) @@ -547,7 +549,7 @@ static void __ieee80211_key_todo(void) */ synchronize_rcu(); - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); while (!list_empty(&todo_list)) { key = list_first_entry(&todo_list, struct ieee80211_key, todo); list_del_init(&key->todo); @@ -558,7 +560,7 @@ static void __ieee80211_key_todo(void) KEY_FLAG_TODO_HWACCEL_REMOVE | KEY_FLAG_TODO_DELETE); key->flags &= ~todoflags; - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); work_done = false; @@ -591,9 +593,9 @@ static void __ieee80211_key_todo(void) WARN_ON(!work_done); - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); } - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); } void ieee80211_key_todo(void) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index fc712e60705d..11cf45bce38a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -494,7 +494,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * should it be using the interface and enqueuing * frames at this very time on another CPU. */ - synchronize_rcu(); + rcu_barrier(); /* Wait for RX path and call_rcu()'s */ skb_queue_purge(&sdata->u.mesh.skb_queue); } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 003cb470ac84..f49ef288e2e2 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -637,7 +637,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; - preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_KERNEL); + preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { printk(KERN_DEBUG "Mesh HWMP: could not allocate PREQ node\n"); return; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 3c72557df45a..479597e88583 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -175,6 +175,8 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) int err = 0; u32 hash_idx; + might_sleep(); + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ return -ENOTSUPP; @@ -265,6 +267,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) int err = 0; u32 hash_idx; + might_sleep(); if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ @@ -491,8 +494,10 @@ void mesh_path_tx_pending(struct mesh_path *mpath) * @skb: frame to discard * @sdata: network subif the frame was to be sent through * - * If the frame was beign forwarded from another MP, a PERR frame will be sent - * to the precursor. + * If the frame was being forwarded from another MP, a PERR frame will be sent + * to the precursor. The precursor's address (i.e. the previous hop) was saved + * in addr1 of the frame-to-be-forwarded, and would only be overwritten once + * the destination is successfully resolved. * * Locking: the function must me called within a rcu_read_lock region */ @@ -507,7 +512,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, u8 *ra, *da; da = hdr->addr3; - ra = hdr->addr2; + ra = hdr->addr1; mpath = mesh_path_lookup(da, sdata); if (mpath) dsn = ++mpath->dsn; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d779c57a8220..07e7e41816be 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -721,7 +721,7 @@ void ieee80211_dynamic_ps_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; - if (local->quiescing) + if (local->quiescing || local->suspended) return; queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); @@ -1102,14 +1102,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u32 changed = 0, config_changed = 0; - rcu_read_lock(); - - sta = sta_info_get(local, ifmgd->bssid); - if (!sta) { - rcu_read_unlock(); - return; - } - if (deauth) { ifmgd->direct_probe_tries = 0; ifmgd->auth_tries = 0; @@ -1120,7 +1112,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - ieee80211_sta_tear_down_BA_sessions(sta); + rcu_read_lock(); + sta = sta_info_get(local, ifmgd->bssid); + if (sta) + ieee80211_sta_tear_down_BA_sessions(sta); + rcu_read_unlock(); bss = ieee80211_rx_bss_get(local, ifmgd->bssid, conf->channel->center_freq, @@ -1156,8 +1152,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->ssid, ifmgd->ssid_len); } - rcu_read_unlock(); - ieee80211_set_wmm_default(sdata); ieee80211_recalc_idle(local); @@ -2223,7 +2217,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) capa_mask, capa_val); if (bss) { - ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); + local->oper_channel = bss->cbss.channel; + local->oper_channel_type = NL80211_CHAN_NO_HT; + ieee80211_hw_config(local, 0); + if (!(ifmgd->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(sdata, bss->ssid, bss->ssid_len); @@ -2445,6 +2442,14 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, true, true, WLAN_REASON_DEAUTH_LEAVING); + if (ifmgd->ssid_len == 0) { + /* + * Only allow association to be started if a valid SSID + * is configured. + */ + return; + } + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) || ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE) set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); @@ -2476,6 +2481,10 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size ifmgd = &sdata->u.mgd; if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) { + if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, + WLAN_REASON_DEAUTH_LEAVING); + /* * Do not use reassociation if SSID is changed (different ESS). */ @@ -2500,6 +2509,11 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + if (compare_ether_addr(bssid, ifmgd->bssid) != 0 && + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, + WLAN_REASON_DEAUTH_LEAVING); + if (is_valid_ether_addr(bssid)) { memcpy(ifmgd->bssid, bssid, ETH_ALEN); ifmgd->flags |= IEEE80211_STA_BSSID_SET; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7a549f9deb96..5e3d476972f9 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -55,15 +55,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) rcu_read_unlock(); - /* flush again, in case driver queued work */ - flush_workqueue(local->hw.workqueue); - - /* stop hardware - this must stop RX */ - if (local->open_count) { - ieee80211_led_radio(local, false); - drv_stop(local); - } - /* remove STAs */ spin_lock_irqsave(&local->sta_lock, flags); list_for_each_entry(sta, &local->sta_list, list) { @@ -111,7 +102,22 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) drv_remove_interface(local, &conf); } + /* stop hardware - this must stop RX */ + if (local->open_count) { + ieee80211_led_radio(local, false); + drv_stop(local); + } + + /* + * flush again, in case driver queued work -- it + * shouldn't be doing (or cancel everything in the + * stop callback) that but better safe than sorry. + */ + flush_workqueue(local->hw.workqueue); + local->suspended = true; + /* need suspended to be visible before quiescing is false */ + barrier(); local->quiescing = false; return 0; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index b218b98fba7f..37771abd8f5a 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -66,7 +66,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) for (i = rix; i >= 0; i--) if (mi->r[i].rix == rix) break; - WARN_ON(mi->r[i].rix != rix); + WARN_ON(i < 0); return i; } @@ -181,6 +181,9 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, break; ndx = rix_to_ndx(mi, ar[i].idx); + if (ndx < 0) + continue; + mi->r[ndx].attempts += ar[i].count; if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0)) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de5bba7f910a..0936fc24942d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2453,6 +2453,18 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } + /* + * If we're suspending, it is possible although not too likely + * that we'd be receiving frames after having already partially + * quiesced the stack. We can't process such frames then since + * that might, for example, cause stations to be added or other + * driver callbacks be invoked. + */ + if (unlikely(local->quiescing || local->suspended)) { + kfree_skb(skb); + return; + } + if (status->flag & RX_FLAG_HT) { /* rate_idx is MCS index */ if (WARN_ON(status->rate_idx < 0 || diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d238a8939a09..3a8922cd1038 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1455,7 +1455,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) monitor_iface = UNKNOWN_ADDRESS; len_rthdr = ieee80211_get_radiotap_len(skb->data); - hdr = (struct ieee80211_hdr *)skb->data + len_rthdr; + hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); hdrlen = ieee80211_hdrlen(hdr->frame_control); /* check the header is complete in the frame */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 66ce96a69f31..915e77769312 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -774,31 +774,6 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, dev_queue_xmit(skb); } -int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) -{ - int ret = -EINVAL; - struct ieee80211_channel *chan; - struct ieee80211_local *local = sdata->local; - - chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); - - if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - chan->flags & IEEE80211_CHAN_NO_IBSS) - return ret; - local->oper_channel = chan; - local->oper_channel_type = NL80211_CHAN_NO_HT; - - if (local->sw_scanning || local->hw_scanning) - ret = 0; - else - ret = ieee80211_hw_config( - local, IEEE80211_CONF_CHANGE_CHANNEL); - } - - return ret; -} - u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band) { diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index d2d81b103341..1da81f456744 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -55,6 +55,8 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_freq *freq, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_channel *chan; if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); @@ -69,17 +71,38 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(sdata, + chan = ieee80211_get_channel(local->hw.wiphy, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; - if (div > 0) - return ieee80211_set_freq(sdata, freq->m / div); - else + if (div <= 0) return -EINVAL; + chan = ieee80211_get_channel(local->hw.wiphy, freq->m / div); } + + if (!chan) + return -EINVAL; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + return -EINVAL; + + /* + * no change except maybe auto -> fixed, ignore the HT + * setting so you can fix a channel you're on already + */ + if (local->oper_channel == chan) + return 0; + + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_sta_req_auth(sdata); + + local->oper_channel = chan; + local->oper_channel_type = NL80211_CHAN_NO_HT; + ieee80211_hw_config(local, 0); + + return 0; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5f72b94b4918..b5869b9574b0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -335,7 +335,8 @@ begin: h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { @@ -425,7 +426,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Remove from unconfirmed list */ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -433,8 +433,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Since the lookup is lockless, hash insertion must be done after + * starting the timer and setting the CONFIRMED bit. The RCU barriers + * guarantee that no other CPU can find the conntrack before the above + * stores are visible. + */ + __nf_conntrack_hash_insert(ct, hash, repl_hash); NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); @@ -503,7 +511,8 @@ static noinline int early_drop(struct net *net, unsigned int hash) cnt++; } - if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (ct && unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) ct = NULL; if (ct || cnt >= NF_CT_EVICTION_RANGE) break; @@ -552,23 +561,38 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, } } - ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); + /* + * Do not use kmem_cache_zalloc(), as this cache uses + * SLAB_DESTROY_BY_RCU. + */ + ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } - + /* + * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next + * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. + */ + memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, + sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); spin_lock_init(&ct->lock); - atomic_set(&ct->ct_general.use, 1); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; + ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); #ifdef CONFIG_NET_NS ct->ct_net = net; #endif + /* + * changes to lookup keys must be done before setting refcnt to 1 + */ + smp_wmb(); + atomic_set(&ct->ct_general.use, 1); return ct; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -1267,13 +1291,19 @@ err_cache: return ret; } +/* + * We need to use special "null" values, not used in hash table + */ +#define UNCONFIRMED_NULLS_VAL ((1<<30)+0) +#define DYING_NULLS_VAL ((1<<30)+1) + static int nf_conntrack_init_net(struct net *net) { int ret; atomic_set(&net->ct.count, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); + INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index afde8f991646..2032dfe25ca8 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -617,8 +617,10 @@ err1: void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) + if (net_eq(net, &init_net)) { + rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + } nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4b2c769d555f..fef95be334bd 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -186,6 +186,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - synchronize_rcu(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 33fc0a443f3d..97a82ba75376 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,8 +720,8 @@ static bool tcp_in_window(const struct nf_conn *ct, /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir) + struct nf_conn *ct, int dir, + s16 offset) { const struct tcphdr *tcph = (const void *)skb->data + dataoff; const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; @@ -734,7 +734,7 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... */ - if (after(end, ct->proto.tcp.seen[dir].td_end)) + if (ct->proto.tcp.seen[dir].td_end + offset == end) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; spin_unlock_bh(&ct->lock); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 2fefe147750a..4e620305f28c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -47,7 +47,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { - int i; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); } else { @@ -216,7 +215,7 @@ static const struct file_operations nflog_file_ops = { #endif /* PROC_FS */ #ifdef CONFIG_SYSCTL -struct ctl_path nf_log_sysctl_path[] = { +static struct ctl_path nf_log_sysctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, @@ -228,19 +227,26 @@ static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; + char buf[NFLOGGER_NAME_LEN]; + size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; if (write) { - if (!strcmp(buffer, "NONE")) { + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, buffer, size)) + return -EFAULT; + + if (!strcmp(buf, "NONE")) { nf_log_unbind_pf(tindex); return 0; } mutex_lock(&nf_log_mutex); - logger = __find_logger(tindex, buffer); + logger = __find_logger(tindex, buf); if (logger == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 498b45101df7..f28f6a5fc02d 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -40,12 +40,12 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) static u32 hash_v4(const struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - u32 ipaddr; + __be32 ipaddr; /* packets in either direction go into same queue */ ipaddr = iph->saddr ^ iph->daddr; - return jhash_2words(ipaddr, iph->protocol, jhash_initval); + return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); } static unsigned int @@ -63,14 +63,14 @@ nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) static u32 hash_v6(const struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 addr[4]; + __be32 addr[4]; addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; - return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); + return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval); } static unsigned int diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 43f5676b1af4..d80b8192e0d4 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -74,7 +74,7 @@ static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_rateest_target_info *info = par->targinfo; - struct gnet_stats_basic *stats = &info->est->bstats; + struct gnet_stats_basic_packed *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); stats->bytes += skb->len; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 69a639f35403..225ee3ecd69d 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -15,14 +15,14 @@ #include <net/netfilter/nf_conntrack.h> #include <linux/netfilter/xt_cluster.h> -static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct) +static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) { - return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; } -static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct) +static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) { - return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; + return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; } static inline u_int32_t diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0b7139f3dd78..fc581800698e 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -129,7 +129,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, @@ -138,7 +138,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, @@ -147,7 +147,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, @@ -156,7 +156,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, @@ -164,7 +164,7 @@ conntrack_mt_repldst(const struct nf_conn *ct, } static inline bool -ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, +ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; @@ -204,7 +204,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, static bool conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = par->matchinfo; + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -278,6 +278,16 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } +static bool +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; + struct xt_match_param newpar = *par; + + newpar.matchinfo = *info; + return conntrack_mt(skb, &newpar); +} + static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -288,11 +298,45 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } +static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_conntrack_mtinfo1 *info = par->matchinfo; + struct xt_conntrack_mtinfo2 *up; + int ret = conntrack_mt_check(par); + + if (ret < 0) + return ret; + + up = kmalloc(sizeof(*up), GFP_KERNEL); + if (up == NULL) { + nf_ct_l3proto_module_put(par->family); + return -ENOMEM; + } + + /* + * The strategy here is to minimize the overhead of v1 matching, + * by prebuilding a v2 struct and putting the pointer into the + * v1 dataspace. + */ + memcpy(up, info, offsetof(typeof(*info), state_mask)); + up->state_mask = info->state_mask; + up->status_mask = info->status_mask; + *(void **)info = up; + return true; +} + static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } +static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_conntrack_mtinfo2 **info = par->matchinfo; + kfree(*info); + conntrack_mt_destroy(par); +} + #ifdef CONFIG_COMPAT struct compat_xt_conntrack_info { @@ -363,6 +407,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), + .match = conntrack_mt_v1, + .checkentry = conntrack_mt_check_v1, + .destroy = conntrack_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "conntrack", + .revision = 2, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 863e40977a4d..0f482e2440b4 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -330,7 +330,8 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + nf_log_packet(p->family, p->hooknum, skb, + p->in, p->out, NULL, "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), @@ -345,7 +346,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, "Remote OS is not known: %pi4:%u -> %pi4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 01dd07b764ec..390b7d09fe51 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -52,8 +52,9 @@ static bool quota_mt_check(const struct xt_mtchk_param *par) q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); if (q->master == NULL) - return -ENOMEM; + return false; + q->master->quota = q->quota; return true; } diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 220a1d588ee0..4fc6a917f6de 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -66,7 +66,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) - ret &= pps2 == pps2; + ret &= pps1 == pps2; break; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b0e582f2d37a..16e6c4378ff1 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -151,7 +151,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, addr6 = addr; mask6 = mask; map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); - if (map4 == NULL) + if (map6 == NULL) goto cfg_unlbl_map_add_failure; map6->type = NETLBL_NLTYPE_UNLABELED; ipv6_addr_copy(&map6->list.addr, addr6); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8b6bbb3032b0..2936fa3b6dc8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1914,8 +1914,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) s->sk_protocol, nlk->pid, nlk->groups ? (u32)nlk->groups[0] : 0, - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), nlk->cb, atomic_read(&s->sk_refcnt), atomic_read(&s->sk_drops) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3be0e016ab7d..ce1a34b99c23 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -286,8 +286,7 @@ void nr_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.function = nr_destroy_timer; sk->sk_timer.expires = jiffies + 2 * HZ; @@ -848,6 +847,7 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr, sax->fsa_ax25.sax25_family = AF_NETROM; sax->fsa_ax25.sax25_ndigis = 1; sax->fsa_ax25.sax25_call = nr->user_addr; + memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater)); sax->fsa_digipeater[0] = nr->dest_addr; *uaddr_len = sizeof(struct full_sockaddr_ax25); } else { @@ -1206,7 +1206,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) long amount; lock_sock(sk); - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; release_sock(sk); @@ -1342,8 +1342,8 @@ static int nr_info_show(struct seq_file *seq, void *v) nr->n2count, nr->n2, nr->window, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); bh_unlock_sock(s); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index e943c16552a2..4eb1ac9a7679 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -630,23 +630,23 @@ out: return dev; } -static ax25_digi *nr_call_to_digi(int ndigis, ax25_address *digipeaters) +static ax25_digi *nr_call_to_digi(ax25_digi *digi, int ndigis, + ax25_address *digipeaters) { - static ax25_digi ax25_digi; int i; if (ndigis == 0) return NULL; for (i = 0; i < ndigis; i++) { - ax25_digi.calls[i] = digipeaters[i]; - ax25_digi.repeated[i] = 0; + digi->calls[i] = digipeaters[i]; + digi->repeated[i] = 0; } - ax25_digi.ndigi = ndigis; - ax25_digi.lastrepeat = -1; + digi->ndigi = ndigis; + digi->lastrepeat = -1; - return &ax25_digi; + return digi; } /* @@ -656,6 +656,7 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) { struct nr_route_struct nr_route; struct net_device *dev; + ax25_digi digi; int ret; switch (cmd) { @@ -673,13 +674,15 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) ret = nr_add_node(&nr_route.callsign, nr_route.mnemonic, &nr_route.neighbour, - nr_call_to_digi(nr_route.ndigis, nr_route.digipeaters), + nr_call_to_digi(&digi, nr_route.ndigis, + nr_route.digipeaters), dev, nr_route.quality, nr_route.obs_count); break; case NETROM_NEIGH: ret = nr_add_neigh(&nr_route.callsign, - nr_call_to_digi(nr_route.ndigis, nr_route.digipeaters), + nr_call_to_digi(&digi, nr_route.ndigis, + nr_route.digipeaters), dev, nr_route.quality); break; default: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4f76e5552d8e..ebe5718baa31 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1987,7 +1987,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 80a322d77909..c2b77a698695 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -69,17 +69,34 @@ static struct phonet_device *__phonet_get(struct net_device *dev) return NULL; } -static void __phonet_device_free(struct phonet_device *pnd) +static void phonet_device_destroy(struct net_device *dev) { - list_del(&pnd->list); - kfree(pnd); + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); + struct phonet_device *pnd; + + ASSERT_RTNL(); + + spin_lock_bh(&pndevs->lock); + pnd = __phonet_get(dev); + if (pnd) + list_del(&pnd->list); + spin_unlock_bh(&pndevs->lock); + + if (pnd) { + u8 addr; + + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) + phonet_address_notify(RTM_DELADDR, dev, addr); + kfree(pnd); + } } struct net_device *phonet_device_get(struct net *net) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; - struct net_device *dev; + struct net_device *dev = NULL; spin_lock_bh(&pndevs->lock); list_for_each_entry(pnd, &pndevs->list, list) { @@ -126,8 +143,10 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) err = -EADDRNOTAVAIL; - else if (bitmap_empty(pnd->addrs, 64)) - __phonet_device_free(pnd); + else if (bitmap_empty(pnd->addrs, 64)) { + list_del(&pnd->list); + kfree(pnd); + } spin_unlock_bh(&pndevs->lock); return err; } @@ -181,18 +200,8 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, { struct net_device *dev = arg; - if (what == NETDEV_UNREGISTER) { - struct phonet_device_list *pndevs; - struct phonet_device *pnd; - - /* Destroy phonet-specific device data */ - pndevs = phonet_device_list(dev_net(dev)); - spin_lock_bh(&pndevs->lock); - pnd = __phonet_get(dev); - if (pnd) - __phonet_device_free(pnd); - spin_unlock_bh(&pndevs->lock); - } + if (what == NETDEV_UNREGISTER) + phonet_device_destroy(dev); return 0; } @@ -218,11 +227,12 @@ static int phonet_init_net(struct net *net) static void phonet_exit_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); - struct phonet_device *pnd, *n; - - list_for_each_entry_safe(pnd, n, &pnn->pndevs.list, list) - __phonet_device_free(pnd); + struct net_device *dev; + rtnl_lock(); + for_each_netdev(net, dev) + phonet_device_destroy(dev); + rtnl_unlock(); kfree(pnn); } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index cec4e5951681..f8b4cee434c2 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -32,7 +32,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, u32 pid, u32 seq, int event); -static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(int event, struct net_device *dev, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -94,7 +94,7 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) else err = phonet_address_del(dev, pnaddr); if (!err) - rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); return err; } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 4e68ab439d5d..2fc4a1724eb8 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -56,7 +56,6 @@ struct rfkill { u32 idx; bool registered; - bool suspended; bool persistent; const struct rfkill_ops *ops; @@ -224,7 +223,7 @@ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) static void rfkill_event(struct rfkill *rfkill) { - if (!rfkill->registered || rfkill->suspended) + if (!rfkill->registered) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); @@ -270,6 +269,9 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) unsigned long flags; int err; + if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) + return; + /* * Some platforms (...!) generate input events which affect the * _hard_ kill state -- whenever something tries to change the @@ -292,9 +294,6 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill->state |= RFKILL_BLOCK_SW_SETCALL; spin_unlock_irqrestore(&rfkill->lock, flags); - if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) - return; - err = rfkill->ops->set_block(rfkill->data, blocked); spin_lock_irqsave(&rfkill->lock, flags); @@ -508,19 +507,32 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); - if (!rfkill->registered) { - rfkill->persistent = true; - } else { - if (prev != blocked && !hwblock) - schedule_work(&rfkill->uevent_work); + if (!rfkill->registered) + return blocked; - rfkill_led_trigger_event(rfkill); - } + if (prev != blocked && !hwblock) + schedule_work(&rfkill->uevent_work); + + rfkill_led_trigger_event(rfkill); return blocked; } EXPORT_SYMBOL(rfkill_set_sw_state); +void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) +{ + unsigned long flags; + + BUG_ON(!rfkill); + BUG_ON(rfkill->registered); + + spin_lock_irqsave(&rfkill->lock, flags); + __rfkill_set_sw_state(rfkill, blocked); + rfkill->persistent = true; + spin_unlock_irqrestore(&rfkill->lock, flags); +} +EXPORT_SYMBOL(rfkill_init_sw_state); + void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { unsigned long flags; @@ -537,6 +549,10 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) swprev = !!(rfkill->state & RFKILL_BLOCK_SW); hwprev = !!(rfkill->state & RFKILL_BLOCK_HW); __rfkill_set_sw_state(rfkill, sw); + if (hw) + rfkill->state |= RFKILL_BLOCK_HW; + else + rfkill->state &= ~RFKILL_BLOCK_HW; spin_unlock_irqrestore(&rfkill->lock, flags); @@ -598,6 +614,15 @@ static ssize_t rfkill_idx_show(struct device *dev, return sprintf(buf, "%d\n", rfkill->idx); } +static ssize_t rfkill_persistent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%d\n", rfkill->persistent); +} + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -627,15 +652,26 @@ static ssize_t rfkill_state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - /* - * The intention was that userspace can only take control over - * a given device when/if rfkill-input doesn't control it due - * to user_claim. Since user_claim is currently unsupported, - * we never support changing the state from userspace -- this - * can be implemented again later. - */ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long state; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = strict_strtoul(buf, 0, &state); + if (err) + return err; - return -EPERM; + if (state != RFKILL_USER_STATE_SOFT_BLOCKED && + state != RFKILL_USER_STATE_UNBLOCKED) + return -EINVAL; + + mutex_lock(&rfkill_global_mutex); + rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED); + mutex_unlock(&rfkill_global_mutex); + + return err ?: count; } static ssize_t rfkill_claim_show(struct device *dev, @@ -656,6 +692,7 @@ static struct device_attribute rfkill_dev_attrs[] = { __ATTR(name, S_IRUGO, rfkill_name_show, NULL), __ATTR(type, S_IRUGO, rfkill_type_show, NULL), __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), + __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), __ATTR_NULL @@ -718,8 +755,6 @@ static int rfkill_suspend(struct device *dev, pm_message_t state) rfkill_pause_polling(rfkill); - rfkill->suspended = true; - return 0; } @@ -728,10 +763,10 @@ static int rfkill_resume(struct device *dev) struct rfkill *rfkill = to_rfkill(dev); bool cur; - cur = !!(rfkill->state & RFKILL_BLOCK_SW); - rfkill_set_block(rfkill, cur); - - rfkill->suspended = false; + if (!rfkill->persistent) { + cur = !!(rfkill->state & RFKILL_BLOCK_SW); + rfkill_set_block(rfkill, cur); + } rfkill_resume_polling(rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 877a7f65f707..e5f478ca3d61 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -92,23 +92,21 @@ static void rose_set_lockdep_key(struct net_device *dev) /* * Convert a ROSE address into text. */ -const char *rose2asc(const rose_address *addr) +char *rose2asc(char *buf, const rose_address *addr) { - static char buffer[11]; - if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 && addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 && addr->rose_addr[4] == 0x00) { - strcpy(buffer, "*"); + strcpy(buf, "*"); } else { - sprintf(buffer, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, + sprintf(buf, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, addr->rose_addr[1] & 0xFF, addr->rose_addr[2] & 0xFF, addr->rose_addr[3] & 0xFF, addr->rose_addr[4] & 0xFF); } - return buffer; + return buf; } /* @@ -356,8 +354,7 @@ void rose_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ setup_timer(&sk->sk_timer, rose_destroy_timer, (unsigned long)sk); @@ -957,6 +954,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr, struct rose_sock *rose = rose_sk(sk); int n; + memset(srose, 0, sizeof(*srose)); if (peer != 0) { if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; @@ -1310,7 +1308,8 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; return put_user(amount, (unsigned int __user *) argp); @@ -1437,7 +1436,7 @@ static void rose_info_stop(struct seq_file *seq, void *v) static int rose_info_show(struct seq_file *seq, void *v) { - char buf[11]; + char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1455,8 +1454,8 @@ static int rose_info_show(struct seq_file *seq, void *v) devname = dev->name; seq_printf(seq, "%-10s %-9s ", - rose2asc(&rose->dest_addr), - ax2asc(buf, &rose->dest_call)); + rose2asc(rsbuf, &rose->dest_addr), + ax2asc(buf, &rose->dest_call)); if (ax25cmp(&rose->source_call, &null_ax25_address) == 0) callsign = "??????-?"; @@ -1465,7 +1464,7 @@ static int rose_info_show(struct seq_file *seq, void *v) seq_printf(seq, "%-10s %-9s %-5s %3.3X %05d %d %d %d %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n", - rose2asc(&rose->source_addr), + rose2asc(rsbuf, &rose->source_addr), callsign, devname, rose->lci & 0x0FFF, @@ -1481,8 +1480,8 @@ static int rose_info_show(struct seq_file *seq, void *v) rose->hb / HZ, ax25_display_timer(&rose->idletimer) / (60 * HZ), rose->idle / (60 * HZ), - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index a81066a1010a..9478d9b3d977 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1104,6 +1104,7 @@ static void rose_node_stop(struct seq_file *seq, void *v) static int rose_node_show(struct seq_file *seq, void *v) { + char rsbuf[11]; int i; if (v == SEQ_START_TOKEN) @@ -1112,13 +1113,13 @@ static int rose_node_show(struct seq_file *seq, void *v) const struct rose_node *rose_node = v; /* if (rose_node->loopback) { seq_printf(seq, "%-10s %04d 1 loopback\n", - rose2asc(&rose_node->address), - rose_node->mask); + rose2asc(rsbuf, &rose_node->address), + rose_node->mask); } else { */ seq_printf(seq, "%-10s %04d %d", - rose2asc(&rose_node->address), - rose_node->mask, - rose_node->count); + rose2asc(rsbuf, &rose_node->address), + rose_node->mask, + rose_node->count); for (i = 0; i < rose_node->count; i++) seq_printf(seq, " %05d", @@ -1267,7 +1268,7 @@ static void rose_route_stop(struct seq_file *seq, void *v) static int rose_route_show(struct seq_file *seq, void *v) { - char buf[11]; + char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1279,7 +1280,7 @@ static int rose_route_show(struct seq_file *seq, void *v) seq_printf(seq, "%3.3X %-10s %-9s %05d ", rose_route->lci1, - rose2asc(&rose_route->src_addr), + rose2asc(rsbuf, &rose_route->src_addr), ax2asc(buf, &rose_route->src_call), rose_route->neigh1->number); else @@ -1289,10 +1290,10 @@ static int rose_route_show(struct seq_file *seq, void *v) if (rose_route->neigh2) seq_printf(seq, "%3.3X %-10s %-9s %05d\n", - rose_route->lci2, - rose2asc(&rose_route->dest_addr), - ax2asc(buf, &rose_route->dest_call), - rose_route->neigh2->number); + rose_route->lci2, + rose2asc(rsbuf, &rose_route->dest_addr), + ax2asc(buf, &rose_route->dest_call), + rose_route->neigh2->number); else seq_puts(seq, "000 * * 00000\n"); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index eac5e7bb7365..bfe493ebf27c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -63,7 +63,7 @@ static void rxrpc_write_space(struct sock *sk) _enter("%p", sk); read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -588,7 +588,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f8f047b61245..723964c3ee4f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -294,6 +294,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } @@ -327,6 +329,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, } police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 266151ae85a3..18d85d259104 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -349,13 +349,13 @@ META_COLLECTOR(int_sk_type) META_COLLECTOR(int_sk_rmem_alloc) { SKIP_NONLOCAL(skb); - dst->value = atomic_read(&skb->sk->sk_rmem_alloc); + dst->value = sk_rmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_wmem_alloc) { SKIP_NONLOCAL(skb); - dst->value = atomic_read(&skb->sk->sk_wmem_alloc); + dst->value = sk_wmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_omem_alloc) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c294..fdb694e9f759 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1456,6 +1456,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2a8b83af7c47..ab82f145f689 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -49,7 +49,7 @@ struct atm_flow_data { struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ int ref; /* reference count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 23a167670fd5..d5798e17a832 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -128,7 +128,7 @@ struct cbq_class long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_cbq_xstats xstats; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 7597fe146866..12b2fb04b29b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -22,7 +22,7 @@ struct drr_class { unsigned int refcnt; unsigned int filter_cnt; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct list_head alist; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 362c2811b2df..dad0144423da 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,7 +116,7 @@ struct hfsc_class struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; unsigned int level; /* class level in hierarchy */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 88cd02626621..ec4d46399d59 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -74,7 +74,7 @@ enum htb_cmode { struct htb_class { struct Qdisc_class_common common; /* general class parameters */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_htb_xstats xstats; /* our special stats */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a63de3f7f185..6a4b19094143 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -133,7 +133,7 @@ static struct notifier_block sctp_inet6addr_notifier = { /* ICMP error handler. */ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; diff --git a/net/sctp/output.c b/net/sctp/output.c index b76411444515..b94c21190566 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -407,7 +407,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } dst = dst_clone(tp->dst); skb_dst_set(nskb, dst); - if (dst) + if (!dst) goto no_route; /* Build the SCTP header. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 79cbd47f4df7..a76da657244a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -160,6 +160,7 @@ static void sctp_proc_exit(void) remove_proc_entry("sctp", init_net.proc_net); } #endif + percpu_counter_destroy(&sctp_sockets_allocated); } /* Private helper to extract ipv4 address and stash them in diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f01e5d8a24f..971890dbfea0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -130,7 +130,7 @@ static inline int sctp_wspace(struct sctp_association *asoc) if (asoc->ep->sndbuf_policy) amt = asoc->sndbuf_used; else - amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); + amt = sk_wmem_alloc_get(asoc->base.sk); if (amt >= asoc->base.sk->sk_sndbuf) { if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) @@ -6523,7 +6523,7 @@ static int sctp_writeable(struct sock *sk) { int amt = 0; - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amt < 0) amt = 0; return amt; @@ -6652,21 +6652,6 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) finish_wait(sk->sk_sleep, &wait); } -static void sctp_sock_rfree_frag(struct sk_buff *skb) -{ - struct sk_buff *frag; - - if (!skb->data_len) - goto done; - - /* Don't forget the fragments. */ - skb_walk_frags(skb, frag) - sctp_sock_rfree_frag(frag); - -done: - sctp_sock_rfree(skb); -} - static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) { struct sk_buff *frag; @@ -6776,7 +6761,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sctp_sock_rfree_frag(skb); __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); sctp_skb_set_owner_r_frag(skb, newsk); @@ -6807,7 +6791,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sctp_sock_rfree_frag(skb); __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); sctp_skb_set_owner_r_frag(skb, newsk); @@ -6822,15 +6805,11 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } - sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) { - sctp_sock_rfree_frag(skb); + sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) sctp_skb_set_owner_r_frag(skb, newsk); - } - sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) { - sctp_sock_rfree_frag(skb); + sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) sctp_skb_set_owner_r_frag(skb, newsk); - } /* Set the type of socket to indicate that it is peeled off from the * original UDP-style socket or created with the accept() call on a diff --git a/net/socket.c b/net/socket.c index 791d71a36a93..6d4716559047 100644 --- a/net/socket.c +++ b/net/socket.c @@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, if (more) flags |= MSG_MORE; - return sock->ops->sendpage(sock, page, offset, size, flags); + return kernel_sendpage(sock, page, offset, size, flags); } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5369aa369b35..9d2fca5ad14a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -10,8 +10,9 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - rpcb_clnt.o timer.o xdr.o \ + addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c new file mode 100644 index 000000000000..22e8fd89477f --- /dev/null +++ b/net/sunrpc/addr.c @@ -0,0 +1,364 @@ +/* + * Copyright 2009, Oracle. All rights reserved. + * + * Convert socket addresses to presentation addresses and universal + * addresses, and vice versa. + * + * Universal addresses are introduced by RFC 1833 and further refined by + * recent RFCs describing NFSv4. The universal address format is part + * of the external (network) interface provided by rpcbind version 3 + * and 4, and by NFSv4. Such an address is a string containing a + * presentation format IP address followed by a port number in + * "hibyte.lobyte" format. + * + * IPv6 addresses can also include a scope ID, typically denoted by + * a '%' followed by a device name or a non-negative integer. Refer to + * RFC 4291, Section 2.2 for details on IPv6 presentation formats. + */ + +#include <net/ipv6.h> +#include <linux/sunrpc/clnt.h> + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + * + * To keep the result as short as possible, especially + * since we don't shorthand, we don't want leading zeros + * in each halfword, so avoid %pI6. + */ + return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", + ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), + ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), + ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), + ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + char scopebuf[IPV6_SCOPE_ID_LEN]; + size_t len; + int rc; + + len = rpc_ntop6_noscopeid(sap, buf, buflen); + if (unlikely(len == 0)) + return len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return len; + + rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", + IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); + if (unlikely((size_t)rc > sizeof(scopebuf))) + return 0; + + len += rc; + if (unlikely(len > buflen)) + return 0; + + strcat(buf, scopebuf); + return len; +} + +#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + return 0; +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + return 0; +} + +#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static int rpc_ntop4(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +/** + * rpc_ntop - construct a presentation address in @buf + * @sap: socket address + * @buf: construction area + * @buflen: size of @buf, in bytes + * + * Plants a %NUL-terminated string in @buf and returns the length + * of the string, excluding the %NUL. Otherwise zero is returned. + */ +size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + switch (sap->sa_family) { + case AF_INET: + return rpc_ntop4(sap, buf, buflen); + case AF_INET6: + return rpc_ntop6(sap, buf, buflen); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_ntop); + +static size_t rpc_pton4(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in)); + + if (in4_pton(buf, buflen, addr, '\0', NULL) == 0) + return 0; + + sin->sin_family = AF_INET; + return sizeof(struct sockaddr_in);; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int rpc_parse_scope_id(const char *buf, const size_t buflen, + const char *delim, struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if ((buf + buflen) == delim) + return 1; + + if (*delim != IPV6_SCOPE_DELIMITER) + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return 0; + + len = (buf + buflen) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } + } + + kfree(p); + + sin6->sin6_scope_id = scope_id; + return 1; + } + + return 0; +} + +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) || + salen < sizeof(struct sockaddr_in6)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in6)); + + if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) + return 0; + + if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + return 0; + + sin6->sin6_family = AF_INET6; + return sizeof(struct sockaddr_in6); +} +#else +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + return 0; +} +#endif + +/** + * rpc_pton - Construct a sockaddr in @sap + * @buf: C string containing presentation format IP address + * @buflen: length of presentation address in bytes + * @sap: buffer into which to plant socket address + * @salen: size of buffer in bytes + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + * + * Plants a socket address in @sap and returns the size of the + * socket address, if successful. Returns zero if an error + * occurred. + */ +size_t rpc_pton(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + unsigned int i; + + for (i = 0; i < buflen; i++) + if (buf[i] == ':') + return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton4(buf, buflen, sap, salen); +} +EXPORT_SYMBOL_GPL(rpc_pton); + +/** + * rpc_sockaddr2uaddr - Construct a universal address string from @sap. + * @sap: socket address + * + * Returns a %NUL-terminated string in dynamically allocated memory; + * otherwise NULL is returned if an error occurred. Caller must + * free the returned string. + */ +char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +{ + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + unsigned short port; + + switch (sap->sa_family) { + case AF_INET: + if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + break; + case AF_INET6: + if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + break; + default: + return NULL; + } + + if (snprintf(portbuf, sizeof(portbuf), + ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + return NULL; + + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + return NULL; + + return kstrdup(addrbuf, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); + +/** + * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @uaddr: C string containing universal address to convert + * @uaddr_len: length of universal address string + * @sap: buffer into which to plant socket address + * @salen: size of buffer + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + */ +size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, + struct sockaddr *sap, const size_t salen) +{ + char *c, buf[RPCBIND_MAXUADDRLEN]; + unsigned long portlo, porthi; + unsigned short port; + + if (uaddr_len > sizeof(buf)) + return 0; + + memcpy(buf, uaddr, uaddr_len); + + buf[uaddr_len] = '\n'; + buf[uaddr_len + 1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) + return 0; + if (unlikely(portlo > 255)) + return 0; + + c[0] = '\n'; + c[1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) + return 0; + if (unlikely(porthi > 255)) + return 0; + + port = (unsigned short)((porthi << 8) | portlo); + + c[0] = '\0'; + + if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + return 0; + + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + return sizeof(struct sockaddr_in); + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + return sizeof(struct sockaddr_in6); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 66d458fc6920..fc6a43ccd950 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -89,8 +89,8 @@ static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_free_ctx(struct gss_cl_ctx *); -static struct rpc_pipe_ops gss_upcall_ops_v0; -static struct rpc_pipe_ops gss_upcall_ops_v1; +static const struct rpc_pipe_ops gss_upcall_ops_v0; +static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) @@ -777,7 +777,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. */ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, "gssd", clnt, &gss_upcall_ops_v1, RPC_PIPE_WAIT_FOR_OPEN); @@ -786,7 +786,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops_v0, RPC_PIPE_WAIT_FOR_OPEN); @@ -1507,7 +1507,7 @@ static const struct rpc_credops gss_nullops = { .crunwrap_resp = gss_unwrap_resp, }; -static struct rpc_pipe_ops gss_upcall_ops_v0 = { +static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, @@ -1515,7 +1515,7 @@ static struct rpc_pipe_ops gss_upcall_ops_v0 = { .release_pipe = gss_pipe_release, }; -static struct rpc_pipe_ops gss_upcall_ops_v1 = { +static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 2278a50c6444..2e6a148d277c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -181,6 +181,11 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, rsi_request); +} + static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -270,7 +275,7 @@ static struct cache_detail rsi_cache = { .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_request = rsi_request, + .cache_upcall = rsi_upcall, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c new file mode 100644 index 000000000000..553621fb2c41 --- /dev/null +++ b/net/sunrpc/backchannel_rqst.c @@ -0,0 +1,281 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#include <linux/tcp.h> +#include <linux/sunrpc/xprt.h> + +#ifdef RPC_DEBUG +#define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#if defined(CONFIG_NFS_V4_1) + +/* + * Helper routines that track the number of preallocation elements + * on the transport. + */ +static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) +{ + return xprt->bc_alloc_count > 0; +} + +static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + xprt->bc_alloc_count += n; +} + +static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + return xprt->bc_alloc_count -= n; +} + +/* + * Free the preallocated rpc_rqst structure and the memory + * buffers hanging off of it. + */ +static void xprt_free_allocation(struct rpc_rqst *req) +{ + struct xdr_buf *xbufp; + + dprintk("RPC: free allocations for req= %p\n", req); + BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + xbufp = &req->rq_private_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + xbufp = &req->rq_snd_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + list_del(&req->rq_bc_pa_list); + kfree(req); +} + +/* + * Preallocate up to min_reqs structures and related buffers for use + * by the backchannel. This function can be called multiple times + * when creating new sessions that use the same rpc_xprt. The + * preallocated buffers are added to the pool of resources used by + * the rpc_xprt. Anyone of these resources may be used used by an + * incoming callback request. It's up to the higher levels in the + * stack to enforce that the maximum number of session slots is not + * being exceeded. + * + * Some callback arguments can be large. For example, a pNFS server + * using multiple deviceids. The list can be unbound, but the client + * has the ability to tell the server the maximum size of the callback + * requests. Each deviceID is 16 bytes, so allocate one page + * for the arguments to have enough room to receive a number of these + * deviceIDs. The NFS client indicates to the pNFS server that its + * callback requests can be up to 4096 bytes in size. + */ +int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) +{ + struct page *page_rcv = NULL, *page_snd = NULL; + struct xdr_buf *xbufp = NULL; + struct rpc_rqst *req, *tmp; + struct list_head tmp_list; + int i; + + dprintk("RPC: setup backchannel transport\n"); + + /* + * We use a temporary list to keep track of the preallocated + * buffers. Once we're done building the list we splice it + * into the backchannel preallocation list off of the rpc_xprt + * struct. This helps minimize the amount of time the list + * lock is held on the rpc_xprt struct. It also makes cleanup + * easier in case of memory allocation errors. + */ + INIT_LIST_HEAD(&tmp_list); + for (i = 0; i < min_reqs; i++) { + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (req == NULL) { + printk(KERN_ERR "Failed to create bc rpc_rqst\n"); + goto out_free; + } + + /* Add the allocated buffer to the tmp list */ + dprintk("RPC: adding req= %p\n", req); + list_add(&req->rq_bc_pa_list, &tmp_list); + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + page_rcv = alloc_page(GFP_KERNEL); + if (page_rcv == NULL) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + xbufp = &req->rq_rcv_buf; + xbufp->head[0].iov_base = page_address(page_rcv); + xbufp->head[0].iov_len = PAGE_SIZE; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = PAGE_SIZE; + xbufp->buflen = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + page_snd = alloc_page(GFP_KERNEL); + if (page_snd == NULL) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + + xbufp = &req->rq_snd_buf; + xbufp->head[0].iov_base = page_address(page_snd); + xbufp->head[0].iov_len = 0; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = 0; + xbufp->buflen = PAGE_SIZE; + } + + /* + * Add the temporary list to the backchannel preallocation list + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_splice(&tmp_list, &xprt->bc_pa_list); + xprt_inc_alloc_count(xprt, min_reqs); + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: setup backchannel transport done\n"); + return 0; + +out_free: + /* + * Memory allocation failed, free the temporary list + */ + list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) + xprt_free_allocation(req); + + dprintk("RPC: setup backchannel transport failed\n"); + return -1; +} +EXPORT_SYMBOL(xprt_setup_backchannel); + +/* + * Destroys the backchannel preallocated structures. + * Since these structures may have been allocated by multiple calls + * to xprt_setup_backchannel, we only destroy up to the maximum number + * of reqs specified by the caller. + * @xprt: the transport holding the preallocated strucures + * @max_reqs the maximum number of preallocated structures to destroy + */ +void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) +{ + struct rpc_rqst *req = NULL, *tmp = NULL; + + dprintk("RPC: destroy backchannel transport\n"); + + BUG_ON(max_reqs == 0); + spin_lock_bh(&xprt->bc_pa_lock); + xprt_dec_alloc_count(xprt, max_reqs); + list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { + dprintk("RPC: req=%p\n", req); + xprt_free_allocation(req); + if (--max_reqs == 0) + break; + } + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: backchannel list empty= %s\n", + list_empty(&xprt->bc_pa_list) ? "true" : "false"); +} +EXPORT_SYMBOL(xprt_destroy_backchannel); + +/* + * One or more rpc_rqst structure have been preallocated during the + * backchannel setup. Buffer space for the send and private XDR buffers + * has been preallocated as well. Use xprt_alloc_bc_request to allocate + * to this request. Use xprt_free_bc_request to return it. + * + * We know that we're called in soft interrupt context, grab the spin_lock + * since there is no need to grab the bottom half spin_lock. + * + * Return an available rpc_rqst, otherwise NULL if non are available. + */ +struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + + dprintk("RPC: allocate a backchannel request\n"); + spin_lock(&xprt->bc_pa_lock); + if (!list_empty(&xprt->bc_pa_list)) { + req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + list_del(&req->rq_bc_pa_list); + } else { + req = NULL; + } + spin_unlock(&xprt->bc_pa_lock); + + if (req != NULL) { + set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + req->rq_reply_bytes_recvd = 0; + req->rq_bytes_sent = 0; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(req->rq_private_buf)); + } + dprintk("RPC: backchannel req=%p\n", req); + return req; +} + +/* + * Return the preallocated rpc_rqst structure and XDR buffers + * associated with this rpc_task. + */ +void xprt_free_bc_request(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + + dprintk("RPC: free backchannel req=%p\n", req); + + smp_mb__before_clear_bit(); + BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + smp_mb__after_clear_bit(); + + if (!xprt_need_to_requeue(xprt)) { + /* + * The last remaining session was destroyed while this + * entry was in use. Free the entry and don't attempt + * to add back to the list because there is no need to + * have anymore preallocated entries. + */ + dprintk("RPC: Last session removed req=%p\n", req); + xprt_free_allocation(req); + return; + } + + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c new file mode 100644 index 000000000000..13f214f53120 --- /dev/null +++ b/net/sunrpc/bc_svc.c @@ -0,0 +1,81 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * The NFSv4.1 callback service helper routines. + * They implement the transport level processing required to send the + * reply over an existing open connection previously established by the client. + */ + +#if defined(CONFIG_NFS_V4_1) + +#include <linux/module.h> + +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/bc_xprt.h> + +#define RPCDBG_FACILITY RPCDBG_SVCDSP + +void bc_release_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + dprintk("RPC: bc_release_request: task= %p\n", task); + + /* + * Release this request only if it's a backchannel + * preallocated request + */ + if (!bc_prealloc(req)) + return; + xprt_free_bc_request(req); +} + +/* Empty callback ops */ +static const struct rpc_call_ops nfs41_callback_ops = { +}; + + +/* + * Send the callback reply + */ +int bc_send(struct rpc_rqst *req) +{ + struct rpc_task *task; + int ret; + + dprintk("RPC: bc_send req= %p\n", req); + task = rpc_run_bc_task(req, &nfs41_callback_ops); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + BUG_ON(atomic_read(&task->tk_count) != 1); + ret = task->tk_status; + rpc_put_task(task); + } + return ret; + dprintk("RPC: bc_send ret= %d \n", ret); +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20029a79a5de..45cdaff9b361 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -27,10 +27,12 @@ #include <linux/net.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/pagemap.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #define RPCDBG_FACILITY RPCDBG_CACHE @@ -175,7 +177,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_update); -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); +static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) +{ + if (!cd->cache_upcall) + return -EINVAL; + return cd->cache_upcall(cd, h); +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -284,76 +292,11 @@ static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; -static const struct file_operations cache_file_operations; -static const struct file_operations content_file_operations; -static const struct file_operations cache_flush_operations; - static void do_cache_clean(struct work_struct *work); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); -static void remove_cache_proc_entries(struct cache_detail *cd) -{ - if (cd->proc_ent == NULL) - return; - if (cd->flush_ent) - remove_proc_entry("flush", cd->proc_ent); - if (cd->channel_ent) - remove_proc_entry("channel", cd->proc_ent); - if (cd->content_ent) - remove_proc_entry("content", cd->proc_ent); - cd->proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); -} - -#ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) -{ - struct proc_dir_entry *p; - - cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); - if (cd->proc_ent == NULL) - goto out_nomem; - cd->channel_ent = cd->content_ent = NULL; - - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_flush_operations, cd); - cd->flush_ent = p; - if (p == NULL) - goto out_nomem; - - if (cd->cache_request || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_file_operations, cd); - cd->channel_ent = p; - if (p == NULL) - goto out_nomem; - } - if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &content_file_operations, cd); - cd->content_ent = p; - if (p == NULL) - goto out_nomem; - } - return 0; -out_nomem: - remove_cache_proc_entries(cd); - return -ENOMEM; -} -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) -{ - return 0; -} -#endif - -int cache_register(struct cache_detail *cd) +static void sunrpc_init_cache_detail(struct cache_detail *cd) { - int ret; - - ret = create_cache_proc_entries(cd); - if (ret) - return ret; rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); @@ -367,11 +310,9 @@ int cache_register(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); - return 0; } -EXPORT_SYMBOL_GPL(cache_register); -void cache_unregister(struct cache_detail *cd) +static void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -386,7 +327,6 @@ void cache_unregister(struct cache_detail *cd) list_del_init(&cd->others); write_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); - remove_cache_proc_entries(cd); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); @@ -395,7 +335,6 @@ void cache_unregister(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -EXPORT_SYMBOL_GPL(cache_unregister); /* clean cache tries to find something to clean * and cleans it. @@ -488,7 +427,7 @@ static void do_cache_clean(struct work_struct *work) { int delay = 5; if (cache_clean() == -1) - delay = 30*HZ; + delay = round_jiffies_relative(30*HZ); if (list_empty(&cache_list)) delay = 0; @@ -687,18 +626,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; -static ssize_t -cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, + loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct inode *inode = filp->f_path.dentry->d_inode; int err; if (count == 0) return 0; - mutex_lock(&queue_io_mutex); /* protect against multiple concurrent + mutex_lock(&inode->i_mutex); /* protect against multiple concurrent * readers on this file */ again: spin_lock(&queue_lock); @@ -711,7 +650,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) } if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); - mutex_unlock(&queue_io_mutex); + mutex_unlock(&inode->i_mutex); BUG_ON(rp->offset); return 0; } @@ -758,49 +697,90 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) } if (err == -EAGAIN) goto again; - mutex_unlock(&queue_io_mutex); + mutex_unlock(&inode->i_mutex); return err ? err : count; } -static char write_buf[8192]; /* protected by queue_io_mutex */ +static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, + size_t count, struct cache_detail *cd) +{ + ssize_t ret; -static ssize_t -cache_write(struct file *filp, const char __user *buf, size_t count, - loff_t *ppos) + if (copy_from_user(kaddr, buf, count)) + return -EFAULT; + kaddr[count] = '\0'; + ret = cd->cache_parse(cd, kaddr, count); + if (!ret) + ret = count; + return ret; +} + +static ssize_t cache_slow_downcall(const char __user *buf, + size_t count, struct cache_detail *cd) { - int err; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + static char write_buf[8192]; /* protected by queue_io_mutex */ + ssize_t ret = -EINVAL; - if (count == 0) - return 0; if (count >= sizeof(write_buf)) - return -EINVAL; - + goto out; mutex_lock(&queue_io_mutex); + ret = cache_do_downcall(write_buf, buf, count, cd); + mutex_unlock(&queue_io_mutex); +out: + return ret; +} - if (copy_from_user(write_buf, buf, count)) { - mutex_unlock(&queue_io_mutex); - return -EFAULT; - } - write_buf[count] = '\0'; - if (cd->cache_parse) - err = cd->cache_parse(cd, write_buf, count); - else - err = -EINVAL; +static ssize_t cache_downcall(struct address_space *mapping, + const char __user *buf, + size_t count, struct cache_detail *cd) +{ + struct page *page; + char *kaddr; + ssize_t ret = -ENOMEM; + + if (count >= PAGE_CACHE_SIZE) + goto out_slow; + + page = find_or_create_page(mapping, 0, GFP_KERNEL); + if (!page) + goto out_slow; + + kaddr = kmap(page); + ret = cache_do_downcall(kaddr, buf, count, cd); + kunmap(page); + unlock_page(page); + page_cache_release(page); + return ret; +out_slow: + return cache_slow_downcall(buf, count, cd); +} - mutex_unlock(&queue_io_mutex); - return err ? err : count; +static ssize_t cache_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) +{ + struct address_space *mapping = filp->f_mapping; + struct inode *inode = filp->f_path.dentry->d_inode; + ssize_t ret = -EINVAL; + + if (!cd->cache_parse) + goto out; + + mutex_lock(&inode->i_mutex); + ret = cache_downcall(mapping, buf, count, cd); + mutex_unlock(&inode->i_mutex); +out: + return ret; } static DECLARE_WAIT_QUEUE_HEAD(queue_wait); -static unsigned int -cache_poll(struct file *filp, poll_table *wait) +static unsigned int cache_poll(struct file *filp, poll_table *wait, + struct cache_detail *cd) { unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -822,14 +802,13 @@ cache_poll(struct file *filp, poll_table *wait) return mask; } -static int -cache_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +static int cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg, + struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(ino)->data; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -852,15 +831,15 @@ cache_ioctl(struct inode *ino, struct file *filp, return put_user(len, (int __user *)arg); } -static int -cache_open(struct inode *inode, struct file *filp) +static int cache_open(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = NULL; + if (!cd || !try_module_get(cd->owner)) + return -EACCES; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { - struct cache_detail *cd = PDE(inode)->data; - rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; @@ -875,11 +854,10 @@ cache_open(struct inode *inode, struct file *filp) return 0; } -static int -cache_release(struct inode *inode, struct file *filp) +static int cache_release(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; - struct cache_detail *cd = PDE(inode)->data; if (rp) { spin_lock(&queue_lock); @@ -903,23 +881,12 @@ cache_release(struct inode *inode, struct file *filp) cd->last_close = get_seconds(); atomic_dec(&cd->readers); } + module_put(cd->owner); return 0; } -static const struct file_operations cache_file_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read, - .write = cache_write, - .poll = cache_poll, - .ioctl = cache_ioctl, /* for FIONREAD */ - .open = cache_open, - .release = cache_release, -}; - - static void queue_loose(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq; @@ -1020,15 +987,21 @@ static void warn_no_listener(struct cache_detail *detail) if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) - detail->warn_no_listener(detail); + detail->warn_no_listener(detail, detail->last_close != 0); } } /* - * register an upcall request to user-space. + * register an upcall request to user-space and queue it up for read() by the + * upcall daemon. + * * Each request is at most one page long. */ -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)) { char *buf; @@ -1036,9 +1009,6 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) char *bp; int len; - if (detail->cache_request == NULL) - return -EINVAL; - if (atomic_read(&detail->readers) == 0 && detail->last_close < get_seconds() - 30) { warn_no_listener(detail); @@ -1057,7 +1027,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) bp = buf; len = PAGE_SIZE; - detail->cache_request(detail, h, &bp, &len); + cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); @@ -1075,6 +1045,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) wake_up(&queue_wait); return 0; } +EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); /* * parse a message from user-space and pass it @@ -1242,11 +1213,13 @@ static const struct seq_operations cache_content_op = { .show = c_show, }; -static int content_open(struct inode *inode, struct file *file) +static int content_open(struct inode *inode, struct file *file, + struct cache_detail *cd) { struct handle *han; - struct cache_detail *cd = PDE(inode)->data; + if (!cd || !try_module_get(cd->owner)) + return -EACCES; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) return -ENOMEM; @@ -1255,17 +1228,33 @@ static int content_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations content_file_operations = { - .open = content_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; +static int content_release(struct inode *inode, struct file *file, + struct cache_detail *cd) +{ + int ret = seq_release_private(inode, file); + module_put(cd->owner); + return ret; +} + +static int open_flush(struct inode *inode, struct file *file, + struct cache_detail *cd) +{ + if (!cd || !try_module_get(cd->owner)) + return -EACCES; + return nonseekable_open(inode, file); +} + +static int release_flush(struct inode *inode, struct file *file, + struct cache_detail *cd) +{ + module_put(cd->owner); + return 0; +} static ssize_t read_flush(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; size_t len; @@ -1283,10 +1272,10 @@ static ssize_t read_flush(struct file *file, char __user *buf, return len; } -static ssize_t write_flush(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t write_flush(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; @@ -1307,8 +1296,343 @@ static ssize_t write_flush(struct file * file, const char __user * buf, return count; } -static const struct file_operations cache_flush_operations = { - .open = nonseekable_open, - .read = read_flush, - .write = write_flush, +static ssize_t cache_read_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_procfs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_open(inode, filp, cd); +} + +static int cache_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_release(inode, filp, cd); +} + +static const struct file_operations cache_file_operations_procfs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_procfs, + .write = cache_write_procfs, + .poll = cache_poll_procfs, + .ioctl = cache_ioctl_procfs, /* for FIONREAD */ + .open = cache_open_procfs, + .release = cache_release_procfs, }; + +static int content_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_open(inode, filp, cd); +} + +static int content_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_release(inode, filp, cd); +} + +static const struct file_operations content_file_operations_procfs = { + .open = content_open_procfs, + .read = seq_read, + .llseek = seq_lseek, + .release = content_release_procfs, +}; + +static int open_flush_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return open_flush(inode, filp, cd); +} + +static int release_flush_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return release_flush(inode, filp, cd); +} + +static ssize_t read_flush_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_procfs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return write_flush(filp, buf, count, ppos, cd); +} + +static const struct file_operations cache_flush_operations_procfs = { + .open = open_flush_procfs, + .read = read_flush_procfs, + .write = write_flush_procfs, + .release = release_flush_procfs, +}; + +static void remove_cache_proc_entries(struct cache_detail *cd) +{ + if (cd->u.procfs.proc_ent == NULL) + return; + if (cd->u.procfs.flush_ent) + remove_proc_entry("flush", cd->u.procfs.proc_ent); + if (cd->u.procfs.channel_ent) + remove_proc_entry("channel", cd->u.procfs.proc_ent); + if (cd->u.procfs.content_ent) + remove_proc_entry("content", cd->u.procfs.proc_ent); + cd->u.procfs.proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); +} + +#ifdef CONFIG_PROC_FS +static int create_cache_proc_entries(struct cache_detail *cd) +{ + struct proc_dir_entry *p; + + cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->u.procfs.proc_ent == NULL) + goto out_nomem; + cd->u.procfs.channel_ent = NULL; + cd->u.procfs.content_ent = NULL; + + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_flush_operations_procfs, cd); + cd->u.procfs.flush_ent = p; + if (p == NULL) + goto out_nomem; + + if (cd->cache_upcall || cd->cache_parse) { + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_file_operations_procfs, cd); + cd->u.procfs.channel_ent = p; + if (p == NULL) + goto out_nomem; + } + if (cd->cache_show) { + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &content_file_operations_procfs, cd); + cd->u.procfs.content_ent = p; + if (p == NULL) + goto out_nomem; + } + return 0; +out_nomem: + remove_cache_proc_entries(cd); + return -ENOMEM; +} +#else /* CONFIG_PROC_FS */ +static int create_cache_proc_entries(struct cache_detail *cd) +{ + return 0; +} +#endif + +int cache_register(struct cache_detail *cd) +{ + int ret; + + sunrpc_init_cache_detail(cd); + ret = create_cache_proc_entries(cd); + if (ret) + sunrpc_destroy_cache_detail(cd); + return ret; +} +EXPORT_SYMBOL_GPL(cache_register); + +void cache_unregister(struct cache_detail *cd) +{ + remove_cache_proc_entries(cd); + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(cache_unregister); + +static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_open(inode, filp, cd); +} + +static int cache_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_release(inode, filp, cd); +} + +const struct file_operations cache_file_operations_pipefs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_pipefs, + .write = cache_write_pipefs, + .poll = cache_poll_pipefs, + .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .open = cache_open_pipefs, + .release = cache_release_pipefs, +}; + +static int content_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return content_open(inode, filp, cd); +} + +static int content_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return content_release(inode, filp, cd); +} + +const struct file_operations content_file_operations_pipefs = { + .open = content_open_pipefs, + .read = seq_read, + .llseek = seq_lseek, + .release = content_release_pipefs, +}; + +static int open_flush_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return open_flush(inode, filp, cd); +} + +static int release_flush_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return release_flush(inode, filp, cd); +} + +static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_pipefs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return write_flush(filp, buf, count, ppos, cd); +} + +const struct file_operations cache_flush_operations_pipefs = { + .open = open_flush_pipefs, + .read = read_flush_pipefs, + .write = write_flush_pipefs, + .release = release_flush_pipefs, +}; + +int sunrpc_cache_register_pipefs(struct dentry *parent, + const char *name, mode_t umode, + struct cache_detail *cd) +{ + struct qstr q; + struct dentry *dir; + int ret = 0; + + sunrpc_init_cache_detail(cd); + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + dir = rpc_create_cache_dir(parent, &q, umode, cd); + if (!IS_ERR(dir)) + cd->u.pipefs.dir = dir; + else { + sunrpc_destroy_cache_detail(cd); + ret = PTR_ERR(dir); + } + return ret; +} +EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); + +void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) +{ + rpc_remove_cache_dir(cd->u.pipefs.dir); + cd->u.pipefs.dir = NULL; + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); + diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5abab094441f..fac0ca93f06b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -27,8 +27,9 @@ #include <linux/types.h> #include <linux/kallsyms.h> #include <linux/mm.h> +#include <linux/namei.h> +#include <linux/mount.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/utsname.h> #include <linux/workqueue.h> #include <linux/in6.h> @@ -36,7 +37,9 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> +#include <linux/sunrpc/bc_xprt.h> +#include "sunrpc.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -63,6 +66,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); +#if defined(CONFIG_NFS_V4_1) +static void call_bc_transmit(struct rpc_task *task); +#endif /* CONFIG_NFS_V4_1 */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -93,33 +99,49 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; + struct nameidata nd; + struct path path; + char name[15]; + struct qstr q = { + .name = name, + }; int error; - clnt->cl_vfsmnt = ERR_PTR(-ENOENT); - clnt->cl_dentry = ERR_PTR(-ENOENT); + clnt->cl_path.mnt = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = ERR_PTR(-ENOENT); if (dir_name == NULL) return 0; - clnt->cl_vfsmnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_vfsmnt)) - return PTR_ERR(clnt->cl_vfsmnt); + path.mnt = rpc_get_mount(); + if (IS_ERR(path.mnt)) + return PTR_ERR(path.mnt); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); + if (error) + goto err; for (;;) { - snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), - "%s/clnt%x", dir_name, - (unsigned int)clntid++); - clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_dentry)) - return 0; - error = PTR_ERR(clnt->cl_dentry); + q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + name[sizeof(name) - 1] = '\0'; + q.hash = full_name_hash(q.name, q.len); + path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); + if (!IS_ERR(path.dentry)) + break; + error = PTR_ERR(path.dentry); if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - clnt->cl_pathname, error); - rpc_put_mount(); - return error; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %d\n", + dir_name, name, error); + goto err_path_put; } } + path_put(&nd.path); + clnt->cl_path = path; + return 0; +err_path_put: + path_put(&nd.path); +err: + rpc_put_mount(); + return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) @@ -227,8 +249,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } out_no_path: @@ -419,8 +441,8 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { @@ -613,6 +635,50 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); +#if defined(CONFIG_NFS_V4_1) +/** + * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run + * rpc_execute against it + * @ops: RPC call ops + */ +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + const struct rpc_call_ops *tk_ops) +{ + struct rpc_task *task; + struct xdr_buf *xbufp = &req->rq_snd_buf; + struct rpc_task_setup task_setup_data = { + .callback_ops = tk_ops, + }; + + dprintk("RPC: rpc_run_bc_task req= %p\n", req); + /* + * Create an rpc_task to send the data + */ + task = rpc_new_task(&task_setup_data); + if (!task) { + xprt_free_bc_request(req); + goto out; + } + task->tk_rqstp = req; + + /* + * Set up the xdr_buf length. + * This also indicates that the buffer is XDR encoded already. + */ + xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + + xbufp->tail[0].iov_len; + + task->tk_action = call_bc_transmit; + atomic_inc(&task->tk_count); + BUG_ON(atomic_read(&task->tk_count) != 2); + rpc_execute(task); + +out: + dprintk("RPC: rpc_run_bc_task: task= %p\n", task); + return task; +} +#endif /* CONFIG_NFS_V4_1 */ + void rpc_call_start(struct rpc_task *task) { @@ -695,6 +761,19 @@ void rpc_force_rebind(struct rpc_clnt *clnt) EXPORT_SYMBOL_GPL(rpc_force_rebind); /* + * Restart an (async) RPC call from the call_prepare state. + * Usually called from within the exit handler. + */ +void +rpc_restart_call_prepare(struct rpc_task *task) +{ + if (RPC_ASSASSINATED(task)) + return; + task->tk_action = rpc_prepare_task; +} +EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); + +/* * Restart an (async) RPC call. Usually called from within the * exit handler. */ @@ -876,6 +955,7 @@ static inline void rpc_task_force_reencode(struct rpc_task *task) { task->tk_rqstp->rq_snd_buf.len = 0; + task->tk_rqstp->rq_bytes_sent = 0; } static inline void @@ -1085,7 +1165,7 @@ call_transmit(struct rpc_task *task) * in order to allow access to the socket to other RPC requests. */ call_transmit_status(task); - if (task->tk_msg.rpc_proc->p_decode != NULL) + if (rpc_reply_expected(task)) return; task->tk_action = rpc_exit_task; rpc_wake_up_queued_task(&task->tk_xprt->pending, task); @@ -1120,6 +1200,72 @@ call_transmit_status(struct rpc_task *task) } } +#if defined(CONFIG_NFS_V4_1) +/* + * 5b. Send the backchannel RPC reply. On error, drop the reply. In + * addition, disconnect on connectivity errors. + */ +static void +call_bc_transmit(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + BUG_ON(task->tk_status != 0); + task->tk_status = xprt_prepare_transmit(task); + if (task->tk_status == -EAGAIN) { + /* + * Could not reserve the transport. Try again after the + * transport is released. + */ + task->tk_status = 0; + task->tk_action = call_bc_transmit; + return; + } + + task->tk_action = rpc_exit_task; + if (task->tk_status < 0) { + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + return; + } + + xprt_transmit(task); + xprt_end_transmit(task); + dprint_status(task); + switch (task->tk_status) { + case 0: + /* Success */ + break; + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -ETIMEDOUT: + /* + * Problem reaching the server. Disconnect and let the + * forechannel reestablish the connection. The server will + * have to retransmit the backchannel request and we'll + * reprocess it. Since these ops are idempotent, there's no + * need to cache our reply at this time. + */ + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); + break; + default: + /* + * We were unable to reply and will have to drop the + * request. The server should reconnect and retransmit. + */ + BUG_ON(task->tk_status == -EAGAIN); + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + break; + } + rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +} +#endif /* CONFIG_NFS_V4_1 */ + /* * 6. Sort out the RPC call status */ @@ -1130,8 +1276,8 @@ call_status(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; int status; - if (req->rq_received > 0 && !req->rq_bytes_sent) - task->tk_status = req->rq_received; + if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent) + task->tk_status = req->rq_reply_bytes_recvd; dprint_status(task); @@ -1248,7 +1394,7 @@ call_decode(struct rpc_task *task) /* * Ensure that we see all writes made by xprt_complete_rqst() - * before it changed req->rq_received. + * before it changed req->rq_reply_bytes_recvd. */ smp_rmb(); req->rq_rcv_buf.len = req->rq_private_buf.len; @@ -1289,7 +1435,7 @@ out_retry: task->tk_status = 0; /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { - req->rq_received = req->rq_rcv_buf.len = 0; + req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); @@ -1377,13 +1523,14 @@ rpc_verify_header(struct rpc_task *task) } if ((len -= 3) < 0) goto out_overflow; - p += 1; /* skip XID */ + p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { dprintk("RPC: %5u %s: not an RPC reply: %x\n", - task->tk_pid, __func__, n); + task->tk_pid, __func__, n); goto out_garbage; } + if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) goto out_overflow; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9ced0628d69c..7f676bdf70d3 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -26,6 +26,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/cache.h> static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -125,7 +126,7 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_pipe_ops *ops; + const struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); @@ -398,66 +399,12 @@ static const struct file_operations rpc_info_operations = { /* - * We have a single directory with 1 node in it. - */ -enum { - RPCAUTH_Root = 1, - RPCAUTH_lockd, - RPCAUTH_mount, - RPCAUTH_nfs, - RPCAUTH_portmap, - RPCAUTH_statd, - RPCAUTH_nfsd4_cb, - RPCAUTH_RootEOF -}; - -/* * Description of fs contents. */ struct rpc_filelist { - char *name; + const char *name; const struct file_operations *i_fop; - int mode; -}; - -static struct rpc_filelist files[] = { - [RPCAUTH_lockd] = { - .name = "lockd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_mount] = { - .name = "mount", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfs] = { - .name = "nfs", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_portmap] = { - .name = "portmap", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_statd] = { - .name = "statd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfsd4_cb] = { - .name = "nfsd4_cb", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, -}; - -enum { - RPCAUTH_info = 2, - RPCAUTH_EOF -}; - -static struct rpc_filelist authfiles[] = { - [RPCAUTH_info] = { - .name = "info", - .i_fop = &rpc_info_operations, - .mode = S_IFREG | S_IRUSR, - }, + umode_t mode; }; struct vfsmount *rpc_get_mount(void) @@ -469,11 +416,13 @@ struct vfsmount *rpc_get_mount(void) return ERR_PTR(err); return rpc_mount; } +EXPORT_SYMBOL_GPL(rpc_get_mount); void rpc_put_mount(void) { simple_release_fs(&rpc_mount, &rpc_mount_count); } +EXPORT_SYMBOL_GPL(rpc_put_mount); static int rpc_delete_dentry(struct dentry *dentry) { @@ -484,39 +433,8 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -static int -rpc_lookup_parent(char *path, struct nameidata *nd) -{ - struct vfsmount *mnt; - - if (path[0] == '\0') - return -ENOENT; - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - printk(KERN_WARNING "%s: %s failed to mount " - "pseudofilesystem \n", __FILE__, __func__); - return PTR_ERR(mnt); - } - - if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) { - printk(KERN_WARNING "%s: %s failed to find path %s\n", - __FILE__, __func__, path); - rpc_put_mount(); - return -ENOENT; - } - return 0; -} - -static void -rpc_release_path(struct nameidata *nd) -{ - path_put(&nd->path); - rpc_put_mount(); -} - static struct inode * -rpc_get_inode(struct super_block *sb, int mode) +rpc_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) @@ -534,212 +452,274 @@ rpc_get_inode(struct super_block *sb, int mode) return inode; } -/* - * FIXME: This probably has races. - */ -static void rpc_depopulate(struct dentry *parent, - unsigned long start, unsigned long eof) +static int __rpc_create_common(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *dir = parent->d_inode; - struct list_head *pos, *next; - struct dentry *dentry, *dvec[10]; - int n = 0; + struct inode *inode; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); -repeat: - spin_lock(&dcache_lock); - list_for_each_safe(pos, next, &parent->d_subdirs) { - dentry = list_entry(pos, struct dentry, d_u.d_child); - if (!dentry->d_inode || - dentry->d_inode->i_ino < start || - dentry->d_inode->i_ino >= eof) - continue; - spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry)) { - dget_locked(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - dvec[n++] = dentry; - if (n == ARRAY_SIZE(dvec)) - break; - } else - spin_unlock(&dentry->d_lock); - } - spin_unlock(&dcache_lock); - if (n) { - do { - dentry = dvec[--n]; - if (S_ISREG(dentry->d_inode->i_mode)) - simple_unlink(dir, dentry); - else if (S_ISDIR(dentry->d_inode->i_mode)) - simple_rmdir(dir, dentry); - d_delete(dentry); - dput(dentry); - } while (n); - goto repeat; - } - mutex_unlock(&dir->i_mutex); + BUG_ON(!d_unhashed(dentry)); + inode = rpc_get_inode(dir->i_sb, mode); + if (!inode) + goto out_err; + inode->i_ino = iunique(dir->i_sb, 100); + if (i_fop) + inode->i_fop = i_fop; + if (private) + rpc_inode_setowner(inode, private); + d_add(dentry, inode); + return 0; +out_err: + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", + __FILE__, __func__, dentry->d_name.name); + dput(dentry); + return -ENOMEM; } -static int -rpc_populate(struct dentry *parent, - struct rpc_filelist *files, - int start, int eof) +static int __rpc_create(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *inode, *dir = parent->d_inode; - void *private = RPC_I(dir)->private; - struct dentry *dentry; - int mode, i; + int err; - mutex_lock(&dir->i_mutex); - for (i = start; i < eof; i++) { - dentry = d_alloc_name(parent, files[i].name); - if (!dentry) - goto out_bad; - dentry->d_op = &rpc_dentry_operations; - mode = files[i].mode; - inode = rpc_get_inode(dir->i_sb, mode); - if (!inode) { - dput(dentry); - goto out_bad; - } - inode->i_ino = i; - if (files[i].i_fop) - inode->i_fop = files[i].i_fop; - if (private) - rpc_inode_setowner(inode, private); - if (S_ISDIR(mode)) - inc_nlink(dir); - d_add(dentry, inode); - fsnotify_create(dir, dentry); - } - mutex_unlock(&dir->i_mutex); + err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); + if (err) + return err; + fsnotify_create(dir, dentry); return 0; -out_bad: - mutex_unlock(&dir->i_mutex); - printk(KERN_WARNING "%s: %s failed to populate directory %s\n", - __FILE__, __func__, parent->d_name.name); - return -ENOMEM; } -static int -__rpc_mkdir(struct inode *dir, struct dentry *dentry) +static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *inode; + int err; - inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO); - if (!inode) - goto out_err; - inode->i_ino = iunique(dir->i_sb, 100); - d_instantiate(dentry, inode); + err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); + if (err) + return err; inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; -out_err: - printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", - __FILE__, __func__, dentry->d_name.name); - return -ENOMEM; } -static int -__rpc_rmdir(struct inode *dir, struct dentry *dentry) +static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private, + const struct rpc_pipe_ops *ops, + int flags) { - int error; - error = simple_rmdir(dir, dentry); - if (!error) - d_delete(dentry); - return error; + struct rpc_inode *rpci; + int err; + + err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); + if (err) + return err; + rpci = RPC_I(dentry->d_inode); + rpci->nkern_readwriters = 1; + rpci->private = private; + rpci->flags = flags; + rpci->ops = ops; + fsnotify_create(dir, dentry); + return 0; } -static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) +static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) +{ + int ret; + + dget(dentry); + ret = simple_rmdir(dir, dentry); + d_delete(dentry); + dput(dentry); + return ret; +} + +static int __rpc_unlink(struct inode *dir, struct dentry *dentry) +{ + int ret; + + dget(dentry); + ret = simple_unlink(dir, dentry); + d_delete(dentry); + dput(dentry); + return ret; +} + +static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + + rpci->nkern_readwriters--; + if (rpci->nkern_readwriters != 0) + return 0; + rpc_close_pipes(inode); + return __rpc_unlink(dir, dentry); +} + +static struct dentry *__rpc_lookup_create(struct dentry *parent, + struct qstr *name) { - struct inode *dir = parent->d_inode; struct dentry *dentry; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(name, parent, len); - if (IS_ERR(dentry)) - goto out_err; + dentry = d_lookup(parent, name); + if (!dentry) { + dentry = d_alloc(parent, name); + if (!dentry) { + dentry = ERR_PTR(-ENOMEM); + goto out_err; + } + } if (!dentry->d_inode) dentry->d_op = &rpc_dentry_operations; - else if (exclusive) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - goto out_err; - } - return dentry; out_err: - mutex_unlock(&dir->i_mutex); return dentry; } -static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, + struct qstr *name) { struct dentry *dentry; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dentry = rpc_lookup_create(nd->path.dentry, nd->last.name, nd->last.len, - 1); - if (IS_ERR(dentry)) - rpc_release_path(nd); - return dentry; + dentry = __rpc_lookup_create(parent, name); + if (dentry->d_inode == NULL) + return dentry; + dput(dentry); + return ERR_PTR(-EEXIST); } -/** - * rpc_mkdir - Create a new directory in rpc_pipefs - * @path: path from the rpc_pipefs root to the new directory - * @rpc_client: rpc client to associate with this directory - * - * This creates a directory at the given @path associated with - * @rpc_clnt, which will contain a file named "info" with some basic - * information about the client, together with any "pipes" that may - * later be created using rpc_mkpipe(). +/* + * FIXME: This probably has races. */ -struct dentry * -rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +static void __rpc_depopulate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof) { - struct nameidata nd; + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; + struct qstr name; + int i; + + for (i = start; i < eof; i++) { + name.name = files[i].name; + name.len = strlen(files[i].name); + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + + if (dentry == NULL) + continue; + if (dentry->d_inode == NULL) + goto next; + switch (dentry->d_inode->i_mode & S_IFMT) { + default: + BUG(); + case S_IFREG: + __rpc_unlink(dir, dentry); + break; + case S_IFDIR: + __rpc_rmdir(dir, dentry); + } +next: + dput(dentry); + } +} + +static void rpc_depopulate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof) +{ + struct inode *dir = parent->d_inode; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); + __rpc_depopulate(parent, files, start, eof); + mutex_unlock(&dir->i_mutex); +} + +static int rpc_populate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof, + void *private) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int i, err; + + mutex_lock(&dir->i_mutex); + for (i = start; i < eof; i++) { + struct qstr q; + + q.name = files[i].name; + q.len = strlen(files[i].name); + q.hash = full_name_hash(q.name, q.len); + dentry = __rpc_lookup_create_exclusive(parent, &q); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_bad; + switch (files[i].mode & S_IFMT) { + default: + BUG(); + case S_IFREG: + err = __rpc_create(dir, dentry, + files[i].mode, + files[i].i_fop, + private); + break; + case S_IFDIR: + err = __rpc_mkdir(dir, dentry, + files[i].mode, + NULL, + private); + } + if (err != 0) + goto out_bad; + } + mutex_unlock(&dir->i_mutex); + return 0; +out_bad: + __rpc_depopulate(parent, files, start, eof); + mutex_unlock(&dir->i_mutex); + printk(KERN_WARNING "%s: %s failed to populate directory %s\n", + __FILE__, __func__, parent->d_name.name); + return err; +} + +static struct dentry *rpc_mkdir_populate(struct dentry *parent, + struct qstr *name, umode_t mode, void *private, + int (*populate)(struct dentry *, void *), void *args_populate) +{ + struct dentry *dentry; + struct inode *dir = parent->d_inode; int error; - dentry = rpc_lookup_negative(path, &nd); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) - return dentry; - dir = nd.path.dentry->d_inode; - if ((error = __rpc_mkdir(dir, dentry)) != 0) - goto err_dput; - RPC_I(dentry->d_inode)->private = rpc_client; - error = rpc_populate(dentry, authfiles, - RPCAUTH_info, RPCAUTH_EOF); - if (error) - goto err_depopulate; - dget(dentry); + goto out; + error = __rpc_mkdir(dir, dentry, mode, NULL, private); + if (error != 0) + goto out_err; + if (populate != NULL) { + error = populate(dentry, args_populate); + if (error) + goto err_rmdir; + } out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); return dentry; -err_depopulate: - rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); +err_rmdir: __rpc_rmdir(dir, dentry); -err_dput: - dput(dentry); - printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", - __FILE__, __func__, path, error); +out_err: dentry = ERR_PTR(error); goto out; } -/** - * rpc_rmdir - Remove a directory created with rpc_mkdir() - * @dentry: directory to remove - */ -int -rpc_rmdir(struct dentry *dentry) +static int rpc_rmdir_depopulate(struct dentry *dentry, + void (*depopulate)(struct dentry *)) { struct dentry *parent; struct inode *dir; @@ -748,9 +728,9 @@ rpc_rmdir(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); + if (depopulate != NULL) + depopulate(dentry); error = __rpc_rmdir(dir, dentry); - dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; @@ -776,50 +756,54 @@ rpc_rmdir(struct dentry *dentry) * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry * -rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) +struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, + void *private, const struct rpc_pipe_ops *ops, + int flags) { struct dentry *dentry; - struct inode *dir, *inode; - struct rpc_inode *rpci; + struct inode *dir = parent->d_inode; + umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; + struct qstr q; + int err; + + if (ops->upcall == NULL) + umode &= ~S_IRUGO; + if (ops->downcall == NULL) + umode &= ~S_IWUGO; + + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len), - dentry = rpc_lookup_create(parent, name, strlen(name), 0); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = __rpc_lookup_create(parent, &q); if (IS_ERR(dentry)) - return dentry; - dir = parent->d_inode; + goto out; if (dentry->d_inode) { - rpci = RPC_I(dentry->d_inode); + struct rpc_inode *rpci = RPC_I(dentry->d_inode); if (rpci->private != private || rpci->ops != ops || rpci->flags != flags) { dput (dentry); - dentry = ERR_PTR(-EBUSY); + err = -EBUSY; + goto out_err; } rpci->nkern_readwriters++; goto out; } - inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); - if (!inode) - goto err_dput; - inode->i_ino = iunique(dir->i_sb, 100); - inode->i_fop = &rpc_pipe_fops; - d_instantiate(dentry, inode); - rpci = RPC_I(inode); - rpci->private = private; - rpci->flags = flags; - rpci->ops = ops; - rpci->nkern_readwriters = 1; - fsnotify_create(dir, dentry); - dget(dentry); + + err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, + private, ops, flags); + if (err) + goto out_err; out: mutex_unlock(&dir->i_mutex); return dentry; -err_dput: - dput(dentry); - dentry = ERR_PTR(-ENOMEM); +out_err: + dentry = ERR_PTR(err); printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", __FILE__, __func__, parent->d_name.name, name, - -ENOMEM); + err); goto out; } EXPORT_SYMBOL_GPL(rpc_mkpipe); @@ -842,19 +826,107 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - if (!error) - d_delete(dentry); - } - dput(dentry); + error = __rpc_rmpipe(dir, dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; } EXPORT_SYMBOL_GPL(rpc_unlink); +enum { + RPCAUTH_info, + RPCAUTH_EOF +}; + +static const struct rpc_filelist authfiles[] = { + [RPCAUTH_info] = { + .name = "info", + .i_fop = &rpc_info_operations, + .mode = S_IFREG | S_IRUSR, + }, +}; + +static int rpc_clntdir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + authfiles, RPCAUTH_info, RPCAUTH_EOF, + private); +} + +static void rpc_clntdir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); +} + +/** + * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs + * @path: path from the rpc_pipefs root to the new directory + * @rpc_client: rpc client to associate with this directory + * + * This creates a directory at the given @path associated with + * @rpc_clnt, which will contain a file named "info" with some basic + * information about the client, together with any "pipes" that may + * later be created using rpc_mkpipe(). + */ +struct dentry *rpc_create_client_dir(struct dentry *dentry, + struct qstr *name, + struct rpc_clnt *rpc_client) +{ + return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, + rpc_clntdir_populate, rpc_client); +} + +/** + * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() + * @dentry: directory to remove + */ +int rpc_remove_client_dir(struct dentry *dentry) +{ + return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); +} + +static const struct rpc_filelist cache_pipefs_files[3] = { + [0] = { + .name = "channel", + .i_fop = &cache_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, + [1] = { + .name = "content", + .i_fop = &content_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR, + }, + [2] = { + .name = "flush", + .i_fop = &cache_flush_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, +}; + +static int rpc_cachedir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + cache_pipefs_files, 0, 3, + private); +} + +static void rpc_cachedir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, cache_pipefs_files, 0, 3); +} + +struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, + mode_t umode, struct cache_detail *cd) +{ + return rpc_mkdir_populate(parent, name, umode, NULL, + rpc_cachedir_populate, cd); +} + +void rpc_remove_cache_dir(struct dentry *dentry) +{ + rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); +} + /* * populate the filesystem */ @@ -866,6 +938,51 @@ static struct super_operations s_ops = { #define RPCAUTH_GSSMAGIC 0x67596969 +/* + * We have a single directory with 1 node in it. + */ +enum { + RPCAUTH_lockd, + RPCAUTH_mount, + RPCAUTH_nfs, + RPCAUTH_portmap, + RPCAUTH_statd, + RPCAUTH_nfsd4_cb, + RPCAUTH_cache, + RPCAUTH_RootEOF +}; + +static const struct rpc_filelist files[] = { + [RPCAUTH_lockd] = { + .name = "lockd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_mount] = { + .name = "mount", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfs] = { + .name = "nfs", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_portmap] = { + .name = "portmap", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_statd] = { + .name = "statd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfsd4_cb] = { + .name = "nfsd4_cb", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_cache] = { + .name = "cache", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, +}; + static int rpc_fill_super(struct super_block *sb, void *data, int silent) { @@ -886,7 +1003,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) iput(inode); return -ENOMEM; } - if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF)) + if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) goto out; sb->s_root = root; return 0; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index beee6da33035..830faf4d9997 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -75,6 +75,37 @@ enum { #define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) +/* + * XDR data type sizes + */ +#define RPCB_program_sz (1) +#define RPCB_version_sz (1) +#define RPCB_protocol_sz (1) +#define RPCB_port_sz (1) +#define RPCB_boolean_sz (1) + +#define RPCB_netid_sz (1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) +#define RPCB_addr_sz (1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) +#define RPCB_ownerstring_sz (1 + XDR_QUADLEN(RPCB_MAXOWNERLEN)) + +/* + * XDR argument and result sizes + */ +#define RPCB_mappingargs_sz (RPCB_program_sz + RPCB_version_sz + \ + RPCB_protocol_sz + RPCB_port_sz) +#define RPCB_getaddrargs_sz (RPCB_program_sz + RPCB_version_sz + \ + RPCB_netid_sz + RPCB_addr_sz + \ + RPCB_ownerstring_sz) + +#define RPCB_getportres_sz RPCB_port_sz +#define RPCB_setres_sz RPCB_boolean_sz + +/* + * Note that RFC 1833 does not put any size restrictions on the + * address string returned by the remote rpcbind database. + */ +#define RPCB_getaddrres_sz RPCB_addr_sz + static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; @@ -122,6 +153,7 @@ static void rpcb_map_release(void *data) rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); + kfree(map->r_addr); kfree(map); } @@ -268,12 +300,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); - char buf[32]; + int result; - /* Construct AF_INET universal address */ - snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &sin->sin_addr.s_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -284,7 +313,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } /* @@ -296,16 +327,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); - char buf[64]; + int result; - /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&sin6->sin6_addr)) - snprintf(buf, sizeof(buf), "::.%u.%u", - port >> 8, port & 0xff); - else - snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &sin6->sin6_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -316,7 +340,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) @@ -428,7 +454,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) struct rpc_message msg = { .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], .rpc_argp = &map, - .rpc_resp = &map.r_port, + .rpc_resp = &map, }; struct rpc_clnt *rpcb_clnt; int status; @@ -458,7 +484,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi struct rpc_message msg = { .rpc_proc = proc, .rpc_argp = map, - .rpc_resp = &map->r_port, + .rpc_resp = map, }; struct rpc_task_setup task_setup_data = { .rpc_client = rpcb_clnt, @@ -539,6 +565,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } + /* Parent transport's destination address */ salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ @@ -589,11 +616,22 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = ""; map->r_status = -EIO; + switch (bind_version) { + case RPCBVERS_4: + case RPCBVERS_3: + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_owner = ""; + break; + case RPCBVERS_2: + map->r_addr = NULL; + break; + default: + BUG(); + } + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { @@ -656,176 +694,278 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) * XDR functions for rpcbind */ -static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, + const struct rpcbind_args *rpcb) { - dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + + dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + + p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz); + if (unlikely(p == NULL)) + return -EIO; + *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); *p++ = htonl(rpcb->r_prot); - *p++ = htonl(rpcb->r_port); + *p = htonl(rpcb->r_port); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } -static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, - unsigned short *portp) +static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) { - *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb getport result: %u\n", - *portp); + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + unsigned long port; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + rpcb->r_port = 0; + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + return -EIO; + + port = ntohl(*p); + dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, port); + if (unlikely(port > USHORT_MAX)) + return -EIO; + + rpcb->r_port = port; return 0; } -static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, - unsigned int *boolp) +static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, + unsigned int *boolp) { - *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb set/unset call %s\n", + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + return -EIO; + + *boolp = 0; + if (*p) + *boolp = 1; + + dprintk("RPC: %5u RPCB_%s call %s\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, (*boolp ? "succeeded" : "failed")); return 0; } -static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int encode_rpcb_string(struct xdr_stream *xdr, const char *string, + const u32 maxstrlen) { - dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", - rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); - *p++ = htonl(rpcb->r_prog); - *p++ = htonl(rpcb->r_vers); + u32 len; + __be32 *p; - p = xdr_encode_string(p, rpcb->r_netid); - p = xdr_encode_string(p, rpcb->r_addr); - p = xdr_encode_string(p, rpcb->r_owner); + if (unlikely(string == NULL)) + return -EIO; + len = strlen(string); + if (unlikely(len > maxstrlen)) + return -EIO; - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + p = xdr_reserve_space(xdr, sizeof(__be32) + len); + if (unlikely(p == NULL)) + return -EIO; + xdr_encode_opaque(p, string, len); return 0; } -static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, - unsigned short *portp) +static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, + const struct rpcbind_args *rpcb) { - char *addr; - u32 addr_len; - int c, i, f, first, val; + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; - *portp = 0; - addr_len = ntohl(*p++); + dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, + rpcb->r_prog, rpcb->r_vers, + rpcb->r_netid, rpcb->r_addr); - if (addr_len == 0) { - dprintk("RPC: rpcb_decode_getaddr: " - "service is not registered\n"); - return 0; - } + xdr_init_encode(&xdr, &req->rq_snd_buf, p); - /* - * Simple sanity check. - */ - if (addr_len > RPCBIND_MAXUADDRLEN) - goto out_err; - - /* - * Start at the end and walk backwards until the first dot - * is encountered. When the second dot is found, we have - * both parts of the port number. - */ - addr = (char *)p; - val = 0; - first = 1; - f = 1; - for (i = addr_len - 1; i > 0; i--) { - c = addr[i]; - if (c >= '0' && c <= '9') { - val += (c - '0') * f; - f *= 10; - } else if (c == '.') { - if (first) { - *portp = val; - val = first = 0; - f = 1; - } else { - *portp |= (val << 8); - break; - } - } - } + p = xdr_reserve_space(&xdr, + sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz)); + if (unlikely(p == NULL)) + return -EIO; + *p++ = htonl(rpcb->r_prog); + *p = htonl(rpcb->r_vers); - /* - * Simple sanity check. If we never saw a dot in the reply, - * then this was probably just garbage. - */ - if (first) - goto out_err; + if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN)) + return -EIO; + if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN)) + return -EIO; + if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN)) + return -EIO; - dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; - -out_err: - dprintk("RPC: rpcbind server returned malformed reply\n"); - return -EIO; } -#define RPCB_program_sz (1u) -#define RPCB_version_sz (1u) -#define RPCB_protocol_sz (1u) -#define RPCB_port_sz (1u) -#define RPCB_boolean_sz (1u) +static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + u32 len; -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) -#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) -#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) + rpcb->r_port = 0; -#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ - RPCB_protocol_sz+RPCB_port_sz -#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \ - RPCB_netid_sz+RPCB_addr_sz+ \ - RPCB_ownerstring_sz + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); -#define RPCB_setres_sz RPCB_boolean_sz -#define RPCB_getportres_sz RPCB_port_sz - -/* - * Note that RFC 1833 does not put any size restrictions on the - * address string returned by the remote rpcbind database. - */ -#define RPCB_getaddrres_sz RPCB_addr_sz + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + goto out_fail; + len = ntohl(*p); -#define PROC(proc, argtype, restype) \ - [RPCBPROC_##proc] = { \ - .p_proc = RPCBPROC_##proc, \ - .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \ - .p_decode = (kxdrproc_t) rpcb_decode_##restype, \ - .p_arglen = RPCB_##argtype##args_sz, \ - .p_replen = RPCB_##restype##res_sz, \ - .p_statidx = RPCBPROC_##proc, \ - .p_timer = 0, \ - .p_name = #proc, \ + /* + * If the returned universal address is a null string, + * the requested RPC service was not registered. + */ + if (len == 0) { + dprintk("RPC: %5u RPCB reply: program not registered\n", + task->tk_pid); + return 0; } + if (unlikely(len > RPCBIND_MAXUADDRLEN)) + goto out_fail; + + p = xdr_inline_decode(&xdr, len); + if (unlikely(p == NULL)) + goto out_fail; + dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, (char *)p); + + if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) + goto out_fail; + rpcb->r_port = rpc_get_port(sap); + + return 0; + +out_fail: + dprintk("RPC: %5u malformed RPCB_%s reply\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name); + return -EIO; +} + /* * Not all rpcbind procedures described in RFC 1833 are implemented * since the Linux kernel RPC code requires only these. */ + static struct rpc_procinfo rpcb_procedures2[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), - PROC(GETPORT, mapping, getport), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETPORT] = { + .p_proc = RPCBPROC_GETPORT, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_getport, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_getportres_sz, + .p_statidx = RPCBPROC_GETPORT, + .p_timer = 0, + .p_name = "GETPORT", + }, }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, getaddr, set), - PROC(UNSET, getaddr, set), - PROC(GETADDR, getaddr, getaddr), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETADDR] = { + .p_proc = RPCBPROC_GETADDR, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_getaddrres_sz, + .p_statidx = RPCBPROC_GETADDR, + .p_timer = 0, + .p_name = "GETADDR", + }, }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, getaddr, set), - PROC(UNSET, getaddr, set), - PROC(GETADDR, getaddr, getaddr), - PROC(GETVERSADDR, getaddr, getaddr), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETADDR] = { + .p_proc = RPCBPROC_GETADDR, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_getaddrres_sz, + .p_statidx = RPCBPROC_GETADDR, + .p_timer = 0, + .p_name = "GETADDR", + }, }; static struct rpcb_info rpcb_next_version[] = { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ff50a0546865..8f459abe97cf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -16,7 +16,6 @@ #include <linux/slab.h> #include <linux/mempool.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/mutex.h> @@ -569,7 +568,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); /* * Helper to call task->tk_ops->rpc_call_prepare */ -static void rpc_prepare_task(struct rpc_task *task) +void rpc_prepare_task(struct rpc_task *task) { task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1ef6e46d9da2..1b4e6791ecf3 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -141,12 +141,14 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); void rpc_count_iostats(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *stats = task->tk_client->cl_metrics; + struct rpc_iostats *stats; struct rpc_iostats *op_metrics; long rtt, execute, queue; - if (!stats || !req) + if (!task->tk_client || !task->tk_client->cl_metrics || !req) return; + + stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; @@ -154,7 +156,7 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += task->tk_bytes_sent; - op_metrics->om_bytes_recv += req->rq_received; + op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; queue = (long)req->rq_xtime - task->tk_start; if (queue < 0) diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h new file mode 100644 index 000000000000..5d9dd742264b --- /dev/null +++ b/net/sunrpc/sunrpc.h @@ -0,0 +1,37 @@ +/****************************************************************************** + +(c) 2008 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * Functions and macros used internally by RPC + */ + +#ifndef _NET_SUNRPC_SUNRPC_H +#define _NET_SUNRPC_SUNRPC_H + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + +#endif /* _NET_SUNRPC_SUNRPC_H */ + diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 843629f55763..8cce92189019 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -66,7 +66,8 @@ cleanup_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_exit(); #endif + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_LICENSE("GPL"); -module_init(init_sunrpc); +fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */ module_exit(cleanup_sunrpc); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5ed8931dfe98..952f206ff307 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -25,6 +25,7 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/bc_xprt.h> #define RPCDBG_FACILITY RPCDBG_SVCDSP @@ -486,6 +487,10 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); +#if defined(CONFIG_NFS_V4_1) + svc_sock_destroy(serv->bc_xprt); +#endif /* CONFIG_NFS_V4_1 */ + svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); @@ -970,20 +975,18 @@ svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) } /* - * Process the RPC request. + * Common routine for processing the RPC request. */ -int -svc_process(struct svc_rqst *rqstp) +static int +svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) { struct svc_program *progp; struct svc_version *versp = NULL; /* compiler food */ struct svc_procedure *procp = NULL; - struct kvec * argv = &rqstp->rq_arg.head[0]; - struct kvec * resv = &rqstp->rq_res.head[0]; struct svc_serv *serv = rqstp->rq_server; kxdrproc_t xdr; __be32 *statp; - u32 dir, prog, vers, proc; + u32 prog, vers, proc; __be32 auth_stat, rpc_stat; int auth_res; __be32 *reply_statp; @@ -993,19 +996,6 @@ svc_process(struct svc_rqst *rqstp) if (argv->iov_len < 6*4) goto err_short_len; - /* setup response xdr_buf. - * Initially it has just one page - */ - rqstp->rq_resused = 1; - resv->iov_base = page_address(rqstp->rq_respages[0]); - resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages + 1; - rqstp->rq_res.len = 0; - rqstp->rq_res.page_base = 0; - rqstp->rq_res.page_len = 0; - rqstp->rq_res.buflen = PAGE_SIZE; - rqstp->rq_res.tail[0].iov_base = NULL; - rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ @@ -1014,17 +1004,13 @@ svc_process(struct svc_rqst *rqstp) /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); - rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = svc_getnl(argv); vers = svc_getnl(argv); /* First words of reply: */ svc_putnl(resv, 1); /* REPLY */ - if (dir != 0) /* direction != CALL */ - goto err_bad_dir; if (vers != 2) /* RPC version number */ goto err_bad_rpc; @@ -1147,7 +1133,7 @@ svc_process(struct svc_rqst *rqstp) sendit: if (svc_authorise(rqstp)) goto dropit; - return svc_send(rqstp); + return 1; /* Caller can now send it */ dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ @@ -1161,12 +1147,6 @@ err_short_len: goto dropit; /* drop request */ -err_bad_dir: - svc_printk(rqstp, "bad direction %d, dropping request\n", dir); - - serv->sv_stats->rpcbadfmt++; - goto dropit; /* drop request */ - err_bad_rpc: serv->sv_stats->rpcbadfmt++; svc_putnl(resv, 1); /* REJECT */ @@ -1220,6 +1200,100 @@ err_bad: EXPORT_SYMBOL_GPL(svc_process); /* + * Process the RPC request. + */ +int +svc_process(struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct svc_serv *serv = rqstp->rq_server; + u32 dir; + int error; + + /* + * Setup response xdr_buf. + * Initially it has just one page + */ + rqstp->rq_resused = 1; + resv->iov_base = page_address(rqstp->rq_respages[0]); + resv->iov_len = 0; + rqstp->rq_res.pages = rqstp->rq_respages + 1; + rqstp->rq_res.len = 0; + rqstp->rq_res.page_base = 0; + rqstp->rq_res.page_len = 0; + rqstp->rq_res.buflen = PAGE_SIZE; + rqstp->rq_res.tail[0].iov_base = NULL; + rqstp->rq_res.tail[0].iov_len = 0; + + rqstp->rq_xid = svc_getu32(argv); + + dir = svc_getnl(argv); + if (dir != 0) { + /* direction != CALL */ + svc_printk(rqstp, "bad direction %d, dropping request\n", dir); + serv->sv_stats->rpcbadfmt++; + svc_drop(rqstp); + return 0; + } + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + return svc_send(rqstp); +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Process a backchannel RPC request that arrived over an existing + * outbound connection + */ +int +bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, + struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + int error; + + /* Build the svc_rqst used by the common processing routine */ + rqstp->rq_xprt = serv->bc_xprt; + rqstp->rq_xid = req->rq_xid; + rqstp->rq_prot = req->rq_xprt->prot; + rqstp->rq_server = serv; + + rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); + memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); + memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); + memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); + + /* reset result send buffer "put" position */ + resv->iov_len = 0; + + if (rqstp->rq_prot != IPPROTO_TCP) { + printk(KERN_ERR "No support for Non-TCP transports!\n"); + BUG(); + } + + /* + * Skip the next two words because they've already been + * processed in the trasport + */ + svc_getu32(argv); /* XID */ + svc_getnl(argv); /* CALLDIR */ + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + return bc_send(req); +} +EXPORT_SYMBOL(bc_svc_process); +#endif /* CONFIG_NFS_V4_1 */ + +/* * Return (transport-specific) limit on the rpc payload. */ u32 svc_max_payload(const struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c200d92e57e4..27d44332f017 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -5,12 +5,14 @@ */ #include <linux/sched.h> +#include <linux/smp_lock.h> #include <linux/errno.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <net/sock.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> +#include <linux/sunrpc/svcsock.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -1097,36 +1099,58 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, } EXPORT_SYMBOL_GPL(svc_find_xprt); -/* - * Format a buffer with a list of the active transports. A zero for - * the buflen parameter disables target buffer overflow checking. +static int svc_one_xprt_name(const struct svc_xprt *xprt, + char *pos, int remaining) +{ + int len; + + len = snprintf(pos, remaining, "%s %u\n", + xprt->xpt_class->xcl_name, + svc_xprt_local_port(xprt)); + if (len >= remaining) + return -ENAMETOOLONG; + return len; +} + +/** + * svc_xprt_names - format a buffer with a list of transport names + * @serv: pointer to an RPC service + * @buf: pointer to a buffer to be filled in + * @buflen: length of buffer to be filled in + * + * Fills in @buf with a string containing a list of transport names, + * each name terminated with '\n'. + * + * Returns positive length of the filled-in string on success; otherwise + * a negative errno value is returned if an error occurs. */ -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) { struct svc_xprt *xprt; - char xprt_str[64]; - int totlen = 0; - int len; + int len, totlen; + char *pos; /* Sanity check args */ if (!serv) return 0; spin_lock_bh(&serv->sv_lock); + + pos = buf; + totlen = 0; list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { - len = snprintf(xprt_str, sizeof(xprt_str), - "%s %d\n", xprt->xpt_class->xcl_name, - svc_xprt_local_port(xprt)); - /* If the string was truncated, replace with error string */ - if (len >= sizeof(xprt_str)) - strcpy(xprt_str, "name-too-long\n"); - /* Don't overflow buffer */ - len = strlen(xprt_str); - if (buflen && (len + totlen >= buflen)) + len = svc_one_xprt_name(xprt, pos, buflen - totlen); + if (len < 0) { + *buf = '\0'; + totlen = len; + } + if (len <= 0) break; - strcpy(buf+totlen, xprt_str); + + pos += len; totlen += len; } + spin_unlock_bh(&serv->sv_lock); return totlen; } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 5c865e2d299e..6caffa34ac01 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -171,6 +171,11 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); +} + static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -289,7 +294,7 @@ struct cache_detail ip_map_cache = { .hash_table = ip_table, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_request = ip_map_request, + .cache_upcall = ip_map_upcall, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, @@ -523,6 +528,11 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); +} + static struct unix_gid *unix_gid_lookup(uid_t uid); extern struct cache_detail unix_gid_cache; @@ -622,7 +632,7 @@ struct cache_detail unix_gid_cache = { .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_request = unix_gid_request, + .cache_upcall = unix_gid_upcall, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9d504234af4a..23128ee191ae 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -240,42 +240,76 @@ out: /* * Report socket names for nfsdfs */ -static int one_sock_name(char *buf, struct svc_sock *svsk) +static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { + const struct sock *sk = svsk->sk_sk; + const char *proto_name = sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp"; int len; - switch(svsk->sk_sk->sk_family) { - case AF_INET: - len = sprintf(buf, "ipv4 %s %pI4 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + switch (sk->sk_family) { + case PF_INET: + len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", + proto_name, + &inet_sk(sk)->rcv_saddr, + inet_sk(sk)->num); + break; + case PF_INET6: + len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", + proto_name, + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; default: - len = sprintf(buf, "*unknown-%d*\n", - svsk->sk_sk->sk_family); + len = snprintf(buf, remaining, "*unknown-%d*\n", + sk->sk_family); + } + + if (len >= remaining) { + *buf = '\0'; + return -ENAMETOOLONG; } return len; } -int -svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) +/** + * svc_sock_names - construct a list of listener names in a string + * @serv: pointer to RPC service + * @buf: pointer to a buffer to fill in with socket names + * @buflen: size of the buffer to be filled + * @toclose: pointer to '\0'-terminated C string containing the name + * of a listener to be closed + * + * Fills in @buf with a '\n'-separated list of names of listener + * sockets. If @toclose is not NULL, the socket named by @toclose + * is closed, and is not included in the output list. + * + * Returns positive length of the socket name string, or a negative + * errno value on error. + */ +int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, + const char *toclose) { struct svc_sock *svsk, *closesk = NULL; int len = 0; if (!serv) return 0; + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { - int onelen = one_sock_name(buf+len, svsk); - if (toclose && strcmp(toclose, buf+len) == 0) + int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); + if (onelen < 0) { + len = onelen; + break; + } + if (toclose && strcmp(toclose, buf + len) == 0) closesk = svsk; else len += onelen; } spin_unlock_bh(&serv->sv_lock); + if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... @@ -346,6 +380,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif } @@ -387,6 +422,15 @@ static void svc_write_space(struct sock *sk) } } +static void svc_tcp_write_space(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) + clear_bit(SOCK_NOSPACE, &sock->flags); + svc_write_space(sk); +} + /* * Copy the UDP datagram's destination address to the rqstp structure. * The 'destination' address in this case is the address to which the @@ -427,13 +471,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) long all[SVC_PKTINFO_SPACE / sizeof(long)]; } buffer; struct cmsghdr *cmh = &buffer.hdr; - int err, len; struct msghdr msg = { .msg_name = svc_addr(rqstp), .msg_control = cmh, .msg_controllen = sizeof(buffer), .msg_flags = MSG_DONTWAIT, }; + size_t len; + int err; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* udp sockets need large rcvbuf as all pending @@ -465,8 +510,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); - if (len < 0) - return len; + if (len == 0) + return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); @@ -980,25 +1025,16 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) static int svc_tcp_has_wspace(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; + struct svc_serv *serv = svsk->sk_xprt.xpt_server; int required; - int wspace; - /* - * Set the SOCK_NOSPACE flag before checking the available - * sock space. - */ + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required) + return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; - wspace = sk_stream_wspace(svsk->sk_sk); - - if (wspace < sk_stream_min_wspace(svsk->sk_sk)) - return 0; - if (required * 2 > wspace) - return 0; - - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 1; + return 0; } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, @@ -1054,7 +1090,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_tcp_data_ready; - sk->sk_write_space = svc_write_space; + sk->sk_write_space = svc_tcp_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; @@ -1148,9 +1184,19 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return) +/** + * svc_addsock - add a listener socket to an RPC service + * @serv: pointer to RPC service to which to add a new listener + * @fd: file descriptor of the new listener + * @name_return: pointer to buffer to fill in with name of listener + * @len: size of the buffer + * + * Fills in socket name and returns positive length of name if successful. + * Name is terminated with '\n'. On error, returns a negative errno + * value. + */ +int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1190,7 +1236,7 @@ int svc_addsock(struct svc_serv *serv, sockfd_put(so); return err; } - return one_sock_name(name_return, svsk); + return svc_one_sock_name(svsk, name_return, len); } EXPORT_SYMBOL_GPL(svc_addsock); @@ -1327,3 +1373,42 @@ static void svc_sock_free(struct svc_xprt *xprt) sock_release(svsk->sk_sock); kfree(svsk); } + +/* + * Create a svc_xprt. + * + * For internal use only (e.g. nfsv4.1 backchannel). + * Callers should typically use the xpo_create() method. + */ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +{ + struct svc_sock *svsk; + struct svc_xprt *xprt = NULL; + + dprintk("svc: %s\n", __func__); + svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); + if (!svsk) + goto out; + + xprt = &svsk->sk_xprt; + if (prot == IPPROTO_TCP) + svc_xprt_init(&svc_tcp_class, xprt, serv); + else if (prot == IPPROTO_UDP) + svc_xprt_init(&svc_udp_class, xprt, serv); + else + BUG(); +out: + dprintk("svc: %s return %p\n", __func__, xprt); + return xprt; +} +EXPORT_SYMBOL_GPL(svc_sock_create); + +/* + * Destroy a svc_sock. + */ +void svc_sock_destroy(struct svc_xprt *xprt) +{ + if (xprt) + kfree(container_of(xprt, struct svc_sock, sk_xprt)); +} +EXPORT_SYMBOL_GPL(svc_sock_destroy); diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 31becbf09263..dd824341c349 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -25,8 +25,13 @@ #define RPC_RTO_INIT (HZ/5) #define RPC_RTO_MIN (HZ/10) -void -rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) +/** + * rpc_init_rtt - Initialize an RPC RTT estimator context + * @rt: context to initialize + * @timeo: initial timeout value, in jiffies + * + */ +void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) { unsigned long init = 0; unsigned i; @@ -43,12 +48,16 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) } EXPORT_SYMBOL_GPL(rpc_init_rtt); -/* +/** + * rpc_update_rtt - Update an RPC RTT estimator context + * @rt: context to update + * @timer: timer array index (request type) + * @m: recent actual RTT, in jiffies + * * NB: When computing the smoothed RTT and standard deviation, * be careful not to produce negative intermediate results. */ -void -rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) +void rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) { long *srtt, *sdrtt; @@ -79,21 +88,25 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) } EXPORT_SYMBOL_GPL(rpc_update_rtt); -/* - * Estimate rto for an nfs rpc sent via. an unreliable datagram. - * Use the mean and mean deviation of rtt for the appropriate type of rpc - * for the frequent rpcs and a default for the others. - * The justification for doing "other" this way is that these rpcs - * happen so infrequently that timer est. would probably be stale. - * Also, since many of these rpcs are - * non-idempotent, a conservative timeout is desired. +/** + * rpc_calc_rto - Provide an estimated timeout value + * @rt: context to use for calculation + * @timer: timer array index (request type) + * + * Estimate RTO for an NFS RPC sent via an unreliable datagram. Use + * the mean and mean deviation of RTT for the appropriate type of RPC + * for frequently issued RPCs, and a fixed default for the others. + * + * The justification for doing "other" this way is that these RPCs + * happen so infrequently that timer estimation would probably be + * stale. Also, since many of these RPCs are non-idempotent, a + * conservative timeout is desired. + * * getattr, lookup, * read, write, commit - A+4D * other - timeo */ - -unsigned long -rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) +unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) { unsigned long res; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 406e26de584e..8bd690c48b69 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) unsigned int quadlen = XDR_QUADLEN(obj->len); p[quadlen] = 0; /* zero trailing bytes */ - *p++ = htonl(obj->len); + *p++ = cpu_to_be32(obj->len); memcpy(p, obj->data, obj->len); return p + XDR_QUADLEN(obj->len); } @@ -35,7 +35,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; - if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ) + if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ) return NULL; obj->len = len; obj->data = (u8 *) p; @@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed); */ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { - *p++ = htonl(nbytes); + *p++ = cpu_to_be32(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL_GPL(xdr_encode_opaque); @@ -101,7 +101,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp, { u32 len; - len = ntohl(*p++); + len = be32_to_cpu(*p++); if (len > maxlen) return NULL; *lenp = len; @@ -771,7 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); if (status) return status; - *obj = ntohl(raw); + *obj = be32_to_cpu(raw); return 0; } EXPORT_SYMBOL_GPL(xdr_decode_word); @@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word); int xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) { - __be32 raw = htonl(obj); + __be32 raw = cpu_to_be32(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 06ca058572f2..f412a852bc73 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -12,8 +12,9 @@ * - Next, the caller puts together the RPC message, stuffs it into * the request struct, and calls xprt_transmit(). * - xprt_transmit sends the message and installs the caller on the - * transport's wait list. At the same time, it installs a timer that - * is run after the packet's timeout has expired. + * transport's wait list. At the same time, if a reply is expected, + * it installs a timer that is run after the packet's timeout has + * expired. * - When a packet arrives, the data_ready handler walks the list of * pending requests for that transport. If a matching XID is found, the * caller is woken up, and the timer removed. @@ -46,6 +47,8 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/metrics.h> +#include "sunrpc.h" + /* * Local variables */ @@ -192,8 +195,8 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); */ int xprt_reserve_xprt(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -803,9 +806,10 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) list_del_init(&req->rq_list); req->rq_private_buf.len = copied; - /* Ensure all writes are done before we update req->rq_received */ + /* Ensure all writes are done before we update */ + /* req->rq_reply_bytes_recvd */ smp_wmb(); - req->rq_received = copied; + req->rq_reply_bytes_recvd = copied; rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -820,7 +824,7 @@ static void xprt_timer(struct rpc_task *task) dprintk("RPC: %5u xprt_timer\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (!req->rq_received) { + if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(task); } else @@ -842,8 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (req->rq_received && !req->rq_bytes_sent) { - err = req->rq_received; + if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { + err = req->rq_reply_bytes_recvd; goto out_unlock; } if (!xprt->ops->reserve_xprt(task)) @@ -855,7 +859,7 @@ out_unlock: void xprt_end_transmit(struct rpc_task *task) { - xprt_release_write(task->tk_xprt, task); + xprt_release_write(task->tk_rqstp->rq_xprt, task); } /** @@ -872,8 +876,11 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_received) { - if (list_empty(&req->rq_list)) { + if (!req->rq_reply_bytes_recvd) { + if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { + /* + * Add to the list only if we're expecting a reply + */ spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, @@ -908,8 +915,13 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_received) + else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { + /* + * Sleep on the pending queue since + * we're expecting a reply. + */ rpc_sleep_on(&xprt->pending, task, xprt_timer); + } spin_unlock_bh(&xprt->transport_lock); } @@ -982,11 +994,17 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) */ void xprt_release(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; struct rpc_rqst *req; + int is_bc_request; if (!(req = task->tk_rqstp)) return; + + /* Preallocated backchannel request? */ + is_bc_request = bc_prealloc(req); + + xprt = req->rq_xprt; rpc_count_iostats(task); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); @@ -999,10 +1017,19 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - xprt->ops->buf_free(req->rq_buffer); + if (!bc_prealloc(req)) + xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); + + /* + * Early exit if this is a backchannel preallocated request. + * There is no need to have it added to the RPC slot list. + */ + if (is_bc_request) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %5u release request %p\n", task->tk_pid, req); @@ -1049,6 +1076,11 @@ found: INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); +#if defined(CONFIG_NFS_V4_1) + spin_lock_init(&xprt->bc_pa_lock); + INIT_LIST_HEAD(&xprt->bc_pa_list); +#endif /* CONFIG_NFS_V4_1 */ + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); setup_timer(&xprt->timer, xprt_init_autodisconnect, (unsigned long)xprt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 42a6f9f20285..9e884383134f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -397,14 +397,14 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (!ch) return 0; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; + /* Allocate temporary reply and chunk maps */ + rpl_map = svc_rdma_get_req_map(); + chl_map = svc_rdma_get_req_map(); + if (!xprt->sc_frmr_pg_list_len) sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, rpl_map, chl_map, ch_count, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1dd6123070e9..9a63f669ece4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -168,47 +168,25 @@ static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ static void xprt_rdma_format_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) + struct sockaddr *sap = (struct sockaddr *) &rpcx_to_rdmad(xprt).addr; - char *buf; + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + char buf[64]; - buf = kzalloc(20, GFP_KERNEL); - if (buf) - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%u", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - buf = kzalloc(48, GFP_KERNEL); - if (buf) - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), "rdma"); - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", + NIPQUAD(sin->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); + + (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6c2d61586551..62438f3a914d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -34,6 +34,9 @@ #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> #include <linux/file.h> +#ifdef CONFIG_NFS_V4_1 +#include <linux/sunrpc/bc_xprt.h> +#endif #include <net/sock.h> #include <net/checksum.h> @@ -245,8 +248,8 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; - struct sockaddr_storage addr; - unsigned short port; + struct sockaddr_storage srcaddr; + unsigned short srcport; /* * UDP socket buffer size parameters @@ -270,6 +273,13 @@ struct sock_xprt { #define TCP_RCV_COPY_FRAGHDR (1UL << 1) #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 6) static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) { @@ -286,117 +296,60 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) return (struct sockaddr_in6 *) &xprt->addr; } -static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) +static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = xs_addr_in(xprt); - char *buf; - - buf = kzalloc(20, GFP_KERNEL); - if (buf) { - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; - - xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; - - buf = kzalloc(48, GFP_KERNEL); - if (buf) { - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), - protocol); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; + struct sockaddr *sap = xs_addr(xprt); + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + char buf[128]; - buf = kzalloc(10, GFP_KERNEL); - if (buf) { - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - } - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) { - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); + switch (sap->sa_family) { + case AF_INET: + sin = xs_addr_in(xprt); + (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", + NIPQUAD(sin->sin_addr.s_addr)); + break; + case AF_INET6: + sin6 = xs_addr_in6(xprt); + (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + break; + default: + BUG(); } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - - xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } -static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) +static void xs_format_common_peer_ports(struct rpc_xprt *xprt) { - struct sockaddr_in6 *addr = xs_addr_in6(xprt); - char *buf; + struct sockaddr *sap = xs_addr(xprt); + char buf[128]; - buf = kzalloc(40, GFP_KERNEL); - if (buf) { - snprintf(buf, 40, "%pI6",&addr->sin6_addr); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); +} +static void xs_format_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) +{ xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; + xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xs_format_common_peer_addresses(xprt); + xs_format_common_peer_ports(xprt); +} - buf = kzalloc(64, GFP_KERNEL); - if (buf) { - snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", - &addr->sin6_addr, - ntohs(addr->sin6_port), - protocol); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(36, GFP_KERNEL); - if (buf) - snprintf(buf, 36, "%pi6", &addr->sin6_addr); - - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(50, GFP_KERNEL); - if (buf) { - snprintf(buf, 50, "%pI6.%u.%u", - &addr->sin6_addr, - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; +static void xs_update_peer_port(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xs_format_common_peer_ports(xprt); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) @@ -956,7 +909,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_offset = 0; /* Sanity check of the record length */ - if (unlikely(transport->tcp_reclen < 4)) { + if (unlikely(transport->tcp_reclen < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_force_disconnect(xprt); return; @@ -991,33 +944,77 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r if (used != len) return; transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_flags |= TCP_RCV_READ_CALLDIR; transport->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", + dprintk("RPC: reading %s XID %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" + : "request with", ntohl(transport->tcp_xid)); xs_tcp_check_fraghdr(transport); } -static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_calldir(struct sock_xprt *transport, + struct xdr_skb_reader *desc) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; + size_t len, used; + u32 offset; + __be32 calldir; + + /* + * We want transport->tcp_offset to be 8 at the end of this routine + * (4 bytes for the xid and 4 bytes for the call/reply flag). + * When this function is called for the first time, + * transport->tcp_offset is 4 (after having already read the xid). + */ + offset = transport->tcp_offset - sizeof(transport->tcp_xid); + len = sizeof(calldir) - offset; + dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); + used = xdr_skb_read_bits(desc, &calldir, len); + transport->tcp_offset += used; + if (used != len) + return; + transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_DATA; + /* + * We don't yet have the XDR buffer, so we will write the calldir + * out after we get the buffer from the 'struct rpc_rqst' + */ + if (ntohl(calldir) == RPC_REPLY) + transport->tcp_flags |= TCP_RPC_REPLY; + else + transport->tcp_flags &= ~TCP_RPC_REPLY; + dprintk("RPC: reading %s CALL/REPLY flag %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? + "reply for" : "request with", calldir); + xs_tcp_check_fraghdr(transport); +} + +static inline void xs_tcp_read_common(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc, + struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *rcvbuf; size_t len; ssize_t r; - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); - req = xprt_lookup_rqst(xprt, transport->tcp_xid); - if (!req) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); - return; + rcvbuf = &req->rq_private_buf; + + if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + /* + * Save the RPC direction in the XDR buffer + */ + __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ? + htonl(RPC_REPLY) : 0; + + memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, + &calldir, sizeof(calldir)); + transport->tcp_copied += sizeof(calldir); + transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; } - rcvbuf = &req->rq_private_buf; len = desc->count; if (len > transport->tcp_reclen - transport->tcp_offset) { struct xdr_skb_reader my_desc; @@ -1054,7 +1051,7 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea "tcp_offset = %u, tcp_reclen = %u\n", xprt, transport->tcp_copied, transport->tcp_offset, transport->tcp_reclen); - goto out; + return; } dprintk("RPC: XID %08x read %Zd bytes\n", @@ -1070,11 +1067,125 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_flags &= ~TCP_RCV_COPY_DATA; } -out: + return; +} + +/* + * Finds the request corresponding to the RPC xid and invokes the common + * tcp read code to read the data. + */ +static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); + + /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_rqst(xprt, transport->tcp_xid); + if (!req) { + dprintk("RPC: XID %08x request not found!\n", + ntohl(transport->tcp_xid)); + spin_unlock(&xprt->transport_lock); + return -1; + } + + xs_tcp_read_common(xprt, desc, req); + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); + spin_unlock(&xprt->transport_lock); - xs_tcp_check_fraghdr(transport); + return 0; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Obtains an rpc_rqst previously allocated and invokes the common + * tcp read code to read the data. The result is placed in the callback + * queue. + * If we're unable to obtain the rpc_rqst we schedule the closing of the + * connection and return -1. + */ +static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + req = xprt_alloc_bc_request(xprt); + if (req == NULL) { + printk(KERN_WARNING "Callback slot table overflowed\n"); + xprt_force_disconnect(xprt); + return -1; + } + + req->rq_xid = transport->tcp_xid; + dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); + xs_tcp_read_common(xprt, desc, req); + + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { + struct svc_serv *bc_serv = xprt->bc_serv; + + /* + * Add callback request to callback list. The callback + * service sleeps on the sv_cb_waitq waiting for new + * requests. Wake it up after adding enqueing the + * request. + */ + dprintk("RPC: add callback request to list\n"); + spin_lock(&bc_serv->sv_cb_lock); + list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); + spin_unlock(&bc_serv->sv_cb_lock); + wake_up(&bc_serv->sv_cb_waitq); + } + + req->rq_private_buf.len = transport->tcp_copied; + + return 0; +} + +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + return (transport->tcp_flags & TCP_RPC_REPLY) ? + xs_tcp_read_reply(xprt, desc) : + xs_tcp_read_callback(xprt, desc); +} +#else +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + return xs_tcp_read_reply(xprt, desc); +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Read data off the transport. This can be either an RPC_CALL or an + * RPC_REPLY. Relay the processing to helper functions. + */ +static void xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + if (_xs_tcp_read_data(xprt, desc) == 0) + xs_tcp_check_fraghdr(transport); + else { + /* + * The transport_lock protects the request handling. + * There's no need to hold it to update the tcp_flags. + */ + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + } } static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) @@ -1114,9 +1225,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns xs_tcp_read_xid(transport, &desc); continue; } + /* Read in the call/reply flag */ + if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) { + xs_tcp_read_calldir(transport, &desc); + continue; + } /* Read in the request data */ if (transport->tcp_flags & TCP_RCV_COPY_DATA) { - xs_tcp_read_request(xprt, &desc); + xs_tcp_read_data(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ @@ -1414,25 +1530,15 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr *addr = xs_addr(xprt); - dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - switch (addr->sa_family) { - case AF_INET: - ((struct sockaddr_in *)addr)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); - break; - default: - BUG(); - } + rpc_set_port(xs_addr(xprt), port); + xs_update_peer_port(xprt); } static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) { - unsigned short port = transport->port; + unsigned short port = transport->srcport; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1441,8 +1547,8 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) { - if (transport->port != 0) - transport->port = 0; + if (transport->srcport != 0) + transport->srcport = 0; if (!transport->xprt.resvport) return 0; if (port <= xprt_min_resvport || port > xprt_max_resvport) @@ -1460,7 +1566,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in *)&transport->addr; + sa = (struct sockaddr_in *)&transport->srcaddr; myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); @@ -1469,7 +1575,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->port = port; + transport->srcport = port; break; } last = port; @@ -1493,7 +1599,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in6 *)&transport->addr; + sa = (struct sockaddr_in6 *)&transport->srcaddr; myaddr.sin6_addr = sa->sin6_addr; do { myaddr.sin6_port = htons(port); @@ -1502,7 +1608,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->port = port; + transport->srcport = port; break; } last = port; @@ -1607,8 +1713,11 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1649,8 +1758,11 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1775,8 +1887,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, goto out_eagain; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", @@ -1792,6 +1907,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, */ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); xprt_force_disconnect(xprt); + break; case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1946,7 +2062,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", - transport->port, + transport->srcport, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, @@ -1970,7 +2086,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", - transport->port, + transport->srcport, xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2010,6 +2126,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, +#if defined(CONFIG_NFS_V4_1) + .release_request = bc_release_request, +#endif /* CONFIG_NFS_V4_1 */ .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, @@ -2046,7 +2165,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) - memcpy(&new->addr, args->srcaddr, args->addrlen); + memcpy(&new->srcaddr, args->srcaddr, args->addrlen); return xprt; } @@ -2095,7 +2214,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); + xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) @@ -2103,15 +2222,22 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); + xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2160,23 +2286,33 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_connect_worker4); + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_connect_worker6); + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + if (try_module_get(THIS_MODULE)) return xprt; @@ -2235,3 +2371,55 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); } + +static int param_set_uint_minmax(const char *val, struct kernel_param *kp, + unsigned int min, unsigned int max) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL || num < min || num > max) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} + +static int param_set_portnr(const char *val, struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_RESVPORT, + RPC_MAX_RESVPORT); +} + +static int param_get_portnr(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_portnr(name, p) \ + __param_check(name, p, unsigned int); + +module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); +module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); + +static int param_set_slot_table_size(const char *val, struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_SLOT_TABLE, + RPC_MAX_SLOT_TABLE); +} + +static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_slot_table_size(name, p) \ + __param_check(name, p, unsigned int); + +module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, + slot_table_size, 0644); +module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, + slot_table_size, 0644); + diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9dcc6e7f96ec..fc3ebb906911 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static void unix_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -1946,7 +1946,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: - amount = atomic_read(&sk->sk_wmem_alloc); + amount = sk_wmem_alloc_get(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: @@ -1985,7 +1985,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2022,7 +2022,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2053,7 +2053,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer_get(sk); if (other) { if (unix_peer(other) != sk) { - poll_wait(file, &unix_sk(other)->peer_wait, + sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); if (unix_recvq_full(other)) writable = 0; diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 466e2d22d256..258daa80ad92 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -48,6 +48,7 @@ #include <linux/kernel.h> #include <linux/module.h> /* support for loadable modules */ #include <linux/slab.h> /* kmalloc(), kfree() */ +#include <linux/smp_lock.h> #include <linux/mm.h> #include <linux/string.h> /* inline mem*, str* functions */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 24168560ebae..634496b3ed77 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -447,6 +447,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev = __cfg80211_drv_from_info(info); if (IS_ERR(rdev)) { + mutex_unlock(&cfg80211_mutex); result = PTR_ERR(rdev); goto unlock; } @@ -996,7 +997,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(hdr)) { err = PTR_ERR(hdr); - goto out; + goto free_msg; } cookie.msg = msg; @@ -1010,7 +1011,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) &cookie, get_key_callback); if (err) - goto out; + goto free_msg; if (cookie.error) goto nla_put_failure; @@ -1021,6 +1022,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put_failure: err = -ENOBUFS; + free_msg: nlmsg_free(msg); out: cfg80211_put_dev(drv); @@ -1687,13 +1689,52 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* disallow mesh-specific things */ + if (params.plink_action) + err = -EINVAL; + break; + case NL80211_IFTYPE_STATION: + /* disallow everything but AUTHORIZED flag */ + if (params.plink_action) + err = -EINVAL; + if (params.vlan) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) goto out; @@ -1728,9 +1769,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) - return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; @@ -1745,9 +1783,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - if (!params.aid || params.aid > IEEE80211_MAX_AID) - return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_AID]) { + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (!params.aid || params.aid > IEEE80211_MAX_AID) + return -EINVAL; + } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = @@ -1762,13 +1802,39 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* all ok but must have AID */ + if (!params.aid) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.aid) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) goto out; @@ -1812,7 +1878,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5e14371cda70..75a406d33619 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1089,17 +1089,18 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (wiphy->disable_beacon_hints) + return; + chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) { + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; channel_changed = true; } - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) { + if (chan->flags & IEEE80211_CHAN_NO_IBSS) { chan->flags &= ~IEEE80211_CHAN_NO_IBSS; channel_changed = true; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e37829a49dc4..4e167a8e11be 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -30,7 +30,8 @@ int set_regdom(const struct ieee80211_regdomain *rd); * non-radar 5 GHz channels. * * Drivers do not need to call this, cfg80211 will do it for after a scan - * on a newly found BSS. + * on a newly found BSS. If you cannot make use of this feature you can + * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e95b638b919f..7e595ce24eeb 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -35,8 +35,6 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) else nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev); - wiphy_to_dev(request->wiphy)->scan_req = NULL; - #ifdef CONFIG_WIRELESS_EXT if (!aborted) { memset(&wrqu, 0, sizeof(wrqu)); @@ -48,6 +46,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) dev_put(dev); out: + wiphy_to_dev(request->wiphy)->scan_req = NULL; kfree(request); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -119,7 +118,7 @@ static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) if (!ie1 && !ie2) return 0; - if (!ie1) + if (!ie1 || !ie2) return -1; r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1])); @@ -172,6 +171,8 @@ static bool is_mesh(struct cfg80211_bss *a, ie = find_ie(WLAN_EID_MESH_CONFIG, a->information_elements, a->len_information_elements); + if (!ie) + return false; if (ie[1] != IEEE80211_MESH_CONFIG_LEN) return false; @@ -366,7 +367,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, res); if (found) { - kref_get(&found->ref); found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ed80af8ca5fb..5e6c072c64d3 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -40,6 +40,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/smp_lock.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/net.h> @@ -332,14 +333,14 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) /* * Deferred destroy. */ -void x25_destroy_socket(struct sock *); +static void __x25_destroy_socket(struct sock *); /* * handler for deferred kills. */ static void x25_destroy_timer(unsigned long data) { - x25_destroy_socket((struct sock *)data); + x25_destroy_socket_from_timer((struct sock *)data); } /* @@ -349,12 +350,10 @@ static void x25_destroy_timer(unsigned long data) * will touch it and we are (fairly 8-) ) safe. * Not static as it's used by the timer */ -void x25_destroy_socket(struct sock *sk) +static void __x25_destroy_socket(struct sock *sk) { struct sk_buff *skb; - sock_hold(sk); - lock_sock(sk); x25_stop_heartbeat(sk); x25_stop_timer(sk); @@ -374,8 +373,7 @@ void x25_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; sk->sk_timer.function = x25_destroy_timer; @@ -385,7 +383,22 @@ void x25_destroy_socket(struct sock *sk) /* drop last reference so sock_put will free */ __sock_put(sk); } +} + +void x25_destroy_socket_from_timer(struct sock *sk) +{ + sock_hold(sk); + bh_lock_sock(sk); + __x25_destroy_socket(sk); + bh_unlock_sock(sk); + sock_put(sk); +} +static void x25_destroy_socket(struct sock *sk) +{ + sock_hold(sk); + lock_sock(sk); + __x25_destroy_socket(sk); release_sock(sk); sock_put(sk); } @@ -1259,8 +1272,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { - int amount = sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc); + int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + if (amount < 0) amount = 0; rc = put_user(amount, (unsigned int __user *)argp); diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 1afa44d25beb..0a04e62e0e18 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -163,8 +163,8 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v) devname, x25->lci & 0x0FFF, x25->state, x25->vs, x25->vr, x25->va, x25_display_timer(s) / HZ, x25->t2 / HZ, x25->t21 / HZ, x25->t22 / HZ, x25->t23 / HZ, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); out: return 0; diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d3e3e54db936..5c5db1a36399 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -113,7 +113,7 @@ static void x25_heartbeat_expiry(unsigned long param) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); - x25_destroy_socket(sk); + x25_destroy_socket_from_timer(sk); return; } break; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index d31ccb487730..faf54c6bf96b 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -292,8 +292,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cbc(cast128)", - .compat = "cast128", + .name = "cbc(cast5)", + .compat = "cast5", .uinfo = { .encr = { diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index d401dc8f05ed..e5195c99f71e 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -16,7 +16,7 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) { - return ntohl(daddr->a4 ^ saddr->a4); + return ntohl(daddr->a4 + saddr->a4); } static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f1f86565f16..f2f7c638083e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -668,22 +668,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -699,26 +687,11 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_addre hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -1001,25 +974,11 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family != family || x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - } - xfrm_state_hold(x); return x; } |